git.pld-linux.org Git - packages/mysql.git/commitdiff
- rel 0.5 (consider this to be test before rel 1); update percona patches; drop obsol...
author: Arkadiusz Miśkiewicz <arekm@maven.pl>
Fri, 28 Jan 2011 09:18:03 +0000 (09:18 +0000)
committer: cvs2git <feedback@pld-linux.org>
Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)
Changed files:
    bug580324.patch -> 1.1
    control_online_alter_index.patch -> 1.1
    error_pad.patch -> 1.1
    innodb_adaptive_hash_index_partitions.patch -> 1.1
    innodb_admin_command_base.patch -> 1.1
    innodb_buffer_pool_pages_i_s.patch -> 1.1
    innodb_buffer_pool_shm.patch -> 1.1
    innodb_deadlock_count.patch -> 1.1
    innodb_dict_size_limit.patch -> 1.1
    innodb_expand_import.patch -> 1.1
    innodb_extend_slow.patch -> 1.1
    innodb_extra_rseg.patch -> 1.1
    innodb_fast_checksum.patch -> 1.1
    innodb_files_extend.patch -> 1.1
    innodb_fix_misc.patch -> 1.1
    innodb_io_patches.patch -> 1.1
    innodb_lru_dump_restore.patch -> 1.1
    innodb_opt_lru_count.patch -> 1.1
    innodb_overwrite_relay_log_info.patch -> 1.1
    innodb_pass_corrupt_table.patch -> 1.1
    innodb_recovery_patches.patch -> 1.1
    innodb_separate_doublewrite.patch -> 1.1
    innodb_show_lock_name.patch -> 1.1
    innodb_show_status.patch -> 1.1
    innodb_show_status_extend.patch -> 1.1
    innodb_show_sys_tables.patch -> 1.1
    innodb_split_buf_pool_mutex.patch -> 1.1
    innodb_stats.patch -> 1.1
    innodb_thread_concurrency_timer_based.patch -> 1.1
    log_connection_error.patch -> 1.1
    log_warnings_silence.patch -> 1.1
    microsec_process.patch -> 1.1
    mysql-fix-dummy-thread-race-condition.patch -> 1.3
    mysql-innodb_extend_slow.patch -> 1.3
    mysql-innodb_split_buf_pool_mutex.patch -> 1.4
    mysql-libs.patch -> 1.25
    mysql-libwrap.patch -> 1.5
    mysql-microsec_process.patch -> 1.4
    mysql-noproc.patch -> 1.3
    mysql-test.diff -> 1.1
    mysql-userstat.patch -> 1.3
    mysql.spec -> 1.513
    mysql_dump_ignore_ct.patch -> 1.1
    mysql_remove_eol_carret.patch -> 1.1
    mysql_syslog.patch -> 1.1
    optimizer_fix.patch -> 1.1
    percona.sh -> 1.8
    percona_support.patch -> 1.1
    plugin-avoid-version.patch -> 1.4
    query_cache_enhance.patch -> 1.1
    remove_fcntl_excessive_calls.patch -> 1.1
    response_time_distribution.patch -> 1.1
    show_slave_status_nolock.patch -> 1.1
    show_temp.patch -> 1.1
    slow_extended.patch -> 1.1
    sql_no_fcache.patch -> 1.1
    userstat.patch -> 1.1

54 files changed:
bug580324.patch [new file with mode: 0644]
control_online_alter_index.patch [new file with mode: 0644]
error_pad.patch [new file with mode: 0644]
innodb_adaptive_hash_index_partitions.patch [new file with mode: 0644]
innodb_admin_command_base.patch [new file with mode: 0644]
innodb_buffer_pool_pages_i_s.patch [new file with mode: 0644]
innodb_buffer_pool_shm.patch [new file with mode: 0644]
innodb_deadlock_count.patch [new file with mode: 0644]
innodb_dict_size_limit.patch [new file with mode: 0644]
innodb_expand_import.patch [new file with mode: 0644]
innodb_extend_slow.patch [moved from mysql-innodb_extend_slow.patch with 70% similarity]
innodb_extra_rseg.patch [new file with mode: 0644]
innodb_fast_checksum.patch [new file with mode: 0644]
innodb_files_extend.patch [new file with mode: 0644]
innodb_fix_misc.patch [new file with mode: 0644]
innodb_io_patches.patch [new file with mode: 0644]
innodb_lru_dump_restore.patch [new file with mode: 0644]
innodb_opt_lru_count.patch [new file with mode: 0644]
innodb_overwrite_relay_log_info.patch [new file with mode: 0644]
innodb_pass_corrupt_table.patch [new file with mode: 0644]
innodb_recovery_patches.patch [new file with mode: 0644]
innodb_separate_doublewrite.patch [new file with mode: 0644]
innodb_show_lock_name.patch [new file with mode: 0644]
innodb_show_status.patch [new file with mode: 0644]
innodb_show_status_extend.patch [new file with mode: 0644]
innodb_show_sys_tables.patch [new file with mode: 0644]
innodb_split_buf_pool_mutex.patch [new file with mode: 0644]
innodb_stats.patch [new file with mode: 0644]
innodb_thread_concurrency_timer_based.patch [new file with mode: 0644]
log_connection_error.patch [new file with mode: 0644]
log_warnings_silence.patch [new file with mode: 0644]
microsec_process.patch [moved from mysql-microsec_process.patch with 74% similarity]
mysql-fix-dummy-thread-race-condition.patch [deleted file]
mysql-innodb_split_buf_pool_mutex.patch [deleted file]
mysql-libs.patch [deleted file]
mysql-libwrap.patch [deleted file]
mysql-noproc.patch [deleted file]
mysql-test.diff [new file with mode: 0644]
mysql.spec
mysql_dump_ignore_ct.patch [new file with mode: 0644]
mysql_remove_eol_carret.patch [new file with mode: 0644]
mysql_syslog.patch [new file with mode: 0644]
optimizer_fix.patch [new file with mode: 0644]
percona.sh [deleted file]
percona_support.patch [new file with mode: 0644]
plugin-avoid-version.patch [deleted file]
query_cache_enhance.patch [new file with mode: 0644]
remove_fcntl_excessive_calls.patch [new file with mode: 0644]
response_time_distribution.patch [new file with mode: 0644]
show_slave_status_nolock.patch [new file with mode: 0644]
show_temp.patch [new file with mode: 0644]
slow_extended.patch [new file with mode: 0644]
sql_no_fcache.patch [new file with mode: 0644]
userstat.patch [moved from mysql-userstat.patch with 63% similarity]

diff --git a/bug580324.patch b/bug580324.patch
new file mode 100644
index 0000000..35d0e2b
--- /dev/null
+++ b/bug580324.patch
@@ -0,0 +1,109 @@
+# name       : bug580324.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/sql/sql_base.cc b/sql/sql_base.cc
+--- a/sql/sql_base.cc  2011-01-11 21:35:26.000000000 +0300
++++ b/sql/sql_base.cc  2011-01-11 21:42:02.000000000 +0300
+@@ -251,8 +251,12 @@
+                           const TABLE_LIST *table_list,
+                           bool tmp_table)
+ {
+-  uint key_length= (uint) (strmov(strmov(key, table_list->db)+1,
+-                                  table_list->table_name)-key)+1;
++  char *db_end= strnmov(key, table_list->db, MAX_DBKEY_LENGTH - 2);
++  *db_end++= '\0';
++  char *table_end= strnmov(db_end, table_list->table_name,
++                           key + MAX_DBKEY_LENGTH - 1 - db_end);
++  *table_end++= '\0';
++  uint key_length= (uint) (table_end-key);
+   if (tmp_table)
+   {
+     int4store(key + key_length, thd->server_id);
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2011-01-11 21:35:26.000000000 +0300
++++ b/sql/sql_parse.cc 2011-01-11 21:48:50.000000000 +0300
+@@ -1110,11 +1110,18 @@
+     break;
+ #else
+   {
+-    char *fields, *packet_end= packet + packet_length, *arg_end;
++    char *fields;
++    char *packet_end= packet + packet_length;
++    char *wildcard;
+     /* Locked closure of all tables */
+     TABLE_LIST table_list;
++    char table_name_buff[NAME_LEN+1];
+     LEX_STRING table_name;
++    uint dummy_errors;
+     LEX_STRING db;
++
++    table_name.str= table_name_buff;
++    table_name.length= 0;
+     /*
+       SHOW statements should not add the used tables to the list of tables
+       used in a transaction.
+@@ -1127,24 +1134,23 @@
+     /*
+       We have name + wildcard in packet, separated by endzero
+     */
+-    arg_end= strend(packet);
+-    uint arg_length= arg_end - packet;
+-
+-    /* Check given table name length. */
+-    if (arg_length >= packet_length || arg_length > NAME_LEN)
++    wildcard= strend(packet);
++    table_name.length= wildcard - packet;
++    wildcard++;
++    uint query_length= (uint) (packet_end - wildcard); // Don't count end \0
++    if (table_name.length > NAME_LEN || query_length > NAME_LEN)
+     {
+       my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
+       break;
+     }
+-    thd->convert_string(&table_name, system_charset_info,
+-                      packet, arg_length, thd->charset());
+-    if (check_table_name(table_name.str, table_name.length, FALSE))
+-    {
+-      /* this is OK due to convert_string() null-terminating the string */
+-      my_error(ER_WRONG_TABLE_NAME, MYF(0), table_name.str);
++    table_name.length= copy_and_convert(table_name.str,
++                                        sizeof(table_name_buff)-1,
++                                        system_charset_info,
++                                        packet, table_name.length,
++                                        thd->charset(), &dummy_errors);
++    table_name.str[table_name.length]= '\0';
++    if (!(fields= (char *) thd->memdup(wildcard, query_length + 1)))
+       break;
+-    }
+-    packet= arg_end + 1;
+     mysql_reset_thd_for_next_command(thd);
+     lex_start(thd);
+     /* Must be before we init the table list. */
+@@ -1169,9 +1175,6 @@
+         table_list.schema_table= schema_table;
+     }
+-    uint query_length= (uint) (packet_end - packet); // Don't count end \0
+-    if (!(fields= (char *) thd->memdup(packet, query_length + 1)))
+-      break;
+     thd->set_query(fields, query_length);
+     general_log_print(thd, command, "%s %s", table_list.table_name, fields);
+diff -ruN a/strings/ctype-utf8.c b/strings/ctype-utf8.c
+--- a/strings/ctype-utf8.c     2010-12-03 20:58:26.000000000 +0300
++++ b/strings/ctype-utf8.c     2011-01-11 21:42:02.000000000 +0300
+@@ -4212,6 +4212,10 @@
+ {
+   int code;
+   char hex[]= "0123456789abcdef";
++
++  if (s >= e)
++    return MY_CS_TOOSMALL;
++
+   if (wc < 128 && filename_safe_char[wc])
+   {
+     *s= (uchar) wc;
diff --git a/control_online_alter_index.patch b/control_online_alter_index.patch
new file mode 100644
index 0000000..371ce57
--- /dev/null
+++ b/control_online_alter_index.patch
@@ -0,0 +1,90 @@
+# name       : control_online_alter_index.patch
+# introduced : 12
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/sql/handler.h b/sql/handler.h
+--- a/sql/handler.h    2010-11-03 07:01:14.000000000 +0900
++++ b/sql/handler.h    2010-12-03 13:51:04.727293058 +0900
+@@ -194,6 +194,19 @@
+ #define HA_ONLINE_DROP_UNIQUE_INDEX             (1L << 9) /*drop uniq. online*/
+ #define HA_ONLINE_ADD_PK_INDEX                  (1L << 10)/*add prim. online*/
+ #define HA_ONLINE_DROP_PK_INDEX                 (1L << 11)/*drop prim. online*/
++
++#define HA_ONLINE_ALTER_INDEX_MASK    (HA_ONLINE_ADD_INDEX_NO_WRITES \
++                                              | HA_ONLINE_DROP_INDEX_NO_WRITES \
++                                              | HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES \
++                                              | HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES \
++                                              | HA_ONLINE_ADD_PK_INDEX_NO_WRITES \
++                                              | HA_ONLINE_DROP_PK_INDEX_NO_WRITES \
++                                              | HA_ONLINE_ADD_INDEX \
++                                              | HA_ONLINE_DROP_INDEX \
++                                              | HA_ONLINE_ADD_UNIQUE_INDEX \
++                                              | HA_ONLINE_DROP_UNIQUE_INDEX \
++                                              | HA_ONLINE_ADD_PK_INDEX \
++                                              | HA_ONLINE_DROP_PK_INDEX)
+ /*
+   HA_PARTITION_FUNCTION_SUPPORTED indicates that the function is
+   supported at all.
+diff -ruN a/sql/sql_class.h b/sql/sql_class.h
+--- a/sql/sql_class.h  2010-12-02 20:31:56.200956501 +0900
++++ b/sql/sql_class.h  2010-12-03 13:51:04.744953174 +0900
+@@ -481,6 +481,8 @@
+   my_bool engine_condition_pushdown;
+   my_bool keep_files_on_create;
++  my_bool online_alter_index;
++
+   my_bool old_alter_table;
+   my_bool old_passwords;
+   my_bool big_tables;
+diff -ruN a/sql/sql_partition.cc b/sql/sql_partition.cc
+--- a/sql/sql_partition.cc     2010-11-03 07:01:14.000000000 +0900
++++ b/sql/sql_partition.cc     2010-12-03 13:59:56.444039002 +0900
+@@ -4635,7 +4635,12 @@
+         alter_info->num_parts= curr_part_no - new_part_no;
+       }
+     }
+-    if (!(flags= new_table->file->alter_table_flags(alter_info->flags)))
++    flags= new_table->file->alter_table_flags(alter_info->flags);
++    if (!thd->variables.online_alter_index)
++    {
++      flags&= ~((uint)HA_ONLINE_ALTER_INDEX_MASK);
++    }
++    if (!flags)
+     {
+       my_error(ER_PARTITION_FUNCTION_FAILURE, MYF(0));
+       goto err;
+diff -ruN a/sql/sql_table.cc b/sql/sql_table.cc
+--- a/sql/sql_table.cc 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/sql_table.cc 2010-12-03 13:51:04.768955495 +0900
+@@ -6107,6 +6107,10 @@
+     uint  *idx_end_p;
+     alter_flags= table->file->alter_table_flags(alter_info->flags);
++    if (!thd->variables.online_alter_index)
++    {
++      alter_flags&= ~((ulong)HA_ONLINE_ALTER_INDEX_MASK);
++    }
+     DBUG_PRINT("info", ("alter_flags: %lu", alter_flags));
+     /* Check dropped indexes. */
+     for (idx_p= index_drop_buffer, idx_end_p= idx_p + index_drop_count;
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc  2010-12-02 21:23:05.569356468 +0900
++++ b/sql/sys_vars.cc  2010-12-03 14:05:28.857356603 +0900
+@@ -2124,6 +2124,13 @@
+        GLOBAL_VAR(opt_optimizer_fix),
+        NO_CMD_LINE, DEFAULT(TRUE));
++static Sys_var_mybool Sys_fast_index_creation(
++       "fast_index_creation",
++       "If disabled, suppresses online operations for indexes of ALTER TABLE "
++       "(e.g. fast index creation of InnoDB Plugin) for the session.",
++       SESSION_VAR(online_alter_index), NO_CMD_LINE,
++       DEFAULT(TRUE));
++
+ /** propagates changes to the relevant flag of @@optimizer_switch */
+ static bool fix_engine_condition_pushdown(sys_var *self, THD *thd,
+                                           enum_var_type type)
diff --git a/error_pad.patch b/error_pad.patch
new file mode 100644
index 0000000..55619db
--- /dev/null
+++ b/error_pad.patch
@@ -0,0 +1,267 @@
+# name       : error_pad.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/extra/comp_err.c b/extra/comp_err.c
+--- a/extra/comp_err.c 2010-08-03 17:24:24.000000000 +0000
++++ b/extra/comp_err.c 2010-09-14 16:49:28.000000000 +0000
+@@ -30,11 +30,12 @@
+ #include <assert.h>
+ #include <my_dir.h>
+-#define MAX_ROWS  1000
++#define MAX_ROWS  5000
+ #define HEADER_LENGTH 32                /* Length of header in errmsg.sys */
+ #define DEFAULT_CHARSET_DIR "../sql/share/charsets"
+ #define ER_PREFIX "ER_"
+ #define WARN_PREFIX "WARN_"
++#define PADD_PREFIX "PADD_"
+ static char *OUTFILE= (char*) "errmsg.sys";
+ static char *HEADERFILE= (char*) "mysqld_error.h";
+ static char *NAMEFILE= (char*) "mysqld_ername.h";
+@@ -89,6 +90,7 @@
+   const char *sql_code1;              /* sql state */
+   const char *sql_code2;              /* ODBC state */
+   struct errors *next_error;            /* Pointer to next error */
++  my_bool is_padding;                   /* If true - padd this er_name while er_code != d_code*/
+   DYNAMIC_ARRAY msg;                    /* All language texts for this error */
+ };
+@@ -127,6 +129,7 @@
+ static struct languages *parse_charset_string(char *str);
++static struct errors *parse_padd_string(char *ptr, int er_count);
+ static struct errors *parse_error_string(char *ptr, int er_count);
+ static struct message *parse_message_string(struct message *new_message,
+                                           char *str);
+@@ -252,6 +255,11 @@
+   for (tmp_error= error_head; tmp_error; tmp_error= tmp_error->next_error)
+   {
++    if (tmp_error->is_padding)
++    {
++      er_last= tmp_error->d_code;
++      continue;
++    }
+     /*
+        generating mysqld_error.h
+        fprintf() will automatically add \r on windows
+@@ -344,12 +352,29 @@
+               "language\n", tmp_error->er_name, tmp_lang->lang_short_name);
+       goto err;
+       }
+-      if (copy_rows(to, tmp->text, row_nr, start_pos))
++      if (tmp_error->is_padding)
+       {
+-      fprintf(stderr, "Failed to copy rows to %s\n", outfile);
+-      goto err;
++        uint padd_to= tmp_error->d_code;
++        char* padd_message= tmp->text;
++        while ((row_nr+er_offset) < padd_to)
++        {
++          if (copy_rows(to, padd_message,row_nr,start_pos))
++          {
++            fprintf(stderr, "Failed to copy rows to %s\n", outfile);
++            goto err;
++          }
++          row_nr++;
++        }
++      }
++      else
++      {
++        if (copy_rows(to, tmp->text, row_nr, start_pos))
++        {
++          fprintf(stderr, "Failed to copy rows to %s\n", outfile);
++          goto err;
++        }
++        row_nr++;
+       }
+-      row_nr++;
+     }
+     /* continue with header of the errmsg.sys file */
+@@ -500,14 +525,26 @@
+       DBUG_RETURN(0);
+       continue;
+     }
+-    if (is_prefix(str, ER_PREFIX) || is_prefix(str, WARN_PREFIX))
++    if (is_prefix(str, ER_PREFIX) || is_prefix(str, WARN_PREFIX) || is_prefix(str, PADD_PREFIX))
+     {
+-      if (!(current_error= parse_error_string(str, rcount)))
++      if (is_prefix(str, PADD_PREFIX))
+       {
+-      fprintf(stderr, "Failed to parse the error name string\n");
+-      DBUG_RETURN(0);
++        if (!(current_error= parse_padd_string(str, rcount)))
++        {
++          fprintf(stderr, "Failed to parse the error padd string\n");
++          DBUG_RETURN(0);
++        }
++        rcount= current_error->d_code - er_offset;  /* Count number of unique errors */
++      }
++      else
++      {
++        if (!(current_error= parse_error_string(str, rcount)))
++        {
++          fprintf(stderr, "Failed to parse the error name string\n");
++          DBUG_RETURN(0);
++        }
++        rcount++;                         /* Count number of unique errors */
+       }
+-      rcount++;                         /* Count number of unique errors */
+       /* add error to the list */
+       *tail_error= current_error;
+@@ -848,78 +885,122 @@
+   DBUG_RETURN(new_message);
+ }
++static struct errors* create_new_error(my_bool is_padding, char *er_name, int d_code, const char *sql_code1, const char *sql_code2)
++{
++  struct errors *new_error;
++  DBUG_ENTER("create_new_error");
++  /* create a new element */
++  new_error= (struct errors *) my_malloc(sizeof(*new_error), MYF(MY_WME));
++  if (my_init_dynamic_array(&new_error->msg, sizeof(struct message), 0, 0))
++    DBUG_RETURN(0);                           /* OOM: Fatal error */
++  new_error->is_padding= is_padding;
++  DBUG_PRINT("info", ("is_padding: %s", (is_padding ? "true" : "false")));
++  new_error->er_name= er_name;
++  DBUG_PRINT("info", ("er_name: %s", er_name));
++  new_error->d_code= d_code;
++  DBUG_PRINT("info", ("d_code: %d", d_code));
++  new_error->sql_code1= sql_code1;
++  DBUG_PRINT("info", ("sql_code1: %s", sql_code1));
++  new_error->sql_code2= sql_code2;
++  DBUG_PRINT("info", ("sql_code2: %s", sql_code2));
++  DBUG_RETURN(new_error);
++}
+ /*
+-  Parsing the string with error name and codes; returns the pointer to
++  Parsing the string with padd syntax (name + error to pad); returns the pointer to
+   the errors struct
+ */
+-static struct errors *parse_error_string(char *str, int er_count)
++static struct errors *parse_padd_string(char* str, int er_count)
+ {
+-  struct errors *new_error;
++  char *er_name;
++  uint d_code;
++  char *start;
+   DBUG_ENTER("parse_error_string");
+   DBUG_PRINT("enter", ("str: %s", str));
+-  /* create a new element */
+-  new_error= (struct errors *) my_malloc(sizeof(*new_error), MYF(MY_WME));
++  start= str;
++  str= skip_delimiters(str);
+-  if (my_init_dynamic_array(&new_error->msg, sizeof(struct message), 0, 0))
++  /* getting the error name */
++
++  if (!(er_name= get_word(&str)))
+     DBUG_RETURN(0);                           /* OOM: Fatal error */
+-  /* getting the error name */
+   str= skip_delimiters(str);
+-  if (!(new_error->er_name= get_word(&str)))
++  if (!(d_code= parse_error_offset(start)))
++  {
++    fprintf(stderr, "Failed to parse the error padd string '%s' '%s' (d_code doesn't parse)!\n",er_name,str);
++    DBUG_RETURN(0);
++  }
++  if (d_code < (uint)(er_offset + er_count))
++  {
++    fprintf(stderr, "Error to padding less current error number!\n");
++    DBUG_RETURN(0);
++  }
++  DBUG_RETURN(create_new_error(TRUE,er_name,d_code,empty_string,empty_string));
++}
++
++/*
++  Parsing the string with error name and codes; returns the pointer to
++  the errors struct
++*/
++
++static struct errors *parse_error_string(char *str, int er_count)
++{
++  char *er_name;
++  int d_code;
++  const char *sql_code1= empty_string;
++  const char *sql_code2= empty_string;
++  DBUG_ENTER("parse_error_string");
++  DBUG_PRINT("enter", ("str: %s", str));
++
++  str= skip_delimiters(str);
++
++  /* getting the error name */
++
++  if (!(er_name= get_word(&str)))
+     DBUG_RETURN(0);                           /* OOM: Fatal error */
+-  DBUG_PRINT("info", ("er_name: %s", new_error->er_name));
+   str= skip_delimiters(str);
+   /* getting the code1 */
+-
+-  new_error->d_code= er_offset + er_count;
+-  DBUG_PRINT("info", ("d_code: %d", new_error->d_code));
++  d_code= er_offset + er_count;
+   str= skip_delimiters(str);
+   /* if we reached EOL => no more codes, but this can happen */
+   if (!*str)
+   {
+-    new_error->sql_code1= empty_string;
+-    new_error->sql_code2= empty_string;
+     DBUG_PRINT("info", ("str: %s", str));
+-    DBUG_RETURN(new_error);
++    goto complete_create;
+   }
+-
+   /* getting the sql_code 1 */
+-
+-  if (!(new_error->sql_code1= get_word(&str)))
++  if (!(sql_code1= get_word(&str)))
+     DBUG_RETURN(0);                           /* OOM: Fatal error */
+-  DBUG_PRINT("info", ("sql_code1: %s", new_error->sql_code1));
+   str= skip_delimiters(str);
+   /* if we reached EOL => no more codes, but this can happen */
+   if (!*str)
+   {
+-    new_error->sql_code2= empty_string;
+     DBUG_PRINT("info", ("str: %s", str));
+-    DBUG_RETURN(new_error);
++    goto complete_create;
+   }
+-
+   /* getting the sql_code 2 */
+-  if (!(new_error->sql_code2= get_word(&str)))
++  if (!(sql_code2= get_word(&str)))
+     DBUG_RETURN(0);                           /* OOM: Fatal error */
+-  DBUG_PRINT("info", ("sql_code2: %s", new_error->sql_code2));
+   str= skip_delimiters(str);
++
+   if (*str)
+   {
+     fprintf(stderr, "The error line did not end with sql/odbc code!");
+     DBUG_RETURN(0);
+   }
+-
+-  DBUG_RETURN(new_error);
++complete_create:
++  DBUG_RETURN(create_new_error(FALSE,er_name,d_code,sql_code1,sql_code2));
+ }
diff --git a/innodb_adaptive_hash_index_partitions.patch b/innodb_adaptive_hash_index_partitions.patch
new file mode 100644
index 0000000..a171106
--- /dev/null
+++ b/innodb_adaptive_hash_index_partitions.patch
@@ -0,0 +1,1508 @@
+# name       : innodb_adaptive_hash_index_partitions.patch
+# introduced : XtraDB on 5.5 (-13?)
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
+--- a/storage/innobase/btr/btr0btr.c   2010-12-04 15:52:23.355483176 +0900
++++ b/storage/innobase/btr/btr0btr.c   2010-12-04 16:12:48.639514256 +0900
+@@ -954,7 +954,7 @@
+       }
+       ut_a(block);
+-      btr_search_drop_page_hash_index(block);
++      btr_search_drop_page_hash_index(block, NULL);
+       header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+ #ifdef UNIV_BTR_DEBUG
+@@ -1023,7 +1023,7 @@
+ #ifndef UNIV_HOTBACKUP
+       if (UNIV_LIKELY(!recovery)) {
+-              btr_search_drop_page_hash_index(block);
++              btr_search_drop_page_hash_index(block, index);
+       }
+       block->check_index_page_at_flush = TRUE;
+@@ -1188,7 +1188,7 @@
+       ut_a(!page_zip || page_zip_validate(page_zip, page));
+ #endif /* UNIV_ZIP_DEBUG */
+-      btr_search_drop_page_hash_index(block);
++      btr_search_drop_page_hash_index(block, index);
+       /* Recreate the page: note that global data on page (possible
+       segment headers, next page-field, etc.) is preserved intact */
+@@ -2497,7 +2497,7 @@
+               mem_heap_free(heap);
+       }
+-      btr_search_drop_page_hash_index(block);
++      btr_search_drop_page_hash_index(block, index);
+       /* Make the father empty */
+       btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+@@ -2720,7 +2720,7 @@
+                       goto err_exit;
+               }
+-              btr_search_drop_page_hash_index(block);
++              btr_search_drop_page_hash_index(block, index);
+               /* Remove the page from the level list */
+               btr_level_list_remove(space, zip_size, page, mtr);
+@@ -2761,7 +2761,7 @@
+                       goto err_exit;
+               }
+-              btr_search_drop_page_hash_index(block);
++              btr_search_drop_page_hash_index(block, index);
+ #ifdef UNIV_BTR_DEBUG
+               if (UNIV_LIKELY_NULL(merge_page_zip)) {
+@@ -2875,7 +2875,7 @@
+               ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
+               ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+-              btr_search_drop_page_hash_index(block);
++              btr_search_drop_page_hash_index(block, index);
+               btr_page_get_father(index, block, mtr, &cursor);
+               father = btr_cur_get_block(&cursor);
+@@ -2980,7 +2980,7 @@
+       page = buf_block_get_frame(block);
+       ut_a(page_is_comp(merge_page) == page_is_comp(page));
+-      btr_search_drop_page_hash_index(block);
++      btr_search_drop_page_hash_index(block, index);
+       if (left_page_no == FIL_NULL && !page_is_leaf(page)) {
+diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
+--- a/storage/innobase/btr/btr0cur.c   2010-12-04 15:52:23.359513820 +0900
++++ b/storage/innobase/btr/btr0cur.c   2010-12-04 16:12:48.643551837 +0900
+@@ -486,7 +486,7 @@
+ #ifdef UNIV_SEARCH_PERF_STAT
+       info->n_searches++;
+ #endif
+-      if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
++      if (rw_lock_get_writer(btr_search_get_latch(cursor->index->id)) == RW_LOCK_NOT_LOCKED
+           && latch_mode <= BTR_MODIFY_LEAF
+           && info->last_hash_succ
+           && !estimate
+@@ -522,7 +522,7 @@
+       if (has_search_latch) {
+               /* Release possible search latch to obey latching order */
+-              rw_lock_s_unlock(&btr_search_latch);
++              rw_lock_s_unlock(btr_search_get_latch(cursor->index->id));
+       }
+       /* Store the position of the tree latch we push to mtr so that we
+@@ -844,7 +844,7 @@
+       if (has_search_latch) {
+-              rw_lock_s_lock(&btr_search_latch);
++              rw_lock_s_lock(btr_search_get_latch(cursor->index->id));
+       }
+ }
+@@ -2059,7 +2059,7 @@
+                       btr_search_update_hash_on_delete(cursor);
+               }
+-              rw_lock_x_lock(&btr_search_latch);
++              rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
+       }
+       if (!(flags & BTR_KEEP_SYS_FLAG)) {
+@@ -2073,7 +2073,7 @@
+       row_upd_rec_in_place(rec, index, offsets, update, page_zip);
+       if (block->is_hashed) {
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+       }
+       if (page_zip && !dict_index_is_clust(index)
+@@ -2857,7 +2857,7 @@
+       block = btr_cur_get_block(cursor);
+       if (block->is_hashed) {
+-              rw_lock_x_lock(&btr_search_latch);
++              rw_lock_x_lock(btr_search_get_latch(index->id));
+       }
+       page_zip = buf_block_get_page_zip(block);
+@@ -2872,7 +2872,7 @@
+       }
+       if (block->is_hashed) {
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(index->id));
+       }
+       btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
+@@ -3003,13 +3003,13 @@
+             == dict_table_is_comp(cursor->index->table));
+       if (block->is_hashed) {
+-              rw_lock_x_lock(&btr_search_latch);
++              rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
+       }
+       btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
+       if (block->is_hashed) {
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+       }
+       btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
+diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
+--- a/storage/innobase/btr/btr0sea.c   2010-12-04 15:52:23.387513429 +0900
++++ b/storage/innobase/btr/btr0sea.c   2010-12-04 16:14:51.721884049 +0900
+@@ -48,6 +48,8 @@
+ UNIV_INTERN char              btr_search_enabled      = TRUE;
+ UNIV_INTERN ibool             btr_search_fully_disabled = FALSE;
++UNIV_INTERN ulint             btr_search_index_num    = 1;
++
+ /** Mutex protecting btr_search_enabled */
+ static mutex_t                        btr_search_enabled_mutex;
+@@ -79,7 +81,9 @@
+ /* We will allocate the latch from dynamic memory to get it to the
+ same DRAM page as other hotspot semaphores */
+-UNIV_INTERN rw_lock_t*                btr_search_latch_temp;
++//UNIV_INTERN rw_lock_t*              btr_search_latch_temp;
++
++UNIV_INTERN rw_lock_t**               btr_search_latch_part;
+ /** padding to prevent other memory update hotspots from residing on
+ the same memory cache line */
+@@ -131,18 +135,19 @@
+ will not guarantee success. */
+ static
+ void
+-btr_search_check_free_space_in_heap(void)
++btr_search_check_free_space_in_heap(
+ /*=====================================*/
++      index_id_t      key)
+ {
+       hash_table_t*   table;
+       mem_heap_t*     heap;
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++      ut_ad(!rw_lock_own(btr_search_get_latch(key), RW_LOCK_SHARED));
++      ut_ad(!rw_lock_own(btr_search_get_latch(key), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+-      table = btr_search_sys->hash_index;
++      table = btr_search_get_hash_index(key);
+       heap = table->heap;
+@@ -153,7 +158,7 @@
+       if (heap->free_block == NULL) {
+               buf_block_t*    block = buf_block_alloc(NULL, 0);
+-              rw_lock_x_lock(&btr_search_latch);
++              rw_lock_x_lock(btr_search_get_latch(key));
+               if (heap->free_block == NULL) {
+                       heap->free_block = block;
+@@ -161,7 +166,7 @@
+                       buf_block_free(block);
+               }
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(key));
+       }
+ }
+@@ -173,19 +178,30 @@
+ /*==================*/
+       ulint   hash_size)      /*!< in: hash index hash table size */
+ {
++      ulint i;
+       /* We allocate the search latch from dynamic memory:
+       see above at the global variable definition */
+-      btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
++      //btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
+-      rw_lock_create(btr_search_latch_key, &btr_search_latch,
+-                     SYNC_SEARCH_SYS);
++      //rw_lock_create(btr_search_latch_key, &btr_search_latch,
++      //             SYNC_SEARCH_SYS);
+       mutex_create(btr_search_enabled_mutex_key,
+                    &btr_search_enabled_mutex, SYNC_SEARCH_SYS_CONF);
+       btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));
+-      btr_search_sys->hash_index = ha_create(hash_size, 0, 0);
++      /* btr_search_index_num should be <= 32. (bits of trx->has_search_latch) */
++      btr_search_latch_part = mem_alloc(sizeof(rw_lock_t*) * btr_search_index_num);
++      btr_search_sys->hash_index = mem_alloc(sizeof(hash_table_t*) * btr_search_index_num);
++      for (i = 0; i < btr_search_index_num; i++) {
++              btr_search_latch_part[i] = mem_alloc(sizeof(rw_lock_t));
++
++              rw_lock_create(btr_search_latch_key,
++                              btr_search_latch_part[i], SYNC_SEARCH_SYS);
++
++              btr_search_sys->hash_index[i] = ha_create(hash_size, 0, 0);
++      }
+ }
+ /*****************************************************************//**
+@@ -195,11 +211,20 @@
+ btr_search_sys_free(void)
+ /*=====================*/
+ {
+-      rw_lock_free(&btr_search_latch);
+-      mem_free(btr_search_latch_temp);
+-      btr_search_latch_temp = NULL;
+-      mem_heap_free(btr_search_sys->hash_index->heap);
+-      hash_table_free(btr_search_sys->hash_index);
++      ulint i;
++
++      /* Release each partition: its hash table (and heap), then its latch. */
++      for (i = 0; i < btr_search_index_num; i++) {
++              mem_heap_free(btr_search_sys->hash_index[i]->heap);
++              hash_table_free(btr_search_sys->hash_index[i]);
++              rw_lock_free(btr_search_latch_part[i]);
++              mem_free(btr_search_latch_part[i]);
++      }
++
++      /* The pointer arrays were themselves obtained with mem_alloc(); free them too. */
++      mem_free(btr_search_latch_part);
++      btr_search_latch_part = NULL;
++      mem_free(btr_search_sys->hash_index);
+       mem_free(btr_search_sys);
+       btr_search_sys = NULL;
+ }
+@@ -212,7 +237,7 @@
+ /*====================*/
+ {
+       mutex_enter(&btr_search_enabled_mutex);
+-      rw_lock_x_lock(&btr_search_latch);
++      btr_search_x_lock_all();
+       /* Disable access to hash index, also tell ha_insert_for_fold()
+       stop adding new nodes to hash index, but still allow updating
+@@ -230,7 +255,7 @@
+       /* btr_search_enabled_mutex should guarantee this. */
+       ut_ad(!btr_search_enabled);
+-      rw_lock_x_unlock(&btr_search_latch);
++      btr_search_x_unlock_all();
+       mutex_exit(&btr_search_enabled_mutex);
+ }
+@@ -242,12 +267,12 @@
+ /*====================*/
+ {
+       mutex_enter(&btr_search_enabled_mutex);
+-      rw_lock_x_lock(&btr_search_latch);
++      btr_search_x_lock_all();
+       btr_search_enabled = TRUE;
+       btr_search_fully_disabled = FALSE;
+-      rw_lock_x_unlock(&btr_search_latch);
++      btr_search_x_unlock_all();
+       mutex_exit(&btr_search_enabled_mutex);
+ }
+@@ -300,20 +325,21 @@
+ ulint
+ btr_search_info_get_ref_count(
+ /*==========================*/
+-      btr_search_t*   info)   /*!< in: search info. */
++      btr_search_t*   info,   /*!< in: search info. */
++      index_id_t      key)    /*!< in: index id; selects the search latch partition */
+ {
+       ulint ret;
+       ut_ad(info);
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++      ut_ad(!rw_lock_own(btr_search_get_latch(key), RW_LOCK_SHARED));
++      ut_ad(!rw_lock_own(btr_search_get_latch(key), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+-      rw_lock_s_lock(&btr_search_latch);
++      rw_lock_s_lock(btr_search_get_latch(key));
+       ret = info->ref_count;
+-      rw_lock_s_unlock(&btr_search_latch);
++      rw_lock_s_unlock(btr_search_get_latch(key));
+       return(ret);
+ }
+@@ -334,8 +360,8 @@
+       int             cmp;
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++      ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_SHARED));
++      ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+       index = cursor->index;
+@@ -453,8 +479,8 @@
+                               /*!< in: cursor */
+ {
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++      ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_SHARED));
++      ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_EX));
+       ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED)
+             || rw_lock_own(&block->lock, RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -538,7 +564,7 @@
+       ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++      ut_ad(rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_EX));
+       ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+             || rw_lock_own(&(block->lock), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -578,10 +604,10 @@
+                       mem_heap_free(heap);
+               }
+ #ifdef UNIV_SYNC_DEBUG
+-              ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++              ut_ad(rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+-              ha_insert_for_fold(btr_search_sys->hash_index, fold,
++              ha_insert_for_fold(btr_search_get_hash_index(cursor->index->id), fold,
+                                  block, rec);
+       }
+ }
+@@ -601,8 +627,8 @@
+       ulint*          params2;
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++      ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_SHARED));
++      ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+       block = btr_cur_get_block(cursor);
+@@ -623,7 +649,7 @@
+       if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
+-              btr_search_check_free_space_in_heap();
++              btr_search_check_free_space_in_heap(cursor->index->id);
+       }
+       if (cursor->flag == BTR_CUR_HASH_FAIL) {
+@@ -633,11 +659,11 @@
+               btr_search_n_hash_fail++;
+ #endif /* UNIV_SEARCH_PERF_STAT */
+-              rw_lock_x_lock(&btr_search_latch);
++              rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
+               btr_search_update_hash_ref(info, block, cursor);
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+       }
+       if (build_index) {
+@@ -881,17 +907,17 @@
+       cursor->flag = BTR_CUR_HASH;
+       if (UNIV_LIKELY(!has_search_latch)) {
+-              rw_lock_s_lock(&btr_search_latch);
++              rw_lock_s_lock(btr_search_get_latch(index_id));
+               if (UNIV_UNLIKELY(!btr_search_enabled)) {
+                       goto failure_unlock;
+               }
+       }
+-      ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
+-      ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
++      ut_ad(rw_lock_get_writer(btr_search_get_latch(index_id)) != RW_LOCK_EX);
++      ut_ad(rw_lock_get_reader_count(btr_search_get_latch(index_id)) > 0);
+-      rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
++      rec = ha_search_and_get_data(btr_search_get_hash_index(index_id), fold);
+       if (UNIV_UNLIKELY(!rec)) {
+               goto failure_unlock;
+@@ -909,7 +935,7 @@
+                       goto failure_unlock;
+               }
+-              rw_lock_s_unlock(&btr_search_latch);
++              rw_lock_s_unlock(btr_search_get_latch(index_id));
+               buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
+       }
+@@ -1006,7 +1032,7 @@
+       /*-------------------------------------------*/
+ failure_unlock:
+       if (UNIV_LIKELY(!has_search_latch)) {
+-              rw_lock_s_unlock(&btr_search_latch);
++              rw_lock_s_unlock(btr_search_get_latch(index_id));
+       }
+ failure:
+       cursor->flag = BTR_CUR_HASH_FAIL;
+@@ -1029,10 +1055,11 @@
+ void
+ btr_search_drop_page_hash_index(
+ /*============================*/
+-      buf_block_t*    block)  /*!< in: block containing index page,
++      buf_block_t*    block,  /*!< in: block containing index page,
+                               s- or x-latched, or an index page
+                               for which we know that
+                               block->buf_fix_count == 0 */
++      dict_index_t*   index_in)
+ {
+       hash_table_t*           table;
+       ulint                   n_fields;
+@@ -1051,22 +1078,60 @@
+       ulint*                  offsets;
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++      if (index_in) {
++              ut_ad(!rw_lock_own(btr_search_get_latch(index_in->id), RW_LOCK_SHARED));
++              ut_ad(!rw_lock_own(btr_search_get_latch(index_in->id), RW_LOCK_EX));
++      }
+ #endif /* UNIV_SYNC_DEBUG */
+ retry:
+-      rw_lock_s_lock(&btr_search_latch);
++      if (index_in) {
++              index = index_in;
++              rw_lock_s_lock(btr_search_get_latch(index->id));
++      } else if (btr_search_index_num > 1) {
++              rw_lock_t*      btr_search_latch;
++
++              /* FIXME: this may still be an optimistic implementation. */
++              btr_search_latch = (rw_lock_t*)(block->btr_search_latch);
++              if (UNIV_LIKELY(!btr_search_latch)) {
++                      if (block->is_hashed) {
++                              goto retry;
++                      }
++                      return;
++              }
++              rw_lock_s_lock(btr_search_latch);
++              if (UNIV_LIKELY(btr_search_latch != block->btr_search_latch)) {
++                      rw_lock_s_unlock(btr_search_latch);
++                      goto retry;
++              }
++              if (UNIV_LIKELY(!block->is_hashed)) {
++                      rw_lock_s_unlock(btr_search_latch);
++                      return;
++              }
++              index = block->index;
++              ut_a(btr_search_latch == btr_search_get_latch(index->id));
++      } else {
++              /* btr_search_index_num == 1 */
++              /* btr_search_latch is only one and able to obtain
++                 before evaluating block->is_hashed. */
++              rw_lock_s_lock(btr_search_latch_part[0]);
++              if (UNIV_LIKELY(!block->is_hashed)) {
++                      rw_lock_s_unlock(btr_search_latch_part[0]);
++                      return;
++              }
++              index = block->index;
++      }
++
+       page = block->frame;
+       if (UNIV_LIKELY(!block->is_hashed)) {
+-              rw_lock_s_unlock(&btr_search_latch);
++              rw_lock_s_unlock(btr_search_get_latch(index->id));
+               return;
+       }
+-      table = btr_search_sys->hash_index;
++      table = btr_search_get_hash_index(index->id);
+ #ifdef UNIV_SYNC_DEBUG
+       ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+@@ -1076,14 +1141,14 @@
+       n_fields = block->curr_n_fields;
+       n_bytes = block->curr_n_bytes;
+-      index = block->index;
++      ut_a(index == block->index);
+       ut_a(!dict_index_is_ibuf(index));
+       /* NOTE: The fields of block must not be accessed after
+       releasing btr_search_latch, as the index page might only
+       be s-latched! */
+-      rw_lock_s_unlock(&btr_search_latch);
++      rw_lock_s_unlock(btr_search_get_latch(index->id));
+       ut_a(n_fields + n_bytes > 0);
+@@ -1133,7 +1198,7 @@
+               mem_heap_free(heap);
+       }
+-      rw_lock_x_lock(&btr_search_latch);
++      rw_lock_x_lock(btr_search_get_latch(index->id));
+       if (UNIV_UNLIKELY(!block->is_hashed)) {
+               /* Someone else has meanwhile dropped the hash index */
+@@ -1149,7 +1214,7 @@
+               /* Someone else has meanwhile built a new hash index on the
+               page, with different parameters */
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(index->id));
+               mem_free(folds);
+               goto retry;
+@@ -1165,6 +1230,7 @@
+       block->is_hashed = FALSE;
+       block->index = NULL;
++      block->btr_search_latch = NULL;
+       
+ cleanup:
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+@@ -1177,14 +1243,14 @@
+                       "InnoDB: the hash index to a page of %s,"
+                       " still %lu hash nodes remain.\n",
+                       index->name, (ulong) block->n_pointers);
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(index->id));
+               btr_search_validate();
+       } else {
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(index->id));
+       }
+ #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+-      rw_lock_x_unlock(&btr_search_latch);
++      rw_lock_x_unlock(btr_search_get_latch(index->id));
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+       mem_free(folds);
+@@ -1215,10 +1281,10 @@
+       mem_heap_t*     heap    = NULL;
+       ulint*          offsets;
+-      rw_lock_x_lock(&btr_search_latch);
++      rw_lock_x_lock(btr_search_get_latch(index->id));
+       //buf_pool_mutex_enter_all();
+-      table = btr_search_sys->hash_index;
++      table = btr_search_get_hash_index(index->id);
+       for (j = 0; j < srv_buf_pool_instances; j++) {
+               buf_pool_t*     buf_pool;
+@@ -1291,6 +1357,7 @@
+                               block->is_hashed = FALSE;
+                               block->index = NULL;
++                              block->btr_search_latch = NULL;
+       
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+                               if (UNIV_UNLIKELY(block->n_pointers)) {
+@@ -1313,7 +1380,7 @@
+       }
+       //buf_pool_mutex_exit_all();
+-      rw_lock_x_unlock(&btr_search_latch);
++      rw_lock_x_unlock(btr_search_get_latch(index->id));
+       if (UNIV_LIKELY_NULL(heap)) {
+               mem_heap_free(heap);
+@@ -1360,7 +1427,7 @@
+               buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
+-              btr_search_drop_page_hash_index(block);
++              btr_search_drop_page_hash_index(block, NULL);
+       }
+       mtr_commit(&mtr);
+@@ -1402,26 +1469,26 @@
+       ut_ad(index);
+       ut_a(!dict_index_is_ibuf(index));
+-      table = btr_search_sys->hash_index;
++      table = btr_search_get_hash_index(index->id);
+       page = buf_block_get_frame(block);
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++      ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_EX));
+       ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+             || rw_lock_own(&(block->lock), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+-      rw_lock_s_lock(&btr_search_latch);
++      rw_lock_s_lock(btr_search_get_latch(index->id));
+       if (block->is_hashed && ((block->curr_n_fields != n_fields)
+                                || (block->curr_n_bytes != n_bytes)
+                                || (block->curr_left_side != left_side))) {
+-              rw_lock_s_unlock(&btr_search_latch);
++              rw_lock_s_unlock(btr_search_get_latch(index->id));
+-              btr_search_drop_page_hash_index(block);
++              btr_search_drop_page_hash_index(block, index);
+       } else {
+-              rw_lock_s_unlock(&btr_search_latch);
++              rw_lock_s_unlock(btr_search_get_latch(index->id));
+       }
+       n_recs = page_get_n_recs(page);
+@@ -1515,9 +1582,9 @@
+               fold = next_fold;
+       }
+-      btr_search_check_free_space_in_heap();
++      btr_search_check_free_space_in_heap(index->id);
+-      rw_lock_x_lock(&btr_search_latch);
++      rw_lock_x_lock(btr_search_get_latch(index->id));
+       if (UNIV_UNLIKELY(btr_search_fully_disabled)) {
+               goto exit_func;
+@@ -1545,6 +1612,7 @@
+       block->curr_n_bytes = n_bytes;
+       block->curr_left_side = left_side;
+       block->index = index;
++      block->btr_search_latch = btr_search_get_latch(index->id);
+       for (i = 0; i < n_cached; i++) {
+@@ -1552,7 +1620,7 @@
+       }
+ exit_func:
+-      rw_lock_x_unlock(&btr_search_latch);
++      rw_lock_x_unlock(btr_search_get_latch(index->id));
+       mem_free(folds);
+       mem_free(recs);
+@@ -1591,13 +1659,13 @@
+       ut_a(!(new_block->is_hashed || block->is_hashed)
+            || !dict_index_is_ibuf(index));
+-      rw_lock_s_lock(&btr_search_latch);
++      rw_lock_s_lock(btr_search_get_latch(index->id));
+       if (new_block->is_hashed) {
+-              rw_lock_s_unlock(&btr_search_latch);
++              rw_lock_s_unlock(btr_search_get_latch(index->id));
+-              btr_search_drop_page_hash_index(block);
++              btr_search_drop_page_hash_index(block, index);
+               return;
+       }
+@@ -1612,7 +1680,7 @@
+               new_block->n_bytes = block->curr_n_bytes;
+               new_block->left_side = left_side;
+-              rw_lock_s_unlock(&btr_search_latch);
++              rw_lock_s_unlock(btr_search_get_latch(index->id));
+               ut_a(n_fields + n_bytes > 0);
+@@ -1624,7 +1692,7 @@
+               return;
+       }
+-      rw_lock_s_unlock(&btr_search_latch);
++      rw_lock_s_unlock(btr_search_get_latch(index->id));
+ }
+ /********************************************************************//**
+@@ -1663,7 +1731,7 @@
+       ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
+       ut_a(!dict_index_is_ibuf(cursor->index));
+-      table = btr_search_sys->hash_index;
++      table = btr_search_get_hash_index(cursor->index->id);
+       index_id = cursor->index->id;
+       fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, offsets_,
+@@ -1672,11 +1740,11 @@
+       if (UNIV_LIKELY_NULL(heap)) {
+               mem_heap_free(heap);
+       }
+-      rw_lock_x_lock(&btr_search_latch);
++      rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
+       ha_search_and_delete_if_found(table, fold, rec);
+-      rw_lock_x_unlock(&btr_search_latch);
++      rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+ }
+ /********************************************************************//**
+@@ -1710,21 +1778,21 @@
+       ut_a(block->index == cursor->index);
+       ut_a(!dict_index_is_ibuf(cursor->index));
+-      rw_lock_x_lock(&btr_search_latch);
++      rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
+       if ((cursor->flag == BTR_CUR_HASH)
+           && (cursor->n_fields == block->curr_n_fields)
+           && (cursor->n_bytes == block->curr_n_bytes)
+           && !block->curr_left_side) {
+-              table = btr_search_sys->hash_index;
++              table = btr_search_get_hash_index(cursor->index->id);
+               ha_search_and_update_if_found(table, cursor->fold, rec,
+                                             block, page_rec_get_next(rec));
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+       } else {
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+               btr_search_update_hash_on_insert(cursor);
+       }
+@@ -1759,9 +1827,9 @@
+       ulint*          offsets         = offsets_;
+       rec_offs_init(offsets_);
+-      table = btr_search_sys->hash_index;
++      table = btr_search_get_hash_index(cursor->index->id);
+-      btr_search_check_free_space_in_heap();
++      btr_search_check_free_space_in_heap(cursor->index->id);
+       rec = btr_cur_get_rec(cursor);
+@@ -1806,7 +1874,7 @@
+       } else {
+               if (left_side) {
+-                      rw_lock_x_lock(&btr_search_latch);
++                      rw_lock_x_lock(btr_search_get_latch(index_id));
+                       locked = TRUE;
+@@ -1820,7 +1888,7 @@
+               if (!locked) {
+-                      rw_lock_x_lock(&btr_search_latch);
++                      rw_lock_x_lock(btr_search_get_latch(index_id));
+                       locked = TRUE;
+               }
+@@ -1838,7 +1906,7 @@
+               if (!left_side) {
+                       if (!locked) {
+-                              rw_lock_x_lock(&btr_search_latch);
++                              rw_lock_x_lock(btr_search_get_latch(index_id));
+                               locked = TRUE;
+                       }
+@@ -1853,7 +1921,7 @@
+               if (!locked) {
+-                      rw_lock_x_lock(&btr_search_latch);
++                      rw_lock_x_lock(btr_search_get_latch(index_id));
+                       locked = TRUE;
+               }
+@@ -1876,7 +1944,7 @@
+               mem_heap_free(heap);
+       }
+       if (locked) {
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(index_id));
+       }
+ }
+@@ -1892,7 +1960,7 @@
+       ha_node_t*      node;
+       ulint           n_page_dumps    = 0;
+       ibool           ok              = TRUE;
+-      ulint           i;
++      ulint           i,j;
+       ulint           cell_count;
+       mem_heap_t*     heap            = NULL;
+       ulint           offsets_[REC_OFFS_NORMAL_SIZE];
+@@ -1904,23 +1972,25 @@
+       rec_offs_init(offsets_);
+-      rw_lock_x_lock(&btr_search_latch);
++      btr_search_x_lock_all();
+       buf_pool_page_hash_x_lock_all();
+-      cell_count = hash_get_n_cells(btr_search_sys->hash_index);
++      for (j = 0; j < btr_search_index_num; j++) {
++
++      cell_count = hash_get_n_cells(btr_search_sys->hash_index[j]);
+       for (i = 0; i < cell_count; i++) {
+               /* We release btr_search_latch every once in a while to
+               give other queries a chance to run. */
+               if ((i != 0) && ((i % chunk_size) == 0)) {
+                       buf_pool_page_hash_x_unlock_all();
+-                      rw_lock_x_unlock(&btr_search_latch);
++                      btr_search_x_unlock_all();
+                       os_thread_yield();
+-                      rw_lock_x_lock(&btr_search_latch);
++                      btr_search_x_lock_all();
+                       buf_pool_page_hash_x_lock_all();
+               }
+-              node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
++              node = hash_get_nth_cell(btr_search_sys->hash_index[j], i)->node;
+               for (; node != NULL; node = node->next) {
+                       const buf_block_t*      block
+@@ -2029,19 +2099,21 @@
+               give other queries a chance to run. */
+               if (i != 0) {
+                       buf_pool_page_hash_x_unlock_all();
+-                      rw_lock_x_unlock(&btr_search_latch);
++                      btr_search_x_unlock_all();
+                       os_thread_yield();
+-                      rw_lock_x_lock(&btr_search_latch);
++                      btr_search_x_lock_all();
+                       buf_pool_page_hash_x_lock_all();
+               }
+-              if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
++              if (!ha_validate(btr_search_sys->hash_index[j], i, end_index)) {
+                       ok = FALSE;
+               }
+       }
++      } /*for (j = 0; j < btr_search_index_num; j++)*/
++
+       buf_pool_page_hash_x_unlock_all();
+-      rw_lock_x_unlock(&btr_search_latch);
++      btr_search_x_unlock_all();
+       if (UNIV_LIKELY_NULL(heap)) {
+               mem_heap_free(heap);
+       }
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-04 15:55:21.351597052 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-04 16:12:48.654550708 +0900
+@@ -949,6 +949,7 @@
+       block->check_index_page_at_flush = FALSE;
+       block->index = NULL;
++      block->btr_search_latch = NULL;
+       block->is_hashed = FALSE;
+@@ -1477,7 +1478,7 @@
+                       /* To follow the latching order, we
+                       have to release btr_search_latch
+                       before acquiring block->latch. */
+-                      rw_lock_x_unlock(&btr_search_latch);
++                      btr_search_x_unlock_all();
+                       /* When we release the search latch,
+                       we must rescan all blocks, because
+                       some may become hashed again. */
+@@ -1508,11 +1509,11 @@
+                       anything.  block->is_hashed can only
+                       be set on uncompressed file pages. */
+                       
+-                      btr_search_drop_page_hash_index(block);
++                      btr_search_drop_page_hash_index(block, NULL);
+                       
+                       rw_lock_x_unlock(&block->lock);
+                       
+-                      rw_lock_x_lock(&btr_search_latch);
++                      btr_search_x_lock_all();
+                       
+                       ut_ad(!btr_search_enabled);
+               }
+@@ -1531,7 +1532,11 @@
+       ibool           released_search_latch;
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++      ulint   j;
++
++      for (j = 0; j < btr_search_index_num; j++) {
++              ut_ad(rw_lock_own(btr_search_latch_part[j], RW_LOCK_EX));
++      }
+ #endif /* UNIV_SYNC_DEBUG */
+       ut_ad(!btr_search_enabled);
+@@ -2635,6 +2640,7 @@
+ {
+       block->check_index_page_at_flush = FALSE;
+       block->index            = NULL;
++      block->btr_search_latch = NULL;
+       block->n_hash_helps     = 0;
+       block->is_hashed        = FALSE;
+diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
+--- a/storage/innobase/buf/buf0lru.c   2010-12-04 15:35:29.137347521 +0900
++++ b/storage/innobase/buf/buf0lru.c   2010-12-04 16:12:48.658550840 +0900
+@@ -1775,7 +1775,7 @@
+               UNIV_MEM_VALID(((buf_block_t*) bpage)->frame,
+                              UNIV_PAGE_SIZE);
+-              btr_search_drop_page_hash_index((buf_block_t*) bpage);
++              btr_search_drop_page_hash_index((buf_block_t*) bpage, NULL);
+               UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
+                                UNIV_PAGE_SIZE);
+diff -ruN a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
+--- a/storage/innobase/dict/dict0dict.c        2010-12-04 15:52:23.398513916 +0900
++++ b/storage/innobase/dict/dict0dict.c        2010-12-04 16:12:48.662550715 +0900
+@@ -1802,7 +1802,7 @@
+       zero. */
+       for (;;) {
+-              ulint ref_count = btr_search_info_get_ref_count(info);
++              ulint ref_count = btr_search_info_get_ref_count(info, index->id);
+               if (ref_count == 0) {
+                       break;
+               }
+diff -ruN a/storage/innobase/ha/ha0ha.c b/storage/innobase/ha/ha0ha.c
+--- a/storage/innobase/ha/ha0ha.c      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/ha/ha0ha.c      2010-12-04 16:12:48.665593752 +0900
+@@ -102,7 +102,8 @@
+       ut_ad(table);
+       ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
++      /* cannot identify which btr_search_latch_part[i] applies here for now */
++      //ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
+ #endif /* UNIV_SYNC_DEBUG */
+ #ifndef UNIV_HOTBACKUP
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-04 16:12:20.185850734 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-04 16:12:48.674552412 +0900
+@@ -11611,6 +11611,11 @@
+   "Disable with --skip-innodb-adaptive-hash-index.",
+   NULL, innodb_adaptive_hash_index_update, TRUE);
++static MYSQL_SYSVAR_ULONG(adaptive_hash_index_partitions, btr_search_index_num,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "Number of InnoDB adaptive hash index partitions (default 1: disable partitioning)",
++  NULL, NULL, 1, 1, 32, 0);     /* no check/update hooks; default 1, min 1, max 32 */
++
+ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
+   PLUGIN_VAR_RQCMDARG,
+   "Replication thread delay (ms) on the slave server if "
+@@ -11963,6 +11968,7 @@
+   MYSQL_SYSVAR(use_sys_stats_table),
+   MYSQL_SYSVAR(stats_sample_pages),
+   MYSQL_SYSVAR(adaptive_hash_index),
++  MYSQL_SYSVAR(adaptive_hash_index_partitions),
+   MYSQL_SYSVAR(replication_delay),
+   MYSQL_SYSVAR(status_file),
+   MYSQL_SYSVAR(strict_mode),
+diff -ruN a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
+--- a/storage/innobase/include/btr0sea.h       2010-12-03 15:48:03.070987226 +0900
++++ b/storage/innobase/include/btr0sea.h       2010-12-04 16:12:48.707551382 +0900
+@@ -85,7 +85,8 @@
+ ulint
+ btr_search_info_get_ref_count(
+ /*==========================*/
+-      btr_search_t*   info);  /*!< in: search info. */
++      btr_search_t*   info,   /*!< in: search info. */
++      index_id_t      key);   /*!< in: index id; selects the search latch partition */
+ /*********************************************************************//**
+ Updates the search info. */
+ UNIV_INLINE
+@@ -136,10 +137,11 @@
+ void
+ btr_search_drop_page_hash_index(
+ /*============================*/
+-      buf_block_t*    block); /*!< in: block containing index page,
++      buf_block_t*    block,  /*!< in: block containing index page,
+                               s- or x-latched, or an index page
+                               for which we know that
+                               block->buf_fix_count == 0 */
++      dict_index_t*   index_in);      /*!< in: index of the page, or NULL to take it from block->index */
+ /************************************************************************
+ Drops a page hash index based on index */
+ UNIV_INTERN
+@@ -199,10 +201,47 @@
+ # define btr_search_validate()        TRUE
+ #endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
++/********************************************************************//**
++Helpers for the partitioned adaptive hash index: per-key lookup, lock-all */
++UNIV_INLINE
++hash_table_t*
++btr_search_get_hash_index(
++/*======================*/
++      index_id_t      key);   /*!< in: index id; picks partition key % btr_search_index_num */
++
++UNIV_INLINE
++rw_lock_t*
++btr_search_get_latch(
++/*=================*/
++      index_id_t      key);   /*!< in: index id; picks partition key % btr_search_index_num */
++
++UNIV_INLINE
++void
++btr_search_x_lock_all(void);  /* x-locks every btr_search_latch_part[] entry */
++/*========================*/
++
++UNIV_INLINE
++void
++btr_search_x_unlock_all(void);        /* x-unlocks every btr_search_latch_part[] entry */
++/*==========================*/
++
++UNIV_INLINE
++void
++btr_search_s_lock_all(void);  /* s-locks every btr_search_latch_part[] entry */
++/*========================*/
++
++UNIV_INLINE
++void
++btr_search_s_unlock_all(void);        /* s-unlocks every btr_search_latch_part[] entry */
++/*==========================*/
++
++
+ /** Flag: has the search system been enabled?
+ Protected by btr_search_latch and btr_search_enabled_mutex. */
+ extern char   btr_search_enabled;
++extern ulint  btr_search_index_num;
++
+ /** Flag: whether the search system has completed its disabling process,
+ It is set to TRUE right after buf_pool_drop_hash_index() in
+ btr_search_disable(), indicating hash index entries are cleaned up.
+@@ -269,7 +308,7 @@
+ /** The hash index system */
+ struct btr_search_sys_struct{
+-      hash_table_t*   hash_index;     /*!< the adaptive hash index,
++      hash_table_t**  hash_index;     /*!< the adaptive hash index,
+                                       mapping dtuple_fold values
+                                       to rec_t pointers on index pages */
+ };
+@@ -290,10 +329,12 @@
+ Bear in mind (3) and (4) when using the hash index.
+ */
+-extern rw_lock_t*     btr_search_latch_temp;
++//extern rw_lock_t*   btr_search_latch_temp;
++
++extern rw_lock_t**    btr_search_latch_part;
+ /** The latch protecting the adaptive search system */
+-#define btr_search_latch      (*btr_search_latch_temp)
++//#define btr_search_latch    (*btr_search_latch_temp)
+ #ifdef UNIV_SEARCH_PERF_STAT
+ /** Number of successful adaptive hash index lookups */
+diff -ruN a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
+--- a/storage/innobase/include/btr0sea.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/btr0sea.ic      2010-12-04 16:12:48.709511202 +0900
+@@ -62,8 +62,8 @@
+       btr_search_t*   info;
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+-      ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++      ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_SHARED));
++      ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+       info = btr_search_get_info(index);
+@@ -82,3 +82,72 @@
+       btr_search_info_update_slow(info, cursor);
+ }
++
++/*********************************************************************//**
++New functions for accessing the partitioned (split) adaptive hash index
++and the latches that protect its partitions */
++/*********************************************************************//**
++Picks the adaptive hash index partition that belongs to a key.
++@return the hash table stored in btr_search_sys->hash_index at slot
++(key modulo btr_search_index_num) */
++UNIV_INLINE
++hash_table_t*
++btr_search_get_hash_index(
++/*======================*/
++      index_id_t      key)    /*!< in: key selecting the partition */
++{
++      hash_table_t**  partitions = btr_search_sys->hash_index;
++
++      return(partitions[key % btr_search_index_num]);
++}
++
++/*********************************************************************//**
++Picks the latch that belongs to a key.
++@return the latch stored in btr_search_latch_part at slot
++(key modulo btr_search_index_num) */
++UNIV_INLINE
++rw_lock_t*
++btr_search_get_latch(
++/*=================*/
++      index_id_t      key)    /*!< in: key selecting the partition */
++{
++      rw_lock_t**     latches = btr_search_latch_part;
++
++      return(latches[key % btr_search_index_num]);
++}
++
++/*********************************************************************//**
++Acquires an exclusive lock on every latch in btr_search_latch_part,
++starting at slot 0 and moving upwards. */
++UNIV_INLINE
++void
++btr_search_x_lock_all(void)
++/*=======================*/
++{
++      ulint   slot = 0;
++
++      while (slot < btr_search_index_num) {
++              rw_lock_x_lock(btr_search_latch_part[slot]);
++              slot++;
++      }
++}
++
++/*********************************************************************//**
++Releases an exclusive lock on every latch in btr_search_latch_part,
++starting at slot 0 and moving upwards. */
++UNIV_INLINE
++void
++btr_search_x_unlock_all(void)
++/*==========================*/
++{
++      ulint   slot = 0;
++
++      while (slot < btr_search_index_num) {
++              rw_lock_x_unlock(btr_search_latch_part[slot]);
++              slot++;
++      }
++}
++
++/*********************************************************************//**
++Acquires a shared lock on every latch in btr_search_latch_part,
++starting at slot 0 and moving upwards. */
++UNIV_INLINE
++void
++btr_search_s_lock_all(void)
++/*=======================*/
++{
++      ulint   slot = 0;
++
++      while (slot < btr_search_index_num) {
++              rw_lock_s_lock(btr_search_latch_part[slot]);
++              slot++;
++      }
++}
++
++/*********************************************************************//**
++Releases a shared lock on every latch in btr_search_latch_part,
++starting at slot 0 and moving upwards. */
++UNIV_INLINE
++void
++btr_search_s_unlock_all(void)
++/*=========================*/
++{
++      ulint   slot = 0;
++
++      while (slot < btr_search_index_num) {
++              rw_lock_s_unlock(btr_search_latch_part[slot]);
++              slot++;
++      }
++}
++
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h       2010-12-15 19:00:07.713604580 +0900
++++ b/storage/innobase/include/buf0buf.h       2010-12-15 20:58:03.546839883 +0900
+@@ -1473,7 +1473,7 @@
+                                       pointers in the adaptive hash index
+                                       pointing to this frame */
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+-      unsigned        is_hashed:1;    /*!< TRUE if hash index has
++      volatile unsigned       is_hashed:1;    /*!< TRUE if hash index has
+                                       already been built on this
+                                       page; note that it does not
+                                       guarantee that the index is
+@@ -1487,6 +1487,7 @@
+       unsigned        curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
+       dict_index_t*   index;          /*!< Index for which the adaptive
+                                       hash index has been created. */
++      volatile rw_lock_t*     btr_search_latch;
+       /* @} */
+ # ifdef UNIV_SYNC_DEBUG
+       /** @name Debug fields */
+diff -ruN a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
+--- a/storage/innobase/include/row0upd.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/row0upd.ic      2010-12-04 16:12:48.710551113 +0900
+@@ -158,7 +158,7 @@
+       ut_ad(dict_index_is_clust(index));
+       ut_ad(rec_offs_validate(rec, index, offsets));
+ #ifdef UNIV_SYNC_DEBUG
+-      if (!rw_lock_own(&btr_search_latch, RW_LOCK_EX)) {
++      if (!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_EX)) {
+               ut_ad(!buf_block_align(rec)->is_hashed);
+       }
+ #endif /* UNIV_SYNC_DEBUG */
+diff -ruN a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c
+--- a/storage/innobase/page/page0page.c        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/page/page0page.c        2010-12-04 16:12:48.712550963 +0900
+@@ -218,7 +218,7 @@
+       const ibool     is_hashed       = block->is_hashed;
+       if (is_hashed) {
+-              rw_lock_x_lock(&btr_search_latch);
++              rw_lock_x_lock(btr_search_get_latch(block->index->id));
+       }
+       ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+@@ -244,7 +244,7 @@
+ #ifndef UNIV_HOTBACKUP
+       if (is_hashed) {
+-              rw_lock_x_unlock(&btr_search_latch);
++              rw_lock_x_unlock(btr_search_get_latch(block->index->id));
+       }
+ #endif /* !UNIV_HOTBACKUP */
+ }
+diff -ruN a/storage/innobase/page/page0zip.c b/storage/innobase/page/page0zip.c
+--- a/storage/innobase/page/page0zip.c 2010-12-04 15:57:13.061494433 +0900
++++ b/storage/innobase/page/page0zip.c 2010-12-04 16:12:48.716470334 +0900
+@@ -4445,7 +4445,7 @@
+ #ifndef UNIV_HOTBACKUP
+       temp_block = buf_block_alloc(buf_pool, 0);
+-      btr_search_drop_page_hash_index(block);
++      btr_search_drop_page_hash_index(block, index);
+       block->check_index_page_at_flush = TRUE;
+ #else /* !UNIV_HOTBACKUP */
+       ut_ad(block == back_block1);
+diff -ruN a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
+--- a/storage/innobase/row/row0sel.c   2010-12-04 16:09:53.204513572 +0900
++++ b/storage/innobase/row/row0sel.c   2010-12-04 16:12:48.722551273 +0900
+@@ -1210,7 +1210,7 @@
+       ut_ad(plan->unique_search);
+       ut_ad(!plan->must_get_clust);
+ #ifdef UNIV_SYNC_DEBUG
+-      ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
++      ut_ad(rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_SHARED));
+ #endif /* UNIV_SYNC_DEBUG */
+       row_sel_open_pcur(plan, TRUE, mtr);
+@@ -1381,10 +1381,10 @@
+           && !plan->must_get_clust
+           && !plan->table->big_rows) {
+               if (!search_latch_locked) {
+-                      rw_lock_s_lock(&btr_search_latch);
++                      rw_lock_s_lock(btr_search_get_latch(index->id));
+                       search_latch_locked = TRUE;
+-              } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
++              } else if (rw_lock_get_writer(btr_search_get_latch(index->id)) == RW_LOCK_WAIT_EX) {
+                       /* There is an x-latch request waiting: release the
+                       s-latch for a moment; as an s-latch here is often
+@@ -1393,8 +1393,8 @@
+                       from acquiring an s-latch for a long time, lowering
+                       performance significantly in multiprocessors. */
+-                      rw_lock_s_unlock(&btr_search_latch);
+-                      rw_lock_s_lock(&btr_search_latch);
++                      rw_lock_s_unlock(btr_search_get_latch(index->id));
++                      rw_lock_s_lock(btr_search_get_latch(index->id));
+               }
+               found_flag = row_sel_try_search_shortcut(node, plan, &mtr);
+@@ -1417,7 +1417,7 @@
+       }
+       if (search_latch_locked) {
+-              rw_lock_s_unlock(&btr_search_latch);
++              rw_lock_s_unlock(btr_search_get_latch(index->id));
+               search_latch_locked = FALSE;
+       }
+@@ -1993,7 +1993,7 @@
+ func_exit:
+       if (search_latch_locked) {
+-              rw_lock_s_unlock(&btr_search_latch);
++              rw_lock_s_unlock(btr_search_get_latch(index->id));
+       }
+       if (UNIV_LIKELY_NULL(heap)) {
+               mem_heap_free(heap);
+@@ -3356,6 +3356,8 @@
+       /* if the returned record was locked and we did a semi-consistent
+       read (fetch the newest committed version), then this is set to
+       TRUE */
++      ulint           i;
++      ulint           should_release;
+ #ifdef UNIV_SEARCH_DEBUG
+       ulint           cnt                             = 0;
+ #endif /* UNIV_SEARCH_DEBUG */
+@@ -3441,18 +3443,32 @@
+       /* PHASE 0: Release a possible s-latch we are holding on the
+       adaptive hash index latch if there is someone waiting behind */
+-      if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
+-          && trx->has_search_latch) {
++      should_release = 0;
++      for (i = 0; i < btr_search_index_num; i++) {
++              if ((trx->has_search_latch & ((ulint)1 << i))
++                  && rw_lock_get_writer(btr_search_latch_part[i])
++                              != RW_LOCK_NOT_LOCKED) {
++                      should_release |= ((ulint)1 << i);
++              }
++      }
++
++      if (should_release) {
+               /* There is an x-latch request on the adaptive hash index:
+               release the s-latch to reduce starvation and wait for
+               BTR_SEA_TIMEOUT rounds before trying to keep it again over
+               calls from MySQL */
+-              rw_lock_s_unlock(&btr_search_latch);
+-              trx->has_search_latch = FALSE;
++              for (i = 0; i < btr_search_index_num; i++) {
++                      if (should_release & ((ulint)1 << i)) {
++                              rw_lock_s_unlock(btr_search_latch_part[i]);
++                              trx->has_search_latch &= ~((ulint)1 << i);
++                      }
++              }
++              if (!trx->has_search_latch) {
+               trx->search_latch_timeout = BTR_SEA_TIMEOUT;
++              }
+       }
+       /* Reset the new record lock info if srv_locks_unsafe_for_binlog
+@@ -3603,9 +3619,11 @@
+                       hash index semaphore! */
+ #ifndef UNIV_SEARCH_DEBUG
+-                      if (!trx->has_search_latch) {
+-                              rw_lock_s_lock(&btr_search_latch);
+-                              trx->has_search_latch = TRUE;
++                      if (!(trx->has_search_latch
++                            & ((ulint)1 << (index->id % btr_search_index_num)))) {
++                              rw_lock_s_lock(btr_search_get_latch(index->id));
++                              trx->has_search_latch |=
++                                      (ulint)1 << (index->id % btr_search_index_num);
+                       }
+ #endif
+                       switch (row_sel_try_search_shortcut_for_mysql(
+@@ -3666,7 +3684,11 @@
+                                       trx->search_latch_timeout--;
+-                                      rw_lock_s_unlock(&btr_search_latch);
++                                      for (i = 0; i < btr_search_index_num; i++) {
++                                              if (trx->has_search_latch & ((ulint)1 << i)) {
++                                                      rw_lock_s_unlock(btr_search_latch_part[i]);
++                                              }
++                                      }
+                                       trx->has_search_latch = FALSE;
+                               }
+@@ -3690,7 +3712,12 @@
+       /* PHASE 3: Open or restore index cursor position */
+       if (trx->has_search_latch) {
+-              rw_lock_s_unlock(&btr_search_latch);
++
++              for (i = 0; i < btr_search_index_num; i++) {
++                      if (trx->has_search_latch & ((ulint)1 << i)) {
++                              rw_lock_s_unlock(btr_search_latch_part[i]);
++                      }
++              }
+               trx->has_search_latch = FALSE;
+       }
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-04 16:12:20.231484679 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-04 16:12:48.726551018 +0900
+@@ -2008,7 +2008,9 @@
+             "-------------------------------------\n", file);
+       ibuf_print(file);
+-      ha_print_info(file, btr_search_sys->hash_index);
++      for (i = 0; i < btr_search_index_num; i++) {
++              ha_print_info(file, btr_search_get_hash_index((index_id_t)i));
++      }
+       fprintf(file,
+               "%.2f hash searches/s, %.2f non-hash searches/s\n",
+@@ -2033,14 +2035,15 @@
+                       ut_total_allocated_memory,
+                       mem_pool_get_reserved(mem_comm_pool));
+       /* Calcurate reserved memories */
+-      if (btr_search_sys && btr_search_sys->hash_index->heap) {
+-              btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index->heap);
++      if (btr_search_sys && btr_search_sys->hash_index[0]->heap) {
++              btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index[0]->heap);
+       } else {
+               btr_search_sys_subtotal = 0;
+-              for (i=0; i < btr_search_sys->hash_index->n_mutexes; i++) {
+-                      btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index->heaps[i]);
++              for (i=0; i < btr_search_sys->hash_index[0]->n_mutexes; i++) {
++                      btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index[0]->heaps[i]);
+               }
+       }
++      btr_search_sys_subtotal *= btr_search_index_num;
+       lock_sys_subtotal = 0;
+       if (trx_sys) {
+@@ -2067,10 +2070,10 @@
+                       "    Threads             %lu \t(%lu + %lu)\n",
+                       (ulong) (btr_search_sys
+-                              ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0)
++                              ? (btr_search_sys->hash_index[0]->n_cells * btr_search_index_num * sizeof(hash_cell_t)) : 0)
+                       + btr_search_sys_subtotal,
+                       (ulong) (btr_search_sys
+-                              ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0),
++                              ? (btr_search_sys->hash_index[0]->n_cells * btr_search_index_num * sizeof(hash_cell_t)) : 0),
+                       (ulong) btr_search_sys_subtotal,
+                       (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),
+diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
+--- a/storage/innobase/sync/sync0sync.c        2010-12-03 17:36:44.300986571 +0900
++++ b/storage/innobase/sync/sync0sync.c        2010-12-04 16:12:48.729513564 +0900
+@@ -1183,7 +1183,6 @@
+       case SYNC_ANY_LATCH:
+       case SYNC_FILE_FORMAT_TAG:
+       case SYNC_DOUBLEWRITE:
+-      case SYNC_SEARCH_SYS:
+       case SYNC_SEARCH_SYS_CONF:
+       case SYNC_TRX_LOCK_HEAP:
+       case SYNC_KERNEL:
+@@ -1204,6 +1203,7 @@
+                       ut_error;
+               }
+               break;
++      case SYNC_SEARCH_SYS:
+       case SYNC_BUF_LRU_LIST:
+       case SYNC_BUF_FLUSH_LIST:
+       case SYNC_BUF_PAGE_HASH:
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c   2010-12-03 17:49:11.623953784 +0900
++++ b/storage/innobase/trx/trx0trx.c   2010-12-04 16:12:48.731513275 +0900
+@@ -266,8 +266,14 @@
+ /*=================================*/
+       trx_t*     trx) /*!< in: transaction */
+ {
++      ulint   i;
++
+       if (trx->has_search_latch) {
+-              rw_lock_s_unlock(&btr_search_latch);
++              for (i = 0; i < btr_search_index_num; i++) {
++                      if (trx->has_search_latch & ((ulint)1 << i)) {
++                              rw_lock_s_unlock(btr_search_latch_part[i]);
++                      }
++              }
+               trx->has_search_latch = FALSE;
+       }
diff --git a/innodb_admin_command_base.patch b/innodb_admin_command_base.patch
new file mode 100644 (file)
index 0000000..b7e89e5
--- /dev/null
@@ -0,0 +1,173 @@
+# name       : innodb_admin_command_base.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:32:15.624039043 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:32:35.424957827 +0900
+@@ -11775,7 +11775,8 @@
+ i_s_innodb_sys_foreign_cols,
+ i_s_innodb_sys_stats,
+ i_s_innodb_table_stats,
+-i_s_innodb_index_stats
++i_s_innodb_index_stats,
++i_s_innodb_admin_command
+ mysql_declare_plugin_end;
+ /** @brief Initialize the default value of innodb_commit_concurrency.
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc  2010-12-03 17:30:16.299955549 +0900
++++ b/storage/innobase/handler/i_s.cc  2010-12-03 17:32:35.425989972 +0900
+@@ -4162,3 +4162,139 @@
+       STRUCT_FLD(system_vars, NULL),
+       STRUCT_FLD(__reserved1, NULL)
+ };
++
++/***********************************************************************
++Column layout installed as fields_info by i_s_innodb_admin_command_init:
++a single string column, "result_message", with field length 1024.
++The list is closed by END_OF_ST_FIELD_INFO. */
++static ST_FIELD_INFO  i_s_innodb_admin_command_info[] =
++{
++      {STRUCT_FLD(field_name,         "result_message"),
++       STRUCT_FLD(field_length,       1024),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++#ifndef INNODB_COMPATIBILITY_HOOKS
++#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS
++#endif
++
++extern "C" {
++char **thd_query(MYSQL_THD thd);
++}
++
++/***********************************************************************
++Fill callback of the admin command table.  Scans the text of the current
++statement for the first "XTRA_" marker (compared case-insensitively) that
++is not inside a backquoted or double-quoted section, and stores one row
++whose only column carries the answer for that command.  The only command
++recognised here is the example XTRA_HELLO.
++@return 0 on success or when access is denied, 1 if storing the row fails */
++static
++int
++i_s_innodb_admin_command_fill(
++/*==========================*/
++      THD*            thd,    /*!< in: thread whose statement is scanned */
++      TABLE_LIST*     tables, /*!< in/out: tables->table receives the row */
++      COND*           cond)   /*!< in: condition; not referenced here */
++{
++      const char* const       marker = "XTRA_";
++      TABLE*                  out_table = (TABLE *) tables->table;
++      const char*             reply;
++      char*                   pos;
++      char                    in_quote = '\0';
++
++      DBUG_ENTER("i_s_innodb_admin_command_fill");
++
++      /* deny access to non-superusers: nothing is stored for them */
++      if (check_global_access(thd, PROCESS_ACL)) {
++              DBUG_RETURN(0);
++      }
++
++      if (thd_sql_command(thd) != SQLCOM_SELECT) {
++              reply = "SELECT command is only accepted.";
++      } else {
++              for (pos = *thd_query(thd); *pos; pos++) {
++                      if (*pos == in_quote) {
++                              /* closing quote of the current section */
++                              in_quote = '\0';
++                      } else if (in_quote) {
++                              /* still inside a quoted section: skip */
++                      } else if (*pos == '`' || *pos == '"') {
++                              in_quote = *pos;
++                      } else {
++                              long    n_same = 0;
++
++                              /* Count how many leading characters of the
++                              marker match at pos; a NUL in the statement
++                              never matches, so the scan cannot run past
++                              the end of the text. */
++                              while (marker[n_same]
++                                     && toupper((int)(unsigned char)
++                                                (pos[n_same]))
++                                        == toupper((int)(unsigned char)
++                                                   (marker[n_same]))) {
++                                      n_same++;
++                              }
++
++                              if (!marker[n_same]) {
++                                      /* the whole marker was found at pos */
++                                      break;
++                              }
++                      }
++              }
++
++              if (!*pos) {
++                      reply = "No XTRA_* command in the SQL statement."
++                              " Please add /*!XTRA_xxxx*/ to the SQL.";
++              } else if (!strncasecmp("XTRA_HELLO", pos, 10)) {
++                      /* This is example command XTRA_HELLO */
++
++                      ut_print_timestamp(stderr);
++                      fprintf(stderr, " InnoDB: administration command test for XtraDB"
++                                      " 'XTRA_HELLO' was detected.\n");
++
++                      reply = "Hello!";
++              } else {
++                      reply = "Undefined XTRA_* command.";
++              }
++      }
++
++      field_store_string(out_table->field[0], reply);
++
++      if (schema_table_store_record(thd, out_table)) {
++              DBUG_RETURN(1);
++      }
++
++      DBUG_RETURN(0);
++}
++
++/***********************************************************************
++Init hook of the admin command plugin: installs the column layout and
++the fill callback on the schema table passed in.
++@return 0 */
++static
++int
++i_s_innodb_admin_command_init(
++/*==========================*/
++      void*   p)      /*!< in/out: pointer that is cast to ST_SCHEMA_TABLE* */
++{
++      ST_SCHEMA_TABLE*        table_def;
++
++      DBUG_ENTER("i_s_innodb_admin_command_init");
++
++      table_def = (ST_SCHEMA_TABLE*) p;
++
++      table_def->fill_table = i_s_innodb_admin_command_fill;
++      table_def->fields_info = i_s_innodb_admin_command_info;
++
++      DBUG_RETURN(0);
++}
++
++/** Plugin descriptor of type MYSQL_INFORMATION_SCHEMA_PLUGIN registered
++under the name "XTRADB_ADMIN_COMMAND".  Its init hook is
++i_s_innodb_admin_command_init and its deinit hook is i_s_common_deinit;
++it declares no status or system variables. */
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_admin_command =
++{
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++      STRUCT_FLD(info, &i_s_info),
++      STRUCT_FLD(name, "XTRADB_ADMIN_COMMAND"),
++      STRUCT_FLD(author, plugin_author),
++      STRUCT_FLD(descr, "XtraDB specific command acceptor"),
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++      STRUCT_FLD(init, i_s_innodb_admin_command_init),
++      STRUCT_FLD(deinit, i_s_common_deinit),
++      STRUCT_FLD(version, 0x0100 /* 1.0 */),
++      STRUCT_FLD(status_vars, NULL),
++      STRUCT_FLD(system_vars, NULL),
++      STRUCT_FLD(__reserved1, NULL)
++};
+diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
+--- a/storage/innobase/handler/i_s.h   2010-12-03 17:30:16.301987692 +0900
++++ b/storage/innobase/handler/i_s.h   2010-12-03 17:32:35.426954555 +0900
+@@ -44,5 +44,6 @@
+ extern struct st_mysql_plugin i_s_innodb_sys_stats;
+ extern struct st_mysql_plugin i_s_innodb_table_stats;
+ extern struct st_mysql_plugin i_s_innodb_index_stats;
++extern struct st_mysql_plugin i_s_innodb_admin_command;
+ #endif /* i_s_h */
diff --git a/innodb_buffer_pool_pages_i_s.patch b/innodb_buffer_pool_pages_i_s.patch
new file mode 100644 (file)
index 0000000..b9d8bdd
--- /dev/null
@@ -0,0 +1,803 @@
+# name       : innodb_buffer_pool_pages_i_s.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-04 20:20:44.595483291 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-06 19:28:04.055227506 +0900
+@@ -4514,6 +4514,36 @@
+       mutex_exit(block_mutex);
+ }
++/********************************************************************//**
++Looks up a block by its position in a buffer pool, counting the blocks
++chunk by chunk in the order of buf_pool->chunks.  n_block must be smaller
++than buf_pool->curr_size; this is asserted with ut_a().
++@return pointer to the block at position n_block */
++UNIV_INTERN
++buf_block_t*
++buf_page_from_array(
++/*================*/
++      buf_pool_t*     buf_pool,       /*!< in: buffer pool to search */
++      ulint           n_block)        /*!< in: block position, from 0 */
++{
++      buf_chunk_t*    chunks;
++      ulint           remaining;
++      ulint           n_chunks;
++      ulint           i;
++
++      ut_a(n_block < buf_pool->curr_size);
++
++      chunks = buf_pool->chunks;
++      remaining = n_block;
++      n_chunks = buf_pool->n_chunks;
++
++      for (i = 0; i < n_chunks; i++) {
++              if (remaining >= chunks[i].size) {
++                      /* not in this chunk: step over all of its blocks */
++                      remaining -= chunks[i].size;
++                      continue;
++              }
++
++              return(&chunks[i].blocks[remaining]);
++      }
++
++      /* the position lies beyond the blocks of every chunk */
++      ut_error;
++
++      return(NULL);
++}
++
+ /*********************************************************************//**
+ Asserts that all file pages in the buffer are in a replaceable state.
+ @return       TRUE */
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-04 20:20:44.614551139 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-06 19:23:47.622195800 +0900
+@@ -12042,6 +12042,9 @@
+ i_s_innodb_sys_stats,
+ i_s_innodb_table_stats,
+ i_s_innodb_index_stats,
++i_s_innodb_buffer_pool_pages,
++i_s_innodb_buffer_pool_pages_index,
++i_s_innodb_buffer_pool_pages_blob,
+ i_s_innodb_admin_command
+ mysql_declare_plugin_end;
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc  2010-12-04 19:46:39.786513783 +0900
++++ b/storage/innobase/handler/i_s.cc  2010-12-06 19:28:52.270226921 +0900
+@@ -51,6 +51,7 @@
+ #include "trx0sys.h" /* for trx_sys */
+ #include "dict0dict.h" /* for dict_sys */
+ #include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */
++#include "btr0btr.h" /* for btr_page_get_index_id */
+ }
+ static const char plugin_author[] = "Innobase Oy";
+@@ -4329,3 +4330,701 @@
+       STRUCT_FLD(system_vars, NULL),
+       STRUCT_FLD(__reserved1, NULL)
+ };
++
++/***********************************************************************
++Column layout with six columns: page_type is a nullable string with
++field length 64; space_id, page_no, lru_position, fix_count and
++flush_type are unsigned LONGLONG values.  The list is closed by
++END_OF_ST_FIELD_INFO.
++NOTE(review): presumably the fields_info of the
++i_s_innodb_buffer_pool_pages plugin -- confirm against its init function. */
++static ST_FIELD_INFO  i_s_innodb_buffer_pool_pages_fields_info[] =
++{
++      {STRUCT_FLD(field_name,         "page_type"),
++       STRUCT_FLD(field_length,       64),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_MAYBE_NULL),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "space_id"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "page_no"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "lru_position"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "fix_count"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "flush_type"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++/* Column layout with thirteen columns, all unsigned LONGLONG values:
++index_id, space_id, page_no, n_recs, data_size, hashed, access_time,
++modified, dirty, old, lru_position, fix_count and flush_type.  The list
++is closed by END_OF_ST_FIELD_INFO.
++NOTE(review): presumably the fields_info of the
++i_s_innodb_buffer_pool_pages_index plugin -- confirm against its init
++function. */
++static ST_FIELD_INFO  i_s_innodb_buffer_pool_pages_index_fields_info[] =
++{
++      {STRUCT_FLD(field_name,         "index_id"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "space_id"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "page_no"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "n_recs"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "data_size"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "hashed"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "access_time"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "modified"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "dirty"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "old"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "lru_position"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "fix_count"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "flush_type"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++static ST_FIELD_INFO  i_s_innodb_buffer_pool_pages_blob_fields_info[] =
++{     /* entry order fixes the field[] indexes used by ..._pages_blob_fill() */
++      {STRUCT_FLD(field_name,         "space_id"), /* field[0]: block->page.space */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "page_no"), /* field[1]: block->page.offset */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "compressed"), /* field[2]: page zip descriptor != NULL */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "part_len"), /* field[3]: filled with 0 for compressed pages */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "next_page_no"), /* field[4]: 0 when the link is FIL_NULL */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "lru_position"), /* field[5]: the fill function always stores 0 */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "fix_count"), /* field[6]: block->page.buf_fix_count */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "flush_type"), /* field[7]: block->page.flush_type */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++/***********************************************************************
++Fill information_schema.innodb_buffer_pool_pages: one row per buffer block. */
++static
++int
++i_s_innodb_buffer_pool_pages_fill(
++/*================*/
++                              /* out: 0 on success, 1 on failure */
++      THD*            thd,    /* in: thread */
++      TABLE_LIST*     tables, /* in/out: tables to fill */
++      COND*           cond)   /* in: condition (ignored) */
++{
++      TABLE*  table   = (TABLE *) tables->table;
++      int     status  = 0;
++      ulint   i;
++
++      DBUG_ENTER("i_s_innodb_buffer_pool_pages_fill");
++
++      /* without the PROCESS privilege: empty result set, no error */
++      if (check_global_access(thd, PROCESS_ACL)) {
++
++              DBUG_RETURN(0);
++      }
++
++      RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++      for (i = 0; status == 0 && i < srv_buf_pool_instances; i++) {
++              ulint           n_block;
++              buf_pool_t*     buf_pool;
++
++              buf_pool = buf_pool_from_array(i);
++
++              buf_pool_mutex_enter(buf_pool); /* held for the whole scan of this instance */
++
++              for (n_block = 0; n_block < buf_pool->curr_size; n_block++) {
++                      buf_block_t*    block = buf_page_from_array(buf_pool, n_block);
++                      const buf_frame_t*      frame = block->frame;
++
++                      char page_type[64];
++
++                      switch(fil_page_get_type(frame))
++                      {
++                              case FIL_PAGE_INDEX:
++                                      strcpy(page_type, "index");
++                                      break;
++                              case FIL_PAGE_UNDO_LOG:
++                                      strcpy(page_type, "undo_log");
++                                      break;
++                              case FIL_PAGE_INODE:
++                                      strcpy(page_type, "inode");
++                                      break;
++                              case FIL_PAGE_IBUF_FREE_LIST:
++                                      strcpy(page_type, "ibuf_free_list");
++                                      break;
++                              case FIL_PAGE_TYPE_ALLOCATED:
++                                      strcpy(page_type, "allocated");
++                                      break;
++                              case FIL_PAGE_IBUF_BITMAP:
++                                      strcpy(page_type, "bitmap");
++                                      break;
++                              case FIL_PAGE_TYPE_SYS:
++                                      strcpy(page_type, "sys");
++                                      break;
++                              case FIL_PAGE_TYPE_TRX_SYS:
++                                      strcpy(page_type, "trx_sys");
++                                      break;
++                              case FIL_PAGE_TYPE_FSP_HDR:
++                                      strcpy(page_type, "fsp_hdr");
++                                      break;
++                              case FIL_PAGE_TYPE_XDES:
++                                      strcpy(page_type, "xdes");
++                                      break;
++                              case FIL_PAGE_TYPE_BLOB:
++                                      strcpy(page_type, "blob");
++                                      break;
++                              case FIL_PAGE_TYPE_ZBLOB:
++                                      strcpy(page_type, "zblob");
++                                      break;
++                              case FIL_PAGE_TYPE_ZBLOB2:
++                                      strcpy(page_type, "zblob2");
++                                      break;
++                              default: /* the page type is unsigned, so print it with %lu */
++                                      sprintf(page_type, "unknown (type=%lu)", (unsigned long) fil_page_get_type(frame));
++                      }
++
++                      field_store_string(table->field[0], page_type);
++                      table->field[1]->store(block->page.space);
++                      table->field[2]->store(block->page.offset);
++                      table->field[3]->store(0); /* constant 0; presumably lru_position -- confirm against fields_info */
++                      table->field[4]->store(block->page.buf_fix_count);
++                      table->field[5]->store(block->page.flush_type);
++
++                      if (schema_table_store_record(thd, table)) {
++                              status = 1;
++                              break; /* the outer loop tests status and stops too */
++                      }
++
++              }
++
++              buf_pool_mutex_exit(buf_pool);
++      }
++
++      DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_buffer_pool_pages_index. */
++static
++int
++i_s_innodb_buffer_pool_pages_index_fill(
++/*================*/
++                              /* out: 0 on success, 1 on failure */
++      THD*            thd,    /* in: thread */
++      TABLE_LIST*     tables, /* in/out: tables to fill */
++      COND*           cond)   /* in: condition (ignored) */
++{
++      TABLE*  table   = (TABLE *) tables->table;
++      int     status  = 0;
++      ulint   i;
++      index_id_t      index_id;
++
++      DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_fill");
++
++      /* without the PROCESS privilege: empty result set, no error */
++      if (check_global_access(thd, PROCESS_ACL)) {
++
++              DBUG_RETURN(0);
++      }
++
++      RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++      for (i = 0; status == 0 && i < srv_buf_pool_instances; i++) {
++              ulint           n_block;
++              buf_pool_t*     buf_pool;
++
++              buf_pool = buf_pool_from_array(i);
++
++              buf_pool_mutex_enter(buf_pool); /* held for the whole scan of this instance */
++
++              for (n_block = 0; n_block < buf_pool->curr_size; n_block++) {
++                      buf_block_t*    block = buf_page_from_array(buf_pool, n_block);
++                      const buf_frame_t* frame = block->frame;
++
++                      if (fil_page_get_type(frame) == FIL_PAGE_INDEX) { /* only index pages produce rows */
++                              index_id = btr_page_get_index_id(frame);
++                              table->field[0]->store(index_id);
++                              table->field[1]->store(block->page.space);
++                              table->field[2]->store(block->page.offset);
++                              table->field[3]->store(page_get_n_recs(frame));
++                              table->field[4]->store(page_get_data_size(frame));
++                              table->field[5]->store(block->is_hashed);
++                              table->field[6]->store(block->page.access_time);
++                              table->field[7]->store(block->page.newest_modification != 0);
++                              table->field[8]->store(block->page.oldest_modification != 0);
++                              table->field[9]->store(block->page.old);
++                              table->field[10]->store(0); /* constant 0; presumably lru_position -- confirm */
++                              table->field[11]->store(block->page.buf_fix_count);
++                              table->field[12]->store(block->page.flush_type);
++
++                              if (schema_table_store_record(thd, table)) {
++                                      status = 1;
++                                      break; /* the outer loop tests status and stops too */
++                              }
++                      }
++              }
++
++              buf_pool_mutex_exit(buf_pool);
++      }
++
++      DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_buffer_pool_pages_blob. */
++static
++int
++i_s_innodb_buffer_pool_pages_blob_fill(
++/*================*/
++                              /* out: 0 on success, 1 on failure */
++      THD*            thd,    /* in: thread */
++      TABLE_LIST*     tables, /* in/out: tables to fill */
++      COND*           cond)   /* in: condition (ignored) */
++{
++      TABLE*  table   = (TABLE *) tables->table;
++      int     status  = 0;
++      ulint   i;
++
++      ulint           part_len;
++      ulint           next_page_no;
++
++      DBUG_ENTER("i_s_innodb_buffer_pool_pages_blob_fill");
++
++      /* without the PROCESS privilege: empty result set, no error */
++      if (check_global_access(thd, PROCESS_ACL)) {
++
++              DBUG_RETURN(0);
++      }
++
++      RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++      for (i = 0; status == 0 && i < srv_buf_pool_instances; i++) {
++              ulint           n_block;
++              buf_pool_t*     buf_pool;
++
++              buf_pool = buf_pool_from_array(i);
++
++              buf_pool_mutex_enter(buf_pool); /* held for the whole scan of this instance */
++
++              for (n_block = 0; n_block < buf_pool->curr_size; n_block++) {
++                      buf_block_t*    block = buf_page_from_array(buf_pool, n_block);
++                      page_zip_des_t* block_page_zip = buf_block_get_page_zip(block);
++                      const buf_frame_t* frame = block->frame;
++
++                      if (fil_page_get_type(frame) == FIL_PAGE_TYPE_BLOB) { /* only BLOB pages produce rows */
++
++                              if (UNIV_LIKELY_NULL(block_page_zip)) {
++                                      part_len = 0; /* not derived for compressed pages */
++
++                                      next_page_no = mach_read_from_4(
++                                                      buf_block_get_frame(block)
++                                                      + FIL_PAGE_NEXT);
++                              } else {
++                                      part_len = mach_read_from_4(
++                                                      buf_block_get_frame(block)
++                                                      + FIL_PAGE_DATA
++                                                      + 0 /*BTR_BLOB_HDR_PART_LEN*/);
++
++                                      next_page_no = mach_read_from_4(
++                                                      buf_block_get_frame(block)
++                                                      + FIL_PAGE_DATA
++                                                      + 4 /*BTR_BLOB_HDR_NEXT_PAGE_NO*/);
++                              }
++
++                              table->field[0]->store(block->page.space);
++                              table->field[1]->store(block->page.offset);
++                              table->field[2]->store(block_page_zip != NULL);
++                              table->field[3]->store(part_len);
++
++                              /* next_page_no column: FIL_NULL (no successor) is shown as 0 */
++                              if (next_page_no == FIL_NULL) {
++                                      table->field[4]->store(0);
++                              } else {
++                                      table->field[4]->store(next_page_no);
++                              }
++
++                              table->field[5]->store(0); /* lru_position column: always 0 */
++                              table->field[6]->store(block->page.buf_fix_count);
++                              table->field[7]->store(block->page.flush_type);
++
++                              if (schema_table_store_record(thd, table)) {
++                                      status = 1;
++                                      break; /* the outer loop tests status and stops too */
++                              }
++
++                      }
++              }
++
++              buf_pool_mutex_exit(buf_pool);
++      }
++
++      DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Plugin init hook: attach the innodb_buffer_pool_pages columns and filler. */
++static
++int
++i_s_innodb_buffer_pool_pages_init(
++/*==============================*/
++                      /* out: always 0 */
++      void*   p)      /* in/out: ST_SCHEMA_TABLE to set up */
++{
++      ST_SCHEMA_TABLE* const  i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
++      DBUG_ENTER("i_s_innodb_buffer_pool_pages_init");
++
++      i_s_table->fill_table = i_s_innodb_buffer_pool_pages_fill;
++      i_s_table->fields_info = i_s_innodb_buffer_pool_pages_fields_info;
++
++      DBUG_RETURN(0);
++}
++
++/***********************************************************************
++Plugin init hook: attach the innodb_buffer_pool_pages_index columns and filler. */
++static
++int
++i_s_innodb_buffer_pool_pages_index_init(
++/*====================================*/
++                      /* out: always 0 */
++      void*   p)      /* in/out: ST_SCHEMA_TABLE to set up */
++{
++      ST_SCHEMA_TABLE* const  i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
++      DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_init");
++
++      i_s_table->fill_table = i_s_innodb_buffer_pool_pages_index_fill;
++      i_s_table->fields_info = i_s_innodb_buffer_pool_pages_index_fields_info;
++
++      DBUG_RETURN(0);
++}
++
++/***********************************************************************
++Plugin init hook: attach the innodb_buffer_pool_pages_blob columns and filler. */
++static
++int
++i_s_innodb_buffer_pool_pages_blob_init(
++/*===================================*/
++                      /* out: always 0 */
++      void*   p)      /* in/out: ST_SCHEMA_TABLE to set up */
++{
++      ST_SCHEMA_TABLE* const  i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
++      DBUG_ENTER("i_s_innodb_buffer_pool_pages_blob_init");
++
++      i_s_table->fill_table = i_s_innodb_buffer_pool_pages_blob_fill;
++      i_s_table->fields_info = i_s_innodb_buffer_pool_pages_blob_fields_info;
++
++      DBUG_RETURN(0);
++}
++
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_buffer_pool_pages =
++{     /* plugin descriptor; initialisers are positional, keep their order */
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB buffer pool pages"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded; sets fields_info and fill_table */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++      /* struct st_mysql_show_var*; no status variables are exported */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* struct st_mysql_sys_var**; no system variables are exported */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_buffer_pool_pages_index =
++{     /* plugin descriptor; initialisers are positional, keep their order */
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES_INDEX"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB buffer pool index pages"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded; sets fields_info and fill_table */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_index_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++      /* struct st_mysql_show_var*; no status variables are exported */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* struct st_mysql_sys_var**; no system variables are exported */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_buffer_pool_pages_blob =
++{     /* plugin descriptor; initialisers are positional, keep their order */
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES_BLOB"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB buffer pool blob pages"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded; sets fields_info and fill_table */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_blob_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++      /* struct st_mysql_show_var*; no status variables are exported */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* struct st_mysql_sys_var**; no system variables are exported */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++
+diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
+--- a/storage/innobase/handler/i_s.h   2010-12-04 19:46:39.657513849 +0900
++++ b/storage/innobase/handler/i_s.h   2010-12-06 19:23:47.635192988 +0900
+@@ -45,5 +45,8 @@
+ extern struct st_mysql_plugin i_s_innodb_table_stats;
+ extern struct st_mysql_plugin i_s_innodb_index_stats;
+ extern struct st_mysql_plugin i_s_innodb_admin_command;
++extern struct st_mysql_plugin i_s_innodb_buffer_pool_pages;
++extern struct st_mysql_plugin i_s_innodb_buffer_pool_pages_index;
++extern struct st_mysql_plugin i_s_innodb_buffer_pool_pages_blob;
+ #endif /* i_s_h */
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h       2010-12-04 19:46:40.197471531 +0900
++++ b/storage/innobase/include/buf0buf.h       2010-12-06 19:23:47.638195824 +0900
+@@ -1072,6 +1072,14 @@
+ /*===========*/
+       const buf_pool_t*       buf_pool)       /*!< in: buffer pool */
+       __attribute__((nonnull, const));
++/********************************************************************//**
++Gets a block by index; the i_s.cc callers pass n_block < buf_pool->curr_size. */
++UNIV_INTERN
++buf_block_t*
++buf_page_from_array(
++/*================*/
++      buf_pool_t*     buf_pool,       /*!< in: buffer pool instance to look in */
++      ulint           n_block);       /*!< in: block index; TODO confirm bounds handling */
+ /******************************************************************//**
+ Returns the buffer pool instance given a page instance
+ @return buf_pool */
diff --git a/innodb_buffer_pool_shm.patch b/innodb_buffer_pool_shm.patch
new file mode 100644 (file)
index 0000000..45424f7
--- /dev/null
@@ -0,0 +1,1224 @@
+# name       : innodb_buffer_pool_shm.patch
+# introduced : 12
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
+--- a/storage/innobase/buf/buf0buddy.c 2010-12-04 19:46:39.372513543 +0900
++++ b/storage/innobase/buf/buf0buddy.c 2010-12-07 17:56:28.302087851 +0900
+@@ -183,7 +183,7 @@
+       void*           buf,            /*!< in: buffer frame to deallocate */
+       ibool           have_page_hash_mutex)
+ {
+-      const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf);
++      const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf_pool, buf);
+       buf_page_t*     bpage;
+       buf_block_t*    block;
+@@ -227,7 +227,7 @@
+       buf_block_t*    block)  /*!< in: buffer frame to allocate */
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_block(block);
+-      const ulint     fold = BUF_POOL_ZIP_FOLD(block);
++      const ulint     fold = BUF_POOL_ZIP_FOLD(buf_pool, block);
+       //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-06 20:16:21.726195340 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-07 20:40:30.824749814 +0900
+@@ -53,6 +53,10 @@
+ #include "page0zip.h"
+ #include "trx0trx.h"
+ #include "srv0start.h"
++#include "que0que.h"
++#include "read0read.h"
++#include "row0row.h"
++#include "ha_prototypes.h"
+ /* prototypes for new functions added to ha_innodb.cc */
+ trx_t* innobase_get_trx();
+@@ -342,6 +346,31 @@
+                                       was allocated for the frames */
+       buf_block_t*    blocks;         /*!< array of buffer control blocks */
+ };
++
++/* Header of a buffer pool shared memory segment; buf_chunk_init reads it at chunk->mem */
++typedef       struct buf_shm_info_struct      buf_shm_info_t;
++
++struct buf_shm_info_struct {
++      char    head_str[8];    /* BUF_SHM_INFO_HEAD, copied/compared as 8 bytes (no NUL) */
++      ulint   binary_id;      /* id derived from function addresses; must match the running binary */
++      ibool   is_new;         /* TRUE while initializing; a segment still marked TRUE is rejected */
++      ibool   clean;          /* TRUE once the segment was freed after a clean shutdown */
++      ibool   reusable;       /* TRUE once validated; FALSE forces re-initialization */
++      ulint   buf_pool_size;  /* srv_buf_pool_size at creation; must match on reuse */
++      ulint   page_size;      /* srv_page_size at creation; must match on reuse */
++      ulint   frame_offset;   /* offset of the first frame based on chunk->mem */
++      ulint   zip_hash_offset;        /* chunk->mem_size - zip_hash_mem_size */
++      ulint   zip_hash_n;     /* (buffer pool bytes / UNIV_PAGE_SIZE) * 2 */
++
++      ulint   checksum;       /* NOTE(review): computation not visible here -- confirm */
++
++      buf_pool_t      buf_pool_backup;        /* presumably a saved copy of the pool struct -- confirm */
++      buf_chunk_t     chunk_backup;           /* presumably a saved copy of the chunk struct -- confirm */
++
++      ib_uint64_t     dummy;  /* NOTE(review): purpose not visible here */
++};
++
++#define BUF_SHM_INFO_HEAD "XTRA_SHM" /* exactly 8 characters, the size of head_str */
+ #endif /* !UNIV_HOTBACKUP */
+ /********************************************************************//**
+@@ -988,6 +1017,58 @@
+ #endif /* UNIV_SYNC_DEBUG */
+ }
++static
++void
++buf_block_reuse(
++/*============*/
++      buf_block_t*    block,          /* in/out: block descriptor to reset */
++      ptrdiff_t       frame_offset)   /* in: delta added to the stored frame pointers */
++{
++      /* shift stored pointers; NOTE(review): presumably for a remapped shm chunk -- confirm */
++      block->frame += frame_offset;
++
++      UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);
++
++      block->index = NULL;
++      block->btr_search_latch = NULL;
++
++#ifdef UNIV_DEBUG
++      /* hash membership flags are cleared here; recreate later */
++      block->page.in_page_hash = FALSE;
++      block->page.in_zip_hash = FALSE;
++#endif /* UNIV_DEBUG */
++
++#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
++      block->n_pointers = 0;
++#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
++
++      if (block->page.zip.data) /* a NULL compressed-page pointer is left NULL */
++              block->page.zip.data += frame_offset;
++
++      block->is_hashed = FALSE;
++
++#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
++      /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
++      of buffer block mutex/rwlock with performance schema. If
++      PFS_GROUP_BUFFER_SYNC is defined, skip the registration
++      since buffer block mutex/rwlock will be registered later in
++      pfs_register_buffer_block() */
++
++      mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
++      rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
++#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
++      mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
++      rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
++#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
++
++      ut_ad(rw_lock_validate(&(block->lock)));
++
++#ifdef UNIV_SYNC_DEBUG
++      rw_lock_create(buf_block_debug_latch_key,
++                     &block->debug_latch, SYNC_NO_ORDER_CHECK);
++#endif /* UNIV_SYNC_DEBUG */
++}
++
+ /********************************************************************//**
+ Allocates a chunk of buffer frames.
+ @return       chunk, or NULL on failure */
+@@ -1001,26 +1082,188 @@
+ {
+       buf_block_t*    block;
+       byte*           frame;
++      ulint           zip_hash_n = 0;
++      ulint           zip_hash_mem_size = 0;
++      hash_table_t*   zip_hash_tmp = NULL;
+       ulint           i;
++      buf_shm_info_t* shm_info = NULL;
+       /* Round down to a multiple of page size,
+       although it already should be. */
+       mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
++
++      srv_buffer_pool_shm_is_reused = FALSE;
++
++      if (srv_buffer_pool_shm_key) {
++              /* zip_hash size */
++              zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
++              zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n)
++                                                + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
++      }
++
+       /* Reserve space for the block descriptors. */
+       mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
+                                 + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
++      if (srv_buffer_pool_shm_key) {
++               mem_size += ut_2pow_round(sizeof(buf_shm_info_t)
++                                         + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
++               mem_size += zip_hash_mem_size;
++      }
+       chunk->mem_size = mem_size;
++
++      if (srv_buffer_pool_shm_key) {
++              ulint   binary_id;
++              ibool   is_new;
++
++              ut_a(buf_pool->n_chunks == 1);
++
++              fprintf(stderr,
++              "InnoDB: Notice: The innodb_buffer_pool_shm_key option has been specified.\n"
++              "InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
++              "InnoDB:   * the mysqld executable between restarts of the server.\n"
++              "InnoDB:   * the value of innodb_buffer_pool_size.\n"
++              "InnoDB:   * the value of innodb_page_size.\n"
++              "InnoDB:   * datafiles created by InnoDB during this session.\n"
++              "InnoDB: Otherwise, data corruption in datafiles may result.\n");
++
++              /* FIXME: This is vague id still */
++              binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
++                        + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum)
++                        + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
++                        + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
++                        + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
++                        + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
++                        + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
++                        + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
++                        + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
++                        + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
++                        + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);
++
++              chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);
++
++              if (UNIV_UNLIKELY(chunk->mem == NULL)) {
++                      return(NULL);
++              }
++init_again:
++#ifdef UNIV_SET_MEM_TO_ZERO
++              if (is_new) {
++                      memset(chunk->mem, '\0', chunk->mem_size);
++              }
++#endif
++              /* for ut_fold_binary_32(), these values should be 32-bit aligned */
++              ut_a(sizeof(buf_shm_info_t) % 4 == 0);
++              ut_a((ulint)chunk->mem % 4 == 0);
++              ut_a(chunk->mem_size % 4 == 0);
++
++              shm_info = chunk->mem;
++
++              zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);
++
++              if (is_new) {
++                      strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
++                      shm_info->binary_id = binary_id;
++                      shm_info->is_new = TRUE;        /* changed to FALSE when the initialization is finished */
++                      shm_info->clean = FALSE;        /* changed to TRUE when free the segment. */
++                      shm_info->reusable = FALSE;     /* changed to TRUE when validation is finished. */
++                      shm_info->buf_pool_size = srv_buf_pool_size;
++                      shm_info->page_size = srv_page_size;
++                      shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size;
++                      shm_info->zip_hash_n = zip_hash_n;
++              } else {
++                      ulint   checksum;
++
++                      if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) {
++                              fprintf(stderr,
++                              "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n");
++                              return(NULL);
++                      }
++                      if (shm_info->binary_id != binary_id) {
++                              fprintf(stderr,
++                              "InnoDB: Error: The shared memory segment seems not to be for this binary.\n");
++                              return(NULL);
++                      }
++                      if (shm_info->is_new) {
++                              fprintf(stderr,
++                              "InnoDB: Error: The shared memory was not initialized yet.\n");
++                              return(NULL);
++                      }
++                      if (shm_info->buf_pool_size != srv_buf_pool_size) {
++                              fprintf(stderr,
++                              "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
++                              shm_info->buf_pool_size, srv_buf_pool_size);
++                              return(NULL);
++                      }
++                      if (shm_info->page_size != srv_page_size) {
++                              fprintf(stderr,
++                              "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n",
++                              shm_info->page_size, srv_page_size);
++                              return(NULL);
++                      }
++                      if (!shm_info->reusable) {
++                              fprintf(stderr,
++                              "InnoDB: Warning: The shared memory has unrecoverable contents.\n"
++                              "InnoDB: The shared memory segment is initialized.\n");
++                              is_new = TRUE;
++                              goto init_again;
++                      }
++                      if (!shm_info->clean) {
++                              fprintf(stderr,
++                              "InnoDB: Warning: The shared memory was not shut down cleanly.\n"
++                              "InnoDB: The shared memory segment is initialized.\n");
++                              is_new = TRUE;
++                              goto init_again;
++                      }
++
++                      ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
++                      ut_a(shm_info->zip_hash_n == zip_hash_n);
++
++                      /* check checksum */
++                      if (srv_buffer_pool_shm_checksum) {
++                              checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
++                                                           chunk->mem_size - sizeof(buf_shm_info_t));
++                      } else {
++                              checksum = BUF_NO_CHECKSUM_MAGIC;
++                      }
++
++                      if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
++                          && shm_info->checksum != checksum) {
++                              fprintf(stderr,
++                              "InnoDB: Error: checksum of the shared memory is not match. "
++                              "(stored=%lu calculated=%lu)\n",
++                              shm_info->checksum, checksum);
++                              return(NULL);
++                      }
++
++                      /* flag to use the segment. */
++                      shm_info->clean = FALSE;        /* changed to TRUE when free the segment. */
++              }
++
++              /* init zip_hash contents */
++              if (is_new) {
++                      hash_create_init(zip_hash_tmp, zip_hash_n);
++              } else {
++                      /* adjust offset is done later */
++                      hash_create_reuse(zip_hash_tmp);
++
++                      srv_buffer_pool_shm_is_reused = TRUE;
++              }
++      } else {
+       chunk->mem = os_mem_alloc_large(&chunk->mem_size);
+       if (UNIV_UNLIKELY(chunk->mem == NULL)) {
+               return(NULL);
+       }
++      }
+       /* Allocate the block descriptors from
+       the start of the memory block. */
++      if (srv_buffer_pool_shm_key) {
++              chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
++      } else {
+       chunk->blocks = chunk->mem;
++      }
+       /* Align a pointer to the first frame.  Note that when
+       os_large_page_size is smaller than UNIV_PAGE_SIZE,
+@@ -1028,8 +1271,13 @@
+       it is bigger, we may allocate more blocks than requested. */
+       frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
++      if (srv_buffer_pool_shm_key) {
++              /* reserve zip_hash space and always subtract 1 for reproducibility */
++              chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1;
++      } else {
+       chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
+               - (frame != chunk->mem);
++      }
+       /* Subtract the space needed for block descriptors. */
+       {
+@@ -1043,6 +1291,98 @@
+               chunk->size = size;
+       }
++      if (shm_info && !(shm_info->is_new)) {
++              /* convert the shared memory segment for reuse */
++              ptrdiff_t       phys_offset;
++              ptrdiff_t       logi_offset;
++              ptrdiff_t       blocks_offset;
++              void*           previous_frame_address;
++
++              if (chunk->size < shm_info->chunk_backup.size) {
++                      fprintf(stderr,
++                      "InnoDB: Error: The buffer pool became smaller because of allocated address.\n"
++                      "InnoDB: Retrying may avoid this situation.\n");
++                      shm_info->clean = TRUE; /* release the flag for retrying */
++                      return(NULL);
++              }
++
++              chunk->size = shm_info->chunk_backup.size;
++              phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
++              logi_offset = frame - chunk->blocks[0].frame;
++              previous_frame_address = chunk->blocks[0].frame;
++              blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;
++
++              if (phys_offset || logi_offset || blocks_offset) {
++                      fprintf(stderr,
++                      "InnoDB: Buffer pool in the shared memory segment should be converted.\n"
++                      "InnoDB: Previous frames in address      : %p\n"
++                      "InnoDB: Previous frames were located    : %p\n"
++                      "InnoDB: Current frames should be located: %p\n"
++                      "InnoDB: Pysical offset                  : %ld (%#lx)\n"
++                      "InnoDB: Logical offset (frames)         : %ld (%#lx)\n"
++                      "InnoDB: Logical offset (blocks)         : %ld (%#lx)\n",
++                              (byte*)chunk->mem + shm_info->frame_offset,
++                              chunk->blocks[0].frame, frame,
++                              phys_offset, phys_offset, logi_offset, logi_offset,
++                              blocks_offset, blocks_offset);
++              } else {
++                      fprintf(stderr,
++                      "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
++              }
++
++              if (phys_offset) {
++                      fprintf(stderr,
++                      "InnoDB: Aligning physical offset...");
++
++                      memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
++                              chunk->size * UNIV_PAGE_SIZE);
++
++                      fprintf(stderr,
++                      " Done.\n");
++              }
++
++              /* buf_block_t */
++              block = chunk->blocks;
++              for (i = chunk->size; i--; ) {
++                      buf_block_reuse(block, logi_offset);
++                      block++;
++              }
++
++              if (logi_offset || blocks_offset) {
++                      fprintf(stderr,
++                      "InnoDB: Aligning logical offset...");
++
++
++                      /* buf_pool_t buf_pool_backup */
++                      UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
++                                      previous_frame_address, logi_offset, blocks_offset);
++                      UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free,
++                                      previous_frame_address, logi_offset, blocks_offset);
++                      UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU,
++                                      previous_frame_address, logi_offset, blocks_offset);
++                      if (shm_info->buf_pool_backup.LRU_old)
++                              shm_info->buf_pool_backup.LRU_old =
++                                      (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
++                                              + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
++                                                ? logi_offset : blocks_offset));
++
++                      UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
++                                      previous_frame_address, logi_offset, blocks_offset);
++
++                      UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean,
++                                      previous_frame_address, logi_offset, blocks_offset);
++                      for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
++                              UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i],
++                                      previous_frame_address, logi_offset, blocks_offset);
++                      }
++
++                      HASH_OFFSET(zip_hash_tmp, buf_page_t, hash,
++                                      previous_frame_address, logi_offset, blocks_offset);
++
++                      fprintf(stderr,
++                      " Done.\n");
++              }
++      } else {
+       /* Init block structs and assign frames for them. Then we
+       assign the frames to the first blocks (we already mapped the
+       memory above). */
+@@ -1068,6 +1408,11 @@
+               block++;
+               frame += UNIV_PAGE_SIZE;
+       }
++      }
++
++      if (shm_info) {
++              shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
++      }
+ #ifdef PFS_GROUP_BUFFER_SYNC
+       pfs_register_buffer_block(chunk);
+@@ -1249,6 +1594,8 @@
+               UNIV_MEM_UNDESC(block);
+       }
++      ut_a(!srv_buffer_pool_shm_key);
++
+       os_mem_free_large(chunk->mem, chunk->mem_size);
+ }
+@@ -1289,7 +1636,7 @@
+       ulint           instance_no)    /*!< in: id of the instance */
+ {
+       ulint           i;
+-      buf_chunk_t*    chunk;
++      buf_chunk_t*    chunk = NULL;
+       /* 1. Initialize general fields
+       ------------------------------- */
+@@ -1335,7 +1682,10 @@
+               buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
+               buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
++              /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */
++              if (!srv_buffer_pool_shm_key) {
+               buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
++              }
+               
+               buf_pool->last_printout_time = ut_time();
+       }
+@@ -1354,6 +1704,86 @@
+       /* All fields are initialized by mem_zalloc(). */
++      if (chunk && srv_buffer_pool_shm_key) {
++              buf_shm_info_t* shm_info;
++
++              ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
++              shm_info = chunk->mem;
++
++              buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);
++
++              if(shm_info->is_new) {
++                      shm_info->is_new = FALSE; /* initialization was finished */
++              } else {
++                      buf_block_t*    block = chunk->blocks;
++                      buf_page_t*     b;
++
++                      /* shm_info->buf_pool_backup should already have been converted */
++                      /* in buf_chunk_init(), so simply copy it. */
++                      buf_pool->flush_list            = shm_info->buf_pool_backup.flush_list;
++                      buf_pool->freed_page_clock      = shm_info->buf_pool_backup.freed_page_clock;
++                      buf_pool->free                  = shm_info->buf_pool_backup.free;
++                      buf_pool->LRU                   = shm_info->buf_pool_backup.LRU;
++                      buf_pool->LRU_old               = shm_info->buf_pool_backup.LRU_old;
++                      buf_pool->LRU_old_len           = shm_info->buf_pool_backup.LRU_old_len;
++                      buf_pool->unzip_LRU             = shm_info->buf_pool_backup.unzip_LRU;
++                      buf_pool->zip_clean             = shm_info->buf_pool_backup.zip_clean;
++                      for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
++                              buf_pool->zip_free[i]   = shm_info->buf_pool_backup.zip_free[i];
++                      }
++
++                      for (i = 0; i < chunk->size; i++, block++) {
++                              if (buf_block_get_state(block)
++                                  == BUF_BLOCK_FILE_PAGE) {
++                                      ut_d(block->page.in_page_hash = TRUE);
++                                      HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
++                                                  buf_page_address_fold(
++                                                          block->page.space,
++                                                          block->page.offset),
++                                                  &block->page);
++                              }
++                      }
++
++                      for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
++                           b = UT_LIST_GET_NEXT(zip_list, b)) {
++                              ut_ad(!b->in_flush_list);
++                              ut_ad(b->in_LRU_list);
++
++                              ut_d(b->in_page_hash = TRUE);
++                              HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
++                                          buf_page_address_fold(b->space, b->offset), b);
++                      }
++
++                      for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
++                           b = UT_LIST_GET_NEXT(flush_list, b)) {
++                              ut_ad(b->in_flush_list);
++                              ut_ad(b->in_LRU_list);
++
++                              switch (buf_page_get_state(b)) {
++                              case BUF_BLOCK_ZIP_DIRTY:
++                                      ut_d(b->in_page_hash = TRUE);
++                                      HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
++                                                  buf_page_address_fold(b->space,
++                                                                        b->offset), b);
++                                      break;
++                              case BUF_BLOCK_FILE_PAGE:
++                                      /* uncompressed page */
++                                      break;
++                              case BUF_BLOCK_ZIP_FREE:
++                              case BUF_BLOCK_ZIP_PAGE:
++                              case BUF_BLOCK_NOT_USED:
++                              case BUF_BLOCK_READY_FOR_USE:
++                              case BUF_BLOCK_MEMORY:
++                              case BUF_BLOCK_REMOVE_HASH:
++                                      ut_error;
++                                      break;
++                              }
++                      }
++
++
++              }
++      }
++
+       mutex_exit(&buf_pool->LRU_list_mutex);
+       rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       buf_pool_mutex_exit(buf_pool);
+@@ -1373,6 +1803,42 @@
+       buf_chunk_t*    chunk;
+       buf_chunk_t*    chunks;
++      if (srv_buffer_pool_shm_key) {
++              buf_shm_info_t* shm_info;
++
++              ut_a(buf_pool->n_chunks == 1);
++
++              chunk = buf_pool->chunks;
++              shm_info = chunk->mem;
++              ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
++
++              /* if opened, close shm. */
++              if (!shm_info->clean) {
++                      /* Validate that the shared memory segment has no unrecoverable contents. */
++                      /* Currently no validation is needed, so the segment is always marked reusable. */
++                      shm_info->reusable = TRUE;
++
++                      memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t));
++                      memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));
++
++                      if (srv_fast_shutdown < 2) {
++                              if (srv_buffer_pool_shm_checksum) {
++                                      shm_info->checksum =
++                                              ut_fold_binary_32(
++                                                      (byte*)chunk->mem + sizeof(buf_shm_info_t),
++                                                      chunk->mem_size - sizeof(buf_shm_info_t));
++                              } else {
++                                      shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
++                              }
++                              shm_info->clean = TRUE;
++                      }
++
++                      fprintf(stderr,
++                              "InnoDB: The shared memory was closed.\n");
++              }
++
++              os_shm_free(chunk->mem, chunk->mem_size);
++      } else {
+       chunks = buf_pool->chunks;
+       chunk = chunks + buf_pool->n_chunks;
+@@ -1381,10 +1847,13 @@
+               would fail at shutdown. */
+               os_mem_free_large(chunk->mem, chunk->mem_size);
+       }
++      }
+       mem_free(buf_pool->chunks);
+       hash_table_free(buf_pool->page_hash);
++      if (!srv_buffer_pool_shm_key) {
+       hash_table_free(buf_pool->zip_hash);
++      }
+ }
+ /********************************************************************//**
+@@ -1668,6 +2137,11 @@
+       //buf_pool_mutex_enter(buf_pool);
+       mutex_enter(&buf_pool->LRU_list_mutex);
++      if (srv_buffer_pool_shm_key) {
++              /* Cannot support shrink */
++              goto func_done;
++      }
++
+ shrink_again:
+       if (buf_pool->n_chunks <= 1) {
+@@ -1848,7 +2322,7 @@
+       zip_hash = hash_create(2 * buf_pool->curr_size);
+       HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
+-                   BUF_POOL_ZIP_FOLD_BPAGE);
++                   buf_pool, BUF_POOL_ZIP_FOLD_BPAGE);
+       hash_table_free(buf_pool->zip_hash);
+       buf_pool->zip_hash = zip_hash;
+@@ -2130,6 +2604,11 @@
+       ulint   change_size;
+       ulint   min_change_size = 1048576 * srv_buf_pool_instances;
++      if (srv_buffer_pool_shm_key) {
++              /* Cannot support resize */
++              return;
++      }
++
+       buf_pool_mutex_enter_all();
+   
+       if (srv_buf_pool_old_size == srv_buf_pool_size) {
+diff -ruN a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c
+--- a/storage/innobase/ha/hash0hash.c  2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/ha/hash0hash.c  2010-12-07 16:10:14.937749140 +0900
+@@ -133,6 +133,70 @@
+ }
+ /*************************************************************//**
++@return bytes needed for a hash_table_t header followed by its cell array */
++UNIV_INTERN
++ulint
++hash_create_needed(
++/*===============*/
++      ulint   n)      /*!< in: requested number of array cells */
++{
++      /* sizeof(hash_table_t) rounded up to a multiple of 8: this is
++      where the cell array starts relative to the header. */
++      const ulint     header = ((sizeof(hash_table_t) + 7) / 8) * 8;
++
++      /* The table is sized for ut_find_prime(n) cells rather than for
++      n itself. */
++      const ulint     n_cells = ut_find_prime(n);
++
++      return(header + n_cells * sizeof(hash_cell_t));
++}
++
++/** Sets up a hash table header and its cell array inside memory supplied by
++the caller; the cells start at the first 8-byte multiple past the header. */
++UNIV_INTERN
++void
++hash_create_init(
++/*=============*/
++      hash_table_t*   table,  /*!< in/out: start of the memory to initialize */
++      ulint           n)      /*!< in: requested number of array cells */
++{
++      const ulint     n_cells = ut_find_prime(n);
++      /* sizeof(hash_table_t) rounded up to a multiple of 8 */
++      const ulint     header = ((sizeof(hash_table_t) + 7) / 8) * 8;
++
++      table->n_cells = n_cells;
++      table->array = (hash_cell_t*)((byte*)table + header);
++
++      /* No mutexes and no heaps are attached to this table. */
++      table->heap = NULL;
++      table->heaps = NULL;
++      table->mutexes = NULL;
++      table->n_mutexes = 0;
++# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
++      table->adaptive = FALSE;
++# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
++      ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
++
++      /* Initialize the cell array. */
++      hash_table_clear(table);
++}
++
++/** Re-points table->array at the cells that follow this header in memory.
++Only table->array is written; debug builds also check the magic number. */
++UNIV_INTERN
++void
++hash_create_reuse(
++/*==============*/
++      hash_table_t*   table)  /*!< in/out: hash table header to fix up */
++{
++      /* sizeof(hash_table_t) rounded up to a multiple of 8 */
++      const ulint     header = ((sizeof(hash_table_t) + 7) / 8) * 8;
++
++      table->array = (hash_cell_t*)((byte*)table + header);
++      ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
++}
++
++/*************************************************************//**
+ Frees a hash table. */
+ UNIV_INTERN
+ void
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-06 20:16:21.733263627 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-07 17:56:28.316139830 +0900
+@@ -194,6 +194,7 @@
+ static my_bool        innobase_create_status_file             = FALSE;
+ static my_bool        innobase_stats_on_metadata              = TRUE;
+ static my_bool        innobase_use_sys_stats_table            = FALSE;
++static my_bool        innobase_buffer_pool_shm_checksum       = TRUE;
+ static char*  internal_innobase_data_file_path        = NULL;
+@@ -2620,6 +2621,14 @@
+       srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
+       srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
++      if (srv_buffer_pool_shm_key && srv_buf_pool_instances > 1) {
++              fprintf(stderr,
++                      "InnoDB: Warning: innodb_buffer_pool_shm_key cannot be used with several innodb_buffer_pool_instances.\n"
++                      "InnoDB:          innodb_buffer_pool_instances was set to 1.\n");
++              srv_buf_pool_instances = 1;
++              innobase_buffer_pool_instances = 1;
++      }
++
+       srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
+       srv_n_file_io_threads = (ulint) innobase_file_io_threads;
+@@ -2636,6 +2645,7 @@
+       srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+       srv_use_checksums = (ibool) innobase_use_checksums;
+       srv_fast_checksum = (ibool) innobase_fast_checksum;
++      srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;
+ #ifdef HAVE_LARGE_PAGES
+         if ((os_use_large_pages = (ibool) my_use_large_pages))
+@@ -11642,6 +11652,16 @@
+   "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
+   NULL, NULL, 1L, 1L, MAX_BUFFER_POOLS, 1L);
++static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",
++  NULL, NULL, 0, 0, INT_MAX32, 0);
++
++static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
++  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++  "Enable buffer_pool_shm checksum validation (enabled by default).",
++  NULL, NULL, TRUE);
++
+ static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
+   PLUGIN_VAR_RQCMDARG,
+   "Helps in performance tuning in heavily concurrent environments.",
+@@ -11921,6 +11941,8 @@
+   MYSQL_SYSVAR(autoextend_increment),
+   MYSQL_SYSVAR(buffer_pool_size),
+   MYSQL_SYSVAR(buffer_pool_instances),
++  MYSQL_SYSVAR(buffer_pool_shm_key),
++  MYSQL_SYSVAR(buffer_pool_shm_checksum),
+   MYSQL_SYSVAR(checksums),
+   MYSQL_SYSVAR(fast_checksum),
+   MYSQL_SYSVAR(commit_concurrency),
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h       2010-12-06 20:16:21.778264552 +0900
++++ b/storage/innobase/include/buf0buf.h       2010-12-07 17:56:28.322749380 +0900
+@@ -36,6 +36,7 @@
+ #ifndef UNIV_HOTBACKUP
+ #include "ut0rbt.h"
+ #include "os0proc.h"
++#include "srv0srv.h"
+ /** @name Modes for buf_page_get_gen */
+ /* @{ */
+@@ -1520,9 +1521,12 @@
+ /**********************************************************************//**
+ Compute the hash fold value for blocks in buf_pool->zip_hash. */
+ /* @{ */
+-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
+-#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
+-#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
++/* With srv_buffer_pool_shm_key set, fold the byte offset from frame 0 of bpool instead of the address */
++#define BUF_POOL_ZIP_FOLD_PTR(bpool, ptr) (!srv_buffer_pool_shm_key\
++                                      ?((ulint) (ptr) / UNIV_PAGE_SIZE)\
++                                      :((ulint) ((byte*)(ptr) - (byte*)(buf_page_from_array((bpool), 0)->frame)) / UNIV_PAGE_SIZE))
++#define BUF_POOL_ZIP_FOLD(bpool, b) BUF_POOL_ZIP_FOLD_PTR((bpool), (b)->frame)
++#define BUF_POOL_ZIP_FOLD_BPAGE(bpool, b) BUF_POOL_ZIP_FOLD((bpool), (buf_block_t*) (b))
+ /* @} */
+ /** @brief The buffer pool statistics structure. */
+diff -ruN a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
+--- a/storage/innobase/include/hash0hash.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/hash0hash.h     2010-12-07 17:56:28.324726446 +0900
+@@ -49,6 +49,28 @@
+ hash_create(
+ /*========*/
+       ulint   n);     /*!< in: number of array cells */
++
++/*************************************************************//**
++@return bytes needed for a hash_table_t header followed by its cell array */
++UNIV_INTERN
++ulint
++hash_create_needed(
++/*===============*/
++      ulint   n);     /*!< in: requested number of array cells */
++/** Initializes a hash table header and cell array in caller-supplied memory. */
++UNIV_INTERN
++void
++hash_create_init(
++/*=============*/
++      hash_table_t*   table,  /*!< in/out: memory to initialize */
++      ulint           n);     /*!< in: requested number of array cells */
++/** Recomputes table->array from the address of table; nothing else is written. */
++UNIV_INTERN
++void
++hash_create_reuse(
++/*==============*/
++      hash_table_t*   table); /*!< in/out: hash table header */
++
+ #ifndef UNIV_HOTBACKUP
+ /*************************************************************//**
+ Creates a mutex array to protect a hash table. */
+@@ -306,7 +328,7 @@
+ /****************************************************************//**
+ Move all hash table entries from OLD_TABLE to NEW_TABLE. */
+-#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
++#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, BPOOL, FOLD_FUNC) \
+ do {\
+       ulint           i2222;\
+       ulint           cell_count2222;\
+@@ -318,7 +340,7 @@
+ \
+               while (node2222) {\
+                       NODE_TYPE*      next2222 = node2222->PTR_NAME;\
+-                      ulint           fold2222 = FOLD_FUNC(node2222);\
++                      ulint           fold2222 = FOLD_FUNC(BPOOL, node2222);\
+ \
+                       HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
+                               fold2222, node2222);\
+@@ -327,6 +349,33 @@
+               }\
+       }\
+ } while (0)
++
++/********************************************************************//**
++Relocates the chain pointers of a hash table whose nodes were moved in memory.
++Each non-NULL cell head and PTR_NAME link is shifted by FOFFSET bytes if it
++points above FADDR, and by BOFFSET bytes otherwise. */
++#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \
++do {\
++      ulint           i2222;\
++      ulint           cell_count2222;\
++\
++      cell_count2222 = hash_get_n_cells(TABLE);\
++      for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
++              NODE_TYPE*      node2222;\
++\
++              if ((TABLE)->array[i2222].node) \
++                      (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
++                      + (((TABLE)->array[i2222].node > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\
++              node2222 = HASH_GET_FIRST((TABLE), i2222);\
++\
++              while (node2222) {\
++                      if (node2222->PTR_NAME) \
++                              node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
++                              + ((((void*)node2222->PTR_NAME) > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\
++                      node2222 = node2222->PTR_NAME;\
++              }\
++      }\
++} while (0)
+ /************************************************************//**
+ Gets the mutex index for a fold value in a hash table.
+diff -ruN a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
+--- a/storage/innobase/include/os0proc.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/os0proc.h       2010-12-07 16:10:14.955718750 +0900
+@@ -32,6 +32,11 @@
+ #ifdef UNIV_LINUX
+ #include <sys/ipc.h>
+ #include <sys/shm.h>
++#else /* !UNIV_LINUX */
++# if defined HAVE_SYS_IPC_H && defined HAVE_SYS_SHM_H
++#include <sys/ipc.h>
++#include <sys/shm.h>
++# endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
+ #endif
+ typedef void*                 os_process_t;
+@@ -70,6 +75,29 @@
+       ulint   size);                  /*!< in: size returned by
+                                       os_mem_alloc_large() */
++
++/****************************************************************//**
++Allocates or attaches and reuses shared memory segment.
++The content is not cleared automatically.
++@return       allocated memory */
++UNIV_INTERN
++void*
++os_shm_alloc(
++/*=========*/
++      ulint*  n,                      /*!< in/out: number of bytes */
++      uint    key,                    /*!< in: key value of the shared memory segment */
++      ibool*  is_new);                /*!< out: NOTE(review): presumably TRUE when the segment was newly created -- confirm against the definition */
++
++/****************************************************************//**
++Detach shared memory segment. */
++UNIV_INTERN
++void
++os_shm_free(
++/*========*/
++      void    *ptr,                   /*!< in: pointer returned by
++                                      os_shm_alloc() */
++      ulint   size);                  /*!< in: size returned by
++                                      os_shm_alloc() */
+ #ifndef UNIV_NONINL
+ #include "os0proc.ic"
+ #endif
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-04 20:20:28.016566697 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-07 16:10:14.956717659 +0900
+@@ -171,6 +171,10 @@
+ extern ulint  srv_mem_pool_size;
+ extern ulint  srv_lock_table_size;
++extern uint   srv_buffer_pool_shm_key;
++extern ibool  srv_buffer_pool_shm_is_reused;
++extern ibool  srv_buffer_pool_shm_checksum;
++
+ extern ibool  srv_thread_concurrency_timer_based;
+ extern ulint  srv_n_file_io_threads;
+diff -ruN a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
+--- a/storage/innobase/include/ut0lst.h        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/ut0lst.h        2010-12-07 16:10:14.957785525 +0900
+@@ -257,5 +257,48 @@
+       ut_a(ut_list_node_313 == NULL);                                 \
+ } while (0)
++/********************************************************************//**
++Relocates list pointers: those above FADDR by FOFFSET, the others by BOFFSET.
++@param NAME           the name of the list
++@param TYPE           node type
++@param BASE           base node (not a pointer to it)
++@param FADDR,FOFFSET,BOFFSET  threshold address and the two byte offsets */
++#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET)     \
++do {                                                                  \
++      ulint   ut_list_i_313;                                          \
++      TYPE*   ut_list_node_313;                                       \
++                                                                      \
++      if ((BASE).start)                                               \
++              (BASE).start = (void*)((byte*)((BASE).start)                    \
++                      + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\
++      if ((BASE).end)                                                 \
++              (BASE).end   = (void*)((byte*)((BASE).end)                      \
++                      + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\
++                                                                      \
++      ut_list_node_313 = (BASE).start;                                \
++                                                                      \
++      for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {         \
++              ut_a(ut_list_node_313);                                 \
++              if ((ut_list_node_313->NAME).prev)                      \
++                      (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
++                              + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\
++              if ((ut_list_node_313->NAME).next)                      \
++                      (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\
++                              + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\
++              ut_list_node_313 = (ut_list_node_313->NAME).next;       \
++      }                                                               \
++                                                                      \
++      ut_a(ut_list_node_313 == NULL);                                 \
++                                                                      \
++      ut_list_node_313 = (BASE).end;                                  \
++                                                                      \
++      for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {         \
++              ut_a(ut_list_node_313);                                 \
++              ut_list_node_313 = (ut_list_node_313->NAME).prev;       \
++      }                                                               \
++                                                                      \
++      ut_a(ut_list_node_313 == NULL);                                 \
++} while (0)
++
+ #endif
+diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
+--- a/storage/innobase/log/log0recv.c  2010-12-04 19:46:40.212513377 +0900
++++ b/storage/innobase/log/log0recv.c  2010-12-07 16:10:14.959785817 +0900
+@@ -2912,6 +2912,7 @@
+ /*==========================*/
+ {
+       ut_a(!recv_needed_recovery);
++      ut_a(!srv_buffer_pool_shm_is_reused);
+       recv_needed_recovery = TRUE;
+diff -ruN a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c
+--- a/storage/innobase/os/os0proc.c    2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/os/os0proc.c    2010-12-07 16:10:14.960800123 +0900
+@@ -229,3 +229,173 @@
+       }
+ #endif
+ }
++
++/****************************************************************//**
++Creates a System V shared memory segment for the given key or, if one
++already exists, attaches to it. The content is not cleared automatically.
++@return       attached memory, or NULL on failure or when unsupported */
++UNIV_INTERN
++void*
++os_shm_alloc(
++/*=========*/
++      ulint*  n,      /*!< in/out: number of bytes; rounded up to page size */
++      uint    key,    /*!< in: key of the shared memory segment */
++      ibool*  is_new) /*!< out: TRUE if the segment was created by this call */
++{
++      void*   ptr;
++#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
++      ulint   size;
++      int     shmid;
++
++      *is_new = FALSE;
++      fprintf(stderr,
++              "InnoDB: The shared memory segment containing the buffer pool is: key  %#x (%u).\n",
++              key, key);
++# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
++      if (!os_use_large_pages || !os_large_page_size) {
++              goto skip;
++      }
++
++      /* Align block size to os_large_page_size */
++      ut_ad(ut_is_2pow(os_large_page_size));
++      size = ut_2pow_round(*n + (os_large_page_size - 1),
++                           os_large_page_size);
++
++      /* IPC_EXCL makes the call fail with EEXIST if the key is in use */
++      shmid = shmget((key_t)key, (size_t)size,
++                      IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W);
++      if (shmid < 0) {
++              if (errno == EEXIST) {
++                      fprintf(stderr,
++                              "InnoDB: HugeTLB: The shared memory segment exists.\n");
++                      shmid = shmget((key_t)key, (size_t)size,
++                                      SHM_HUGETLB | SHM_R | SHM_W);
++                      if (shmid < 0) {
++                              fprintf(stderr,
++                                      "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
++                                      size, errno);
++                              goto skip;
++                      } else {
++                              fprintf(stderr,
++                                      "InnoDB: HugeTLB: The existent shared memory segment is used.\n");
++                      }
++              } else {
++                      fprintf(stderr,
++                              "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
++                              size, errno);
++                      goto skip;
++              }
++      } else {
++              *is_new = TRUE;
++              fprintf(stderr,
++                      "InnoDB: HugeTLB: A new shared memory segment has been created.\n");
++      }
++
++      ptr = shmat(shmid, NULL, 0);
++      if (ptr == (void *)-1) {
++              fprintf(stderr,
++                      "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n",
++                      errno);
++              ptr = NULL;     /* falls through to the normal-page path */
++      }
++
++      if (ptr) {
++              *n = size;
++              os_fast_mutex_lock(&ut_list_mutex);
++              ut_total_allocated_memory += size;
++              os_fast_mutex_unlock(&ut_list_mutex);
++              UNIV_MEM_ALLOC(ptr, size);
++              return(ptr);
++      }
++skip:
++      *is_new = FALSE;
++# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */
++# ifdef HAVE_GETPAGESIZE
++      size = getpagesize();
++# else
++      size = UNIV_PAGE_SIZE;
++# endif
++      /* Align block size to system page size */
++      ut_ad(ut_is_2pow(size));
++      size = *n = ut_2pow_round(*n + (size - 1), size);
++
++      shmid = shmget((key_t)key, (size_t)size,
++                      IPC_CREAT | IPC_EXCL | SHM_R | SHM_W);
++      if (shmid < 0) {
++              if (errno == EEXIST) {
++                      fprintf(stderr,
++                              "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
++                      shmid = shmget((key_t)key, (size_t)size,
++                                      SHM_R | SHM_W);
++                      if (shmid < 0) {
++                              fprintf(stderr,
++                                      "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
++                                      size, errno);
++                              ptr = NULL;
++                              goto end;
++                      } else {
++                              fprintf(stderr,
++                                      "InnoDB: The existent shared memory segment is used.\n");
++                      }
++              } else {
++                      fprintf(stderr,
++                              "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
++                              size, errno);
++                      ptr = NULL;
++                      goto end;
++              }
++      } else {
++              *is_new = TRUE;
++              fprintf(stderr,
++                      "InnoDB: A new shared memory segment has been created.\n");
++      }
++
++      ptr = shmat(shmid, NULL, 0);
++      if (ptr == (void *)-1) {
++              fprintf(stderr,
++                      "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n",
++                      errno);
++              ptr = NULL;     /* the segment itself is not removed here */
++      }
++      if (ptr) {
++              *n = size;
++              os_fast_mutex_lock(&ut_list_mutex);
++              ut_total_allocated_memory += size;
++              os_fast_mutex_unlock(&ut_list_mutex);
++              UNIV_MEM_ALLOC(ptr, size);
++      }
++end:
++#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
++      fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
++      ptr = NULL;
++#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
++      return(ptr);
++}
++
++/****************************************************************//**
++Detaches a shared memory segment; a failing shmdt() is reported. */
++UNIV_INTERN
++void
++os_shm_free(
++/*========*/
++      void    *ptr,   /*!< in: pointer returned by os_shm_alloc() */
++      ulint   size)   /*!< in: size returned by os_shm_alloc() */
++{
++      os_fast_mutex_lock(&ut_list_mutex);
++      ut_a(ut_total_allocated_memory >= size);
++      os_fast_mutex_unlock(&ut_list_mutex);
++#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
++      if (!shmdt(ptr)) {
++              /* Detached: take the segment out of the memory accounting */
++              os_fast_mutex_lock(&ut_list_mutex);
++              ut_a(ut_total_allocated_memory >= size);
++              ut_total_allocated_memory -= size;
++              os_fast_mutex_unlock(&ut_list_mutex);
++              UNIV_MEM_FREE(ptr, size);
++      } else {
++              fprintf(stderr, "InnoDB: Warning: Failed to detach shared memory segment, errno %d\n", errno);
++      }
++#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
++      fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
++#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
++}
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-04 20:20:44.687550693 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-07 16:10:14.962785720 +0900
+@@ -233,6 +233,11 @@
+ UNIV_INTERN ulint     srv_mem_pool_size       = ULINT_MAX;
+ UNIV_INTERN ulint     srv_lock_table_size     = ULINT_MAX;
++/* key value for shm */
++UNIV_INTERN uint      srv_buffer_pool_shm_key = 0;
++UNIV_INTERN ibool     srv_buffer_pool_shm_is_reused = FALSE;
++UNIV_INTERN ibool     srv_buffer_pool_shm_checksum = TRUE;
++
+ /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
+ instead. */
+ UNIV_INTERN ulint     srv_n_file_io_threads   = ULINT_MAX;
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-04 20:19:29.806482628 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-07 16:10:14.964785346 +0900
+@@ -1759,6 +1759,8 @@
+               Note that this is not as heavy weight as it seems. At
+               this point there will be only ONE page in the buf_LRU
+               and there must be no page in the buf_flush list. */
++              /* buffer_pool_shm should not be reused when recovery was needed. */
++              if (!srv_buffer_pool_shm_is_reused)
+               buf_pool_invalidate();
+               /* We always try to do a recovery, even if the database had
diff --git a/innodb_deadlock_count.patch b/innodb_deadlock_count.patch
new file mode 100644 (file)
index 0000000..f75903c
--- /dev/null
@@ -0,0 +1,71 @@
+# name       : innodb_deadlock_count.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-04 16:09:53.145500265 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-04 16:10:24.605515894 +0900
+@@ -667,6 +667,8 @@
+   (char*) &export_vars.innodb_dblwr_pages_written,      SHOW_LONG},
+   {"dblwr_writes",
+   (char*) &export_vars.innodb_dblwr_writes,             SHOW_LONG},
++  {"deadlocks",
++  (char*) &export_vars.innodb_deadlocks,                SHOW_LONG},
+   {"dict_tables",
+   (char*) &export_vars.innodb_dict_tables,              SHOW_LONG},
+   {"have_atomic_builtins",
+diff -ruN a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
+--- a/storage/innobase/include/lock0lock.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/lock0lock.h     2010-12-04 16:10:24.605515894 +0900
+@@ -43,6 +43,7 @@
+ #endif /* UNIV_DEBUG */
+ /* Buffer for storing information about the most recent deadlock error */
+ extern FILE*  lock_latest_err_file;
++extern ulint  srv_n_lock_deadlock_count;
+ /*********************************************************************//**
+ Gets the size of a lock struct.
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-04 15:55:21.378480843 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-04 16:10:24.606550983 +0900
+@@ -750,6 +750,7 @@
+       ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
+       ulint innodb_dblwr_pages_written;       /*!< srv_dblwr_pages_written */
+       ulint innodb_dblwr_writes;              /*!< srv_dblwr_writes */
++      ulint innodb_deadlocks;
+       ibool innodb_have_atomic_builtins;      /*!< HAVE_ATOMIC_BUILTINS */
+       ulint innodb_log_waits;                 /*!< srv_log_waits */
+       ulint innodb_log_write_requests;        /*!< srv_log_write_requests */
+diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
+--- a/storage/innobase/lock/lock0lock.c        2010-12-03 17:49:11.609953956 +0900
++++ b/storage/innobase/lock/lock0lock.c        2010-12-04 16:10:24.608513889 +0900
+@@ -3328,6 +3328,7 @@
+               break;
+       case LOCK_VICTIM_IS_START:
++              srv_n_lock_deadlock_count++;
+               fputs("*** WE ROLL BACK TRANSACTION (2)\n",
+                     lock_latest_err_file);
+               break;
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-04 15:57:13.069513371 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-04 16:10:24.610593039 +0900
+@@ -465,6 +465,7 @@
+ static ulint  srv_n_rows_deleted_old          = 0;
+ static ulint  srv_n_rows_read_old             = 0;
++UNIV_INTERN ulint             srv_n_lock_deadlock_count       = 0;
+ UNIV_INTERN ulint             srv_n_lock_wait_count           = 0;
+ UNIV_INTERN ulint             srv_n_lock_wait_current_count   = 0;
+ UNIV_INTERN ib_int64_t        srv_n_lock_wait_time            = 0;
+@@ -2251,6 +2252,7 @@
+       export_vars.innodb_buffer_pool_pages_data = LRU_len;
+       export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
+       export_vars.innodb_buffer_pool_pages_free = free_len;
++      export_vars.innodb_deadlocks = srv_n_lock_deadlock_count;
+ #ifdef UNIV_DEBUG
+       export_vars.innodb_buffer_pool_pages_latched
+               = buf_get_latched_pages_number();
diff --git a/innodb_dict_size_limit.patch b/innodb_dict_size_limit.patch
new file mode 100644 (file)
index 0000000..83bfd33
--- /dev/null
@@ -0,0 +1,446 @@
+# name       : innodb_dict_size_limit.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
+--- a/storage/innobase/btr/btr0sea.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/btr/btr0sea.c   2010-12-03 15:45:47.503988924 +0900
+@@ -1185,6 +1185,132 @@
+       mem_free(folds);
+ }
++/************************************************************************
++Drops the adaptive hash index entries of all LRU pages hashed on index */
++UNIV_INTERN
++void
++btr_search_drop_page_hash_index_on_index(
++/*=====================================*/
++      dict_index_t*   index)          /* in: index whose hash entries are dropped */
++{
++      buf_page_t*     bpage;
++      hash_table_t*   table;
++      buf_block_t*    block;
++      ulint           n_fields;
++      ulint           n_bytes;
++      const page_t*           page;
++      const rec_t*            rec;
++      ulint           fold;
++      ulint           prev_fold;
++      index_id_t      index_id;
++      ulint           n_cached;
++      ulint           n_recs;
++      ulint*          folds;
++      ulint           i, j;
++      mem_heap_t*     heap    = NULL;
++      ulint*          offsets;
++
++      rw_lock_x_lock(&btr_search_latch);
++      buf_pool_mutex_enter_all();
++
++      table = btr_search_sys->hash_index;
++
++      for (j = 0; j < srv_buf_pool_instances; j++) {
++              buf_pool_t*     buf_pool;
++
++              buf_pool = buf_pool_from_array(j);
++
++              bpage = UT_LIST_GET_LAST(buf_pool->LRU);
++
++              while (bpage != NULL) {
++                      block = (buf_block_t*) bpage; /* NOTE(review): assumes every LRU page is a buf_block_t -- confirm */
++                      if (block->index == index && block->is_hashed) {
++                              page = block->frame;
++
++                              /* from btr_search_drop_page_hash_index() */
++                              n_fields = block->curr_n_fields;
++                              n_bytes = block->curr_n_bytes;
++
++                              ut_a(n_fields + n_bytes > 0);
++
++                              n_recs = page_get_n_recs(page);
++
++                              /* Calculate and cache fold values into an array for fast deletion
++                              from the hash index */
++
++                              folds = mem_alloc(n_recs * sizeof(ulint));
++
++                              n_cached = 0;
++
++                              rec = page_get_infimum_rec(page);
++                              rec = page_rec_get_next_low(rec, page_is_comp(page));
++
++                              index_id = btr_page_get_index_id(page);
++      
++                              ut_a(index_id == index->id);
++
++                              prev_fold = 0;
++
++                              offsets = NULL;
++
++                              while (!page_rec_is_supremum(rec)) {
++                                      offsets = rec_get_offsets(rec, index, offsets,
++                                                              n_fields + (n_bytes > 0), &heap);
++                                      ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
++                                      fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
++
++                                      if (fold == prev_fold && prev_fold != 0) {
++
++                                              goto next_rec;
++                                      }
++
++                                      /* Cache the fold value; the hash nodes pointing to this
++                                      page are removed in the loop after this scan */
++
++                                      folds[n_cached] = fold;
++                                      n_cached++;
++next_rec:
++                                      rec = page_rec_get_next_low(rec, page_rec_is_comp(rec));
++                                      prev_fold = fold;
++                              }
++
++                              for (i = 0; i < n_cached; i++) {
++
++                                      ha_remove_all_nodes_to_page(table, folds[i], page);
++                              }
++
++                              ut_a(index->search_info->ref_count > 0);
++                              index->search_info->ref_count--;
++
++                              block->is_hashed = FALSE;
++                              block->index = NULL;
++      
++#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
++                              if (UNIV_UNLIKELY(block->n_pointers)) {
++                                      /* Corruption */
++                                      ut_print_timestamp(stderr);
++                                      fprintf(stderr,
++"  InnoDB: Corruption of adaptive hash index. After dropping\n"
++"InnoDB: the hash index to a page of %s, still %lu hash nodes remain.\n",
++                                              index->name, (ulong) block->n_pointers);
++                              }
++#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
++
++                              mem_free(folds);
++                      }
++
++                      bpage = UT_LIST_GET_PREV(LRU, bpage);
++              }
++      }
++
++      buf_pool_mutex_exit_all();
++      rw_lock_x_unlock(&btr_search_latch);
++
++      if (UNIV_LIKELY_NULL(heap)) {
++              mem_heap_free(heap);
++      }
++}
++
+ /********************************************************************//**
+ Drops a page hash index when a page is freed from a fseg to the file system.
+ Drops possible hash index if the page happens to be in the buffer pool. */
+diff -ruN a/storage/innobase/dict/dict0boot.c b/storage/innobase/dict/dict0boot.c
+--- a/storage/innobase/dict/dict0boot.c        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/dict/dict0boot.c        2010-12-03 15:45:47.503988924 +0900
+@@ -284,6 +284,7 @@
+       system tables */
+       /*-------------------------*/
+       table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0);
++      table->n_mysql_handles_opened = 1; /* for pin */
+       dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
+       dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
+@@ -336,6 +337,7 @@
+       /*-------------------------*/
+       table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
++      table->n_mysql_handles_opened = 1; /* for pin */
+       dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
+       dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
+@@ -368,6 +370,7 @@
+       /*-------------------------*/
+       table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
++      table->n_mysql_handles_opened = 1; /* for pin */
+       dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
+       dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
+@@ -413,6 +416,7 @@
+       /*-------------------------*/
+       table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
++      table->n_mysql_handles_opened = 1; /* for pin */
+       dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
+       dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
+diff -ruN a/storage/innobase/dict/dict0crea.c b/storage/innobase/dict/dict0crea.c
+--- a/storage/innobase/dict/dict0crea.c        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/dict/dict0crea.c        2010-12-03 15:45:47.521955810 +0900
+@@ -1210,6 +1210,9 @@
+               /* Foreign constraint system tables have already been
+               created, and they are ok */
++              table1->n_mysql_handles_opened = 1; /* for pin */
++              table2->n_mysql_handles_opened = 1; /* for pin */
++
+               mutex_exit(&(dict_sys->mutex));
+               return(DB_SUCCESS);
+@@ -1291,6 +1294,11 @@
+       trx_commit_for_mysql(trx);
++      table1 = dict_table_get_low("SYS_FOREIGN");
++      table2 = dict_table_get_low("SYS_FOREIGN_COLS");
++      table1->n_mysql_handles_opened = 1; /* for pin */
++      table2->n_mysql_handles_opened = 1; /* for pin */
++
+       row_mysql_unlock_data_dictionary(trx);
+       trx_free_for_mysql(trx);
+diff -ruN a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
+--- a/storage/innobase/dict/dict0dict.c        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/dict/dict0dict.c        2010-12-03 15:45:47.525953769 +0900
+@@ -625,6 +625,8 @@
+       table = dict_table_get_on_id_low(table_id);
++      dict_table_LRU_trim(table);
++
+       mutex_exit(&(dict_sys->mutex));
+       return(table);
+@@ -743,6 +745,8 @@
+               table->n_mysql_handles_opened++;
+       }
++      dict_table_LRU_trim(table);
++
+       mutex_exit(&(dict_sys->mutex));
+       if (table != NULL) {
+@@ -1256,6 +1260,64 @@
+       dict_mem_table_free(table);
+ }
++/**************************************************************************
++Evicts tables without open handles from the end of table_LRU while the
++dictionary cache exceeds srv_dict_size_limit (0 disables the limit). */
++UNIV_INTERN
++void
++dict_table_LRU_trim(
++/*================*/
++      dict_table_t*   self)   /*!< in: table that must stay in the cache */
++{
++      dict_table_t*   victim;
++      dict_table_t*   older;
++      dict_foreign_t* fk;
++      ulint           n_evicted;
++      ulint           n_kept_for_parent;
++      ulint           n_cached_parents;
++
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(mutex_own(&(dict_sys->mutex)));
++#endif /* UNIV_SYNC_DEBUG */
++
++      /* A pass walks table_LRU from its last node towards the first. It is
++      repeated while it evicted a table and also skipped one for its parent. */
++      do {
++              n_evicted = n_kept_for_parent = 0;
++              for (victim = UT_LIST_GET_LAST(dict_sys->table_LRU);
++                   srv_dict_size_limit && victim
++                   && ((dict_sys->table_hash->n_cells
++                        + dict_sys->table_id_hash->n_cells) * sizeof(hash_cell_t)
++                       + dict_sys->size) > srv_dict_size_limit;
++                   victim = older) {
++                      older = UT_LIST_GET_PREV(table_LRU, victim);
++
++                      if (victim == self || victim->n_mysql_handles_opened) {
++                              continue;
++                      }
++
++                      n_cached_parents = 0;
++                      for (fk = UT_LIST_GET_FIRST(victim->foreign_list);
++                           fk != NULL;
++                           fk = UT_LIST_GET_NEXT(foreign_list, fk)) {
++                              if (fk->referenced_table) {
++                                      n_cached_parents++;
++                              }
++                      }
++
++                      if (n_cached_parents != 0) {
++                              n_kept_for_parent++;
++                      } else {
++                              dict_table_remove_from_cache(victim);
++                              n_evicted++;
++                      }
++              }
++      } while (srv_dict_size_limit && n_kept_for_parent && n_evicted
++               && ((dict_sys->table_hash->n_cells
++                    + dict_sys->table_id_hash->n_cells) * sizeof(hash_cell_t)
++                   + dict_sys->size) > srv_dict_size_limit);
++}
++
+ /****************************************************************//**
+ If the given column name is reserved for InnoDB system columns, return
+ TRUE.
+@@ -1719,6 +1781,11 @@
+       ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+       ut_ad(mutex_own(&(dict_sys->mutex)));
++      /* Remove all entries of the index from the adaptive hash index now,
++      because removing them from the adaptive hash index needs dict_index */
++      if (btr_search_enabled && srv_dict_size_limit)
++              btr_search_drop_page_hash_index_on_index(index);
++
+       /* We always create search info whether or not adaptive
+       hash index is enabled or not. */
+       info = index->search_info;
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:43:57.294986852 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:45:47.534959966 +0900
+@@ -655,6 +655,8 @@
+   (char*) &export_vars.innodb_dblwr_pages_written,      SHOW_LONG},
+   {"dblwr_writes",
+   (char*) &export_vars.innodb_dblwr_writes,             SHOW_LONG},
++  {"dict_tables",
++  (char*) &export_vars.innodb_dict_tables,              SHOW_LONG},
+   {"have_atomic_builtins",
+   (char*) &export_vars.innodb_have_atomic_builtins,     SHOW_BOOL},
+   {"log_waits",
+@@ -11543,6 +11545,11 @@
+   "Number of extra user rollback segments which are used in a round-robin fashion.",
+   NULL, NULL, 127, 0, 127, 0);
++static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit,
++  PLUGIN_VAR_RQCMDARG,
++  "Limit the allocated memory for dictionary cache. (0: unlimited)",
++  NULL, NULL, 0, 0, LONG_MAX, 0);
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+   MYSQL_SYSVAR(additional_mem_pool_size),
+   MYSQL_SYSVAR(autoextend_increment),
+@@ -11611,6 +11618,7 @@
+   MYSQL_SYSVAR(adaptive_flushing_method),
+   MYSQL_SYSVAR(enable_unsafe_group_commit),
+   MYSQL_SYSVAR(extra_rsegments),
++  MYSQL_SYSVAR(dict_size_limit),
+   MYSQL_SYSVAR(use_sys_malloc),
+   MYSQL_SYSVAR(use_native_aio),
+   MYSQL_SYSVAR(change_buffering),
+diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
+--- a/storage/innobase/ibuf/ibuf0ibuf.c        2010-12-03 15:18:48.889024455 +0900
++++ b/storage/innobase/ibuf/ibuf0ibuf.c        2010-12-03 15:45:47.553025057 +0900
+@@ -578,6 +578,7 @@
+       /* Use old-style record format for the insert buffer. */
+       table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);
++      table->n_mysql_handles_opened = 1; /* for pin */
+       dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
+diff -ruN a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
+--- a/storage/innobase/include/btr0sea.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/btr0sea.h       2010-12-03 15:45:47.555024229 +0900
+@@ -140,6 +140,13 @@
+                               s- or x-latched, or an index page
+                               for which we know that
+                               block->buf_fix_count == 0 */
++/************************************************************************
++Drops the adaptive hash index entries of all LRU pages hashed on index */
++UNIV_INTERN
++void
++btr_search_drop_page_hash_index_on_index(
++/*=====================================*/
++      dict_index_t*   index);         /* in: index whose hash entries are dropped */
+ /********************************************************************//**
+ Drops a page hash index when a page is freed from a fseg to the file system.
+ Drops possible hash index if the page happens to be in the buffer pool. */
+diff -ruN a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
+--- a/storage/innobase/include/dict0dict.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0dict.h     2010-12-03 15:45:47.558024515 +0900
+@@ -1158,6 +1158,12 @@
+ /*====================================*/
+       dict_table_t*   table,  /*!< in: table */
+       const char*     name);  /*!< in: name of the index to find */
++
++UNIV_INTERN
++void
++dict_table_LRU_trim(
++/*================*/
++      dict_table_t*   self);  /*!< in: table that is never evicted by this call */
+ /* Buffers for storing detailed information about the latest foreign key
+ and unique key errors */
+ extern FILE*  dict_foreign_err_file;
+diff -ruN a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
+--- a/storage/innobase/include/dict0dict.ic    2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0dict.ic    2010-12-03 15:45:47.560024398 +0900
+@@ -824,6 +824,13 @@
+       HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
+                   dict_table_t*, table, ut_ad(table->cached),
+                   !strcmp(table->name, table_name));
++
++      /* make young in table_LRU */
++      if (table) {
++              UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
++              UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
++      }
++
+       return(table);
+ }
+@@ -877,6 +884,12 @@
+               table = dict_load_table_on_id(table_id);
+       }
++      /* make young in table_LRU */
++      if (table) {
++              UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
++              UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
++      }
++
+       ut_ad(!table || table->cached);
+       /* TODO: should get the type information from MySQL */
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-03 15:43:57.297067100 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-03 15:45:47.562024404 +0900
+@@ -228,7 +228,7 @@
+ extern ulint  srv_adaptive_flushing_method;
+ extern ulint  srv_extra_rsegments;
+-
++extern ulint  srv_dict_size_limit;
+ /*-------------------------------------------*/
+ extern ulint  srv_n_rows_inserted;
+@@ -700,6 +700,7 @@
+       ulint innodb_data_writes;               /*!< I/O write requests */
+       ulint innodb_data_written;              /*!< Data bytes written */
+       ulint innodb_data_reads;                /*!< I/O read requests */
++      ulint innodb_dict_tables;
+       ulint innodb_buffer_pool_pages_total;   /*!< Buffer pool size */
+       ulint innodb_buffer_pool_pages_data;    /*!< Data pages */
+       ulint innodb_buffer_pool_pages_dirty;   /*!< Dirty data pages */
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 15:43:57.301024390 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 15:45:47.565023830 +0900
+@@ -414,6 +414,7 @@
+ UNIV_INTERN ulint     srv_adaptive_flushing_method = 0; /* 0: native  1: estimate  2: keep_average */
+ UNIV_INTERN ulint     srv_extra_rsegments = 127; /* extra rseg for users */
++UNIV_INTERN ulint     srv_dict_size_limit = 0;
+ /*-------------------------------------------*/
+ UNIV_INTERN ulong     srv_n_spin_wait_rounds  = 30;
+ UNIV_INTERN ulong     srv_n_free_tickets_to_enter = 500;
+@@ -2192,6 +2193,7 @@
+       export_vars.innodb_data_reads = os_n_file_reads;
+       export_vars.innodb_data_writes = os_n_file_writes;
+       export_vars.innodb_data_written = srv_data_written;
++      export_vars.innodb_dict_tables= (dict_sys ? UT_LIST_GET_LEN(dict_sys->table_LRU) : 0);
+       export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
+       export_vars.innodb_buffer_pool_write_requests
+               = srv_buf_pool_write_requests;
diff --git a/innodb_expand_import.patch b/innodb_expand_import.patch
new file mode 100644 (file)
index 0000000..f3f77e7
--- /dev/null
@@ -0,0 +1,561 @@
+# name       : innodb_expand_import.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c   2010-12-03 15:09:51.274957577 +0900
++++ b/storage/innobase/fil/fil0fil.c   2010-12-03 15:52:23.553986552 +0900
+@@ -40,6 +40,12 @@
+ #include "dict0dict.h"
+ #include "page0page.h"
+ #include "page0zip.h"
++#include "trx0trx.h"
++#include "trx0sys.h"
++#include "pars0pars.h"
++#include "row0mysql.h"
++#include "row0row.h"
++#include "que0que.h"
+ #ifndef UNIV_HOTBACKUP
+ # include "buf0lru.h"
+ # include "ibuf0ibuf.h"
+@@ -3050,7 +3056,7 @@
+       file = os_file_create_simple_no_error_handling(
+               innodb_file_data_key, filepath, OS_FILE_OPEN,
+-              OS_FILE_READ_ONLY, &success);
++              OS_FILE_READ_WRITE, &success);
+       if (!success) {
+               /* The following call prints an error message */
+               os_file_get_last_error(TRUE);
+@@ -3097,6 +3103,466 @@
+       space_id = fsp_header_get_space_id(page);
+       space_flags = fsp_header_get_flags(page);
++      if (srv_expand_import
++          && (space_id != id || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
++              ibool           file_is_corrupt = FALSE;
++              byte*           buf3;
++              byte*           descr_page;
++              ibool           descr_is_corrupt = FALSE;
++              index_id_t      old_id[31];
++              index_id_t      new_id[31];
++              ulint           root_page[31];
++              ulint           n_index;
++              os_file_t       info_file = -1;
++              char*           info_file_path;
++              ulint   i;
++              int             len;
++              ib_uint64_t     current_lsn;
++              ulint           size_low, size_high, size, free_limit;
++              ib_int64_t      size_bytes, free_limit_bytes;
++              dict_table_t*   table;
++              dict_index_t*   index;
++              fil_system_t*   system;
++              fil_node_t*     node = NULL;
++              fil_space_t*    space;
++
++              buf3 = ut_malloc(2 * UNIV_PAGE_SIZE);
++              descr_page = ut_align(buf3, UNIV_PAGE_SIZE);
++
++              current_lsn = log_get_lsn();
++
++              /* check the header page's consistency */
++              if (buf_page_is_corrupted(page,
++                                        dict_table_flags_to_zip_size(space_flags))) {
++                      fprintf(stderr, "InnoDB: page 0 of %s seems corrupt.\n", filepath);
++                      file_is_corrupt = TRUE;
++                      descr_is_corrupt = TRUE;
++              }
++
++              /* store as first descr page */
++              memcpy(descr_page, page, UNIV_PAGE_SIZE);
++
++              /* get free limit (page number) of the table space */
++/* these must stay identical to the definitions in fsp0fsp.c */
++#define FSP_HEADER_OFFSET     FIL_PAGE_DATA
++#define       FSP_FREE_LIMIT          12
++              free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page);
++              free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)UNIV_PAGE_SIZE;
++
++              /* overwrite fsp header */
++              fsp_header_init_fields(page, id, flags);
++              mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
++              space_id = id;
++              space_flags = flags;
++              if (mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN) > current_lsn)
++                      mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
++              mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
++                              srv_use_checksums
++                              ? buf_calc_page_new_checksum(page)
++                                              : BUF_NO_CHECKSUM_MAGIC);
++              mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
++                              srv_use_checksums
++                              ? buf_calc_page_old_checksum(page)
++                                              : BUF_NO_CHECKSUM_MAGIC);
++              success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
++
++              /* get file size */
++              os_file_get_size(file, &size_low, &size_high);
++              size_bytes = (((ib_int64_t)size_high) << 32)
++                              + (ib_int64_t)size_low;
++
++              if (size_bytes < free_limit_bytes) {
++                      free_limit_bytes = size_bytes;
++                      if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) {
++                              fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath);
++                              file_is_corrupt = TRUE;
++                      }
++              }
++
++              /* get clustered index information */
++              table = dict_table_get_low(name);
++              index = dict_table_get_first_index(table);
++              ut_a(index->page==3);
++
++              /* read metadata from .exp file */
++              n_index = 0;
++              memset(old_id, 0, sizeof(old_id));
++              memset(new_id, 0, sizeof(new_id));
++              memset(root_page, 0, sizeof(root_page));
++
++              info_file_path = fil_make_ibd_name(name, FALSE);
++              len = strlen(info_file_path);
++              info_file_path[len - 3] = 'e';
++              info_file_path[len - 2] = 'x';
++              info_file_path[len - 1] = 'p';
++
++              info_file = os_file_create_simple_no_error_handling(innodb_file_data_key,
++                              info_file_path, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
++              if (!success) {
++                      fprintf(stderr, "InnoDB: cannot open %s\n", info_file_path);
++                      file_is_corrupt = TRUE;
++                      goto skip_info;
++              }
++              success = os_file_read(info_file, page, 0, 0, UNIV_PAGE_SIZE);
++              if (!success) {
++                      fprintf(stderr, "InnoDB: cannot read %s\n", info_file_path);
++                      file_is_corrupt = TRUE;
++                      goto skip_info;
++              }
++              if (mach_read_from_4(page) != 0x78706f72UL
++                  || mach_read_from_4(page + 4) != 0x74696e66UL) {
++                      fprintf(stderr, "InnoDB: %s seems not to be a correct .exp file\n", info_file_path);
++                      file_is_corrupt = TRUE;
++                      goto skip_info;
++              }
++
++              fprintf(stderr, "InnoDB: import: extended import of %s is started.\n", name);
++
++              n_index = mach_read_from_4(page + 8);
++              fprintf(stderr, "InnoDB: import: %lu indexes are detected.\n", (ulong)n_index);
++              for (i = 0; i < n_index; i++) {
++                      new_id[i] =
++                              dict_table_get_index_on_name(table,
++                                              (char*)(page + (i + 1) * 512 + 12))->id;
++                      old_id[i] = mach_read_from_8(page + (i + 1) * 512);
++                      root_page[i] = mach_read_from_4(page + (i + 1) * 512 + 8);
++              }
++
++skip_info:
++              if (info_file != -1)
++                      os_file_close(info_file);
++
++              /*
++              if (size_bytes >= 1024 * 1024) {
++                      size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
++              }
++              */
++              if (!(flags & DICT_TF_ZSSIZE_MASK)) {
++                      mem_heap_t*     heap = NULL;
++                      ulint           offsets_[REC_OFFS_NORMAL_SIZE];
++                      ulint*          offsets = offsets_;
++                      ib_int64_t      offset;
++
++                      size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
++                      /* overwrite the space id of all pages */
++                      rec_offs_init(offsets_);
++
++                      fprintf(stderr, "InnoDB: Progress in %%:");
++
++                      for (offset = 0; offset < free_limit_bytes; offset += UNIV_PAGE_SIZE) {
++                              ulint           checksum_field;
++                              ulint           old_checksum_field;
++                              ibool           page_is_corrupt;
++
++                              success = os_file_read(file, page,
++                                                      (ulint)(offset & 0xFFFFFFFFUL),
++                                                      (ulint)(offset >> 32), UNIV_PAGE_SIZE);
++
++                              page_is_corrupt = FALSE;
++
++                              /* check consistency */
++                              if (memcmp(page + FIL_PAGE_LSN + 4,
++                                         page + UNIV_PAGE_SIZE
++                                         - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
++
++                                      page_is_corrupt = TRUE;
++                              }
++
++                              if (mach_read_from_4(page + FIL_PAGE_OFFSET)
++                                  != offset / UNIV_PAGE_SIZE) {
++
++                                      page_is_corrupt = TRUE;
++                              }
++
++                              checksum_field = mach_read_from_4(page
++                                                                + FIL_PAGE_SPACE_OR_CHKSUM);
++
++                              old_checksum_field = mach_read_from_4(
++                                      page + UNIV_PAGE_SIZE
++                                      - FIL_PAGE_END_LSN_OLD_CHKSUM);
++
++                              if (old_checksum_field != mach_read_from_4(page
++                                                                         + FIL_PAGE_LSN)
++                                  && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
++                                  && old_checksum_field
++                                  != buf_calc_page_old_checksum(page)) {
++
++                                      page_is_corrupt = TRUE;
++                              }
++
++                              if (checksum_field != 0
++                                  && checksum_field != BUF_NO_CHECKSUM_MAGIC
++                                  && checksum_field
++                                  != buf_calc_page_new_checksum(page)) {
++
++                                      page_is_corrupt = TRUE;
++                              }
++
++                              /* if it is free page, inconsistency is acceptable */
++                              if (!offset) {
++                                      /* header page*/
++                                      /* it should be overwritten already */
++                                      ut_a(!page_is_corrupt);
++
++                              } else if (!((offset / UNIV_PAGE_SIZE) % UNIV_PAGE_SIZE)) {
++                                      /* descr page (not header) */
++                                      if (page_is_corrupt) {
++                                              file_is_corrupt = TRUE;
++                                              descr_is_corrupt = TRUE;
++                                      } else {
++                                              ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES);
++                                              descr_is_corrupt = FALSE;
++                                      }
++
++                                      /* store as descr page */
++                                      memcpy(descr_page, page, UNIV_PAGE_SIZE);
++
++                              } else if (descr_is_corrupt) {
++                                      /* unknown state of the page */
++                                      if (page_is_corrupt) {
++                                              file_is_corrupt = TRUE;
++                                      }
++
++                              } else {
++                                      /* check free page or not */
++                                      /* These definitions must stay identical to those in fsp0fsp.c */
++#define       FSP_HEADER_SIZE         (32 + 5 * FLST_BASE_NODE_SIZE)
++
++#define       XDES_BITMAP             (FLST_NODE_SIZE + 12)
++#define       XDES_BITS_PER_PAGE      2
++#define       XDES_FREE_BIT           0
++#define       XDES_SIZE                                                       \
++      (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
++#define       XDES_ARR_OFFSET         (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
++
++                                      /*descr = descr_page + XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)*/
++                                      /*xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)*/
++                                      byte*   descr;
++                                      ulint   index;
++                                      ulint   byte_index;
++                                      ulint   bit_index;
++
++                                      descr = descr_page + XDES_ARR_OFFSET
++                                              + XDES_SIZE * (ut_2pow_remainder((offset / UNIV_PAGE_SIZE), UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE);
++
++                                      index = XDES_FREE_BIT + XDES_BITS_PER_PAGE * ((offset / UNIV_PAGE_SIZE) % FSP_EXTENT_SIZE);
++                                      byte_index = index / 8;
++                                      bit_index = index % 8;
++
++                                      if (ut_bit_get_nth(mach_read_from_1(descr + XDES_BITMAP + byte_index), bit_index)) {
++                                              /* free page */
++                                              if (page_is_corrupt) {
++                                                      goto skip_write;
++                                              }
++                                      } else {
++                                              /* not free */
++                                              if (page_is_corrupt) {
++                                                      file_is_corrupt = TRUE;
++                                              }
++                                      }
++                              }
++
++                              if (page_is_corrupt) {
++                                      fprintf(stderr, " [errp:%lld]", offset / UNIV_PAGE_SIZE);
++
++                                      /* cannot treat corrupt page */
++                                      goto skip_write;
++                              }
++
++                              if (mach_read_from_4(page + FIL_PAGE_OFFSET) || !offset) {
++                                      mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
++
++                                      for (i = 0; i < n_index; i++) {
++                                              if (offset / UNIV_PAGE_SIZE == root_page[i]) {
++                                                      /* this is index root page */
++                                                      mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
++                                                                                      + FSEG_HDR_SPACE, id);
++                                                      mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
++                                                                                      + FSEG_HDR_SPACE, id);
++                                                      break;
++                                              }
++                                      }
++
++                                      if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
++                                              index_id_t tmp = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID));
++
++                                              if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
++                                                  && old_id[0] == tmp) {
++                                                      /* leaf page of the clustered index: reset trx_id of records */
++                                                      rec_t*  rec;
++                                                      rec_t*  supremum;
++                                                      ulint   n_recs;
++
++                                                      supremum = page_get_supremum_rec(page);
++                                                      rec = page_rec_get_next(page_get_infimum_rec(page));
++                                                      n_recs = page_get_n_recs(page);
++
++                                                      while (rec && rec != supremum && n_recs > 0) {
++                                                              ulint   n_fields;
++                                                              ulint   i;
++                                                              ulint   offset = index->trx_id_offset;
++                                                              offsets = rec_get_offsets(rec, index, offsets,
++                                                                              ULINT_UNDEFINED, &heap);
++                                                              n_fields = rec_offs_n_fields(offsets);
++                                                              if (!offset) {
++                                                                      offset = row_get_trx_id_offset(rec, index, offsets);
++                                                              }
++                                                              trx_write_trx_id(rec + offset, 1);
++
++                                                              for (i = 0; i < n_fields; i++) {
++                                                                      if (rec_offs_nth_extern(offsets, i)) {
++                                                                              ulint   local_len;
++                                                                              byte*   data;
++
++                                                                              data = rec_get_nth_field(rec, offsets, i, &local_len);
++
++                                                                              local_len -= BTR_EXTERN_FIELD_REF_SIZE;
++
++                                                                              mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
++                                                                      }
++                                                              }
++
++                                                              rec = page_rec_get_next(rec);
++                                                              n_recs--;
++                                                      }
++                                              }
++
++                                              for (i = 0; i < n_index; i++) {
++                                                      if (old_id[i] == tmp) {
++                                                              mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
++                                                              break;
++                                                      }
++                                              }
++                                      }
++
++                                      if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
++                                              mach_write_to_8(page + FIL_PAGE_LSN, current_lsn);
++                                              mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
++                                                                              current_lsn);
++                                      }
++
++                                      mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
++                                                      srv_use_checksums
++                                                      ? buf_calc_page_new_checksum(page)
++                                                                      : BUF_NO_CHECKSUM_MAGIC);
++                                      mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
++                                                      srv_use_checksums
++                                                      ? buf_calc_page_old_checksum(page)
++                                                                      : BUF_NO_CHECKSUM_MAGIC);
++
++                                      success = os_file_write(filepath, file, page,
++                                                              (ulint)(offset & 0xFFFFFFFFUL),
++                                                              (ulint)(offset >> 32), UNIV_PAGE_SIZE);
++                              }
++
++skip_write:
++                              if (free_limit_bytes
++                                  && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes)
++                                      != ((offset * 100) / free_limit_bytes)) {
++                                      fprintf(stderr, " %lu",
++                                              (ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes));
++                              }
++                      }
++
++                      fprintf(stderr, " done.\n");
++
++                      /* update SYS_INDEXES set root page */
++                      index = dict_table_get_first_index(table);
++                      while (index) {
++                              for (i = 0; i < n_index; i++) {
++                                      if (new_id[i] == index->id) {
++                                              break;
++                                      }
++                              }
++
++                              if (i != n_index
++                                  && root_page[i] != index->page) {
++                                      /* must update */
++                                      ulint   error;
++                                      trx_t*  trx;
++                                      pars_info_t*    info = NULL;
++
++                                      trx = trx_allocate_for_mysql();
++                                      trx->op_info = "extended import";
++
++                                      info = pars_info_create();
++
++                                      pars_info_add_ull_literal(info, "indexid", new_id[i]);
++                                      pars_info_add_int4_literal(info, "new_page", (lint) root_page[i]);
++
++                                      error = que_eval_sql(info,
++                                              "PROCEDURE UPDATE_INDEX_PAGE () IS\n"
++                                              "BEGIN\n"
++                                              "UPDATE SYS_INDEXES"
++                                              " SET PAGE_NO = :new_page"
++                                              " WHERE ID = :indexid;\n"
++                                              "COMMIT WORK;\n"
++                                              "END;\n",
++                                              FALSE, trx);
++
++                                      if (error != DB_SUCCESS) {
++                                              fprintf(stderr, "InnoDB: failed to update SYS_INDEXES\n");
++                                      }
++
++                                      trx_commit_for_mysql(trx);
++
++                                      trx_free_for_mysql(trx);
++
++                                      index->page = root_page[i];
++                              }
++
++                              index = dict_table_get_next_index(index);
++                      }
++                      if (UNIV_LIKELY_NULL(heap)) {
++                              mem_heap_free(heap);
++                      }
++              } else {
++                      /* zip page? */
++                      size = (ulint)
++                      (size_bytes
++                                      / dict_table_flags_to_zip_size(flags));
++                      fprintf(stderr, "InnoDB: import: table %s seems to be in newer format."
++                                      " It may not be able to treated for now.\n", name);
++              }
++              /* .exp file should be removed */
++              success = os_file_delete(info_file_path);
++              if (!success) {
++                      success = os_file_delete_if_exists(info_file_path);
++              }
++              mem_free(info_file_path);
++
++              system  = fil_system;
++              mutex_enter(&(system->mutex));
++              space = fil_space_get_by_id(id);
++              if (space)
++                      node = UT_LIST_GET_FIRST(space->chain);
++              if (node && node->size < size) {
++                      space->size += (size - node->size);
++                      node->size = size;
++              }
++              mutex_exit(&(system->mutex));
++
++              ut_free(buf3);
++
++              if (file_is_corrupt) {
++                      ut_print_timestamp(stderr);
++                      fputs("  InnoDB: Error: file ",
++                            stderr);
++                      ut_print_filename(stderr, filepath);
++                      fprintf(stderr, " seems to be corrupt.\n"
++                              "InnoDB: anyway, all not corrupt pages were tried to be converted to salvage.\n"
++                              "InnoDB: ##### CAUTION #####\n"
++                              "InnoDB: ## The .ibd must cause to crash InnoDB, though re-import would seem to be succeeded.\n"
++                              "InnoDB: ## If you don't have knowledge about salvaging data from .ibd, you should not use the file.\n"
++                              "InnoDB: ###################\n");
++                      success = FALSE;
++
++                      ut_free(buf2);
++
++                      goto func_exit;
++              }
++      }
++
+       ut_free(buf2);
+       if (UNIV_UNLIKELY(space_id != id
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:49:59.195023983 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:52:23.555957062 +0900
+@@ -7330,6 +7330,14 @@
+               err = row_discard_tablespace_for_mysql(dict_table->name, trx);
+       } else {
+               err = row_import_tablespace_for_mysql(dict_table->name, trx);
++
++              /* in expanded import mode re-initialize auto_increment again */
++              if ((err == DB_SUCCESS) && srv_expand_import &&
++                  (table->found_next_number_field != NULL)) {
++                      dict_table_autoinc_lock(dict_table);
++                      innobase_initialize_autoinc();
++                      dict_table_autoinc_unlock(dict_table);
++              }
+       }
+       err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
+@@ -11545,6 +11553,11 @@
+   "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
+   NULL, NULL, 0, 0, 1, 0);
++static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
++  PLUGIN_VAR_RQCMDARG,
++  "Enable/Disable converting automatically *.ibd files when import tablespace.",
++  NULL, NULL, 0, 0, 1, 0);
++
+ static MYSQL_SYSVAR_ULONG(extra_rsegments, srv_extra_rsegments,
+   PLUGIN_VAR_RQCMDARG,
+   "Number of extra user rollback segments which are used in a round-robin fashion.",
+@@ -11622,6 +11635,7 @@
+   MYSQL_SYSVAR(read_ahead),
+   MYSQL_SYSVAR(adaptive_flushing_method),
+   MYSQL_SYSVAR(enable_unsafe_group_commit),
++  MYSQL_SYSVAR(expand_import),
+   MYSQL_SYSVAR(extra_rsegments),
+   MYSQL_SYSVAR(dict_size_limit),
+   MYSQL_SYSVAR(use_sys_malloc),
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-03 15:48:03.077954270 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-03 15:52:23.561986996 +0900
+@@ -227,6 +227,8 @@
+ extern ulint  srv_read_ahead;
+ extern ulint  srv_adaptive_flushing_method;
++extern ulint  srv_expand_import;
++
+ extern ulint  srv_extra_rsegments;
+ extern ulint  srv_dict_size_limit;
+ /*-------------------------------------------*/
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 15:49:59.230956118 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 15:52:23.562954411 +0900
+@@ -413,6 +413,8 @@
+ UNIV_INTERN ulint     srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
+ UNIV_INTERN ulint     srv_adaptive_flushing_method = 0; /* 0: native  1: estimate  2: keep_average */
++UNIV_INTERN ulint     srv_expand_import = 0; /* 0:disable 1:enable */
++
+ UNIV_INTERN ulint     srv_extra_rsegments = 127; /* extra rseg for users */
+ UNIV_INTERN ulint     srv_dict_size_limit = 0;
+ /*-------------------------------------------*/
similarity index 70%
rename from mysql-innodb_extend_slow.patch
rename to innodb_extend_slow.patch
index a11320dd400167f8c2c7cad811daad8f4fd05825..c85b5098bbb78ad7c75442455a0414e1efc05d28 100644 (file)
@@ -5,9 +5,9 @@
 #!!! notice !!!
 # Any small change to this file in the main branch
 # should be done or reviewed by the maintainer!
-diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
---- a/storage/innodb_plugin/buf/buf0buf.c      2010-08-27 16:13:11.061058561 +0900
-+++ b/storage/innodb_plugin/buf/buf0buf.c      2010-08-27 16:30:47.341987400 +0900
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-03 15:49:59.175955882 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-03 17:42:42.074307123 +0900
 @@ -51,6 +51,40 @@
  #include "dict0dict.h"
  #include "log0recv.h"
@@ -49,7 +49,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
  
  /*
                IMPLEMENTATION OF THE BUFFER POOL
-@@ -1726,10 +1760,18 @@
+@@ -2397,11 +2431,19 @@
        mutex_t*        block_mutex;
        ibool           must_read;
        unsigned        access_time;
@@ -58,6 +58,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
 +      ulint           ms;
 +      ib_uint64_t     start_time;
 +      ib_uint64_t     finish_time;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
  
  #ifndef UNIV_LOG_DEBUG
        ut_ad(!ibuf_inside());
@@ -68,16 +69,16 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
        buf_pool->stat.n_page_gets++;
  
        for (;;) {
-@@ -1746,7 +1788,7 @@
-               //buf_pool_mutex_exit();
-               rw_lock_s_unlock(&page_hash_latch);
+@@ -2419,7 +2461,7 @@
+               //buf_pool_mutex_exit(buf_pool);
+               rw_lock_s_unlock(&buf_pool->page_hash_latch);
  
 -              buf_read_page(space, zip_size, offset);
 +              buf_read_page(space, zip_size, offset, trx);
  
  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
                ut_a(++buf_dbg_counter % 37 || buf_validate());
-@@ -1822,6 +1864,13 @@
+@@ -2497,6 +2539,13 @@
                /* Let us wait until the read operation
                completes */
  
@@ -91,7 +92,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
                for (;;) {
                        enum buf_io_fix io_fix;
  
-@@ -1836,6 +1885,12 @@
+@@ -2511,6 +2560,12 @@
                                break;
                        }
                }
@@ -104,19 +105,19 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
        }
  
  #ifdef UNIV_IBUF_COUNT_DEBUG
-@@ -2092,6 +2147,11 @@
+@@ -2823,6 +2878,11 @@
        ibool           must_read;
        ulint           retries = 0;
-       mutex_t*        block_mutex;
-+      trx_t*          trx = NULL;
-+      ulint           sec;
-+      ulint           ms;
-+      ib_uint64_t     start_time;
-+      ib_uint64_t     finish_time;
+       mutex_t*        block_mutex = NULL;
++      trx_t*          trx = NULL;
++      ulint           sec;
++      ulint           ms;
++      ib_uint64_t     start_time;
++      ib_uint64_t     finish_time;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
  
        ut_ad(mtr);
-       ut_ad(mtr->state == MTR_ACTIVE);
-@@ -2106,6 +2166,9 @@
+@@ -2840,6 +2900,9 @@
  #ifndef UNIV_LOG_DEBUG
        ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
  #endif
@@ -124,9 +125,9 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
 +              trx = innobase_get_trx();
 +      }
        buf_pool->stat.n_page_gets++;
+       fold = buf_page_address_fold(space, offset);
  loop:
-       block = guess;
-@@ -2159,7 +2222,7 @@
+@@ -2913,7 +2976,7 @@
                        return(NULL);
                }
  
@@ -135,7 +136,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
                        retries = 0;
                } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
                        ++retries;
-@@ -2444,6 +2507,13 @@
+@@ -3216,6 +3279,13 @@
                        /* Let us wait until the read operation
                        completes */
  
@@ -149,7 +150,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
                        for (;;) {
                                enum buf_io_fix io_fix;
  
-@@ -2458,6 +2528,12 @@
+@@ -3230,6 +3300,12 @@
                                        break;
                                }
                        }
@@ -162,7 +163,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
                }
  
                fix_type = MTR_MEMO_BUF_FIX;
-@@ -2483,13 +2559,17 @@
+@@ -3255,13 +3331,17 @@
                /* In the case of a first access, try to apply linear
                read-ahead */
  
@@ -181,7 +182,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
        return(block);
  }
  
-@@ -2512,6 +2592,7 @@
+@@ -3285,6 +3365,7 @@
        unsigned        access_time;
        ibool           success;
        ulint           fix_type;
@@ -189,7 +190,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
  
        ut_ad(block);
        ut_ad(mtr);
-@@ -2589,13 +2670,17 @@
+@@ -3362,13 +3443,17 @@
  #ifdef UNIV_DEBUG_FILE_ACCESSES
        ut_a(block->page.file_page_was_freed == FALSE);
  #endif
@@ -208,8 +209,8 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
        }
  
  #ifdef UNIV_IBUF_COUNT_DEBUG
-@@ -2604,6 +2689,9 @@
- #endif
+@@ -3378,6 +3463,9 @@
+       buf_pool = buf_pool_from_block(block);
        buf_pool->stat.n_page_gets++;
  
 +      if (innobase_get_slow_log()) {
@@ -218,15 +219,15 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
        return(TRUE);
  }
  
-@@ -2625,6 +2713,7 @@
- {
+@@ -3400,6 +3488,7 @@
+       buf_pool_t*     buf_pool;
        ibool           success;
        ulint           fix_type;
 +      trx_t*          trx = NULL;
  
        ut_ad(mtr);
        ut_ad(mtr->state == MTR_ACTIVE);
-@@ -2709,6 +2798,11 @@
+@@ -3486,6 +3575,11 @@
  #endif
        buf_pool->stat.n_page_gets++;
  
@@ -238,10 +239,10 @@ diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0
        return(TRUE);
  }
  
-diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0rea.c
---- a/storage/innodb_plugin/buf/buf0rea.c      2010-08-27 16:23:31.014020792 +0900
-+++ b/storage/innodb_plugin/buf/buf0rea.c      2010-08-27 16:30:47.342987531 +0900
-@@ -75,7 +75,8 @@
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c   2010-12-03 17:32:15.617037263 +0900
++++ b/storage/innobase/buf/buf0rea.c   2010-12-03 17:42:42.075297193 +0900
+@@ -77,7 +77,8 @@
                        treat the tablespace as dropped; this is a timestamp we
                        use to stop dangling page reads from a tablespace
                        which we have DISCARDed + IMPORTed back */
@@ -251,9 +252,9 @@ diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0
  {
        buf_page_t*     bpage;
        ulint           wake_later;
-@@ -176,15 +177,15 @@
-       ut_ad(buf_page_in_file(bpage));
+@@ -179,15 +180,15 @@
  
+       thd_wait_begin(NULL, THD_WAIT_DISKIO);
        if (zip_size) {
 -              *err = fil_io(OS_FILE_READ | wake_later,
 +              *err = _fil_io(OS_FILE_READ | wake_later,
@@ -269,9 +270,9 @@ diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0
 -                            ((buf_block_t*) bpage)->frame, bpage);
 +                            ((buf_block_t*) bpage)->frame, bpage, trx);
        }
+       thd_wait_end(NULL);
        ut_a(*err == DB_SUCCESS);
-@@ -209,7 +210,8 @@
+@@ -213,7 +214,8 @@
  /*==========*/
        ulint   space,  /*!< in: space id */
        ulint   zip_size,/*!< in: compressed page size in bytes, or 0 */
@@ -279,9 +280,9 @@ diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0
 +      ulint   offset, /*!< in: page number */
 +      trx_t*  trx)
  {
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
        ib_int64_t      tablespace_version;
-       ulint           count;
-@@ -222,7 +224,7 @@
+@@ -227,7 +229,7 @@
  
        count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
                                  zip_size, FALSE,
@@ -290,7 +291,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0
        srv_buf_pool_reads += count;
        if (err == DB_TABLESPACE_DELETED) {
                ut_print_timestamp(stderr);
-@@ -273,8 +275,9 @@
+@@ -278,8 +280,9 @@
  /*==================*/
        ulint   space,  /*!< in: space id */
        ulint   zip_size,/*!< in: compressed page size in bytes, or 0 */
@@ -299,9 +300,9 @@ diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0
                        must want access to this page (see NOTE 3 above) */
 +      trx_t*  trx)
  {
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
        ib_int64_t      tablespace_version;
-       buf_page_t*     bpage;
-@@ -497,7 +500,7 @@
+@@ -500,7 +503,7 @@
                        count += buf_read_page_low(
                                &err, FALSE,
                                ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
@@ -310,7 +311,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0
                        if (err == DB_TABLESPACE_DELETED) {
                                ut_print_timestamp(stderr);
                                fprintf(stderr,
-@@ -587,7 +590,7 @@
+@@ -594,7 +597,7 @@
                buf_read_page_low(&err, sync && (i + 1 == n_stored),
                                  BUF_READ_ANY_PAGE, space_ids[i],
                                  zip_size, TRUE, space_versions[i],
@@ -319,7 +320,7 @@ diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0
  
                if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
  tablespace_deleted:
-@@ -728,12 +731,12 @@
+@@ -736,12 +739,12 @@
                if ((i + 1 == n_stored) && sync) {
                        buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
                                          zip_size, TRUE, tablespace_version,
@@ -334,10 +335,10 @@ diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0
                }
        }
  
-diff -ruN a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0fil.c
---- a/storage/innodb_plugin/fil/fil0fil.c      2010-08-27 16:15:55.187400372 +0900
-+++ b/storage/innodb_plugin/fil/fil0fil.c      2010-08-27 16:30:47.346992376 +0900
-@@ -4325,7 +4325,7 @@
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c   2010-12-03 15:53:54.610037199 +0900
++++ b/storage/innobase/fil/fil0fil.c   2010-12-03 17:42:42.079064198 +0900
+@@ -4395,7 +4395,7 @@
                                 node->name, node->handle, buf,
                                 offset_low, offset_high,
                                 page_size * n_pages,
@@ -346,7 +347,7 @@ diff -ruN a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0
  #endif
                if (success) {
                        node->size += n_pages;
-@@ -4652,7 +4652,7 @@
+@@ -4722,7 +4722,7 @@
  i/o on a tablespace which does not exist */
  UNIV_INTERN
  ulint
@@ -355,7 +356,7 @@ diff -ruN a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0
  /*===*/
        ulint   type,           /*!< in: OS_FILE_READ or OS_FILE_WRITE,
                                ORed to OS_FILE_LOG, if a log i/o
-@@ -4677,8 +4677,9 @@
+@@ -4747,8 +4747,9 @@
        void*   buf,            /*!< in/out: buffer where to store read data
                                or from where to write; in aio this must be
                                appropriately aligned */
@@ -366,7 +367,7 @@ diff -ruN a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0
  {
        ulint           mode;
        fil_space_t*    space;
-@@ -4848,7 +4849,7 @@
+@@ -4918,7 +4919,7 @@
  #else
        /* Queue the aio request */
        ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
@@ -375,10 +376,10 @@ diff -ruN a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0
  #endif
        ut_a(ret);
  
-diff -ruN a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
---- a/storage/innodb_plugin/handler/ha_innodb.cc       2010-08-27 16:30:34.589021493 +0900
-+++ b/storage/innodb_plugin/handler/ha_innodb.cc       2010-08-27 16:30:47.356987871 +0900
-@@ -1372,6 +1372,16 @@
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:36:44.293955189 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:42:42.090024586 +0900
+@@ -1526,6 +1526,16 @@
        trx->check_unique_secondary = !thd_test_options(
                thd, OPTION_RELAXED_UNIQUE_CHECKS);
  
@@ -395,10 +396,10 @@ diff -ruN a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/h
        DBUG_VOID_RETURN;
  }
  
-@@ -1427,6 +1437,32 @@
+@@ -1580,6 +1590,32 @@
+       return(trx);
  }
  
 +/*************************************************************************
 +Gets current trx. */
 +extern "C"
@@ -426,15 +427,15 @@ diff -ruN a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/h
 +}
 +
  /*********************************************************************//**
- Construct ha_innobase handler. */
- UNIV_INTERN
-@@ -8940,6 +8976,25 @@
+ Note that a transaction has been registered with MySQL.
+ @return true if transaction is registered with MySQL 2PC coordinator */
+@@ -9200,6 +9236,25 @@
        statement has ended */
  
        if (trx->n_mysql_tables_in_use == 0) {
 +#ifdef EXTENDED_SLOWLOG
 +              increment_thd_innodb_stats(thd,
-+                                      (unsigned long long) ut_conv_dulint_to_longlong(trx->id),
++                                      (unsigned long long) trx->id,
 +                                      trx->io_reads,
 +                                      trx->io_read,
 +                                      trx->io_reads_wait_timer,
@@ -454,19 +455,9 @@ diff -ruN a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/h
  
                trx->mysql_n_tables_locked = 0;
                prebuilt->used_in_HANDLER = FALSE;
-diff -ruN a/storage/innodb_plugin/handler/innodb_patch_info.h b/storage/innodb_plugin/handler/innodb_patch_info.h
---- a/storage/innodb_plugin/handler/innodb_patch_info.h        2010-08-27 16:30:34.590004526 +0900
-+++ b/storage/innodb_plugin/handler/innodb_patch_info.h        2010-08-27 16:30:47.361987777 +0900
-@@ -40,5 +40,6 @@
- {"innodb_purge_thread","Enable to use purge devoted thread","","http://www.percona.com/docs/wiki/percona-xtradb"},
- {"innodb_admin_command_base","XtraDB specific command interface through i_s","","http://www.percona.com/docs/wiki/percona-xtradb"},
- {"innodb_show_lock_name","Show mutex/lock name instead of crated file/line","","http://www.percona.com/docs/wiki/percona-xtradb"},
-+{"innodb_extend_slow","Extended statistics in slow.log","It is InnoDB-part only. It needs to patch also to mysqld.","http://www.percona.com/docs/wiki/percona-xtradb"},
- {NULL, NULL, NULL, NULL}
- };
-diff -ruN a/storage/innodb_plugin/include/buf0rea.h b/storage/innodb_plugin/include/buf0rea.h
---- a/storage/innodb_plugin/include/buf0rea.h  2010-08-27 15:54:18.078987755 +0900
-+++ b/storage/innodb_plugin/include/buf0rea.h  2010-08-27 16:30:47.363031394 +0900
+diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
+--- a/storage/innobase/include/buf0rea.h       2010-12-03 15:18:48.891024406 +0900
++++ b/storage/innobase/include/buf0rea.h       2010-12-03 17:42:42.096026873 +0900
 @@ -27,6 +27,7 @@
  #define buf0rea_h
  
@@ -496,10 +487,10 @@ diff -ruN a/storage/innodb_plugin/include/buf0rea.h b/storage/innodb_plugin/incl
  /********************************************************************//**
  Issues read requests for pages which the ibuf module wants to read in, in
  order to contract the insert buffer tree. Technically, this function is like
-diff -ruN a/storage/innodb_plugin/include/fil0fil.h b/storage/innodb_plugin/include/fil0fil.h
---- a/storage/innodb_plugin/include/fil0fil.h  2010-08-27 15:52:14.325059269 +0900
-+++ b/storage/innodb_plugin/include/fil0fil.h  2010-08-27 16:30:47.365059512 +0900
-@@ -610,9 +610,12 @@
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h       2010-12-03 15:09:51.290958543 +0900
++++ b/storage/innobase/include/fil0fil.h       2010-12-03 17:42:42.097027548 +0900
+@@ -611,9 +611,12 @@
  Reads or writes data. This operation is asynchronous (aio).
  @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
  i/o on a tablespace which does not exist */
@@ -513,7 +504,7 @@ diff -ruN a/storage/innodb_plugin/include/fil0fil.h b/storage/innodb_plugin/incl
  /*===*/
        ulint   type,           /*!< in: OS_FILE_READ or OS_FILE_WRITE,
                                ORed to OS_FILE_LOG, if a log i/o
-@@ -637,8 +640,9 @@
+@@ -638,8 +641,9 @@
        void*   buf,            /*!< in/out: buffer where to store read data
                                or from where to write; in aio this must be
                                appropriately aligned */
@@ -524,9 +515,9 @@ diff -ruN a/storage/innodb_plugin/include/fil0fil.h b/storage/innodb_plugin/incl
  /**********************************************************************//**
  Waits for an aio operation to complete. This function is used to write the
  handler for completed requests. The aio array of pending requests is divided
-diff -ruN a/storage/innodb_plugin/include/os0file.h b/storage/innodb_plugin/include/os0file.h
---- a/storage/innodb_plugin/include/os0file.h  2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/os0file.h  2010-08-27 16:30:47.366987560 +0900
+diff -ruN a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
+--- a/storage/innobase/include/os0file.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/os0file.h       2010-12-03 17:42:42.100023783 +0900
 @@ -36,6 +36,7 @@
  #define os0file_h
  
@@ -535,21 +526,63 @@ diff -ruN a/storage/innodb_plugin/include/os0file.h b/storage/innodb_plugin/incl
  
  #ifndef __WIN__
  #include <dirent.h>
-@@ -482,9 +483,12 @@
- /*******************************************************************//**
- Requests a synchronous read operation.
- @return       TRUE if request was successful, FALSE if fail */
-+#define os_file_read(file, buf, offset, offset_high, n)         \
-+              _os_file_read(file, buf, offset, offset_high, n, NULL)
+@@ -277,13 +278,17 @@
+       pfs_os_file_close_func(file, __FILE__, __LINE__)
+ # define os_aio(type, mode, name, file, buf, offset, offset_high,     \
+-              n, message1, message2)                                  \
++              n, message1, message2, trx)                             \
+       pfs_os_aio_func(type, mode, name, file, buf, offset,            \
+-                      offset_high, n, message1, message2,             \
++                      offset_high, n, message1, message2, trx,        \
+                       __FILE__, __LINE__)
+ # define os_file_read(file, buf, offset, offset_high, n)              \
+-      pfs_os_file_read_func(file, buf, offset, offset_high, n,        \
++      pfs_os_file_read_func(file, buf, offset, offset_high, n, NULL,  \
++                            __FILE__, __LINE__)
 +
- UNIV_INTERN
- ibool
--os_file_read(
-+_os_file_read(
- /*=========*/
-       os_file_t       file,   /*!< in: handle to a file */
-       void*           buf,    /*!< in: buffer where to read */
-@@ -492,7 +496,8 @@
++# define os_file_read_trx(file, buf, offset, offset_high, n, trx)     \
++      pfs_os_file_read_func(file, buf, offset, offset_high, n, trx,   \
+                             __FILE__, __LINE__)
+ # define os_file_read_no_error_handling(file, buf, offset,            \
+@@ -319,12 +324,15 @@
+ # define os_file_close(file)  os_file_close_func(file)
+ # define os_aio(type, mode, name, file, buf, offset, offset_high,     \
+-             n, message1, message2)                                   \
++             n, message1, message2, trx)                              \
+       os_aio_func(type, mode, name, file, buf, offset, offset_high, n,\
+-                  message1, message2)
++                  message1, message2, trx)
+ # define os_file_read(file, buf, offset, offset_high, n)              \
+-      os_file_read_func(file, buf, offset, offset_high, n)
++      os_file_read_func(file, buf, offset, offset_high, n, NULL)
++
++# define os_file_read_trx(file, buf, offset, offset_high, n, trx)     \
++      os_file_read_func(file, buf, offset, offset_high, n, trx)
+ # define os_file_read_no_error_handling(file, buf, offset,            \
+                                      offset_high, n)                  \
+@@ -690,6 +698,7 @@
+       ulint           offset_high,/*!< in: most significant 32 bits of
+                               offset */
+       ulint           n,      /*!< in: number of bytes to read */
++      trx_t*          trx,
+       const char*     src_file,/*!< in: file name where func invoked */
+       ulint           src_line);/*!< in: line where the func invoked */
+@@ -744,6 +753,7 @@
+                               (can be used to identify a completed
+                               aio operation); ignored if mode is
+                                 OS_AIO_SYNC */
++      trx_t*          trx,
+       const char*     src_file,/*!< in: file name where func invoked */
+       ulint           src_line);/*!< in: line where the func invoked */
+ /*******************************************************************//**
+@@ -885,7 +895,8 @@
                                offset where to read */
        ulint           offset_high,/*!< in: most significant 32 bits of
                                offset */
@@ -559,7 +592,7 @@ diff -ruN a/storage/innodb_plugin/include/os0file.h b/storage/innodb_plugin/incl
  /*******************************************************************//**
  Rewind file to its start, read at most size - 1 bytes from it to str, and
  NUL-terminate str. All errors are silently ignored. This function is
-@@ -646,10 +651,11 @@
+@@ -1044,10 +1055,11 @@
                                (can be used to identify a completed
                                aio operation); ignored if mode is
                                OS_AIO_SYNC */
@@ -572,10 +605,47 @@ diff -ruN a/storage/innodb_plugin/include/os0file.h b/storage/innodb_plugin/incl
  /************************************************************************//**
  Wakes up all async i/o threads so that they know to exit themselves in
  shutdown. */
-diff -ruN a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h
---- a/storage/innodb_plugin/include/srv0srv.h  2010-08-27 16:27:30.225055856 +0900
-+++ b/storage/innodb_plugin/include/srv0srv.h  2010-08-27 16:30:47.367988259 +0900
-@@ -62,6 +62,9 @@
+diff -ruN a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
+--- a/storage/innobase/include/os0file.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/os0file.ic      2010-12-03 17:42:42.102024458 +0900
+@@ -229,6 +229,7 @@
+                               (can be used to identify a completed
+                               aio operation); ignored if mode is
+                                 OS_AIO_SYNC */
++      trx_t*          trx,
+       const char*     src_file,/*!< in: file name where func invoked */
+       ulint           src_line)/*!< in: line where the func invoked */
+ {
+@@ -244,7 +245,7 @@
+                                  src_file, src_line);
+       result = os_aio_func(type, mode, name, file, buf, offset, offset_high,
+-                           n, message1, message2);
++                           n, message1, message2, trx);
+       register_pfs_file_io_end(locker, n);
+@@ -268,6 +269,7 @@
+       ulint           offset_high,/*!< in: most significant 32 bits of
+                               offset */
+       ulint           n,      /*!< in: number of bytes to read */
++      trx_t*          trx,
+       const char*     src_file,/*!< in: file name where func invoked */
+       ulint           src_line)/*!< in: line where the func invoked */
+ {
+@@ -278,7 +280,7 @@
+       register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
+                                  src_file, src_line);
+-      result = os_file_read_func(file, buf, offset, offset_high, n);
++      result = os_file_read_func(file, buf, offset, offset_high, n, trx);
+       register_pfs_file_io_end(locker, n);
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-03 17:32:15.634987408 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-03 17:42:42.104028644 +0900
+@@ -71,6 +71,9 @@
  #define SRV_AUTO_EXTEND_INCREMENT     \
        (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
  
@@ -585,9 +655,9 @@ diff -ruN a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/incl
  /* This is set to TRUE if the MySQL user has set it in MySQL */
  extern ibool  srv_lower_case_table_names;
  
-diff -ruN a/storage/innodb_plugin/include/trx0trx.h b/storage/innodb_plugin/include/trx0trx.h
---- a/storage/innodb_plugin/include/trx0trx.h  2010-08-27 16:08:45.301058614 +0900
-+++ b/storage/innodb_plugin/include/trx0trx.h  2010-08-27 16:30:47.369989369 +0900
+diff -ruN a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
+--- a/storage/innobase/include/trx0trx.h       2010-12-03 15:41:52.049372966 +0900
++++ b/storage/innobase/include/trx0trx.h       2010-12-03 17:42:42.107024532 +0900
 @@ -738,6 +738,17 @@
        /*------------------------------*/
        char detailed_error[256];       /*!< detailed error message for last
@@ -606,10 +676,10 @@ diff -ruN a/storage/innodb_plugin/include/trx0trx.h b/storage/innodb_plugin/incl
  };
  
  #define TRX_MAX_N_THREADS     32      /* maximum number of
-diff -ruN a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/lock0lock.c
---- a/storage/innodb_plugin/lock/lock0lock.c   2010-08-27 15:52:14.332058513 +0900
-+++ b/storage/innodb_plugin/lock/lock0lock.c   2010-08-27 16:30:47.374058285 +0900
-@@ -1757,6 +1757,8 @@
+diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
+--- a/storage/innobase/lock/lock0lock.c        2010-12-03 15:09:51.297986437 +0900
++++ b/storage/innobase/lock/lock0lock.c        2010-12-03 17:42:42.111024587 +0900
+@@ -1755,6 +1755,8 @@
  {
        lock_t* lock;
        trx_t*  trx;
@@ -618,7 +688,7 @@ diff -ruN a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/
  
        ut_ad(mutex_own(&kernel_mutex));
  
-@@ -1815,6 +1817,10 @@
+@@ -1813,6 +1815,10 @@
        trx->que_state = TRX_QUE_LOCK_WAIT;
        trx->was_chosen_as_deadlock_victim = FALSE;
        trx->wait_started = time(NULL);
@@ -629,7 +699,7 @@ diff -ruN a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/
  
        ut_a(que_thr_stop(thr));
  
-@@ -3695,6 +3701,8 @@
+@@ -3692,6 +3698,8 @@
  {
        lock_t* lock;
        trx_t*  trx;
@@ -638,7 +708,7 @@ diff -ruN a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/
  
        ut_ad(mutex_own(&kernel_mutex));
  
-@@ -3750,6 +3758,10 @@
+@@ -3747,6 +3755,10 @@
                return(DB_SUCCESS);
        }
  
@@ -649,10 +719,10 @@ diff -ruN a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/
        trx->que_state = TRX_QUE_LOCK_WAIT;
        trx->was_chosen_as_deadlock_victim = FALSE;
        trx->wait_started = time(NULL);
-diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0file.c
---- a/storage/innodb_plugin/os/os0file.c       2010-08-27 16:23:31.038058669 +0900
-+++ b/storage/innodb_plugin/os/os0file.c       2010-08-27 16:30:47.380058815 +0900
-@@ -38,6 +38,8 @@
+diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
+--- a/storage/innobase/os/os0file.c    2010-12-03 17:32:15.644024974 +0900
++++ b/storage/innobase/os/os0file.c    2010-12-03 17:42:42.117023467 +0900
+@@ -43,6 +43,8 @@
  #include "srv0start.h"
  #include "fil0fil.h"
  #include "buf0buf.h"
@@ -661,20 +731,7 @@ diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0fil
  #include "log0recv.h"
  #ifndef UNIV_HOTBACKUP
  # include "os0sync.h"
-@@ -2087,22 +2089,30 @@
- /*******************************************************************//**
- Does a synchronous read operation in Posix.
- @return       number of bytes read, -1 if error */
-+#define os_file_pread(file, buf, n, offset, offset_high)        \
-+              _os_file_pread(file, buf, n, offset, offset_high, NULL);
-+
- static
- ssize_t
--os_file_pread(
-+_os_file_pread(
- /*==========*/
-       os_file_t       file,   /*!< in: handle to a file */
-       void*           buf,    /*!< in: buffer where to read */
+@@ -2177,13 +2179,18 @@
        ulint           n,      /*!< in: number of bytes to read */
        ulint           offset, /*!< in: least significant 32 bits of file
                                offset from where to read */
@@ -694,7 +751,7 @@ diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0fil
  
        ut_a((offset & 0xFFFFFFFFUL) == offset);
  
-@@ -2123,6 +2133,15 @@
+@@ -2204,6 +2211,15 @@
  
        os_n_file_reads++;
  
@@ -710,7 +767,7 @@ diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0fil
  #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
        os_mutex_enter(os_file_count_mutex);
        os_file_n_pending_preads++;
-@@ -2136,6 +2155,13 @@
+@@ -2217,6 +2233,13 @@
        os_n_pending_reads--;
        os_mutex_exit(os_file_count_mutex);
  
@@ -724,7 +781,7 @@ diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0fil
        return(n_bytes);
  #else
        {
-@@ -2172,6 +2198,13 @@
+@@ -2253,6 +2276,13 @@
                os_n_pending_reads--;
                os_mutex_exit(os_file_count_mutex);
  
@@ -738,16 +795,7 @@ diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0fil
                return(ret);
        }
  #endif
-@@ -2302,7 +2335,7 @@
- @return       TRUE if request was successful, FALSE if fail */
- UNIV_INTERN
- ibool
--os_file_read(
-+_os_file_read(
- /*=========*/
-       os_file_t       file,   /*!< in: handle to a file */
-       void*           buf,    /*!< in: buffer where to read */
-@@ -2310,7 +2343,8 @@
+@@ -2393,7 +2423,8 @@
                                offset where to read */
        ulint           offset_high, /*!< in: most significant 32 bits of
                                offset */
@@ -757,16 +805,25 @@ diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0fil
  {
  #ifdef __WIN__
        BOOL            ret;
-@@ -2385,7 +2419,7 @@
+@@ -2468,7 +2499,7 @@
        os_bytes_read_since_printout += n;
  
  try_again:
 -      ret = os_file_pread(file, buf, n, offset, offset_high);
-+      ret = _os_file_pread(file, buf, n, offset, offset_high, trx);
++      ret = os_file_pread(file, buf, n, offset, offset_high, trx);
  
        if ((ulint)ret == n) {
  
-@@ -3356,7 +3390,8 @@
+@@ -2597,7 +2628,7 @@
+       os_bytes_read_since_printout += n;
+ try_again:
+-      ret = os_file_pread(file, buf, n, offset, offset_high);
++      ret = os_file_pread(file, buf, n, offset, offset_high, NULL);
+       if ((ulint)ret == n) {
+@@ -3619,7 +3650,8 @@
                                offset */
        ulint           offset_high, /*!< in: most significant 32 bits of
                                offset */
@@ -774,9 +831,9 @@ diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0fil
 +      ulint           len,    /*!< in: length of the block to read or write */
 +      trx_t*          trx)
  {
-       os_aio_slot_t*  slot;
-       ulint           i;
-@@ -3642,10 +3677,11 @@
+       os_aio_slot_t*  slot = NULL;
+ #ifdef WIN_ASYNC_IO
+@@ -3991,10 +4023,11 @@
                                (can be used to identify a completed
                                aio operation); ignored if mode is
                                OS_AIO_SYNC */
@@ -789,18 +846,18 @@ diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0fil
  {
        os_aio_array_t* array;
        os_aio_slot_t*  slot;
-@@ -3687,8 +3723,8 @@
+@@ -4035,8 +4068,8 @@
                wait in the Windows case. */
  
                if (type == OS_FILE_READ) {
 -                      return(os_file_read(file, buf, offset,
 -                                          offset_high, n));
-+                      return(_os_file_read(file, buf, offset,
++                      return(os_file_read_trx(file, buf, offset,
 +                                          offset_high, n, trx));
                }
  
                ut_a(type == OS_FILE_WRITE);
-@@ -3721,8 +3757,13 @@
+@@ -4074,8 +4107,13 @@
                ut_error;
        }
  
@@ -813,14 +870,14 @@ diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0fil
 -                                       name, buf, offset, offset_high, n);
 +                                       name, buf, offset, offset_high, n, trx);
        if (type == OS_FILE_READ) {
-               if (os_aio_use_native_aio) {
- #ifdef WIN_ASYNC_IO
-diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
---- a/storage/innodb_plugin/srv/srv0srv.c      2010-08-27 16:27:30.233022109 +0900
-+++ b/storage/innodb_plugin/srv/srv0srv.c      2010-08-27 16:30:47.384058509 +0900
-@@ -86,6 +86,9 @@
- #include "trx0i_s.h"
- #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
+               if (srv_use_native_aio) {
+                       os_n_file_reads++;
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 17:32:15.648024399 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 17:45:05.067023254 +0900
+@@ -88,6 +88,9 @@
+ #include "mysql/plugin.h"
+ #include "mysql/service_thd_wait.h"
  
 +/* prototypes for new functions added to ha_innodb.cc */
 +ibool innobase_get_slow_log();
@@ -828,7 +885,7 @@ diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0
  /* This is set to TRUE if the MySQL user has set it in MySQL; currently
  affects only FOREIGN KEY definition parsing */
  UNIV_INTERN ibool     srv_lower_case_table_names      = FALSE;
-@@ -1155,6 +1158,10 @@
+@@ -1219,6 +1222,10 @@
        ibool                   has_slept = FALSE;
        srv_conc_slot_t*        slot      = NULL;
        ulint                   i;
@@ -839,7 +896,7 @@ diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0
  
        if (trx->mysql_thd != NULL
            && thd_is_replication_slave_thread(trx->mysql_thd)) {
-@@ -1231,6 +1238,7 @@
+@@ -1295,6 +1302,7 @@
                switches. */
                if (SRV_THREAD_SLEEP_DELAY > 0) {
                        os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
@@ -847,7 +904,7 @@ diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0
                }
  
                trx->op_info = "";
-@@ -1286,12 +1294,25 @@
+@@ -1350,6 +1358,13 @@
        /* Go to wait for the event; when a thread leaves InnoDB it will
        release this thread */
  
@@ -860,7 +917,8 @@ diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0
 +
        trx->op_info = "waiting in InnoDB queue";
  
-       os_event_wait(slot->event);
+       thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_TABLE_LOCK);
+@@ -1358,6 +1373,12 @@
  
        trx->op_info = "";
  
@@ -873,10 +931,10 @@ diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0
        os_fast_mutex_lock(&srv_conc_mutex);
  
        srv_conc_n_waiting_threads--;
-diff -ruN a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c
---- a/storage/innodb_plugin/trx/trx0trx.c      2010-08-27 16:08:45.306058896 +0900
-+++ b/storage/innodb_plugin/trx/trx0trx.c      2010-08-27 16:30:47.387058330 +0900
-@@ -179,6 +179,15 @@
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c   2010-12-03 15:41:52.053955669 +0900
++++ b/storage/innobase/trx/trx0trx.c   2010-12-03 17:42:42.127023410 +0900
+@@ -185,6 +185,15 @@
        trx->global_read_view = NULL;
        trx->read_view = NULL;
  
@@ -892,7 +950,7 @@ diff -ruN a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0
        /* Set X/Open XA transaction identification to NULL */
        memset(&trx->xid, 0, sizeof(trx->xid));
        trx->xid.formatID = -1;
-@@ -216,6 +225,11 @@
+@@ -222,6 +231,11 @@
  
        trx->mysql_process_no = os_proc_get_number();
  
@@ -904,7 +962,7 @@ diff -ruN a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0
        return(trx);
  }
  
-@@ -347,6 +361,12 @@
+@@ -353,6 +367,12 @@
  /*===============*/
        trx_t*  trx)    /*!< in, own: trx object */
  {
@@ -917,7 +975,7 @@ diff -ruN a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0
        mutex_enter(&kernel_mutex);
  
        UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-@@ -368,6 +388,12 @@
+@@ -374,6 +394,12 @@
  /*====================*/
        trx_t*  trx)    /*!< in, own: trx object */
  {
@@ -930,7 +988,7 @@ diff -ruN a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0
        mutex_enter(&kernel_mutex);
  
        trx_free(trx);
-@@ -1093,6 +1119,9 @@
+@@ -1091,6 +1117,9 @@
        trx_t*  trx)    /*!< in: transaction */
  {
        que_thr_t*      thr;
@@ -940,7 +998,7 @@ diff -ruN a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0
  
        ut_ad(mutex_own(&kernel_mutex));
        ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
-@@ -1107,6 +1136,11 @@
+@@ -1105,6 +1134,11 @@
                thr = UT_LIST_GET_FIRST(trx->wait_thrs);
        }
  
@@ -952,7 +1010,7 @@ diff -ruN a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0
        trx->que_state = TRX_QUE_RUNNING;
  }
  
-@@ -1120,6 +1154,9 @@
+@@ -1118,6 +1152,9 @@
        trx_t*  trx)    /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */
  {
        que_thr_t*      thr;
@@ -962,7 +1020,7 @@ diff -ruN a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0
  
        ut_ad(mutex_own(&kernel_mutex));
        ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
-@@ -1134,6 +1171,11 @@
+@@ -1132,6 +1169,11 @@
                thr = UT_LIST_GET_FIRST(trx->wait_thrs);
        }
  
diff --git a/innodb_extra_rseg.patch b/innodb_extra_rseg.patch
new file mode 100644 (file)
index 0000000..66df794
--- /dev/null
@@ -0,0 +1,273 @@
+# name       : innodb_extra_rseg.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:18:48.879955903 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:22:53.779955671 +0900
+@@ -11330,6 +11330,11 @@
+   "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
+   NULL, NULL, 0, 0, 1, 0);
++static MYSQL_SYSVAR_ULONG(extra_rsegments, srv_extra_rsegments, /* backs srv_extra_rsegments */
++  PLUGIN_VAR_RQCMDARG,
++  "Number of extra user rollback segments which are used in a round-robin fashion.",
++  NULL, NULL, 127, 0, 127, 0); /* presumably check, update, default, min, max, block size -- confirm against plugin.h */
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+   MYSQL_SYSVAR(additional_mem_pool_size),
+   MYSQL_SYSVAR(autoextend_increment),
+@@ -11395,6 +11400,7 @@
+   MYSQL_SYSVAR(read_ahead),
+   MYSQL_SYSVAR(adaptive_flushing_method),
+   MYSQL_SYSVAR(enable_unsafe_group_commit),
++  MYSQL_SYSVAR(extra_rsegments),
+   MYSQL_SYSVAR(use_sys_malloc),
+   MYSQL_SYSVAR(use_native_aio),
+   MYSQL_SYSVAR(change_buffering),
+@@ -11423,6 +11429,7 @@
+   innobase_system_variables, /* system variables */
+   NULL /* reserved */
+ },
++i_s_innodb_rseg,
+ i_s_innodb_trx,
+ i_s_innodb_locks,
+ i_s_innodb_lock_waits,
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc  2010-12-03 15:06:58.742986460 +0900
++++ b/storage/innobase/handler/i_s.cc  2010-12-03 15:33:08.790070078 +0900
+@@ -45,6 +45,8 @@
+ #include "srv0start.h" /* for srv_was_started */
+ #include "trx0i_s.h"
+ #include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */
++#include "trx0rseg.h" /* for trx_rseg_struct */
++#include "trx0sys.h" /* for trx_sys */
+ }
+ static const char plugin_author[] = "Innobase Oy";
+@@ -1782,3 +1784,166 @@
+       DBUG_RETURN(0);
+ }
++
++/***********************************************************************
++Columns of INNODB_RSEG; their order is the field[] index used when filling. */
++static ST_FIELD_INFO  i_s_innodb_rseg_fields_info[] =
++{
++      {STRUCT_FLD(field_name,         "rseg_id"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "space_id"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "zip_size"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "page_no"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "max_size"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "curr_size"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++static /* fill_table callback of INNODB_RSEG: stores one row per trx_sys->rseg_list entry */
++int
++i_s_innodb_rseg_fill(
++/*=================*/
++      THD*            thd,    /* in: thread */
++      TABLE_LIST*     tables, /* in/out: tables to fill */
++      COND*           cond)   /* in: condition (ignored) */
++{
++      TABLE*  table   = (TABLE *) tables->table;
++      int     status  = 0;    /* returned: 0, or 1 if storing a row failed */
++      trx_rseg_t*     rseg;   /* cursor over trx_sys->rseg_list */
++
++      DBUG_ENTER("i_s_innodb_rseg_fill");
++
++      /* produce no rows (but still return 0) if the PROCESS_ACL check fails */
++      if (check_global_access(thd, PROCESS_ACL)) {
++
++              DBUG_RETURN(0);
++      }
++
++      RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++      /* NOTE(review): no mutex is taken around this list walk here -- confirm it is safe */
++      rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
++
++      while (rseg) {
++              table->field[0]->store(rseg->id);        /* rseg_id */
++              table->field[1]->store(rseg->space);     /* space_id */
++              table->field[2]->store(rseg->zip_size);  /* zip_size */
++              table->field[3]->store(rseg->page_no);   /* page_no */
++              table->field[4]->store(rseg->max_size);  /* max_size */
++              table->field[5]->store(rseg->curr_size); /* curr_size */
++
++              if (schema_table_store_record(thd, table)) {
++                      status = 1;     /* stop at the first row that cannot be stored */
++                      break;
++              }
++
++              rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
++      }
++
++      DBUG_RETURN(status);
++}
++
++static /* init hook of the INNODB_RSEG plugin: attaches its columns and fill callback */
++int
++i_s_innodb_rseg_init(
++/*=================*/
++                      /* out: always 0 */
++      void*   p)      /* in/out: table schema object (cast to ST_SCHEMA_TABLE*) */
++{
++      DBUG_ENTER("i_s_innodb_rseg_init");
++      ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++      schema->fields_info = i_s_innodb_rseg_fields_info;      /* column definitions */
++      schema->fill_table = i_s_innodb_rseg_fill;              /* row producer */
++
++      DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_rseg = /* descriptor of the INNODB_RSEG plugin */
++{
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_RSEG"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB rollback segment information"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, i_s_innodb_rseg_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++      /* struct st_mysql_show_var* (no status variables) */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* struct st_mysql_sys_var** (no system variables) */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
+diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
+--- a/storage/innobase/handler/i_s.h   2010-12-03 15:06:58.744953398 +0900
++++ b/storage/innobase/handler/i_s.h   2010-12-03 15:22:53.783953418 +0900
+@@ -33,5 +33,6 @@
+ extern struct st_mysql_plugin i_s_innodb_cmp_reset;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
++extern struct st_mysql_plugin i_s_innodb_rseg;
+ #endif /* i_s_h */
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-03 15:18:48.894029379 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-03 15:22:53.786986025 +0900
+@@ -225,6 +225,8 @@
+ extern ulint  srv_read_ahead;
+ extern ulint  srv_adaptive_flushing_method;
++extern ulint  srv_extra_rsegments;
++
+ /*-------------------------------------------*/
+ extern ulint  srv_n_rows_inserted;
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 15:18:48.913956140 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 15:22:53.789987037 +0900
+@@ -411,6 +411,8 @@
+ UNIV_INTERN ulint     srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
+ UNIV_INTERN ulint     srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
+ UNIV_INTERN ulint     srv_adaptive_flushing_method = 0; /* 0: native  1: estimate  2: keep_average */
++
++UNIV_INTERN ulint     srv_extra_rsegments = 127; /* extra rseg for users */
+ /*-------------------------------------------*/
+ UNIV_INTERN ulong     srv_n_spin_wait_rounds  = 30;
+ UNIV_INTERN ulong     srv_n_free_tickets_to_enter = 500;
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c   2010-12-03 15:18:48.918982105 +0900
++++ b/storage/innobase/trx/trx0trx.c   2010-12-03 15:22:53.792983193 +0900
+@@ -619,7 +619,7 @@
+       rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+-      if (rseg == NULL) {
++      if (rseg == NULL || rseg->id > srv_extra_rsegments) {
+               rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+       }
+@@ -627,7 +627,8 @@
+       it */
+       if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID)
+-          && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) {
++          && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)
++          && srv_extra_rsegments > 0) {
+               goto loop;
+       }
diff --git a/innodb_fast_checksum.patch b/innodb_fast_checksum.patch
new file mode 100644 (file)
index 0000000..bd11c2e
--- /dev/null
@@ -0,0 +1,301 @@
+# name       : innodb_fast_checksum.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-04 15:52:23.391514910 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-04 15:53:45.013513772 +0900
+@@ -511,6 +511,27 @@
+       return(checksum);
+ }
++UNIV_INTERN /* page checksum variant: folds the bulk of the page with ut_fold_binary_32() */
++ulint
++buf_calc_page_new_checksum_32(
++/*==========================*/
++      const byte*     page)   /*!< in: buffer page */
++{
++      ulint checksum; /* sum of the three folds below, masked to 32 bits */
++
++      checksum = ut_fold_binary(page + FIL_PAGE_OFFSET, /* bytewise fold */
++                                FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
++              + ut_fold_binary(page + FIL_PAGE_DATA, /* bytewise, up to FIL_PAGE_DATA_ALIGN_32 */
++                               FIL_PAGE_DATA_ALIGN_32 - FIL_PAGE_DATA)
++              + ut_fold_binary_32(page + FIL_PAGE_DATA_ALIGN_32, /* 32-bit word fold */
++                                  UNIV_PAGE_SIZE - FIL_PAGE_DATA_ALIGN_32
++                                  - FIL_PAGE_END_LSN_OLD_CHKSUM); /* trailer bytes excluded */
++
++      checksum = checksum & 0xFFFFFFFFUL; /* keep only the low 32 bits */
++
++      return(checksum);
++}
++
+ /********************************************************************//**
+ In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
+ looked at the first few bytes of the page. This calculates that old
+@@ -627,9 +648,21 @@
+               /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
+               (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
+-              if (checksum_field != 0
++              if (!srv_fast_checksum
++                  && checksum_field != 0
++                  && checksum_field != BUF_NO_CHECKSUM_MAGIC
++                  && checksum_field
++                  != buf_calc_page_new_checksum(read_buf)) {
++
++                      return(TRUE);
++              }
++
++              if (srv_fast_checksum
++                  && checksum_field != 0
+                   && checksum_field != BUF_NO_CHECKSUM_MAGIC
+                   && checksum_field
++                  != buf_calc_page_new_checksum_32(read_buf)
++                  && checksum_field
+                   != buf_calc_page_new_checksum(read_buf)) {
+                       return(TRUE);
+@@ -653,6 +686,7 @@
+       dict_index_t*   index;
+ #endif /* !UNIV_HOTBACKUP */
+       ulint           checksum;
++      ulint           checksum_32;
+       ulint           old_checksum;
+       ulint           size    = zip_size;
+@@ -739,12 +773,14 @@
+       checksum = srv_use_checksums
+               ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
++      checksum_32 = srv_use_checksums
++              ? buf_calc_page_new_checksum_32(read_buf) : BUF_NO_CHECKSUM_MAGIC;
+       old_checksum = srv_use_checksums
+               ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
+       ut_print_timestamp(stderr);
+       fprintf(stderr,
+-              "  InnoDB: Page checksum %lu, prior-to-4.0.14-form"
++              "  InnoDB: Page checksum %lu (32bit_calc: %lu), prior-to-4.0.14-form"
+               " checksum %lu\n"
+               "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
+               " stored checksum %lu\n"
+@@ -753,7 +789,7 @@
+               "InnoDB: Page number (if stored to page already) %lu,\n"
+               "InnoDB: space id (if created with >= MySQL-4.1.1"
+               " and stored already) %lu\n",
+-              (ulong) checksum, (ulong) old_checksum,
++              (ulong) checksum, (ulong) checksum_32, (ulong) old_checksum,
+               (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
+               (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
+                                        - FIL_PAGE_END_LSN_OLD_CHKSUM),
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c   2010-12-04 15:37:50.555568346 +0900
++++ b/storage/innobase/buf/buf0flu.c   2010-12-04 15:53:45.015513917 +0900
+@@ -1027,7 +1027,9 @@
+       mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+                       srv_use_checksums
+-                      ? buf_calc_page_new_checksum(page)
++                      ? (!srv_fast_checksum
++                         ? buf_calc_page_new_checksum(page)
++                         : buf_calc_page_new_checksum_32(page))
+                       : BUF_NO_CHECKSUM_MAGIC);
+       /* We overwrite the first 4 bytes of the end lsn field to store
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c   2010-12-04 15:52:23.406513743 +0900
++++ b/storage/innobase/fil/fil0fil.c   2010-12-04 15:53:45.020513800 +0900
+@@ -3171,7 +3171,9 @@
+                       mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
+               mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+                               srv_use_checksums
+-                              ? buf_calc_page_new_checksum(page)
++                              ? (!srv_fast_checksum
++                                 ? buf_calc_page_new_checksum(page)
++                                 : buf_calc_page_new_checksum_32(page))
+                                               : BUF_NO_CHECKSUM_MAGIC);
+               mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+                               srv_use_checksums
+@@ -3303,7 +3305,8 @@
+                                       page_is_corrupt = TRUE;
+                               }
+-                              if (checksum_field != 0
++                              if (!srv_fast_checksum
++                                  && checksum_field != 0
+                                   && checksum_field != BUF_NO_CHECKSUM_MAGIC
+                                   && checksum_field
+                                   != buf_calc_page_new_checksum(page)) {
+@@ -3311,6 +3314,17 @@
+                                       page_is_corrupt = TRUE;
+                               }
++                              if (srv_fast_checksum
++                                  && checksum_field != 0
++                                  && checksum_field != BUF_NO_CHECKSUM_MAGIC
++                                  && checksum_field
++                                  != buf_calc_page_new_checksum_32(page)
++                                  && checksum_field
++                                  != buf_calc_page_new_checksum(page)) {
++
++                                      page_is_corrupt = TRUE;
++                              }
++
+                               /* if it is free page, inconsistency is acceptable */
+                               if (!offset) {
+                                       /* header page*/
+@@ -3456,7 +3470,9 @@
+                                       mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+                                                       srv_use_checksums
+-                                                      ? buf_calc_page_new_checksum(page)
++                                                      ? (!srv_fast_checksum
++                                                         ? buf_calc_page_new_checksum(page)
++                                                         : buf_calc_page_new_checksum_32(page))
+                                                                       : BUF_NO_CHECKSUM_MAGIC);
+                                       mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+                                                       srv_use_checksums
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:52:23.420480329 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:53:45.029551892 +0900
+@@ -183,6 +183,7 @@
+ #endif /* UNIV_LOG_ARCHIVE */
+ static my_bool        innobase_use_doublewrite                = TRUE;
+ static my_bool        innobase_use_checksums                  = TRUE;
++static my_bool        innobase_fast_checksum                  = FALSE;
+ static my_bool        innobase_recovery_stats                 = TRUE;
+ static my_bool        innobase_locks_unsafe_for_binlog        = FALSE;
+ static my_bool        innobase_overwrite_relay_log_info       = FALSE;
+@@ -2573,6 +2574,7 @@
+       srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+       srv_use_checksums = (ibool) innobase_use_checksums;
++      srv_fast_checksum = (ibool) innobase_fast_checksum;
+ #ifdef HAVE_LARGE_PAGES
+         if ((os_use_large_pages = (ibool) my_use_large_pages))
+@@ -11321,6 +11323,15 @@
+   "Disable with --skip-innodb-checksums.",
+   NULL, NULL, TRUE);
++static MYSQL_SYSVAR_BOOL(fast_checksum, innobase_fast_checksum, /* copied to srv_fast_checksum at init */
++  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++  "Change the algorithm of checksum for the whole of datapage to 4-bytes word based. "
++  "The original checksum is checked after the new one. It may be slow for reading page"
++  " which has original checksum. Overwrite the page or recreate the InnoDB database, "
++  "if you want the entire benefit for performance at once. "
++  "#### Attention: The checksum is not compatible for normal or disabled version! ####",
++  NULL, NULL, FALSE); /* last argument is the default: fast checksum off */
++
+ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
+   PLUGIN_VAR_READONLY,
+   "The common part for InnoDB table spaces.",
+@@ -11833,6 +11844,7 @@
+   MYSQL_SYSVAR(buffer_pool_size),
+   MYSQL_SYSVAR(buffer_pool_instances),
+   MYSQL_SYSVAR(checksums),
++  MYSQL_SYSVAR(fast_checksum),
+   MYSQL_SYSVAR(commit_concurrency),
+   MYSQL_SYSVAR(concurrency_tickets),
+   MYSQL_SYSVAR(data_file_path),
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h       2010-12-04 15:52:23.458514045 +0900
++++ b/storage/innobase/include/buf0buf.h       2010-12-04 15:53:45.044514150 +0900
+@@ -531,6 +531,11 @@
+ buf_calc_page_new_checksum(
+ /*=======================*/
+       const byte*     page);  /*!< in: buffer page */
++UNIV_INTERN /* page checksum variant used when srv_fast_checksum is set */
++ulint
++buf_calc_page_new_checksum_32(
++/*==========================*/
++      const byte*     page);  /*!< in: buffer page */
+ /********************************************************************//**
+ In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
+ looked at the first few bytes of the page. This calculates that old
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h       2010-12-04 15:52:23.466513796 +0900
++++ b/storage/innobase/include/fil0fil.h       2010-12-04 15:53:45.046513558 +0900
+@@ -118,6 +118,7 @@
+ #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34 /*!< starting from 4.1.x this
+                                       contains the space id of the page */
+ #define FIL_PAGE_DATA         38      /*!< start of the data on the page */
++#define FIL_PAGE_DATA_ALIGN_32        40
+ /* @} */
+ /** File page trailer @{ */
+ #define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-04 15:52:23.474482590 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-04 15:53:45.048512100 +0900
+@@ -226,6 +226,7 @@
+ extern ibool  srv_use_doublewrite_buf;
+ extern ibool  srv_use_checksums;
++extern ibool  srv_fast_checksum;
+ extern ulong  srv_max_buf_pool_modified_pct;
+ extern ulong  srv_max_purge_lag;
+diff -ruN a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
+--- a/storage/innobase/include/ut0rnd.h        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/ut0rnd.h        2010-12-04 15:53:45.049510146 +0900
+@@ -124,6 +124,13 @@
+       const byte*     str,    /*!< in: string of bytes */
+       ulint           len)    /*!< in: length */
+       __attribute__((pure));
++UNIV_INLINE /* folds a buffer one 32-bit word at a time */
++ulint
++ut_fold_binary_32(
++/*==============*/
++      const byte*     str,    /*!< in: string of bytes, 4-byte aligned */
++      ulint           len)    /*!< in: length in bytes, a multiple of 4 */
++      __attribute__((pure));
+ /***********************************************************//**
+ Looks for a prime number slightly greater than the given argument.
+ The prime is chosen so that it is not near any power of 2.
+diff -ruN a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic
+--- a/storage/innobase/include/ut0rnd.ic       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/ut0rnd.ic       2010-12-04 15:53:45.050565975 +0900
+@@ -226,3 +226,28 @@
+       return(fold);
+ }
++
++UNIV_INLINE /* folds a buffer one 32-bit word at a time with ut_fold_ulint_pair() */
++ulint
++ut_fold_binary_32(
++/*==============*/
++      const byte*     str,    /*!< in: string of bytes, 4-byte aligned */
++      ulint           len)    /*!< in: length in bytes, a multiple of 4 */
++{
++      const ib_uint32_t*      str_end = (const ib_uint32_t*) (str + len); /* one past the last word */
++      const ib_uint32_t*      str_32 = (const ib_uint32_t*) str;         /* current word */
++      ulint                   fold = 0;   /* running fold; 0 is returned when len is 0 */
++
++      ut_ad(str);
++      /* This function is only for word-aligned data */
++      ut_ad(len % 4 == 0);
++      ut_ad((ulint)str % 4 == 0);
++      /* NOTE(review): words are read in host byte order, so the result presumably differs by endianness -- confirm */
++      while (str_32 < str_end) {
++              fold = ut_fold_ulint_pair(fold, (ulint)(*str_32));
++
++              str_32++;
++      }
++
++      return(fold);
++}
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-04 15:52:23.498513634 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-04 15:53:45.053550283 +0900
+@@ -412,6 +412,7 @@
+ UNIV_INTERN ibool     srv_use_doublewrite_buf = TRUE;
+ UNIV_INTERN ibool     srv_use_checksums = TRUE;
++UNIV_INTERN ibool     srv_fast_checksum = FALSE;
+ UNIV_INTERN ulong     srv_replication_delay           = 0;
diff --git a/innodb_files_extend.patch b/innodb_files_extend.patch
new file mode 100644 (file)
index 0000000..0833cfa
--- /dev/null
@@ -0,0 +1,523 @@
+# name       : innodb_files_extend.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c   2010-12-04 15:55:21.358513751 +0900
++++ b/storage/innobase/fil/fil0fil.c   2010-12-04 15:55:58.243481131 +0900
+@@ -4800,9 +4800,9 @@
+       ut_ad(ut_is_2pow(zip_size));
+       ut_ad(buf);
+       ut_ad(len > 0);
+-#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE
+-# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE"
+-#endif
++//#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE
++//# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE"
++//#endif
+       ut_ad(fil_validate());
+ #ifndef UNIV_HOTBACKUP
+ # ifndef UNIV_LOG_DEBUG
+diff -ruN a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
+--- a/storage/innobase/fsp/fsp0fsp.c   2010-12-04 15:52:23.411513754 +0900
++++ b/storage/innobase/fsp/fsp0fsp.c   2010-12-04 15:55:58.244514273 +0900
+@@ -657,15 +657,16 @@
+       ulint   offset)         /*!< in: page offset */
+ {
+ #ifndef DOXYGEN /* Doxygen gets confused of these */
+-# if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET \
+-              + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
+-#  error
+-# endif
++//# if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET
++//            + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
++//#  error
++//# endif
+ # if PAGE_ZIP_MIN_SIZE <= XDES_ARR_OFFSET \
+               + (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
+ #  error
+ # endif
+ #endif /* !DOXYGEN */
++      ut_a(UNIV_PAGE_SIZE > XDES_ARR_OFFSET + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE);
+       ut_ad(ut_is_2pow(zip_size));
+       if (!zip_size) {
+@@ -1464,12 +1465,12 @@
+                                                          mtr);
+               xdes_init(descr, mtr);
+-#if UNIV_PAGE_SIZE % FSP_EXTENT_SIZE
+-# error "UNIV_PAGE_SIZE % FSP_EXTENT_SIZE != 0"
+-#endif
+-#if PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE
+-# error "PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE != 0"
+-#endif
++//#if UNIV_PAGE_SIZE % FSP_EXTENT_SIZE
++//# error "UNIV_PAGE_SIZE % FSP_EXTENT_SIZE != 0"
++//#endif
++//#if PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE
++//# error "PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE != 0"
++//#endif
+               if (UNIV_UNLIKELY(init_xdes)) {
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:55:21.367482924 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:55:58.248549631 +0900
+@@ -148,6 +148,9 @@
+ static ulong innobase_write_io_threads;
+ static long innobase_buffer_pool_instances = 1;
++static ulong innobase_page_size;
++static ulong innobase_log_block_size;
++
+ static my_bool innobase_thread_concurrency_timer_based;
+ static long long innobase_buffer_pool_size, innobase_log_file_size;
+@@ -2269,6 +2272,62 @@
+       }
+ #endif /* UNIV_DEBUG */
++      srv_page_size = 0;
++      srv_page_size_shift = 0;
++
++      if (innobase_page_size != (1 << 14)) {
++              uint n_shift;
++
++              fprintf(stderr,
++                      "InnoDB: Warning: innodb_page_size has been changed from default value 16384. (###EXPERIMENTAL### operation)\n");
++              for (n_shift = 12; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) {
++                      if (innobase_page_size == ((ulong)1 << n_shift)) {
++                              srv_page_size_shift = n_shift;
++                              srv_page_size = (1 << srv_page_size_shift);
++                              fprintf(stderr,
++                                      "InnoDB: The universal page size of the database is set to %lu.\n",
++                                      srv_page_size);
++                              break;
++                      }
++              }
++      } else {
++              srv_page_size_shift = 14;
++              srv_page_size = (1 << srv_page_size_shift);
++      }
++
++      if (!srv_page_size_shift) {
++              fprintf(stderr,
++                      "InnoDB: Error: %lu is not valid value for innodb_page_size.\n",
++                      innobase_page_size);
++              goto error;
++      }
++
++      srv_log_block_size = 0;
++      if (innobase_log_block_size != (1 << 9)) { /*!=512*/
++              uint    n_shift;
++
++              fprintf(stderr,
++                      "InnoDB: Warning: innodb_log_block_size has been changed from default value 512. (###EXPERIMENTAL### operation)\n");
++              for (n_shift = 9; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) {
++                      if (innobase_log_block_size == ((ulong)1 << n_shift)) {
++                              srv_log_block_size = (1 << n_shift);
++                              fprintf(stderr,
++                                      "InnoDB: The log block size is set to %lu.\n",
++                                      srv_log_block_size);
++                              break;
++                      }
++              }
++      } else {
++              srv_log_block_size = 512;
++      }
++
++      if (!srv_log_block_size) {
++              fprintf(stderr,
++                      "InnoDB: Error: %lu is not valid value for innodb_log_block_size.\n",
++                      innobase_log_block_size);
++              goto error;
++      }
++
+ #ifndef MYSQL_SERVER
+       innodb_overwrite_relay_log_info = FALSE;
+ #endif
+@@ -7212,9 +7271,9 @@
+                               | DICT_TF_COMPACT
+                               | DICT_TF_FORMAT_ZIP
+                               << DICT_TF_FORMAT_SHIFT;
+-#if DICT_TF_ZSSIZE_MAX < 1
+-# error "DICT_TF_ZSSIZE_MAX < 1"
+-#endif
++//#if DICT_TF_ZSSIZE_MAX < 1
++//# error "DICT_TF_ZSSIZE_MAX < 1"
++//#endif
+               }
+       }
+@@ -11332,6 +11391,16 @@
+   "#### Attention: The checksum is not compatible for normal or disabled version! ####",
+   NULL, NULL, FALSE);
++static MYSQL_SYSVAR_ULONG(page_size, innobase_page_size,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "###EXPERIMENTAL###: The universal page size of the database. Changing for created database is not supported. Use on your own risk!",
++  NULL, NULL, (1 << 14), (1 << 12), (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0);
++
++static MYSQL_SYSVAR_ULONG(log_block_size, innobase_log_block_size,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "###EXPERIMENTAL###: The log block size of the transaction log file. Changing for created log file is not supported. Use on your own risk!",
++  NULL, NULL, (1 << 9)/*512*/, (1 << 9)/*512*/, (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0);
++
+ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
+   PLUGIN_VAR_READONLY,
+   "The common part for InnoDB table spaces.",
+@@ -11839,6 +11908,8 @@
+   NULL, NULL, 0, 0, 1, 0);
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
++  MYSQL_SYSVAR(page_size),
++  MYSQL_SYSVAR(log_block_size),
+   MYSQL_SYSVAR(additional_mem_pool_size),
+   MYSQL_SYSVAR(autoextend_increment),
+   MYSQL_SYSVAR(buffer_pool_size),
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h       2010-12-04 15:55:21.375482937 +0900
++++ b/storage/innobase/include/buf0buf.h       2010-12-04 15:55:58.258469088 +0900
+@@ -1602,7 +1602,7 @@
+       time_t          last_printout_time;
+                                       /*!< when buf_print_io was last time
+                                       called */
+-      buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES + 1];
++      buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
+                                       /*!< Statistics of buddy system,
+                                       indexed by block size */
+       buf_pool_stat_t stat;           /*!< current statistics */
+@@ -1698,7 +1698,7 @@
+       /* @{ */
+       UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
+                                       /*!< unmodified compressed pages */
+-      UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES];
++      UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES_MAX];
+                                       /*!< buddy free lists */
+       buf_page_t                      watch[BUF_POOL_WATCH_SIZE];
+@@ -1706,9 +1706,9 @@
+                                       pool watches. Protected by
+                                       buf_pool->mutex. */
+-#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
+-# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE"
+-#endif
++//#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
++//# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE"
++//#endif
+ #if BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE
+ # error "BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE"
+ #endif
+diff -ruN a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
+--- a/storage/innobase/include/buf0types.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0types.h     2010-12-04 15:55:58.259482590 +0900
+@@ -72,6 +72,7 @@
+                                       buddy system; must be at least
+                                       sizeof(buf_page_t) */
+ #define BUF_BUDDY_SIZES               (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
++#define BUF_BUDDY_SIZES_MAX   (UNIV_PAGE_SIZE_SHIFT_MAX - BUF_BUDDY_LOW_SHIFT)
+                                       /*!< number of buddy sizes */
+ /** twice the maximum block size of the buddy system;
+diff -ruN a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
+--- a/storage/innobase/include/log0log.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/log0log.h       2010-12-09 18:16:47.737728305 +0900
+@@ -672,6 +672,9 @@
+                                       when mysqld is first time started
+                                       on the restored database, it can
+                                       print helpful info for the user */
++#define LOG_FILE_OS_FILE_LOG_BLOCK_SIZE 64
++                                      /* extend to record log_block_size
++                                      of XtraDB. 0 means default 512 */
+ #define       LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
+                                       /* this 4-byte field is TRUE when
+                                       the writing of an archived log file
+diff -ruN a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
+--- a/storage/innobase/include/mtr0log.ic      2010-12-04 15:37:50.590551517 +0900
++++ b/storage/innobase/include/mtr0log.ic      2010-12-04 15:55:58.260482404 +0900
+@@ -203,7 +203,7 @@
+       system tablespace */
+       if ((space == TRX_SYS_SPACE
+            || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
+-          && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
++          && offset >= (ulint)FSP_EXTENT_SIZE && offset < 3 * (ulint)FSP_EXTENT_SIZE) {
+               if (trx_doublewrite_buf_is_being_created) {
+                       /* Do nothing: we only come to this branch in an
+                       InnoDB database creation. We do not redo log
+diff -ruN a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
+--- a/storage/innobase/include/os0file.h       2010-12-09 18:16:02.323727427 +0900
++++ b/storage/innobase/include/os0file.h       2010-12-09 18:16:47.738694194 +0900
+@@ -101,7 +101,7 @@
+ if this fails for a log block, then it is equivalent to a media failure in the
+ log. */
+-#define OS_FILE_LOG_BLOCK_SIZE                512
++#define OS_FILE_LOG_BLOCK_SIZE                srv_log_block_size
+ /** Options for file_create @{ */
+ #define       OS_FILE_OPEN                    51
+@@ -190,6 +190,8 @@
+ extern ulint  os_n_file_writes;
+ extern ulint  os_n_fsyncs;
++extern ulint  srv_log_block_size;
++
+ #ifdef UNIV_PFS_IO
+ /* Keys to register InnoDB I/O with performance schema */
+ extern mysql_pfs_key_t        innodb_file_data_key;
+diff -ruN a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
+--- a/storage/innobase/include/page0types.h    2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/page0types.h    2010-12-04 15:55:58.261483930 +0900
+@@ -56,8 +56,9 @@
+ /** Number of supported compressed page sizes */
+ #define PAGE_ZIP_NUM_SSIZE (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 2)
+-#if PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)
+-# error "PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)"
++#define PAGE_ZIP_NUM_SSIZE_MAX (UNIV_PAGE_SIZE_SHIFT_MAX - PAGE_ZIP_MIN_SIZE_SHIFT + 2)
++#if PAGE_ZIP_NUM_SSIZE_MAX > (1 << PAGE_ZIP_SSIZE_BITS)
++# error "PAGE_ZIP_NUM_SSIZE_MAX > (1 << PAGE_ZIP_SSIZE_BITS)"
+ #endif
+ /** Compressed page descriptor */
+@@ -98,7 +99,7 @@
+ typedef struct page_zip_stat_struct page_zip_stat_t;
+ /** Statistics on compression, indexed by page_zip_des_struct::ssize - 1 */
+-extern page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
++extern page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE_MAX - 1];
+ /**********************************************************************//**
+ Write the "deleted" flag of a record on a compressed page.  The flag must
+diff -ruN a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
+--- a/storage/innobase/include/trx0sys.h       2010-12-04 15:37:50.593480594 +0900
++++ b/storage/innobase/include/trx0sys.h       2010-12-04 15:55:58.262549554 +0900
+@@ -521,9 +521,9 @@
+ /** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
+ #define TRX_SYS_MYSQL_LOG_MAGIC_N     873422344
+-#if UNIV_PAGE_SIZE < 4096
+-# error "UNIV_PAGE_SIZE < 4096"
+-#endif
++//#if UNIV_PAGE_SIZE < 4096
++//# error "UNIV_PAGE_SIZE < 4096"
++//#endif
+ /** The offset of the MySQL replication info in the trx system header;
+ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
+ #define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
+diff -ruN a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
+--- a/storage/innobase/include/univ.i  2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/univ.i  2010-12-04 15:55:58.263549721 +0900
+@@ -292,9 +292,13 @@
+ */
+ /* The 2-logarithm of UNIV_PAGE_SIZE: */
+-#define UNIV_PAGE_SIZE_SHIFT  14
++/* #define UNIV_PAGE_SIZE_SHIFT       14 */
++#define UNIV_PAGE_SIZE_SHIFT_MAX      14
++#define UNIV_PAGE_SIZE_SHIFT  srv_page_size_shift
+ /* The universal page size of the database */
+-#define UNIV_PAGE_SIZE                (1 << UNIV_PAGE_SIZE_SHIFT)
++/* #define UNIV_PAGE_SIZE             (1 << UNIV_PAGE_SIZE_SHIFT) */
++#define UNIV_PAGE_SIZE                srv_page_size
++#define UNIV_PAGE_SIZE_MAX    (1 << UNIV_PAGE_SIZE_SHIFT_MAX)
+ /* Maximum number of parallel threads in a parallelized operation */
+ #define UNIV_MAX_PARALLELISM  32
+@@ -401,7 +405,7 @@
+ stored part of the field in the tablespace. The length field then
+ contains the sum of the following flag and the locally stored len. */
+-#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE)
++#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE_MAX)
+ /* Some macros to improve branch prediction and reduce cache misses */
+ #if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
+@@ -504,4 +508,6 @@
+       UNIV_MEM_ALLOC(addr, size);                     \
+ } while (0)
++extern ulint  srv_page_size_shift;
++extern ulint  srv_page_size;
+ #endif
+diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
+--- a/storage/innobase/log/log0log.c   2010-12-03 15:18:48.899986203 +0900
++++ b/storage/innobase/log/log0log.c   2010-12-04 15:55:58.266551567 +0900
+@@ -603,7 +603,9 @@
+       offset = (gr_lsn_size_offset + difference) % group_size;
++      if (sizeof(ulint) == 4) {
+       ut_a(offset < (((ib_int64_t) 1) << 32)); /* offset must be < 4 GB */
++      }
+       /* fprintf(stderr,
+       "Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
+@@ -1200,6 +1202,9 @@
+       /* Wipe over possible label of ibbackup --restore */
+       memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "    ", 4);
++      mach_write_to_4(buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE,
++                      srv_log_block_size);
++
+       dest_offset = nth_file * group->file_size;
+ #ifdef UNIV_DEBUG
+@@ -1793,9 +1798,7 @@
+       ulint           i;
+       ut_ad(mutex_own(&(log_sys->mutex)));
+-#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
+-# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
+-#endif
++      ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
+       buf = group->checkpoint_buf;
+@@ -1809,6 +1812,7 @@
+       mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
+ #ifdef UNIV_LOG_ARCHIVE
++#error "UNIV_LOG_ARCHIVE could not be enabled"
+       if (log_sys->archiving_state == LOG_ARCH_OFF) {
+               archived_lsn = IB_ULONGLONG_MAX;
+       } else {
+@@ -1822,7 +1826,9 @@
+       mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
+ #else /* UNIV_LOG_ARCHIVE */
+-      mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
++      mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN,
++                      (ib_uint64_t)log_group_calc_lsn_offset(
++                              log_sys->next_checkpoint_lsn, group));
+ #endif /* UNIV_LOG_ARCHIVE */
+       for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
+diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
+--- a/storage/innobase/log/log0recv.c  2010-12-03 17:32:15.638986405 +0900
++++ b/storage/innobase/log/log0recv.c  2010-12-04 15:55:58.269550689 +0900
+@@ -712,8 +712,22 @@
+                       group->lsn = mach_read_from_8(
+                               buf + LOG_CHECKPOINT_LSN);
++
++#ifdef UNIV_LOG_ARCHIVE
++#error "UNIV_LOG_ARCHIVE could not be enabled"
++#endif
++                      {
++                      ib_uint64_t tmp_lsn_offset = mach_read_from_8(
++                                      buf + LOG_CHECKPOINT_ARCHIVED_LSN);
++                              if (sizeof(ulint) != 4
++                                  && tmp_lsn_offset != IB_ULONGLONG_MAX) {
++                                      group->lsn_offset = (ulint) tmp_lsn_offset;
++                              } else {
+                       group->lsn_offset = mach_read_from_4(
+                               buf + LOG_CHECKPOINT_OFFSET);
++                              }
++                      }
++
+                       checkpoint_no = mach_read_from_8(
+                               buf + LOG_CHECKPOINT_NO);
+@@ -2955,6 +2969,7 @@
+       log_group_t*    max_cp_group;
+       log_group_t*    up_to_date_group;
+       ulint           max_cp_field;
++      ulint           log_hdr_log_block_size;
+       ib_uint64_t     checkpoint_lsn;
+       ib_uint64_t     checkpoint_no;
+       ib_uint64_t     old_scanned_lsn;
+@@ -3056,6 +3071,20 @@
+                      log_hdr_buf, max_cp_group);
+       }
++      log_hdr_log_block_size
++              = mach_read_from_4(log_hdr_buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE);
++      if (log_hdr_log_block_size == 0) {
++              /* 0 means default value */
++              log_hdr_log_block_size = 512;
++      }
++      if (log_hdr_log_block_size != srv_log_block_size) {
++              fprintf(stderr,
++                      "InnoDB: Error: The block size of ib_logfile (%lu) "
++                      "is not equal to innodb_log_block_size.\n",
++                      log_hdr_log_block_size);
++              return(DB_ERROR);
++      }
++
+ #ifdef UNIV_LOG_ARCHIVE
+       group = UT_LIST_GET_FIRST(log_sys->log_groups);
+diff -ruN a/storage/innobase/page/page0zip.c b/storage/innobase/page/page0zip.c
+--- a/storage/innobase/page/page0zip.c 2010-12-04 15:52:23.484482786 +0900
++++ b/storage/innobase/page/page0zip.c 2010-12-04 15:55:58.274551431 +0900
+@@ -49,7 +49,7 @@
+ #ifndef UNIV_HOTBACKUP
+ /** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
+-UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
++UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE_MAX - 1];
+ #endif /* !UNIV_HOTBACKUP */
+ /* Please refer to ../include/page0zip.ic for a description of the
+diff -ruN a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c
+--- a/storage/innobase/row/row0merge.c 2010-12-04 15:52:23.490513562 +0900
++++ b/storage/innobase/row/row0merge.c 2010-12-04 15:55:58.277550562 +0900
+@@ -97,7 +97,7 @@
+ row_merge_block_t.  Thus, it must be able to hold one merge record,
+ whose maximum size is the same as the minimum size of
+ row_merge_block_t. */
+-typedef byte  mrec_buf_t[UNIV_PAGE_SIZE];
++typedef byte  mrec_buf_t[UNIV_PAGE_SIZE_MAX];
+ /** @brief Merge record in row_merge_block_t.
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-04 15:55:21.384486344 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-04 15:55:58.282550845 +0900
+@@ -239,6 +239,13 @@
+ UNIV_INTERN ulint     srv_n_read_io_threads   = ULINT_MAX;
+ UNIV_INTERN ulint     srv_n_write_io_threads  = ULINT_MAX;
++/* The universal page size of the database; 0 until ha_innodb.cc sets it */
++UNIV_INTERN ulint     srv_page_size_shift     = 0;    /* 2-logarithm */
++UNIV_INTERN ulint     srv_page_size           = 0;    /* 1 << shift */
++
++/* The log block size; 0 until ha_innodb.cc sets it (default 512) */
++UNIV_INTERN ulint     srv_log_block_size      = 0;
++
+ /* User settable value of the number of pages that must be present
+ in the buffer cache and accessed sequentially for InnoDB to trigger a
+ readahead request. */
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-04 15:52:23.502513556 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:55:58.285550583 +0900
+@@ -1521,10 +1521,12 @@
+       }
+ #endif /* UNIV_LOG_ARCHIVE */
+-      if (srv_n_log_files * srv_log_file_size >= 262144) {
++      if (sizeof(ulint) == 4
++          && srv_n_log_files * srv_log_file_size
++             >= ((ulint)1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
+               fprintf(stderr,
+                       "InnoDB: Error: combined size of log files"
+-                      " must be < 4 GB\n");
++                      " must be < 4 GB on 32-bit systems\n");
+               return(DB_ERROR);
+       }
+@@ -1533,7 +1535,7 @@
+       for (i = 0; i < srv_n_data_files; i++) {
+ #ifndef __WIN__
+-              if (sizeof(off_t) < 5 && srv_data_file_sizes[i] >= 262144) {
++              if (sizeof(off_t) < 5 && srv_data_file_sizes[i] >= ((ulint)1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
+                       fprintf(stderr,
+                               "InnoDB: Error: file size must be < 4 GB"
+                               " with this MySQL binary\n"
diff --git a/innodb_fix_misc.patch b/innodb_fix_misc.patch
new file mode 100644 (file)
index 0000000..9a83e85
--- /dev/null
@@ -0,0 +1,206 @@
+# name       : innodb_fix_misc.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+#
+# comment: http://lists.mysql.com/commits/112400 is applied also for innodb_plugin
+#          to pass innodb_bug53756.test by innodb_plugin
+diff -ruN a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
+--- a/storage/innobase/dict/dict0load.c        2010-12-04 15:37:50.559480289 +0900
++++ b/storage/innobase/dict/dict0load.c        2010-12-04 15:57:53.078513745 +0900
+@@ -1851,6 +1851,8 @@
+       ut_ad(mutex_own(&(dict_sys->mutex)));
++      table = NULL;
++
+       /* NOTE that the operation of this function is protected by
+       the dictionary mutex, and therefore no deadlocks can occur
+       with other dictionary operations. */
+@@ -1877,15 +1879,17 @@
+                                 BTR_SEARCH_LEAF, &pcur, &mtr);
+       rec = btr_pcur_get_rec(&pcur);
+-      if (!btr_pcur_is_on_user_rec(&pcur)
+-          || rec_get_deleted_flag(rec, 0)) {
++      if (!btr_pcur_is_on_user_rec(&pcur)) {
+               /* Not found */
++              goto func_exit;
++      }
+-              btr_pcur_close(&pcur);
+-              mtr_commit(&mtr);
+-              mem_heap_free(heap);
+-
+-              return(NULL);
++      /* Find the first record that is not delete marked */
++      while (rec_get_deleted_flag(rec, 0)) {
++              if (!btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
++                      goto func_exit;
++              }
++              rec = btr_pcur_get_rec(&pcur);
+       }
+       /*---------------------------------------------------*/
+@@ -1898,12 +1902,7 @@
+       /* Check if the table id in record is the one searched for */
+       if (table_id != mach_read_from_8(field)) {
+-
+-              btr_pcur_close(&pcur);
+-              mtr_commit(&mtr);
+-              mem_heap_free(heap);
+-
+-              return(NULL);
++              goto func_exit;
+       }
+       /* Now we get the table name from the record */
+@@ -1911,7 +1910,7 @@
+       /* Load the table definition to memory */
+       table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len),
+                               TRUE);
+-
++func_exit:
+       btr_pcur_close(&pcur);
+       mtr_commit(&mtr);
+       mem_heap_free(heap);
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:57:13.035513990 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:57:53.084513775 +0900
+@@ -12007,7 +12007,7 @@
+   &innobase_storage_engine,
+   innobase_hton_name,
+   "Innobase Oy",
+-  "Supports transactions, row-level locking, and foreign keys",
++  "Percona-XtraDB, Supports transactions, row-level locking, and foreign keys",
+   PLUGIN_LICENSE_GPL,
+   innobase_init, /* Plugin Init */
+   NULL, /* Plugin Deinit */
+diff -ruN a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
+--- a/storage/innobase/include/univ.i  2010-12-04 15:57:13.050485224 +0900
++++ b/storage/innobase/include/univ.i  2010-12-04 15:57:53.091592933 +0900
+@@ -48,6 +48,11 @@
+ #define INNODB_VERSION_MINOR  1
+ #define INNODB_VERSION_BUGFIX 4
++#ifndef PERCONA_INNODB_VERSION
++#define PERCONA_INNODB_VERSION 12.1
++#endif
++
++
+ /* The following is the InnoDB version as shown in
+ SELECT plugin_version FROM information_schema.plugins;
+ calculated in make_version_string() in sql/sql_show.cc like this:
+@@ -58,13 +63,15 @@
+       (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
+ /* auxiliary macros to help creating the version as string */
+-#define __INNODB_VERSION(a, b, c)     (#a "." #b "." #c)
+-#define _INNODB_VERSION(a, b, c)      __INNODB_VERSION(a, b, c)
++#define __INNODB_VERSION(a, b, c, d)   (#a "." #b "." #c "-" #d)
++#define _INNODB_VERSION(a, b, c, d)    __INNODB_VERSION(a, b, c, d)
++
+ #define INNODB_VERSION_STR                    \
+       _INNODB_VERSION(INNODB_VERSION_MAJOR,   \
+                       INNODB_VERSION_MINOR,   \
+-                      INNODB_VERSION_BUGFIX)
++                      INNODB_VERSION_BUGFIX,  \
++                      PERCONA_INNODB_VERSION)
+ #define REFMAN "http://dev.mysql.com/doc/refman/5.1/en/"
+diff -ruN a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
+--- a/storage/innobase/row/row0mysql.c 2010-12-04 15:37:50.598481116 +0900
++++ b/storage/innobase/row/row0mysql.c 2010-12-04 15:57:53.092563335 +0900
+@@ -1194,6 +1194,13 @@
+       thr = que_fork_get_first_thr(prebuilt->ins_graph);
++      if (!prebuilt->mysql_has_locked && !(prebuilt->table->flags & (DICT_TF2_TEMPORARY << DICT_TF2_SHIFT))) {
++              fprintf(stderr, "InnoDB: Error: row_insert_for_mysql is called without ha_innobase::external_lock()\n");
++              if (trx->mysql_thd != NULL) {
++                      innobase_mysql_print_thd(stderr, trx->mysql_thd, 600);
++              }
++      }
++
+       if (prebuilt->sql_stat_start) {
+               node->state = INS_NODE_SET_IX_LOCK;
+               prebuilt->sql_stat_start = FALSE;
+diff -ruN a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
+--- a/storage/innobase/row/row0sel.c   2010-12-04 15:52:23.494514495 +0900
++++ b/storage/innobase/row/row0sel.c   2010-12-04 16:01:38.320883699 +0900
+@@ -3366,6 +3366,7 @@
+       ulint           offsets_[REC_OFFS_NORMAL_SIZE];
+       ulint*          offsets                         = offsets_;
+       ibool           table_lock_waited               = FALSE;
++      ibool           problematic_use                 = FALSE;
+       rec_offs_init(offsets_);
+@@ -3732,6 +3733,17 @@
+       /* Do some start-of-statement preparations */
++      if (!prebuilt->mysql_has_locked) {
++              if (!(prebuilt->table->flags & (DICT_TF2_TEMPORARY << DICT_TF2_SHIFT))) {
++                      fprintf(stderr, "InnoDB: Error: row_search_for_mysql() is called without ha_innobase::external_lock()\n");
++                      if (trx->mysql_thd != NULL) {
++                              innobase_mysql_print_thd(stderr, trx->mysql_thd, 600);
++                      }
++              }
++              problematic_use = TRUE;
++      }
++retry_check:
++      
+       if (!prebuilt->sql_stat_start) {
+               /* No need to set an intention lock or assign a read view */
+@@ -3742,6 +3754,18 @@
+                             " perform a consistent read\n"
+                             "InnoDB: but the read view is not assigned!\n",
+                             stderr);
++                      if (problematic_use) {
++                              fprintf(stderr, "InnoDB: It may be caused by calling "
++                                              "without ha_innobase::external_lock()\n"
++                                              "InnoDB: For the first-aid, avoiding the crash. "
++                                              "But it should be fixed ASAP.\n");
++                              if (prebuilt->table->flags & (DICT_TF2_TEMPORARY << DICT_TF2_SHIFT)
++                                  && trx->mysql_thd != NULL) {
++                                      innobase_mysql_print_thd(stderr, trx->mysql_thd, 600);
++                              }
++                              prebuilt->sql_stat_start = TRUE;
++                              goto retry_check;
++                      }
+                       trx_print(stderr, trx, 600);
+                       fputc('\n', stderr);
+                       ut_error;
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-04 15:57:13.073495392 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-04 16:02:50.704884053 +0900
+@@ -2032,7 +2032,7 @@
+       if (srv_print_verbose_log) {
+               ut_print_timestamp(stderr);
+               fprintf(stderr,
+-                      "  InnoDB: %s started; "
++                      " Percona XtraDB (http://www.percona.com) %s started; "
+                       "log sequence number %llu\n",
+                       INNODB_VERSION_STR, srv_start_lsn);
+       }
+diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
+--- a/storage/innobase/trx/trx0purge.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/trx/trx0purge.c 2010-12-04 15:57:53.106551154 +0900
+@@ -1131,8 +1131,7 @@
+       /* If we cannot advance the 'purge view' because of an old
+       'consistent read view', then the DML statements cannot be delayed.
+       Also, srv_max_purge_lag <= 0 means 'infinity'. */
+-      if (srv_max_purge_lag > 0
+-          && !UT_LIST_GET_LAST(trx_sys->view_list)) {
++      if (srv_max_purge_lag > 0) {
+               float   ratio = (float) trx_sys->rseg_history_len
+                       / srv_max_purge_lag;
+               if (ratio > ULINT_MAX / 10000) {
diff --git a/innodb_io_patches.patch b/innodb_io_patches.patch
new file mode 100644 (file)
index 0000000..3d737d3
--- /dev/null
@@ -0,0 +1,1101 @@
+# name       : innodb_io_patches.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-03 15:09:51.273986410 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-03 15:10:08.934990091 +0900
+@@ -320,6 +320,7 @@
+       /* When we traverse all the flush lists we don't want another
+       thread to add a dirty page to any flush list. */
++      if (srv_buf_pool_instances > 1)
+       log_flush_order_mutex_enter();
+       for (i = 0; i < srv_buf_pool_instances; i++) {
+@@ -343,6 +344,7 @@
+               }
+       }
++      if (srv_buf_pool_instances > 1)
+       log_flush_order_mutex_exit();
+       /* The returned answer may be out of date: the flush_list can
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/buf/buf0flu.c   2010-12-03 15:10:08.934990091 +0900
+@@ -1348,7 +1348,7 @@
+       ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+-      if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
++      if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) {
+               /* If there is little space, it is better not to flush
+               any block except from the end of the LRU list */
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/buf/buf0rea.c   2010-12-03 15:10:08.937050537 +0900
+@@ -260,6 +260,10 @@
+               = BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
+       ulint           threshold;
++      if (!(srv_read_ahead & 2)) {
++              return(0);
++      }
++
+       if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
+               /* No read-ahead to avoid thread deadlocks */
+               return(0);
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:09:51.283956391 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:10:08.963980444 +0900
+@@ -425,6 +425,12 @@
+   "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
+   NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
++static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
++  "Set to 0 (write and flush once per second),"
++  " 1 (write and flush at each commit)"
++  " or 2 (write at commit, flush once per second).",
++  NULL, NULL, 1, 0, 2, 0);
++
+ static handler *innobase_create_handler(handlerton *hton,
+                                         TABLE_SHARE *table,
+@@ -819,6 +825,17 @@
+       }
+ }
++/******************************************************************//**
++Returns the flush_log_at_trx_commit THDVAR read through thread handle thd. */
++extern "C" UNIV_INTERN
++ulong
++thd_flush_log_at_trx_commit(
++/*================================*/
++      void*   thd)    /*!< in: thread handle, cast to THD*; other hunks of this patch pass NULL (presumably selecting the global value - confirm) */
++{
++      return(THDVAR((THD*) thd, flush_log_at_trx_commit));
++}
++
+ /********************************************************************//**
+ Obtain the InnoDB transaction of a MySQL thread.
+ @return       reference to transaction pointer */
+@@ -2390,6 +2407,9 @@
+       srv_n_read_io_threads = (ulint) innobase_read_io_threads;
+       srv_n_write_io_threads = (ulint) innobase_write_io_threads;
++      srv_read_ahead &= 3;
++      srv_adaptive_flushing_method %= 3;
++
+       srv_force_recovery = (ulint) innobase_force_recovery;
+       srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+@@ -10107,6 +10127,10 @@
+           && (all
+               || !thd_test_options(
+                       thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
++              if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
++                      /* choose group commit rather than binlog order */
++                      return(error);
++              }
+               /* For ibbackup to work the order of transactions in binlog
+               and InnoDB must be the same. Consider the situation
+@@ -10917,9 +10941,9 @@
+ static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
+   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+-  "Purge threads can be either 0 or 1. Default is 0.",
++  "Purge threads can be either 0 or 1. Default is 1.",
+   NULL, NULL,
+-  0,                  /* Default setting */
++  1,                  /* Default setting */
+   0,                  /* Minimum value */
+   1, 0);              /* Maximum value */
+@@ -10961,12 +10985,18 @@
+   innodb_file_format_max_validate,
+   innodb_file_format_max_update, "Antelope");
+-static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
+-  PLUGIN_VAR_OPCMDARG,
+-  "Set to 0 (write and flush once per second),"
+-  " 1 (write and flush at each commit)"
+-  " or 2 (write at commit, flush once per second).",
+-  NULL, NULL, 1, 0, 2, 0);
++/* Changed to the THDVAR */
++//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
++//  PLUGIN_VAR_OPCMDARG,
++//  "Set to 0 (write and flush once per second),"
++//  " 1 (write and flush at each commit)"
++//  " or 2 (write at commit, flush once per second).",
++//  NULL, NULL, 1, 0, 2, 0);
++
++static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
++  PLUGIN_VAR_NOCMDARG,
++  "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
++  NULL, NULL, TRUE);
+ static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
+   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+@@ -11061,7 +11091,7 @@
+ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
+   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+   "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
+-  NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
++  NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
+ static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
+   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+@@ -11206,6 +11236,100 @@
+   "trigger a readahead.",
+   NULL, NULL, 56, 0, 64, 0);
++static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "The maximum size of the insert buffer. (in bytes)",
++  NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
++
++static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
++  PLUGIN_VAR_RQCMDARG,
++  "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
++  NULL, NULL, 1, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
++  PLUGIN_VAR_RQCMDARG,
++  "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
++  NULL, NULL, 100, 100, 999999999, 0);
++
++static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target,
++  PLUGIN_VAR_RQCMDARG,
++  "Control soft limit of checkpoint age. (0 : not control)",
++  NULL, NULL, 0, 0, ~0UL, 0);
++
++static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages,
++  PLUGIN_VAR_RQCMDARG,
++  "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
++  NULL, NULL, 1, 0, 1, 0);
++
++static /* update callback handed to MYSQL_SYSVAR_ENUM(read_ahead, ...) */
++void
++innodb_read_ahead_update(
++  THD* thd, /* not used here */
++  struct st_mysql_sys_var*     var, /* not used here */
++  void*        var_ptr, /* out: storage that receives the masked value */
++  const void*  save) /* in: points to the new value, read as long */
++{
++  *(long *)var_ptr= (*(long *)save) & 3; /* keep the low two bits, so indexes 4..7 (the "0".."3" aliases) land on 0..3 */
++}
++const char *read_ahead_names[]=
++{
++  "none", /* 0 */
++  "random", /* 1 */
++  "linear", /* 2 */
++  "both", /* 3 */
++  /* For compatibility of the older patch */
++  "0", /* 4 ("none" + 4) */
++  "1", /* 5 ("random" + 4) */
++  "2", /* 6 ("linear" + 4) */
++  "3", /* 7 ("both" + 4) */
++  NullS
++};
++TYPELIB read_ahead_typelib=
++{
++  array_elements(read_ahead_names) - 1, "read_ahead_typelib",
++  read_ahead_names, NULL
++};
++static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
++  PLUGIN_VAR_RQCMDARG,
++  "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]",
++  NULL, innodb_read_ahead_update, 2, &read_ahead_typelib);
++
++static /* update callback handed to MYSQL_SYSVAR_ENUM(adaptive_flushing_method, ...) */
++void
++innodb_adaptive_flushing_method_update(
++  THD* thd, /* not used here */
++  struct st_mysql_sys_var*     var, /* not used here */
++  void*        var_ptr, /* out: storage that receives the folded value */
++  const void*  save) /* in: points to the new value, read as long */
++{
++  *(long *)var_ptr= (*(long *)save) % 3; /* three methods: alias indexes 3..5 ("0".."2") fold onto 0..2, matching the "%= 3" done at startup */
++}
++const char *adaptive_flushing_method_names[]=
++{
++  "native", /* 0 */
++  "estimate", /* 1 */
++  "keep_average", /* 2 */
++  /* For compatibility of the older patch */
++  "0", /* 3 ("native" + 3) */
++  "1", /* 4 ("estimate" + 3) */
++  "2", /* 5 ("keep_average" + 3) */
++  NullS
++};
++TYPELIB adaptive_flushing_method_typelib=
++{
++  array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib",
++  adaptive_flushing_method_names, NULL
++};
++static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method,
++  PLUGIN_VAR_RQCMDARG,
++  "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
++  NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
++
++static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
++  PLUGIN_VAR_RQCMDARG,
++  "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
++  NULL, NULL, 0, 0, 1, 0);
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+   MYSQL_SYSVAR(additional_mem_pool_size),
+   MYSQL_SYSVAR(autoextend_increment),
+@@ -11226,6 +11350,7 @@
+   MYSQL_SYSVAR(file_format_check),
+   MYSQL_SYSVAR(file_format_max),
+   MYSQL_SYSVAR(flush_log_at_trx_commit),
++  MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
+   MYSQL_SYSVAR(flush_method),
+   MYSQL_SYSVAR(force_recovery),
+   MYSQL_SYSVAR(locks_unsafe_for_binlog),
+@@ -11262,6 +11387,14 @@
+   MYSQL_SYSVAR(show_verbose_locks),
+   MYSQL_SYSVAR(show_locks_held),
+   MYSQL_SYSVAR(version),
++  MYSQL_SYSVAR(ibuf_max_size),
++  MYSQL_SYSVAR(ibuf_active_contract),
++  MYSQL_SYSVAR(ibuf_accel_rate),
++  MYSQL_SYSVAR(checkpoint_age_target),
++  MYSQL_SYSVAR(flush_neighbor_pages),
++  MYSQL_SYSVAR(read_ahead),
++  MYSQL_SYSVAR(adaptive_flushing_method),
++  MYSQL_SYSVAR(enable_unsafe_group_commit),
+   MYSQL_SYSVAR(use_sys_malloc),
+   MYSQL_SYSVAR(use_native_aio),
+   MYSQL_SYSVAR(change_buffering),
+diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
+--- a/storage/innobase/ibuf/ibuf0ibuf.c        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/ibuf/ibuf0ibuf.c        2010-12-03 15:10:09.073984282 +0900
+@@ -524,8 +524,10 @@
+       grow in size, as the references on the upper levels of the tree can
+       change */
+-      ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
+-              / IBUF_POOL_SIZE_PER_MAX_SIZE;
++      ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
++              / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
++
++      srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
+       mutex_create(ibuf_pessimistic_insert_mutex_key,
+                    &ibuf_pessimistic_insert_mutex,
+@@ -2651,9 +2653,11 @@
+       size = ibuf->size;
+       max_size = ibuf->max_size;
++      if (!srv_ibuf_active_contract) {
+       if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
+               return;
+       }
++      }
+       sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
+diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
+--- a/storage/innobase/include/buf0rea.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0rea.h       2010-12-03 15:10:09.076066335 +0900
+@@ -124,8 +124,7 @@
+ /** The size in pages of the area which the read-ahead algorithms read if
+ invoked */
+-#define       BUF_READ_AHEAD_AREA(b)                                  \
+-      ut_min(64, ut_2_power_up((b)->curr_size / 32))
++#define       BUF_READ_AHEAD_AREA(b)          64
+ /** @name Modes used in read-ahead @{ */
+ /** read only pages belonging to the insert buffer tree */
+diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
+--- a/storage/innobase/include/ha_prototypes.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/ha_prototypes.h 2010-12-03 15:10:09.078026360 +0900
+@@ -275,5 +275,12 @@
+ /*===================*/
+         void*   thd,  /*!< in: thread handle (THD*) */
+         ulint   value);       /*!< in: time waited for the lock */
++/******************************************************************//**
++Returns the flush_log_at_trx_commit THDVAR read through thread handle thd. */
++
++ulong
++thd_flush_log_at_trx_commit(
++/*================================*/
++      void*   thd);   /*!< in: thread handle (THD*) */
+ #endif
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-03 15:09:51.291955835 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-03 15:10:09.079029047 +0900
+@@ -141,7 +141,8 @@
+ extern ulint  srv_n_log_files;
+ extern ulint  srv_log_file_size;
+ extern ulint  srv_log_buffer_size;
+-extern ulong  srv_flush_log_at_trx_commit;
++//extern ulong        srv_flush_log_at_trx_commit;
++extern char   srv_use_global_flush_log_at_trx_commit;
+ extern char   srv_adaptive_flushing;
+@@ -214,6 +215,16 @@
+ extern ulong  srv_max_purge_lag;
+ extern ulong  srv_replication_delay;
++
++extern long long      srv_ibuf_max_size;
++extern ulint  srv_ibuf_active_contract;
++extern ulint  srv_ibuf_accel_rate;
++extern ulint  srv_checkpoint_age_target;
++extern ulint  srv_flush_neighbor_pages;
++extern ulint  srv_enable_unsafe_group_commit;
++extern ulint  srv_read_ahead;
++extern ulint  srv_adaptive_flushing_method;
++
+ /*-------------------------------------------*/
+ extern ulint  srv_n_rows_inserted;
+@@ -389,8 +400,9 @@
+                               when writing data files, but do flush
+                               after writing to log files */
+       SRV_UNIX_NOSYNC,        /*!< do not flush after writing */
+-      SRV_UNIX_O_DIRECT       /*!< invoke os_file_set_nocache() on
++      SRV_UNIX_O_DIRECT,      /*!< invoke os_file_set_nocache() on
+                               data files */
++      SRV_UNIX_ALL_O_DIRECT   /* new method for examination: logfile also open O_DIRECT */
+ };
+ /** Alternatives for file i/o in Windows */
+diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
+--- a/storage/innobase/log/log0log.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/log/log0log.c   2010-12-03 15:10:09.084023562 +0900
+@@ -359,6 +359,33 @@
+ }
+ /************************************************************//**
++Gives log_sys->max_modified_age_async, capped at srv_checkpoint_age_target minus one eighth of it when that target is nonzero. */
++UNIV_INLINE
++ulint
++log_max_modified_age_async(void)
++{
++      if (srv_checkpoint_age_target) {
++              return(ut_min(log_sys->max_modified_age_async,
++                              srv_checkpoint_age_target
++                              - srv_checkpoint_age_target / 8));
++      } else {
++              return(log_sys->max_modified_age_async);
++      }
++}
++
++UNIV_INLINE /* gives log_sys->max_checkpoint_age_async, capped at srv_checkpoint_age_target when that target is nonzero */
++ulint
++log_max_checkpoint_age_async(void)
++{
++      if (srv_checkpoint_age_target) {
++              return(ut_min(log_sys->max_checkpoint_age_async,
++                              srv_checkpoint_age_target));
++      } else {
++              return(log_sys->max_checkpoint_age_async);
++      }
++}
++
++/************************************************************//**
+ Closes the log.
+ @return       lsn */
+ UNIV_INTERN
+@@ -427,7 +454,7 @@
+               }
+       }
+-      if (checkpoint_age <= log->max_modified_age_async) {
++      if (checkpoint_age <= log_max_modified_age_async()) {
+               goto function_exit;
+       }
+@@ -435,8 +462,8 @@
+       oldest_lsn = buf_pool_get_oldest_modification();
+       if (!oldest_lsn
+-          || lsn - oldest_lsn > log->max_modified_age_async
+-          || checkpoint_age > log->max_checkpoint_age_async) {
++          || lsn - oldest_lsn > log_max_modified_age_async()
++          || checkpoint_age > log_max_checkpoint_age_async()) {
+               log->check_flush_or_checkpoint = TRUE;
+       }
+@@ -1100,6 +1127,7 @@
+               group = (log_group_t*)((ulint)group - 1);
+               if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
++                  && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
+                   && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
+                       fil_flush(group->space_id);
+@@ -1121,8 +1149,9 @@
+                       logs and cannot end up here! */
+       if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
++          && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
+           && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
+-          && srv_flush_log_at_trx_commit != 2) {
++          && thd_flush_log_at_trx_commit(NULL) != 2) {
+               fil_flush(group->space_id);
+       }
+@@ -1501,7 +1530,8 @@
+       mutex_exit(&(log_sys->mutex));
+-      if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
++      if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
++          || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
+               /* O_DSYNC means the OS did not buffer the log file at all:
+               so we have also flushed to disk what we have written */
+@@ -2120,10 +2150,10 @@
+               sync = TRUE;
+               advance = 2 * (age - log->max_modified_age_sync);
+-      } else if (age > log->max_modified_age_async) {
++      } else if (age > log_max_modified_age_async()) {
+               /* A flush is not urgent: we do an asynchronous preflush */
+-              advance = age - log->max_modified_age_async;
++              advance = age - log_max_modified_age_async();
+       } else {
+               advance = 0;
+       }
+@@ -2137,7 +2167,7 @@
+               do_checkpoint = TRUE;
+-      } else if (checkpoint_age > log->max_checkpoint_age_async) {
++      } else if (checkpoint_age > log_max_checkpoint_age_async()) {
+               /* A checkpoint is not urgent: do it asynchronously */
+               do_checkpoint = TRUE;
+@@ -3349,6 +3379,17 @@
+               log_sys->flushed_to_disk_lsn,
+               log_sys->last_checkpoint_lsn);
++      fprintf(file,
++              "Max checkpoint age    %lu\n"
++              "Checkpoint age target %lu\n"
++              "Modified age          %lu\n"
++              "Checkpoint age        %lu\n",
++              (ulong) log_sys->max_checkpoint_age,
++              (ulong) log_max_checkpoint_age_async(),
++              (ulong) (log_sys->lsn -
++                              log_buf_pool_get_oldest_modification()),
++              (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
++
+       current_time = time(NULL);
+       time_elapsed = 0.001 + difftime(current_time,
+diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
+--- a/storage/innobase/log/log0recv.c  2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/log/log0recv.c  2010-12-03 15:10:09.089024191 +0900
+@@ -2906,9 +2906,12 @@
+       ib_uint64_t     archived_lsn;
+ #endif /* UNIV_LOG_ARCHIVE */
+       byte*           buf;
+-      byte            log_hdr_buf[LOG_FILE_HDR_SIZE];
++      byte*           log_hdr_buf;
++      byte            log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
+       ulint           err;
++      log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
++
+ #ifdef UNIV_LOG_ARCHIVE
+       ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
+ /** TRUE when recovering from a checkpoint */
+diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
+--- a/storage/innobase/os/os0file.c    2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/os/os0file.c    2010-12-03 15:10:09.093023540 +0900
+@@ -1399,7 +1399,7 @@
+ #endif
+ #ifdef UNIV_NON_BUFFERED_IO
+ # ifndef UNIV_HOTBACKUP
+-              if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
++              if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
+                       /* Do not use unbuffered i/o to log files because
+                       value 2 denotes that we do not flush the log at every
+                       commit, but only once per second */
+@@ -1415,7 +1415,7 @@
+               attributes = 0;
+ #ifdef UNIV_NON_BUFFERED_IO
+ # ifndef UNIV_HOTBACKUP
+-              if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
++              if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
+                       /* Do not use unbuffered i/o to log files because
+                       value 2 denotes that we do not flush the log at every
+                       commit, but only once per second */
+@@ -1560,6 +1560,11 @@
+               os_file_set_nocache(file, name, mode_str);
+       }
++      /* ALL_O_DIRECT: O_DIRECT also for transaction log file */
++      if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
++              os_file_set_nocache(file, name, mode_str);
++      }
++
+ #ifdef USE_FILE_LOCK
+       if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 15:09:51.301987792 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 15:13:29.369986988 +0900
+@@ -188,7 +188,8 @@
+ UNIV_INTERN ulint     srv_log_file_size       = ULINT_MAX;
+ /* size in database pages */
+ UNIV_INTERN ulint     srv_log_buffer_size     = ULINT_MAX;
+-UNIV_INTERN ulong     srv_flush_log_at_trx_commit = 1;
++//UNIV_INTERN ulong   srv_flush_log_at_trx_commit = 1;
++UNIV_INTERN char      srv_use_global_flush_log_at_trx_commit  = TRUE;
+ /* Try to flush dirty pages so as to avoid IO bursts at
+ the checkpoints. */
+@@ -399,6 +400,17 @@
+ UNIV_INTERN ulong     srv_replication_delay           = 0;
++UNIV_INTERN long long srv_ibuf_max_size = 0;
++UNIV_INTERN ulint     srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
++UNIV_INTERN ulint     srv_ibuf_accel_rate = 100;
++#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) (pct) / 10000.0))) /* pct percent of srv_io_capacity, further scaled by srv_ibuf_accel_rate taken as a percentage; argument parenthesised so expressions expand safely */
++
++UNIV_INTERN ulint     srv_checkpoint_age_target = 0;
++UNIV_INTERN ulint     srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
++
++UNIV_INTERN ulint     srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
++UNIV_INTERN ulint     srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
++UNIV_INTERN ulint     srv_adaptive_flushing_method = 0; /* 0: native  1: estimate  2: keep_average */
+ /*-------------------------------------------*/
+ UNIV_INTERN ulong     srv_n_spin_wait_rounds  = 30;
+ UNIV_INTERN ulong     srv_n_free_tickets_to_enter = 500;
+@@ -2703,6 +2715,7 @@
+       ulint           n_pages_purged  = 0;
+       ulint           n_bytes_merged;
+       ulint           n_pages_flushed;
++      ulint           n_pages_flushed_prev = 0;
+       ulint           n_bytes_archived;
+       ulint           n_tables_to_drop;
+       ulint           n_ios;
+@@ -2710,7 +2723,20 @@
+       ulint           n_ios_very_old;
+       ulint           n_pend_ios;
+       ulint           next_itr_time;
++      ulint           prev_adaptive_flushing_method = ULINT_UNDEFINED;
++      ulint           inner_loop = 0;
++      ibool           skip_sleep      = FALSE;
+       ulint           i;
++      struct t_prev_flush_info_struct {
++              ulint           count;
++              unsigned        space:32;
++              unsigned        offset:32;
++              ib_uint64_t     oldest_modification;
++      } prev_flush_info[MAX_BUFFER_POOLS];
++
++      ib_uint64_t     lsn_old;
++
++      ib_uint64_t     oldest_lsn;
+ #ifdef UNIV_DEBUG_THREAD_CREATION
+       fprintf(stderr, "Master thread starts, id %lu\n",
+@@ -2732,6 +2758,9 @@
+       mutex_exit(&kernel_mutex);
++      mutex_enter(&(log_sys->mutex));
++      lsn_old = log_sys->lsn;
++      mutex_exit(&(log_sys->mutex));
+ loop:
+       /*****************************************************************/
+       /* ---- When there is database activity by users, we cycle in this
+@@ -2762,9 +2791,13 @@
+       /* Sleep for 1 second on entrying the for loop below the first time. */
+       next_itr_time = ut_time_ms() + 1000;
++      skip_sleep = FALSE;
++
+       for (i = 0; i < 10; i++) {
+               ulint   cur_time = ut_time_ms();
++              n_pages_flushed = 0; /* initialize */
++
+               /* ALTER TABLE in MySQL requires on Unix that the table handler
+               can drop tables lazily after there no longer are SELECT
+               queries to them. */
+@@ -2788,6 +2821,7 @@
+               srv_main_thread_op_info = "sleeping";
+               srv_main_1_second_loops++;
++              if (!skip_sleep) {
+               if (next_itr_time > cur_time
+                   && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+@@ -2798,10 +2832,26 @@
+                                       (next_itr_time - cur_time)
+                                        * 1000));
+                       srv_main_sleeps++;
++
++                      /*
++                      mutex_enter(&(log_sys->mutex));
++                      oldest_lsn = buf_pool_get_oldest_modification();
++                      ib_uint64_t     lsn = log_sys->lsn;
++                      mutex_exit(&(log_sys->mutex));
++
++                      if(oldest_lsn)
++                      fprintf(stderr,
++                              "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
++                              (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
++                              lsn - lsn_old);
++                      */
+               }
+               /* Each iteration should happen at 1 second interval. */
+               next_itr_time = ut_time_ms() + 1000;
++              } /* if (!skip_sleep) */
++
++              skip_sleep = FALSE;
+               /* Flush logs if needed */
+               srv_sync_log_buffer_in_background();
+@@ -2821,7 +2871,7 @@
+               if (n_pend_ios < SRV_PEND_IO_THRESHOLD
+                   && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
+                       srv_main_thread_op_info = "doing insert buffer merge";
+-                      ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
++                      ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
+                       /* Flush logs if needed */
+                       srv_sync_log_buffer_in_background();
+@@ -2838,7 +2888,11 @@
+                       n_pages_flushed = buf_flush_list(
+                               PCT_IO(100), IB_ULONGLONG_MAX);
+-              } else if (srv_adaptive_flushing) {
++                      mutex_enter(&(log_sys->mutex));
++                      lsn_old = log_sys->lsn;
++                      mutex_exit(&(log_sys->mutex));
++                      prev_adaptive_flushing_method = ULINT_UNDEFINED;
++              } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
+                       /* Try to keep the rate of flushing of dirty
+                       pages such that redo log generation does not
+@@ -2854,6 +2908,223 @@
+                                               n_flush,
+                                               IB_ULONGLONG_MAX);
+                       }
++
++                      mutex_enter(&(log_sys->mutex));
++                      lsn_old = log_sys->lsn;
++                      mutex_exit(&(log_sys->mutex));
++                      prev_adaptive_flushing_method = ULINT_UNDEFINED;
++              } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
++
++                      /* Try to keep modified age not to exceed
++                      max_checkpoint_age * 7/8 line */
++
++                      mutex_enter(&(log_sys->mutex));
++
++                      oldest_lsn = buf_pool_get_oldest_modification();
++                      if (oldest_lsn == 0) {
++                              lsn_old = log_sys->lsn;
++                              mutex_exit(&(log_sys->mutex));
++
++                      } else {
++                              if ((log_sys->lsn - oldest_lsn)
++                                  > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
++                                      /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
++                                      /* We should not flush from here. */
++                                      lsn_old = log_sys->lsn;
++                                      mutex_exit(&(log_sys->mutex));
++                              } else if ((log_sys->lsn - oldest_lsn)
++                                         > (log_sys->max_checkpoint_age)/4 ) {
++
++                                      /* defence line (max_checkpoint_age * 1/2) */
++                                      ib_uint64_t     lsn = log_sys->lsn;
++
++                                      ib_uint64_t     level, bpl;
++                                      buf_page_t*     bpage;
++                                      ulint           j;
++
++                                      mutex_exit(&(log_sys->mutex));
++
++                                      bpl = 0;
++
++                                      for (j = 0; j < srv_buf_pool_instances; j++) {
++                                              buf_pool_t*     buf_pool;
++                                              ulint           n_blocks;
++
++                                              buf_pool = buf_pool_from_array(j);
++
++                                              /* The scanning flush_list is optimistic here */
++
++                                              level = 0;
++                                              n_blocks = 0;
++                                              bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
++
++                                              while (bpage != NULL) {
++                                                      ib_uint64_t     oldest_modification = bpage->oldest_modification;
++                                                      if (oldest_modification != 0) {
++                                                              level += log_sys->max_checkpoint_age
++                                                                       - (lsn - oldest_modification);
++                                                      }
++                                                      bpage = UT_LIST_GET_NEXT(list, bpage);
++                                                      n_blocks++;
++                                              }
++
++                                              if (level) {
++                                                      bpl += ((ib_uint64_t) n_blocks * n_blocks
++                                                              * (lsn - lsn_old)) / level;
++                                              }
++
++                                      }
++
++                                      if (!srv_use_doublewrite_buf) {
++                                              /* flush is faster than when doublewrite */
++                                              bpl = (bpl * 7) / 8;
++                                      }
++
++                                      if (bpl) {
++retry_flush_batch:
++                                              n_pages_flushed = buf_flush_list(bpl,
++                                                                      oldest_lsn + (lsn - lsn_old));
++                                              if (n_pages_flushed == ULINT_UNDEFINED) {
++                                                      os_thread_sleep(5000);
++                                                      goto retry_flush_batch;
++                                              }
++                                      }
++
++                                      lsn_old = lsn;
++                                      /*
++                                      fprintf(stderr,
++                                              "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
++                                              (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
++                                              lsn - lsn_old, bpl);
++                                      */
++                              } else {
++                                      lsn_old = log_sys->lsn;
++                                      mutex_exit(&(log_sys->mutex));
++                              }
++                      }
++                      prev_adaptive_flushing_method = 1;
++              } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
++                      buf_pool_t*     buf_pool;
++                      buf_page_t*     bpage;
++                      ib_uint64_t     lsn;
++                      ulint           j;
++
++                      mutex_enter(&(log_sys->mutex));
++                      oldest_lsn = buf_pool_get_oldest_modification();
++                      lsn = log_sys->lsn;
++                      mutex_exit(&(log_sys->mutex));
++
++                      /* upper loop/sec. (x10) */
++                      next_itr_time -= 900; /* 1000 - 900 == 100 */
++                      inner_loop++;
++                      if (inner_loop < 10) {
++                              i--;
++                      } else {
++                              inner_loop = 0;
++                      }
++
++                      if (prev_adaptive_flushing_method == 2) {
++                              lint    n_flush;
++                              lint    blocks_sum, new_blocks_sum, flushed_blocks_sum;
++
++                              blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
++
++                              /* prev_flush_info[j] should be the previous loop's */
++                              for (j = 0; j < srv_buf_pool_instances; j++) {
++                                      lint    blocks_num, new_blocks_num, flushed_blocks_num;
++                                      ibool   found;
++
++                                      buf_pool = buf_pool_from_array(j);
++
++                                      blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
++                                      bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
++                                      new_blocks_num = 0;
++
++                                      found = FALSE;
++                                      while (bpage != NULL) {
++                                              if (prev_flush_info[j].space == bpage->space
++                                                  && prev_flush_info[j].offset == bpage->offset
++                                                  && prev_flush_info[j].oldest_modification
++                                                              == bpage->oldest_modification) {
++                                                      found = TRUE;
++                                                      break;
++                                              }
++                                              bpage = UT_LIST_GET_NEXT(list, bpage);
++                                              new_blocks_num++;
++                                      }
++                                      if (!found) {
++                                              new_blocks_num = blocks_num;
++                                      }
++
++                                      flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
++                                                              - blocks_num;
++                                      if (flushed_blocks_num < 0) {
++                                              flushed_blocks_num = 0;
++                                      }
++
++                                      bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
++
++                                      prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
++                                      if (bpage) {
++                                              prev_flush_info[j].space = bpage->space;
++                                              prev_flush_info[j].offset = bpage->offset;
++                                              prev_flush_info[j].oldest_modification = bpage->oldest_modification;
++                                      } else {
++                                              prev_flush_info[j].space = 0;
++                                              prev_flush_info[j].offset = 0;
++                                              prev_flush_info[j].oldest_modification = 0;
++                                      }
++
++                                      new_blocks_sum += new_blocks_num;
++                                      flushed_blocks_sum += flushed_blocks_num;
++                                      blocks_sum += blocks_num;
++                              }
++
++                              n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
++                              if (flushed_blocks_sum > n_pages_flushed_prev) {
++                                      n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
++                              }
++
++                              if (n_flush > 0) {
++                                      n_flush++;
++                                      n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
++                              } else {
++                                      n_pages_flushed = 0;
++                              }                                       
++                      } else {
++                              /* store previous first pages of the flush_list */
++                              for (j = 0; j < srv_buf_pool_instances; j++) {
++                                      buf_pool = buf_pool_from_array(j);
++
++                                      bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
++
++                                      prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
++                                      if (bpage) {
++                                              prev_flush_info[j].space = bpage->space;
++                                              prev_flush_info[j].offset = bpage->offset;
++                                              prev_flush_info[j].oldest_modification = bpage->oldest_modification;
++                                      } else {
++                                              prev_flush_info[j].space = 0;
++                                              prev_flush_info[j].offset = 0;
++                                              prev_flush_info[j].oldest_modification = 0;
++                                      }
++                              }
++                              n_pages_flushed = 0;
++                      }
++
++                      lsn_old = lsn;
++                      prev_adaptive_flushing_method = 2;
++              } else {
++                      mutex_enter(&(log_sys->mutex));
++                      lsn_old = log_sys->lsn;
++                      mutex_exit(&(log_sys->mutex));
++                      prev_adaptive_flushing_method = ULINT_UNDEFINED;
++              }
++
++              if (n_pages_flushed == ULINT_UNDEFINED) {
++                      n_pages_flushed_prev = 0;
++              } else {
++                      n_pages_flushed_prev = n_pages_flushed;
+               }
+               if (srv_activity_count == old_activity_count) {
+@@ -2902,7 +3173,7 @@
+       even if the server were active */
+       srv_main_thread_op_info = "doing insert buffer merge";
+-      ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
++      ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
+       /* Flush logs if needed */
+       srv_sync_log_buffer_in_background();
+@@ -3010,7 +3281,7 @@
+               buf_flush_list below. Otherwise, the system favors
+               clean pages over cleanup throughput. */
+               n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
+-                                                         PCT_IO(100));
++                                                         PCT_IBUF_IO(100));
+       }
+       srv_main_thread_op_info = "reserving kernel mutex";
+@@ -3156,6 +3427,7 @@
+       srv_slot_t*     slot;
+       ulint           slot_no = ULINT_UNDEFINED;
+       ulint           n_total_purged = ULINT_UNDEFINED;
++      ulint           next_itr_time;
+       ut_a(srv_n_purge_threads == 1);
+@@ -3178,9 +3450,12 @@
+       mutex_exit(&kernel_mutex);
++      next_itr_time = ut_time_ms();
++
+       while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
+               ulint   n_pages_purged;
++              ulint   cur_time;
+               /* If there are very few records to purge or the last
+               purge didn't purge any records then wait for activity.
+@@ -3221,6 +3496,16 @@
+               } while (n_pages_purged > 0 && !srv_fast_shutdown);
+               srv_sync_log_buffer_in_background();
++
++              cur_time = ut_time_ms();
++              if (next_itr_time > cur_time) {
++                      os_thread_sleep(ut_min(1000000,
++                                      (next_itr_time - cur_time)
++                                       * 1000));
++                      next_itr_time = ut_time_ms() + 1000;
++              } else {
++                      next_itr_time = cur_time + 1000;
++              }
+       }
+       mutex_enter(&kernel_mutex);
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-03 15:10:09.103023543 +0900
+@@ -1184,6 +1184,9 @@
+       } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
+               srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
++      } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
++              srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
++
+       } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
+               srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/trx/trx0trx.c   2010-12-03 15:10:09.106023937 +0900
+@@ -865,6 +865,7 @@
+       trx->read_view = NULL;
+       if (lsn) {
++              ulint   flush_log_at_trx_commit;
+               mutex_exit(&kernel_mutex);
+@@ -873,6 +874,12 @@
+                       trx_undo_insert_cleanup(trx);
+               }
++              if (srv_use_global_flush_log_at_trx_commit) {
++                      flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
++              } else {
++                      flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
++              }
++
+               /* NOTE that we could possibly make a group commit more
+               efficient here: call os_thread_yield here to allow also other
+               trxs to come to commit! */
+@@ -904,9 +911,9 @@
+               if (trx->flush_log_later) {
+                       /* Do nothing yet */
+                       trx->must_flush_log_later = TRUE;
+-              } else if (srv_flush_log_at_trx_commit == 0) {
++              } else if (flush_log_at_trx_commit == 0) {
+                       /* Do nothing */
+-              } else if (srv_flush_log_at_trx_commit == 1) {
++              } else if (flush_log_at_trx_commit == 1) {
+                       if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+                               /* Write the log but do not flush it to disk */
+@@ -918,7 +925,7 @@
+                               log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
+                       }
+-              } else if (srv_flush_log_at_trx_commit == 2) {
++              } else if (flush_log_at_trx_commit == 2) {
+                       /* Write the log but do not flush it to disk */
+@@ -1582,16 +1589,23 @@
+       trx_t*  trx)    /*!< in: trx handle */
+ {
+       ib_uint64_t     lsn     = trx->commit_lsn;
++      ulint           flush_log_at_trx_commit;
+       ut_a(trx);
+       trx->op_info = "flushing log";
++      if (srv_use_global_flush_log_at_trx_commit) {
++              flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
++      } else {
++              flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
++      }
++
+       if (!trx->must_flush_log_later) {
+               /* Do nothing */
+-      } else if (srv_flush_log_at_trx_commit == 0) {
++      } else if (flush_log_at_trx_commit == 0) {
+               /* Do nothing */
+-      } else if (srv_flush_log_at_trx_commit == 1) {
++      } else if (flush_log_at_trx_commit == 1) {
+               if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+                       /* Write the log but do not flush it to disk */
+@@ -1602,7 +1616,7 @@
+                       log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
+               }
+-      } else if (srv_flush_log_at_trx_commit == 2) {
++      } else if (flush_log_at_trx_commit == 2) {
+               /* Write the log but do not flush it to disk */
+@@ -1855,6 +1869,8 @@
+       /*--------------------------------------*/
+       if (lsn) {
++              ulint   flush_log_at_trx_commit;
++
+               /* Depending on the my.cnf options, we may now write the log
+               buffer to the log files, making the prepared state of the
+               transaction durable if the OS does not crash. We may also
+@@ -1874,9 +1890,15 @@
+               mutex_exit(&kernel_mutex);
+-              if (srv_flush_log_at_trx_commit == 0) {
++              if (srv_use_global_flush_log_at_trx_commit) {
++                      flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
++              } else {
++                      flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
++              }
++
++              if (flush_log_at_trx_commit == 0) {
+                       /* Do nothing */
+-              } else if (srv_flush_log_at_trx_commit == 1) {
++              } else if (flush_log_at_trx_commit == 1) {
+                       if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+                               /* Write the log but do not flush it to disk */
+@@ -1888,7 +1910,7 @@
+                               log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
+                       }
+-              } else if (srv_flush_log_at_trx_commit == 2) {
++              } else if (flush_log_at_trx_commit == 2) {
+                       /* Write the log but do not flush it to disk */
diff --git a/innodb_lru_dump_restore.patch b/innodb_lru_dump_restore.patch
new file mode 100644 (file)
index 0000000..6e40210
--- /dev/null
@@ -0,0 +1,677 @@
+# name       : innodb_lru_dump_restore.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
+--- a/storage/innobase/buf/buf0lru.c   2010-12-03 15:49:59.185023424 +0900
++++ b/storage/innobase/buf/buf0lru.c   2010-12-04 15:33:37.626482350 +0900
+@@ -2250,6 +2250,285 @@
+       memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
+ }
++/** File name handed to the os_file_*() calls for the LRU dump. */
++#define LRU_DUMP_FILE "ib_lru_dump"
++
++/********************************************************************//**
++Dump the LRU page list to the specific file (LRU_DUMP_FILE).
++For every buffer pool instance the LRU list is walked from its last
++element towards its first while holding that instance's LRU_list_mutex,
++and the (space, offset) pair of each page is stored as two 4-byte values
++written with mach_write_to_4(). Pairs are packed into a buffer of
++UNIV_PAGE_SIZE bytes which is written to the file each time it fills up.
++The last buffer written carries a pair of 0xFFFFFFFF values as the
++end-of-list marker; the unused tail of that buffer is zero.
++Nothing is dumped when the name of a configured data file contains
++LRU_DUMP_FILE.
++@return TRUE if the whole list and the end marker were written,
++FALSE otherwise */
++UNIV_INTERN
++ibool
++buf_LRU_file_dump(void)
++/*===================*/
++{
++      os_file_t       dump_file = -1;
++      ibool           success;
++      byte*           buffer_base = NULL;
++      byte*           buffer = NULL;
++      buf_page_t*     bpage;
++      ulint           buffers;        /* number of full buffers written */
++      ulint           offset;         /* fill level of buffer, in 4-byte words */
++      ibool           ret = FALSE;
++      ulint           i;
++
++      /* Refuse to run if the dump file name could collide with a data
++      file named in innodb_data_file_path. */
++      for (i = 0; i < srv_n_data_files; i++) {
++              if (strstr(srv_data_file_names[i], LRU_DUMP_FILE) != NULL) {
++                      fprintf(stderr,
++                              " InnoDB: The name '%s' seems to be used for"
++                              " innodb_data_file_path. Dumping LRU list is not"
++                              " done for safeness.\n", LRU_DUMP_FILE);
++                      goto end;
++              }
++      }
++
++      /* Over-allocate so that an UNIV_PAGE_SIZE-aligned buffer fits. */
++      buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
++      buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
++      if (!buffer) {
++              fprintf(stderr,
++                      " InnoDB: cannot allocate buffer.\n");
++              goto end;
++      }
++
++      dump_file = os_file_create(innodb_file_temp_key, LRU_DUMP_FILE, OS_FILE_OVERWRITE,
++                              OS_FILE_NORMAL, OS_DATA_FILE, &success);
++      if (!success) {
++              os_file_get_last_error(TRUE);
++              fprintf(stderr,
++                      " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
++              goto end;
++      }
++
++      buffers = offset = 0;
++
++      for (i = 0; i < srv_buf_pool_instances; i++) {
++              buf_pool_t*     buf_pool;
++
++              buf_pool = buf_pool_from_array(i);
++
++              mutex_enter(&buf_pool->LRU_list_mutex);
++              bpage = UT_LIST_GET_LAST(buf_pool->LRU);
++
++              while (bpage != NULL) {
++                      if (offset == 0) {
++                              /* starting a fresh buffer */
++                              memset(buffer, 0, UNIV_PAGE_SIZE);
++                      }
++
++                      mach_write_to_4(buffer + offset * 4, bpage->space);
++                      offset++;
++                      mach_write_to_4(buffer + offset * 4, bpage->offset);
++                      offset++;
++
++                      if (offset == UNIV_PAGE_SIZE/4) {
++                              /* The buffer is full. The two offset
++                              arguments are the low 32 bits and the
++                              remaining high bits of the byte offset
++                              (buffers << UNIV_PAGE_SIZE_SHIFT). */
++                              success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
++                                              (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
++                                              (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
++                                              UNIV_PAGE_SIZE);
++                              if (!success) {
++                                      mutex_exit(&buf_pool->LRU_list_mutex);
++                                      fprintf(stderr,
++                                              " InnoDB: cannot write page %lu of %s\n",
++                                              (ulong) buffers, LRU_DUMP_FILE);
++                                      goto end;
++                              }
++                              buffers++;
++                              offset = 0;
++                      }
++
++                      bpage = UT_LIST_GET_PREV(LRU, bpage);
++              }
++              mutex_exit(&buf_pool->LRU_list_mutex);
++      }
++
++      if (offset == 0) {
++              /* the previous buffer was flushed exactly at its end, so
++              the marker goes into an otherwise empty buffer */
++              memset(buffer, 0, UNIV_PAGE_SIZE);
++      }
++
++      /* end-of-list marker */
++      mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
++      offset++;
++      mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
++      offset++;
++
++      success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
++                      (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
++                      (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
++                      UNIV_PAGE_SIZE);
++      if (!success) {
++              /* report the failure like the write inside the loop does */
++              fprintf(stderr,
++                      " InnoDB: cannot write page %lu of %s\n",
++                      (ulong) buffers, LRU_DUMP_FILE);
++              goto end;
++      }
++
++      ret = TRUE;
++end:
++      if (dump_file != -1)
++              os_file_close(dump_file);
++      if (buffer_base)
++              ut_free(buffer_base);
++
++      return(ret);
++}
++
++typedef struct {      /* one (space, page) pair read back from the LRU dump file */
++      ib_uint32_t space_id;   /* first 4-byte value of the pair */
++      ib_uint32_t page_no;    /* second 4-byte value of the pair */
++} dump_record_t;
++
++/* qsort() comparison callback for dump_record_t: orders by space_id first
++and by page_no within the same space_id. Returns -1, 0 or 1. */
++static int dump_record_cmp(const void *a, const void *b)
++{
++      const dump_record_t *lhs = a;
++      const dump_record_t *rhs = b;
++
++      if (lhs->space_id != rhs->space_id) {
++              return lhs->space_id < rhs->space_id ? -1 : 1;
++      }
++
++      if (lhs->page_no != rhs->page_no) {
++              return lhs->page_no < rhs->page_no ? -1 : 1;
++      }
++
++      return 0;
++}
++
++/********************************************************************//**
++Read the pages based on the specific file (LRU_DUMP_FILE).
++The file is read in units of UNIV_PAGE_SIZE bytes and decoded as pairs of
++4-byte values (space id, page number) with mach_read_from_4() until a pair
++containing 0xFFFFFFFF is met. The collected pairs are sorted with
++dump_record_cmp() and, for each pair whose tablespace is known and for
++which fil_area_is_exist() succeeds, a read is requested through
++buf_read_page_low().
++@return TRUE if the file was processed and the read requests were issued,
++FALSE if the file could not be opened or sized, has an unacceptable size,
++a buffer could not be allocated, or a file read failed */
++UNIV_INTERN
++ibool
++buf_LRU_file_restore(void)
++/*======================*/
++{
++      os_file_t       dump_file = -1;
++      ibool           success;
++      byte*           buffer_base = NULL;
++      byte*           buffer = NULL;
++      ulint           buffers;        /* index of the next buffer to read */
++      ulint           offset;
++      ulint           reads = 0;      /* sum of buf_read_page_low() results */
++      ulint           req = 0;        /* number of read attempts */
++      ibool           terminated = FALSE;
++      ibool           ret = FALSE;
++      dump_record_t*  records = NULL;
++      ulint           size;
++      ulint           size_high;
++      ulint           length;         /* number of entries stored in records */
++
++      dump_file = os_file_create_simple_no_error_handling(innodb_file_temp_key,
++              LRU_DUMP_FILE, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
++      if (!success || !os_file_get_size(dump_file, &size, &size_high)) {
++              os_file_get_last_error(TRUE);
++              fprintf(stderr,
++                      " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
++              goto end;
++      }
++      /* The file must be non-empty, must not need the high size word,
++      and must consist of whole 8-byte pairs. */
++      if (size == 0 || size_high > 0 || size % 8) {
++              fprintf(stderr, " InnoDB: broken LRU dump file\n");
++              goto end;
++      }
++      buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
++      buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
++      /* as many bytes as the file holds; the loop below treats every
++      record as taking 8 of them */
++      records = ut_malloc(size);
++      if (!buffer || !records) {
++              fprintf(stderr,
++                      " InnoDB: cannot allocate buffer.\n");
++              goto end;
++      }
++
++      buffers = 0;
++      length = 0;
++      while (!terminated) {
++              /* The two offset arguments are the low 32 bits and the
++              remaining high bits of the byte offset
++              (buffers << UNIV_PAGE_SIZE_SHIFT). */
++              success = os_file_read(dump_file, buffer,
++                              (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
++                              (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
++                              UNIV_PAGE_SIZE);
++              if (!success) {
++                      fprintf(stderr,
++                              " InnoDB: cannot read page %lu of %s,"
++                              " or meet unexpected terminal.\n",
++                              (ulong) buffers, LRU_DUMP_FILE);
++                      goto end;
++              }
++
++              /* offset counts 4-byte words; each record uses two */
++              for (offset = 0; offset < UNIV_PAGE_SIZE/4; offset += 2) {
++                      ulint   space_id;
++                      ulint   page_no;
++
++                      space_id = mach_read_from_4(buffer + offset * 4);
++                      page_no = mach_read_from_4(buffer + (offset + 1) * 4);
++                      if (space_id == 0xFFFFFFFFUL
++                          || page_no == 0xFFFFFFFFUL) {
++                              /* end-of-list marker */
++                              terminated = TRUE;
++                              break;
++                      }
++
++                      records[length].space_id = space_id;
++                      records[length].page_no = page_no;
++                      length++;
++                      if (length * 8 >= size) {
++                              /* records is full: stop before writing
++                              past its end */
++                              fprintf(stderr,
++                                      " InnoDB: could not find the "
++                                      "end-of-file marker after reading "
++                                      "the expected %lu bytes from the "
++                                      "LRU dump file.\n"
++                                      " InnoDB: this could be caused by a "
++                                      "broken or incomplete file.\n"
++                                      " InnoDB: trying to process what has "
++                                      "been read so far.\n",
++                                      (ulong) size);
++                              terminated= TRUE;
++                              break;
++                      }
++              }
++              buffers++;
++      }
++
++      /* order the requests by (space_id, page_no) */
++      qsort(records, length, sizeof(dump_record_t), dump_record_cmp);
++
++      for (offset = 0; offset < length; offset++) {
++              ulint           space_id;
++              ulint           page_no;
++              ulint           zip_size;
++              ulint           err;
++              ib_int64_t      tablespace_version;
++
++              space_id = records[offset].space_id;
++              page_no = records[offset].page_no;
++
++              /* once every 16 records */
++              if (offset % 16 == 15) {
++                      os_aio_simulated_wake_handler_threads();
++                      buf_flush_free_margins(FALSE);
++              }
++
++              zip_size = fil_space_get_zip_size(space_id);
++              if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
++                      /* skip this record */
++                      continue;
++              }
++
++              if (fil_area_is_exist(space_id, zip_size, page_no, 0,
++                                    zip_size ? zip_size : UNIV_PAGE_SIZE)) {
++
++                      tablespace_version = fil_space_get_version(space_id);
++
++                      req++;
++                      reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
++                                                 | OS_AIO_SIMULATED_WAKE_LATER,
++                                                 space_id, zip_size, TRUE,
++                                                 tablespace_version, page_no, NULL);
++                      buf_LRU_stat_inc_io();
++              }
++      }
++
++      os_aio_simulated_wake_handler_threads();
++      buf_flush_free_margins(FALSE);
++
++      ut_print_timestamp(stderr);
++      fprintf(stderr,
++              " InnoDB: reading pages based on the dumped LRU list was done."
++              " (requested: %lu, read: %lu)\n", (ulong) req, (ulong) reads);
++      ret = TRUE;
++end:
++      if (dump_file != -1)
++              os_file_close(dump_file);
++      if (buffer_base)
++              ut_free(buffer_base);
++      if (records)
++              ut_free(records);
++
++      return(ret);
++}
++
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ /**********************************************************************//**
+ Validates the LRU list for one buffer pool instance. */
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c   2010-12-03 17:49:11.576124814 +0900
++++ b/storage/innobase/buf/buf0rea.c   2010-12-04 15:33:37.628480605 +0900
+@@ -58,7 +58,7 @@
+ which case it is never read into the pool, or if the tablespace does
+ not exist or is being dropped 
+ @return 1 if read request is issued. 0 if it is not */
+-static
++UNIV_INTERN
+ ulint
+ buf_read_page_low(
+ /*==============*/
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c   2010-12-03 17:49:11.581025127 +0900
++++ b/storage/innobase/fil/fil0fil.c   2010-12-04 15:33:37.632482885 +0900
+@@ -4939,6 +4939,78 @@
+       return(DB_SUCCESS);
+ }
++/********************************************************************//**
++Check whether a block offset can be located inside a known tablespace.
++The tablespace is looked up by id and its chain of file nodes is walked
++under the fil_system mutex, subtracting the size of each node that lies
++completely before the offset. The node reached is then passed through
++fil_node_prepare_for_io() / fil_node_complete_io() before its size is
++compared with the remaining offset.
++@return FALSE if the space id is unknown, if the chain ends before the
++offset is reached, or if the offset is not below the size of the node
++reached in a space with id != 0 and purpose FIL_TABLESPACE;
++TRUE otherwise */
++UNIV_INTERN
++ibool
++fil_area_is_exist(
++/*==============*/
++      ulint   space_id,       /*!< in: space id */
++      ulint   zip_size,       /*!< in: compressed page size in bytes;
++                              0 for uncompressed pages; not read by
++                              this function */
++      ulint   block_offset,   /*!< in: offset in number of blocks */
++      ulint   byte_offset,    /*!< in: remainder of offset in bytes;
++                              not read by this function */
++      ulint   len)            /*!< in: number of bytes of the area;
++                              not read by this function */
++{
++      fil_space_t*    space;
++      fil_node_t*     node;
++      ibool           exists = FALSE;
++
++      /* Takes the fil_system mutex; it is released at func_exit. */
++      fil_mutex_enter_and_prepare_for_io(space_id);
++
++      space = fil_space_get_by_id(space_id);
++
++      if (space == NULL) {
++              goto func_exit;
++      }
++
++      for (node = UT_LIST_GET_FIRST(space->chain);
++           node != NULL;
++           node = UT_LIST_GET_NEXT(chain, node)) {
++
++              if (space->id != 0 && node->size == 0) {
++                      /* The size of a single-table tablespace is not
++                      known before its file has been opened */
++                      break;
++              }
++
++              if (block_offset < node->size) {
++                      /* the offset falls inside this node */
++                      break;
++              }
++
++              block_offset -= node->size;
++      }
++
++      if (UNIV_UNLIKELY(node == NULL)) {
++              /* ran off the end of the chain */
++              goto func_exit;
++      }
++
++      /* Open file if closed */
++      fil_node_prepare_for_io(node, fil_system, space);
++      fil_node_complete_io(node, fil_system, OS_FILE_READ);
++
++      /* The start offset has to be within the bounds of a single-table
++      tablespace; other spaces are accepted as they are */
++      if (!(UNIV_UNLIKELY(node->size <= block_offset)
++            && space->id != 0 && space->purpose == FIL_TABLESPACE)) {
++              exists = TRUE;
++      }
++
++func_exit:
++      mutex_exit(&fil_system->mutex);
++      return(exists);
++}
++
+ #ifndef UNIV_HOTBACKUP
+ /**********************************************************************//**
+ Waits for an aio operation to complete. This function is used to write the
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:49:11.589956135 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:33:37.645555490 +0900
+@@ -11708,6 +11708,12 @@
+   "Limit the allocated memory for dictionary cache. (0: unlimited)",
+   NULL, NULL, 0, 0, LONG_MAX, 0);
++static MYSQL_SYSVAR_UINT(auto_lru_dump, srv_auto_lru_dump, /* variable auto_lru_dump, backed by srv_auto_lru_dump */
++  PLUGIN_VAR_RQCMDARG,
++  "Time in seconds between automatic buffer pool dumps. "
++  "0 (the default) disables automatic dumps.",
++  NULL, NULL, 0, 0, UINT_MAX32, 0); /* NOTE(review): positional args look like check, update, default, min, max, block size -- confirm against the plugin API */
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+   MYSQL_SYSVAR(additional_mem_pool_size),
+   MYSQL_SYSVAR(autoextend_increment),
+@@ -11791,6 +11797,7 @@
+ #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+   MYSQL_SYSVAR(read_ahead_threshold),
+   MYSQL_SYSVAR(io_capacity),
++  MYSQL_SYSVAR(auto_lru_dump),
+   MYSQL_SYSVAR(purge_threads),
+   MYSQL_SYSVAR(purge_batch_size),
+   NULL
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc  2010-12-03 17:34:35.286211349 +0900
++++ b/storage/innobase/handler/i_s.cc  2010-12-04 15:33:37.677480733 +0900
+@@ -50,6 +50,7 @@
+ #include "trx0rseg.h" /* for trx_rseg_struct */
+ #include "trx0sys.h" /* for trx_sys */
+ #include "dict0dict.h" /* for dict_sys */
++#include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */
+ }
+ static const char plugin_author[] = "Innobase Oy";
+@@ -4255,6 +4256,36 @@
+                       "Hello!");
+               goto end_func;
+       }
++      else if (!strncasecmp("XTRA_LRU_DUMP", ptr, 13)) {
++              ut_print_timestamp(stderr);
++              fprintf(stderr, " InnoDB: administration command 'XTRA_LRU_DUMP'"
++                              " was detected.\n");
++
++              if (buf_LRU_file_dump()) {
++                      field_store_string(i_s_table->field[0],
++                              "XTRA_LRU_DUMP was succeeded.");
++              } else {
++                      field_store_string(i_s_table->field[0],
++                              "XTRA_LRU_DUMP was failed.");
++              }
++
++              goto end_func;
++      }
++      else if (!strncasecmp("XTRA_LRU_RESTORE", ptr, 16)) {
++              ut_print_timestamp(stderr);
++              fprintf(stderr, " InnoDB: administration command 'XTRA_LRU_RESTORE'"
++                              " was detected.\n");
++
++              if (buf_LRU_file_restore()) {
++                      field_store_string(i_s_table->field[0],
++                              "XTRA_LRU_RESTORE was succeeded.");
++              } else {
++                      field_store_string(i_s_table->field[0],
++                              "XTRA_LRU_RESTORE was failed.");
++              }
++
++              goto end_func;
++      }
+       field_store_string(i_s_table->field[0],
+               "Undefined XTRA_* command.");
+diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
+--- a/storage/innobase/include/buf0lru.h       2010-12-03 15:49:59.223956070 +0900
++++ b/storage/innobase/include/buf0lru.h       2010-12-04 15:33:37.681481467 +0900
+@@ -219,6 +219,18 @@
+ void
+ buf_LRU_stat_update(void);
+ /*=====================*/
++/********************************************************************//**
++Dump the LRU page list to the specific file. */
++UNIV_INTERN
++ibool
++buf_LRU_file_dump(void);
++/*===================*/
++/********************************************************************//**
++Read the pages based on the specific file.*/
++UNIV_INTERN
++ibool
++buf_LRU_file_restore(void);
++/*======================*/
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ /**********************************************************************//**
+diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
+--- a/storage/innobase/include/buf0rea.h       2010-12-03 17:49:11.596953870 +0900
++++ b/storage/innobase/include/buf0rea.h       2010-12-04 15:33:37.682563900 +0900
+@@ -31,6 +31,37 @@
+ #include "buf0types.h"
+ /********************************************************************//**
++Low-level function which reads a page asynchronously from a file to the
++buffer buf_pool if it is not already there, in which case does nothing.
++Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
++flag is cleared and the x-lock released by an i/o-handler thread.
++@return 1 if a read request was queued; 0 if the page already resided
++in buf_pool, or if the page is in the doublewrite buffer blocks in
++which case it is never read into the pool, or if the tablespace does
++not exist or is being dropped
++(in short: 1 if a read request is issued, 0 if it is not) */
++UNIV_INTERN
++ulint
++buf_read_page_low(
++/*==============*/
++      ulint*  err,    /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
++                      trying to read from a non-existent tablespace, or a
++                      tablespace which is just now being dropped */
++      ibool   sync,   /*!< in: TRUE if synchronous aio is desired */
++      ulint   mode,   /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
++                      ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
++                      at read-ahead functions) */
++      ulint   space,  /*!< in: space id */
++      ulint   zip_size,/*!< in: compressed page size, or 0 */
++      ibool   unzip,  /*!< in: TRUE=request uncompressed page */
++      ib_int64_t tablespace_version, /*!< in: if the space memory object has
++                      this timestamp different from what we are giving here,
++                      treat the tablespace as dropped; this is a timestamp we
++                      use to stop dangling page reads from a tablespace
++                      which we have DISCARDed + IMPORTed back */
++      ulint   offset, /*!< in: page number */
++      trx_t*  trx);
++/********************************************************************//**
+ High-level function which reads a page asynchronously from a file to the
+ buffer buf_pool if it is not already there. Sets the io_fix flag and sets
+ an exclusive lock on the buffer frame. The flag is cleared and the x-lock
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h       2010-12-03 17:49:11.597953501 +0900
++++ b/storage/innobase/include/fil0fil.h       2010-12-04 15:33:37.684551372 +0900
+@@ -644,6 +644,22 @@
+       void*   message,        /*!< in: message for aio handler if non-sync
+                               aio used, else ignored */
+       trx_t*  trx);
++/********************************************************************//**
++Confirm whether the parameters are valid or not */
++UNIV_INTERN
++ibool
++fil_area_is_exist(
++/*==============*/
++      ulint   space_id,       /*!< in: space id */
++      ulint   zip_size,       /*!< in: compressed page size in bytes;
++                              0 for uncompressed pages */
++      ulint   block_offset,   /*!< in: offset in number of blocks */
++      ulint   byte_offset,    /*!< in: remainder of offset in bytes; in
++                              aio this must be divisible by the OS block
++                              size */
++      ulint   len);           /*!< in: how many bytes to read or write; this
++                              must not cross a file boundary; in aio this
++                              must be a block size multiple */
+ /**********************************************************************//**
+ Waits for an aio operation to complete. This function is used to write the
+ handler for completed requests. The aio array of pending requests is divided
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-03 17:49:11.603969747 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-04 15:33:37.685550816 +0900
+@@ -356,6 +356,9 @@
+ reading of a disk page */
+ extern ulint srv_buf_pool_reads;
++/** Time in seconds between automatic buffer pool dumps */
++extern uint srv_auto_lru_dump;
++
+ /** Status variables to be passed to MySQL */
+ typedef struct export_var_struct export_struc;
+@@ -655,6 +658,16 @@
+ /*=====================*/
+       void*   arg);   /*!< in: a dummy parameter required by
+                       os_thread_create */
++/*********************************************************************//**
++A thread which restores the buffer pool from a dump file on startup and does
++periodic buffer pool dumps.
++@return       a dummy parameter */
++UNIV_INTERN
++os_thread_ret_t
++srv_LRU_dump_restore_thread(
++/*====================*/
++      void*   arg);   /*!< in: a dummy parameter required by
++                      os_thread_create */
+ /******************************************************************//**
+ Outputs to a file the output of the InnoDB Monitor.
+ @return FALSE if not all information printed
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 17:49:11.620986661 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-04 15:33:37.708550811 +0900
+@@ -327,6 +327,9 @@
+ reading of a disk page */
+ UNIV_INTERN ulint srv_buf_pool_reads = 0;
++/** Time in seconds between automatic buffer pool dumps */
++UNIV_INTERN uint srv_auto_lru_dump = 0;
++
+ /* structure to pass status variables to MySQL */
+ UNIV_INTERN export_struc export_vars;
+@@ -2663,6 +2666,56 @@
+       /* We count the number of threads in os_thread_exit(). A created
+       thread should always use that to exit and not use return() to exit. */
++      os_thread_exit(NULL);
++
++      OS_THREAD_DUMMY_RETURN;
++}
++
++/*********************************************************************//**
++A thread which restores the buffer pool from a dump file on startup and does
++periodic buffer pool dumps.
++@return       a dummy parameter */
++UNIV_INTERN
++os_thread_ret_t
++srv_LRU_dump_restore_thread(
++/*====================*/
++      void*   arg __attribute__((unused)))
++                      /*!< in: a dummy parameter required by
++                      os_thread_create */
++{
++      uint    auto_lru_dump;
++      time_t  last_dump_time;
++      time_t  time_elapsed;
++
++#ifdef UNIV_DEBUG_THREAD_CREATION
++      fprintf(stderr, "LRU dump/restore thread starts, id %lu\n",
++              os_thread_pf(os_thread_get_curr_id()));
++#endif
++
++      if (srv_auto_lru_dump)
++              buf_LRU_file_restore();
++
++      last_dump_time = time(NULL);
++
++loop:
++      os_thread_sleep(5000000);
++
++      if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
++              goto exit_func;
++      }
++
++      time_elapsed = time(NULL) - last_dump_time;
++      auto_lru_dump = srv_auto_lru_dump;
++      if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) {
++              last_dump_time = time(NULL);
++              buf_LRU_file_dump();
++      }
++
++      goto loop;
++exit_func:
++      /* We count the number of threads in os_thread_exit(). A created
++      thread should always use that to exit and not use return() to exit. */
++
+       os_thread_exit(NULL);
+       OS_THREAD_DUMMY_RETURN;
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-03 15:18:48.916955609 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:33:37.711484798 +0900
+@@ -121,9 +121,9 @@
+ static os_file_t      files[1000];
+ /** io_handler_thread parameters for thread identification */
+-static ulint          n[SRV_MAX_N_IO_THREADS + 6];
++static ulint          n[SRV_MAX_N_IO_THREADS + 7];
+ /** io_handler_thread identifiers */
+-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6];
++static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7];
+ /** We use this mutex to test the return value of pthread_mutex_trylock
+    on successful locking. HP-UX does NOT return 0, though Linux et al do. */
+@@ -1737,6 +1737,10 @@
+       os_thread_create(&srv_monitor_thread, NULL,
+                        thread_ids + 4 + SRV_MAX_N_IO_THREADS);
++      /* Create the thread which automatically dumps/restores the buffer pool */
++      os_thread_create(&srv_LRU_dump_restore_thread, NULL,
++                       thread_ids + 5 + SRV_MAX_N_IO_THREADS);
++
+       srv_is_being_started = FALSE;
+       err = dict_create_or_check_foreign_constraint_tables();
diff --git a/innodb_opt_lru_count.patch b/innodb_opt_lru_count.patch
new file mode 100644 (file)
index 0000000..9f77138
--- /dev/null
@@ -0,0 +1,314 @@
+# name       : innodb_opt_lru_count.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
+--- a/storage/innobase/buf/buf0buddy.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:20:49.593024343 +0900
+@@ -137,7 +137,7 @@
+                             ut_ad(buf_page_get_state(ut_list_node_313)
+                                   == BUF_BLOCK_ZIP_FREE)));
+ #endif /* !UNIV_DEBUG_VALGRIND */
+-      bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
++      bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
+       if (bpage) {
+               UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-03 15:18:48.866986963 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-03 15:20:49.595987311 +0900
+@@ -881,9 +881,9 @@
+       block->page.in_zip_hash = FALSE;
+       block->page.in_flush_list = FALSE;
+       block->page.in_free_list = FALSE;
+-      block->page.in_LRU_list = FALSE;
+       block->in_unzip_LRU_list = FALSE;
+ #endif /* UNIV_DEBUG */
++      block->page.in_LRU_list = FALSE;
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+       block->n_pointers = 0;
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+@@ -1494,7 +1494,7 @@
+       memcpy(dpage, bpage, sizeof *dpage);
+-      ut_d(bpage->in_LRU_list = FALSE);
++      bpage->in_LRU_list = FALSE;
+       ut_d(bpage->in_page_hash = FALSE);
+       /* relocate buf_pool->LRU */
+@@ -3729,8 +3729,8 @@
+               bpage->in_zip_hash = FALSE;
+               bpage->in_flush_list = FALSE;
+               bpage->in_free_list = FALSE;
+-              bpage->in_LRU_list = FALSE;
+ #endif /* UNIV_DEBUG */
++              bpage->in_LRU_list = FALSE;
+               ut_d(bpage->in_page_hash = TRUE);
+@@ -3893,7 +3893,7 @@
+       ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
+       /* Flush pages from the end of the LRU list if necessary */
+-      buf_flush_free_margin(buf_pool);
++      buf_flush_free_margin(buf_pool, FALSE);
+       frame = block->frame;
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c   2010-12-03 15:18:48.868953442 +0900
++++ b/storage/innobase/buf/buf0flu.c   2010-12-03 15:20:49.599986956 +0900
+@@ -403,19 +403,21 @@
+                               buf_page_in_file(bpage) and in the LRU list */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+-      ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+-      ut_ad(bpage->in_LRU_list);
++      //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
++      //ut_ad(bpage->in_LRU_list);
+-      if (UNIV_LIKELY(buf_page_in_file(bpage))) {
++      if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
+               return(bpage->oldest_modification == 0
+                      && buf_page_get_io_fix(bpage) == BUF_IO_NONE
+                      && bpage->buf_fix_count == 0);
+       }
++      /* The caller is permitted not to own LRU_mutex, so this is not an error. */
++/*
+       ut_print_timestamp(stderr);
+       fprintf(stderr,
+               "  InnoDB: Error: buffer block state %lu"
+@@ -423,6 +425,7 @@
+               (ulong) buf_page_get_state(bpage));
+       ut_print_buf(stderr, bpage, sizeof(buf_page_t));
+       putc('\n', stderr);
++*/
+       return(FALSE);
+ }
+@@ -1955,8 +1958,14 @@
+       buf_page_t*     bpage;
+       ulint           n_replaceable;
+       ulint           distance        = 0;
++      ibool           have_LRU_mutex = FALSE;
+-      buf_pool_mutex_enter(buf_pool);
++      if(UT_LIST_GET_LEN(buf_pool->unzip_LRU))
++              have_LRU_mutex = TRUE;
++retry:
++      //buf_pool_mutex_enter(buf_pool);
++      if (have_LRU_mutex)
++              buf_pool_mutex_enter(buf_pool);
+       n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+@@ -1967,7 +1976,13 @@
+                  + BUF_FLUSH_EXTRA_MARGIN(buf_pool))
+              && (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {
+-              mutex_t* block_mutex = buf_page_get_mutex(bpage);
++              mutex_t* block_mutex;
++              if (!bpage->in_LRU_list) {
++                      /* restart from the LRU tail, but it is very optimistic */
++                      bpage = UT_LIST_GET_LAST(buf_pool->LRU);
++                      continue;
++              }
++              block_mutex = buf_page_get_mutex(bpage);
+               mutex_enter(block_mutex);
+@@ -1982,11 +1997,18 @@
+               bpage = UT_LIST_GET_PREV(LRU, bpage);
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      if (have_LRU_mutex)
++              buf_pool_mutex_exit(buf_pool);
+       if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
+               return(0);
++      } else if (!have_LRU_mutex) {
++              /* confirm it again with LRU_mutex for exactness */
++              have_LRU_mutex = TRUE;
++              distance = 0;
++              goto retry;
+       }
+       return(BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
+@@ -2004,7 +2026,8 @@
+ void
+ buf_flush_free_margin(
+ /*==================*/
+-      buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
++      buf_pool_t*     buf_pool,               /*!< in: Buffer pool instance */
++      ibool           wait)
+ {
+       ulint   n_to_flush;
+@@ -2015,7 +2038,7 @@
+               n_flushed = buf_flush_LRU(buf_pool, n_to_flush);
+-              if (n_flushed == ULINT_UNDEFINED) {
++              if (wait && n_flushed == ULINT_UNDEFINED) {
+                       /* There was an LRU type flush batch already running;
+                       let us wait for it to end */
+@@ -2028,8 +2051,9 @@
+ Flushes pages from the end of all the LRU lists. */
+ UNIV_INTERN
+ void
+-buf_flush_free_margins(void)
++buf_flush_free_margins(
+ /*========================*/
++      ibool   wait)
+ {
+       ulint   i;
+@@ -2038,7 +2062,7 @@
+               buf_pool = buf_pool_from_array(i);
+-              buf_flush_free_margin(buf_pool);
++              buf_flush_free_margin(buf_pool, wait);
+       }
+ }
+diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
+--- a/storage/innobase/buf/buf0lru.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/buf/buf0lru.c   2010-12-03 15:20:49.602952786 +0900
+@@ -1016,7 +1016,7 @@
+       /* No free block was found: try to flush the LRU list */
+-      buf_flush_free_margin(buf_pool);
++      buf_flush_free_margin(buf_pool, TRUE);
+       ++srv_buf_pool_wait_free;
+       os_aio_simulated_wake_handler_threads();
+@@ -1213,7 +1213,7 @@
+       /* Remove the block from the LRU list */
+       UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+-      ut_d(bpage->in_LRU_list = FALSE);
++      bpage->in_LRU_list = FALSE;
+       buf_unzip_LRU_remove_block_if_needed(bpage);
+@@ -1292,7 +1292,7 @@
+       ut_ad(!bpage->in_LRU_list);
+       UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
+-      ut_d(bpage->in_LRU_list = TRUE);
++      bpage->in_LRU_list = TRUE;
+       if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+@@ -1362,7 +1362,7 @@
+               buf_pool->LRU_old_len++;
+       }
+-      ut_d(bpage->in_LRU_list = TRUE);
++      bpage->in_LRU_list = TRUE;
+       if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+@@ -1617,7 +1617,7 @@
+                               buf_page_set_old(b, buf_page_is_old(b));
+ #endif /* UNIV_LRU_DEBUG */
+                       } else {
+-                              ut_d(b->in_LRU_list = FALSE);
++                              b->in_LRU_list = FALSE;
+                               buf_LRU_add_block_low(b, buf_page_is_old(b));
+                       }
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c   2010-12-03 15:18:48.870953384 +0900
++++ b/storage/innobase/buf/buf0rea.c   2010-12-03 15:20:49.604956032 +0900
+@@ -200,7 +200,7 @@
+       }
+       /* Flush pages from the end of the LRU list if necessary */
+-      buf_flush_free_margin(buf_pool);
++      buf_flush_free_margin(buf_pool, TRUE);
+       /* Increment number of I/O operations used for LRU policy. */
+       buf_LRU_stat_inc_io();
+@@ -476,7 +476,7 @@
+       os_aio_simulated_wake_handler_threads();
+       /* Flush pages from the end of the LRU list if necessary */
+-      buf_flush_free_margin(buf_pool);
++      buf_flush_free_margin(buf_pool, TRUE);
+ #ifdef UNIV_DEBUG
+       if (buf_debug_prints && (count > 0)) {
+@@ -565,7 +565,7 @@
+       os_aio_simulated_wake_handler_threads();
+       /* Flush pages from the end of all the LRU lists if necessary */
+-      buf_flush_free_margins();
++      buf_flush_free_margins(FALSE);
+ #ifdef UNIV_DEBUG
+       if (buf_debug_prints) {
+@@ -659,7 +659,7 @@
+       os_aio_simulated_wake_handler_threads();
+       /* Flush pages from the end of all the LRU lists if necessary */
+-      buf_flush_free_margins();
++      buf_flush_free_margins(FALSE);
+ #ifdef UNIV_DEBUG
+       if (buf_debug_prints) {
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buf.h       2010-12-03 15:20:49.608986590 +0900
+@@ -1314,11 +1314,11 @@
+       UT_LIST_NODE_T(buf_page_t) LRU;
+                                       /*!< node of the LRU list */
+-#ifdef UNIV_DEBUG
++//#ifdef UNIV_DEBUG
+       ibool           in_LRU_list;    /*!< TRUE if the page is in
+                                       the LRU list; used in
+                                       debugging */
+-#endif /* UNIV_DEBUG */
++//#endif /* UNIV_DEBUG */
+       unsigned        old:1;          /*!< TRUE if the block is in the old
+                                       blocks in buf_pool->LRU_old */
+       unsigned        freed_page_clock:31;/*!< the value of
+diff -ruN a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
+--- a/storage/innobase/include/buf0flu.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0flu.h       2010-12-03 15:20:49.609953185 +0900
+@@ -65,13 +65,15 @@
+ void
+ buf_flush_free_margin(
+ /*==================*/
+-       buf_pool_t*    buf_pool);
++       buf_pool_t*    buf_pool,
++      ibool           wait);
+ /*********************************************************************//**
+ Flushes pages from the end of all the LRU lists. */
+ UNIV_INTERN
+ void
+-buf_flush_free_margins(void);
++buf_flush_free_margins(
+ /*=========================*/
++      ibool           wait);
+ #endif /* !UNIV_HOTBACKUP */
+ /********************************************************************//**
+ Initializes a page for writing to the tablespace. */
diff --git a/innodb_overwrite_relay_log_info.patch b/innodb_overwrite_relay_log_info.patch
new file mode 100644 (file)
index 0000000..60ca505
--- /dev/null
@@ -0,0 +1,495 @@
+# name       : innodb_overwrite_relay_log_info.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:37:45.516105468 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:38:20.318952987 +0900
+@@ -42,6 +42,8 @@
+ #pragma implementation                                // gcc: Class implementation
+ #endif
++#define MYSQL_SERVER
++
+ #include <sql_table.h>        // explain_filename, nz2, EXPLAIN_PARTITIONS_AS_COMMENT,
+                       // EXPLAIN_FILENAME_MAX_EXTRA_LENGTH
+@@ -52,6 +54,15 @@
+ #include <mysql/innodb_priv.h>
+ #include <mysql/psi/psi.h>
++#ifdef MYSQL_SERVER
++#include <rpl_mi.h>
++#include <slave.h>
++// Defined in slave.cc
++int init_intvar_from_file(int* var, IO_CACHE* f, int default_val);
++int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
++                        const char *default_val);
++#endif /* MYSQL_SERVER */
++
+ /** @file ha_innodb.cc */
+ /* Include necessary InnoDB headers */
+@@ -91,6 +102,14 @@
+ #include "ha_innodb.h"
+ #include "i_s.h"
++#ifdef MYSQL_SERVER
++// Defined in trx0sys.c
++extern char           trx_sys_mysql_master_log_name[];
++extern ib_int64_t     trx_sys_mysql_master_log_pos;
++extern char           trx_sys_mysql_relay_log_name[];
++extern ib_int64_t     trx_sys_mysql_relay_log_pos;
++#endif /* MYSQL_SERVER */
++
+ # ifndef MYSQL_PLUGIN_IMPORT
+ #  define MYSQL_PLUGIN_IMPORT /* nothing */
+ # endif /* MYSQL_PLUGIN_IMPORT */
+@@ -163,6 +182,7 @@
+ static my_bool        innobase_use_doublewrite                = TRUE;
+ static my_bool        innobase_use_checksums                  = TRUE;
+ static my_bool        innobase_locks_unsafe_for_binlog        = FALSE;
++static my_bool        innobase_overwrite_relay_log_info       = FALSE;
+ static my_bool        innobase_rollback_on_timeout            = FALSE;
+ static my_bool        innobase_create_status_file             = FALSE;
+ static my_bool        innobase_stats_on_metadata              = TRUE;
+@@ -2201,6 +2221,89 @@
+       }
+ #endif /* UNIV_DEBUG */
++#ifndef MYSQL_SERVER
++      innobase_overwrite_relay_log_info = FALSE; /* needs server replication internals */
++#endif /* !MYSQL_SERVER */
++
++#ifdef HAVE_REPLICATION
++#ifdef MYSQL_SERVER
++      /* read master log position from relay-log.info if exists */
++      char fname[FN_REFLEN+128];
++      int pos;
++      int info_fd;
++      IO_CACHE info_file;
++
++      fname[0] = '\0';
++
++      if(innobase_overwrite_relay_log_info) {
++
++      fprintf(stderr,
++              "InnoDB: Warning: innodb_overwrite_relay_log_info is enabled."
++              " Updates in other storage engines may have problem with consistency.\n");
++
++      bzero((char*) &info_file, sizeof(info_file));
++      fn_format(fname, relay_log_info_file, mysql_data_home, "", 4+32);
++
++      int error=0;
++
++      if (!access(fname,F_OK)) {
++              /* exist */
++              if ((info_fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0) {
++                      error=1;
++              } else if (init_io_cache(&info_file, info_fd, IO_SIZE*2,
++                                      READ_CACHE, 0L, 0, MYF(MY_WME))) {
++                      error=1;
++              }
++
++              if (error) {
++relay_info_error:
++                      if (info_fd >= 0)
++                              my_close(info_fd, MYF(0));
++                      fname[0] = '\0';
++                      goto skip_relay;
++              }
++      } else {
++              fname[0] = '\0';
++              goto skip_relay;
++      }
++
++      if (init_strvar_from_file(fname, sizeof(fname), &info_file, "") || /* dummy (it is relay-log) */
++          init_intvar_from_file(&pos, &info_file, BIN_LOG_HEADER_SIZE)) { 
++              end_io_cache(&info_file);
++              error=1;
++              goto relay_info_error;
++      }
++
++      fprintf(stderr,
++              "InnoDB: relay-log.info is detected.\n"
++              "InnoDB: relay log: position %u, file name %s\n",
++              pos, fname);
++
++      strncpy(trx_sys_mysql_relay_log_name, fname, TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN);
++      trx_sys_mysql_relay_log_pos = (ib_int64_t) pos;
++
++      if (init_strvar_from_file(fname, sizeof(fname), &info_file, "") ||
++          init_intvar_from_file(&pos, &info_file, 0)) {
++              end_io_cache(&info_file);
++              error=1;
++              goto relay_info_error;
++      }
++
++      fprintf(stderr,
++              "InnoDB: master log: position %u, file name %s\n",
++              pos, fname);
++
++      strncpy(trx_sys_mysql_master_log_name, fname, TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN);
++      trx_sys_mysql_master_log_pos = (ib_int64_t) pos;
++
++      end_io_cache(&info_file);
++      if (info_fd >= 0)
++              my_close(info_fd, MYF(0));
++      }
++skip_relay:
++#endif /* MYSQL_SERVER */
++#endif /* HAVE_REPLICATION */
++
+       /* Check that values don't overflow on 32-bit systems. */
+       if (sizeof(ulint) == 4) {
+               if (innobase_buffer_pool_size > UINT_MAX32) {
+@@ -2499,6 +2602,76 @@
+               goto mem_free_and_error;
+       }
++#ifdef HAVE_REPLICATION
++#ifdef MYSQL_SERVER
++      if(innobase_overwrite_relay_log_info) {
++      /* If InnoDB progressed from relay-log.info, overwrite it */
++      if (fname[0] == '\0') {
++              fprintf(stderr,
++                      "InnoDB: something wrong with relay-log.info. InnoDB will not overwrite it.\n");
++      } else if (0 != strcmp(fname, trx_sys_mysql_master_log_name)
++                 || pos != trx_sys_mysql_master_log_pos) {
++              /* Overwrite relay-log.info */
++              bzero((char*) &info_file, sizeof(info_file));
++              fn_format(fname, relay_log_info_file, mysql_data_home, "", 4+32);
++
++              int error = 0;
++
++              if (!access(fname,F_OK)) {
++                      /* exist */
++                      if ((info_fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0) {
++                              error = 1;
++                      } else if (init_io_cache(&info_file, info_fd, IO_SIZE*2,
++                                              WRITE_CACHE, 0L, 0, MYF(MY_WME))) {
++                              error = 1;
++                      }
++
++                      if (error) {
++                              if (info_fd >= 0)
++                                      my_close(info_fd, MYF(0));
++                              goto skip_overwrite;
++                      }
++              } else {
++                      error = 1;
++                      goto skip_overwrite;
++              }
++
++              char buff[FN_REFLEN*2+22*2+4], *pos;
++
++              my_b_seek(&info_file, 0L);
++              pos=strmov(buff, trx_sys_mysql_relay_log_name);
++              *pos++='\n';
++              pos=longlong2str(trx_sys_mysql_relay_log_pos, pos, 10);
++              *pos++='\n';
++              pos=strmov(pos, trx_sys_mysql_master_log_name);
++              *pos++='\n';
++              pos=longlong2str(trx_sys_mysql_master_log_pos, pos, 10);
++              *pos='\n';
++
++              if (my_b_write(&info_file, (uchar*) buff, (size_t) (pos-buff)+1))
++                      error = 1;
++              if (flush_io_cache(&info_file))
++                      error = 1;
++
++              end_io_cache(&info_file);
++              if (info_fd >= 0)
++                      my_close(info_fd, MYF(0));
++skip_overwrite:
++              if (error) {
++                      fprintf(stderr,
++                              "InnoDB: ERROR: error occured during overwriting relay-log.info.\n");
++              } else {
++                      fprintf(stderr,
++                              "InnoDB: relay-log.info was overwritten.\n");
++              }
++      } else {
++              fprintf(stderr,
++                      "InnoDB: InnoDB and relay-log.info are synchronized. InnoDB will not overwrite it.\n");
++      }
++      }
++#endif /* MYSQL_SERVER */
++#endif /* HAVE_REPLICATION */
++
+       innobase_old_blocks_pct = buf_LRU_old_ratio_update(
+               innobase_old_blocks_pct, TRUE);
+@@ -2611,6 +2784,25 @@
+       trx_t*  trx)    /*!< in: transaction handle */
+ {
+       if (trx_is_started(trx)) {
++#ifdef HAVE_REPLICATION
++#ifdef MYSQL_SERVER
++              THD *thd=current_thd;
++
++              if (thd && thd->slave_thread) {
++                      /* Update the replication position info inside InnoDB */
++                      trx->mysql_master_log_file_name
++                              = active_mi->rli.group_master_log_name;
++                      trx->mysql_master_log_pos
++                              = ((ib_int64_t)active_mi->rli.group_master_log_pos +
++                                 ((ib_int64_t)active_mi->rli.future_event_relay_log_pos -
++                                  (ib_int64_t)active_mi->rli.group_relay_log_pos));
++                      trx->mysql_relay_log_file_name
++                              = active_mi->rli.group_relay_log_name;
++                      trx->mysql_relay_log_pos
++                              = (ib_int64_t)active_mi->rli.future_event_relay_log_pos;
++              }
++#endif /* MYSQL_SERVER */
++#endif /* HAVE_REPLICATION */
+               trx_commit_for_mysql(trx);
+       }
+@@ -10919,6 +11111,12 @@
+   "The common part for InnoDB table spaces.",
+   NULL, NULL, NULL);
++static MYSQL_SYSVAR_BOOL(overwrite_relay_log_info, innobase_overwrite_relay_log_info,
++  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, /* declared with the read-only flag */
++  "During InnoDB crash recovery on slave overwrite relay-log.info "
++  "to align master log file position if information in InnoDB and relay-log.info is different.",
++  NULL, NULL, FALSE); /* presumably: check callback, update callback, default value - confirm against MYSQL_SYSVAR_BOOL */
++
+ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
+   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+   "Enable InnoDB doublewrite buffer (enabled by default). "
+@@ -11375,6 +11573,7 @@
+   MYSQL_SYSVAR(old_blocks_pct),
+   MYSQL_SYSVAR(old_blocks_time),
+   MYSQL_SYSVAR(open_files),
++  MYSQL_SYSVAR(overwrite_relay_log_info),
+   MYSQL_SYSVAR(rollback_on_timeout),
+   MYSQL_SYSVAR(stats_on_metadata),
+   MYSQL_SYSVAR(stats_sample_pages),
+diff -ruN a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
+--- a/storage/innobase/include/trx0sys.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/trx0sys.h       2010-12-03 15:38:20.321953297 +0900
+@@ -52,6 +52,9 @@
+ extern ib_int64_t     trx_sys_mysql_master_log_pos;
+ /* @} */
++extern char           trx_sys_mysql_relay_log_name[];
++extern ib_int64_t     trx_sys_mysql_relay_log_pos;
++
+ /** If this MySQL server uses binary logging, after InnoDB has been inited
+ and if it has done a crash recovery, we store the binlog file name and position
+ here. */
+@@ -293,7 +296,8 @@
+ void
+ trx_sys_update_mysql_binlog_offset(
+ /*===============================*/
+-      const char*     file_name,/*!< in: MySQL log file name */
++      trx_sysf_t*     sys_header,
++      const char*     file_name_in,/*!< in: MySQL log file name */
+       ib_int64_t      offset, /*!< in: position in that log file */
+       ulint           field,  /*!< in: offset of the MySQL log info field in
+                               the trx sys header */
+@@ -488,6 +492,7 @@
+ @see trx_sys_mysql_master_log_name
+ @see trx_sys_mysql_bin_log_name */
+ #define TRX_SYS_MYSQL_LOG_NAME_LEN    512
++#define TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN     480     /* upper limit is (500 - 12): the MySQL log info areas in the trx sys header are placed 500 bytes apart; the 12 is presumably the size of the fields stored before the name - confirm */
+ /** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
+ #define TRX_SYS_MYSQL_LOG_MAGIC_N     873422344
+@@ -497,6 +502,7 @@
+ /** The offset of the MySQL replication info in the trx system header;
+ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
+ #define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
++#define TRX_SYS_MYSQL_RELAY_LOG_INFO  (UNIV_PAGE_SIZE - 1500)
+ /** The offset of the MySQL binlog offset info in the trx system header */
+ #define TRX_SYS_MYSQL_LOG_INFO                (UNIV_PAGE_SIZE - 1000)
+diff -ruN a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
+--- a/storage/innobase/include/trx0trx.h       2010-12-03 15:18:48.894955550 +0900
++++ b/storage/innobase/include/trx0trx.h       2010-12-03 15:38:20.323953416 +0900
+@@ -569,6 +569,21 @@
+       ib_int64_t      mysql_log_offset;/* if MySQL binlog is used, this field
+                                       contains the end offset of the binlog
+                                       entry */
++      const char*     mysql_master_log_file_name;
++                                      /* if the database server is a MySQL
++                                      replication slave, we have here the
++                                      master binlog name up to which
++                                      replication has processed; otherwise
++                                      this is a pointer to a null
++                                      character */
++      ib_int64_t      mysql_master_log_pos;
++                                      /* if the database server is a MySQL
++                                      replication slave, this is the
++                                      position in the log file up to which
++                                      replication has processed */
++      const char*     mysql_relay_log_file_name;
++      ib_int64_t      mysql_relay_log_pos;
++
+       os_thread_id_t  mysql_thread_id;/* id of the MySQL thread associated
+                                       with this transaction object */
+       ulint           mysql_process_no;/* since in Linux, 'top' reports
+diff -ruN a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
+--- a/storage/innobase/trx/trx0sys.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/trx/trx0sys.c   2010-12-03 15:38:20.325956917 +0900
+@@ -75,13 +75,16 @@
+ file name and position here. */
+ /* @{ */
+ /** Master binlog file name */
+-UNIV_INTERN char      trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
++UNIV_INTERN char      trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN];
+ /** Master binlog file position.  We have successfully got the updates
+ up to this position.  -1 means that no crash recovery was needed, or
+ there was no master log position info inside InnoDB.*/
+ UNIV_INTERN ib_int64_t        trx_sys_mysql_master_log_pos    = -1;
+ /* @} */
++UNIV_INTERN char      trx_sys_mysql_relay_log_name[TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN];
++UNIV_INTERN ib_int64_t        trx_sys_mysql_relay_log_pos     = -1;
++
+ /** If this MySQL server uses binary logging, after InnoDB has been inited
+ and if it has done a crash recovery, we store the binlog file name and position
+ here. */
+@@ -683,23 +686,25 @@
+ void
+ trx_sys_update_mysql_binlog_offset(
+ /*===============================*/
+-      const char*     file_name,/*!< in: MySQL log file name */
++      trx_sysf_t*     sys_header,
++      const char*     file_name_in,/*!< in: MySQL log file name */
+       ib_int64_t      offset, /*!< in: position in that log file */
+       ulint           field,  /*!< in: offset of the MySQL log info field in
+                               the trx sys header */
+       mtr_t*          mtr)    /*!< in: mtr */
+ {
+-      trx_sysf_t*     sys_header;
++      const char*     file_name;
+-      if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
++      if (ut_strlen(file_name_in) >= TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN) {
+               /* We cannot fit the name to the 512 bytes we have reserved */
++              /* -> To store relay log file information, file_name must fit to the 480 bytes */
+-              return;
++              file_name = "";
++      } else {
++              file_name = file_name_in;
+       }
+-      sys_header = trx_sysf_get(mtr);
+-
+       if (mach_read_from_4(sys_header + field
+                            + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+           != TRX_SYS_MYSQL_LOG_MAGIC_N) {
+@@ -821,13 +826,26 @@
+                                        + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
+               sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+               + TRX_SYS_MYSQL_LOG_NAME);
++
++      fprintf(stderr,
++              "InnoDB: and relay log file\n"
++              "InnoDB: position %lu %lu, file name %s\n",
++              (ulong) mach_read_from_4(sys_header
++                                       + TRX_SYS_MYSQL_RELAY_LOG_INFO
++                                       + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
++              (ulong) mach_read_from_4(sys_header
++                                       + TRX_SYS_MYSQL_RELAY_LOG_INFO
++                                       + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
++              sys_header + TRX_SYS_MYSQL_RELAY_LOG_INFO
++              + TRX_SYS_MYSQL_LOG_NAME);
++
+       /* Copy the master log position info to global variables we can
+       use in ha_innobase.cc to initialize glob_mi to right values */
+       ut_memcpy(trx_sys_mysql_master_log_name,
+                 sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+                 + TRX_SYS_MYSQL_LOG_NAME,
+-                TRX_SYS_MYSQL_LOG_NAME_LEN);
++                TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN);
+       trx_sys_mysql_master_log_pos
+               = (((ib_int64_t) mach_read_from_4(
+@@ -836,6 +854,19 @@
+               + ((ib_int64_t) mach_read_from_4(
+                          sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+                          + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
++
++      ut_memcpy(trx_sys_mysql_relay_log_name,
++                sys_header + TRX_SYS_MYSQL_RELAY_LOG_INFO
++                + TRX_SYS_MYSQL_LOG_NAME,
++                TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN);
++
++      trx_sys_mysql_relay_log_pos
++              = (((ib_int64_t) mach_read_from_4(
++                          sys_header + TRX_SYS_MYSQL_RELAY_LOG_INFO
++                          + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
++              + ((ib_int64_t) mach_read_from_4(
++                         sys_header + TRX_SYS_MYSQL_RELAY_LOG_INFO
++                         + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
+       mtr_commit(&mtr);
+ }
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c   2010-12-03 15:37:45.549028990 +0900
++++ b/storage/innobase/trx/trx0trx.c   2010-12-03 15:38:20.328957217 +0900
+@@ -135,6 +135,10 @@
+       trx->mysql_log_file_name = NULL;
+       trx->mysql_log_offset = 0;
++      trx->mysql_master_log_file_name = "";
++      trx->mysql_master_log_pos = 0;
++      trx->mysql_relay_log_file_name = "";
++      trx->mysql_relay_log_pos = 0;
+       mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
+@@ -732,6 +736,7 @@
+       trx_rseg_t*     rseg;
+       trx_undo_t*     undo;
+       mtr_t           mtr;
++      trx_sysf_t*     sys_header = NULL;
+       ut_ad(mutex_own(&kernel_mutex));
+@@ -787,13 +792,35 @@
+               if (trx->mysql_log_file_name
+                   && trx->mysql_log_file_name[0] != '\0') {
++                      if (!sys_header) {
++                              sys_header = trx_sysf_get(&mtr);
++                      }
+                       trx_sys_update_mysql_binlog_offset(
++                              sys_header,
+                               trx->mysql_log_file_name,
+                               trx->mysql_log_offset,
+                               TRX_SYS_MYSQL_LOG_INFO, &mtr);
+                       trx->mysql_log_file_name = NULL;
+               }
++              if (trx->mysql_master_log_file_name[0] != '\0') {
++                      /* This database server is a MySQL replication slave */
++                      if (!sys_header) {
++                              sys_header = trx_sysf_get(&mtr);
++                      }
++                      trx_sys_update_mysql_binlog_offset(
++                              sys_header,
++                              trx->mysql_relay_log_file_name,
++                              trx->mysql_relay_log_pos,
++                              TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr);
++                      trx_sys_update_mysql_binlog_offset(
++                              sys_header,
++                              trx->mysql_master_log_file_name,
++                              trx->mysql_master_log_pos,
++                              TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr);
++                      trx->mysql_master_log_file_name = "";
++              }
++
+               /* The following call commits the mini-transaction, making the
+               whole transaction committed in the file-based world, at this
+               log sequence number. The transaction becomes 'durable' when
diff --git a/innodb_pass_corrupt_table.patch b/innodb_pass_corrupt_table.patch
new file mode 100644 (file)
index 0000000..4bfb654
--- /dev/null
@@ -0,0 +1,1387 @@
+# name       : innodb_pass_corrupt_table.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
+--- a/storage/innobase/btr/btr0btr.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/btr/btr0btr.c   2010-12-04 15:38:18.110513593 +0900
+@@ -137,6 +137,12 @@
+       root_page_no = dict_index_get_page(index);
+       block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
++
++      if (srv_pass_corrupt_table && !block) {
++              return(0);
++      }
++      ut_a(block);
++
+       ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
+            == dict_table_is_comp(index->table));
+ #ifdef UNIV_BTR_DEBUG
+@@ -422,6 +428,12 @@
+       root = btr_root_get(index, &mtr);
++      if (srv_pass_corrupt_table && !root) {
++              mtr_commit(&mtr);
++              return(0);
++      }
++      ut_a(root);
++
+       if (flag == BTR_N_LEAF_PAGES) {
+               seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+@@ -869,6 +881,13 @@
+       mtr_start(&mtr);
+       root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
++
++      if (srv_pass_corrupt_table && !root) {
++              mtr_commit(&mtr);
++              return;
++      }
++      ut_a(root);
++      
+ #ifdef UNIV_BTR_DEBUG
+       ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+                                   + root, space));
+@@ -891,6 +910,12 @@
+       mtr_start(&mtr);
+       root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
++
++      if (srv_pass_corrupt_table && !root) {
++              mtr_commit(&mtr);
++              return;
++      }
++      ut_a(root);
+ #ifdef UNIV_BTR_DEBUG
+       ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+                                   + root, space));
+@@ -924,6 +949,11 @@
+       block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
++      if (srv_pass_corrupt_table && !block) {
++              return;
++      }
++      ut_a(block);
++
+       btr_search_drop_page_hash_index(block);
+       header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
+--- a/storage/innobase/btr/btr0cur.c   2010-12-03 17:30:16.239038936 +0900
++++ b/storage/innobase/btr/btr0cur.c   2010-12-04 15:38:18.114551906 +0900
+@@ -238,6 +238,11 @@
+       case BTR_MODIFY_LEAF:
+               mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
+               get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
++
++              if (srv_pass_corrupt_table && !get_block) {
++                      return;
++              }
++              ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+               ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+ #endif /* UNIV_BTR_DEBUG */
+@@ -251,6 +256,11 @@
+                       get_block = btr_block_get(space, zip_size,
+                                                 left_page_no,
+                                                 RW_X_LATCH, mtr);
++
++                      if (srv_pass_corrupt_table && !get_block) {
++                              return;
++                      }
++                      ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+                       ut_a(page_is_comp(get_block->frame)
+                            == page_is_comp(page));
+@@ -262,6 +272,11 @@
+               get_block = btr_block_get(space, zip_size, page_no,
+                                         RW_X_LATCH, mtr);
++
++              if (srv_pass_corrupt_table && !get_block) {
++                      return;
++              }
++              ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+               ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+ #endif /* UNIV_BTR_DEBUG */
+@@ -273,6 +288,11 @@
+                       get_block = btr_block_get(space, zip_size,
+                                                 right_page_no,
+                                                 RW_X_LATCH, mtr);
++
++                      if (srv_pass_corrupt_table && !get_block) {
++                              return;
++                      }
++                      ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+                       ut_a(page_is_comp(get_block->frame)
+                            == page_is_comp(page));
+@@ -294,6 +314,11 @@
+                       get_block = btr_block_get(space, zip_size,
+                                                 left_page_no, mode, mtr);
+                       cursor->left_block = get_block;
++
++                      if (srv_pass_corrupt_table && !get_block) {
++                              return;
++                      }
++                      ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+                       ut_a(page_is_comp(get_block->frame)
+                            == page_is_comp(page));
+@@ -304,6 +329,11 @@
+               }
+               get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
++
++              if (srv_pass_corrupt_table && !get_block) {
++                      return;
++              }
++              ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+               ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+ #endif /* UNIV_BTR_DEBUG */
+@@ -576,6 +606,19 @@
+               file, line, mtr);
+       if (block == NULL) {
++              if (srv_pass_corrupt_table
++                  && buf_mode != BUF_GET_IF_IN_POOL
++                  && buf_mode != BUF_GET_IF_IN_POOL_OR_WATCH) {
++                      page_cursor->block = 0;
++                      page_cursor->rec = 0;
++                      if (estimate) {
++                              cursor->path_arr->nth_rec = ULINT_UNDEFINED;
++                      }
++                      goto func_exit;
++              }
++              ut_a(buf_mode == BUF_GET_IF_IN_POOL
++                   || buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
++
+               /* This must be a search to perform an insert/delete
+               mark/ delete; try using the insert/delete buffer */
+@@ -650,6 +693,16 @@
+       block->check_index_page_at_flush = TRUE;
+       page = buf_block_get_frame(block);
++      if (srv_pass_corrupt_table && !page) {
++              page_cursor->block = 0;
++              page_cursor->rec = 0;
++              if (estimate) {
++                      cursor->path_arr->nth_rec = ULINT_UNDEFINED;
++              }
++              goto func_exit;
++      }
++      ut_a(page);
++
+       if (rw_latch != RW_NO_LATCH) {
+ #ifdef UNIV_ZIP_DEBUG
+               const page_zip_des_t*   page_zip
+@@ -854,6 +907,17 @@
+                                        RW_NO_LATCH, NULL, BUF_GET,
+                                        file, line, mtr);
+               page = buf_block_get_frame(block);
++
++              if (srv_pass_corrupt_table && !page) {
++                      page_cursor->block = 0;
++                      page_cursor->rec = 0;
++                      if (estimate) {
++                              cursor->path_arr->nth_rec = ULINT_UNDEFINED;
++                      }
++                      break;
++              }
++              ut_a(page);
++
+               ut_ad(index->id == btr_page_get_index_id(page));
+               block->check_index_page_at_flush = TRUE;
+@@ -974,6 +1038,14 @@
+                                        RW_NO_LATCH, NULL, BUF_GET,
+                                        file, line, mtr);
+               page = buf_block_get_frame(block);
++
++              if (srv_pass_corrupt_table && !page) {
++                      page_cursor->block = 0;
++                      page_cursor->rec = 0;
++                      break;
++              }
++              ut_a(page);
++
+               ut_ad(index->id == btr_page_get_index_id(page));
+               if (height == ULINT_UNDEFINED) {
+@@ -1288,6 +1360,12 @@
+       *big_rec = NULL;
+       block = btr_cur_get_block(cursor);
++
++      if (srv_pass_corrupt_table && !block) {
++              return(DB_CORRUPTION);
++      }
++      ut_a(block);
++
+       page = buf_block_get_frame(block);
+       index = cursor->index;
+       zip_size = buf_block_get_zip_size(block);
+@@ -3022,6 +3100,11 @@
+       block = btr_cur_get_block(cursor);
++      if (srv_pass_corrupt_table && !block) {
++              return(DB_CORRUPTION);
++      }
++      ut_a(block);
++
+       ut_ad(page_is_leaf(buf_block_get_frame(block)));
+       rec = btr_cur_get_rec(cursor);
+@@ -3826,6 +3909,11 @@
+               page = btr_cur_get_page(&cursor);
++              if (srv_pass_corrupt_table && !page) {
++                      break;
++              }
++              ut_a(page);
++
+               supremum = page_get_supremum_rec(page);
+               if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS && is_first_page) {
+                       /* the cursor should be the first record of the page. */
+diff -ruN a/storage/innobase/btr/btr0pcur.c b/storage/innobase/btr/btr0pcur.c
+--- a/storage/innobase/btr/btr0pcur.c  2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/btr/btr0pcur.c  2010-12-04 15:38:18.116563877 +0900
+@@ -32,7 +32,7 @@
+ #include "ut0byte.h"
+ #include "rem0cmp.h"
+ #include "trx0trx.h"
+-
++#include "srv0srv.h"
+ /**************************************************************//**
+ Allocates memory for a persistent cursor object and initializes the cursor.
+ @return       own: persistent cursor */
+@@ -102,6 +102,12 @@
+       ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+       block = btr_pcur_get_block(cursor);
++
++      if (srv_pass_corrupt_table && !block) {
++              return;
++      }
++      ut_a(block);
++
+       index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
+       page_cursor = btr_pcur_get_page_cur(cursor);
+@@ -419,6 +425,15 @@
+       next_block = btr_block_get(space, zip_size, next_page_no,
+                                  cursor->latch_mode, mtr);
+       next_page = buf_block_get_frame(next_block);
++
++      if (srv_pass_corrupt_table && !next_page) {
++              btr_leaf_page_release(btr_pcur_get_block(cursor),
++                                    cursor->latch_mode, mtr);
++              btr_pcur_get_page_cur(cursor)->block = 0;
++              btr_pcur_get_page_cur(cursor)->rec = 0;
++              return;
++      }
++      ut_a(next_page);
+ #ifdef UNIV_BTR_DEBUG
+       ut_a(page_is_comp(next_page) == page_is_comp(page));
+       ut_a(btr_page_get_prev(next_page, mtr)
+diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
+--- a/storage/innobase/btr/btr0sea.c   2010-12-03 15:49:59.166193407 +0900
++++ b/storage/innobase/btr/btr0sea.c   2010-12-04 15:38:18.118548961 +0900
+@@ -42,7 +42,7 @@
+ #include "btr0pcur.h"
+ #include "btr0btr.h"
+ #include "ha0ha.h"
+-
++#include "srv0srv.h"
+ /** Flag: has the search system been enabled?
+ Protected by btr_search_latch and btr_search_enabled_mutex. */
+ UNIV_INTERN char              btr_search_enabled      = TRUE;
+@@ -607,6 +607,11 @@
+       block = btr_cur_get_block(cursor);
++      if (srv_pass_corrupt_table && !block) {
++              return;
++      }
++      ut_a(block);
++
+       /* NOTE that the following two function calls do NOT protect
+       info or block->n_fields etc. with any semaphore, to save CPU time!
+       We cannot assume the fields are consistent when we return from
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-04 15:37:50.554565654 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-04 15:38:18.119548922 +0900
+@@ -52,6 +52,7 @@
+ #include "log0recv.h"
+ #include "page0zip.h"
+ #include "trx0trx.h"
++#include "srv0start.h"
+ /* prototypes for new functions added to ha_innodb.cc */
+ trx_t* innobase_get_trx();
+@@ -1131,6 +1132,11 @@
+                       ready = buf_flush_ready_for_replace(&block->page);
+                       mutex_exit(&block->mutex);
++                      if (block->page.is_corrupt) {
++                              /* corrupt page may remain, it can be skipped */
++                              break;
++                      }
++
+                       if (!ready) {
+                               return(block);
+@@ -2476,6 +2482,14 @@
+               return(NULL);
+       }
++      if (srv_pass_corrupt_table) {
++              if (bpage->is_corrupt) {
++                      rw_lock_s_unlock(&buf_pool->page_hash_latch);
++                      return(NULL);
++              }
++      }
++      ut_a(!(bpage->is_corrupt));
++
+       block_mutex = buf_page_get_mutex_enter(bpage);
+       rw_lock_s_unlock(&buf_pool->page_hash_latch);
+@@ -3022,6 +3036,14 @@
+               return(NULL);
+       }
++      if (srv_pass_corrupt_table) {
++              if (block->page.is_corrupt) {
++                      mutex_exit(block_mutex);
++                      return(NULL);
++              }
++      }
++      ut_a(!(block->page.is_corrupt));
++
+       switch (buf_block_get_state(block)) {
+               buf_page_t*     bpage;
+               ibool           success;
+@@ -3689,6 +3711,7 @@
+       bpage->newest_modification = 0;
+       bpage->oldest_modification = 0;
+       HASH_INVALIDATE(bpage, hash);
++      bpage->is_corrupt = FALSE;
+ #ifdef UNIV_DEBUG_FILE_ACCESSES
+       bpage->file_page_was_freed = FALSE;
+ #endif /* UNIV_DEBUG_FILE_ACCESSES */
+@@ -4199,7 +4222,8 @@
+ void
+ buf_page_io_complete(
+ /*=================*/
+-      buf_page_t*     bpage)  /*!< in: pointer to the block in question */
++      buf_page_t*     bpage,  /*!< in: pointer to the block in question */
++      trx_t*          trx)
+ {
+       enum buf_io_fix io_type;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+@@ -4278,6 +4302,7 @@
+                               (ulong) bpage->offset);
+               }
++              if (!srv_pass_corrupt_table || !bpage->is_corrupt) {
+               /* From version 3.23.38 up we store the page checksum
+               to the 4 first bytes of the page end lsn field */
+@@ -4319,6 +4344,19 @@
+                             REFMAN "forcing-recovery.html\n"
+                             "InnoDB: about forcing recovery.\n", stderr);
++                      if (srv_pass_corrupt_table && !trx_sys_sys_space(bpage->space)
++                          && bpage->space < SRV_LOG_SPACE_FIRST_ID) {
++                              fprintf(stderr,
++                                      "InnoDB: space %u will be treated as corrupt.\n",
++                                      bpage->space);
++                              fil_space_set_corrupt(bpage->space);
++                              if (trx && trx->dict_operation_lock_mode == 0) {
++                                      dict_table_set_corrupt_by_space(bpage->space, TRUE);
++                              } else {
++                                      dict_table_set_corrupt_by_space(bpage->space, FALSE);
++                              }
++                              bpage->is_corrupt = TRUE;
++                      } else
+                       if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
+                               fputs("InnoDB: Ending processing because of"
+                                     " a corrupt database page.\n",
+@@ -4326,6 +4364,7 @@
+                               exit(1);
+                       }
+               }
++              } /**/
+               if (recv_recovery_is_on()) {
+                       /* Pages must be uncompressed for crash recovery. */
+@@ -4335,8 +4374,11 @@
+               if (uncompressed && !recv_no_ibuf_operations) {
+                       ibuf_merge_or_delete_for_page(
++                              /* Delete possible entries, if bpage is_corrupt */
++                              (srv_pass_corrupt_table && bpage->is_corrupt) ? NULL :
+                               (buf_block_t*) bpage, bpage->space,
+                               bpage->offset, buf_page_get_zip_size(bpage),
++                              (srv_pass_corrupt_table && bpage->is_corrupt) ? FALSE :
+                               TRUE);
+               }
+       }
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c   2010-12-04 15:37:50.557553380 +0900
++++ b/storage/innobase/buf/buf0rea.c   2010-12-04 15:41:09.784467585 +0900
+@@ -193,12 +193,19 @@
+                             ((buf_block_t*) bpage)->frame, bpage, trx);
+       }
+       thd_wait_end(NULL);
++
++      if (srv_pass_corrupt_table) {
++              if (*err != DB_SUCCESS) {
++                      bpage->is_corrupt = TRUE;
++              }
++      } else {
+       ut_a(*err == DB_SUCCESS);
++      }
+       if (sync) {
+               /* The i/o is already completed when we arrive from
+               fil_read */
+-              buf_page_io_complete(bpage);
++              buf_page_io_complete(bpage, trx);
+       }
+       return(1);
+diff -ruN a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
+--- a/storage/innobase/dict/dict0dict.c        2010-12-03 17:30:16.248987063 +0900
++++ b/storage/innobase/dict/dict0dict.c        2010-12-04 15:45:23.808513973 +0900
+@@ -54,6 +54,7 @@
+ #include "row0merge.h"
+ #include "m_ctype.h" /* my_isspace() */
+ #include "ha_prototypes.h" /* innobase_strcasecmp() */
++#include "srv0start.h" /* SRV_LOG_SPACE_FIRST_ID */
+ #include <ctype.h>
+@@ -749,7 +750,7 @@
+       mutex_exit(&(dict_sys->mutex));
+-      if (table != NULL) {
++      if (table != NULL && !table->is_corrupt) {
+               /* If table->ibd_file_missing == TRUE, this will
+               print an error message and return without doing
+               anything. */
+@@ -1290,7 +1291,7 @@
+                   + dict_sys->size) > srv_dict_size_limit ) {
+               prev_table = UT_LIST_GET_PREV(table_LRU, table);
+-              if (table == self || table->n_mysql_handles_opened)
++              if (table == self || table->n_mysql_handles_opened || table->is_corrupt)
+                       goto next_loop;
+               cached_foreign_tables = 0;
+@@ -4314,6 +4315,12 @@
+       heap = mem_heap_create(1000);
+       while (index) {
++              if (table->is_corrupt) {
++                      ut_a(srv_pass_corrupt_table);
++                      mem_heap_free(heap);
++                      return(FALSE);
++              }
++
+               size = btr_get_size(index, BTR_TOTAL_SIZE);
+               index->stat_index_size = size;
+@@ -4433,6 +4440,12 @@
+       heap = mem_heap_create(1000);
+       while (index) {
++              if (table->is_corrupt) {
++                      ut_a(srv_pass_corrupt_table);
++                      mem_heap_free(heap);
++                      return;
++              }
++
+ /*===========================================*/
+ {
+       dict_table_t*   sys_stats;
+@@ -4598,6 +4611,12 @@
+                    || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
+                        && dict_index_is_clust(index)))) {
+                       ulint   size;
++
++                      if (table->is_corrupt) {
++                              ut_a(srv_pass_corrupt_table);
++                              return;
++                      }
++
+                       size = btr_get_size(index, BTR_TOTAL_SIZE);
+                       index->stat_index_size = size;
+@@ -5318,4 +5337,42 @@
+               rw_lock_free(&dict_table_stats_latches[i]);
+       }
+ }
++
++/*************************************************************************
++Sets the is_corrupt flag on every table in dict_sys->table_LRU whose
++space equals space_id; prints a message to stderr if no table matches. */
++void
++dict_table_set_corrupt_by_space(
++/*============================*/
++      ulint   space_id,       /*!< in: space id compared against table->space */
++      ibool   need_mutex)     /*!< in: TRUE to take dict_sys->mutex around the list walk */
++{
++      dict_table_t*   table;
++      ibool           found = FALSE;
++      /* Only spaces rejected by trx_sys_sys_space() and with an id below SRV_LOG_SPACE_FIRST_ID are accepted. */
++      ut_a(!trx_sys_sys_space(space_id) && space_id < SRV_LOG_SPACE_FIRST_ID);
++      /* With need_mutex FALSE the caller presumably already holds dict_sys->mutex - confirm against callers. */
++      if (need_mutex)
++              mutex_enter(&(dict_sys->mutex));
++
++      table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
++
++      while (table) {
++              if (table->space == space_id) {
++                      table->is_corrupt = TRUE;
++                      found = TRUE;
++              }
++
++              table = UT_LIST_GET_NEXT(table_LRU, table);
++      }
++
++      if (need_mutex)
++              mutex_exit(&(dict_sys->mutex));
++
++      if (!found) {
++              fprintf(stderr, "InnoDB: space to be marked as "
++                      "crashed was not found for id %lu.\n",
++                      (ulong) space_id);
++      }
++}
+ #endif /* !UNIV_HOTBACKUP */
+diff -ruN a/storage/innobase/dict/dict0mem.c b/storage/innobase/dict/dict0mem.c
+--- a/storage/innobase/dict/dict0mem.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/dict/dict0mem.c 2010-12-04 15:38:18.126549463 +0900
+@@ -92,6 +92,8 @@
+       /* The number of transactions that are either waiting on the
+       AUTOINC lock or have been granted the lock. */
+       table->n_waiting_or_granted_auto_inc_locks = 0;
++
++      table->is_corrupt = FALSE;
+ #endif /* !UNIV_HOTBACKUP */
+       ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c   2010-12-04 15:37:50.564551587 +0900
++++ b/storage/innobase/fil/fil0fil.c   2010-12-04 15:38:18.128549252 +0900
+@@ -233,6 +233,7 @@
+                               file we have written to */
+       ibool           is_in_unflushed_spaces; /*!< TRUE if this space is
+                               currently in unflushed_spaces */
++      ibool           is_corrupt;
+       UT_LIST_NODE_T(fil_space_t) space_list;
+                               /*!< list of all spaces */
+       ulint           magic_n;/*!< FIL_SPACE_MAGIC_N */
+@@ -1263,6 +1264,8 @@
+                   ut_fold_string(name), space);
+       space->is_in_unflushed_spaces = FALSE;
++      space->is_corrupt = FALSE;
++
+       UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
+       mutex_exit(&fil_system->mutex);
+@@ -4917,6 +4920,22 @@
+       ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+       ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
++      if (srv_pass_corrupt_table && space->is_corrupt) {
++              /* should ignore i/o for the crashed space */
++              mutex_enter(&fil_system->mutex);
++              fil_node_complete_io(node, fil_system, type);
++              mutex_exit(&fil_system->mutex);
++              if (mode == OS_AIO_NORMAL) {
++                      ut_a(space->purpose == FIL_TABLESPACE);
++                      buf_page_io_complete(message, trx);
++              }
++              if (type == OS_FILE_READ) {
++                      return(DB_TABLESPACE_DELETED);
++              } else {
++                      return(DB_SUCCESS);
++              }
++      } else {
++              ut_a(!space->is_corrupt);
+ #ifdef UNIV_HOTBACKUP
+       /* In ibbackup do normal i/o, not aio */
+       if (type == OS_FILE_READ) {
+@@ -4931,6 +4950,8 @@
+       ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
+                    offset_low, offset_high, len, node, message, trx);
+ #endif
++      } /**/
++
+       ut_a(ret);
+       if (mode == OS_AIO_SYNC) {
+@@ -5080,7 +5101,7 @@
+       if (fil_node->space->purpose == FIL_TABLESPACE) {
+               srv_set_io_thread_op_info(segment, "complete io for buf page");
+-              buf_page_io_complete(message);
++              buf_page_io_complete(message, NULL);
+       } else {
+               srv_set_io_thread_op_info(segment, "complete io for log");
+               log_io_complete(message);
+@@ -5434,3 +5455,46 @@
+                return 0;
+        }
+ }
++
++/*************************************************************************
++functions to access is_corrupt flag of fil_space_t*/
++
++ibool                  /* TRUE only if the space is found and its is_corrupt flag is set */
++fil_space_is_corrupt(
++/*=================*/
++      ulint   space_id)       /*!< in: id of the tablespace to look up */
++{
++      fil_space_t*    space;
++      ibool           ret = FALSE;    /* default: space not found or not flagged */
++
++      mutex_enter(&fil_system->mutex);        /* lookup and flag read happen under fil_system->mutex */
++
++      space = fil_space_get_by_id(space_id);
++
++      if (space && space->is_corrupt) {
++              ret = TRUE;
++      }
++
++      mutex_exit(&fil_system->mutex);
++
++      return(ret);
++}
++
++void                   /* Sets is_corrupt on the tablespace with the given id, under fil_system->mutex. */
++fil_space_set_corrupt(
++/*==================*/
++      ulint   space_id)       /*!< in: id of the tablespace to flag */
++{
++      fil_space_t*    space;
++
++      mutex_enter(&fil_system->mutex);
++
++      space = fil_space_get_by_id(space_id);
++
++      if (space) {    /* an id with no matching space is silently ignored */
++              space->is_corrupt = TRUE;
++      }
++
++      mutex_exit(&fil_system->mutex);
++}
++
+diff -ruN a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
+--- a/storage/innobase/fsp/fsp0fsp.c   2010-12-04 15:37:50.569480615 +0900
++++ b/storage/innobase/fsp/fsp0fsp.c   2010-12-04 15:38:18.131550103 +0900
+@@ -369,6 +369,12 @@
+       ut_ad(id || !zip_size);
+       block = buf_page_get(id, zip_size, 0, RW_X_LATCH, mtr);
++
++      if (srv_pass_corrupt_table && !block) {
++              return(0);
++      }
++      ut_a(block);
++
+       header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
+       buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+@@ -787,6 +793,12 @@
+       fsp_header_t*   sp_header;
+       block = buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
++
++      if (srv_pass_corrupt_table && !block) {
++              return(0);
++      }
++      ut_a(block);
++
+       buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+       sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
+@@ -1866,6 +1878,11 @@
+ {
+       fseg_inode_t*   inode;
++      if (srv_pass_corrupt_table && !page) {
++              return(ULINT_UNDEFINED);
++      }
++      ut_a(page);
++
+       for (; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
+               inode = fsp_seg_inode_page_get_nth_inode(
+@@ -1979,6 +1996,11 @@
+       page = buf_block_get_frame(block);
++      if (srv_pass_corrupt_table && !page) {
++              return(0);
++      }
++      ut_a(page);
++
+       n = fsp_seg_inode_page_find_free(page, 0, zip_size, mtr);
+       ut_a(n != ULINT_UNDEFINED);
+@@ -2072,6 +2094,11 @@
+       inode = fut_get_ptr(space, zip_size, inode_addr, RW_X_LATCH, mtr);
++      if (srv_pass_corrupt_table && !inode) {
++              return(0);
++      }
++      ut_a(inode);
++
+       if (UNIV_UNLIKELY(!mach_read_from_8(inode + FSEG_ID))) {
+               inode = NULL;
+@@ -2098,7 +2125,7 @@
+ {
+       fseg_inode_t*   inode
+               = fseg_inode_try_get(header, space, zip_size, mtr);
+-      ut_a(inode);
++      ut_a(srv_pass_corrupt_table || inode);
+       return(inode);
+ }
+@@ -3304,6 +3331,11 @@
+       descr = xdes_get_descriptor(space, zip_size, page, mtr);
++      if (srv_pass_corrupt_table && !descr) {
++              /* The page may be corrupt. pass it. */
++              return;
++      }
++
+       ut_a(descr);
+       if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
+               fputs("InnoDB: Dump of the tablespace extent descriptor: ",
+@@ -3551,6 +3583,11 @@
+       descr = xdes_get_descriptor(space, zip_size, header_page, mtr);
++      if (srv_pass_corrupt_table && !descr) {
++              /* The page may be corrupt. pass it. */
++              return(TRUE);
++      }
++
+       /* Check that the header resides on a page which has not been
+       freed yet */
+@@ -3635,6 +3672,12 @@
+       inode = fseg_inode_get(header, space, zip_size, mtr);
++      if (srv_pass_corrupt_table && !inode) {
++              /* ignore the corruption */
++              return(TRUE);
++      }
++      ut_a(inode);
++
+       descr = fseg_get_first_extent(inode, space, zip_size, mtr);
+       if (descr != NULL) {
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:37:50.578486593 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:38:18.137549396 +0900
+@@ -3926,6 +3926,12 @@
+               DBUG_RETURN(1);
+       }
++      if (share->ib_table && share->ib_table->is_corrupt) {
++              free_share(share);
++
++              DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
++      }
++
+       /* Create buffers for packing the fields of a record. Why
+       table->reclength did not work here? Obviously, because char
+       fields when packed actually became 1 byte longer, when we also
+@@ -3953,6 +3959,19 @@
+       /* Get pointer to a table object in InnoDB dictionary cache */
+       ib_table = dict_table_get(norm_name, TRUE);
+       
++      if (ib_table && ib_table->is_corrupt) {
++              free_share(share);
++              my_free(upd_buff);
++
++              DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
++      }
++
++      if (share->ib_table) {
++              ut_a(share->ib_table == ib_table);
++      } else {
++              share->ib_table = ib_table;
++      }
++
+       if (NULL == ib_table) {
+               if (is_part && retries < 10) {
+                       ++retries;
+@@ -5117,6 +5136,10 @@
+       ha_statistic_increment(&SSV::ha_write_count);
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
+               table->timestamp_field->set_time();
+@@ -5334,6 +5357,10 @@
+ func_exit:
+       innobase_active_small();
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       DBUG_RETURN(error_result);
+ }
+@@ -5510,6 +5537,10 @@
+       ha_statistic_increment(&SSV::ha_update_count);
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+               table->timestamp_field->set_time();
+@@ -5599,6 +5630,10 @@
+       innobase_active_small();
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       DBUG_RETURN(error);
+ }
+@@ -5620,6 +5655,10 @@
+       ha_statistic_increment(&SSV::ha_delete_count);
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       if (!prebuilt->upd_node) {
+               row_get_prebuilt_update_vector(prebuilt);
+       }
+@@ -5646,6 +5685,10 @@
+       innobase_active_small();
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       DBUG_RETURN(error);
+ }
+@@ -5885,6 +5928,10 @@
+       ha_statistic_increment(&SSV::ha_read_key_count);
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       index = prebuilt->index;
+       if (UNIV_UNLIKELY(index == NULL)) {
+@@ -5950,6 +5997,10 @@
+               ret = DB_UNSUPPORTED;
+       }
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       switch (ret) {
+       case DB_SUCCESS:
+               error = 0;
+@@ -6060,6 +6111,10 @@
+ {
+       DBUG_ENTER("change_active_index");
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       ut_ad(user_thd == ha_thd());
+       ut_a(prebuilt->trx == thd_to_trx(user_thd));
+@@ -6150,6 +6205,10 @@
+       DBUG_ENTER("general_fetch");
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       ut_a(prebuilt->trx == thd_to_trx(user_thd));
+       innodb_srv_conc_enter_innodb(prebuilt->trx);
+@@ -6159,6 +6218,10 @@
+       innodb_srv_conc_exit_innodb(prebuilt->trx);
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       switch (ret) {
+       case DB_SUCCESS:
+               error = 0;
+@@ -7424,10 +7487,18 @@
+       update_thd(ha_thd());
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       /* Truncate the table in InnoDB */
+       error = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       error = convert_error_code_to_mysql(error, prebuilt->table->flags,
+                                           NULL);
+@@ -7940,6 +8011,16 @@
+       return(ranges + (double) rows / (double) total_rows * time_for_scan);
+ }
++UNIV_INTERN            /* Reports whether the InnoDB table attached to this handler's share is flagged is_corrupt. */
++bool
++ha_innobase::is_corrupt() const
++{
++      if (share->ib_table)    /* ib_table is NULL-checked before it is dereferenced */
++              return ((bool)share->ib_table->is_corrupt);
++      else
++              return (FALSE); /* no table attached to the share: reported as not corrupt */
++}
++
+ /*********************************************************************//**
+ Calculates the key number used inside MySQL for an Innobase index. We will
+ first check the "index translation table" for a match of the index to get
+@@ -8058,7 +8139,7 @@
+       ib_table = prebuilt->table;
+       if (flag & HA_STATUS_TIME) {
+-              if (called_from_analyze || innobase_stats_on_metadata) {
++              if ((called_from_analyze || innobase_stats_on_metadata) && !share->ib_table->is_corrupt) {
+                       /* In sql_show we call with this flag: update
+                       then statistics so that they are up-to-date */
+@@ -8349,10 +8430,18 @@
+       THD*            thd,            /*!< in: connection thread handle */
+       HA_CHECK_OPT*   check_opt)      /*!< in: currently ignored */
+ {
++      if (share->ib_table->is_corrupt) {
++              return(HA_ADMIN_CORRUPT);
++      }
++
+       /* Simply call ::info() with all the flags */
+       info_low(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
+                true /* called from analyze */);
++      if (share->ib_table->is_corrupt) {
++              return(HA_ADMIN_CORRUPT);
++      }
++
+       return(0);
+ }
+@@ -8534,6 +8623,10 @@
+               my_error(ER_QUERY_INTERRUPTED, MYF(0));
+       }
++      if (share->ib_table->is_corrupt) {      /* table was flagged corrupt: report it regardless of is_ok */
++              DBUG_RETURN(HA_ADMIN_CORRUPT);  /* DBUG_RETURN, not return: keeps the DBUG call stack balanced like the exit below */
++      }
++
+       DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
+ }
+@@ -9304,6 +9397,10 @@
+       update_thd(thd);
++      if (share->ib_table->is_corrupt) {
++              DBUG_RETURN(HA_ERR_CRASHED);
++      }
++
+       if (prebuilt->table->ibd_file_missing && !thd_tablespace_op(thd)) {
+               ut_print_timestamp(stderr);
+               fprintf(stderr,
+@@ -11722,6 +11819,14 @@
+   "0 (the default) disables automatic dumps.",
+   NULL, NULL, 0, 0, UINT_MAX32, 0);
++static        MYSQL_SYSVAR_ULONG(pass_corrupt_table, srv_pass_corrupt_table,
++  PLUGIN_VAR_RQCMDARG,
++  "Pass corruptions of user tables as 'corrupt table' instead of crashing itself, "
++  "when used with file_per_table. "
++  "All file io for the datafile after detected as corrupt are disabled, "
++  "except for the deletion.",
++  NULL, NULL, 0, 0, 1, 0);    /* no check/update callbacks; default 0, min 0, max 1 */
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+   MYSQL_SYSVAR(additional_mem_pool_size),
+   MYSQL_SYSVAR(autoextend_increment),
+@@ -11809,6 +11914,7 @@
+   MYSQL_SYSVAR(auto_lru_dump),
+   MYSQL_SYSVAR(purge_threads),
+   MYSQL_SYSVAR(purge_batch_size),
++  MYSQL_SYSVAR(pass_corrupt_table),
+   NULL
+ };
+diff -ruN a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
+--- a/storage/innobase/handler/ha_innodb.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/handler/ha_innodb.h     2010-12-04 15:38:18.159588579 +0900
+@@ -52,6 +52,7 @@
+       innodb_idx_translate_t  idx_trans_tbl;  /*!< index translation
+                                               table between MySQL and
+                                               Innodb */
++      dict_table_t*           ib_table;
+ } INNOBASE_SHARE;
+@@ -135,6 +136,7 @@
+       int close(void);
+       double scan_time();
+       double read_time(uint index, uint ranges, ha_rows rows);
++      bool is_corrupt() const;
+       int write_row(uchar * buf);
+       int update_row(const uchar * old_data, uchar * new_data);
+diff -ruN a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
+--- a/storage/innobase/include/btr0btr.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/btr0btr.ic      2010-12-04 15:38:18.162515035 +0900
+@@ -28,7 +28,7 @@
+ #include "mtr0mtr.h"
+ #include "mtr0log.h"
+ #include "page0zip.h"
+-
++#include "srv0srv.h"
+ #define BTR_MAX_NODE_LEVEL    50      /*!< Maximum B-tree page level
+                                       (not really a hard limit).
+                                       Used in debug assertions
+@@ -55,7 +55,9 @@
+       block = buf_page_get_gen(space, zip_size, page_no, mode,
+                                NULL, BUF_GET, file, line, mtr);
+-      if (mode != RW_NO_LATCH) {
++      ut_a(srv_pass_corrupt_table || block);
++
++      if (block && mode != RW_NO_LATCH) {
+               buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+       }
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h       2010-12-03 15:49:59.218956083 +0900
++++ b/storage/innobase/include/buf0buf.h       2010-12-04 15:38:18.164513667 +0900
+@@ -913,7 +913,7 @@
+       const buf_block_t*      block)  /*!< in: pointer to the control block */
+       __attribute__((pure));
+ #else /* UNIV_DEBUG */
+-# define buf_block_get_frame(block) (block)->frame
++# define buf_block_get_frame(block) ((block) ? (block)->frame : 0) /* yields 0 for a NULL block; argument parenthesized for safe expansion */
+ #endif /* UNIV_DEBUG */
+ /*********************************************************************//**
+ Gets the space id of a block.
+@@ -1045,7 +1045,8 @@
+ void
+ buf_page_io_complete(
+ /*=================*/
+-      buf_page_t*     bpage); /*!< in: pointer to the block in question */
++      buf_page_t*     bpage,  /*!< in: pointer to the block in question */
++      trx_t*          trx);
+ /********************************************************************//**
+ Calculates a folded value of a file page address to use in the page hash
+ table.
+@@ -1360,6 +1361,7 @@
+                                       0 if the block was never accessed
+                                       in the buffer pool */
+       /* @} */
++      ibool           is_corrupt;
+ # ifdef UNIV_DEBUG_FILE_ACCESSES
+       ibool           file_page_was_freed;
+                                       /*!< this is set to TRUE when fsp
+diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
+--- a/storage/innobase/include/buf0buf.ic      2010-12-03 15:49:59.221956024 +0900
++++ b/storage/innobase/include/buf0buf.ic      2010-12-04 15:38:18.167513925 +0900
+@@ -34,7 +34,7 @@
+ #include "buf0flu.h"
+ #include "buf0lru.h"
+ #include "buf0rea.h"
+-
++#include "srv0srv.h"
+ /*********************************************************************//**
+ Gets the current size of buffer buf_pool in bytes.
+ @return size in bytes */
+@@ -617,6 +617,12 @@
+ /*================*/
+       const buf_block_t*      block)  /*!< in: pointer to the control block */
+ {
++      ut_a(srv_pass_corrupt_table || block);
++
++      if (srv_pass_corrupt_table && !block) {
++              return(0);
++      }
++
+       ut_ad(block);
+       switch (buf_block_get_state(block)) {
+diff -ruN a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
+--- a/storage/innobase/include/dict0dict.h     2010-12-03 17:30:16.306955940 +0900
++++ b/storage/innobase/include/dict0dict.h     2010-12-04 15:38:18.169513750 +0900
+@@ -1226,6 +1226,15 @@
+ dict_close(void);
+ /*============*/
++/*************************************************************************
++Marks as corrupt every table in dict_sys->table_LRU that uses space_id. */
++
++void
++dict_table_set_corrupt_by_space(
++/*============================*/
++      ulint   space_id,       /*!< in: tablespace id compared against table->space */
++      ibool   need_mutex);    /*!< in: when non-zero, the function acquires dict_sys->mutex itself */
++
+ #ifndef UNIV_NONINL
+ #include "dict0dict.ic"
+ #endif
+diff -ruN a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
+--- a/storage/innobase/include/dict0mem.h      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0mem.h      2010-12-04 15:38:18.171513956 +0900
+@@ -595,6 +595,7 @@
+                               the AUTOINC lock on this table. */
+                               /* @} */
+       /*----------------------*/
++      ibool           is_corrupt;
+ #endif /* !UNIV_HOTBACKUP */
+ #ifdef UNIV_DEBUG
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h       2010-12-04 15:35:29.175520016 +0900
++++ b/storage/innobase/include/fil0fil.h       2010-12-04 15:38:18.172483391 +0900
+@@ -757,6 +757,19 @@
+ fil_system_hash_nodes(void);
+ /*========================*/
++/*************************************************************************
++functions to access is_corrupt flag of fil_space_t*/
++
++ibool                  /* TRUE only if the space exists and its is_corrupt flag is set */
++fil_space_is_corrupt(
++/*=================*/
++      ulint   space_id);      /*!< in: id of the tablespace to look up */
++
++void                   /* Sets the is_corrupt flag of the tablespace, if it exists. */
++fil_space_set_corrupt(
++/*==================*/
++      ulint   space_id);      /*!< in: id of the tablespace to flag */
++
+ typedef       struct fil_space_struct fil_space_t;
+ #endif
+diff -ruN a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
+--- a/storage/innobase/include/fut0fut.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/fut0fut.ic      2010-12-04 15:38:18.174481728 +0900
+@@ -23,6 +23,7 @@
+ Created 12/13/1995 Heikki Tuuri
+ ***********************************************************************/
++#include "srv0srv.h"
+ #include "sync0rw.h"
+ #include "buf0buf.h"
+@@ -48,6 +49,12 @@
+       ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+       block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
++
++      if (srv_pass_corrupt_table && !block) {
++              return(0);
++      }
++      ut_a(block);
++
+       ptr = buf_block_get_frame(block) + addr.boffset;
+       buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+diff -ruN a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
+--- a/storage/innobase/include/page0page.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/page0page.h     2010-12-04 15:38:18.175514037 +0900
+@@ -500,7 +500,7 @@
+ page_is_leaf(
+ /*=========*/
+       const page_t*   page)   /*!< in: page */
+-      __attribute__((nonnull, pure));
++      __attribute__((pure));
+ /************************************************************//**
+ Gets the pointer to the next record on the page.
+ @return       pointer to next record */
+diff -ruN a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
+--- a/storage/innobase/include/page0page.ic    2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/page0page.ic    2010-12-04 15:38:18.177482672 +0900
+@@ -274,6 +274,9 @@
+ /*=========*/
+       const page_t*   page)   /*!< in: page */
+ {
++      if (!page) {
++              return(FALSE);
++      }
+       return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL)));
+ }
+diff -ruN a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
+--- a/storage/innobase/include/page0zip.h      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/page0zip.h      2010-12-04 15:38:18.179513974 +0900
+@@ -114,7 +114,7 @@
+       const page_t*   page,   /*!< in: uncompressed page */
+       dict_index_t*   index,  /*!< in: index of the B-tree node */
+       mtr_t*          mtr)    /*!< in: mini-transaction, or NULL */
+-      __attribute__((nonnull(1,2,3)));
++      __attribute__((nonnull(1,3)));
+ /**********************************************************************//**
+ Decompress a page.  This function should tolerate errors on the compressed
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-04 15:37:50.591516341 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-04 15:38:18.180563749 +0900
+@@ -242,6 +242,7 @@
+ extern ulint  srv_adaptive_flushing_method;
+ extern ulint  srv_expand_import;
++extern ulint  srv_pass_corrupt_table;
+ extern ulint  srv_extra_rsegments;
+ extern ulint  srv_dict_size_limit;
+diff -ruN a/storage/innobase/page/page0zip.c b/storage/innobase/page/page0zip.c
+--- a/storage/innobase/page/page0zip.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/page/page0zip.c 2010-12-04 15:38:18.195515935 +0900
+@@ -1153,6 +1153,10 @@
+       FILE*           logfile = NULL;
+ #endif
++      if (!page) {
++              return(FALSE);
++      }
++
+       ut_a(page_is_comp(page));
+       ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
+       ut_ad(page_simple_validate_new((page_t*) page));
+diff -ruN a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
+--- a/storage/innobase/row/row0ins.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/row/row0ins.c   2010-12-04 15:38:18.198514028 +0900
+@@ -1335,6 +1335,12 @@
+               const rec_t*            rec = btr_pcur_get_rec(&pcur);
+               const buf_block_t*      block = btr_pcur_get_block(&pcur);
++              if (srv_pass_corrupt_table && !block) {
++                      err = DB_CORRUPTION;
++                      break;
++              }
++              ut_a(block);
++
+               if (page_rec_is_infimum(rec)) {
+                       continue;
+diff -ruN a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c
+--- a/storage/innobase/row/row0merge.c 2010-12-03 17:30:16.330986655 +0900
++++ b/storage/innobase/row/row0merge.c 2010-12-04 15:38:18.201513966 +0900
+@@ -1245,6 +1245,13 @@
+               if (UNIV_LIKELY(has_next)) {
+                       rec = btr_pcur_get_rec(&pcur);
++
++                      if (srv_pass_corrupt_table && !rec) {
++                              err = DB_CORRUPTION;
++                              goto err_exit;
++                      }
++                      ut_a(rec);
++
+                       offsets = rec_get_offsets(rec, clust_index, NULL,
+                                                 ULINT_UNDEFINED, &row_heap);
+diff -ruN a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
+--- a/storage/innobase/row/row0sel.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/row/row0sel.c   2010-12-04 15:38:18.205551115 +0900
+@@ -3848,6 +3848,13 @@
+       /* PHASE 4: Look for matching records in a loop */
+       rec = btr_pcur_get_rec(pcur);
++
++      if (srv_pass_corrupt_table && !rec) {
++              err = DB_CORRUPTION;
++              goto lock_wait_or_error;
++      }
++      ut_a(rec);
++
+       ut_ad(!!page_rec_is_comp(rec) == comp);
+ #ifdef UNIV_SEARCH_DEBUG
+       /*
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-04 15:37:50.602481253 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-04 15:38:18.209513823 +0900
+@@ -428,6 +428,7 @@
+ UNIV_INTERN ulint     srv_adaptive_flushing_method = 0; /* 0: native  1: estimate  2: keep_average */
+ UNIV_INTERN ulint     srv_expand_import = 0; /* 0:disable 1:enable */
++UNIV_INTERN ulint     srv_pass_corrupt_table = 0; /* 0:disable 1:enable */
+ UNIV_INTERN ulint     srv_extra_rsegments = 127; /* extra rseg for users */
+ UNIV_INTERN ulint     srv_dict_size_limit = 0;
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-04 15:37:50.605491300 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:38:18.212513722 +0900
+@@ -2020,6 +2020,13 @@
+       os_fast_mutex_free(&srv_os_test_mutex);
++      if (!srv_file_per_table_original_value
++          && srv_pass_corrupt_table) {
++              fprintf(stderr, "InnoDB: Warning:"
++                      " innodb_file_per_table is disabled."
++                      " So innodb_pass_corrupt_table doesn't make sense\n");
++      }       /* warning only: this check just prints to stderr */
++
+       if (srv_print_verbose_log) {
+               ut_print_timestamp(stderr);
+               fprintf(stderr,
diff --git a/innodb_recovery_patches.patch b/innodb_recovery_patches.patch
new file mode 100644 (file)
index 0000000..21a6f7e
--- /dev/null
@@ -0,0 +1,504 @@
+# name       : innodb_recovery_patches.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c   2010-12-03 15:49:59.187028943 +0900
++++ b/storage/innobase/buf/buf0rea.c   2010-12-03 17:30:41.579956150 +0900
+@@ -122,6 +122,46 @@
+       bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
+                                      tablespace_version, offset);
+       if (bpage == NULL) {
++              /* bugfix: http://bugs.mysql.com/bug.php?id=43948 */
++              if (recv_recovery_is_on() && *err == DB_TABLESPACE_DELETED) {
++                      /* hashed log recs must be treated here */
++                      recv_addr_t*    recv_addr;
++
++                      mutex_enter(&(recv_sys->mutex));
++
++                      if (recv_sys->apply_log_recs == FALSE) {
++                              mutex_exit(&(recv_sys->mutex));
++                              goto not_to_recover;
++                      }
++
++                      /* recv_get_fil_addr_struct() */
++                      recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
++                                      hash_calc_hash(ut_fold_ulint_pair(space, offset),
++                                              recv_sys->addr_hash));
++                      while (recv_addr) {
++                              if ((recv_addr->space == space)
++                                      && (recv_addr->page_no == offset)) {
++                                      break;
++                              }
++                              recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
++                      }
++
++                      if ((recv_addr == NULL)
++                          || (recv_addr->state == RECV_BEING_PROCESSED)
++                          || (recv_addr->state == RECV_PROCESSED)) {
++                              mutex_exit(&(recv_sys->mutex));
++                              goto not_to_recover;
++                      }
++
++                      fprintf(stderr, " (cannot find space: %lu)", space);
++                      recv_addr->state = RECV_PROCESSED;
++
++                      ut_a(recv_sys->n_addrs);
++                      recv_sys->n_addrs--;
++
++                      mutex_exit(&(recv_sys->mutex));
++              }
++not_to_recover:
+               return(0);
+       }
+@@ -613,6 +653,50 @@
+               /* It is a single table tablespace and the .ibd file is
+               missing: do nothing */
++              /* the log records should be handled here as well, for the
++              same reason as http://bugs.mysql.com/bug.php?id=43948 */
++
++              if (recv_recovery_is_on()) {
++                      recv_addr_t*    recv_addr;
++
++                      mutex_enter(&(recv_sys->mutex));
++
++                      if (recv_sys->apply_log_recs == FALSE) {
++                              mutex_exit(&(recv_sys->mutex));
++                              goto not_to_recover;
++                      }
++
++                      for (i = 0; i < n_stored; i++) {
++                              /* recv_get_fil_addr_struct() */
++                              recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
++                                              hash_calc_hash(ut_fold_ulint_pair(space, page_nos[i]),
++                                                      recv_sys->addr_hash));
++                              while (recv_addr) {
++                                      if ((recv_addr->space == space)
++                                              && (recv_addr->page_no == page_nos[i])) {
++                                              break;
++                                      }
++                                      recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
++                              }
++
++                              if ((recv_addr == NULL)
++                                  || (recv_addr->state == RECV_BEING_PROCESSED)
++                                  || (recv_addr->state == RECV_PROCESSED)) {
++                                      continue;
++                              }
++
++                              recv_addr->state = RECV_PROCESSED;
++
++                              ut_a(recv_sys->n_addrs);
++                              recv_sys->n_addrs--;
++                      }
++
++                      mutex_exit(&(recv_sys->mutex));
++
++                      fprintf(stderr, " (cannot find space: %lu)", space);
++              }
++not_to_recover:
++
+               return;
+       }
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:30:16.261955714 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:30:41.584971130 +0900
+@@ -182,6 +182,7 @@
+ #endif /* UNIV_LOG_ARCHIVE */
+ static my_bool        innobase_use_doublewrite                = TRUE;
+ static my_bool        innobase_use_checksums                  = TRUE;
++static my_bool        innobase_recovery_stats                 = FALSE;
+ static my_bool        innobase_locks_unsafe_for_binlog        = FALSE;
+ static my_bool        innobase_overwrite_relay_log_info       = FALSE;
+ static my_bool        innobase_rollback_on_timeout            = FALSE;
+@@ -2529,6 +2530,8 @@
+       srv_force_recovery = (ulint) innobase_force_recovery;
++      srv_recovery_stats = (ibool) innobase_recovery_stats;
++
+       srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+       srv_use_checksums = (ibool) innobase_use_checksums;
+@@ -11170,6 +11173,11 @@
+   "The common part for InnoDB table spaces.",
+   NULL, NULL, NULL);
++static MYSQL_SYSVAR_BOOL(recovery_stats, innobase_recovery_stats,
++  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++  "Output statistics of recovery process after it.",
++  NULL, NULL, FALSE);
++
+ static MYSQL_SYSVAR_BOOL(overwrite_relay_log_info, innobase_overwrite_relay_log_info,
+   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+   "During InnoDB crash recovery on slave overwrite relay-log.info "
+@@ -11658,6 +11666,7 @@
+   MYSQL_SYSVAR(data_file_path),
+   MYSQL_SYSVAR(data_home_dir),
+   MYSQL_SYSVAR(doublewrite),
++  MYSQL_SYSVAR(recovery_stats),
+   MYSQL_SYSVAR(fast_shutdown),
+   MYSQL_SYSVAR(file_io_threads),
+   MYSQL_SYSVAR(read_io_threads),
+diff -ruN a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
+--- a/storage/innobase/include/log0recv.h      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/log0recv.h      2010-12-03 17:30:41.592958318 +0900
+@@ -438,6 +438,39 @@
+       hash_table_t*   addr_hash;/*!< hash table of file addresses of pages */
+       ulint           n_addrs;/*!< number of not processed hashed file
+                               addresses in the hash table */
++
++/* If you modify the following defines in the original files,
++   you should also modify them here. */
++/* defined in os0file.c */
++#define OS_AIO_MERGE_N_CONSECUTIVE    64
++/* defined in log0recv.c */
++#define RECV_READ_AHEAD_AREA  32
++      time_t          stats_recv_start_time;
++      ulint           stats_recv_turns;
++
++      ulint           stats_read_requested_pages;
++      ulint           stats_read_in_area[RECV_READ_AHEAD_AREA];
++
++      ulint           stats_read_io_pages;
++      ulint           stats_read_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE];
++      ulint           stats_write_io_pages;
++      ulint           stats_write_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE];
++
++      ulint           stats_doublewrite_check_pages;
++      ulint           stats_doublewrite_overwrite_pages;
++
++      ulint           stats_recover_pages_with_read;
++      ulint           stats_recover_pages_without_read;
++
++      ulint           stats_log_recs;
++      ulint           stats_log_len_sum;
++
++      ulint           stats_applied_log_recs;
++      ulint           stats_applied_log_len_sum;
++      ulint           stats_pages_already_new;
++
++      ib_uint64_t     stats_oldest_modified_lsn;
++      ib_uint64_t     stats_newest_modified_lsn;
+ };
+ /** The recovery system */
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-03 17:30:16.321953515 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-03 17:30:41.593985184 +0900
+@@ -129,6 +129,8 @@
+ extern ulint* srv_data_file_sizes;
+ extern ulint* srv_data_file_is_raw_partition;
++extern ibool  srv_recovery_stats;
++
+ extern ibool  srv_auto_extend_last_data_file;
+ extern ulint  srv_last_file_size_max;
+ extern char** srv_log_group_home_dirs;
+diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
+--- a/storage/innobase/log/log0recv.c  2010-12-03 15:18:48.903987466 +0900
++++ b/storage/innobase/log/log0recv.c  2010-12-03 17:30:41.598022536 +0900
+@@ -187,6 +187,9 @@
+       recv_sys->heap = NULL;
+       recv_sys->addr_hash = NULL;
++
++      recv_sys->stats_recv_start_time = time(NULL);
++      recv_sys->stats_oldest_modified_lsn = IB_ULONGLONG_MAX;
+ }
+ /********************************************************//**
+@@ -327,6 +330,11 @@
+               recv_n_pool_free_frames = 512;
+       }
++      if (buf_pool_get_curr_size() >= (32 * 1024 * 1024)) {
++              /* Buffer pool of size at least 32 MB. */
++              recv_n_pool_free_frames = 1024;
++      }
++
+       recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
+       recv_sys->len = 0;
+       recv_sys->recovered_offset = 0;
+@@ -1363,6 +1371,11 @@
+       len = rec_end - body;
++      if (srv_recovery_stats) {
++              recv_sys->stats_log_recs++;
++              recv_sys->stats_log_len_sum += len;
++      }
++
+       recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
+       recv->type = type;
+       recv->len = rec_end - body;
+@@ -1474,6 +1487,7 @@
+       ib_uint64_t     start_lsn;
+       ib_uint64_t     end_lsn;
+       ib_uint64_t     page_lsn;
++      ib_uint64_t     page_lsn_orig;
+       ib_uint64_t     page_newest_lsn;
+       ibool           modification_to_page;
+ #ifndef UNIV_HOTBACKUP
+@@ -1496,6 +1510,8 @@
+                                            buf_block_get_page_no(block));
+       if ((recv_addr == NULL)
++              /* bugfix: http://bugs.mysql.com/bug.php?id=44140 */
++          || (recv_addr->state == RECV_BEING_READ && !just_read_in)
+           || (recv_addr->state == RECV_BEING_PROCESSED)
+           || (recv_addr->state == RECV_PROCESSED)) {
+@@ -1511,6 +1527,14 @@
+       recv_addr->state = RECV_BEING_PROCESSED;
++      if (srv_recovery_stats) {
++              if (just_read_in) {
++                      recv_sys->stats_recover_pages_with_read++;
++              } else {
++                      recv_sys->stats_recover_pages_without_read++;
++              }
++      }
++
+       mutex_exit(&(recv_sys->mutex));
+       mtr_start(&mtr);
+@@ -1540,6 +1564,7 @@
+       /* Read the newest modification lsn from the page */
+       page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
++      page_lsn_orig = page_lsn;
+ #ifndef UNIV_HOTBACKUP
+       /* It may be that the page has been modified in the buffer
+@@ -1559,6 +1584,21 @@
+       modification_to_page = FALSE;
+       start_lsn = end_lsn = 0;
++      if (srv_recovery_stats) {
++              mutex_enter(&(recv_sys->mutex));
++              if (page_lsn_orig && recv_sys->stats_oldest_modified_lsn > page_lsn_orig) {
++                      recv_sys->stats_oldest_modified_lsn = page_lsn_orig;
++              }
++              if (page_lsn_orig && recv_sys->stats_newest_modified_lsn < page_lsn_orig) {
++                      recv_sys->stats_newest_modified_lsn = page_lsn_orig;
++              }
++              if (UT_LIST_GET_LAST(recv_addr->rec_list)->start_lsn
++                  < page_lsn_orig) {
++                      recv_sys->stats_pages_already_new++;
++              }
++              mutex_exit(&(recv_sys->mutex));
++      }
++
+       recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
+       while (recv) {
+@@ -1613,6 +1653,13 @@
+                                                        buf + recv->len,
+                                                        block, &mtr);
++                      if (srv_recovery_stats) {
++                              mutex_enter(&(recv_sys->mutex));
++                              recv_sys->stats_applied_log_recs++;
++                              recv_sys->stats_applied_log_len_sum += recv->len;
++                              mutex_exit(&(recv_sys->mutex));
++                      }
++
+                       end_lsn = recv->start_lsn + recv->len;
+                       mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
+                       mach_write_to_8(UNIV_PAGE_SIZE
+@@ -1715,6 +1762,13 @@
+               }
+       }
++      if (srv_recovery_stats && n) {
++              mutex_enter(&(recv_sys->mutex));
++              recv_sys->stats_read_requested_pages += n;
++              recv_sys->stats_read_in_area[n - 1]++;
++              mutex_exit(&(recv_sys->mutex));
++      }
++
+       buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
+       /*
+       fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
+@@ -1867,6 +1921,10 @@
+       if (has_printed) {
+               fprintf(stderr, "InnoDB: Apply batch completed\n");
++
++              if (srv_recovery_stats) {
++                      recv_sys->stats_recv_turns++;
++              }
+       }
+       mutex_exit(&(recv_sys->mutex));
+@@ -3270,6 +3328,90 @@
+       }
+ #endif /* UNIV_DEBUG */
++      if (recv_needed_recovery && srv_recovery_stats) {
++              ulint   flush_list_len = 0;
++              ulint   i;
++
++              fprintf(stderr,
++                      "InnoDB: Applying log records was done. Its statistics are followings.\n");
++
++              fprintf(stderr,
++                      "============================================================\n"
++                      "-------------------\n"
++                      "RECOVERY STATISTICS\n"
++                      "-------------------\n");
++              fprintf(stderr,
++                      "Recovery time: %g sec. (%lu turns)\n",
++                      difftime(time(NULL), recv_sys->stats_recv_start_time),
++                      recv_sys->stats_recv_turns);
++
++              for (i = 0; i < srv_buf_pool_instances; i++) {
++                      buf_pool_t*     buf_pool;
++
++                      buf_pool = buf_pool_from_array(i);
++                      flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
++              }
++              fprintf(stderr,
++                      "\n"
++                      "Data page IO statistics\n"
++                      "  Requested pages: %lu\n"
++                      "  Read pages:      %lu\n"
++                      "  Written pages:   %lu\n"
++                      "  (Dirty blocks):  %lu\n",
++                      recv_sys->stats_read_requested_pages,
++                      recv_sys->stats_read_io_pages,
++                      recv_sys->stats_write_io_pages,
++                      flush_list_len);
++
++              fprintf(stderr,
++                      "  Grouping IO [times]:\n"
++                      "\tnumber of pages,\n"
++                      "\t\tread request neighbors (in %d pages chunk),\n"
++                      "\t\t\tcombined read IO,\n"
++                      "\t\t\t\tcombined write IO\n",
++                      RECV_READ_AHEAD_AREA);
++              for (i = 0; i < ut_max(RECV_READ_AHEAD_AREA,
++                                      OS_AIO_MERGE_N_CONSECUTIVE); i++) {
++                      fprintf(stderr,
++                              "\t%3lu,\t%lu,\t%lu,\t%lu\n", i + 1,
++                              (i < RECV_READ_AHEAD_AREA) ?
++                                      recv_sys->stats_read_in_area[i] : 0,
++                              (i < OS_AIO_MERGE_N_CONSECUTIVE) ?
++                                      recv_sys->stats_read_io_consecutive[i] : 0,
++                              (i < OS_AIO_MERGE_N_CONSECUTIVE) ?
++                                      recv_sys->stats_write_io_consecutive[i] : 0);
++              }
++
++              fprintf(stderr,
++                      "\n"
++                      "Recovery process statistics\n"
++                      "  Checked pages by doublewrite buffer: %lu\n"
++                      "  Overwritten pages from doublewrite:  %lu\n"
++                      "  Recovered pages by io_thread:        %lu\n"
++                      "  Recovered pages by main thread:      %lu\n"
++                      "  Parsed log records to apply:         %lu\n"
++                      "            Sum of the length:         %lu\n"
++                      "  Applied log records:                 %lu\n"
++                      "            Sum of the length:         %lu\n"
++                      "  Pages which are already new enough:  %lu (It may not be accurate, if turns > 1)\n"
++                      "  Oldest page's LSN:                   %llu\n"
++                      "  Newest page's LSN:                   %llu\n",
++                      recv_sys->stats_doublewrite_check_pages,
++                      recv_sys->stats_doublewrite_overwrite_pages,
++                      recv_sys->stats_recover_pages_with_read,
++                      recv_sys->stats_recover_pages_without_read,
++                      recv_sys->stats_log_recs,
++                      recv_sys->stats_log_len_sum,
++                      recv_sys->stats_applied_log_recs,
++                      recv_sys->stats_applied_log_len_sum,
++                      recv_sys->stats_pages_already_new,
++                      recv_sys->stats_oldest_modified_lsn,
++                      recv_sys->stats_newest_modified_lsn);
++
++              fprintf(stderr,
++                      "============================================================\n");
++      }
++
+       if (recv_needed_recovery) {
+               trx_sys_print_mysql_master_log_pos();
+               trx_sys_print_mysql_binlog_offset();
+diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
+--- a/storage/innobase/os/os0file.c    2010-12-03 15:18:48.908955759 +0900
++++ b/storage/innobase/os/os0file.c    2010-12-03 17:30:41.602022989 +0900
+@@ -43,6 +43,7 @@
+ #include "srv0start.h"
+ #include "fil0fil.h"
+ #include "buf0buf.h"
++#include "log0recv.h"
+ #ifndef UNIV_HOTBACKUP
+ # include "os0sync.h"
+ # include "os0thread.h"
+@@ -4237,6 +4238,18 @@
+               os_thread_exit(NULL);
+       }
++      if (srv_recovery_stats && recv_recovery_is_on() && n_consecutive) {
++              mutex_enter(&(recv_sys->mutex));
++              if (slot->type == OS_FILE_READ) {
++                      recv_sys->stats_read_io_pages += n_consecutive;
++                      recv_sys->stats_read_io_consecutive[n_consecutive - 1]++;
++              } else if (slot->type == OS_FILE_WRITE) {
++                      recv_sys->stats_write_io_pages += n_consecutive;
++                      recv_sys->stats_write_io_consecutive[n_consecutive - 1]++;
++              }
++              mutex_exit(&(recv_sys->mutex));
++      }
++
+       os_mutex_enter(array->mutex);
+       slot = os_aio_array_get_nth_slot(array, i + segment * n);
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 17:30:16.339955597 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 17:30:41.604958138 +0900
+@@ -165,6 +165,8 @@
+ /* size in database pages */
+ UNIV_INTERN ulint*    srv_data_file_sizes = NULL;
++UNIV_INTERN ibool     srv_recovery_stats = FALSE;
++
+ /* if TRUE, then we auto-extend the last data file */
+ UNIV_INTERN ibool     srv_auto_extend_last_data_file  = FALSE;
+ /* if != 0, this tells the max size auto-extending may increase the
+diff -ruN a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
+--- a/storage/innobase/trx/trx0sys.c   2010-12-03 15:41:52.051986524 +0900
++++ b/storage/innobase/trx/trx0sys.c   2010-12-03 17:30:41.607026818 +0900
+@@ -566,6 +566,12 @@
+                              zip_size ? zip_size : UNIV_PAGE_SIZE,
+                              read_buf, NULL);
++                      if (srv_recovery_stats && recv_recovery_is_on()) {
++                              mutex_enter(&(recv_sys->mutex));
++                              recv_sys->stats_doublewrite_check_pages++;
++                              mutex_exit(&(recv_sys->mutex));
++                      }
++
+                       /* Check if the page is corrupt */
+                       if (UNIV_UNLIKELY
+@@ -613,6 +619,13 @@
+                                      zip_size, page_no, 0,
+                                      zip_size ? zip_size : UNIV_PAGE_SIZE,
+                                      page, NULL);
++
++                              if (srv_recovery_stats && recv_recovery_is_on()) {
++                                      mutex_enter(&(recv_sys->mutex));
++                                      recv_sys->stats_doublewrite_overwrite_pages++;
++                                      mutex_exit(&(recv_sys->mutex));
++                              }
++
+                               fprintf(stderr,
+                                       "InnoDB: Recovered the page from"
+                                       " the doublewrite buffer.\n");
diff --git a/innodb_separate_doublewrite.patch b/innodb_separate_doublewrite.patch
new file mode 100644 (file)
index 0000000..2a7f5bc
--- /dev/null
@@ -0,0 +1,1078 @@
+# name       : innodb_separate_doublewrite.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-03 17:49:11.574962867 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-04 15:35:58.624514033 +0900
+@@ -4247,7 +4247,8 @@
+               read_space_id = mach_read_from_4(
+                       frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+-              if (bpage->space == TRX_SYS_SPACE
++              if ((bpage->space == TRX_SYS_SPACE
++                   || (srv_doublewrite_file && bpage->space == TRX_DOUBLEWRITE_SPACE))
+                   && trx_doublewrite_page_inside(bpage->offset)) {
+                       ut_print_timestamp(stderr);
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c   2010-12-03 15:49:59.179956111 +0900
++++ b/storage/innobase/buf/buf0flu.c   2010-12-04 15:35:58.624514033 +0900
+@@ -763,7 +763,8 @@
+       write_buf = trx_doublewrite->write_buf;
+       i = 0;
+-      fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
++      fil_io(OS_FILE_WRITE, TRUE,
++             (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0,
+              trx_doublewrite->block1, 0, len,
+              (void*) write_buf, NULL);
+@@ -800,7 +801,8 @@
+               + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
+       ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
+-      fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
++      fil_io(OS_FILE_WRITE, TRUE,
++             (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0,
+              trx_doublewrite->block2, 0, len,
+              (void*) write_buf, NULL);
+@@ -830,7 +832,7 @@
+ flush:
+       /* Now flush the doublewrite buffer data to disk */
+-      fil_flush(TRX_SYS_SPACE);
++      fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
+       /* We know that the writes have been flushed to disk now
+       and in recovery we will find them in the doublewrite buffer
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c   2010-12-04 15:35:29.138514157 +0900
++++ b/storage/innobase/buf/buf0rea.c   2010-12-04 15:35:58.626486771 +0900
+@@ -88,7 +88,9 @@
+       wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
+       mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
+-      if (trx_doublewrite && space == TRX_SYS_SPACE
++      if (trx_doublewrite
++          && (space == TRX_SYS_SPACE
++              || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
+           && (   (offset >= trx_doublewrite->block1
+                   && offset < trx_doublewrite->block1
+                   + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
+diff -ruN a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
+--- a/storage/innobase/dict/dict0load.c        2010-12-03 17:30:16.252956569 +0900
++++ b/storage/innobase/dict/dict0load.c        2010-12-04 15:35:58.627482825 +0900
+@@ -781,7 +781,7 @@
+               mtr_commit(&mtr);
+-              if (space_id == 0) {
++              if (trx_sys_sys_space(space_id)) {
+                       /* The system tablespace always exists. */
+               } else if (in_crash_recovery) {
+                       /* Check that the tablespace (the .ibd file) really
+@@ -1578,7 +1578,7 @@
+       space = mach_read_from_4(field);
+       /* Check if the tablespace exists and has the right name */
+-      if (space != 0) {
++      if (!trx_sys_sys_space(space)) {
+               flags = dict_sys_tables_get_flags(rec);
+               if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
+@@ -1728,7 +1728,7 @@
+               goto err_exit;
+       }
+-      if (table->space == 0) {
++      if (trx_sys_sys_space(table->space)) {
+               /* The system tablespace is always available. */
+       } else if (!fil_space_for_table_exists_in_mem(
+                          table->space, name,
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c   2010-12-04 15:35:29.143813775 +0900
++++ b/storage/innobase/fil/fil0fil.c   2010-12-04 15:35:58.628498870 +0900
+@@ -627,7 +627,7 @@
+       UT_LIST_ADD_LAST(chain, space->chain, node);
+-      if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
++      if (id < SRV_EXTRA_SYS_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
+               fil_system->max_assigned_id = id;
+       }
+@@ -691,14 +691,14 @@
+               size_bytes = (((ib_int64_t)size_high) << 32)
+                       + (ib_int64_t)size_low;
+ #ifdef UNIV_HOTBACKUP
+-              if (space->id == 0) {
++              if (trx_sys_sys_space(space->id)) {
+                       node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+                       os_file_close(node->handle);
+                       goto add_size;
+               }
+ #endif /* UNIV_HOTBACKUP */
+               ut_a(space->purpose != FIL_LOG);
+-              ut_a(space->id != 0);
++              ut_a(!trx_sys_sys_space(space->id));
+               if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
+                       fprintf(stderr,
+@@ -744,7 +744,7 @@
+               }
+               if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
+-                                || space_id == 0)) {
++                                || trx_sys_sys_space(space_id))) {
+                       fprintf(stderr,
+                               "InnoDB: Error: tablespace id %lu"
+                               " in file %s is not sensible\n",
+@@ -812,7 +812,7 @@
+       system->n_open++;
+-      if (space->purpose == FIL_TABLESPACE && space->id != 0) {
++      if (space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(space->id)) {
+               /* Put the node to the LRU list */
+               UT_LIST_ADD_FIRST(LRU, system->LRU, node);
+       }
+@@ -845,7 +845,7 @@
+       ut_a(system->n_open > 0);
+       system->n_open--;
+-      if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
++      if (node->space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(node->space->id)) {
+               ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
+               /* The node is in the LRU list, remove it */
+@@ -931,7 +931,7 @@
+ retry:
+       mutex_enter(&fil_system->mutex);
+-      if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
++      if (trx_sys_sys_space(space_id) || space_id >= SRV_LOG_SPACE_FIRST_ID) {
+               /* We keep log files and system tablespace files always open;
+               this is important in preventing deadlocks in this module, as
+               a page read completion often performs another read from the
+@@ -1162,7 +1162,7 @@
+                       " tablespace memory cache!\n",
+                       (ulong) space->id);
+-              if (id == 0 || purpose != FIL_TABLESPACE) {
++              if (trx_sys_sys_space(id) || purpose != FIL_TABLESPACE) {
+                       mutex_exit(&fil_system->mutex);
+@@ -1224,6 +1224,7 @@
+       space->mark = FALSE;
+       if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on)
++          && UNIV_UNLIKELY(id < SRV_EXTRA_SYS_SPACE_FIRST_ID)
+           && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) {
+               if (!fil_system->space_id_reuse_warned) {
+                       fil_system->space_id_reuse_warned = TRUE;
+@@ -1307,7 +1308,7 @@
+                       (ulong) SRV_LOG_SPACE_FIRST_ID);
+       }
+-      success = (id < SRV_LOG_SPACE_FIRST_ID);
++      success = (id < SRV_EXTRA_SYS_SPACE_FIRST_ID);
+       if (success) {
+               *space_id = fil_system->max_assigned_id = id;
+@@ -1570,6 +1571,8 @@
+       UT_LIST_INIT(fil_system->LRU);
+       fil_system->max_n_open = max_n_open;
++
++      fil_system->max_assigned_id = TRX_SYS_SPACE_MAX;
+ }
+ /*******************************************************************//**
+@@ -1591,7 +1594,7 @@
+       space = UT_LIST_GET_FIRST(fil_system->space_list);
+       while (space != NULL) {
+-              if (space->purpose != FIL_TABLESPACE || space->id == 0) {
++              if (space->purpose != FIL_TABLESPACE || trx_sys_sys_space(space->id)) {
+                       node = UT_LIST_GET_FIRST(space->chain);
+                       while (node != NULL) {
+@@ -1681,6 +1684,10 @@
+               ut_error;
+       }
++      if (max_id >= SRV_EXTRA_SYS_SPACE_FIRST_ID) {
++              return;
++      }
++
+       mutex_enter(&fil_system->mutex);
+       if (fil_system->max_assigned_id < max_id) {
+@@ -1699,6 +1706,7 @@
+ ulint
+ fil_write_lsn_and_arch_no_to_file(
+ /*==============================*/
++      ulint           space_id,
+       ulint           sum_of_sizes,   /*!< in: combined size of previous files
+                                       in space, in database pages */
+       ib_uint64_t     lsn,            /*!< in: lsn to write */
+@@ -1708,14 +1716,16 @@
+       byte*   buf1;
+       byte*   buf;
++      ut_a(trx_sys_sys_space(space_id));
++
+       buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
+       buf = ut_align(buf1, UNIV_PAGE_SIZE);
+-      fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
++      fil_read(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+       mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
+-      fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
++      fil_write(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+       mem_free(buf1);
+@@ -1751,7 +1761,7 @@
+               always open. */
+               if (space->purpose == FIL_TABLESPACE
+-                  && space->id == 0) {
++                  && trx_sys_sys_space(space->id)) {
+                       sum_of_sizes = 0;
+                       node = UT_LIST_GET_FIRST(space->chain);
+@@ -1759,7 +1769,7 @@
+                               mutex_exit(&fil_system->mutex);
+                               err = fil_write_lsn_and_arch_no_to_file(
+-                                      sum_of_sizes, lsn, arch_log_no);
++                                      space->id, sum_of_sizes, lsn, arch_log_no);
+                               if (err != DB_SUCCESS) {
+                                       return(err);
+@@ -3806,7 +3816,7 @@
+       }
+ #ifndef UNIV_HOTBACKUP
+-      if (space_id == ULINT_UNDEFINED || space_id == 0) {
++      if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) {
+               fprintf(stderr,
+                       "InnoDB: Error: tablespace id %lu in file %s"
+                       " is not sensible\n",
+@@ -3815,7 +3825,7 @@
+               goto func_exit;
+       }
+ #else
+-      if (space_id == ULINT_UNDEFINED || space_id == 0) {
++      if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) {
+               char*   new_path;
+               fprintf(stderr,
+@@ -4636,7 +4646,7 @@
+       }
+       if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
+-          && space->id != 0) {
++          && !trx_sys_sys_space(space->id)) {
+               /* The node is in the LRU list, remove it */
+               ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
+@@ -4682,7 +4692,7 @@
+       }
+       if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
+-          && node->space->id != 0) {
++          && !trx_sys_sys_space(node->space->id)) {
+               /* The node must be put back to the LRU list */
+               UT_LIST_ADD_FIRST(LRU, system->LRU, node);
+       }
+@@ -5298,7 +5308,7 @@
+               ut_a(fil_node->n_pending == 0);
+               ut_a(fil_node->open);
+               ut_a(fil_node->space->purpose == FIL_TABLESPACE);
+-              ut_a(fil_node->space->id != 0);
++              ut_a(!trx_sys_sys_space(fil_node->space->id));
+               fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
+       }
+diff -ruN a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
+--- a/storage/innobase/fsp/fsp0fsp.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/fsp/fsp0fsp.c   2010-12-04 15:35:58.632513243 +0900
+@@ -48,7 +48,7 @@
+ # include "log0log.h"
+ #endif /* UNIV_HOTBACKUP */
+ #include "dict0mem.h"
+-
++#include "trx0sys.h"
+ #define FSP_HEADER_OFFSET     FIL_PAGE_DATA   /* Offset of the space header
+                                               within a file page */
+@@ -999,10 +999,10 @@
+       flst_init(header + FSP_SEG_INODES_FREE, mtr);
+       mlog_write_ull(header + FSP_SEG_ID, 1, mtr);
+-      if (space == 0) {
++      if (space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE) {
+               fsp_fill_free_list(FALSE, space, header, mtr);
+               btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
+-                         0, 0, DICT_IBUF_ID_MIN + space,
++                         space, 0, DICT_IBUF_ID_MIN + space,
+                          dict_ind_redundant, mtr);
+       } else {
+               fsp_fill_free_list(TRUE, space, header, mtr);
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:35:29.153514047 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-04 15:35:58.636549909 +0900
+@@ -163,6 +163,7 @@
+ static char*  innobase_log_group_home_dir             = NULL;
+ static char*  innobase_file_format_name               = NULL;
+ static char*  innobase_change_buffering               = NULL;
++static char*  innobase_doublewrite_file               = NULL;
+ /* The highest file format being used in the database. The value can be
+ set by user, however, it will be adjusted to the newer file format if
+@@ -2425,6 +2426,8 @@
+               goto error;
+       }
++      srv_doublewrite_file = innobase_doublewrite_file;
++
+       srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table;
+       /* -------------- Log files ---------------------------*/
+@@ -11553,6 +11556,11 @@
+   "Path to individual files and their sizes.",
+   NULL, NULL, NULL);
++static MYSQL_SYSVAR_STR(doublewrite_file, innobase_doublewrite_file,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "Path to special datafile for doublewrite buffer. (default is \"\": not used) ### ONLY FOR EXPERTS!!! ###",
++  NULL, NULL, NULL); /* no check/update callbacks; default value NULL */
++
+ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
+   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+   "The AUTOINC lock modes supported by InnoDB:               "
+@@ -11723,6 +11731,7 @@
+   MYSQL_SYSVAR(commit_concurrency),
+   MYSQL_SYSVAR(concurrency_tickets),
+   MYSQL_SYSVAR(data_file_path),
++  MYSQL_SYSVAR(doublewrite_file),
+   MYSQL_SYSVAR(data_home_dir),
+   MYSQL_SYSVAR(doublewrite),
+   MYSQL_SYSVAR(recovery_stats),
+diff -ruN a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
+--- a/storage/innobase/include/mtr0log.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/mtr0log.ic      2010-12-04 15:35:58.644607059 +0900
+@@ -27,8 +27,8 @@
+ #include "ut0lst.h"
+ #include "buf0buf.h"
+ #include "fsp0types.h"
++#include "srv0srv.h"
+ #include "trx0sys.h"
+-
+ /********************************************************//**
+ Opens a buffer to mlog. It must be closed with mlog_close.
+ @return       buffer, NULL if log mode MTR_LOG_NONE */
+@@ -201,7 +201,8 @@
+       the doublewrite buffer is located in pages
+       FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
+       system tablespace */
+-      if (space == TRX_SYS_SPACE
++      if ((space == TRX_SYS_SPACE
++           || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
+           && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
+               if (trx_doublewrite_buf_is_being_created) {
+                       /* Do nothing: we only come to this branch in an
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-04 15:35:29.177480351 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-04 15:35:58.646556250 +0900
+@@ -132,6 +132,8 @@
+ extern ulint* srv_data_file_sizes;
+ extern ulint* srv_data_file_is_raw_partition;
++extern char*  srv_doublewrite_file;
++
+ extern ibool  srv_recovery_stats;
+ extern ibool  srv_auto_extend_last_data_file;
+diff -ruN a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
+--- a/storage/innobase/include/srv0start.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/srv0start.h     2010-12-08 17:15:07.602605797 +0900
+@@ -127,4 +127,7 @@
+ /** Log 'spaces' have id's >= this */
+ #define SRV_LOG_SPACE_FIRST_ID                0xFFFFFFF0UL
+/** reserved for extra system tablespaces */
++#define SRV_EXTRA_SYS_SPACE_FIRST_ID  0xFFFFFFE0UL
++
+ #endif
+diff -ruN a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
+--- a/storage/innobase/include/trx0sys.h       2010-12-03 15:41:52.047049291 +0900
++++ b/storage/innobase/include/trx0sys.h       2010-12-04 15:35:58.647551222 +0900
+@@ -124,6 +124,22 @@
+ /*=============*/
+       ulint   space,  /*!< in: space */
+       ulint   page_no);/*!< in: page number */
++/***************************************************************//**
++Checks if a space id is one of the system tablespaces.
++@return TRUE if system tablespace */
++UNIV_INLINE
++ibool
++trx_sys_sys_space(
++/*==============*/
++      ulint   space); /*!< in: space id */
++/***************************************************************//**
++Checks if a space id is the tablespace used for the doublewrite buffer.
++@return TRUE if doublewrite tablespace */
++UNIV_INLINE
++ibool
++trx_sys_doublewrite_space(
++/*======================*/
++      ulint   space); /*!< in: space id */
+ /*****************************************************************//**
+ Creates and initializes the central memory structures for the transaction
+ system. This is called when the database is started. */
+@@ -137,6 +153,13 @@
+ void
+ trx_sys_create(void);
+ /*================*/
++/*****************************************************************//**
++Creates and initializes the dummy transaction system page for a tablespace. */
++UNIV_INTERN
++void
++trx_sys_dummy_create(
++/*=================*/
++      ulint   space); /*!< in: space id; must be TRX_DOUBLEWRITE_SPACE */
+ /****************************************************************//**
+ Looks for a free slot for a rollback segment in the trx system file copy.
+ @return       slot index or ULINT_UNDEFINED if not found */
+@@ -448,6 +471,8 @@
+ /* Space id and page no where the trx system file copy resides */
+ #define       TRX_SYS_SPACE   0       /* the SYSTEM tablespace */
++#define       TRX_DOUBLEWRITE_SPACE   0xFFFFFFE0UL    /* the doublewrite buffer tablespace if used */
++#define       TRX_SYS_SPACE_MAX       9       /* reserved max space id for system tablespaces */
+ #include "fsp0fsp.h"
+ #define       TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
+diff -ruN a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
+--- a/storage/innobase/include/trx0sys.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/trx0sys.ic      2010-12-04 15:35:58.649473284 +0900
+@@ -71,6 +71,40 @@
+ }
+ /***************************************************************//**
++Checks whether a space id is one of the reserved system tablespaces:
++TRX_SYS_SPACE always, plus TRX_DOUBLEWRITE_SPACE when a separate
++doublewrite file (srv_doublewrite_file) is configured.
++@return TRUE if system tablespace */
++UNIV_INLINE
++ibool
++trx_sys_sys_space(
++/*==============*/
++      ulint   space)  /*!< in: space */
++{
++      /* the doublewrite id is reserved only with a separate file */
++      return((ibool)(space == TRX_SYS_SPACE
++                     || (srv_doublewrite_file
++                         && space == TRX_DOUBLEWRITE_SPACE)));
++}
++
++/***************************************************************//**
++Checks whether a space id is the tablespace used for the doublewrite
++buffer: TRX_DOUBLEWRITE_SPACE when srv_doublewrite_file is set,
++TRX_SYS_SPACE otherwise.
++@return TRUE if doublewrite tablespace */
++UNIV_INLINE
++ibool
++trx_sys_doublewrite_space(
++/*======================*/
++      ulint   space)  /*!< in: space */
++{
++      /* compare against whichever id is selected by the setting */
++      return((ibool)(space == (srv_doublewrite_file
++                               ? TRX_DOUBLEWRITE_SPACE
++                               : TRX_SYS_SPACE)));
++}
++
++/***************************************************************//**
+ Gets the pointer in the nth slot of the rseg array.
+ @return       pointer to rseg object, NULL if slot not in use */
+ UNIV_INLINE
+diff -ruN a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
+--- a/storage/innobase/row/row0mysql.c 2010-12-03 17:30:16.334989510 +0900
++++ b/storage/innobase/row/row0mysql.c 2010-12-04 15:35:58.652496484 +0900
+@@ -3423,7 +3423,7 @@
+               /* Do not drop possible .ibd tablespace if something went
+               wrong: we do not want to delete valuable data of the user */
+-              if (err == DB_SUCCESS && space_id > 0) {
++              if (err == DB_SUCCESS && !trx_sys_sys_space(space_id)) {
+                       if (!fil_space_for_table_exists_in_mem(space_id,
+                                                              name_or_path,
+                                                              is_temp, FALSE,
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-04 15:35:29.180483212 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-04 15:35:58.656550107 +0900
+@@ -168,6 +168,8 @@
+ /* size in database pages */
+ UNIV_INTERN ulint*    srv_data_file_sizes = NULL;
++UNIV_INTERN char*     srv_doublewrite_file = NULL;
++
+ UNIV_INTERN ibool     srv_recovery_stats = FALSE;
+ /* if TRUE, then we auto-extend the last data file */
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-04 15:35:29.183481330 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:35:58.661550545 +0900
+@@ -715,6 +715,7 @@
+ /*======================*/
+       ibool*          create_new_db,  /*!< out: TRUE if new database should be
+                                       created */
++      ibool*          create_new_doublewrite_file,
+ #ifdef UNIV_LOG_ARCHIVE
+       ulint*          min_arch_log_no,/*!< out: min of archived log
+                                       numbers in data files */
+@@ -747,6 +748,7 @@
+       *sum_of_new_sizes = 0;
+       *create_new_db = FALSE;
++      *create_new_doublewrite_file = FALSE;
+       srv_normalize_path_for_win(srv_data_home);
+@@ -984,6 +986,142 @@
+                               srv_data_file_is_raw_partition[i] != 0);
+       }
++      /* special file for doublewrite buffer */
++      if (srv_doublewrite_file)
++      {
++              srv_normalize_path_for_win(srv_doublewrite_file);
++
++              fprintf(stderr,
++                      "InnoDB: Notice: innodb_doublewrite_file is specified.\n"
++                      "InnoDB: This is for expert only. Don't use if you don't understand what is it 'WELL'.\n"
++                      "InnoDB: ### Don't specify older file than the last checkpoint ###\n"
++                      "InnoDB: otherwise the older doublewrite buffer will break your data during recovery!\n");
++
++              strcpy(name, srv_doublewrite_file);
++
++              /* First we try to create the file: if it already
++              exists, ret will get value FALSE */
++
++              files[i] = os_file_create(innodb_file_data_key, name, OS_FILE_CREATE,
++                                        OS_FILE_NORMAL,
++                                        OS_DATA_FILE, &ret);
++
++              if (ret == FALSE && os_file_get_last_error(FALSE)
++                  != OS_FILE_ALREADY_EXISTS
++#ifdef UNIV_AIX
++                  /* AIX 5.1 after security patch ML7 may have
++                  errno set to 0 here, which causes our function
++                  to return 100; work around that AIX problem */
++                  && os_file_get_last_error(FALSE) != 100
++#endif
++                  ) {
++                      fprintf(stderr,
++                              "InnoDB: Error in creating"
++                              " or opening %s\n",
++                              name);
++
++                      return(DB_ERROR);
++              }
++
++              if (ret == FALSE) {
++                      /* We open the data file */
++
++                      files[i] = os_file_create(innodb_file_data_key,
++                              name, OS_FILE_OPEN, OS_FILE_NORMAL,
++                              OS_DATA_FILE, &ret);
++
++                      if (!ret) {
++                              fprintf(stderr,
++                                      "InnoDB: Error in opening %s\n", name);
++                              os_file_get_last_error(TRUE);
++
++                              return(DB_ERROR);
++                      }
++
++                      ret = os_file_get_size(files[i], &size, &size_high);
++                      ut_a(ret);
++                      /* Round size downward to megabytes */
++
++                      rounded_size_pages
++                              = (size / (1024 * 1024) + 4096 * size_high)
++                                      << (20 - UNIV_PAGE_SIZE_SHIFT);
++
++                      if (rounded_size_pages != TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9) {
++
++                              fprintf(stderr,
++                                      "InnoDB: Warning: doublewrite buffer file %s"
++                                      " is of a different size\n"
++                                      "InnoDB: %lu pages"
++                                      " (rounded down to MB)\n"
++                                      "InnoDB: than intended size"
++                                      " %lu pages...\n",
++                                      name,
++                                      (ulong) rounded_size_pages,
++                                      (ulong) TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9);
++                      }
++
++                      fil_read_flushed_lsn_and_arch_log_no(
++                              files[i], one_opened,
++#ifdef UNIV_LOG_ARCHIVE
++                              min_arch_log_no, max_arch_log_no,
++#endif /* UNIV_LOG_ARCHIVE */
++                              min_flushed_lsn, max_flushed_lsn);
++                      one_opened = TRUE;
++              } else {
++                      /* We created the data file and now write it full of
++                      zeros */
++
++                      *create_new_doublewrite_file = TRUE;
++
++                      ut_print_timestamp(stderr);
++                      fprintf(stderr,
++                              "  InnoDB: Doublewrite buffer file %s did not"
++                              " exist: new to be created\n",
++                              name);
++
++                      if (*create_new_db == FALSE) {
++                              fprintf(stderr,
++                                      "InnoDB: Warning: Previous version's ibdata files may cause crash.\n"
++                                      "        If you use that, please use the ibdata files of this version.\n");
++                      }
++
++                      ut_print_timestamp(stderr);
++                      fprintf(stderr,
++                              "  InnoDB: Setting file %s size to %lu MB\n",
++                              name,
++                              (ulong) ((TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9)
++                                       >> (20 - UNIV_PAGE_SIZE_SHIFT)));
++
++                      fprintf(stderr,
++                              "InnoDB: Database physically writes the"
++                              " file full: wait...\n");
++
++                      ret = os_file_set_size(
++                              name, files[i],
++                              srv_calc_low32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9),
++                              srv_calc_high32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9));
++
++                      if (!ret) {
++                              fprintf(stderr,
++                                      "InnoDB: Error in creating %s:"
++                                      " probably out of disk space\n", name);
++
++                              return(DB_ERROR);
++                      }
++              }
++
++              ret = os_file_close(files[i]);
++              ut_a(ret);
++
++              fil_space_create(name, TRX_DOUBLEWRITE_SPACE, 0, FIL_TABLESPACE);
++
++              ut_a(fil_validate());
++
++              fil_node_create(name, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, TRX_DOUBLEWRITE_SPACE, FALSE);
++
++              i++;
++      }
++
+       return(DB_SUCCESS);
+ }
+@@ -997,6 +1135,7 @@
+ /*====================================*/
+ {
+       ibool           create_new_db;
++      ibool           create_new_doublewrite_file;
+       ibool           log_file_created;
+       ibool           log_created     = FALSE;
+       ibool           log_opened      = FALSE;
+@@ -1416,6 +1555,7 @@
+       }
+       err = open_or_create_data_files(&create_new_db,
++                                      &create_new_doublewrite_file,
+ #ifdef UNIV_LOG_ARCHIVE
+                                       &min_arch_log_no, &max_arch_log_no,
+ #endif /* UNIV_LOG_ARCHIVE */
+@@ -1545,6 +1685,14 @@
+               after the double write buffer has been created. */
+               trx_sys_create();
++              if (create_new_doublewrite_file) {
++                      mtr_start(&mtr);
++                      fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr);
++                      mtr_commit(&mtr);
++
++                      trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE);
++              }
++
+               dict_create();
+               srv_startup_is_before_trx_rollback_phase = FALSE;
+@@ -1577,6 +1725,13 @@
+               recv_recovery_from_archive_finish();
+ #endif /* UNIV_LOG_ARCHIVE */
+       } else {
++              char*   save_srv_doublewrite_file = NULL;
++
++              if (create_new_doublewrite_file) {
++                      /* doublewrite_file cannot be used for recovery yet. */
++                      save_srv_doublewrite_file = srv_doublewrite_file;
++                      srv_doublewrite_file = NULL;
++              }
+               /* Check if we support the max format that is stamped
+               on the system tablespace. 
+@@ -1663,6 +1818,17 @@
+               we have finished the recovery process so that the
+               image of TRX_SYS_PAGE_NO is not stale. */
+               trx_sys_file_format_tag_init();
++
++              if (create_new_doublewrite_file) {
++                      /* restore the value */
++                      srv_doublewrite_file = save_srv_doublewrite_file;
++
++                      mtr_start(&mtr);
++                      fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr);
++                      mtr_commit(&mtr);
++
++                      trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE);
++              }
+       }
+       if (!create_new_db && sum_of_new_sizes > 0) {
+diff -ruN a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
+--- a/storage/innobase/trx/trx0sys.c   2010-12-03 17:32:15.651024019 +0900
++++ b/storage/innobase/trx/trx0sys.c   2010-12-04 15:35:58.664550291 +0900
+@@ -414,6 +414,152 @@
+               goto start_again;
+       }
++
++    if (srv_doublewrite_file) {
++      /* the same doublewrite buffer to TRX_SYS_SPACE should exist.
++      check and create if not exist.*/
++
++      mtr_start(&mtr);
++      trx_doublewrite_buf_is_being_created = TRUE;
++
++      block = buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, TRX_SYS_PAGE_NO,
++                           RW_X_LATCH, &mtr);
++      buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
++
++      doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
++
++      if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
++          == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
++              /* The doublewrite buffer has already been created:
++              just read in some numbers */
++
++              mtr_commit(&mtr);
++      } else {
++              fprintf(stderr,
++                      "InnoDB: Doublewrite buffer not found in the doublewrite file:"
++                      " creating new\n");
++
++              if (buf_pool_get_curr_size()
++                  < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
++                      + FSP_EXTENT_SIZE / 2 + 100)
++                     * UNIV_PAGE_SIZE)) {
++                      fprintf(stderr,
++                              "InnoDB: Cannot create doublewrite buffer:"
++                              " you must\n"
++                              "InnoDB: increase your buffer pool size.\n"
++                              "InnoDB: Cannot continue operation.\n");
++
++                      exit(1);
++              }
++
++              block2 = fseg_create(TRX_DOUBLEWRITE_SPACE, TRX_SYS_PAGE_NO,
++                                   TRX_SYS_DOUBLEWRITE
++                                   + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
++
++              /* fseg_create acquires a second latch on the page,
++              therefore we must declare it: */
++
++              buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
++
++              if (block2 == NULL) {
++                      fprintf(stderr,
++                              "InnoDB: Cannot create doublewrite buffer:"
++                              " you must\n"
++                              "InnoDB: increase your tablespace size.\n"
++                              "InnoDB: Cannot continue operation.\n");
++
++                      /* We exit without committing the mtr to prevent
++                      its modifications to the database getting to disk */
++
++                      exit(1);
++              }
++
++              fseg_header = buf_block_get_frame(block)
++                      + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
++              prev_page_no = 0;
++
++              for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
++                           + FSP_EXTENT_SIZE / 2; i++) {
++                      page_no = fseg_alloc_free_page(fseg_header,
++                                                     prev_page_no + 1,
++                                                     FSP_UP, &mtr);
++                      if (page_no == FIL_NULL) {
++                              fprintf(stderr,
++                                      "InnoDB: Cannot create doublewrite"
++                                      " buffer: you must\n"
++                                      "InnoDB: increase your"
++                                      " tablespace size.\n"
++                                      "InnoDB: Cannot continue operation.\n"
++                                      );
++
++                              exit(1);
++                      }
++
++                      /* We read the allocated pages to the buffer pool;
++                      when they are written to disk in a flush, the space
++                      id and page number fields are also written to the
++                      pages. When we at database startup read pages
++                      from the doublewrite buffer, we know that if the
++                      space id and page number in them are the same as
++                      the page position in the tablespace, then the page
++                      has not been written to in doublewrite. */
++
++#ifdef UNIV_SYNC_DEBUG
++                      new_block =
++#endif /* UNIV_SYNC_DEBUG */
++                      buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, page_no,
++                                   RW_X_LATCH, &mtr);
++                      buf_block_dbg_add_level(new_block,
++                                              SYNC_NO_ORDER_CHECK);
++
++                      if (i == FSP_EXTENT_SIZE / 2) {
++                              ut_a(page_no == FSP_EXTENT_SIZE);
++                              mlog_write_ulint(doublewrite
++                                               + TRX_SYS_DOUBLEWRITE_BLOCK1,
++                                               page_no, MLOG_4BYTES, &mtr);
++                              mlog_write_ulint(doublewrite
++                                               + TRX_SYS_DOUBLEWRITE_REPEAT
++                                               + TRX_SYS_DOUBLEWRITE_BLOCK1,
++                                               page_no, MLOG_4BYTES, &mtr);
++                      } else if (i == FSP_EXTENT_SIZE / 2
++                                 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
++                              ut_a(page_no == 2 * FSP_EXTENT_SIZE);
++                              mlog_write_ulint(doublewrite
++                                               + TRX_SYS_DOUBLEWRITE_BLOCK2,
++                                               page_no, MLOG_4BYTES, &mtr);
++                              mlog_write_ulint(doublewrite
++                                               + TRX_SYS_DOUBLEWRITE_REPEAT
++                                               + TRX_SYS_DOUBLEWRITE_BLOCK2,
++                                               page_no, MLOG_4BYTES, &mtr);
++                      } else if (i > FSP_EXTENT_SIZE / 2) {
++                              ut_a(page_no == prev_page_no + 1);
++                      }
++
++                      prev_page_no = page_no;
++              }
++
++              mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
++                               TRX_SYS_DOUBLEWRITE_MAGIC_N,
++                               MLOG_4BYTES, &mtr);
++              mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
++                               + TRX_SYS_DOUBLEWRITE_REPEAT,
++                               TRX_SYS_DOUBLEWRITE_MAGIC_N,
++                               MLOG_4BYTES, &mtr);
++
++              mlog_write_ulint(doublewrite
++                               + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
++                               TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
++                               MLOG_4BYTES, &mtr);
++              mtr_commit(&mtr);
++
++              /* Flush the modified pages to disk and make a checkpoint */
++              log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
++
++              fprintf(stderr, "InnoDB: Doublewrite buffer created in the doublewrite file\n");
++              trx_sys_multiple_tablespace_format = TRUE;
++      }
++      trx_doublewrite_buf_is_being_created = FALSE;
++    }
+ }
+ /****************************************************************//**
+@@ -437,10 +583,19 @@
+       ulint   source_page_no;
+       byte*   page;
+       byte*   doublewrite;
++      ulint   doublewrite_space_id;
+       ulint   space_id;
+       ulint   page_no;
+       ulint   i;
++      doublewrite_space_id = (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
++
++      if (srv_doublewrite_file) {
++              fprintf(stderr,
++                      "InnoDB: doublewrite file '%s' is used.\n",
++                      srv_doublewrite_file);
++      }
++
+       /* We do the file i/o past the buffer pool */
+       unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
+@@ -449,7 +604,7 @@
+       /* Read the trx sys header to check if we are using the doublewrite
+       buffer */
+-      fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
++      fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, TRX_SYS_PAGE_NO, 0,
+              UNIV_PAGE_SIZE, read_buf, NULL);
+       doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
+@@ -487,10 +642,10 @@
+       /* Read the pages from the doublewrite buffer to memory */
+-      fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
++      fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block1, 0,
+              TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+              buf, NULL);
+-      fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
++      fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block2, 0,
+              TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+              buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+              NULL);
+@@ -546,7 +701,8 @@
+                               " doublewrite buf.\n",
+                               (ulong) space_id, (ulong) page_no, (ulong) i);
+-              } else if (space_id == TRX_SYS_SPACE
++              } else if ((space_id == TRX_SYS_SPACE
++                          || (srv_doublewrite_file && space_id == TRX_DOUBLEWRITE_SPACE))
+                          && ((page_no >= block1
+                               && page_no
+                               < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
+@@ -990,6 +1146,83 @@
+ }
+ /*****************************************************************//**
++Creates a dummy transaction system file page (doublewrite magic cleared) in a tablespace. */
++static
++void
++trx_sysf_dummy_create(
++/*==================*/
++      ulint   space,  /*!< in: space id */
++      mtr_t*  mtr)    /*!< in: mtr */
++{
++      buf_block_t*    block;
++      page_t*         page;
++
++      ut_ad(mtr);
++
++      /* Note that below we first reserve the file space x-latch, and
++      then enter the kernel: we must do it in this order to conform
++      to the latching order rules. */
++
++      mtr_x_lock(fil_space_get_latch(space, NULL), mtr);
++      mutex_enter(&kernel_mutex);
++
++      /* Create the trx sys file block in a new allocated file segment */
++      block = fseg_create(space, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
++                          mtr);
++      buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
++
++      /* the block returned by fseg_create() must be page TRX_SYS_PAGE_NO */
++      ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
++
++      page = buf_block_get_frame(block);
++
++      mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
++                       MLOG_2BYTES, mtr);
++
++      /* Reset the doublewrite buffer magic number to zero so that we
++      know that the doublewrite buffer has not yet been created (this
++      suppresses a Valgrind warning) */
++
++      mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
++                       + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
++
++#ifdef UNDEFINED
++      /* TODO: REMOVE IT: The code below is not needed, I think */
++      sys_header = trx_sysf_get(mtr);
++
++      /* Start counting transaction ids from number 1 up */
++      mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
++                        ut_dulint_create(0, 1), mtr);
++
++      /* Reset the rollback segment slots */
++      for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
++
++              trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
++              trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
++      }
++
++      /* The remaining area (up to the page trailer) is uninitialized.
++      Silence Valgrind warnings about it. */
++      UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
++                                   + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
++                                   + TRX_SYS_RSEG_SPACE),
++                     (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
++                      - (TRX_SYS_RSEGS
++                         + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
++                         + TRX_SYS_RSEG_SPACE))
++                     + page - sys_header);
++
++      /* Create the first rollback segment in the SYSTEM tablespace */
++      page_no = trx_rseg_header_create(space, 0, ULINT_MAX, &slot_no,
++                                       mtr);
++      ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
++      ut_a(page_no != FIL_NULL);
++#endif
++
++      mutex_exit(&kernel_mutex);
++}
++
++/*****************************************************************//**
+ Creates and initializes the central memory structures for the transaction
+ system. This is called when the database is started. */
+ UNIV_INTERN
+@@ -1351,6 +1584,26 @@
+       /* Does nothing at the moment */
+ }
++/*****************************************************************//**
++Builds the dummy transaction system page of a tablespace inside its
++own mtr; only TRX_DOUBLEWRITE_SPACE is accepted. */
++UNIV_INTERN
++void
++trx_sys_dummy_create(
++/*=================*/
++      ulint   space)
++{
++      mtr_t   local_mtr;
++
++      /* no other tablespace gets a dummy page for now */
++      ut_a(TRX_DOUBLEWRITE_SPACE == space);
++
++      /* start the mtr, create the page, commit */
++      mtr_start(&local_mtr);
++      trx_sysf_dummy_create(space, &local_mtr);
++      mtr_commit(&local_mtr);
++}
++
+ /*********************************************************************
+ Creates the rollback segments */
+ UNIV_INTERN
diff --git a/innodb_show_lock_name.patch b/innodb_show_lock_name.patch
new file mode 100644 (file)
index 0000000..f4f4730
--- /dev/null
@@ -0,0 +1,412 @@
+# name       : innodb_show_lock_name.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:34:35.285040381 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:35:12.974975252 +0900
+@@ -9491,8 +9491,8 @@
+                       rw_lock_wait_time += mutex->lspent_time;
+               }
+ #else /* UNIV_DEBUG */
+-              buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu",
+-                                   mutex->cfile_name, (ulong) mutex->cline);
++              buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s",
++                                   mutex->cmutex_name);
+               buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu",
+                                    (ulong) mutex->count_os_wait);
+@@ -9507,9 +9507,8 @@
+       if (block_mutex) {
+               buf1len = (uint) my_snprintf(buf1, sizeof buf1,
+-                                           "combined %s:%lu",
+-                                           block_mutex->cfile_name,
+-                                           (ulong) block_mutex->cline);
++                                           "combined %s",
++                                           block_mutex->cmutex_name);
+               buf2len = (uint) my_snprintf(buf2, sizeof buf2,
+                                            "os_waits=%lu",
+                                            (ulong) block_mutex_oswait_count);
+@@ -9538,8 +9537,8 @@
+                       continue;
+               }
+-              buf1len = my_snprintf(buf1, sizeof buf1, "%s:%lu",
+-                                   lock->cfile_name, (ulong) lock->cline);
++              buf1len = my_snprintf(buf1, sizeof buf1, "%s",
++                                   lock->lock_name);
+               buf2len = my_snprintf(buf2, sizeof buf2, "os_waits=%lu",
+                                     (ulong) lock->count_os_wait);
+@@ -9553,9 +9552,8 @@
+       if (block_lock) {
+               buf1len = (uint) my_snprintf(buf1, sizeof buf1,
+-                                           "combined %s:%lu",
+-                                           block_lock->cfile_name,
+-                                           (ulong) block_lock->cline);
++                                           "combined %s",
++                                           block_lock->lock_name);
+               buf2len = (uint) my_snprintf(buf2, sizeof buf2,
+                                            "os_waits=%lu",
+                                            (ulong) block_lock_oswait_count);
+diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
+--- a/storage/innobase/include/sync0rw.h       2010-12-03 15:49:59.225953164 +0900
++++ b/storage/innobase/include/sync0rw.h       2010-12-03 17:35:12.978024458 +0900
+@@ -144,7 +144,7 @@
+ #  endif/* UNIV_SYNC_DEBUG */
+ # else /* UNIV_DEBUG */
+ #  define rw_lock_create(K, L, level)                         \
+-      rw_lock_create_func((L), __FILE__, __LINE__)
++      rw_lock_create_func((L), #L, NULL, 0)
+ # endif       /* UNIV_DEBUG */
+ /**************************************************************//**
+@@ -197,7 +197,7 @@
+ #  endif/* UNIV_SYNC_DEBUG */
+ # else        /* UNIV_DEBUG */
+ #  define rw_lock_create(K, L, level)                         \
+-      pfs_rw_lock_create_func((K), (L), __FILE__, __LINE__)
++      pfs_rw_lock_create_func((K), (L), #L, NULL, 0)
+ # endif       /* UNIV_DEBUG */
+ /******************************************************************
+@@ -255,8 +255,8 @@
+ # ifdef UNIV_SYNC_DEBUG
+       ulint           level,          /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+-      const char*     cmutex_name,    /*!< in: mutex name */
+ #endif /* UNIV_DEBUG */
++      const char*     cmutex_name,    /*!< in: mutex name */
+       const char*     cfile_name,     /*!< in: file name where created */
+       ulint           cline);         /*!< in: file line where created */
+ /******************************************************************//**
+@@ -609,7 +609,8 @@
+       struct PSI_rwlock *pfs_psi;/*!< The instrumentation hook */
+ #endif
+       ulint count_os_wait;    /*!< Count of os_waits. May not be accurate */
+-      const char*     cfile_name;/*!< File name where lock created */
++      //const char*   cfile_name;/*!< File name where lock created */
++      const char*     lock_name;/*!< lock name */
+         /* last s-lock file/line is not guaranteed to be correct */
+       const char*     last_s_file_name;/*!< File name where last s-locked */
+       const char*     last_x_file_name;/*!< File name where last x-locked */
+@@ -620,7 +621,7 @@
+                               are at the start of this struct, thus we can
+                               peek this field without causing much memory
+                               bus traffic */
+-      unsigned        cline:14;       /*!< Line where created */
++      //unsigned      cline:14;       /*!< Line where created */
+       unsigned        last_s_line:14; /*!< Line number where last time s-locked */
+       unsigned        last_x_line:14; /*!< Line number where last time x-locked */
+ #ifdef UNIV_DEBUG
+@@ -690,8 +691,8 @@
+ # ifdef UNIV_SYNC_DEBUG
+       ulint           level,          /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+-      const char*     cmutex_name,    /*!< in: mutex name */
+ #endif /* UNIV_DEBUG */
++      const char*     cmutex_name,    /*!< in: mutex name */
+       const char*     cfile_name,     /*!< in: file name where created */
+       ulint           cline);         /*!< in: file line where created */
+diff -ruN a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
+--- a/storage/innobase/include/sync0rw.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0rw.ic      2010-12-03 17:35:12.980024605 +0900
+@@ -640,8 +640,8 @@
+ #  ifdef UNIV_SYNC_DEBUG
+       ulint           level,          /*!< in: level */
+ #  endif /* UNIV_SYNC_DEBUG */
+-      const char*     cmutex_name,    /*!< in: mutex name */
+ # endif /* UNIV_DEBUG */
++      const char*     cmutex_name,    /*!< in: mutex name */
+       const char*     cfile_name,     /*!< in: file name where created */
+       ulint           cline)          /*!< in: file line where created */
+ {
+@@ -656,8 +656,8 @@
+ #  ifdef UNIV_SYNC_DEBUG
+                           level,
+ #  endif /* UNIV_SYNC_DEBUG */
+-                          cmutex_name,
+ # endif /* UNIV_DEBUG */
++                          cmutex_name,
+                           cfile_name,
+                           cline);
+ }
+diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
+--- a/storage/innobase/include/sync0sync.h     2010-12-03 15:49:59.227955503 +0900
++++ b/storage/innobase/include/sync0sync.h     2010-12-03 17:35:12.982023946 +0900
+@@ -166,7 +166,7 @@
+ #  endif/* UNIV_SYNC_DEBUG */
+ # else
+ #  define mutex_create(K, M, level)                           \
+-      pfs_mutex_create_func((K), (M), __FILE__, __LINE__)
++      pfs_mutex_create_func((K), (M), #M, NULL, 0)
+ # endif       /* UNIV_DEBUG */
+ # define mutex_enter(M)                                               \
+@@ -193,7 +193,7 @@
+ #  endif /* UNIV_SYNC_DEBUG */
+ # else /* UNIV_DEBUG */
+ #  define mutex_create(K, M, level)                           \
+-      mutex_create_func((M), __FILE__, __LINE__)
++      mutex_create_func((M), #M, NULL, 0)
+ # endif       /* UNIV_DEBUG */
+ # define mutex_enter(M)       mutex_enter_func((M), __FILE__, __LINE__)
+@@ -217,8 +217,8 @@
+ mutex_create_func(
+ /*==============*/
+       mutex_t*        mutex,          /*!< in: pointer to memory */
+-#ifdef UNIV_DEBUG
+       const char*     cmutex_name,    /*!< in: mutex name */
++#ifdef UNIV_DEBUG
+ # ifdef UNIV_SYNC_DEBUG
+       ulint           level,          /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+@@ -291,8 +291,8 @@
+ /*==================*/
+       PSI_mutex_key   key,            /*!< in: Performance Schema key */
+       mutex_t*        mutex,          /*!< in: pointer to memory */
+-# ifdef UNIV_DEBUG
+       const char*     cmutex_name,    /*!< in: mutex name */
++# ifdef UNIV_DEBUG
+ #  ifdef UNIV_SYNC_DEBUG
+       ulint           level,          /*!< in: level */
+ #  endif /* UNIV_SYNC_DEBUG */
+@@ -723,9 +723,9 @@
+       ulint   line;           /*!< Line where the mutex was locked */
+       ulint   level;          /*!< Level in the global latching order */
+ #endif /* UNIV_SYNC_DEBUG */
++#ifdef UNIV_DEBUG
+       const char*     cfile_name;/*!< File name where mutex created */
+       ulint           cline;  /*!< Line where created */
+-#ifdef UNIV_DEBUG
+       os_thread_id_t thread_id; /*!< The thread id of the thread
+                               which locked the mutex. */
+       ulint           magic_n;        /*!< MUTEX_MAGIC_N */
+@@ -740,9 +740,9 @@
+       ulong           count_os_yield; /*!< count of os_wait */
+       ulonglong       lspent_time;    /*!< mutex os_wait timer msec */
+       ulonglong       lmax_spent_time;/*!< mutex os_wait timer msec */
+-      const char*     cmutex_name;    /*!< mutex name */
+       ulint           mutex_type;     /*!< 0=usual mutex, 1=rw_lock mutex */
+ #endif /* UNIV_DEBUG */
++      const char*     cmutex_name;    /*!< mutex name */
+ #ifdef UNIV_PFS_MUTEX
+       struct PSI_mutex* pfs_psi;      /*!< The performance schema
+                                       instrumentation hook */
+diff -ruN a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
+--- a/storage/innobase/include/sync0sync.ic    2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0sync.ic    2010-12-03 17:35:12.984024599 +0900
+@@ -320,8 +320,8 @@
+ /*==================*/
+       mysql_pfs_key_t key,            /*!< in: Performance Schema key */
+       mutex_t*        mutex,          /*!< in: pointer to memory */
+-# ifdef UNIV_DEBUG
+       const char*     cmutex_name,    /*!< in: mutex name */
++# ifdef UNIV_DEBUG
+ #  ifdef UNIV_SYNC_DEBUG
+       ulint           level,          /*!< in: level */
+ #  endif /* UNIV_SYNC_DEBUG */
+@@ -334,8 +334,8 @@
+                               : NULL;
+       mutex_create_func(mutex,
+-# ifdef UNIV_DEBUG
+                         cmutex_name,
++# ifdef UNIV_DEBUG
+ #  ifdef UNIV_SYNC_DEBUG
+                         level,
+ #  endif /* UNIV_SYNC_DEBUG */
+diff -ruN a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c
+--- a/storage/innobase/sync/sync0arr.c 2010-12-03 15:09:51.304953409 +0900
++++ b/storage/innobase/sync/sync0arr.c 2010-12-03 17:35:12.985024561 +0900
+@@ -488,12 +488,12 @@
+               mutex = cell->old_wait_mutex;
+               fprintf(file,
+-                      "Mutex at %p created file %s line %lu, lock var %lu\n"
++                      "Mutex at %p '%s', lock var %lu\n"
+ #ifdef UNIV_SYNC_DEBUG
+                       "Last time reserved in file %s line %lu, "
+ #endif /* UNIV_SYNC_DEBUG */
+                       "waiters flag %lu\n",
+-                      (void*) mutex, mutex->cfile_name, (ulong) mutex->cline,
++                      (void*) mutex, mutex->cmutex_name,
+                       (ulong) mutex->lock_word,
+ #ifdef UNIV_SYNC_DEBUG
+                       mutex->file_name, (ulong) mutex->line,
+@@ -511,9 +511,8 @@
+               rwlock = cell->old_wait_rw_lock;
+               fprintf(file,
+-                      " RW-latch at %p created in file %s line %lu\n",
+-                      (void*) rwlock, rwlock->cfile_name,
+-                      (ulong) rwlock->cline);
++                      " RW-latch at %p '%s'\n",
++                      (void*) rwlock, rwlock->lock_name);
+               writer = rw_lock_get_writer(rwlock);
+               if (writer != RW_LOCK_NOT_LOCKED) {
+                       fprintf(file,
+diff -ruN a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c
+--- a/storage/innobase/sync/sync0rw.c  2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/sync/sync0rw.c  2010-12-03 17:35:12.987029059 +0900
+@@ -241,8 +241,8 @@
+ # ifdef UNIV_SYNC_DEBUG
+       ulint           level,          /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+-      const char*     cmutex_name,    /*!< in: mutex name */
+ #endif /* UNIV_DEBUG */
++      const char*     cmutex_name,    /*!< in: mutex name */
+       const char*     cfile_name,     /*!< in: file name where created */
+       ulint           cline)          /*!< in: file line where created */
+ {
+@@ -253,14 +253,15 @@
+       mutex_create(rw_lock_mutex_key, rw_lock_get_mutex(lock),
+                    SYNC_NO_ORDER_CHECK);
+-      lock->mutex.cfile_name = cfile_name;
+-      lock->mutex.cline = cline;
++      ut_d(lock->mutex.cfile_name = cfile_name);
++      ut_d(lock->mutex.cline = cline);
+-      ut_d(lock->mutex.cmutex_name = cmutex_name);
++      lock->mutex.cmutex_name = cmutex_name;
+       ut_d(lock->mutex.mutex_type = 1);
+ #else /* INNODB_RW_LOCKS_USE_ATOMICS */
+ # ifdef UNIV_DEBUG
+-      UT_NOT_USED(cmutex_name);
++      UT_NOT_USED(cfile_name);
++      UT_NOT_USED(cline);
+ # endif
+ #endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+@@ -280,8 +281,7 @@
+       ut_d(lock->magic_n = RW_LOCK_MAGIC_N);
+-      lock->cfile_name = cfile_name;
+-      lock->cline = (unsigned int) cline;
++      lock->lock_name = cmutex_name;
+       lock->count_os_wait = 0;
+       lock->last_s_file_name = "not yet reserved";
+@@ -401,10 +401,10 @@
+       if (srv_print_latch_waits) {
+               fprintf(stderr,
+                       "Thread %lu spin wait rw-s-lock at %p"
+-                      " cfile %s cline %lu rnds %lu\n",
++                      " '%s' rnds %lu\n",
+                       (ulong) os_thread_pf(os_thread_get_curr_id()),
+                       (void*) lock,
+-                      lock->cfile_name, (ulong) lock->cline, (ulong) i);
++                      lock->lock_name, (ulong) i);
+       }
+       /* We try once again to obtain the lock */
+@@ -437,10 +437,9 @@
+               if (srv_print_latch_waits) {
+                       fprintf(stderr,
+                               "Thread %lu OS wait rw-s-lock at %p"
+-                              " cfile %s cline %lu\n",
++                              " '%s'\n",
+                               os_thread_pf(os_thread_get_curr_id()),
+-                              (void*) lock, lock->cfile_name,
+-                              (ulong) lock->cline);
++                              (void*) lock, lock->lock_name);
+               }
+               /* these stats may not be accurate */
+@@ -659,9 +658,9 @@
+       if (srv_print_latch_waits) {
+               fprintf(stderr,
+                       "Thread %lu spin wait rw-x-lock at %p"
+-                      " cfile %s cline %lu rnds %lu\n",
++                      " '%s' rnds %lu\n",
+                       os_thread_pf(os_thread_get_curr_id()), (void*) lock,
+-                      lock->cfile_name, (ulong) lock->cline, (ulong) i);
++                      lock->lock_name, (ulong) i);
+       }
+       sync_array_reserve_cell(sync_primary_wait_array,
+@@ -682,9 +681,9 @@
+       if (srv_print_latch_waits) {
+               fprintf(stderr,
+                       "Thread %lu OS wait for rw-x-lock at %p"
+-                      " cfile %s cline %lu\n",
++                      " '%s'\n",
+                       os_thread_pf(os_thread_get_curr_id()), (void*) lock,
+-                      lock->cfile_name, (ulong) lock->cline);
++                      lock->lock_name);
+       }
+       /* these stats may not be accurate */
+diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
+--- a/storage/innobase/sync/sync0sync.c        2010-12-03 15:49:59.233955565 +0900
++++ b/storage/innobase/sync/sync0sync.c        2010-12-03 17:35:12.989024400 +0900
+@@ -249,8 +249,8 @@
+ mutex_create_func(
+ /*==============*/
+       mutex_t*        mutex,          /*!< in: pointer to memory */
+-#ifdef UNIV_DEBUG
+       const char*     cmutex_name,    /*!< in: mutex name */
++#ifdef UNIV_DEBUG
+ # ifdef UNIV_SYNC_DEBUG
+       ulint           level,          /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+@@ -274,11 +274,13 @@
+       mutex->file_name = "not yet reserved";
+       mutex->level = level;
+ #endif /* UNIV_SYNC_DEBUG */
++#ifdef UNIV_DEBUG
+       mutex->cfile_name = cfile_name;
+       mutex->cline = cline;
++#endif /* UNIV_DEBUG */
+       mutex->count_os_wait = 0;
+-#ifdef UNIV_DEBUG
+       mutex->cmutex_name=       cmutex_name;
++#ifdef UNIV_DEBUG
+       mutex->count_using=       0;
+       mutex->mutex_type=        0;
+       mutex->lspent_time=       0;
+@@ -532,9 +534,9 @@
+ #ifdef UNIV_SRV_PRINT_LATCH_WAITS
+       fprintf(stderr,
+               "Thread %lu spin wait mutex at %p"
+-              " cfile %s cline %lu rnds %lu\n",
++              " '%s' rnds %lu\n",
+               (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
+-              mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
++              mutex->cmutex_name, (ulong) i);
+ #endif
+       mutex_spin_round_count += i;
+@@ -609,9 +611,9 @@
+ #ifdef UNIV_SRV_PRINT_LATCH_WAITS
+       fprintf(stderr,
+-              "Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
++              "Thread %lu OS wait mutex at %p '%s' rnds %lu\n",
+               (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
+-              mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
++              mutex->cmutex_name, (ulong) i);
+ #endif
+       mutex_os_wait_count++;
+@@ -913,9 +915,8 @@
+                               if (mutex->magic_n == MUTEX_MAGIC_N) {
+                                       fprintf(stderr,
+-                                              "Mutex created at %s %lu\n",
+-                                              mutex->cfile_name,
+-                                              (ulong) mutex->cline);
++                                              "Mutex '%s'\n",
++                                              mutex->cmutex_name);
+                                       if (mutex_get_lock_word(mutex) != 0) {
+                                               const char*     file_name;
diff --git a/innodb_show_status.patch b/innodb_show_status.patch
new file mode 100644 (file)
index 0000000..aa465b3
--- /dev/null
@@ -0,0 +1,595 @@
+# name       : innodb_show_status.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-03 15:07:31.786968193 +0900
+@@ -4812,14 +4812,16 @@
+       buf_flush_list_mutex_enter(buf_pool);
+       fprintf(file,
+-              "Buffer pool size   %lu\n"
+-              "Free buffers       %lu\n"
+-              "Database pages     %lu\n"
+-              "Old database pages %lu\n"
+-              "Modified db pages  %lu\n"
++              "Buffer pool size        %lu\n"
++              "Buffer pool size, bytes %lu\n"
++              "Free buffers            %lu\n"
++              "Database pages          %lu\n"
++              "Old database pages      %lu\n"
++              "Modified db pages       %lu\n"
+               "Pending reads %lu\n"
+               "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
+               (ulong) buf_pool->curr_size,
++              (ulong) buf_pool->curr_size * UNIV_PAGE_SIZE,
+               (ulong) UT_LIST_GET_LEN(buf_pool->free),
+               (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
+               (ulong) buf_pool->LRU_old_len,
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c   2010-12-03 20:58:26.000000000 +0300
++++ b/storage/innobase/buf/buf0flu.c   2011-01-07 03:37:41.000000000 +0300
+@@ -75,7 +75,7 @@
+ static buf_flush_stat_t       buf_flush_stat_sum;
+ /** Number of pages flushed through non flush_list flushes. */
+-static ulint buf_lru_flush_page_count = 0;
++// static ulint buf_lru_flush_page_count = 0;
+ /* @} */
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/fil/fil0fil.c   2010-12-03 15:07:31.790357112 +0900
+@@ -4858,3 +4858,30 @@
+       fil_system = NULL;
+ }
++
++/*************************************************************************
++Returns the total cell count of fil_system's spaces and name_hash tables. */
++
++ulint
++fil_system_hash_cells(void)
++/*=======================*/
++{
++       if (!fil_system) {      /* no fil_system: nothing to count */
++               return 0;
++       }
++
++       return (fil_system->spaces->n_cells
++               + fil_system->name_hash->n_cells);
++}
++
++/* Returns space_list length times per-entry size; 0 without fil_system. */
++ulint
++fil_system_hash_nodes(void)
++/*=======================*/
++{
++       if (!fil_system) {
++               return 0;
++       }
++       return (UT_LIST_GET_LEN(fil_system->space_list)
++               * (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE));
++}
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:06:58.727955654 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:07:31.799376984 +0900
+@@ -584,6 +584,8 @@
+   (char*) &export_vars.innodb_buffer_pool_pages_dirty,          SHOW_LONG},
+   {"buffer_pool_pages_flushed",
+   (char*) &export_vars.innodb_buffer_pool_pages_flushed,  SHOW_LONG},
++  {"buffer_pool_pages_LRU_flushed",
++  (char*) &export_vars.innodb_buffer_pool_pages_LRU_flushed,  SHOW_LONG},
+   {"buffer_pool_pages_free",
+   (char*) &export_vars.innodb_buffer_pool_pages_free,   SHOW_LONG},
+ #ifdef UNIV_DEBUG
+@@ -10975,6 +10977,16 @@
+   "Force InnoDB to not use next-key locking, to use only row-level locking.",
+   NULL, NULL, FALSE);
++static MYSQL_SYSVAR_ULONG(show_verbose_locks, srv_show_verbose_locks,
++  PLUGIN_VAR_OPCMDARG,
++  "Whether to show records locked in SHOW INNODB STATUS.",
++  NULL, NULL, 0, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(show_locks_held, srv_show_locks_held,
++  PLUGIN_VAR_RQCMDARG,
++  "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.",
++  NULL, NULL, 10, 0, 1000, 0);
++
+ #ifdef UNIV_LOG_ARCHIVE
+ static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
+   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+@@ -11162,7 +11174,7 @@
+ static MYSQL_SYSVAR_STR(version, innodb_version_str,
+   PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
+-  "InnoDB version", NULL, NULL, INNODB_VERSION_STR);
++  "Percona-InnoDB-plugin version", NULL, NULL, INNODB_VERSION_STR);
+ static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc,
+   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+@@ -11247,6 +11259,8 @@
+   MYSQL_SYSVAR(thread_concurrency),
+   MYSQL_SYSVAR(thread_sleep_delay),
+   MYSQL_SYSVAR(autoinc_lock_mode),
++  MYSQL_SYSVAR(show_verbose_locks),
++  MYSQL_SYSVAR(show_locks_held),
+   MYSQL_SYSVAR(version),
+   MYSQL_SYSVAR(use_sys_malloc),
+   MYSQL_SYSVAR(use_native_aio),
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/fil0fil.h       2010-12-03 15:07:31.812028575 +0900
+@@ -726,6 +726,17 @@
+ /*============================*/
+       ulint           id);    /*!< in: space id */
++/*************************************************************************
++Return local hash table information. */
++
++ulint
++fil_system_hash_cells(void);
++/*========================*/
++
++ulint
++fil_system_hash_nodes(void);
++/*========================*/
++
+ typedef       struct fil_space_struct fil_space_t;
+ #endif
+diff -ruN a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
+--- a/storage/innobase/include/read0read.h     2010-12-04 02:58:26.000000000 +0900
++++ b/storage/innobase/include/read0read.h     2011-01-21 19:35:44.127631727 +0900
+@@ -88,6 +88,7 @@
+ void
+ read_view_print(
+ /*============*/
++      FILE*                   file,
+       const read_view_t*      view);  /*!< in: read view */
+ /*********************************************************************//**
+ Create a consistent cursor view for mysql to be used in cursors. In this
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-03 15:07:31.813958103 +0900
+@@ -145,6 +145,9 @@
+ extern char   srv_adaptive_flushing;
++extern ulint    srv_show_locks_held;
++extern ulint    srv_show_verbose_locks;
++
+ /* The sort order table of the MySQL latin1_swedish_ci character set
+ collation */
+ extern const byte*    srv_latin1_ordering;
+@@ -318,6 +321,8 @@
+ buffer pool to disk */
+ extern ulint srv_buf_pool_flushed;
++extern ulint buf_lru_flush_page_count;
++
+ /** Number of buffer pool reads that led to the
+ reading of a disk page */
+ extern ulint srv_buf_pool_reads;
+@@ -691,6 +696,7 @@
+       ulint innodb_buffer_pool_reads;         /*!< srv_buf_pool_reads */
+       ulint innodb_buffer_pool_wait_free;     /*!< srv_buf_pool_wait_free */
+       ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */
++      ulint innodb_buffer_pool_pages_LRU_flushed;     /*!< buf_lru_flush_page_count */
+       ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+       ulint innodb_buffer_pool_read_ahead;    /*!< srv_read_ahead */
+       ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
+diff -ruN a/storage/innobase/include/thr0loc.h b/storage/innobase/include/thr0loc.h
+--- a/storage/innobase/include/thr0loc.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/thr0loc.h       2010-12-03 15:07:31.815081509 +0900
+@@ -83,6 +83,17 @@
+ thr_local_get_in_ibuf_field(void);
+ /*=============================*/
++/*************************************************************************
++Return local hash table information. */
++
++ulint
++thr_local_hash_cells(void);
++/*=======================*/
++
++ulint
++thr_local_hash_nodes(void);
++/*=======================*/
++
+ #ifndef UNIV_NONINL
+ #include "thr0loc.ic"
+ #endif
+diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
+--- a/storage/innobase/lock/lock0lock.c        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/lock/lock0lock.c        2010-12-03 15:07:31.819023998 +0900
+@@ -4302,6 +4302,7 @@
+       putc('\n', file);
++      if ( srv_show_verbose_locks ) {
+       block = buf_page_try_get(space, page_no, &mtr);
+       for (i = 0; i < lock_rec_get_n_bits(lock); ++i) {
+@@ -4328,6 +4329,7 @@
+               putc('\n', file);
+       }
++      }
+       mtr_commit(&mtr);
+       if (UNIV_LIKELY_NULL(heap)) {
+@@ -4511,7 +4513,7 @@
+               }
+       }
+-      if (!srv_print_innodb_lock_monitor) {
++        if (!srv_print_innodb_lock_monitor && !srv_show_locks_held) {
+               nth_trx++;
+               goto loop;
+       }
+@@ -4583,8 +4585,8 @@
+       nth_lock++;
+-      if (nth_lock >= 10) {
+-              fputs("10 LOCKS PRINTED FOR THIS TRX:"
++      if (nth_lock >= srv_show_locks_held) {
++              fputs("TOO MANY LOCKS PRINTED FOR THIS TRX:"
+                     " SUPPRESSING FURTHER PRINTS\n",
+                     file);
+diff -ruN a/storage/innobase/read/read0read.c b/storage/innobase/read/read0read.c
+--- a/storage/innobase/read/read0read.c        2010-12-04 02:58:26.000000000 +0900
++++ b/storage/innobase/read/read0read.c        2011-01-21 19:37:08.292650181 +0900
+@@ -357,34 +357,35 @@
+ void
+ read_view_print(
+ /*============*/
++      FILE*                   file,
+       const read_view_t*      view)   /*!< in: read view */
+ {
+       ulint   n_ids;
+       ulint   i;
+       if (view->type == VIEW_HIGH_GRANULARITY) {
+-              fprintf(stderr,
++              fprintf(file,
+                       "High-granularity read view undo_n:o %llu\n",
+                       (ullint) view->undo_no);
+       } else {
+-              fprintf(stderr, "Normal read view\n");
++              fprintf(file, "Normal read view\n");
+       }
+-      fprintf(stderr, "Read view low limit trx n:o " TRX_ID_FMT "\n",
++      fprintf(file, "Read view low limit trx n:o " TRX_ID_FMT "\n",
+               (ullint) view->low_limit_no);
+-      fprintf(stderr, "Read view up limit trx id " TRX_ID_FMT "\n",
++      fprintf(file, "Read view up limit trx id " TRX_ID_FMT "\n",
+               (ullint) view->up_limit_id);
+-      fprintf(stderr, "Read view low limit trx id " TRX_ID_FMT "\n",
++      fprintf(file, "Read view low limit trx id " TRX_ID_FMT "\n",
+               (ullint) view->low_limit_id);
+-      fprintf(stderr, "Read view individually stored trx ids:\n");
++      fprintf(file, "Read view individually stored trx ids:\n");
+       n_ids = view->n_trx_ids;
+       for (i = 0; i < n_ids; i++) {
+-              fprintf(stderr, "Read view trx id " TRX_ID_FMT "\n",
++              fprintf(file, "Read view trx id " TRX_ID_FMT "\n",
+                       (ullint) read_view_get_nth_trx_id(view, i));
+       }
+ }
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 15:07:31.824022673 +0900
+@@ -84,6 +84,7 @@
+ #include "ha_prototypes.h"
+ #include "trx0i_s.h"
+ #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
++#include "read0read.h"
+ #include "mysql/plugin.h"
+ #include "mysql/service_thd_wait.h"
+@@ -193,6 +194,9 @@
+ the checkpoints. */
+ UNIV_INTERN char      srv_adaptive_flushing   = TRUE;
++UNIV_INTERN ulint     srv_show_locks_held     = 10;
++UNIV_INTERN ulint     srv_show_verbose_locks  = 0;
++
+ /** Maximum number of times allowed to conditionally acquire
+ mutex before switching to blocking wait on the mutex */
+ #define MAX_MUTEX_NOWAIT      20
+@@ -311,6 +315,7 @@
+ /* variable to count the number of pages that were written from buffer
+ pool to the disk */
+ UNIV_INTERN ulint srv_buf_pool_flushed = 0;
++UNIV_INTERN ulint buf_lru_flush_page_count = 0;
+ /** Number of buffer pool reads that led to the
+ reading of a disk page */
+@@ -1787,6 +1792,13 @@
+       ulint   n_reserved;
+       ibool   ret;
++      ulint   btr_search_sys_subtotal;
++      ulint   lock_sys_subtotal;
++      ulint   recv_sys_subtotal;
++
++      ulint   i;
++      trx_t*  trx;
++
+       mutex_enter(&srv_innodb_monitor_mutex);
+       current_time = time(NULL);
+@@ -1835,31 +1847,6 @@
+       mutex_exit(&dict_foreign_err_mutex);
+-      /* Only if lock_print_info_summary proceeds correctly,
+-      before we call the lock_print_info_all_transactions
+-      to print all the lock information. */
+-      ret = lock_print_info_summary(file, nowait);
+-
+-      if (ret) {
+-              if (trx_start) {
+-                      long    t = ftell(file);
+-                      if (t < 0) {
+-                              *trx_start = ULINT_UNDEFINED;
+-                      } else {
+-                              *trx_start = (ulint) t;
+-                      }
+-              }
+-              lock_print_info_all_transactions(file);
+-              if (trx_end) {
+-                      long    t = ftell(file);
+-                      if (t < 0) {
+-                              *trx_end = ULINT_UNDEFINED;
+-                      } else {
+-                              *trx_end = (ulint) t;
+-                      }
+-              }
+-      }
+-
+       fputs("--------\n"
+             "FILE I/O\n"
+             "--------\n", file);
+@@ -1890,10 +1877,84 @@
+             "BUFFER POOL AND MEMORY\n"
+             "----------------------\n", file);
+       fprintf(file,
+-              "Total memory allocated " ULINTPF
+-              "; in additional pool allocated " ULINTPF "\n",
+-              ut_total_allocated_memory,
+-              mem_pool_get_reserved(mem_comm_pool));
++                      "Total memory allocated " ULINTPF
++                      "; in additional pool allocated " ULINTPF "\n",
++                      ut_total_allocated_memory,
++                      mem_pool_get_reserved(mem_comm_pool));
++      /* Calculate reserved memory */
++      btr_search_sys_subtotal = 0; /* stays 0 when btr_search_sys is NULL */
++      if (btr_search_sys && btr_search_sys->hash_index->heap) {
++              btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index->heap);
++      } else if (btr_search_sys) { /* no single heap: sum the per-mutex heaps */
++              for (i=0; i < btr_search_sys->hash_index->n_mutexes; i++) {
++                      btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index->heaps[i]);
++              }
++      }
++
++      lock_sys_subtotal = 0;
++      if (trx_sys) {
++              mutex_enter(&kernel_mutex);
++              trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
++              while (trx) {
++                      lock_sys_subtotal += ((trx->lock_heap) ? mem_heap_get_size(trx->lock_heap) : 0);
++                      trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
++              }
++              mutex_exit(&kernel_mutex);
++      }
++
++      recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash)
++                      ? mem_heap_get_size(recv_sys->heap) : 0);
++
++      fprintf(file,
++                      "Internal hash tables (constant factor + variable factor)\n"
++                      "    Adaptive hash index %lu \t(%lu + %lu)\n"
++                      "    Page hash           %lu (buffer pool 0 only)\n"
++                      "    Dictionary cache    %lu \t(%lu + %lu)\n"
++                      "    File system         %lu \t(%lu + %lu)\n"
++                      "    Lock system         %lu \t(%lu + %lu)\n"
++                      "    Recovery system     %lu \t(%lu + %lu)\n"
++                      "    Threads             %lu \t(%lu + %lu)\n",
++
++                      (ulong) ((btr_search_sys
++                              ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0)
++                      + btr_search_sys_subtotal), /* cast the whole sum: %lu expects ulong */
++                      (ulong) (btr_search_sys
++                              ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0),
++                      (ulong) btr_search_sys_subtotal,
++
++                      (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),
++
++                      (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
++                                              + dict_sys->table_id_hash->n_cells
++                                              ) * sizeof(hash_cell_t)
++                                      + dict_sys->size) : 0),
++                      (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
++                                                      + dict_sys->table_id_hash->n_cells
++                                                      ) * sizeof(hash_cell_t)) : 0),
++                      (ulong) (dict_sys ? (dict_sys->size) : 0),
++
++                      (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)
++                                      + fil_system_hash_nodes()),
++                      (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)),
++                      (ulong) fil_system_hash_nodes(),
++
++                      (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0)
++                                      + lock_sys_subtotal),
++                      (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0),
++                      (ulong) lock_sys_subtotal,
++
++                      (ulong) (((recv_sys && recv_sys->addr_hash)
++                                              ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0)
++                                      + recv_sys_subtotal),
++                      (ulong) ((recv_sys && recv_sys->addr_hash)
++                                      ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0),
++                      (ulong) recv_sys_subtotal,
++
++                      (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)
++                                      + thr_local_hash_nodes()),
++                      (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)),
++                      (ulong) thr_local_hash_nodes());
++
+       fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
+               dict_sys->size);
+@@ -1909,6 +1970,16 @@
+       fprintf(file, "%lu read views open inside InnoDB\n",
+               UT_LIST_GET_LEN(trx_sys->view_list));
++      if (UT_LIST_GET_LEN(trx_sys->view_list)) {
++              read_view_t*    view = UT_LIST_GET_LAST(trx_sys->view_list);
++
++              if (view) {
++                      fprintf(file, "---OLDEST VIEW---\n");
++                      read_view_print(file, view);
++                      fprintf(file, "-----------------\n");
++              }
++      }
++
+       n_reserved = fil_space_get_n_reserved_extents(0);
+       if (n_reserved > 0) {
+               fprintf(file,
+@@ -1952,6 +2023,31 @@
+       srv_n_rows_deleted_old = srv_n_rows_deleted;
+       srv_n_rows_read_old = srv_n_rows_read;
++      /* Only if lock_print_info_summary proceeds correctly,
++      before we call the lock_print_info_all_transactions
++      to print all the lock information. */
++      ret = lock_print_info_summary(file, nowait);
++
++      if (ret) {
++              if (trx_start) {
++                      long    t = ftell(file);
++                      if (t < 0) {
++                              *trx_start = ULINT_UNDEFINED;
++                      } else {
++                              *trx_start = (ulint) t;
++                      }
++              }
++              lock_print_info_all_transactions(file);
++              if (trx_end) {
++                      long    t = ftell(file);
++                      if (t < 0) {
++                              *trx_end = ULINT_UNDEFINED;
++                      } else {
++                              *trx_end = (ulint) t;
++                      }
++              }
++      }
++
+       fputs("----------------------------\n"
+             "END OF INNODB MONITOR OUTPUT\n"
+             "============================\n", file);
+@@ -1995,6 +2091,7 @@
+               = srv_buf_pool_write_requests;
+       export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
+       export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
++      export_vars.innodb_buffer_pool_pages_LRU_flushed = buf_lru_flush_page_count;
+       export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
+       export_vars.innodb_buffer_pool_read_ahead
+               = stat.n_ra_pages_read;
+diff -ruN a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c
+--- a/storage/innobase/sync/sync0arr.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/sync/sync0arr.c 2010-12-03 15:07:31.826041368 +0900
+@@ -477,7 +477,7 @@
+       fprintf(file,
+               "--Thread %lu has waited at %s line %lu"
+-              " for %.2f seconds the semaphore:\n",
++              " for %#.5g seconds the semaphore:\n",
+               (ulong) os_thread_pf(cell->thread), cell->file,
+               (ulong) cell->line,
+               difftime(time(NULL), cell->reservation_time));
+diff -ruN a/storage/innobase/thr/thr0loc.c b/storage/innobase/thr/thr0loc.c
+--- a/storage/innobase/thr/thr0loc.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/thr/thr0loc.c   2010-12-03 15:07:31.828023915 +0900
+@@ -49,6 +49,7 @@
+ /** The hash table. The module is not yet initialized when it is NULL. */
+ static hash_table_t*  thr_local_hash  = NULL;
++ulint         thr_local_hash_n_nodes = 0;
+ /** Thread local data */
+ typedef struct thr_local_struct thr_local_t;
+@@ -221,6 +222,7 @@
+                   os_thread_pf(os_thread_get_curr_id()),
+                   local);
++      thr_local_hash_n_nodes++;
+       mutex_exit(&thr_local_mutex);
+ }
+@@ -249,6 +251,7 @@
+       HASH_DELETE(thr_local_t, hash, thr_local_hash,
+                   os_thread_pf(id), local);
++      thr_local_hash_n_nodes--;
+       mutex_exit(&thr_local_mutex);
+@@ -305,3 +308,29 @@
+       hash_table_free(thr_local_hash);
+       thr_local_hash = NULL;
+ }
++
++/*************************************************************************
++Return local hash table information. */
++
++ulint
++thr_local_hash_cells(void)
++/*======================*/
++{
++      if (thr_local_hash) {
++              return (thr_local_hash->n_cells);
++      } else {
++              return 0;
++      }
++}
++
++ulint
++thr_local_hash_nodes(void)
++/*======================*/
++{
++      if (thr_local_hash) {
++              return (thr_local_hash_n_nodes
++                      * (sizeof(thr_local_t) + MEM_BLOCK_HEADER_SIZE));
++      } else {
++              return 0;
++      }
++}
+diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
+--- a/storage/innobase/trx/trx0purge.c 2010-12-04 02:58:26.000000000 +0900
++++ b/storage/innobase/trx/trx0purge.c 2011-01-21 19:40:42.086683671 +0900
+@@ -1201,7 +1201,7 @@
+ /*=====================*/
+ {
+       fprintf(stderr, "InnoDB: Purge system view:\n");
+-      read_view_print(purge_sys->view);
++      read_view_print(stderr, purge_sys->view);
+       fprintf(stderr, "InnoDB: Purge trx n:o " TRX_ID_FMT
+               ", undo n:o " TRX_ID_FMT "\n",
diff --git a/innodb_show_status_extend.patch b/innodb_show_status_extend.patch
new file mode 100644 (file)
index 0000000..e7d8120
--- /dev/null
@@ -0,0 +1,490 @@
+# name       : innodb_show_status_extend.patch
+# introduced : XtraDB based 5.5.8
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2011-01-21 19:53:42.369599743 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2011-01-21 19:54:44.659599699 +0900
+@@ -618,6 +618,16 @@
+       trx_t*  trx);   /*!< in: transaction handle */
+ static SHOW_VAR innodb_status_variables[]= {
++  {"adaptive_hash_cells",
++  (char*) &export_vars.innodb_adaptive_hash_cells,      SHOW_LONG},
++  {"adaptive_hash_heap_buffers",
++  (char*) &export_vars.innodb_adaptive_hash_heap_buffers, SHOW_LONG},
++  {"adaptive_hash_hash_searches",
++  (char*) &export_vars.innodb_adaptive_hash_hash_searches, SHOW_LONG},
++  {"adaptive_hash_non_hash_searches",
++  (char*) &export_vars.innodb_adaptive_hash_non_hash_searches, SHOW_LONG},
++  {"background_log_sync",
++  (char*) &export_vars.innodb_background_log_sync,      SHOW_LONG},
+   {"buffer_pool_pages_data",
+   (char*) &export_vars.innodb_buffer_pool_pages_data,   SHOW_LONG},
+   {"buffer_pool_pages_dirty",
+@@ -632,8 +642,14 @@
+   {"buffer_pool_pages_latched",
+   (char*) &export_vars.innodb_buffer_pool_pages_latched,  SHOW_LONG},
+ #endif /* UNIV_DEBUG */
++  {"buffer_pool_pages_made_not_young",
++  (char*) &export_vars.innodb_buffer_pool_pages_made_not_young, SHOW_LONG},
++  {"buffer_pool_pages_made_young",
++  (char*) &export_vars.innodb_buffer_pool_pages_made_young, SHOW_LONG},
+   {"buffer_pool_pages_misc",
+   (char*) &export_vars.innodb_buffer_pool_pages_misc,   SHOW_LONG},
++  {"buffer_pool_pages_old",
++  (char*) &export_vars.innodb_buffer_pool_pages_old,    SHOW_LONG},
+   {"buffer_pool_pages_total",
+   (char*) &export_vars.innodb_buffer_pool_pages_total,          SHOW_LONG},
+   {"buffer_pool_read_ahead",
+@@ -648,6 +664,12 @@
+   (char*) &export_vars.innodb_buffer_pool_wait_free,    SHOW_LONG},
+   {"buffer_pool_write_requests",
+   (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
++  {"checkpoint_age",
++  (char*) &export_vars.innodb_checkpoint_age,           SHOW_LONG},
++  {"checkpoint_max_age",
++  (char*) &export_vars.innodb_checkpoint_max_age,       SHOW_LONG},
++  {"checkpoint_target_age",
++  (char*) &export_vars.innodb_checkpoint_target_age,    SHOW_LONG},
+   {"data_fsyncs",
+   (char*) &export_vars.innodb_data_fsyncs,              SHOW_LONG},
+   {"data_pending_fsyncs",
+@@ -674,12 +696,66 @@
+   (char*) &export_vars.innodb_dict_tables,              SHOW_LONG},
+   {"have_atomic_builtins",
+   (char*) &export_vars.innodb_have_atomic_builtins,     SHOW_BOOL},
++  {"history_list_length",
++  (char*) &export_vars.innodb_history_list_length,      SHOW_LONG},
++  {"ibuf_discarded_delete_marks",
++  (char*) &export_vars.innodb_ibuf_discarded_delete_marks, SHOW_LONG},
++  {"ibuf_discarded_deletes",
++  (char*) &export_vars.innodb_ibuf_discarded_deletes,   SHOW_LONG},
++  {"ibuf_discarded_inserts",
++  (char*) &export_vars.innodb_ibuf_discarded_inserts,   SHOW_LONG},
++  {"ibuf_free_list",
++  (char*) &export_vars.innodb_ibuf_free_list,           SHOW_LONG},
++  {"ibuf_merged_delete_marks",
++  (char*) &export_vars.innodb_ibuf_merged_delete_marks,         SHOW_LONG},
++  {"ibuf_merged_deletes",
++  (char*) &export_vars.innodb_ibuf_merged_deletes,      SHOW_LONG},
++  {"ibuf_merged_inserts",
++  (char*) &export_vars.innodb_ibuf_merged_inserts,      SHOW_LONG},
++  {"ibuf_merges",
++  (char*) &export_vars.innodb_ibuf_merges,              SHOW_LONG},
++  {"ibuf_segment_size",
++  (char*) &export_vars.innodb_ibuf_segment_size,        SHOW_LONG},
++  {"ibuf_size",
++  (char*) &export_vars.innodb_ibuf_size,                SHOW_LONG},
+   {"log_waits",
+   (char*) &export_vars.innodb_log_waits,                SHOW_LONG},
+   {"log_write_requests",
+   (char*) &export_vars.innodb_log_write_requests,       SHOW_LONG},
+   {"log_writes",
+   (char*) &export_vars.innodb_log_writes,               SHOW_LONG},
++  {"lsn_current",
++  (char*) &export_vars.innodb_lsn_current,              SHOW_LONGLONG},
++  {"lsn_flushed",
++  (char*) &export_vars.innodb_lsn_flushed,              SHOW_LONGLONG},
++  {"lsn_last_checkpoint",
++  (char*) &export_vars.innodb_lsn_last_checkpoint,      SHOW_LONGLONG},
++  {"master_thread_1_second_loops",
++  (char*) &export_vars.innodb_master_thread_1_second_loops, SHOW_LONG},
++  {"master_thread_10_second_loops",
++  (char*) &export_vars.innodb_master_thread_10_second_loops, SHOW_LONG},
++  {"master_thread_background_loops",
++  (char*) &export_vars.innodb_master_thread_background_loops, SHOW_LONG},
++  {"master_thread_main_flush_loops",
++  (char*) &export_vars.innodb_master_thread_main_flush_loops, SHOW_LONG},
++  {"master_thread_sleeps",
++  (char*) &export_vars.innodb_master_thread_sleeps,     SHOW_LONG},
++  {"max_trx_id",
++  (char*) &export_vars.innodb_max_trx_id,               SHOW_LONGLONG},
++  {"mem_adaptive_hash",
++  (char*) &export_vars.innodb_mem_adaptive_hash,        SHOW_LONG},
++  {"mem_dictionary",
++  (char*) &export_vars.innodb_mem_dictionary,           SHOW_LONG},
++  {"mem_total",
++  (char*) &export_vars.innodb_mem_total,                SHOW_LONG},
++  {"mutex_os_waits",
++  (char*) &export_vars.innodb_mutex_os_waits,           SHOW_LONGLONG},
++  {"mutex_spin_rounds",
++  (char*) &export_vars.innodb_mutex_spin_rounds,        SHOW_LONGLONG},
++  {"mutex_spin_waits",
++  (char*) &export_vars.innodb_mutex_spin_waits,                 SHOW_LONGLONG},
++  {"oldest_view_low_limit_trx_id",
++  (char*) &export_vars.innodb_oldest_view_low_limit_trx_id, SHOW_LONGLONG},
+   {"os_log_fsyncs",
+   (char*) &export_vars.innodb_os_log_fsyncs,            SHOW_LONG},
+   {"os_log_pending_fsyncs",
+@@ -696,8 +772,14 @@
+   (char*) &export_vars.innodb_pages_read,               SHOW_LONG},
+   {"pages_written",
+   (char*) &export_vars.innodb_pages_written,            SHOW_LONG},
++  {"purge_trx_id",
++  (char*) &export_vars.innodb_purge_trx_id,             SHOW_LONGLONG},
++  {"purge_undo_no",
++  (char*) &export_vars.innodb_purge_undo_no,            SHOW_LONGLONG},
+   {"row_lock_current_waits",
+   (char*) &export_vars.innodb_row_lock_current_waits,   SHOW_LONG},
++  {"row_lock_numbers",
++  (char*) &export_vars.innodb_row_lock_numbers,                 SHOW_LONG},
+   {"row_lock_time",
+   (char*) &export_vars.innodb_row_lock_time,            SHOW_LONGLONG},
+   {"row_lock_time_avg",
+@@ -714,8 +796,20 @@
+   (char*) &export_vars.innodb_rows_read,                SHOW_LONG},
+   {"rows_updated",
+   (char*) &export_vars.innodb_rows_updated,             SHOW_LONG},
++  {"s_lock_os_waits",
++  (char*) &export_vars.innodb_s_lock_os_waits,                  SHOW_LONGLONG},
++  {"s_lock_spin_rounds",
++  (char*) &export_vars.innodb_s_lock_spin_rounds,       SHOW_LONGLONG},
++  {"s_lock_spin_waits",
++  (char*) &export_vars.innodb_s_lock_spin_waits,        SHOW_LONGLONG},
+   {"truncated_status_writes",
+   (char*) &export_vars.innodb_truncated_status_writes,        SHOW_LONG},
++  {"x_lock_os_waits",
++  (char*) &export_vars.innodb_x_lock_os_waits,                  SHOW_LONGLONG},
++  {"x_lock_spin_rounds",
++  (char*) &export_vars.innodb_x_lock_spin_rounds,       SHOW_LONGLONG},
++  {"x_lock_spin_waits",
++  (char*) &export_vars.innodb_x_lock_spin_waits,        SHOW_LONGLONG},
+   {NullS, NullS, SHOW_LONG}
+ };
+diff -ruN a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
+--- a/storage/innobase/include/lock0lock.h     2011-01-21 19:52:38.967683738 +0900
++++ b/storage/innobase/include/lock0lock.h     2011-01-21 19:54:44.660599140 +0900
+@@ -816,6 +816,7 @@
+ /** The lock system struct */
+ struct lock_sys_struct{
+       hash_table_t*   rec_hash;       /*!< hash table of the record locks */
++      ulint           rec_num;
+ };
+ /** The lock system */
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2011-01-21 19:53:42.380638228 +0900
++++ b/storage/innobase/include/srv0srv.h       2011-01-21 19:54:44.662600032 +0900
+@@ -727,6 +727,11 @@
+ /** Status variables to be passed to MySQL */
+ struct export_var_struct{
++      ulint innodb_adaptive_hash_cells;
++      ulint innodb_adaptive_hash_heap_buffers;
++      ulint innodb_adaptive_hash_hash_searches;
++      ulint innodb_adaptive_hash_non_hash_searches;
++      ulint innodb_background_log_sync;
+       ulint innodb_data_pending_reads;        /*!< Pending reads */
+       ulint innodb_data_pending_writes;       /*!< Pending writes */
+       ulint innodb_data_pending_fsyncs;       /*!< Pending fsyncs */
+@@ -744,6 +749,9 @@
+ #ifdef UNIV_DEBUG
+       ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */
+ #endif /* UNIV_DEBUG */
++      ulint innodb_buffer_pool_pages_made_not_young;
++      ulint innodb_buffer_pool_pages_made_young;
++      ulint innodb_buffer_pool_pages_old;
+       ulint innodb_buffer_pool_read_requests; /*!< buf_pool->stat.n_page_gets */
+       ulint innodb_buffer_pool_reads;         /*!< srv_buf_pool_reads */
+       ulint innodb_buffer_pool_wait_free;     /*!< srv_buf_pool_wait_free */
+@@ -752,13 +760,43 @@
+       ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+       ulint innodb_buffer_pool_read_ahead;    /*!< srv_read_ahead */
+       ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
++      ulint innodb_checkpoint_age;
++      ulint innodb_checkpoint_max_age;
++      ulint innodb_checkpoint_target_age;
+       ulint innodb_dblwr_pages_written;       /*!< srv_dblwr_pages_written */
+       ulint innodb_dblwr_writes;              /*!< srv_dblwr_writes */
+       ulint innodb_deadlocks;
+       ibool innodb_have_atomic_builtins;      /*!< HAVE_ATOMIC_BUILTINS */
++      ulint innodb_history_list_length;
++      ulint innodb_ibuf_size;
++      ulint innodb_ibuf_free_list;
++      ulint innodb_ibuf_segment_size;
++      ulint innodb_ibuf_merges;
++      ulint innodb_ibuf_merged_inserts;
++      ulint innodb_ibuf_merged_delete_marks;
++      ulint innodb_ibuf_merged_deletes;
++      ulint innodb_ibuf_discarded_inserts;
++      ulint innodb_ibuf_discarded_delete_marks;
++      ulint innodb_ibuf_discarded_deletes;
+       ulint innodb_log_waits;                 /*!< srv_log_waits */
+       ulint innodb_log_write_requests;        /*!< srv_log_write_requests */
+       ulint innodb_log_writes;                /*!< srv_log_writes */
++      ib_int64_t innodb_lsn_current;
++      ib_int64_t innodb_lsn_flushed;
++      ib_int64_t innodb_lsn_last_checkpoint;
++      ulint innodb_master_thread_1_second_loops;
++      ulint innodb_master_thread_10_second_loops;
++      ulint innodb_master_thread_background_loops;
++      ulint innodb_master_thread_main_flush_loops;
++      ulint innodb_master_thread_sleeps;
++      ib_int64_t innodb_max_trx_id;
++      ulint innodb_mem_adaptive_hash;
++      ulint innodb_mem_dictionary;
++      ulint innodb_mem_total;
++      ib_int64_t innodb_mutex_os_waits;
++      ib_int64_t innodb_mutex_spin_rounds;
++      ib_int64_t innodb_mutex_spin_waits;
++      ib_int64_t innodb_oldest_view_low_limit_trx_id;
+       ulint innodb_os_log_written;            /*!< srv_os_log_written */
+       ulint innodb_os_log_fsyncs;             /*!< fil_n_log_flushes */
+       ulint innodb_os_log_pending_writes;     /*!< srv_os_log_pending_writes */
+@@ -767,6 +805,8 @@
+       ulint innodb_pages_created;             /*!< buf_pool->stat.n_pages_created */
+       ulint innodb_pages_read;                /*!< buf_pool->stat.n_pages_read */
+       ulint innodb_pages_written;             /*!< buf_pool->stat.n_pages_written */
++      ib_int64_t innodb_purge_trx_id;
++      ib_int64_t innodb_purge_undo_no;
+       ulint innodb_row_lock_waits;            /*!< srv_n_lock_wait_count */
+       ulint innodb_row_lock_current_waits;    /*!< srv_n_lock_wait_current_count */
+       ib_int64_t innodb_row_lock_time;        /*!< srv_n_lock_wait_time
+@@ -776,11 +816,18 @@
+                                               / srv_n_lock_wait_count */
+       ulint innodb_row_lock_time_max;         /*!< srv_n_lock_max_wait_time
+                                               / 1000 */
++      ulint innodb_row_lock_numbers;
+       ulint innodb_rows_read;                 /*!< srv_n_rows_read */
+       ulint innodb_rows_inserted;             /*!< srv_n_rows_inserted */
+       ulint innodb_rows_updated;              /*!< srv_n_rows_updated */
+       ulint innodb_rows_deleted;              /*!< srv_n_rows_deleted */
+       ulint innodb_truncated_status_writes;   /*!< srv_truncated_status_writes */
++      ib_int64_t innodb_s_lock_os_waits;
++      ib_int64_t innodb_s_lock_spin_rounds;
++      ib_int64_t innodb_s_lock_spin_waits;
++      ib_int64_t innodb_x_lock_os_waits;
++      ib_int64_t innodb_x_lock_spin_rounds;
++      ib_int64_t innodb_x_lock_spin_waits;
+ };
+ /** Thread slot in the thread table */
+diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
+--- a/storage/innobase/include/sync0sync.h     2011-01-21 19:48:45.982637372 +0900
++++ b/storage/innobase/include/sync0sync.h     2011-01-21 19:54:44.664638235 +0900
+@@ -760,6 +760,10 @@
+ #define       SYNC_SPIN_ROUNDS        srv_n_spin_wait_rounds
++extern        ib_int64_t      mutex_spin_round_count;
++extern        ib_int64_t      mutex_spin_wait_count;
++extern        ib_int64_t      mutex_os_wait_count;
++
+ /** The number of mutex_exit calls. Intended for performance monitoring. */
+ extern        ib_int64_t      mutex_exit_count;
+diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
+--- a/storage/innobase/lock/lock0lock.c        2011-01-21 19:52:38.998600121 +0900
++++ b/storage/innobase/lock/lock0lock.c        2011-01-21 19:54:44.668637536 +0900
+@@ -571,6 +571,7 @@
+       lock_sys = mem_alloc(sizeof(lock_sys_t));
+       lock_sys->rec_hash = hash_create(n_cells);
++      lock_sys->rec_num = 0;
+       /* hash_create_mutexes(lock_sys->rec_hash, 2, SYNC_REC_LOCK); */
+@@ -1719,6 +1720,7 @@
+       HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
+                   lock_rec_fold(space, page_no), lock);
++      lock_sys->rec_num++;
+       if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+               lock_set_lock_and_trx_wait(lock, trx);
+@@ -2265,6 +2267,7 @@
+       HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
+                   lock_rec_fold(space, page_no), in_lock);
++      lock_sys->rec_num--;
+       UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
+@@ -2308,6 +2311,7 @@
+       HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
+                   lock_rec_fold(space, page_no), in_lock);
++      lock_sys->rec_num--;
+       UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
+ }
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2011-01-21 19:53:42.390637840 +0900
++++ b/storage/innobase/srv/srv0srv.c   2011-01-21 19:54:44.673637084 +0900
+@@ -2227,12 +2227,49 @@
+       ulint           LRU_len;
+       ulint           free_len;
+       ulint           flush_list_len;
++      ulint           mem_adaptive_hash, mem_dictionary;
++      read_view_t*    oldest_view;
++      ulint           i;
+       buf_get_total_stat(&stat);
+       buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
++      mem_adaptive_hash = 0; /* stays 0 when btr_search_sys is NULL */
++      if (btr_search_sys && btr_search_sys->hash_index[0]->heap) {
++              mem_adaptive_hash = mem_heap_get_size(btr_search_sys->hash_index[0]->heap);
++      } else if (btr_search_sys) { /* no single heap: sum the per-mutex heaps */
++              for (i=0; i < btr_search_sys->hash_index[0]->n_mutexes; i++) {
++                      mem_adaptive_hash += mem_heap_get_size(btr_search_sys->hash_index[0]->heaps[i]);
++              }
++      }
++      mem_adaptive_hash *= btr_search_index_num;
++      if (btr_search_sys) {
++              mem_adaptive_hash += (btr_search_sys->hash_index[0]->n_cells * btr_search_index_num * sizeof(hash_cell_t));
++      }
++
++      mem_dictionary = (dict_sys ? ((dict_sys->table_hash->n_cells
++                                      + dict_sys->table_id_hash->n_cells
++                                    ) * sizeof(hash_cell_t)
++                              + dict_sys->size) : 0);
++
+       mutex_enter(&srv_innodb_monitor_mutex);
++      export_vars.innodb_adaptive_hash_cells = 0;
++      export_vars.innodb_adaptive_hash_heap_buffers = 0;
++      for (i = 0; i < btr_search_index_num; i++) {
++              hash_table_t*   table = btr_search_get_hash_index((index_id_t)i);
++
++              export_vars.innodb_adaptive_hash_cells
++                      += hash_get_n_cells(table);
++              export_vars.innodb_adaptive_hash_heap_buffers
++                      += (UT_LIST_GET_LEN(table->heap->base) - 1);
++      }
++      export_vars.innodb_adaptive_hash_hash_searches
++              = btr_cur_n_sea;
++      export_vars.innodb_adaptive_hash_non_hash_searches
++              = btr_cur_n_non_sea;
++      export_vars.innodb_background_log_sync
++              = srv_log_writes_and_flush;
+       export_vars.innodb_data_pending_reads
+               = os_n_pending_reads;
+       export_vars.innodb_data_pending_writes
+@@ -2269,6 +2306,101 @@
+       export_vars.innodb_buffer_pool_pages_misc
+               = buf_pool_get_n_pages() - LRU_len - free_len;
++
++      export_vars.innodb_buffer_pool_pages_made_young
++              = stat.n_pages_made_young;
++      export_vars.innodb_buffer_pool_pages_made_not_young
++              = stat.n_pages_not_made_young;
++      export_vars.innodb_buffer_pool_pages_old = 0;
++      for (i = 0; i < srv_buf_pool_instances; i++) {
++              buf_pool_t*     buf_pool = buf_pool_from_array(i);
++              export_vars.innodb_buffer_pool_pages_old
++                      += buf_pool->LRU_old_len;
++      }
++      export_vars.innodb_checkpoint_age
++              = (log_sys->lsn - log_sys->last_checkpoint_lsn);
++      export_vars.innodb_checkpoint_max_age
++              = log_sys->max_checkpoint_age;
++      export_vars.innodb_checkpoint_target_age
++              = srv_checkpoint_age_target
++                ? ut_min(log_sys->max_checkpoint_age_async, srv_checkpoint_age_target)
++                : log_sys->max_checkpoint_age_async;
++      export_vars.innodb_history_list_length
++              = trx_sys->rseg_history_len;
++      export_vars.innodb_ibuf_size
++              = ibuf->size;
++      export_vars.innodb_ibuf_free_list
++              = ibuf->free_list_len;
++      export_vars.innodb_ibuf_segment_size
++              = ibuf->seg_size;
++      export_vars.innodb_ibuf_merges
++              = ibuf->n_merges;
++      export_vars.innodb_ibuf_merged_inserts
++              = ibuf->n_merged_ops[IBUF_OP_INSERT];
++      export_vars.innodb_ibuf_merged_delete_marks
++              = ibuf->n_merged_ops[IBUF_OP_DELETE_MARK];
++      export_vars.innodb_ibuf_merged_deletes
++              = ibuf->n_merged_ops[IBUF_OP_DELETE];
++      export_vars.innodb_ibuf_discarded_inserts
++              = ibuf->n_discarded_ops[IBUF_OP_INSERT];
++      export_vars.innodb_ibuf_discarded_delete_marks
++              = ibuf->n_discarded_ops[IBUF_OP_DELETE_MARK];
++      export_vars.innodb_ibuf_discarded_deletes
++              = ibuf->n_discarded_ops[IBUF_OP_DELETE];
++      export_vars.innodb_lsn_current
++              = log_sys->lsn;
++      export_vars.innodb_lsn_flushed
++              = log_sys->flushed_to_disk_lsn;
++      export_vars.innodb_lsn_last_checkpoint
++              = log_sys->last_checkpoint_lsn;
++      export_vars.innodb_master_thread_1_second_loops
++              = srv_main_1_second_loops;
++      export_vars.innodb_master_thread_10_second_loops
++              = srv_main_10_second_loops;
++      export_vars.innodb_master_thread_background_loops
++              = srv_main_background_loops;
++      export_vars.innodb_master_thread_main_flush_loops
++              = srv_main_flush_loops;
++      export_vars.innodb_master_thread_sleeps
++              = srv_main_sleeps;
++      export_vars.innodb_max_trx_id
++              = trx_sys->max_trx_id;
++      export_vars.innodb_mem_adaptive_hash
++              = mem_adaptive_hash;
++      export_vars.innodb_mem_dictionary
++              = mem_dictionary;
++      export_vars.innodb_mem_total
++              = ut_total_allocated_memory;
++      export_vars.innodb_mutex_os_waits
++              = mutex_os_wait_count;
++      export_vars.innodb_mutex_spin_rounds
++              = mutex_spin_round_count;
++      export_vars.innodb_mutex_spin_waits
++              = mutex_spin_wait_count;
++      export_vars.innodb_s_lock_os_waits
++              = rw_s_os_wait_count;
++      export_vars.innodb_s_lock_spin_rounds
++              = rw_s_spin_round_count;
++      export_vars.innodb_s_lock_spin_waits
++              = rw_s_spin_wait_count;
++      export_vars.innodb_x_lock_os_waits
++              = rw_x_os_wait_count;
++      export_vars.innodb_x_lock_spin_rounds
++              = rw_x_spin_round_count;
++      export_vars.innodb_x_lock_spin_waits
++              = rw_x_spin_wait_count;
++
++      oldest_view = UT_LIST_GET_LAST(trx_sys->view_list);
++      export_vars.innodb_oldest_view_low_limit_trx_id
++              = oldest_view ? oldest_view->low_limit_id : 0;
++
++      export_vars.innodb_purge_trx_id
++              = purge_sys->purge_trx_no;
++      export_vars.innodb_purge_undo_no
++              = purge_sys->purge_undo_no;
++      export_vars.innodb_row_lock_numbers
++              = lock_sys->rec_num;
++
+ #ifdef HAVE_ATOMIC_BUILTINS
+       export_vars.innodb_have_atomic_builtins = 1;
+ #else
+diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
+--- a/storage/innobase/sync/sync0sync.c        2011-01-21 19:53:03.458637954 +0900
++++ b/storage/innobase/sync/sync0sync.c        2011-01-21 19:54:44.676637686 +0900
+@@ -170,13 +170,13 @@
+ /** The number of iterations in the mutex_spin_wait() spin loop.
+ Intended for performance monitoring. */
+-static ib_int64_t     mutex_spin_round_count          = 0;
++UNIV_INTERN ib_int64_t        mutex_spin_round_count          = 0;
+ /** The number of mutex_spin_wait() calls.  Intended for
+ performance monitoring. */
+-static ib_int64_t     mutex_spin_wait_count           = 0;
++UNIV_INTERN ib_int64_t        mutex_spin_wait_count           = 0;
+ /** The number of OS waits in mutex_spin_wait().  Intended for
+ performance monitoring. */
+-static ib_int64_t     mutex_os_wait_count             = 0;
++UNIV_INTERN ib_int64_t        mutex_os_wait_count             = 0;
+ /** The number of mutex_exit() calls. Intended for performance
+ monitoring. */
+ UNIV_INTERN ib_int64_t        mutex_exit_count                = 0;
diff --git a/innodb_show_sys_tables.patch b/innodb_show_sys_tables.patch
new file mode 100644 (file)
index 0000000..36f5061
--- /dev/null
@@ -0,0 +1,1734 @@
+# name       : innodb_show_sys_tables.patch
+# introduced : 13?
+# maintainer : Yasufumi
+# (It is revived from mysql-5.5.6-rc)
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:53:54.615040167 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 16:07:26.851357007 +0900
+@@ -11673,7 +11673,14 @@
+ i_s_innodb_cmp,
+ i_s_innodb_cmp_reset,
+ i_s_innodb_cmpmem,
+-i_s_innodb_cmpmem_reset
++i_s_innodb_cmpmem_reset,
++i_s_innodb_sys_tables,
++i_s_innodb_sys_tablestats,
++i_s_innodb_sys_indexes,
++i_s_innodb_sys_columns,
++i_s_innodb_sys_fields,
++i_s_innodb_sys_foreign,
++i_s_innodb_sys_foreign_cols
+ mysql_declare_plugin_end;
+ /** @brief Initialize the default value of innodb_commit_concurrency.
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc  2010-12-03 15:49:59.207956807 +0900
++++ b/storage/innobase/handler/i_s.cc  2010-12-03 17:10:02.719210529 +0900
+@@ -36,9 +36,11 @@
+ #include <mysql/innodb_priv.h>
+ extern "C" {
++#include "btr0pcur.h" /* for file sys_tables related info. */
+ #include "btr0types.h"
+ #include "buf0buddy.h" /* for i_s_cmpmem */
+ #include "buf0buf.h" /* for buf_pool and PAGE_ZIP_MIN_SIZE */
++#include "dict0load.h"        /* for file sys_tables related info. */
+ #include "dict0mem.h"
+ #include "dict0types.h"
+ #include "ha_prototypes.h" /* for innobase_convert_name() */
+@@ -1787,6 +1789,1675 @@
+       DBUG_RETURN(0);
+ }
++/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLES */
++static ST_FIELD_INFO    innodb_sys_tables_fields_info[] =
++{
++#define SYS_TABLE_ID          0
++      {STRUCT_FLD(field_name,         "TABLE_ID"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLE_SCHEMA      1
++      {STRUCT_FLD(field_name,         "SCHEMA"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLE_NAME                2
++      {STRUCT_FLD(field_name,         "NAME"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLE_FLAG                3
++      {STRUCT_FLD(field_name,         "FLAG"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLE_NUM_COLUMN  4
++      {STRUCT_FLD(field_name,         "N_COLS"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLE_SPACE               5
++      {STRUCT_FLD(field_name,         "SPACE"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one information_schema.innodb_sys_tables row built from a
++dict_table_t; table->name is split at its first '/' into SCHEMA and NAME.
++@return       0 on success */
++static
++int
++i_s_dict_fill_sys_tables(
++/*=====================*/
++      THD*            thd,            /*!< in: thread */
++      dict_table_t*   table,          /*!< in: table */
++      TABLE*          table_to_fill)  /*!< in/out: fill this table */
++{
++      Field**         fields;
++      char            buf[NAME_LEN * 2 + 2];
++      char*           ptr;
++
++      DBUG_ENTER("i_s_dict_fill_sys_tables");
++
++      fields = table_to_fill->field;
++
++      OK(fields[SYS_TABLE_ID]->store(longlong(table->id), TRUE));
++
++      /* strncpy() does not terminate buf when the name fills it */
++      strncpy(buf, table->name, sizeof(buf) - 1);
++      buf[sizeof(buf) - 1] = '\0';
++      ptr = strchr(buf, '/');
++      if (ptr) {
++              *ptr++ = '\0';
++              OK(field_store_string(fields[SYS_TABLE_SCHEMA], buf));
++              OK(field_store_string(fields[SYS_TABLE_NAME], ptr));
++      } else {
++              fields[SYS_TABLE_SCHEMA]->set_null();
++              OK(field_store_string(fields[SYS_TABLE_NAME], buf));
++      }
++
++      OK(fields[SYS_TABLE_FLAG]->store(table->flags));
++
++      OK(fields[SYS_TABLE_NUM_COLUMN]->store(table->n_cols));
++
++      OK(fields[SYS_TABLE_SPACE]->store(table->space));
++
++      OK(schema_table_store_record(thd, table_to_fill));
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Scan SYS_TABLES and add one information_schema.innodb_sys_tables row per
++record; nothing is filled if check_global_access() rejects the caller.
++@return 0 on success */
++static
++int
++i_s_sys_tables_fill_table(
++/*======================*/
++      THD*            thd,    /*!< in: thread */
++      TABLE_LIST*     tables, /*!< in/out: tables to fill */
++      COND*           cond)   /*!< in: condition (not used) */
++{
++        btr_pcur_t    pcur;
++      const rec_t*    rec;
++      mem_heap_t*     heap;
++      mtr_t           mtr;
++
++      DBUG_ENTER("i_s_sys_tables_fill_table");
++
++      /* deny access to users lacking the PROCESS privilege */
++      if (check_global_access(thd, PROCESS_ACL)) {
++
++                DBUG_RETURN(0);
++      }
++
++        heap = mem_heap_create(1000);
++        mutex_enter(&(dict_sys->mutex));
++        mtr_start(&mtr);
++
++      rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
++
++      while (rec) {
++              const char*     err_msg;
++              dict_table_t*   table_rec = NULL;
++
++              /* Create and populate a dict_table_t structure with
++              information from SYS_TABLES row */
++              err_msg = dict_process_sys_tables_rec(
++                      heap, rec, &table_rec, DICT_TABLE_LOAD_FROM_RECORD);
++
++              mtr_commit(&mtr);
++              mutex_exit(&dict_sys->mutex);
++
++              if (!err_msg) {
++                      i_s_dict_fill_sys_tables(thd, table_rec, tables->table);
++              } else {
++                      /* err_msg is data, never the format string */
++                      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++                                          ER_CANT_FIND_SYSTEM_REC, "%s", err_msg);
++              }
++
++              /* Since dict_process_sys_tables_rec() is called with
++              DICT_TABLE_LOAD_FROM_RECORD, table_rec is created there
++              and freed here; it is still NULL if never assigned */
++              if (table_rec) {
++                      dict_mem_table_free(table_rec);
++              }
++
++              mem_heap_empty(heap);
++
++              /* Get the next record */
++              mutex_enter(&dict_sys->mutex);
++              mtr_start(&mtr);
++              rec = dict_getnext_system(&pcur, &mtr);
++      }
++
++      mtr_commit(&mtr);
++      mutex_exit(&dict_sys->mutex);
++      mem_heap_free(heap);
++
++      DBUG_RETURN(0);
++}
++
++/*******************************************************************//**
++Plugin init hook for INFORMATION_SCHEMA.innodb_sys_tables: installs the
++column definitions and the fill callback. @return 0 on success */
++static
++int
++innodb_sys_tables_init(
++/*===================*/
++        void*   p)      /*!< in/out: table schema object */
++{
++        ST_SCHEMA_TABLE*        schema;
++
++        DBUG_ENTER("innodb_sys_tables_init");
++
++        schema = (ST_SCHEMA_TABLE*) p;
++
++        schema->fields_info = innodb_sys_tables_fields_info;
++        schema->fill_table = i_s_sys_tables_fill_table;
++
++        DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_sys_tables =
++{
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_SYS_TABLES"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB SYS_TABLES"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, innodb_sys_tables_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++      /* struct st_mysql_show_var* */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* struct st_mysql_sys_var** */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++
++/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS */
++static ST_FIELD_INFO    innodb_sys_tablestats_fields_info[] =
++{
++#define SYS_TABLESTATS_ID             0
++      {STRUCT_FLD(field_name,         "TABLE_ID"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_SCHEMA         1
++      {STRUCT_FLD(field_name,         "SCHEMA"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_NAME           2
++      {STRUCT_FLD(field_name,         "NAME"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_INIT           3
++      {STRUCT_FLD(field_name,         "STATS_INITIALIZED"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_NROW           4
++      {STRUCT_FLD(field_name,         "NUM_ROWS"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_CLUST_SIZE     5
++      {STRUCT_FLD(field_name,         "CLUST_INDEX_SIZE"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_INDEX_SIZE     6
++      {STRUCT_FLD(field_name,         "OTHER_INDEX_SIZE"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_MODIFIED               7
++      {STRUCT_FLD(field_name,         "MODIFIED_COUNTER"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_AUTONINC               8
++      {STRUCT_FLD(field_name,         "AUTOINC"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_MYSQL_OPEN_HANDLE      9
++      {STRUCT_FLD(field_name,         "MYSQL_HANDLES_OPENED"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one information_schema.innodb_sys_tablestats row from the statistics
++fields of a dict_table_t; table->name is split at its first '/'.
++@return       0 on success */
++static
++int
++i_s_dict_fill_sys_tablestats(
++/*=========================*/
++      THD*            thd,            /*!< in: thread */
++      dict_table_t*   table,          /*!< in: table */
++      TABLE*          table_to_fill)  /*!< in/out: fill this table */
++{
++      Field**         fields;
++      char            buf[NAME_LEN * 2 + 2];
++      char*           ptr;
++
++      DBUG_ENTER("i_s_dict_fill_sys_tablestats");
++
++      fields = table_to_fill->field;
++
++      OK(fields[SYS_TABLESTATS_ID]->store(longlong(table->id), TRUE));
++
++      /* strncpy() does not terminate buf when the name fills it */
++      strncpy(buf, table->name, sizeof(buf) - 1);
++      buf[sizeof(buf) - 1] = '\0';
++      ptr = strchr(buf, '/');
++      if (ptr) {
++              *ptr++ = '\0';
++              OK(field_store_string(fields[SYS_TABLESTATS_SCHEMA], buf));
++              OK(field_store_string(fields[SYS_TABLESTATS_NAME], ptr));
++      } else {
++              fields[SYS_TABLESTATS_SCHEMA]->set_null();
++              OK(field_store_string(fields[SYS_TABLESTATS_NAME], buf));
++      }
++
++      if (table->stat_initialized) {
++              OK(field_store_string(fields[SYS_TABLESTATS_INIT],
++                                    "Initialized"));
++      } else {
++              OK(field_store_string(fields[SYS_TABLESTATS_INIT],
++                                    "Uninitialized"));
++      }
++
++      OK(fields[SYS_TABLESTATS_NROW]->store(table->stat_n_rows, TRUE));
++
++      OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(
++              table->stat_clustered_index_size));
++
++      OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(
++              table->stat_sum_of_other_index_sizes));
++
++      OK(fields[SYS_TABLESTATS_MODIFIED]->store(
++              table->stat_modified_counter));
++
++      OK(fields[SYS_TABLESTATS_AUTONINC]->store(table->autoinc, TRUE));
++
++      OK(fields[SYS_TABLESTATS_MYSQL_OPEN_HANDLE]->store(
++              table->n_mysql_handles_opened));
++
++      OK(schema_table_store_record(thd, table_to_fill));
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Function to go through each record in SYS_TABLES table, and fill the
++information_schema.innodb_sys_tablestats table with table statistics
++related information; nothing is filled if the access check fails
++@return 0 on success */
++static
++int
++i_s_sys_tables_fill_table_stats(
++/*============================*/
++      THD*            thd,    /*!< in: thread */
++      TABLE_LIST*     tables, /*!< in/out: tables to fill */
++      COND*           cond)   /*!< in: condition (not used) */
++{
++        btr_pcur_t    pcur;
++      const rec_t*    rec;
++      mem_heap_t*     heap;
++      mtr_t           mtr;
++
++      DBUG_ENTER("i_s_sys_tables_fill_table_stats");
++
++      /* deny access to users lacking the PROCESS privilege */
++      if (check_global_access(thd, PROCESS_ACL)) {
++
++                DBUG_RETURN(0);
++      }
++
++        heap = mem_heap_create(1000);
++        mutex_enter(&dict_sys->mutex);
++        mtr_start(&mtr);
++
++      rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
++
++      while (rec) {
++              const char*     err_msg;
++              dict_table_t*   table_rec;
++
++              /* Fetch the dict_table_t structure corresponding to
++              this SYS_TABLES record */
++              err_msg = dict_process_sys_tables_rec(
++                      heap, rec, &table_rec, DICT_TABLE_LOAD_FROM_CACHE);
++
++              mtr_commit(&mtr);
++              mutex_exit(&dict_sys->mutex);
++              /* NOTE(review): table_rec is read below without the mutex */
++              if (!err_msg) {
++                      i_s_dict_fill_sys_tablestats(thd, table_rec,
++                                                   tables->table);
++              } else {
++                      /* err_msg is data, never the format string */
++                      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++                                          ER_CANT_FIND_SYSTEM_REC, "%s", err_msg);
++              }
++
++              mem_heap_empty(heap);
++
++              /* Get the next record */
++              mutex_enter(&dict_sys->mutex);
++              mtr_start(&mtr);
++              rec = dict_getnext_system(&pcur, &mtr);
++      }
++
++      mtr_commit(&mtr);
++      mutex_exit(&dict_sys->mutex);
++      mem_heap_free(heap);
++
++      DBUG_RETURN(0);
++}
++
++/*******************************************************************//**
++Plugin init hook for INFORMATION_SCHEMA.innodb_sys_tablestats: installs
++the column definitions and the fill callback. @return 0 on success */
++static
++int
++innodb_sys_tablestats_init(
++/*=======================*/
++        void*   p)      /*!< in/out: table schema object */
++{
++        ST_SCHEMA_TABLE*        schema;
++
++        DBUG_ENTER("innodb_sys_tablestats_init");
++
++        schema = (ST_SCHEMA_TABLE*) p;
++
++        schema->fields_info = innodb_sys_tablestats_fields_info;
++        schema->fill_table = i_s_sys_tables_fill_table_stats;
++
++        DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_sys_tablestats =
++{
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_SYS_TABLESTATS"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB SYS_TABLESTATS"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, innodb_sys_tablestats_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++      /* struct st_mysql_show_var* */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* struct st_mysql_sys_var** */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++
++/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_INDEXES */
++static ST_FIELD_INFO    innodb_sysindex_fields_info[] =
++{
++#define SYS_INDEX_ID          0
++      {STRUCT_FLD(field_name,         "INDEX_ID"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_NAME                1
++      {STRUCT_FLD(field_name,         "NAME"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_TABLE_ID    2
++      {STRUCT_FLD(field_name,         "TABLE_ID"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_TYPE                3
++      {STRUCT_FLD(field_name,         "TYPE"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_NUM_FIELDS  4
++      {STRUCT_FLD(field_name,         "N_FIELDS"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_PAGE_NO     5
++      {STRUCT_FLD(field_name,         "PAGE_NO"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_SPACE               6
++      {STRUCT_FLD(field_name,         "SPACE"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one information_schema.innodb_sys_indexes row from the given
++dict_index_t and the id of the table that owns the index
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_indexes(
++/*======================*/
++      THD*            thd,            /*!< in: thread */
++      table_id_t      table_id,       /*!< in: table id */
++      dict_index_t*   index,          /*!< in: populated dict_index_t
++                                      struct with index info */
++      TABLE*          table_to_fill)  /*!< in/out: fill this table */
++{
++      Field**         fields;
++
++      DBUG_ENTER("i_s_dict_fill_sys_indexes");
++
++      fields = table_to_fill->field;
++
++      OK(fields[SYS_INDEX_ID]->store(longlong(index->id), TRUE));
++
++      OK(field_store_string(fields[SYS_INDEX_NAME], index->name));
++
++      OK(fields[SYS_INDEX_TABLE_ID]->store(longlong(table_id), TRUE));
++
++      OK(fields[SYS_INDEX_TYPE]->store(index->type));
++
++      OK(fields[SYS_INDEX_NUM_FIELDS]->store(index->n_fields));
++
++      OK(fields[SYS_INDEX_PAGE_NO]->store(index->page));
++
++      OK(fields[SYS_INDEX_SPACE]->store(index->space));
++
++      OK(schema_table_store_record(thd, table_to_fill));
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Scan SYS_INDEXES and add one information_schema.innodb_sys_indexes row per
++record; nothing is filled if check_global_access() rejects the caller.
++@return 0 on success */
++static
++int
++i_s_sys_indexes_fill_table(
++/*=======================*/
++      THD*            thd,    /*!< in: thread */
++      TABLE_LIST*     tables, /*!< in/out: tables to fill */
++      COND*           cond)   /*!< in: condition (not used) */
++{
++        btr_pcur_t            pcur;
++      const rec_t*            rec;
++      mem_heap_t*             heap;
++      mtr_t                   mtr;
++
++      DBUG_ENTER("i_s_sys_indexes_fill_table");
++
++      /* deny access to users lacking the PROCESS privilege */
++      if (check_global_access(thd, PROCESS_ACL)) {
++
++                DBUG_RETURN(0);
++      }
++
++        heap = mem_heap_create(1000);
++        mutex_enter(&dict_sys->mutex);
++        mtr_start(&mtr);
++
++      /* Start scan the SYS_INDEXES table */
++      rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES);
++
++      /* Process each record in the table */
++      while (rec) {
++              const char*     err_msg;
++              table_id_t      table_id;
++              dict_index_t    index_rec;
++
++              /* Populate a dict_index_t structure with information from
++              a SYS_INDEXES row */
++              err_msg = dict_process_sys_indexes_rec(heap, rec, &index_rec,
++                                                     &table_id);
++
++              mtr_commit(&mtr);
++              mutex_exit(&dict_sys->mutex);
++
++              if (!err_msg) {
++                      i_s_dict_fill_sys_indexes(thd, table_id, &index_rec,
++                                               tables->table);
++              } else {
++                      /* err_msg is data, never the format string */
++                      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++                                          ER_CANT_FIND_SYSTEM_REC, "%s", err_msg);
++              }
++
++              mem_heap_empty(heap);
++
++              /* Get the next record */
++              mutex_enter(&dict_sys->mutex);
++              mtr_start(&mtr);
++              rec = dict_getnext_system(&pcur, &mtr);
++      }
++
++      mtr_commit(&mtr);
++      mutex_exit(&dict_sys->mutex);
++      mem_heap_free(heap);
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Plugin init hook for INFORMATION_SCHEMA.innodb_sys_indexes: installs the
++column definitions and the fill callback. @return 0 on success */
++static
++int
++innodb_sys_indexes_init(
++/*====================*/
++        void*   p)      /*!< in/out: table schema object */
++{
++        ST_SCHEMA_TABLE*        schema;
++
++        DBUG_ENTER("innodb_sys_indexes_init");
++
++        schema = (ST_SCHEMA_TABLE*) p;
++
++        schema->fields_info = innodb_sysindex_fields_info;
++        schema->fill_table = i_s_sys_indexes_fill_table;
++
++        DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_sys_indexes =
++{
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_SYS_INDEXES"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB SYS_INDEXES"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, innodb_sys_indexes_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++      /* struct st_mysql_show_var* */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* struct st_mysql_sys_var** */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++
++/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_COLUMNS */
++static ST_FIELD_INFO    innodb_sys_columns_fields_info[] =
++{
++#define SYS_COLUMN_TABLE_ID           0
++      {STRUCT_FLD(field_name,         "TABLE_ID"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_COLUMN_NAME               1
++      {STRUCT_FLD(field_name,         "NAME"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_COLUMN_POSITION   2
++      {STRUCT_FLD(field_name,         "POS"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_COLUMN_MTYPE              3
++      {STRUCT_FLD(field_name,         "MTYPE"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_COLUMN__PRTYPE    4
++      {STRUCT_FLD(field_name,         "PRTYPE"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_COLUMN_COLUMN_LEN 5
++      {STRUCT_FLD(field_name,         "LEN"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one row in information_schema.innodb_sys_columns: the table id, the
++column name and the ind, mtype, prtype and len members of the dict_col_t.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_columns(
++/*======================*/
++      THD*            thd,            /*!< in: thread */
++      table_id_t      table_id,       /*!< in: table ID */
++      const char*     col_name,       /*!< in: column name */
++      dict_col_t*     column,         /*!< in: dict_col_t struct holding
++                                      more column information */
++      TABLE*          table_to_fill)  /*!< in/out: fill this table */
++{
++      Field**         fields;
++
++      DBUG_ENTER("i_s_dict_fill_sys_columns");
++
++      fields = table_to_fill->field;
++
++      OK(fields[SYS_COLUMN_TABLE_ID]->store(longlong(table_id), TRUE));
++
++      OK(field_store_string(fields[SYS_COLUMN_NAME], col_name));
++
++      OK(fields[SYS_COLUMN_POSITION]->store(column->ind));
++
++      OK(fields[SYS_COLUMN_MTYPE]->store(column->mtype));
++
++      OK(fields[SYS_COLUMN__PRTYPE]->store(column->prtype));
++
++      OK(fields[SYS_COLUMN_COLUMN_LEN]->store(column->len));
++
++      OK(schema_table_store_record(thd, table_to_fill));
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Fill information_schema.innodb_sys_columns by scanning SYS_COLUMNS; the
++dict_sys mutex and the mtr are released while each row is being stored.
++@return 0 on success */
++static
++int
++i_s_sys_columns_fill_table(
++/*=======================*/
++      THD*            thd,    /*!< in: thread */
++      TABLE_LIST*     tables, /*!< in/out: tables to fill */
++      COND*           cond)   /*!< in: condition (not used) */
++{
++        btr_pcur_t    pcur;
++      const rec_t*    rec;
++      const char*     col_name;
++      mem_heap_t*     heap;
++      mtr_t           mtr;
++
++      DBUG_ENTER("i_s_sys_columns_fill_table");
++
++      /* users without PROCESS_ACL get an empty result, not an error */
++      if (check_global_access(thd, PROCESS_ACL)) {
++
++                DBUG_RETURN(0);
++      }
++
++        heap = mem_heap_create(1000);
++        mutex_enter(&dict_sys->mutex);
++        mtr_start(&mtr);
++
++      rec = dict_startscan_system(&pcur, &mtr, SYS_COLUMNS);
++
++      while (rec) {
++              const char*     err_msg;
++              dict_col_t      column_rec;
++              table_id_t      table_id;
++
++              /* populate a dict_col_t structure with information from
++              a SYS_COLUMNS row; a non-NULL err_msg means it failed */
++              err_msg = dict_process_sys_columns_rec(heap, rec, &column_rec,
++                                                     &table_id, &col_name);
++
++              mtr_commit(&mtr);
++              mutex_exit(&dict_sys->mutex);
++              /* err_msg is data, so it is printed through "%s" */
++              if (!err_msg) {
++                      i_s_dict_fill_sys_columns(thd, table_id, col_name,
++                                               &column_rec,
++                                               tables->table);
++              } else {
++                      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++                                          ER_CANT_FIND_SYSTEM_REC,
++                                          "%s", err_msg);
++              }
++
++              mem_heap_empty(heap);
++
++              /* Get the next record */
++              mutex_enter(&dict_sys->mutex);
++              mtr_start(&mtr);
++              rec = dict_getnext_system(&pcur, &mtr);
++      }
++
++      mtr_commit(&mtr);
++      mutex_exit(&dict_sys->mutex);
++      mem_heap_free(heap);
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Bind INFORMATION_SCHEMA.innodb_sys_columns to its field list and fill function
++@return 0 on success */
++static
++int
++innodb_sys_columns_init(
++/*====================*/
++        void*   p)      /*!< in/out: ST_SCHEMA_TABLE to set up */
++{
++        ST_SCHEMA_TABLE*        schema;
++
++        DBUG_ENTER("innodb_sys_columns_init");
++
++        schema = (ST_SCHEMA_TABLE*) p;
++
++        schema->fields_info = innodb_sys_columns_fields_info;
++        schema->fill_table = i_s_sys_columns_fill_table;
++
++        DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_sys_columns =
++{
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_SYS_COLUMNS"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB SYS_COLUMNS"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, innodb_sys_columns_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++      /* status variables: none (struct st_mysql_show_var*) */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* system variables: none (struct st_mysql_sys_var**) */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++/* Columns of INFORMATION_SCHEMA.innodb_sys_fields; SYS_FIELD_* = indexes */
++static ST_FIELD_INFO    innodb_sys_fields_fields_info[] =
++{
++#define SYS_FIELD_INDEX_ID    0
++      {STRUCT_FLD(field_name,         "INDEX_ID"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_FIELD_NAME                1
++      {STRUCT_FLD(field_name,         "NAME"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_FIELD_POS         2
++      {STRUCT_FLD(field_name,         "POS"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one row in information_schema.innodb_sys_fields: the index id, the
++field name and the field position.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_fields(
++/*=====================*/
++      THD*            thd,            /*!< in: thread */
++      index_id_t      index_id,       /*!< in: index id for the field */
++      dict_field_t*   field,          /*!< in: index field (name is read) */
++      ulint           pos,            /*!< in: Field position */
++      TABLE*          table_to_fill)  /*!< in/out: fill this table */
++{
++      Field**         fields;
++
++      DBUG_ENTER("i_s_dict_fill_sys_fields");
++
++      fields = table_to_fill->field;
++
++      OK(fields[SYS_FIELD_INDEX_ID]->store(longlong(index_id), TRUE));
++
++      OK(field_store_string(fields[SYS_FIELD_NAME], field->name));
++
++      OK(fields[SYS_FIELD_POS]->store(pos));
++
++      OK(schema_table_store_record(thd, table_to_fill));
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Fill information_schema.innodb_sys_fields by scanning SYS_FIELDS; the
++dict_sys mutex and the mtr are released while each row is being stored,
++and the id of the last index stored is handed to the record parser.
++@return 0 on success */
++static
++int
++i_s_sys_fields_fill_table(
++/*======================*/
++      THD*            thd,    /*!< in: thread */
++      TABLE_LIST*     tables, /*!< in/out: tables to fill */
++      COND*           cond)   /*!< in: condition (not used) */
++{
++        btr_pcur_t    pcur;
++      const rec_t*    rec;
++      mem_heap_t*     heap;
++      index_id_t      last_id;
++      mtr_t           mtr;
++
++      DBUG_ENTER("i_s_sys_fields_fill_table");
++
++      /* users without PROCESS_ACL get an empty result, not an error */
++      if (check_global_access(thd, PROCESS_ACL)) {
++
++                DBUG_RETURN(0);
++      }
++
++        heap = mem_heap_create(1000);
++        mutex_enter(&dict_sys->mutex);
++        mtr_start(&mtr);
++
++      /* will save last index id so that we know whether we move to
++      the next index. This is used to calculate prefix length */
++      last_id = 0;
++
++      rec = dict_startscan_system(&pcur, &mtr, SYS_FIELDS);
++
++      while (rec) {
++              ulint           pos;
++              const char*     err_msg;
++              index_id_t      index_id;
++              dict_field_t    field_rec;
++
++              /* Populate a dict_field_t structure with information from
++              a SYS_FIELDS row; a non-NULL err_msg means it failed */
++              err_msg = dict_process_sys_fields_rec(heap, rec, &field_rec,
++                                                    &pos, &index_id, last_id);
++
++              mtr_commit(&mtr);
++              mutex_exit(&dict_sys->mutex);
++              /* err_msg is data, so it is printed through "%s" */
++              if (!err_msg) {
++                      i_s_dict_fill_sys_fields(thd, index_id, &field_rec,
++                                               pos, tables->table);
++                      last_id = index_id;
++              } else {
++                      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++                                          ER_CANT_FIND_SYSTEM_REC,
++                                          "%s", err_msg);
++              }
++
++              mem_heap_empty(heap);
++
++              /* Get the next record */
++              mutex_enter(&dict_sys->mutex);
++              mtr_start(&mtr);
++              rec = dict_getnext_system(&pcur, &mtr);
++      }
++
++      mtr_commit(&mtr);
++      mutex_exit(&dict_sys->mutex);
++      mem_heap_free(heap);
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Bind INFORMATION_SCHEMA.innodb_sys_fields to its field list and fill function
++@return 0 on success */
++static
++int
++innodb_sys_fields_init(
++/*===================*/
++        void*   p)      /*!< in/out: ST_SCHEMA_TABLE to set up */
++{
++        ST_SCHEMA_TABLE*        schema;
++
++        DBUG_ENTER("innodb_sys_fields_init");
++
++        schema = (ST_SCHEMA_TABLE*) p;
++
++        schema->fields_info = innodb_sys_fields_fields_info;
++        schema->fill_table = i_s_sys_fields_fill_table;
++
++        DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_sys_fields =
++{
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_SYS_FIELDS"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB SYS_FIELDS"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, innodb_sys_fields_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++      /* status variables: none (struct st_mysql_show_var*) */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* system variables: none (struct st_mysql_sys_var**) */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++
++/* Columns of INFORMATION_SCHEMA.innodb_sys_foreign; SYS_FOREIGN_* = indexes */
++static ST_FIELD_INFO    innodb_sys_foreign_fields_info[] =
++{
++#define SYS_FOREIGN_ID                0
++      {STRUCT_FLD(field_name,         "ID"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_FOR_NAME  1
++      {STRUCT_FLD(field_name,         "FOR_NAME"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_REF_NAME  2
++      {STRUCT_FLD(field_name,         "REF_NAME"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_NUM_COL   3
++      {STRUCT_FLD(field_name,         "N_COLS"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_TYPE      4
++      {STRUCT_FLD(field_name,         "TYPE"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one row in information_schema.innodb_sys_foreign: the constraint id,
++both table names, the number of fields and the type of the dict_foreign_t.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_foreign(
++/*======================*/
++      THD*            thd,            /*!< in: thread */
++      dict_foreign_t* foreign,        /*!< in: foreign key constraint */
++      TABLE*          table_to_fill)  /*!< in/out: fill this table */
++{
++      Field**         fields;
++
++      DBUG_ENTER("i_s_dict_fill_sys_foreign");
++
++      fields = table_to_fill->field;
++
++      OK(field_store_string(fields[SYS_FOREIGN_ID], foreign->id));
++
++      OK(field_store_string(fields[SYS_FOREIGN_FOR_NAME],
++                            foreign->foreign_table_name));
++
++      OK(field_store_string(fields[SYS_FOREIGN_REF_NAME],
++                            foreign->referenced_table_name));
++
++      OK(fields[SYS_FOREIGN_NUM_COL]->store(foreign->n_fields));
++
++      OK(fields[SYS_FOREIGN_TYPE]->store(foreign->type));
++
++      OK(schema_table_store_record(thd, table_to_fill));
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Fill INFORMATION_SCHEMA.innodb_sys_foreign by scanning SYS_FOREIGN; the
++dict_sys mutex and the mtr are released while each row is being stored
++and taken again, mutex first, before the next record is fetched.
++@return 0 on success */
++static
++int
++i_s_sys_foreign_fill_table(
++/*=======================*/
++      THD*            thd,    /*!< in: thread */
++      TABLE_LIST*     tables, /*!< in/out: tables to fill */
++      COND*           cond)   /*!< in: condition (not used) */
++{
++        btr_pcur_t    pcur;
++      const rec_t*    rec;
++      mem_heap_t*     heap;
++      mtr_t           mtr;
++
++      DBUG_ENTER("i_s_sys_foreign_fill_table");
++
++      /* users without PROCESS_ACL get an empty result, not an error */
++      if (check_global_access(thd, PROCESS_ACL)) {
++
++                DBUG_RETURN(0);
++      }
++
++        heap = mem_heap_create(1000);
++        mutex_enter(&dict_sys->mutex);
++        mtr_start(&mtr);
++
++      rec = dict_startscan_system(&pcur, &mtr, SYS_FOREIGN);
++
++      while (rec) {
++              const char*     err_msg;
++              dict_foreign_t  foreign_rec;
++
++              /* Populate a dict_foreign_t structure with information from
++              a SYS_FOREIGN row; a non-NULL err_msg means it failed */
++              err_msg = dict_process_sys_foreign_rec(heap, rec, &foreign_rec);
++
++              mtr_commit(&mtr);
++              mutex_exit(&dict_sys->mutex);
++              /* err_msg is data, so it is printed through "%s" */
++              if (!err_msg) {
++                      i_s_dict_fill_sys_foreign(thd, &foreign_rec,
++                                               tables->table);
++              } else {
++                      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++                                          ER_CANT_FIND_SYSTEM_REC,
++                                          "%s", err_msg);
++              }
++
++              mem_heap_empty(heap);
++
++              /* Get the next record */
++              mutex_enter(&dict_sys->mutex);
++              mtr_start(&mtr);
++              rec = dict_getnext_system(&pcur, &mtr);
++      }
++
++      mtr_commit(&mtr);
++      mutex_exit(&dict_sys->mutex);
++      mem_heap_free(heap);
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Bind INFORMATION_SCHEMA.innodb_sys_foreign to its field list and fill function
++@return 0 on success */
++static
++int
++innodb_sys_foreign_init(
++/*====================*/
++        void*   p)      /*!< in/out: ST_SCHEMA_TABLE to set up */
++{
++        ST_SCHEMA_TABLE*        schema;
++
++        DBUG_ENTER("innodb_sys_foreign_init");
++
++        schema = (ST_SCHEMA_TABLE*) p;
++
++        schema->fields_info = innodb_sys_foreign_fields_info;
++        schema->fill_table = i_s_sys_foreign_fill_table;
++
++        DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_sys_foreign =
++{
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_SYS_FOREIGN"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB SYS_FOREIGN"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, innodb_sys_foreign_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++      /* status variables: none (struct st_mysql_show_var*) */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* system variables: none (struct st_mysql_sys_var**) */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++/* Columns of INFORMATION_SCHEMA.innodb_sys_foreign_cols (SYS_FOREIGN_COL_*) */
++static ST_FIELD_INFO    innodb_sys_foreign_cols_fields_info[] =
++{
++#define SYS_FOREIGN_COL_ID            0
++      {STRUCT_FLD(field_name,         "ID"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_COL_FOR_NAME      1
++      {STRUCT_FLD(field_name,         "FOR_COL_NAME"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_COL_REF_NAME      2
++      {STRUCT_FLD(field_name,         "REF_COL_NAME"),
++       STRUCT_FLD(field_length,       NAME_LEN + 1),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_COL_POS           3
++      {STRUCT_FLD(field_name,         "POS"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one row in information_schema.innodb_sys_foreign_cols: the constraint
++name, both column names and the column position.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_foreign_cols(
++/*==========================*/
++      THD*            thd,            /*!< in: thread */
++      const char*     name,           /*!< in: foreign key constraint name */
++      const char*     for_col_name,   /*!< in: referencing column name*/
++      const char*     ref_col_name,   /*!< in: referenced column
++                                      name */
++      ulint           pos,            /*!< in: column position */
++      TABLE*          table_to_fill)  /*!< in/out: fill this table */
++{
++      Field**         fields;
++
++      DBUG_ENTER("i_s_dict_fill_sys_foreign_cols");
++
++      fields = table_to_fill->field;
++
++      OK(field_store_string(fields[SYS_FOREIGN_COL_ID], name));
++
++      OK(field_store_string(fields[SYS_FOREIGN_COL_FOR_NAME], for_col_name));
++
++      OK(field_store_string(fields[SYS_FOREIGN_COL_REF_NAME], ref_col_name));
++
++      OK(fields[SYS_FOREIGN_COL_POS]->store(pos));
++
++      OK(schema_table_store_record(thd, table_to_fill));
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Fill INFORMATION_SCHEMA.innodb_sys_foreign_cols by scanning
++SYS_FOREIGN_COLS one record at a time; the dict_sys mutex and the mtr
++are released while each extracted row is being stored.
++@return 0 on success */
++static
++int
++i_s_sys_foreign_cols_fill_table(
++/*============================*/
++      THD*            thd,    /*!< in: thread */
++      TABLE_LIST*     tables, /*!< in/out: tables to fill */
++      COND*           cond)   /*!< in: condition (not used) */
++{
++        btr_pcur_t    pcur;
++      const rec_t*    rec;
++      mem_heap_t*     heap;
++      mtr_t           mtr;
++
++      DBUG_ENTER("i_s_sys_foreign_cols_fill_table");
++
++      /* users without PROCESS_ACL get an empty result, not an error */
++      if (check_global_access(thd, PROCESS_ACL)) {
++                DBUG_RETURN(0);
++      }
++
++        heap = mem_heap_create(1000);
++        mutex_enter(&dict_sys->mutex);
++        mtr_start(&mtr);
++
++      rec = dict_startscan_system(&pcur, &mtr, SYS_FOREIGN_COLS);
++
++      while (rec) {
++              const char*     err_msg;
++              const char*     name;
++              const char*     for_col_name;
++              const char*     ref_col_name;
++              ulint           pos;
++
++              /* Extract the row; a non-NULL err_msg means it failed */
++              err_msg = dict_process_sys_foreign_col_rec(
++                      heap, rec, &name, &for_col_name, &ref_col_name, &pos);
++
++              mtr_commit(&mtr);
++              mutex_exit(&dict_sys->mutex);
++              /* err_msg is data, so it is printed through "%s" */
++              if (!err_msg) {
++                      i_s_dict_fill_sys_foreign_cols(
++                              thd, name, for_col_name, ref_col_name, pos,
++                              tables->table);
++              } else {
++                      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++                                          ER_CANT_FIND_SYSTEM_REC,
++                                          "%s", err_msg);
++              }
++
++              mem_heap_empty(heap);
++
++              /* Get the next record */
++              mutex_enter(&dict_sys->mutex);
++              mtr_start(&mtr);
++              rec = dict_getnext_system(&pcur, &mtr);
++      }
++
++      mtr_commit(&mtr);
++      mutex_exit(&dict_sys->mutex);
++      mem_heap_free(heap);
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Bind INFORMATION_SCHEMA.innodb_sys_foreign_cols to its fields and fill function
++@return 0 on success */
++static
++int
++innodb_sys_foreign_cols_init(
++/*========================*/
++        void*   p)      /*!< in/out: ST_SCHEMA_TABLE to set up */
++{
++        ST_SCHEMA_TABLE*        schema;
++
++        DBUG_ENTER("innodb_sys_foreign_cols_init");
++
++        schema = (ST_SCHEMA_TABLE*) p;
++
++        schema->fields_info = innodb_sys_foreign_cols_fields_info;
++        schema->fill_table = i_s_sys_foreign_cols_fill_table;
++
++        DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_sys_foreign_cols =
++{
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_SYS_FOREIGN_COLS"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "InnoDB SYS_FOREIGN_COLS"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, innodb_sys_foreign_cols_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++      /* status variables: none (struct st_mysql_show_var*) */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* system variables: none (struct st_mysql_sys_var**) */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++
+ /***********************************************************************
+ */
+ static ST_FIELD_INFO  i_s_innodb_rseg_fields_info[] =
+diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
+--- a/storage/innobase/handler/i_s.h   2010-12-03 15:37:45.540456499 +0900
++++ b/storage/innobase/handler/i_s.h   2010-12-03 16:08:57.596941207 +0900
+@@ -33,6 +33,13 @@
+ extern struct st_mysql_plugin i_s_innodb_cmp_reset;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
++extern struct st_mysql_plugin i_s_innodb_sys_tables;
++extern struct st_mysql_plugin i_s_innodb_sys_tablestats;
++extern struct st_mysql_plugin i_s_innodb_sys_indexes;
++extern struct st_mysql_plugin   i_s_innodb_sys_columns;
++extern struct st_mysql_plugin   i_s_innodb_sys_fields;
++extern struct st_mysql_plugin   i_s_innodb_sys_foreign;
++extern struct st_mysql_plugin   i_s_innodb_sys_foreign_cols;
+ extern struct st_mysql_plugin i_s_innodb_rseg;
+ #endif /* i_s_h */
diff --git a/innodb_split_buf_pool_mutex.patch b/innodb_split_buf_pool_mutex.patch
new file mode 100644 (file)
index 0000000..9fe7fb1
--- /dev/null
@@ -0,0 +1,3955 @@
+# name       : innodb_split_buf_pool_mutex.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
+--- a/storage/innobase/btr/btr0cur.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/btr/btr0cur.c   2010-12-03 15:48:29.268957148 +0900
+@@ -4039,7 +4039,8 @@
+       mtr_commit(mtr);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+       mutex_enter(&block->mutex);
+       /* Only free the block if it is still allocated to
+@@ -4050,17 +4051,22 @@
+           && buf_block_get_space(block) == space
+           && buf_block_get_page_no(block) == page_no) {
+-              if (buf_LRU_free_block(&block->page, all, NULL)
++              if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
+                   != BUF_LRU_FREED
+-                  && all && block->page.zip.data) {
++                  && all && block->page.zip.data
++                  /* Now, buf_LRU_free_block() may release mutex temporarily */
++                  && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
++                  && buf_block_get_space(block) == space
++                  && buf_block_get_page_no(block) == page_no) {
+                       /* Attempt to deallocate the uncompressed page
+                       if the whole block cannot be deallocted. */
+-                      buf_LRU_free_block(&block->page, FALSE, NULL);
++                      buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
+               }
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+       mutex_exit(&block->mutex);
+ }
+diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
+--- a/storage/innobase/btr/btr0sea.c   2010-12-03 15:48:03.033037049 +0900
++++ b/storage/innobase/btr/btr0sea.c   2010-12-03 15:48:29.271024260 +0900
+@@ -1211,7 +1211,7 @@
+       ulint*          offsets;
+       rw_lock_x_lock(&btr_search_latch);
+-      buf_pool_mutex_enter_all();
++      //buf_pool_mutex_enter_all();
+       table = btr_search_sys->hash_index;
+@@ -1220,6 +1220,8 @@
+               buf_pool = buf_pool_from_array(j);
++              mutex_enter(&buf_pool->LRU_list_mutex);
++
+               bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+               while (bpage != NULL) {
+@@ -1301,9 +1303,11 @@
+                       bpage = UT_LIST_GET_PREV(LRU, bpage);
+               }
++
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       }
+-      buf_pool_mutex_exit_all();
++      //buf_pool_mutex_exit_all();
+       rw_lock_x_unlock(&btr_search_latch);
+       if (UNIV_LIKELY_NULL(heap)) {
+@@ -1896,7 +1900,7 @@
+       rec_offs_init(offsets_);
+       rw_lock_x_lock(&btr_search_latch);
+-      buf_pool_mutex_enter_all();
++      buf_pool_page_hash_x_lock_all();
+       cell_count = hash_get_n_cells(btr_search_sys->hash_index);
+@@ -1904,11 +1908,11 @@
+               /* We release btr_search_latch every once in a while to
+               give other queries a chance to run. */
+               if ((i != 0) && ((i % chunk_size) == 0)) {
+-                      buf_pool_mutex_exit_all();
++                      buf_pool_page_hash_x_unlock_all();
+                       rw_lock_x_unlock(&btr_search_latch);
+                       os_thread_yield();
+                       rw_lock_x_lock(&btr_search_latch);
+-                      buf_pool_mutex_enter_all();
++                      buf_pool_page_hash_x_lock_all();
+               }
+               node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
+@@ -2019,11 +2023,11 @@
+               /* We release btr_search_latch every once in a while to
+               give other queries a chance to run. */
+               if (i != 0) {
+-                      buf_pool_mutex_exit_all();
++                      buf_pool_page_hash_x_unlock_all();
+                       rw_lock_x_unlock(&btr_search_latch);
+                       os_thread_yield();
+                       rw_lock_x_lock(&btr_search_latch);
+-                      buf_pool_mutex_enter_all();
++                      buf_pool_page_hash_x_lock_all();
+               }
+               if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
+@@ -2031,7 +2035,7 @@
+               }
+       }
+-      buf_pool_mutex_exit_all();
++      buf_pool_page_hash_x_unlock_all();
+       rw_lock_x_unlock(&btr_search_latch);
+       if (UNIV_LIKELY_NULL(heap)) {
+               mem_heap_free(heap);
+diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
+--- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
++++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
+@@ -73,10 +73,11 @@
+       if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
+ #endif /* UNIV_DEBUG_VALGRIND */
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+       ut_ad(buf_pool->zip_free[i].start != bpage);
+-      UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
++      UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
+ #ifdef UNIV_DEBUG_VALGRIND
+       if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
+@@ -96,8 +97,8 @@
+                                       buf_pool->zip_free[] */
+ {
+ #ifdef UNIV_DEBUG_VALGRIND
+-      buf_page_t*     prev = UT_LIST_GET_PREV(list, bpage);
+-      buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
++      buf_page_t*     prev = UT_LIST_GET_PREV(zip_list, bpage);
++      buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
+       if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
+       if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
+@@ -106,9 +107,10 @@
+       ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
+ #endif /* UNIV_DEBUG_VALGRIND */
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+-      UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
++      UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
+ #ifdef UNIV_DEBUG_VALGRIND
+       if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
+@@ -128,12 +130,13 @@
+ {
+       buf_page_t*     bpage;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       ut_a(i < BUF_BUDDY_SIZES);
+ #ifndef UNIV_DEBUG_VALGRIND
+       /* Valgrind would complain about accessing free memory. */
+-      ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++      ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+                             ut_ad(buf_page_get_state(ut_list_node_313)
+                                   == BUF_BLOCK_ZIP_FREE)));
+ #endif /* !UNIV_DEBUG_VALGRIND */
+@@ -177,16 +180,19 @@
+ buf_buddy_block_free(
+ /*=================*/
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+-      void*           buf)            /*!< in: buffer frame to deallocate */
++      void*           buf,            /*!< in: buffer frame to deallocate */
++      ibool           have_page_hash_mutex)
+ {
+       const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf);
+       buf_page_t*     bpage;
+       buf_block_t*    block;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
++      mutex_enter(&buf_pool->zip_hash_mutex);
++
+       HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
+                   ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
+                         && bpage->in_zip_hash && !bpage->in_page_hash),
+@@ -198,12 +204,14 @@
+       ut_d(bpage->in_zip_hash = FALSE);
+       HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
++      mutex_exit(&buf_pool->zip_hash_mutex);
++
+       ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
+       UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
+       block = (buf_block_t*) bpage;
+       mutex_enter(&block->mutex);
+-      buf_LRU_block_free_non_file_page(block);
++      buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+       mutex_exit(&block->mutex);
+       ut_ad(buf_pool->buddy_n_frames > 0);
+@@ -220,7 +228,7 @@
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_block(block);
+       const ulint     fold = BUF_POOL_ZIP_FOLD(block);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
+@@ -232,7 +240,10 @@
+       ut_ad(!block->page.in_page_hash);
+       ut_ad(!block->page.in_zip_hash);
+       ut_d(block->page.in_zip_hash = TRUE);
++
++      mutex_enter(&buf_pool->zip_hash_mutex);
+       HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
++      mutex_exit(&buf_pool->zip_hash_mutex);
+       ut_d(buf_pool->buddy_n_frames++);
+ }
+@@ -268,7 +279,7 @@
+               bpage->state = BUF_BLOCK_ZIP_FREE;
+ #ifndef UNIV_DEBUG_VALGRIND
+               /* Valgrind would complain about accessing free memory. */
+-              ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++              ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+                                     ut_ad(buf_page_get_state(
+                                                   ut_list_node_313)
+                                           == BUF_BLOCK_ZIP_FREE)));
+@@ -291,25 +302,29 @@
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+       ulint           i,              /*!< in: index of buf_pool->zip_free[],
+                                       or BUF_BUDDY_SIZES */
+-      ibool*          lru)            /*!< in: pointer to a variable that
++      ibool*          lru,            /*!< in: pointer to a variable that
+                                       will be assigned TRUE if storage was
+                                       allocated from the LRU list and
+                                       buf_pool->mutex was temporarily
+                                       released, or NULL if the LRU list
+                                       should not be used */
++      ibool           have_page_hash_mutex)
+ {
+       buf_block_t*    block;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       if (i < BUF_BUDDY_SIZES) {
+               /* Try to allocate from the buddy system. */
++              mutex_enter(&buf_pool->zip_free_mutex);
+               block = buf_buddy_alloc_zip(buf_pool, i);
+               if (block) {
+                       goto func_exit;
+               }
++              mutex_exit(&buf_pool->zip_free_mutex);
+       }
+       /* Try allocating from the buf_pool->free list. */
+@@ -326,19 +341,30 @@
+       }
+       /* Try replacing an uncompressed page in the buffer pool. */
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      if (have_page_hash_mutex) {
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++      }
+       block = buf_LRU_get_free_block(buf_pool, 0);
+       *lru = TRUE;
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      if (have_page_hash_mutex) {
++              rw_lock_x_lock(&buf_pool->page_hash_latch);
++      }
+ alloc_big:
+       buf_buddy_block_register(block);
++      mutex_enter(&buf_pool->zip_free_mutex);
+       block = buf_buddy_alloc_from(
+               buf_pool, block->frame, i, BUF_BUDDY_SIZES);
+ func_exit:
+       buf_pool->buddy_stat[i].used++;
++      mutex_exit(&buf_pool->zip_free_mutex);
++
+       return(block);
+ }
+@@ -355,7 +381,10 @@
+       buf_page_t*     b;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+       switch (buf_page_get_state(bpage)) {
+       case BUF_BLOCK_ZIP_FREE:
+@@ -364,7 +393,7 @@
+       case BUF_BLOCK_FILE_PAGE:
+       case BUF_BLOCK_MEMORY:
+       case BUF_BLOCK_REMOVE_HASH:
+-              ut_error;
++              /* ut_error; */ /* optimistic */
+       case BUF_BLOCK_ZIP_DIRTY:
+               /* Cannot relocate dirty pages. */
+               return(FALSE);
+@@ -374,9 +403,18 @@
+       }
+       mutex_enter(&buf_pool->zip_mutex);
++      mutex_enter(&buf_pool->zip_free_mutex);
+       if (!buf_page_can_relocate(bpage)) {
+               mutex_exit(&buf_pool->zip_mutex);
++              mutex_exit(&buf_pool->zip_free_mutex);
++              return(FALSE);
++      }
++
++      if (bpage != buf_page_hash_get(buf_pool,
++                                     bpage->space, bpage->offset)) {
++              mutex_exit(&buf_pool->zip_mutex);
++              mutex_exit(&buf_pool->zip_free_mutex);
+               return(FALSE);
+       }
+@@ -384,18 +422,19 @@
+       ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
+       /* relocate buf_pool->zip_clean */
+-      b = UT_LIST_GET_PREV(list, dpage);
+-      UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
++      b = UT_LIST_GET_PREV(zip_list, dpage);
++      UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
+       if (b) {
+-              UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
++              UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
+       } else {
+-              UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
++              UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
+       }
+       UNIV_MEM_INVALID(bpage, sizeof *bpage);
+       mutex_exit(&buf_pool->zip_mutex);
++      mutex_exit(&buf_pool->zip_free_mutex);
+       return(TRUE);
+ }
+@@ -409,14 +448,16 @@
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+       void*           src,            /*!< in: block to relocate */
+       void*           dst,            /*!< in: free block to relocate to */
+-      ulint           i)              /*!< in: index of
++      ulint           i,              /*!< in: index of
+                                       buf_pool->zip_free[] */
++      ibool           have_page_hash_mutex)
+ {
+       buf_page_t*     bpage;
+       const ulint     size    = BUF_BUDDY_LOW << i;
+       ullint          usec    = ut_time_us(NULL);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       ut_ad(!ut_align_offset(src, size));
+       ut_ad(!ut_align_offset(dst, size));
+@@ -438,6 +479,12 @@
+               /* This is a compressed page. */
+               mutex_t*        mutex;
++              if (!have_page_hash_mutex) {
++                      mutex_exit(&buf_pool->zip_free_mutex);
++                      mutex_enter(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_lock(&buf_pool->page_hash_latch);
++              }
++
+               /* The src block may be split into smaller blocks,
+               some of which may be free.  Thus, the
+               mach_read_from_4() calls below may attempt to read
+@@ -462,6 +509,11 @@
+                       added to buf_pool->page_hash yet.  Obviously,
+                       it cannot be relocated. */
++                      if (!have_page_hash_mutex) {
++                              mutex_enter(&buf_pool->zip_free_mutex);
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      }
+                       return(FALSE);
+               }
+@@ -473,18 +525,27 @@
+                       For the sake of simplicity, give up. */
+                       ut_ad(page_zip_get_size(&bpage->zip) < size);
++                      if (!have_page_hash_mutex) {
++                              mutex_enter(&buf_pool->zip_free_mutex);
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      }
+                       return(FALSE);
+               }
++              /* To keep latch order */
++              if (have_page_hash_mutex)
++                      mutex_exit(&buf_pool->zip_free_mutex);
++
+               /* The block must have been allocated, but it may
+               contain uninitialized data. */
+               UNIV_MEM_ASSERT_W(src, size);
+-              mutex = buf_page_get_mutex(bpage);
++              mutex = buf_page_get_mutex_enter(bpage);
+-              mutex_enter(mutex);
++              mutex_enter(&buf_pool->zip_free_mutex);
+-              if (buf_page_can_relocate(bpage)) {
++              if (mutex && buf_page_can_relocate(bpage)) {
+                       /* Relocate the compressed page. */
+                       ut_a(bpage->zip.data == src);
+                       memcpy(dst, src, size);
+@@ -499,10 +560,22 @@
+                               buddy_stat->relocated_usec
+                                       += ut_time_us(NULL) - usec;
+                       }
++
++                      if (!have_page_hash_mutex) {
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      }
+                       return(TRUE);
+               }
+-              mutex_exit(mutex);
++              if (!have_page_hash_mutex) {
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++              }
++
++              if (mutex) {
++                      mutex_exit(mutex);
++              }
+       } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
+               /* This must be a buf_page_t object. */
+ #if UNIV_WORD_SIZE == 4
+@@ -511,10 +584,31 @@
+               about uninitialized pad bytes. */
+               UNIV_MEM_ASSERT_RW(src, size);
+ #endif
++
++              mutex_exit(&buf_pool->zip_free_mutex);
++
++              if (!have_page_hash_mutex) {
++                      mutex_enter(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_lock(&buf_pool->page_hash_latch);
++              }
++
+               if (buf_buddy_relocate_block(src, dst)) {
++                      mutex_enter(&buf_pool->zip_free_mutex);
++
++                      if (!have_page_hash_mutex) {
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      }
+                       goto success;
+               }
++
++              mutex_enter(&buf_pool->zip_free_mutex);
++
++              if (!have_page_hash_mutex) {
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++              }
+       }
+       return(FALSE);
+@@ -529,13 +623,15 @@
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+       void*           buf,            /*!< in: block to be freed, must not be
+                                       pointed to by the buffer pool */
+-      ulint           i)              /*!< in: index of buf_pool->zip_free[],
++      ulint           i,              /*!< in: index of buf_pool->zip_free[],
+                                       or BUF_BUDDY_SIZES */
++      ibool           have_page_hash_mutex)
+ {
+       buf_page_t*     bpage;
+       buf_page_t*     buddy;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       ut_ad(i <= BUF_BUDDY_SIZES);
+       ut_ad(buf_pool->buddy_stat[i].used > 0);
+@@ -546,7 +642,9 @@
+       ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
+       if (i == BUF_BUDDY_SIZES) {
+-              buf_buddy_block_free(buf_pool, buf);
++              mutex_exit(&buf_pool->zip_free_mutex);
++              buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
++              mutex_enter(&buf_pool->zip_free_mutex);
+               return;
+       }
+@@ -591,7 +689,7 @@
+               ut_a(bpage != buf);
+               {
+-                      buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
++                      buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
+                       UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
+                       bpage = next;
+               }
+@@ -600,13 +698,13 @@
+ #ifndef UNIV_DEBUG_VALGRIND
+ buddy_nonfree:
+       /* Valgrind would complain about accessing free memory. */
+-      ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++      ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+                             ut_ad(buf_page_get_state(ut_list_node_313)
+                                   == BUF_BLOCK_ZIP_FREE)));
+ #endif /* UNIV_DEBUG_VALGRIND */
+       /* The buddy is not free. Is there a free block of this size? */
+-      bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
++      bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
+       if (bpage) {
+               /* Remove the block from the free list, because a successful
+@@ -616,7 +714,7 @@
+               buf_buddy_remove_from_free(buf_pool, bpage, i);
+               /* Try to relocate the buddy of buf to the free block. */
+-              if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
++              if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
+                       ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
+                       goto buddy_free2;
+@@ -636,14 +734,14 @@
+               (Parts of the buddy can be free in
+               buf_pool->zip_free[j] with j < i.) */
+-              ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++              ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+                                     ut_ad(buf_page_get_state(
+                                                   ut_list_node_313)
+                                           == BUF_BLOCK_ZIP_FREE
+                                           && ut_list_node_313 != buddy)));
+ #endif /* !UNIV_DEBUG_VALGRIND */
+-              if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
++              if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
+                       buf = bpage;
+                       UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-03 15:22:36.314943336 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-03 15:48:29.282947357 +0900
+@@ -263,6 +263,7 @@
+ #ifdef UNIV_PFS_RWLOCK
+ /* Keys to register buffer block related rwlocks and mutexes with
+ performance schema */
++UNIV_INTERN mysql_pfs_key_t   buf_pool_page_hash_key;
+ UNIV_INTERN mysql_pfs_key_t   buf_block_lock_key;
+ # ifdef UNIV_SYNC_DEBUG
+ UNIV_INTERN mysql_pfs_key_t   buf_block_debug_latch_key;
+@@ -273,6 +274,10 @@
+ UNIV_INTERN mysql_pfs_key_t   buffer_block_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t   buf_pool_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t   buf_pool_zip_mutex_key;
++UNIV_INTERN mysql_pfs_key_t   buf_pool_LRU_list_mutex_key;
++UNIV_INTERN mysql_pfs_key_t   buf_pool_free_list_mutex_key;
++UNIV_INTERN mysql_pfs_key_t   buf_pool_zip_free_mutex_key;
++UNIV_INTERN mysql_pfs_key_t   buf_pool_zip_hash_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t   flush_list_mutex_key;
+ #endif /* UNIV_PFS_MUTEX */
+@@ -881,9 +886,9 @@
+       block->page.in_zip_hash = FALSE;
+       block->page.in_flush_list = FALSE;
+       block->page.in_free_list = FALSE;
+-      block->in_unzip_LRU_list = FALSE;
+ #endif /* UNIV_DEBUG */
+       block->page.in_LRU_list = FALSE;
++      block->in_unzip_LRU_list = FALSE;
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+       block->n_pointers = 0;
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+@@ -981,9 +986,11 @@
+               memset(block->frame, '\0', UNIV_PAGE_SIZE);
+ #endif
+               /* Add the block to the free list */
+-              UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
++              mutex_enter(&buf_pool->free_list_mutex);
++              UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
+               ut_d(block->page.in_free_list = TRUE);
++              mutex_exit(&buf_pool->free_list_mutex);
+               ut_ad(buf_pool_from_block(block) == buf_pool);
+               block++;
+@@ -1038,7 +1045,8 @@
+       buf_chunk_t*    chunk = buf_pool->chunks;
+       ut_ad(buf_pool);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       for (n = buf_pool->n_chunks; n--; chunk++) {
+               buf_block_t* block = buf_chunk_contains_zip(chunk, data);
+@@ -1138,7 +1146,7 @@
+       buf_block_t*            block;
+       const buf_block_t*      block_end;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
+       block_end = chunk->blocks + chunk->size;
+@@ -1150,8 +1158,10 @@
+               ut_ad(!block->in_unzip_LRU_list);
+               ut_ad(!block->page.in_flush_list);
+               /* Remove the block from the free list. */
++              mutex_enter(&buf_pool->free_list_mutex);
+               ut_ad(block->page.in_free_list);
+-              UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
++              UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
++              mutex_exit(&buf_pool->free_list_mutex);
+               /* Free the latches. */
+               mutex_free(&block->mutex);
+@@ -1208,9 +1218,21 @@
+       ------------------------------- */
+       mutex_create(buf_pool_mutex_key,
+                    &buf_pool->mutex, SYNC_BUF_POOL);
++      mutex_create(buf_pool_LRU_list_mutex_key,
++                   &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
++      rw_lock_create(buf_pool_page_hash_key,
++                     &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
++      mutex_create(buf_pool_free_list_mutex_key,
++                   &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
++      mutex_create(buf_pool_zip_free_mutex_key,
++                   &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
++      mutex_create(buf_pool_zip_hash_mutex_key,
++                   &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
+       mutex_create(buf_pool_zip_mutex_key,
+                    &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       buf_pool_mutex_enter(buf_pool);
+       if (buf_pool_size > 0) {
+@@ -1223,6 +1245,8 @@
+                       mem_free(chunk);
+                       mem_free(buf_pool);
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+                       buf_pool_mutex_exit(buf_pool);
+                       return(DB_ERROR);
+@@ -1253,6 +1277,8 @@
+       /* All fields are initialized by mem_zalloc(). */
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       buf_pool_mutex_exit(buf_pool);
+       return(DB_SUCCESS);
+@@ -1467,7 +1493,11 @@
+       ulint           fold;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+       ut_a(bpage->buf_fix_count == 0);
+@@ -1554,7 +1584,8 @@
+ try_again:
+       btr_search_disable(); /* Empty the adaptive hash index again */
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+ shrink_again:
+       if (buf_pool->n_chunks <= 1) {
+@@ -1625,7 +1656,7 @@
+                               buf_LRU_make_block_old(&block->page);
+                               dirty++;
+-                      } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
++                      } else if (buf_LRU_free_block(&block->page, TRUE, NULL, TRUE)
+                                  != BUF_LRU_FREED) {
+                               nonfree++;
+                       }
+@@ -1633,7 +1664,8 @@
+                       mutex_exit(&block->mutex);
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+               /* Request for a flush of the chunk if it helps.
+               Do not flush if there are non-free blocks, since
+@@ -1683,7 +1715,8 @@
+ func_done:
+       buf_pool->old_pool_size = buf_pool->curr_pool_size;
+ func_exit:
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+       btr_search_enable();
+ }
+@@ -1724,7 +1757,9 @@
+       hash_table_t*   zip_hash;
+       hash_table_t*   page_hash;
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       /* Free, create, and populate the hash table. */
+       hash_table_free(buf_pool->page_hash);
+@@ -1765,8 +1800,9 @@
+       All such blocks are either in buf_pool->zip_clean or
+       in buf_pool->flush_list. */
++      mutex_enter(&buf_pool->zip_mutex);
+       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(zip_list, b)) {
+               ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+               ut_ad(!b->in_flush_list);
+               ut_ad(b->in_LRU_list);
+@@ -1776,10 +1812,11 @@
+               HASH_INSERT(buf_page_t, hash, page_hash,
+                           buf_page_address_fold(b->space, b->offset), b);
+       }
++      mutex_exit(&buf_pool->zip_mutex);
+       buf_flush_list_mutex_enter(buf_pool);
+       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(flush_list, b)) {
+               ut_ad(b->in_flush_list);
+               ut_ad(b->in_LRU_list);
+               ut_ad(b->in_page_hash);
+@@ -1806,7 +1843,9 @@
+       }
+       buf_flush_list_mutex_exit(buf_pool);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+ /********************************************************************
+@@ -1853,21 +1892,32 @@
+       buf_page_t*     bpage;
+       ulint           i;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
++      mutex_t*        block_mutex;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
++      if (bpage) {
++              block_mutex = buf_page_get_mutex_enter(bpage);
++              ut_a(block_mutex);
++      }
+       if (UNIV_LIKELY_NULL(bpage)) {
+               if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
+                       /* The page was loaded meanwhile. */
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+                       return(bpage);
+               }
+               /* Add to an existing watch. */
+               bpage->buf_fix_count++;
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++              mutex_exit(block_mutex);
+               return(NULL);
+       }
++      /* buf_pool->watch is protected by zip_mutex for now */
++      mutex_enter(&buf_pool->zip_mutex);
+       for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
+               bpage = &buf_pool->watch[i];
+@@ -1891,10 +1941,12 @@
+                       bpage->space = space;
+                       bpage->offset = offset;
+                       bpage->buf_fix_count = 1;
+-
++                      bpage->buf_pool_index = buf_pool_index(buf_pool);
+                       ut_d(bpage->in_page_hash = TRUE);
+                       HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+                                   fold, bpage);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      mutex_exit(&buf_pool->zip_mutex);
+                       return(NULL);
+               case BUF_BLOCK_ZIP_PAGE:
+                       ut_ad(bpage->in_page_hash);
+@@ -1912,6 +1964,8 @@
+       ut_error;
+       /* Fix compiler warning */
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++      mutex_exit(&buf_pool->zip_mutex);
+       return(NULL);
+ }
+@@ -1941,6 +1995,8 @@
+       buf_chunk_t*    chunks;
+       buf_chunk_t*    chunk;
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       buf_pool_mutex_enter(buf_pool);
+       chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
+@@ -1959,6 +2015,8 @@
+               buf_pool->n_chunks++;
+       }
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       buf_pool_mutex_exit(buf_pool);
+ }
+@@ -2046,7 +2104,11 @@
+                                       space, offset) */
+       buf_page_t*     watch)          /*!< in/out: sentinel for watch */
+ {
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
++      ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
+       HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
+       ut_d(watch->in_page_hash = FALSE);
+@@ -2068,28 +2130,31 @@
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+       ulint           fold = buf_page_address_fold(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+       /* The page must exist because buf_pool_watch_set()
+       increments buf_fix_count. */
+       ut_a(bpage);
+       if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
+-              mutex_t* mutex = buf_page_get_mutex(bpage);
++              mutex_t* mutex = buf_page_get_mutex_enter(bpage);
+-              mutex_enter(mutex);
+               ut_a(bpage->buf_fix_count > 0);
+               bpage->buf_fix_count--;
+               mutex_exit(mutex);
+       } else {
++              mutex_enter(&buf_pool->zip_mutex);
+               ut_a(bpage->buf_fix_count > 0);
+               if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
+                       buf_pool_watch_remove(buf_pool, fold, bpage);
+               }
++              mutex_exit(&buf_pool->zip_mutex);
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+ /****************************************************************//**
+@@ -2109,14 +2174,16 @@
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+       ulint           fold    = buf_page_address_fold(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+       /* The page must exist because buf_pool_watch_set()
+       increments buf_fix_count. */
+       ut_a(bpage);
+       ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       return(ret);
+ }
+@@ -2133,13 +2200,15 @@
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+       ut_a(buf_page_in_file(bpage));
+       buf_LRU_make_block_young(bpage);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+ /********************************************************************//**
+@@ -2163,14 +2232,20 @@
+       ut_a(buf_page_in_file(bpage));
+       if (buf_page_peek_if_too_old(bpage)) {
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex);
+               buf_LRU_make_block_young(bpage);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       } else if (!access_time) {
+               ulint   time_ms = ut_time_ms();
+-              buf_pool_mutex_enter(buf_pool);
++              mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
++              //buf_pool_mutex_enter(buf_pool);
++              if (block_mutex) {
+               buf_page_set_accessed(bpage, time_ms);
+-              buf_pool_mutex_exit(buf_pool);
++              mutex_exit(block_mutex);
++              }
++              //buf_pool_mutex_exit(buf_pool);
+       }
+ }
+@@ -2187,7 +2262,8 @@
+       buf_block_t*    block;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
+@@ -2196,7 +2272,8 @@
+               block->check_index_page_at_flush = FALSE;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ }
+ /********************************************************************//**
+@@ -2215,7 +2292,8 @@
+       ibool           is_hashed;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
+@@ -2226,7 +2304,8 @@
+               is_hashed = block->is_hashed;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       return(is_hashed);
+ }
+@@ -2248,7 +2327,8 @@
+       buf_page_t*     bpage;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       bpage = buf_page_hash_get(buf_pool, space, offset);
+@@ -2257,7 +2337,8 @@
+               bpage->file_page_was_freed = TRUE;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       return(bpage);
+ }
+@@ -2278,7 +2359,8 @@
+       buf_page_t*     bpage;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       bpage = buf_page_hash_get(buf_pool, space, offset);
+@@ -2287,7 +2369,8 @@
+               bpage->file_page_was_freed = FALSE;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       return(bpage);
+ }
+@@ -2322,8 +2405,9 @@
+       buf_pool->stat.n_page_gets++;
+       for (;;) {
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
+ lookup:
++              rw_lock_s_lock(&buf_pool->page_hash_latch);
+               bpage = buf_page_hash_get(buf_pool, space, offset);
+               if (bpage) {
+                       ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
+@@ -2332,7 +2416,8 @@
+               /* Page not in buf_pool: needs to be read from file */
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+               buf_read_page(space, zip_size, offset);
+@@ -2344,10 +2429,15 @@
+       if (UNIV_UNLIKELY(!bpage->zip.data)) {
+               /* There is no compressed page. */
+ err_exit:
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+               return(NULL);
+       }
++      block_mutex = buf_page_get_mutex_enter(bpage);
++
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
++
+       ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
+       switch (buf_page_get_state(bpage)) {
+@@ -2356,19 +2446,19 @@
+       case BUF_BLOCK_MEMORY:
+       case BUF_BLOCK_REMOVE_HASH:
+       case BUF_BLOCK_ZIP_FREE:
++              if (block_mutex)
++                      mutex_exit(block_mutex);
+               break;
+       case BUF_BLOCK_ZIP_PAGE:
+       case BUF_BLOCK_ZIP_DIRTY:
+-              block_mutex = &buf_pool->zip_mutex;
+-              mutex_enter(block_mutex);
++              ut_a(block_mutex == &buf_pool->zip_mutex);
+               bpage->buf_fix_count++;
+               goto got_block;
+       case BUF_BLOCK_FILE_PAGE:
+-              block_mutex = &((buf_block_t*) bpage)->mutex;
+-              mutex_enter(block_mutex);
++              ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
+               /* Discard the uncompressed page frame if possible. */
+-              if (buf_LRU_free_block(bpage, FALSE, NULL)
++              if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
+                   == BUF_LRU_FREED) {
+                       mutex_exit(block_mutex);
+@@ -2387,7 +2477,7 @@
+       must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
+       access_time = buf_page_is_accessed(bpage);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
+       mutex_exit(block_mutex);
+@@ -2696,7 +2786,7 @@
+       const buf_block_t*      block)          /*!< in: pointer to block,
+                                               not dereferenced */
+ {
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
+               /* The pointer should be aligned. */
+@@ -2732,6 +2822,7 @@
+       ulint           fix_type;
+       ibool           must_read;
+       ulint           retries = 0;
++      mutex_t*        block_mutex = NULL;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+       ut_ad(mtr);
+@@ -2753,9 +2844,11 @@
+       fold = buf_page_address_fold(space, offset);
+ loop:
+       block = guess;
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
+       if (block) {
++              block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++
+               /* If the guess is a compressed page descriptor that
+               has been allocated by buf_buddy_alloc(), it may have
+               been invalidated by buf_buddy_relocate().  In that
+@@ -2764,11 +2857,15 @@
+               the guess may be pointing to a buffer pool chunk that
+               has been released when resizing the buffer pool. */
+-              if (!buf_block_is_uncompressed(buf_pool, block)
++              if (!block_mutex) {
++                      block = guess = NULL;
++              } else if (!buf_block_is_uncompressed(buf_pool, block)
+                   || offset != block->page.offset
+                   || space != block->page.space
+                   || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
++                      mutex_exit(block_mutex);
++
+                       block = guess = NULL;
+               } else {
+                       ut_ad(!block->page.in_zip_hash);
+@@ -2777,12 +2874,19 @@
+       }
+       if (block == NULL) {
++              rw_lock_s_lock(&buf_pool->page_hash_latch);
+               block = (buf_block_t*) buf_page_hash_get_low(
+                       buf_pool, space, offset, fold);
++              if (block) {
++                      block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++                      ut_a(block_mutex);
++              }
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       }
+ loop2:
+       if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
++              mutex_exit(block_mutex);
+               block = NULL;
+       }
+@@ -2794,12 +2898,14 @@
+                               space, offset, fold);
+                       if (UNIV_LIKELY_NULL(block)) {
+-
++                              block_mutex = buf_page_get_mutex((buf_page_t*)block);
++                              ut_a(block_mutex);
++                              ut_ad(mutex_own(block_mutex));
+                               goto got_block;
+                       }
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
+               if (mode == BUF_GET_IF_IN_POOL
+                   || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
+@@ -2847,7 +2953,8 @@
+               /* The page is being read to buffer pool,
+               but we cannot wait around for the read to
+               complete. */
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(block_mutex);
+               return(NULL);
+       }
+@@ -2857,38 +2964,49 @@
+               ibool           success;
+       case BUF_BLOCK_FILE_PAGE:
++              if (block_mutex == &buf_pool->zip_mutex) {
++                      /* wrong mutex for a file page: release it and retry */
++                      mutex_exit(block_mutex);
++                      goto loop;
++              }
+               break;
+       case BUF_BLOCK_ZIP_PAGE:
+       case BUF_BLOCK_ZIP_DIRTY:
++              ut_ad(block_mutex == &buf_pool->zip_mutex);
+               bpage = &block->page;
+               /* Protect bpage->buf_fix_count. */
+-              mutex_enter(&buf_pool->zip_mutex);
++              //mutex_enter(&buf_pool->zip_mutex);
+               if (bpage->buf_fix_count
+                   || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+                       /* This condition often occurs when the buffer
+                       is not buffer-fixed, but I/O-fixed by
+                       buf_page_init_for_read(). */
+-                      mutex_exit(&buf_pool->zip_mutex);
++                      //mutex_exit(&buf_pool->zip_mutex);
+ wait_until_unfixed:
+                       /* The block is buffer-fixed or I/O-fixed.
+                       Try again later. */
+-                      buf_pool_mutex_exit(buf_pool);
++                      //buf_pool_mutex_exit(buf_pool);
++                      mutex_exit(block_mutex);
+                       os_thread_sleep(WAIT_FOR_READ);
+   
+                       goto loop;
+               }
+               /* Allocate an uncompressed page. */
+-              buf_pool_mutex_exit(buf_pool);
+-              mutex_exit(&buf_pool->zip_mutex);
++              //buf_pool_mutex_exit(buf_pool);
++              //mutex_exit(&buf_pool->zip_mutex);
++              mutex_exit(block_mutex);
+               block = buf_LRU_get_free_block(buf_pool, 0);
+               ut_a(block);
++              block_mutex = &block->mutex;
+-              buf_pool_mutex_enter(buf_pool);
+-              mutex_enter(&block->mutex);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex);
++              rw_lock_x_lock(&buf_pool->page_hash_latch);
++              mutex_enter(block_mutex);
+               {
+                       buf_page_t*     hash_bpage;
+@@ -2901,35 +3019,47 @@
+                               while buf_pool->mutex was released.
+                               Free the block that was allocated. */
+-                              buf_LRU_block_free_non_file_page(block);
+-                              mutex_exit(&block->mutex);
++                              buf_LRU_block_free_non_file_page(block, TRUE);
++                              mutex_exit(block_mutex);
+                               block = (buf_block_t*) hash_bpage;
++                              if (block) {
++                                      block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++                                      ut_a(block_mutex);
++                              }
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                              mutex_exit(&buf_pool->LRU_list_mutex);
+                               goto loop2;
+                       }
+               }
++              mutex_enter(&buf_pool->zip_mutex);
++
+               if (UNIV_UNLIKELY
+                   (bpage->buf_fix_count
+                    || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
++                      mutex_exit(&buf_pool->zip_mutex);
+                       /* The block was buffer-fixed or I/O-fixed
+                       while buf_pool->mutex was not held by this thread.
+                       Free the block that was allocated and try again.
+                       This should be extremely unlikely. */
+-                      buf_LRU_block_free_non_file_page(block);
+-                      mutex_exit(&block->mutex);
++                      buf_LRU_block_free_non_file_page(block, TRUE);
++                      //mutex_exit(&block->mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+                       goto wait_until_unfixed;
+               }
+               /* Move the compressed page from bpage to block,
+               and uncompress it. */
+-              mutex_enter(&buf_pool->zip_mutex);
+-
+               buf_relocate(bpage, &block->page);
++
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+               buf_block_init_low(block);
+               block->lock_hash_val = lock_rec_hash(space, offset);
+@@ -2938,7 +3068,7 @@
+               if (buf_page_get_state(&block->page)
+                   == BUF_BLOCK_ZIP_PAGE) {
+-                      UT_LIST_REMOVE(list, buf_pool->zip_clean,
++                      UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
+                                      &block->page);
+                       ut_ad(!block->page.in_flush_list);
+               } else {
+@@ -2955,19 +3085,24 @@
+               /* Insert at the front of unzip_LRU list */
+               buf_unzip_LRU_add_block(block, FALSE);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++
+               block->page.buf_fix_count = 1;
+               buf_block_set_io_fix(block, BUF_IO_READ);
+               rw_lock_x_lock_func(&block->lock, 0, file, line);
+               UNIV_MEM_INVALID(bpage, sizeof *bpage);
+-              mutex_exit(&block->mutex);
++              mutex_exit(block_mutex);
+               mutex_exit(&buf_pool->zip_mutex);
++
++              buf_pool_mutex_enter(buf_pool);
+               buf_pool->n_pend_unzip++;
++              buf_pool_mutex_exit(buf_pool);
+-              buf_buddy_free(buf_pool, bpage, sizeof *bpage);
++              buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
+               /* Decompress the page and apply buffered operations
+               while not holding buf_pool->mutex or block->mutex. */
+@@ -2980,12 +3115,15 @@
+               }
+               /* Unfix and unlatch the block. */
+-              buf_pool_mutex_enter(buf_pool);
+-              mutex_enter(&block->mutex);
++              //buf_pool_mutex_enter(buf_pool);
++              block_mutex = &block->mutex;
++              mutex_enter(block_mutex);
+               block->page.buf_fix_count--;
+               buf_block_set_io_fix(block, BUF_IO_NONE);
+-              mutex_exit(&block->mutex);
++
++              buf_pool_mutex_enter(buf_pool);
+               buf_pool->n_pend_unzip--;
++              buf_pool_mutex_exit(buf_pool);
+               rw_lock_x_unlock(&block->lock);
+               break;
+@@ -3001,7 +3139,7 @@
+       ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+-      mutex_enter(&block->mutex);
++      //mutex_enter(&block->mutex);
+ #if UNIV_WORD_SIZE == 4
+       /* On 32-bit systems, there is no padding in buf_page_t.  On
+       other systems, Valgrind could complain about uninitialized pad
+@@ -3014,7 +3152,7 @@
+               /* Try to evict the block from the buffer pool, to use the
+               insert buffer (change buffer) as much as possible. */
+-              if (buf_LRU_free_block(&block->page, TRUE, NULL)
++              if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
+                   == BUF_LRU_FREED) {
+                       mutex_exit(&block->mutex);
+                       if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
+@@ -3051,13 +3189,14 @@
+       buf_block_buf_fix_inc(block, file, line);
+-      mutex_exit(&block->mutex);
++      //mutex_exit(&block->mutex);
+       /* Check if this is the first access to the page */
+       access_time = buf_page_is_accessed(&block->page);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(block_mutex);
+       buf_page_set_accessed_make_young(&block->page, access_time);
+@@ -3290,9 +3429,11 @@
+       buf_pool = buf_pool_from_block(block);
+       if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex);
+               buf_LRU_make_block_young(&block->page);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       } else if (!buf_page_is_accessed(&block->page)) {
+               /* Above, we do a dirty read on purpose, to avoid
+               mutex contention.  The field buf_page_t::access_time
+@@ -3300,9 +3441,11 @@
+               field must be protected by mutex, however. */
+               ulint   time_ms = ut_time_ms();
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&block->mutex);
+               buf_page_set_accessed(&block->page, time_ms);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&block->mutex);
+       }
+       ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
+@@ -3369,18 +3512,21 @@
+       ut_ad(mtr);
+       ut_ad(mtr->state == MTR_ACTIVE);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       block = buf_block_hash_get(buf_pool, space_id, page_no);
+       if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+               return(NULL);
+       }
+       ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
+       mutex_enter(&block->mutex);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+       ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+@@ -3469,7 +3615,10 @@
+       buf_page_t*     hash_page;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+       ut_ad(mutex_own(&(block->mutex)));
+       ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+@@ -3498,11 +3647,14 @@
+       if (UNIV_LIKELY(!hash_page)) {
+       } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
+               /* Preserve the reference count. */
+-              ulint   buf_fix_count = hash_page->buf_fix_count;
++              ulint   buf_fix_count;
++              mutex_enter(&buf_pool->zip_mutex);
++              buf_fix_count = hash_page->buf_fix_count;
+               ut_a(buf_fix_count > 0);
+               block->page.buf_fix_count += buf_fix_count;
+               buf_pool_watch_remove(buf_pool, fold, hash_page);
++              mutex_exit(&buf_pool->zip_mutex);
+       } else {
+               fprintf(stderr,
+                       "InnoDB: Error: page %lu %lu already found"
+@@ -3512,7 +3664,8 @@
+                       (const void*) hash_page, (const void*) block);
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+               mutex_exit(&block->mutex);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+               buf_print();
+               buf_LRU_print();
+               buf_validate();
+@@ -3596,7 +3749,9 @@
+       fold = buf_page_address_fold(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
+       if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
+@@ -3605,9 +3760,15 @@
+ err_exit:
+               if (block) {
+                       mutex_enter(&block->mutex);
+-                      buf_LRU_block_free_non_file_page(block);
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      buf_LRU_block_free_non_file_page(block, FALSE);
+                       mutex_exit(&block->mutex);
+               }
++              else {
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++              }
+               bpage = NULL;
+               goto func_exit;
+@@ -3630,6 +3791,8 @@
+               buf_page_init(space, offset, fold, block);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+               /* The block must be put to the LRU list, to the old blocks */
+               buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+@@ -3657,7 +3820,7 @@
+                       been added to buf_pool->LRU and
+                       buf_pool->page_hash. */
+                       mutex_exit(&block->mutex);
+-                      data = buf_buddy_alloc(buf_pool, zip_size, &lru);
++                      data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
+                       mutex_enter(&block->mutex);
+                       block->page.zip.data = data;
+@@ -3670,6 +3833,7 @@
+                       buf_unzip_LRU_add_block(block, TRUE);
+               }
++              mutex_exit(&buf_pool->LRU_list_mutex);
+               mutex_exit(&block->mutex);
+       } else {
+               /* Defer buf_buddy_alloc() until after the block has
+@@ -3681,8 +3845,8 @@
+               control block (bpage), in order to avoid the
+               invocation of buf_buddy_relocate_block() on
+               uninitialized data. */
+-              data = buf_buddy_alloc(buf_pool, zip_size, &lru);
+-              bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
++              data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
++              bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
+               /* Initialize the buf_pool pointer. */
+               bpage->buf_pool_index = buf_pool_index(buf_pool);
+@@ -3701,8 +3865,11 @@
+                               /* The block was added by some other thread. */
+                               watch_page = NULL;
+-                              buf_buddy_free(buf_pool, bpage, sizeof *bpage);
+-                              buf_buddy_free(buf_pool, data, zip_size);
++                              buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
++                              buf_buddy_free(buf_pool, data, zip_size, TRUE);
++
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+                               bpage = NULL;
+                               goto func_exit;
+@@ -3746,18 +3913,24 @@
+               HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
+                           bpage);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+               /* The block must be put to the LRU list, to the old blocks */
+               buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+               buf_LRU_insert_zip_clean(bpage);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++
+               buf_page_set_io_fix(bpage, BUF_IO_READ);
+               mutex_exit(&buf_pool->zip_mutex);
+       }
++      buf_pool_mutex_enter(buf_pool);
+       buf_pool->n_pend_reads++;
+-func_exit:
+       buf_pool_mutex_exit(buf_pool);
++func_exit:
++      //buf_pool_mutex_exit(buf_pool);
+       if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+@@ -3799,7 +3972,9 @@
+       fold = buf_page_address_fold(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       block = (buf_block_t*) buf_page_hash_get_low(
+               buf_pool, space, offset, fold);
+@@ -3815,7 +3990,9 @@
+ #endif /* UNIV_DEBUG_FILE_ACCESSES */
+               /* Page can be found in buf_pool */
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+               buf_block_free(free_block);
+@@ -3837,6 +4014,7 @@
+       mutex_enter(&block->mutex);
+       buf_page_init(space, offset, fold, block);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       /* The block must be put to the LRU list */
+       buf_LRU_add_block(&block->page, FALSE);
+@@ -3863,7 +4041,7 @@
+               the reacquisition of buf_pool->mutex.  We also must
+               defer this operation until after the block descriptor
+               has been added to buf_pool->LRU and buf_pool->page_hash. */
+-              data = buf_buddy_alloc(buf_pool, zip_size, &lru);
++              data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
+               mutex_enter(&block->mutex);
+               block->page.zip.data = data;
+@@ -3881,7 +4059,8 @@
+       buf_page_set_accessed(&block->page, time_ms);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+       mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+@@ -3932,6 +4111,8 @@
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+       const ibool     uncompressed = (buf_page_get_state(bpage)
+                                       == BUF_BLOCK_FILE_PAGE);
++      ibool           have_LRU_mutex = FALSE;
++      mutex_t*        block_mutex;
+       ut_a(buf_page_in_file(bpage));
+@@ -4065,8 +4246,26 @@
+               }
+       }
++      if (io_type == BUF_IO_WRITE
++          && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
++              || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
++              /* to keep consistency at buf_LRU_insert_zip_clean() */
++              have_LRU_mutex = TRUE; /* optimistic */
++      }
++retry_mutex:
++      if (have_LRU_mutex)
++              mutex_enter(&buf_pool->LRU_list_mutex);
++      block_mutex = buf_page_get_mutex_enter(bpage);
++      ut_a(block_mutex);
++      if (io_type == BUF_IO_WRITE
++          && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
++              || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
++          && !have_LRU_mutex) {
++              mutex_exit(block_mutex);
++              have_LRU_mutex = TRUE;
++              goto retry_mutex;
++      }
+       buf_pool_mutex_enter(buf_pool);
+-      mutex_enter(buf_page_get_mutex(bpage));
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+       if (io_type == BUF_IO_WRITE || uncompressed) {
+@@ -4089,6 +4288,7 @@
+               the x-latch to this OS thread: do not let this confuse you in
+               debugging! */
++              ut_a(!have_LRU_mutex);
+               ut_ad(buf_pool->n_pend_reads > 0);
+               buf_pool->n_pend_reads--;
+               buf_pool->stat.n_pages_read++;
+@@ -4106,6 +4306,9 @@
+               buf_flush_write_complete(bpage);
++              if (have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++
+               if (uncompressed) {
+                       rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
+                                            BUF_IO_WRITE);
+@@ -4128,8 +4331,8 @@
+       }
+ #endif /* UNIV_DEBUG */
+-      mutex_exit(buf_page_get_mutex(bpage));
+       buf_pool_mutex_exit(buf_pool);
++      mutex_exit(block_mutex);
+ }
+ /*********************************************************************//**
+@@ -4146,7 +4349,9 @@
+       ut_ad(buf_pool);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       chunk = buf_pool->chunks;
+@@ -4163,7 +4368,9 @@
+               }
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       return(TRUE);
+ }
+@@ -4211,7 +4418,8 @@
+               freed = buf_LRU_search_and_free_block(buf_pool, 100);
+       }
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+       ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+       ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
+@@ -4224,7 +4432,8 @@
+       memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
+       buf_refresh_io_stats(buf_pool);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+ /*********************************************************************//**
+@@ -4266,7 +4475,10 @@
+       ut_ad(buf_pool);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
++      /* To keep the new latch order, this function cannot validate everything correctly. */
+       chunk = buf_pool->chunks;
+@@ -4361,7 +4573,7 @@
+       /* Check clean compressed-only blocks. */
+       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(zip_list, b)) {
+               ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+               switch (buf_page_get_io_fix(b)) {
+               case BUF_IO_NONE:
+@@ -4392,7 +4604,7 @@
+       buf_flush_list_mutex_enter(buf_pool);
+       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(flush_list, b)) {
+               ut_ad(b->in_flush_list);
+               ut_a(b->oldest_modification);
+               n_flush++;
+@@ -4451,6 +4663,8 @@
+       }
+       ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
++      /* Because of the latching order with block->mutex, the mutexes needed for the checks below cannot be acquired beforehand, so they are disabled. */
++/*
+       if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
+               fprintf(stderr, "Free list len %lu, free blocks %lu\n",
+                       (ulong) UT_LIST_GET_LEN(buf_pool->free),
+@@ -4461,8 +4675,11 @@
+       ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
+       ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
+       ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
++*/
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       ut_a(buf_LRU_validate());
+       ut_a(buf_flush_validate(buf_pool));
+@@ -4518,7 +4735,9 @@
+       index_ids = mem_alloc(size * sizeof *index_ids);
+       counts = mem_alloc(sizeof(ulint) * size);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      mutex_enter(&buf_pool->free_list_mutex);
+       buf_flush_list_mutex_enter(buf_pool);
+       fprintf(stderr,
+@@ -4587,7 +4806,9 @@
+               }
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      mutex_exit(&buf_pool->free_list_mutex);
+       for (i = 0; i < n_found; i++) {
+               index = dict_index_get_if_in_cache(index_ids[i]);
+@@ -4644,7 +4865,7 @@
+       buf_chunk_t*    chunk;
+       ulint           fixed_pages_number = 0;
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
+       chunk = buf_pool->chunks;
+@@ -4678,7 +4899,7 @@
+       /* Traverse the lists of clean and dirty compressed-only blocks. */
+       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(zip_list, b)) {
+               ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+               ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
+@@ -4690,7 +4911,7 @@
+       buf_flush_list_mutex_enter(buf_pool);
+       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(flush_list, b)) {
+               ut_ad(b->in_flush_list);
+               switch (buf_page_get_state(b)) {
+@@ -4716,7 +4937,7 @@
+       buf_flush_list_mutex_exit(buf_pool);
+       mutex_exit(&buf_pool->zip_mutex);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
+       return(fixed_pages_number);
+ }
+@@ -4810,6 +5031,8 @@
+       ut_ad(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      mutex_enter(&buf_pool->free_list_mutex);
+       buf_pool_mutex_enter(buf_pool);
+       buf_flush_list_mutex_enter(buf_pool);
+@@ -4913,6 +5136,8 @@
+               buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
+       buf_refresh_io_stats(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      mutex_exit(&buf_pool->free_list_mutex);
+       buf_pool_mutex_exit(buf_pool);
+ }
+@@ -5032,11 +5257,13 @@
+ {
+       ulint   len;
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->free_list_mutex);
+       len = UT_LIST_GET_LEN(buf_pool->free);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->free_list_mutex);
+       return(len);
+ }
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c   2010-12-03 15:22:36.318955693 +0900
++++ b/storage/innobase/buf/buf0flu.c   2010-12-03 15:48:29.289024083 +0900
+@@ -279,7 +279,7 @@
+       ut_d(block->page.in_flush_list = TRUE);
+       block->page.oldest_modification = lsn;
+-      UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
++      UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
+ #ifdef UNIV_DEBUG_VALGRIND
+       {
+@@ -373,14 +373,14 @@
+                      > block->page.oldest_modification) {
+                       ut_ad(b->in_flush_list);
+                       prev_b = b;
+-                      b = UT_LIST_GET_NEXT(list, b);
++                      b = UT_LIST_GET_NEXT(flush_list, b);
+               }
+       }
+       if (prev_b == NULL) {
+-              UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
++              UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
+       } else {
+-              UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
++              UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
+                                    prev_b, &block->page);
+       }
+@@ -406,7 +406,7 @@
+       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
+       //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+-      //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
++      ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       //ut_ad(bpage->in_LRU_list);
+       if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
+@@ -442,14 +442,14 @@
+       enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+-      ut_a(buf_page_in_file(bpage));
++      //ut_a(buf_page_in_file(bpage));
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
+-      if (bpage->oldest_modification != 0
++      if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
+           && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
+               ut_ad(bpage->in_flush_list);
+@@ -480,7 +480,7 @@
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_ad(bpage->in_flush_list);
+@@ -498,11 +498,11 @@
+               return;
+       case BUF_BLOCK_ZIP_DIRTY:
+               buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
+-              UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++              UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+               buf_LRU_insert_zip_clean(bpage);
+               break;
+       case BUF_BLOCK_FILE_PAGE:
+-              UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++              UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+               break;
+       }
+@@ -546,7 +546,7 @@
+       buf_page_t*     prev_b = NULL;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       /* Must reside in the same buffer pool. */
+       ut_ad(buf_pool == buf_pool_from_bpage(dpage));
+@@ -575,18 +575,18 @@
+       because we assert on in_flush_list in comparison function. */
+       ut_d(bpage->in_flush_list = FALSE);
+-      prev = UT_LIST_GET_PREV(list, bpage);
+-      UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++      prev = UT_LIST_GET_PREV(flush_list, bpage);
++      UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+       if (prev) {
+               ut_ad(prev->in_flush_list);
+               UT_LIST_INSERT_AFTER(
+-                      list,
++                      flush_list,
+                       buf_pool->flush_list,
+                       prev, dpage);
+       } else {
+               UT_LIST_ADD_FIRST(
+-                      list,
++                      flush_list,
+                       buf_pool->flush_list,
+                       dpage);
+       }
+@@ -1055,7 +1055,7 @@
+ #ifdef UNIV_DEBUG
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(!buf_pool_mutex_own(buf_pool));
++      //ut_ad(!buf_pool_mutex_own(buf_pool));
+ #endif
+ #ifdef UNIV_LOG_DEBUG
+@@ -1069,7 +1069,8 @@
+       io_fixed and oldest_modification != 0.  Thus, it cannot be
+       relocated in the buffer pool or removed from flush_list or
+       LRU_list. */
+-      ut_ad(!buf_pool_mutex_own(buf_pool));
++      //ut_ad(!buf_pool_mutex_own(buf_pool));
++      ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
+       ut_ad(!buf_flush_list_mutex_own(buf_pool));
+       ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
+       ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+@@ -1232,12 +1233,18 @@
+       ibool           is_uncompressed;
+       ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
++#endif
+       ut_ad(buf_page_in_file(bpage));
+       block_mutex = buf_page_get_mutex(bpage);
+       ut_ad(mutex_own(block_mutex));
++      buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
++
+       ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
+       buf_page_set_io_fix(bpage, BUF_IO_WRITE);
+@@ -1399,14 +1406,16 @@
+               buf_pool = buf_pool_get(space, i);
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              rw_lock_s_lock(&buf_pool->page_hash_latch);
+               /* We only want to flush pages from this buffer pool. */
+               bpage = buf_page_hash_get(buf_pool, space, i);
+               if (!bpage) {
+-                      buf_pool_mutex_exit(buf_pool);
++                      //buf_pool_mutex_exit(buf_pool);
++                      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+                       continue;
+               }
+@@ -1418,11 +1427,9 @@
+               if (flush_type != BUF_FLUSH_LRU
+                   || i == offset
+                   || buf_page_is_old(bpage)) {
+-                      mutex_t* block_mutex = buf_page_get_mutex(bpage);
+-
+-                      mutex_enter(block_mutex);
++                      mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
+-                      if (buf_flush_ready_for_flush(bpage, flush_type)
++                      if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
+                           && (i == offset || !bpage->buf_fix_count)) {
+                               /* We only try to flush those
+                               neighbors != offset where the buf fix
+@@ -1438,11 +1445,12 @@
+                               ut_ad(!buf_pool_mutex_own(buf_pool));
+                               count++;
+                               continue;
+-                      } else {
++                      } else if (block_mutex) {
+                               mutex_exit(block_mutex);
+                       }
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       }
+       return(count);
+@@ -1475,21 +1483,25 @@
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+ #endif /* UNIV_DEBUG */
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(flush_type != BUF_FLUSH_LRU
++            || mutex_own(&buf_pool->LRU_list_mutex));
+-      block_mutex = buf_page_get_mutex(bpage);
+-      mutex_enter(block_mutex);
++      block_mutex = buf_page_get_mutex_enter(bpage);
+-      ut_a(buf_page_in_file(bpage));
++      //ut_a(buf_page_in_file(bpage));
+-      if (buf_flush_ready_for_flush(bpage, flush_type)) {
++      if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
+               ulint           space;
+               ulint           offset;
+               buf_pool_t*     buf_pool;
+               buf_pool = buf_pool_from_bpage(bpage);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              if (flush_type == BUF_FLUSH_LRU) {
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++              }
+               /* These fields are protected by both the
+               buffer pool mutex and block mutex. */
+@@ -1505,13 +1517,18 @@
+                                                 *count,
+                                                 n_to_flush);
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              if (flush_type == BUF_FLUSH_LRU) {
++                      mutex_enter(&buf_pool->LRU_list_mutex);
++              }
+               flushed = TRUE;
+-      } else {
++      } else if (block_mutex) {
+               mutex_exit(block_mutex);
+       }
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(flush_type != BUF_FLUSH_LRU
++            || mutex_own(&buf_pool->LRU_list_mutex));
+       return(flushed);
+ }
+@@ -1532,7 +1549,8 @@
+       buf_page_t*     bpage;
+       ulint           count = 0;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       do {
+               /* Start from the end of the list looking for a
+@@ -1554,7 +1572,8 @@
+       should be flushed, we factor in this value. */
+       buf_lru_flush_page_count += count;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       return(count);
+ }
+@@ -1582,9 +1601,10 @@
+ {
+       ulint           len;
+       buf_page_t*     bpage;
++      buf_page_t*     prev_bpage = NULL;
+       ulint           count = 0;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       /* If we have flushed enough, leave the loop */
+       do {
+@@ -1603,6 +1623,7 @@
+               if (bpage) {
+                       ut_a(bpage->oldest_modification > 0);
++                      prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
+               }
+               if (!bpage || bpage->oldest_modification >= lsn_limit) {
+@@ -1644,9 +1665,17 @@
+                               break;
+                       }
+-                      bpage = UT_LIST_GET_PREV(list, bpage);
++                      bpage = UT_LIST_GET_PREV(flush_list, bpage);
+-                      ut_ad(!bpage || bpage->in_flush_list);
++                      //ut_ad(!bpage || bpage->in_flush_list);
++                      if (bpage != prev_bpage) {
++                              /* the predecessor changed, so the search may have jumped elsewhere in the list; retry */
++                              buf_flush_list_mutex_exit(buf_pool);
++                              break;
++                      }
++                      if (bpage) {
++                              prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
++                      }
+                       buf_flush_list_mutex_exit(buf_pool);
+@@ -1655,7 +1684,7 @@
+       } while (count < min_n && bpage != NULL && len > 0);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       return(count);
+ }
+@@ -1694,13 +1723,15 @@
+             || sync_thread_levels_empty_gen(TRUE));
+ #endif /* UNIV_SYNC_DEBUG */
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
+       /* Note: The buffer pool mutex is released and reacquired within
+       the flush functions. */
+       switch(flush_type) {
+       case BUF_FLUSH_LRU:
++              mutex_enter(&buf_pool->LRU_list_mutex);
+               count = buf_flush_LRU_list_batch(buf_pool, min_n);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+               break;
+       case BUF_FLUSH_LIST:
+               count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
+@@ -1709,7 +1740,7 @@
+               ut_error;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
+       buf_flush_buffered_writes();
+@@ -1965,7 +1996,7 @@
+ retry:
+       //buf_pool_mutex_enter(buf_pool);
+       if (have_LRU_mutex)
+-              buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex);
+       n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+@@ -1982,15 +2013,15 @@
+                       bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+                       continue;
+               }
+-              block_mutex = buf_page_get_mutex(bpage);
+-
+-              mutex_enter(block_mutex);
++              block_mutex = buf_page_get_mutex_enter(bpage);
+-              if (buf_flush_ready_for_replace(bpage)) {
++              if (block_mutex && buf_flush_ready_for_replace(bpage)) {
+                       n_replaceable++;
+               }
+-              mutex_exit(block_mutex);
++              if (block_mutex) {
++                      mutex_exit(block_mutex);
++              }
+               distance++;
+@@ -1999,7 +2030,7 @@
+       //buf_pool_mutex_exit(buf_pool);
+       if (have_LRU_mutex)
+-              buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
+@@ -2198,7 +2229,7 @@
+       ut_ad(buf_flush_list_mutex_own(buf_pool));
+-      UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
++      UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
+                        ut_ad(ut_list_node_313->in_flush_list));
+       bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+@@ -2238,7 +2269,7 @@
+                       rnode = rbt_next(buf_pool->flush_rbt, rnode);
+               }
+-              bpage = UT_LIST_GET_NEXT(list, bpage);
++              bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+               ut_a(!bpage || om >= bpage->oldest_modification);
+       }
+diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
+--- a/storage/innobase/buf/buf0lru.c   2010-12-03 15:22:36.321987250 +0900
++++ b/storage/innobase/buf/buf0lru.c   2010-12-03 15:48:29.293023197 +0900
+@@ -143,8 +143,9 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+-      buf_block_t*    block); /*!< in: block, must contain a file page and
++      buf_block_t*    block,  /*!< in: block, must contain a file page and
+                               be in a state where it can be freed */
++      ibool           have_page_hash_mutex);
+ /******************************************************************//**
+ Determines if the unzip_LRU list should be used for evicting a victim
+@@ -154,15 +155,20 @@
+ ibool
+ buf_LRU_evict_from_unzip_LRU(
+ /*=========================*/
+-      buf_pool_t*     buf_pool)
++      buf_pool_t*     buf_pool,
++      ibool           have_LRU_mutex)
+ {
+       ulint   io_avg;
+       ulint   unzip_avg;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      if (!have_LRU_mutex)
++              mutex_enter(&buf_pool->LRU_list_mutex);
+       /* If the unzip_LRU list is empty, we can only use the LRU. */
+       if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
++              if (!have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+               return(FALSE);
+       }
+@@ -171,14 +177,20 @@
+       decompressed pages in the buffer pool. */
+       if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
+           <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
++              if (!have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+               return(FALSE);
+       }
+       /* If eviction hasn't started yet, we assume by default
+       that a workload is disk bound. */
+       if (buf_pool->freed_page_clock == 0) {
++              if (!have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+               return(TRUE);
+       }
++      if (!have_LRU_mutex)
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       /* Calculate the average over past intervals, and add the values
+       of the current interval. */
+@@ -246,19 +258,23 @@
+       page_arr = ut_malloc(
+               sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+ scan_again:
+       num_entries = 0;
+       bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+       while (bpage != NULL) {
+-              mutex_t*        block_mutex = buf_page_get_mutex(bpage);
++              mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
+               buf_page_t*     prev_bpage;
+-              mutex_enter(block_mutex);
+               prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
++              if (!block_mutex) {
++                      goto next_page;
++              }
++
+               ut_a(buf_page_in_file(bpage));
+               if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
+@@ -287,14 +303,16 @@
+                       /* Array full. We release the buf_pool->mutex to
+                       obey the latching order. */
+-                      buf_pool_mutex_exit(buf_pool);
++                      //buf_pool_mutex_exit(buf_pool);
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+                       buf_LRU_drop_page_hash_batch(
+                               id, zip_size, page_arr, num_entries);
+                       num_entries = 0;
+-                      buf_pool_mutex_enter(buf_pool);
++                      //buf_pool_mutex_enter(buf_pool);
++                      mutex_enter(&buf_pool->LRU_list_mutex);
+               } else {
+                       mutex_exit(block_mutex);
+               }
+@@ -319,7 +337,8 @@
+               }
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+       /* Drop any remaining batch of search hashed pages. */
+       buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
+@@ -341,7 +360,9 @@
+       ibool           all_freed;
+ scan_again:
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       all_freed = TRUE;
+@@ -369,8 +390,16 @@
+                       all_freed = FALSE;
+               } else {
+-                      mutex_t* block_mutex = buf_page_get_mutex(bpage);
+-                      mutex_enter(block_mutex);
++                      mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
++
++                      if (!block_mutex) {
++                              /* This should be an impossible case.
++                              Something went wrong, so mark the pass as not fully freed (leads to scan_again). */
++
++                              all_freed = FALSE;
++
++                              goto next_page_no_mutex;
++                      }
+                       if (bpage->buf_fix_count > 0) {
+@@ -429,7 +458,9 @@
+                               ulint   page_no;
+                               ulint   zip_size;
+-                              buf_pool_mutex_exit(buf_pool);
++                              //buf_pool_mutex_exit(buf_pool);
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+                               zip_size = buf_page_get_zip_size(bpage);
+                               page_no = buf_page_get_page_no(bpage);
+@@ -454,7 +485,7 @@
+                       if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+                           != BUF_BLOCK_ZIP_FREE) {
+                               buf_LRU_block_free_hashed_page((buf_block_t*)
+-                                                             bpage);
++                                                             bpage, TRUE);
+                       } else {
+                               /* The block_mutex should have been
+                               released by buf_LRU_block_remove_hashed_page()
+@@ -486,7 +517,9 @@
+               bpage = prev_bpage;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       if (!all_freed) {
+               os_thread_sleep(20000);
+@@ -532,7 +565,9 @@
+       buf_page_t*     b;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++      ut_ad(mutex_own(&buf_pool->flush_list_mutex));
+       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
+       /* Find the first successor of bpage in the LRU list
+@@ -540,17 +575,17 @@
+       b = bpage;
+       do {
+               b = UT_LIST_GET_NEXT(LRU, b);
+-      } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
++      } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
+       /* Insert bpage before b, i.e., after the predecessor of b. */
+       if (b) {
+-              b = UT_LIST_GET_PREV(list, b);
++              b = UT_LIST_GET_PREV(zip_list, b);
+       }
+       if (b) {
+-              UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
++              UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
+       } else {
+-              UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
++              UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
+       }
+ }
+@@ -563,18 +598,19 @@
+ buf_LRU_free_from_unzip_LRU_list(
+ /*=============================*/
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+-      ulint           n_iterations)   /*!< in: how many times this has
++      ulint           n_iterations,   /*!< in: how many times this has
+                                       been called repeatedly without
+                                       result: a high value means that
+                                       we should search farther; we will
+                                       search n_iterations / 5 of the
+                                       unzip_LRU list, or nothing if
+                                       n_iterations >= 5 */
++      ibool           have_LRU_mutex)
+ {
+       buf_block_t*    block;
+       ulint           distance;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       /* Theoratically it should be much easier to find a victim
+       from unzip_LRU as we can choose even a dirty block (as we'll
+@@ -584,7 +620,7 @@
+       if we have done five iterations so far. */
+       if (UNIV_UNLIKELY(n_iterations >= 5)
+-          || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
++          || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
+               return(FALSE);
+       }
+@@ -592,18 +628,25 @@
+       distance = 100 + (n_iterations
+                         * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
++restart:
+       for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+            UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
+            block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
+               enum buf_lru_free_block_status  freed;
++              mutex_enter(&block->mutex);
++              if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
++                  || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
++                      mutex_exit(&block->mutex);
++                      goto restart;
++              }
++
+               ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+               ut_ad(block->in_unzip_LRU_list);
+               ut_ad(block->page.in_LRU_list);
+-              mutex_enter(&block->mutex);
+-              freed = buf_LRU_free_block(&block->page, FALSE, NULL);
++              freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
+               mutex_exit(&block->mutex);
+               switch (freed) {
+@@ -637,21 +680,23 @@
+ buf_LRU_free_from_common_LRU_list(
+ /*==============================*/
+       buf_pool_t*     buf_pool,
+-      ulint           n_iterations)
++      ulint           n_iterations,
+                               /*!< in: how many times this has been called
+                               repeatedly without result: a high value means
+                               that we should search farther; if
+                               n_iterations < 10, then we search
+                               n_iterations / 10 * buf_pool->curr_size
+                               pages from the end of the LRU list */
++      ibool           have_LRU_mutex)
+ {
+       buf_page_t*     bpage;
+       ulint           distance;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
++restart:
+       for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+            UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
+            bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
+@@ -659,14 +704,23 @@
+               enum buf_lru_free_block_status  freed;
+               unsigned                        accessed;
+               mutex_t*                        block_mutex
+-                      = buf_page_get_mutex(bpage);
++                      = buf_page_get_mutex_enter(bpage);
++
++              if (!block_mutex) {
++                      goto restart;
++              }
++
++              if (!bpage->in_LRU_list
++                  || !buf_page_in_file(bpage)) {
++                      mutex_exit(block_mutex);
++                      goto restart;
++              }
+               ut_ad(buf_page_in_file(bpage));
+               ut_ad(bpage->in_LRU_list);
+-              mutex_enter(block_mutex);
+               accessed = buf_page_is_accessed(bpage);
+-              freed = buf_LRU_free_block(bpage, TRUE, NULL);
++              freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
+               mutex_exit(block_mutex);
+               switch (freed) {
+@@ -718,16 +772,23 @@
+                               n_iterations / 5 of the unzip_LRU list. */
+ {
+       ibool   freed = FALSE;
++      ibool   have_LRU_mutex = FALSE;
+-      buf_pool_mutex_enter(buf_pool);
++      if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
++              have_LRU_mutex = TRUE;
++
++      //buf_pool_mutex_enter(buf_pool);
++      if (have_LRU_mutex)
++              mutex_enter(&buf_pool->LRU_list_mutex);
+-      freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
++      freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
+       if (!freed) {
+               freed = buf_LRU_free_from_common_LRU_list(
+-                      buf_pool, n_iterations);
++                      buf_pool, n_iterations, have_LRU_mutex);
+       }
++      buf_pool_mutex_enter(buf_pool);
+       if (!freed) {
+               buf_pool->LRU_flush_ended = 0;
+       } else if (buf_pool->LRU_flush_ended > 0) {
+@@ -735,6 +796,8 @@
+       }
+       buf_pool_mutex_exit(buf_pool);
++      if (have_LRU_mutex)
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       return(freed);
+ }
+@@ -795,7 +858,9 @@
+               buf_pool = buf_pool_from_array(i);
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex);
++              mutex_enter(&buf_pool->free_list_mutex);
+               if (!recv_recovery_on
+                   && UT_LIST_GET_LEN(buf_pool->free)
+@@ -805,7 +870,9 @@
+                       ret = TRUE;
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++              mutex_exit(&buf_pool->free_list_mutex);
+       }
+       return(ret);
+@@ -823,9 +890,10 @@
+ {
+       buf_block_t*    block;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+-      block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
++      mutex_enter(&buf_pool->free_list_mutex);
++      block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
+       if (block) {
+@@ -834,7 +902,9 @@
+               ut_ad(!block->page.in_flush_list);
+               ut_ad(!block->page.in_LRU_list);
+               ut_a(!buf_page_in_file(&block->page));
+-              UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
++              UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
++
++              mutex_exit(&buf_pool->free_list_mutex);
+               mutex_enter(&block->mutex);
+@@ -844,6 +914,8 @@
+               ut_ad(buf_pool_from_block(block) == buf_pool);
+               mutex_exit(&block->mutex);
++      } else {
++              mutex_exit(&buf_pool->free_list_mutex);
+       }
+       return(block);
+@@ -868,7 +940,7 @@
+       ibool           mon_value_was   = FALSE;
+       ibool           started_monitor = FALSE;
+ loop:
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
+       if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+           + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
+@@ -951,8 +1023,10 @@
+                       ibool   lru;
+                       page_zip_set_size(&block->page.zip, zip_size);
++                      mutex_enter(&buf_pool->LRU_list_mutex);
+                       block->page.zip.data = buf_buddy_alloc(
+-                              buf_pool, zip_size, &lru);
++                              buf_pool, zip_size, &lru, FALSE);
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+                       UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
+               } else {
+@@ -960,7 +1034,7 @@
+                       block->page.zip.data = NULL;
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
+               if (started_monitor) {
+                       srv_print_innodb_monitor = mon_value_was;
+@@ -972,7 +1046,7 @@
+       /* If no block was in the free list, search from the end of the LRU
+       list and try to free a block there */
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
+       freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
+@@ -1058,7 +1132,8 @@
+       ulint   new_len;
+       ut_a(buf_pool->LRU_old);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
+       ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
+ #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
+@@ -1124,7 +1199,8 @@
+ {
+       buf_page_t*     bpage;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+       /* We first initialize all blocks in the LRU list as old and then use
+@@ -1159,13 +1235,14 @@
+       ut_ad(buf_pool);
+       ut_ad(bpage);
+       ut_ad(buf_page_in_file(bpage));
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       if (buf_page_belongs_to_unzip_LRU(bpage)) {
+               buf_block_t*    block = (buf_block_t*) bpage;
+               ut_ad(block->in_unzip_LRU_list);
+-              ut_d(block->in_unzip_LRU_list = FALSE);
++              block->in_unzip_LRU_list = FALSE;
+               UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
+       }
+@@ -1183,7 +1260,8 @@
+       ut_ad(buf_pool);
+       ut_ad(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_a(buf_page_in_file(bpage));
+@@ -1260,12 +1338,13 @@
+       ut_ad(buf_pool);
+       ut_ad(block);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+       ut_ad(!block->in_unzip_LRU_list);
+-      ut_d(block->in_unzip_LRU_list = TRUE);
++      block->in_unzip_LRU_list = TRUE;
+       if (old) {
+               UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
+@@ -1286,7 +1365,8 @@
+       ut_ad(buf_pool);
+       ut_ad(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_a(buf_page_in_file(bpage));
+@@ -1337,7 +1417,8 @@
+       ut_ad(buf_pool);
+       ut_ad(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_a(buf_page_in_file(bpage));
+       ut_ad(!bpage->in_LRU_list);
+@@ -1416,7 +1497,8 @@
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       if (bpage->old) {
+               buf_pool->stat.n_pages_made_young++;
+@@ -1458,19 +1540,20 @@
+       buf_page_t*     bpage,  /*!< in: block to be freed */
+       ibool           zip,    /*!< in: TRUE if should remove also the
+                               compressed page of an uncompressed page */
+-      ibool*          buf_pool_mutex_released)
++      ibool*          buf_pool_mutex_released,
+                               /*!< in: pointer to a variable that will
+                               be assigned TRUE if buf_pool_mutex
+                               was temporarily released, or NULL */
++      ibool           have_LRU_mutex)
+ {
+       buf_page_t*     b = NULL;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+       mutex_t*        block_mutex = buf_page_get_mutex(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(mutex_own(block_mutex));
+       ut_ad(buf_page_in_file(bpage));
+-      ut_ad(bpage->in_LRU_list);
++      //ut_ad(bpage->in_LRU_list);
+       ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+ #if UNIV_WORD_SIZE == 4
+       /* On 32-bit systems, there is no padding in buf_page_t.  On
+@@ -1479,7 +1562,7 @@
+       UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+ #endif
+-      if (!buf_page_can_relocate(bpage)) {
++      if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
+               /* Do not free buffer-fixed or I/O-fixed blocks. */
+               return(BUF_LRU_NOT_FREED);
+@@ -1511,15 +1594,15 @@
+               If it cannot be allocated (without freeing a block
+               from the LRU list), refuse to free bpage. */
+ alloc:
+-              buf_pool_mutex_exit_forbid(buf_pool);
+-              b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
+-              buf_pool_mutex_exit_allow(buf_pool);
++              //buf_pool_mutex_exit_forbid(buf_pool);
++              b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
++              //buf_pool_mutex_exit_allow(buf_pool);
+               if (UNIV_UNLIKELY(!b)) {
+                       return(BUF_LRU_CANNOT_RELOCATE);
+               }
+-              memcpy(b, bpage, sizeof *b);
++              //memcpy(b, bpage, sizeof *b);
+       }
+ #ifdef UNIV_DEBUG
+@@ -1530,6 +1613,39 @@
+       }
+ #endif /* UNIV_DEBUG */
++      /* To keep the latch order, release block_mutex and re-acquire it after LRU_list_mutex and page_hash_latch */
++      mutex_exit(block_mutex);
++
++      if (!have_LRU_mutex)
++              mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
++      mutex_enter(block_mutex);
++
++      /* Re-check the block's state; it may have changed while block_mutex was released */
++      if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
++          || !buf_page_can_relocate(bpage)) {
++not_freed:
++              if (b) {
++                      buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
++              }
++              if (!have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++              return(BUF_LRU_NOT_FREED);
++      } else if (zip || !bpage->zip.data) {
++              if (bpage->oldest_modification)
++                      goto not_freed;
++      } else if (bpage->oldest_modification) {
++              if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
++                      ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
++                      goto not_freed;
++              }
++      }
++
++      if (b) {
++              memcpy(b, bpage, sizeof *b);
++      }
++
+       if (buf_LRU_block_remove_hashed_page(bpage, zip)
+           != BUF_BLOCK_ZIP_FREE) {
+               ut_a(bpage->buf_fix_count == 0);
+@@ -1546,6 +1662,10 @@
+                       ut_a(!hash_b);
++                      while (prev_b && !prev_b->in_LRU_list) {
++                              prev_b = UT_LIST_GET_PREV(LRU, prev_b);
++                      }
++
+                       b->state = b->oldest_modification
+                               ? BUF_BLOCK_ZIP_DIRTY
+                               : BUF_BLOCK_ZIP_PAGE;
+@@ -1642,7 +1762,9 @@
+                       *buf_pool_mutex_released = TRUE;
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+               mutex_exit(block_mutex);
+               /* Remove possible adaptive hash index on the page.
+@@ -1674,7 +1796,9 @@
+                               : BUF_NO_CHECKSUM_MAGIC);
+               }
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              if (have_LRU_mutex)
++                      mutex_enter(&buf_pool->LRU_list_mutex);
+               mutex_enter(block_mutex);
+               if (b) {
+@@ -1684,13 +1808,17 @@
+                       mutex_exit(&buf_pool->zip_mutex);
+               }
+-              buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
++              buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
+       } else {
+               /* The block_mutex should have been released by
+               buf_LRU_block_remove_hashed_page() when it returns
+               BUF_BLOCK_ZIP_FREE. */
+               ut_ad(block_mutex == &buf_pool->zip_mutex);
+               mutex_enter(block_mutex);
++
++              if (!have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       }
+       return(BUF_LRU_FREED);
+@@ -1702,13 +1830,14 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+-      buf_block_t*    block)  /*!< in: block, must not contain a file page */
++      buf_block_t*    block,  /*!< in: block, must not contain a file page */
++      ibool           have_page_hash_mutex)
+ {
+       void*           data;
+       buf_pool_t*     buf_pool = buf_pool_from_block(block);
+       ut_ad(block);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(mutex_own(&block->mutex));
+       switch (buf_block_get_state(block)) {
+@@ -1742,18 +1871,21 @@
+       if (data) {
+               block->page.zip.data = NULL;
+               mutex_exit(&block->mutex);
+-              buf_pool_mutex_exit_forbid(buf_pool);
++              //buf_pool_mutex_exit_forbid(buf_pool);
+               buf_buddy_free(
+-                      buf_pool, data, page_zip_get_size(&block->page.zip));
++                      buf_pool, data, page_zip_get_size(&block->page.zip),
++                      have_page_hash_mutex);
+-              buf_pool_mutex_exit_allow(buf_pool);
++              //buf_pool_mutex_exit_allow(buf_pool);
+               mutex_enter(&block->mutex);
+               page_zip_set_size(&block->page.zip, 0);
+       }
+-      UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
++      mutex_enter(&buf_pool->free_list_mutex);
++      UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
+       ut_d(block->page.in_free_list = TRUE);
++      mutex_exit(&buf_pool->free_list_mutex);
+       UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
+ }
+@@ -1783,7 +1915,11 @@
+       buf_pool_t*             buf_pool = buf_pool_from_bpage(bpage);
+       ut_ad(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+@@ -1891,7 +2027,9 @@
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+               mutex_exit(buf_page_get_mutex(bpage));
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+               buf_print();
+               buf_LRU_print();
+               buf_validate();
+@@ -1912,17 +2050,17 @@
+               ut_a(bpage->zip.data);
+               ut_a(buf_page_get_zip_size(bpage));
+-              UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
++              UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
+               mutex_exit(&buf_pool->zip_mutex);
+-              buf_pool_mutex_exit_forbid(buf_pool);
++              //buf_pool_mutex_exit_forbid(buf_pool);
+               buf_buddy_free(
+                       buf_pool, bpage->zip.data,
+-                      page_zip_get_size(&bpage->zip));
++                      page_zip_get_size(&bpage->zip), TRUE);
+-              buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
+-              buf_pool_mutex_exit_allow(buf_pool);
++              buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
++              //buf_pool_mutex_exit_allow(buf_pool);
+               UNIV_MEM_UNDESC(bpage);
+               return(BUF_BLOCK_ZIP_FREE);
+@@ -1945,13 +2083,13 @@
+                       ut_ad(!bpage->in_flush_list);
+                       ut_ad(!bpage->in_LRU_list);
+                       mutex_exit(&((buf_block_t*) bpage)->mutex);
+-                      buf_pool_mutex_exit_forbid(buf_pool);
++                      //buf_pool_mutex_exit_forbid(buf_pool);
+                       buf_buddy_free(
+                               buf_pool, data,
+-                              page_zip_get_size(&bpage->zip));
++                              page_zip_get_size(&bpage->zip), TRUE);
+-                      buf_pool_mutex_exit_allow(buf_pool);
++                      //buf_pool_mutex_exit_allow(buf_pool);
+                       mutex_enter(&((buf_block_t*) bpage)->mutex);
+                       page_zip_set_size(&bpage->zip, 0);
+               }
+@@ -1977,18 +2115,19 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+-      buf_block_t*    block)  /*!< in: block, must contain a file page and
++      buf_block_t*    block,  /*!< in: block, must contain a file page and
+                               be in a state where it can be freed */
++      ibool           have_page_hash_mutex)
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_block(block);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_block(block);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+       ut_ad(mutex_own(&block->mutex));
+       buf_block_set_state(block, BUF_BLOCK_MEMORY);
+-      buf_LRU_block_free_non_file_page(block);
++      buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+ }
+ /**********************************************************************//**
+@@ -2015,7 +2154,8 @@
+       }
+       if (adjust) {
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex);
+               if (ratio != buf_pool->LRU_old_ratio) {
+                       buf_pool->LRU_old_ratio = ratio;
+@@ -2027,7 +2167,8 @@
+                       }
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       } else {
+               buf_pool->LRU_old_ratio = ratio;
+       }
+@@ -2124,7 +2265,8 @@
+       ulint           new_len;
+       ut_ad(buf_pool);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+       if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+@@ -2185,16 +2327,22 @@
+       ut_a(buf_pool->LRU_old_len == old_len);
+-      UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      mutex_enter(&buf_pool->free_list_mutex);
++
++      UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
+                        ut_ad(ut_list_node_313->in_free_list));
+       for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
+            bpage != NULL;
+-           bpage = UT_LIST_GET_NEXT(list, bpage)) {
++           bpage = UT_LIST_GET_NEXT(free, bpage)) {
+               ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
+       }
++      mutex_exit(&buf_pool->free_list_mutex);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++
+       UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
+                        ut_ad(ut_list_node_313->in_unzip_LRU_list
+                              && ut_list_node_313->page.in_LRU_list));
+@@ -2208,7 +2356,8 @@
+               ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+ /**********************************************************************//**
+@@ -2244,7 +2393,8 @@
+       const buf_page_t*       bpage;
+       ut_ad(buf_pool);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+       bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+@@ -2301,7 +2451,8 @@
+               bpage = UT_LIST_GET_NEXT(LRU, bpage);
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+ /**********************************************************************//**
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c   2010-12-03 15:22:36.323977308 +0900
++++ b/storage/innobase/buf/buf0rea.c   2010-12-03 15:48:29.296024468 +0900
+@@ -311,6 +311,7 @@
+               return(0);
+       }
++      buf_pool_mutex_exit(buf_pool);
+       /* Check that almost all pages in the area have been accessed; if
+       offset == low, the accesses must be in a descending order, otherwise,
+@@ -329,6 +330,7 @@
+       fail_count = 0;
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       for (i = low; i < high; i++) {
+               bpage = buf_page_hash_get(buf_pool, space, i);
+@@ -356,7 +358,8 @@
+               if (fail_count > threshold) {
+                       /* Too many failures: return */
+-                      buf_pool_mutex_exit(buf_pool);
++                      //buf_pool_mutex_exit(buf_pool);
++                      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+                       return(0);
+               }
+@@ -371,7 +374,8 @@
+       bpage = buf_page_hash_get(buf_pool, space, offset);
+       if (bpage == NULL) {
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+               return(0);
+       }
+@@ -397,7 +401,8 @@
+       pred_offset = fil_page_get_prev(frame);
+       succ_offset = fil_page_get_next(frame);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       if ((offset == low) && (succ_offset == offset + 1)) {
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:48:03.048955897 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:48:29.304024564 +0900
+@@ -245,6 +245,10 @@
+ #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
+       {&buf_pool_mutex_key, "buf_pool_mutex", 0},
+       {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
++      {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
++      {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
++      {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
++      {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
+       {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
+       {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
+       {&dict_sys_mutex_key, "dict_sys_mutex", 0},
+@@ -295,6 +299,7 @@
+       {&archive_lock_key, "archive_lock", 0},
+ #  endif /* UNIV_LOG_ARCHIVE */
+       {&btr_search_latch_key, "btr_search_latch", 0},
++      {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
+ #  ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
+       {&buf_block_lock_key, "buf_block_lock", 0},
+ #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc  2010-12-03 15:37:45.517105700 +0900
++++ b/storage/innobase/handler/i_s.cc  2010-12-03 15:48:29.331024462 +0900
+@@ -1566,7 +1566,8 @@
+               buf_pool = buf_pool_from_array(i);
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->zip_free_mutex);
+               for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
+                       buf_buddy_stat_t*       buddy_stat;
+@@ -1596,7 +1597,8 @@
+                       }
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->zip_free_mutex);
+               if (status) {
+                       break;
+diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
+--- a/storage/innobase/ibuf/ibuf0ibuf.c        2010-12-03 15:48:03.068954202 +0900
++++ b/storage/innobase/ibuf/ibuf0ibuf.c        2010-12-03 15:48:29.335988682 +0900
+@@ -3705,9 +3705,11 @@
+               ulint           fold = buf_page_address_fold(space, page_no);
+               buf_pool_t*     buf_pool = buf_pool_get(space, page_no);
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              rw_lock_s_lock(&buf_pool->page_hash_latch);
+               bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+               if (UNIV_LIKELY_NULL(bpage)) {
+                       /* A buffer pool watch has been set or the
+diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
+--- a/storage/innobase/include/buf0buddy.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buddy.h     2010-12-03 15:48:29.338023826 +0900
+@@ -51,10 +51,11 @@
+       buf_pool_t*     buf_pool,
+                       /*!< buffer pool in which the block resides */
+       ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
+-      ibool*  lru)    /*!< in: pointer to a variable that will be assigned
++      ibool*  lru,    /*!< in: pointer to a variable that will be assigned
+                       TRUE if storage was allocated from the LRU list
+                       and buf_pool->mutex was temporarily released,
+                       or NULL if the LRU list should not be used */
++      ibool   have_page_hash_mutex)
+       __attribute__((malloc));
+ /**********************************************************************//**
+@@ -67,7 +68,8 @@
+                       /*!< buffer pool in which the block resides */
+       void*   buf,    /*!< in: block to be freed, must not be
+                       pointed to by the buffer pool */
+-      ulint   size)   /*!< in: block size, up to UNIV_PAGE_SIZE */
++      ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
++      ibool   have_page_hash_mutex)
+       __attribute__((nonnull));
+ #ifndef UNIV_NONINL
+diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
+--- a/storage/innobase/include/buf0buddy.ic    2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buddy.ic    2010-12-03 15:48:29.339040413 +0900
+@@ -46,10 +46,11 @@
+                       /*!< in: buffer pool in which the page resides */
+       ulint   i,      /*!< in: index of buf_pool->zip_free[],
+                       or BUF_BUDDY_SIZES */
+-      ibool*  lru)    /*!< in: pointer to a variable that will be assigned
++      ibool*  lru,    /*!< in: pointer to a variable that will be assigned
+                       TRUE if storage was allocated from the LRU list
+                       and buf_pool->mutex was temporarily released,
+                       or NULL if the LRU list should not be used */
++      ibool   have_page_hash_mutex)
+       __attribute__((malloc));
+ /**********************************************************************//**
+@@ -61,8 +62,9 @@
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+       void*           buf,            /*!< in: block to be freed, must not be
+                                       pointed to by the buffer pool */
+-      ulint           i)              /*!< in: index of buf_pool->zip_free[],
++      ulint           i,              /*!< in: index of buf_pool->zip_free[],
+                                       or BUF_BUDDY_SIZES */
++      ibool           have_page_hash_mutex)
+       __attribute__((nonnull));
+ /**********************************************************************//**
+@@ -102,16 +104,17 @@
+                                       the page resides */
+       ulint           size,           /*!< in: block size, up to
+                                       UNIV_PAGE_SIZE */
+-      ibool*          lru)            /*!< in: pointer to a variable
++      ibool*          lru,            /*!< in: pointer to a variable
+                                       that will be assigned TRUE if
+                                       storage was allocated from the
+                                       LRU list and buf_pool->mutex was
+                                       temporarily released, or NULL if
+                                       the LRU list should not be used */
++      ibool           have_page_hash_mutex)
+ {
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+-      return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
++      return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
+ }
+ /**********************************************************************//**
+@@ -123,12 +126,25 @@
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+       void*           buf,            /*!< in: block to be freed, must not be
+                                       pointed to by the buffer pool */
+-      ulint           size)           /*!< in: block size, up to
++      ulint           size,           /*!< in: block size, up to
+                                       UNIV_PAGE_SIZE */
++      ibool           have_page_hash_mutex)
+ {
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++
++      if (!have_page_hash_mutex) {
++              mutex_enter(&buf_pool->LRU_list_mutex);
++              rw_lock_x_lock(&buf_pool->page_hash_latch);
++      }
+-      buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
++      mutex_enter(&buf_pool->zip_free_mutex);
++      buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
++      mutex_exit(&buf_pool->zip_free_mutex);
++
++      if (!have_page_hash_mutex) {
++              mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++      }
+ }
+ #ifdef UNIV_MATERIALIZE
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h       2010-12-03 15:22:36.327954660 +0900
++++ b/storage/innobase/include/buf0buf.h       2010-12-03 15:48:29.343024683 +0900
+@@ -132,6 +132,20 @@
+ /*==========================*/
+ /********************************************************************//**
++X-locks page_hash_latch of every buffer pool instance, in array order. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_lock_all(void);
++/*================================*/
++
++/********************************************************************//**
++Releases the page_hash_latch X-lock of every buffer pool instance. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_unlock_all(void);
++/*==================================*/
++
++/********************************************************************//**
+ Creates the buffer pool.
+ @return       own: buf_pool object, NULL if not enough memory or error */
+ UNIV_INTERN
+@@ -761,6 +775,15 @@
+       const buf_page_t*       bpage)  /*!< in: pointer to control block */
+       __attribute__((pure));
++/*************************************************************************
++Enters the mutex of a page; returns it held, or NULL if the page has none. */
++UNIV_INLINE
++mutex_t*
++buf_page_get_mutex_enter(
++/*=========================*/
++      const buf_page_t*       bpage); /*!< in: pointer to control block */
++      /* not __attribute__((pure)): entering a mutex is a side effect */
++
+ /*********************************************************************//**
+ Get the flush type of a page.
+ @return       flush type */
+@@ -1242,7 +1265,7 @@
+       All these are protected by buf_pool->mutex. */
+       /* @{ */
+-      UT_LIST_NODE_T(buf_page_t) list;
++      /* UT_LIST_NODE_T(buf_page_t) list; */
+                                       /*!< based on state, this is a
+                                       list node, protected either by
+                                       buf_pool->mutex or by
+@@ -1270,6 +1293,10 @@
+                                       BUF_BLOCK_REMOVE_HASH or
+                                       BUF_BLOCK_READY_IN_USE. */
++      /* resplit for optimistic use */
++      UT_LIST_NODE_T(buf_page_t) free;
++      UT_LIST_NODE_T(buf_page_t) flush_list;
++      UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
+ #ifdef UNIV_DEBUG
+       ibool           in_flush_list;  /*!< TRUE if in buf_pool->flush_list;
+                                       when buf_pool->flush_list_mutex is
+@@ -1362,11 +1389,11 @@
+                                       a block is in the unzip_LRU list
+                                       if page.state == BUF_BLOCK_FILE_PAGE
+                                       and page.zip.data != NULL */
+-#ifdef UNIV_DEBUG
++//#ifdef UNIV_DEBUG
+       ibool           in_unzip_LRU_list;/*!< TRUE if the page is in the
+                                       decompressed LRU list;
+                                       used in debugging */
+-#endif /* UNIV_DEBUG */
++//#endif /* UNIV_DEBUG */
+       mutex_t         mutex;          /*!< mutex protecting this block:
+                                       state (also protected by the buffer
+                                       pool mutex), io_fix, buf_fix_count,
+@@ -1532,6 +1559,11 @@
+                                       pool instance, protects compressed
+                                       only pages (of type buf_page_t, not
+                                       buf_block_t */
++      mutex_t         LRU_list_mutex;
++      rw_lock_t       page_hash_latch;
++      mutex_t         free_list_mutex;
++      mutex_t         zip_free_mutex;
++      mutex_t         zip_hash_mutex;
+       ulint           instance_no;    /*!< Array index of this buffer
+                                       pool instance */
+       ulint           old_pool_size;  /*!< Old pool size in bytes */
+diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
+--- a/storage/innobase/include/buf0buf.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buf.ic      2010-12-03 15:48:29.345024524 +0900
+@@ -274,7 +274,7 @@
+       case BUF_BLOCK_ZIP_FREE:
+               /* This is a free page in buf_pool->zip_free[].
+               Such pages should only be accessed by the buddy allocator. */
+-              ut_error;
++              /* ut_error; */ /* optimistic */
+               break;
+       case BUF_BLOCK_ZIP_PAGE:
+       case BUF_BLOCK_ZIP_DIRTY:
+@@ -317,9 +317,14 @@
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
++      if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
++              /* TODO: this code is the interim. should be confirmed later. */
++              return(&buf_pool->zip_mutex);
++      }
++
+       switch (buf_page_get_state(bpage)) {
+       case BUF_BLOCK_ZIP_FREE:
+-              ut_error;
++              /* ut_error; */ /* optimistic */
+               return(NULL);
+       case BUF_BLOCK_ZIP_PAGE:
+       case BUF_BLOCK_ZIP_DIRTY:
+@@ -329,6 +334,28 @@
+       }
+ }
++/*************************************************************************
++Enters the mutex of bpage; retries if the page's mutex changed meanwhile. */
++UNIV_INLINE
++mutex_t*
++buf_page_get_mutex_enter(
++/*=========================*/
++      const buf_page_t*       bpage)  /*!< in: pointer to control block */
++{
++      mutex_t*        block_mutex;
++
++      while(1) {
++              block_mutex = buf_page_get_mutex(bpage);
++              if (!block_mutex)
++                      return block_mutex;     /* NULL: no mutex was entered */
++
++              mutex_enter(block_mutex);
++              if (block_mutex == buf_page_get_mutex(bpage))
++                      return block_mutex;     /* lookup unchanged: return it held */
++              mutex_exit(block_mutex);        /* lookup changed: release and retry */
++      }
++}
++
+ /*********************************************************************//**
+ Get the flush type of a page.
+ @return       flush type */
+@@ -425,8 +452,8 @@
+       enum buf_io_fix io_fix) /*!< in: io_fix state */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+@@ -456,14 +483,14 @@
+       const buf_page_t*       bpage)  /*!< control block being relocated */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_ad(buf_page_in_file(bpage));
+-      ut_ad(bpage->in_LRU_list);
++      //ut_ad(bpage->in_LRU_list);
+-      return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
++      return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
+              && bpage->buf_fix_count == 0);
+ }
+@@ -477,8 +504,8 @@
+       const buf_page_t*       bpage)  /*!< in: control block */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+       ut_ad(buf_page_in_file(bpage));
+@@ -498,7 +525,8 @@
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+ #endif /* UNIV_DEBUG */
+       ut_a(buf_page_in_file(bpage));
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_ad(bpage->in_LRU_list);
+ #ifdef UNIV_LRU_DEBUG
+@@ -545,9 +573,10 @@
+       ulint           time_ms)        /*!< in: ut_time_ms() */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
++      ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_a(buf_page_in_file(bpage));
+       if (!bpage->access_time) {
+@@ -761,19 +790,19 @@
+ /*===========*/
+       buf_block_t*    block)  /*!< in, own: block to be freed */
+ {
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
+       mutex_enter(&block->mutex);
+       ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+-      buf_LRU_block_free_non_file_page(block);
++      buf_LRU_block_free_non_file_page(block, FALSE);
+       mutex_exit(&block->mutex);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
+ }
+ #endif /* !UNIV_HOTBACKUP */
+@@ -821,17 +850,17 @@
+                                       page frame */
+ {
+       ib_uint64_t     lsn;
+-      mutex_t*        block_mutex = buf_page_get_mutex(bpage);
+-
+-      mutex_enter(block_mutex);
++      mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
+-      if (buf_page_in_file(bpage)) {
++      if (block_mutex && buf_page_in_file(bpage)) {
+               lsn = bpage->newest_modification;
+       } else {
+               lsn = 0;
+       }
+-      mutex_exit(block_mutex);
++      if (block_mutex) {
++              mutex_exit(block_mutex);
++      }
+       return(lsn);
+ }
+@@ -849,7 +878,7 @@
+ #ifdef UNIV_SYNC_DEBUG
+       buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+-      ut_ad((buf_pool_mutex_own(buf_pool)
++      ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
+              && (block->page.buf_fix_count == 0))
+             || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -979,7 +1008,11 @@
+       buf_page_t*     bpage;
+       ut_ad(buf_pool);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
++            || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
++#endif
+       ut_ad(fold == buf_page_address_fold(space, offset));
+       /* Look for the page in the hash table */
+@@ -1064,11 +1097,13 @@
+       const buf_page_t*       bpage;
+       buf_pool_t*             buf_pool = buf_pool_get(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       bpage = buf_page_hash_get(buf_pool, space, offset);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       return(bpage != NULL);
+ }
+@@ -1196,4 +1231,38 @@
+               buf_pool_mutex_exit(buf_pool);
+       }
+ }
++
++/********************************************************************//**
++X-locks page_hash_latch of every buffer pool instance, in array order. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_lock_all(void)
++/*===============================*/
++{
++      ulint   i;
++
++      for (i = 0; i < srv_buf_pool_instances; i++) {
++              buf_pool_t*     buf_pool;
++
++              buf_pool = buf_pool_from_array(i);
++              rw_lock_x_lock(&buf_pool->page_hash_latch);
++      }
++}
++
++/********************************************************************//**
++Releases the page_hash_latch X-lock of every buffer pool instance. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_unlock_all(void)
++/*=================================*/
++{
++      ulint   i;
++
++      for (i = 0; i < srv_buf_pool_instances; i++) {
++              buf_pool_t*     buf_pool;
++
++              buf_pool = buf_pool_from_array(i);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++      }
++}
+ #endif /* !UNIV_HOTBACKUP */
+diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
+--- a/storage/innobase/include/buf0lru.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0lru.h       2010-12-03 15:48:29.349024701 +0900
+@@ -113,10 +113,11 @@
+       buf_page_t*     bpage,  /*!< in: block to be freed */
+       ibool           zip,    /*!< in: TRUE if should remove also the
+                               compressed page of an uncompressed page */
+-      ibool*          buf_pool_mutex_released);
++      ibool*          buf_pool_mutex_released,
+                               /*!< in: pointer to a variable that will
+                               be assigned TRUE if buf_pool->mutex
+                               was temporarily released, or NULL */
++      ibool           have_LRU_mutex);
+ /******************************************************************//**
+ Try to free a replaceable block.
+ @return       TRUE if found and freed */
+@@ -163,7 +164,8 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+-      buf_block_t*    block); /*!< in: block, must not contain a file page */
++      buf_block_t*    block,  /*!< in: block, must not contain a file page */
++      ibool           have_page_hash_mutex);
+ /******************************************************************//**
+ Adds a block to the LRU list. */
+ UNIV_INTERN
+diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
+--- a/storage/innobase/include/sync0rw.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0rw.h       2010-12-03 15:48:29.349942993 +0900
+@@ -112,6 +112,7 @@
+ extern        mysql_pfs_key_t archive_lock_key;
+ # endif /* UNIV_LOG_ARCHIVE */
+ extern        mysql_pfs_key_t btr_search_latch_key;
++extern        mysql_pfs_key_t buf_pool_page_hash_key;
+ extern        mysql_pfs_key_t buf_block_lock_key;
+ # ifdef UNIV_SYNC_DEBUG
+ extern        mysql_pfs_key_t buf_block_debug_latch_key;
+diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
+--- a/storage/innobase/include/sync0sync.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0sync.h     2010-12-03 15:48:29.352024614 +0900
+@@ -75,6 +75,10 @@
+ extern mysql_pfs_key_t        buffer_block_mutex_key;
+ extern mysql_pfs_key_t        buf_pool_mutex_key;
+ extern mysql_pfs_key_t        buf_pool_zip_mutex_key;
++extern mysql_pfs_key_t        buf_pool_LRU_list_mutex_key;
++extern mysql_pfs_key_t        buf_pool_free_list_mutex_key;
++extern mysql_pfs_key_t        buf_pool_zip_free_mutex_key;
++extern mysql_pfs_key_t        buf_pool_zip_hash_mutex_key;
+ extern mysql_pfs_key_t        cache_last_read_mutex_key;
+ extern mysql_pfs_key_t        dict_foreign_err_mutex_key;
+ extern mysql_pfs_key_t        dict_sys_mutex_key;
+@@ -660,7 +664,7 @@
+ #define       SYNC_TRX_LOCK_HEAP      298
+ #define SYNC_TRX_SYS_HEADER   290
+ #define SYNC_LOG              170
+-#define SYNC_LOG_FLUSH_ORDER  147
++#define SYNC_LOG_FLUSH_ORDER  156
+ #define SYNC_RECV             168
+ #define       SYNC_WORK_QUEUE         162
+ #define       SYNC_SEARCH_SYS_CONF    161     /* for assigning btr_search_enabled */
+@@ -670,8 +674,13 @@
+                                       SYNC_SEARCH_SYS, as memory allocation
+                                       can call routines there! Otherwise
+                                       the level is SYNC_MEM_HASH. */
++#define       SYNC_BUF_LRU_LIST       158
++#define       SYNC_BUF_PAGE_HASH      157
++#define       SYNC_BUF_BLOCK          155     /* Block mutex */
++#define       SYNC_BUF_FREE_LIST      153
++#define       SYNC_BUF_ZIP_FREE       152
++#define       SYNC_BUF_ZIP_HASH       151
+ #define       SYNC_BUF_POOL           150     /* Buffer pool mutex */
+-#define       SYNC_BUF_BLOCK          146     /* Block mutex */
+ #define       SYNC_BUF_FLUSH_LIST     145     /* Buffer flush list mutex */
+ #define SYNC_DOUBLEWRITE      140
+ #define       SYNC_ANY_LATCH          135
+@@ -703,7 +712,7 @@
+               os_fast_mutex;  /*!< We use this OS mutex in place of lock_word
+                               when atomic operations are not enabled */
+ #endif
+-      ulint   waiters;        /*!< This ulint is set to 1 if there are (or
++      volatile ulint  waiters;        /*!< This ulint is set to 1 if there are (or
+                               may be) threads waiting in the global wait
+                               array for this mutex to be released.
+                               Otherwise, this is 0. */
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 15:48:03.080956216 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 15:48:29.355023766 +0900
+@@ -3060,7 +3060,7 @@
+                                                               level += log_sys->max_checkpoint_age
+                                                                        - (lsn - oldest_modification);
+                                                       }
+-                                                      bpage = UT_LIST_GET_NEXT(list, bpage);
++                                                      bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+                                                       n_blocks++;
+                                               }
+@@ -3145,7 +3145,7 @@
+                                                       found = TRUE;
+                                                       break;
+                                               }
+-                                              bpage = UT_LIST_GET_NEXT(list, bpage);
++                                              bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+                                               new_blocks_num++;
+                                       }
+                                       if (!found) {
+diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
+--- a/storage/innobase/sync/sync0sync.c        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/sync/sync0sync.c        2010-12-03 15:48:29.358023890 +0900
+@@ -265,7 +265,7 @@
+       mutex->lock_word = 0;
+ #endif
+       mutex->event = os_event_create(NULL);
+-      mutex_set_waiters(mutex, 0);
++      mutex->waiters = 0;
+ #ifdef UNIV_DEBUG
+       mutex->magic_n = MUTEX_MAGIC_N;
+ #endif /* UNIV_DEBUG */
+@@ -444,6 +444,15 @@
+       mutex_t*        mutex,  /*!< in: mutex */
+       ulint           n)      /*!< in: value to set */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++      ut_ad(mutex);
++
++      if (n) {
++              os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
++      } else {
++              os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
++      }
++#else
+       volatile ulint* ptr;            /* declared volatile to ensure that
+                                       the value is stored to memory */
+       ut_ad(mutex);
+@@ -452,6 +461,7 @@
+       *ptr = n;               /* Here we assume that the write of a single
+                               word in memory is atomic */
++#endif
+ }
+ /******************************************************************//**
+@@ -1193,7 +1203,12 @@
+                       ut_error;
+               }
+               break;
++      case SYNC_BUF_LRU_LIST:
+       case SYNC_BUF_FLUSH_LIST:
++      case SYNC_BUF_PAGE_HASH:
++      case SYNC_BUF_FREE_LIST:
++      case SYNC_BUF_ZIP_FREE:
++      case SYNC_BUF_ZIP_HASH:
+       case SYNC_BUF_POOL:
+               /* We can have multiple mutexes of this type therefore we
+               can only check whether the greater than condition holds. */
+@@ -1211,7 +1226,8 @@
+               buffer block (block->mutex or buf_pool->zip_mutex). */
+               if (!sync_thread_levels_g(array, level, FALSE)) {
+                       ut_a(sync_thread_levels_g(array, level - 1, TRUE));
+-                      ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
++                      /* the exact rule is not fixed yet, for now */
++                      //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
+               }
+               break;
+       case SYNC_REC_LOCK:
diff --git a/innodb_stats.patch b/innodb_stats.patch
new file mode 100644 (file)
index 0000000..0f50cc3
--- /dev/null
@@ -0,0 +1,2404 @@
+# name       : innodb_stats.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
+--- a/storage/innobase/btr/btr0cur.c   2010-12-03 15:49:59.165212710 +0900
++++ b/storage/innobase/btr/btr0cur.c   2010-12-03 17:19:24.834126874 +0900
+@@ -1010,6 +1010,107 @@
+       }
+ }
++/**********************************************************************//**
++Positions a cursor at a randomly chosen position within a B-tree
++after the given path
++@return TRUE if the leaf cursor was opened on the record given by the leaf
++        slot of first_rec_path, FALSE if a random leaf record was chosen */
++UNIV_INTERN
++ibool
++btr_cur_open_at_rnd_pos_after_path(
++/*====================*/
++      dict_index_t*   index,          /*!< in: index */
++      ulint           latch_mode,     /*!< in: BTR_SEARCH_LEAF, ... */
++      btr_path_t*     first_rec_path, /*!< in: path slots, one per level, root first */
++      btr_cur_t*      cursor,         /*!< in/out: B-tree cursor */
++      mtr_t*          mtr)            /*!< in: mtr */
++{
++      page_cur_t*     page_cursor;
++      btr_path_t*     slot;
++      ibool           is_first_rec    = TRUE;
++      ulint           page_no;
++      ulint           space;
++      ulint           zip_size;
++      ulint           height;
++      rec_t*          node_ptr;
++      mem_heap_t*     heap            = NULL;
++      ulint           offsets_[REC_OFFS_NORMAL_SIZE];
++      ulint*          offsets         = offsets_;
++      rec_offs_init(offsets_);
++
++      if (latch_mode == BTR_MODIFY_TREE) {
++              mtr_x_lock(dict_index_get_lock(index), mtr);
++      } else {
++              mtr_s_lock(dict_index_get_lock(index), mtr);
++      }
++
++      page_cursor = btr_cur_get_page_cur(cursor);
++      cursor->index = index;
++
++      space = dict_index_get_space(index);
++      zip_size = dict_table_zip_size(index->table);
++      page_no = dict_index_get_page(index);
++
++      height = ULINT_UNDEFINED;       /* set from the first page read below */
++      slot = first_rec_path;
++
++      for (;;) {
++              buf_block_t*    block;
++              page_t*         page;
++
++              block = buf_page_get_gen(space, zip_size, page_no,
++                                       RW_NO_LATCH, NULL, BUF_GET,
++                                       __FILE__, __LINE__, mtr);
++              page = buf_block_get_frame(block);
++              ut_ad(index->id == btr_page_get_index_id(page));
++
++              if (height == ULINT_UNDEFINED) {
++                      /* We are in the root node */
++
++                      height = btr_page_get_level(page, mtr);
++              }
++
++              if (height == 0) {
++                      btr_cur_latch_leaves(page, space, zip_size, page_no,
++                                           latch_mode, cursor, mtr);
++              }
++
++              if (is_first_rec && slot->nth_rec != ULINT_UNDEFINED) {
++                      if (height == 0) {
++                              /* must open the first rec */
++                              page_cur_open_on_nth_user_rec(block, page_cursor, slot->nth_rec);
++                      } else {
++                              is_first_rec = page_cur_open_on_rnd_user_rec_after_nth(block,
++                                                              page_cursor, slot->nth_rec);
++                      }
++              } else {
++                      is_first_rec = FALSE;
++                      page_cur_open_on_rnd_user_rec(block, page_cursor);
++              }
++
++              if (height == 0) {
++                      break;
++              }
++
++              ut_ad(height > 0);
++
++              height--;
++              slot++;         /* the next level down uses the next path slot */
++
++              node_ptr = page_cur_get_rec(page_cursor);
++              offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
++                                        ULINT_UNDEFINED, &heap);
++              /* Go to the child node */
++              page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
++      }
++
++      if (UNIV_LIKELY_NULL(heap)) {
++              mem_heap_free(heap);
++      }
++
++      return (is_first_rec);
++}
++
+ /*==================== B-TREE INSERT =========================*/
+ /*************************************************************//**
+@@ -3488,6 +3589,154 @@
+ }
+ /*******************************************************************//**
++Estimates the number of pages holding records with a non-NULL n_cols-column key prefix.
++@return       estimated number of pages */
++UNIV_INTERN
++ulint
++btr_estimate_n_pages_not_null(
++/*=========================*/
++      dict_index_t*   index,  /*!< in: index */
++      ulint           n_cols, /*!< in: number of leading key columns required to be not NULL */
++      btr_path_t*     path1)  /*!< in: BTR_PATH_ARRAY_N_SLOTS slots; becomes cursor.path_arr of the first search */
++{
++      dtuple_t*       tuple1;
++      btr_path_t      path2[BTR_PATH_ARRAY_N_SLOTS];
++      btr_cur_t       cursor;
++      btr_path_t*     slot1;
++      btr_path_t*     slot2;
++      ibool           diverged;
++      ibool           diverged_lot;
++      ulint           divergence_level;
++      ulint           n_pages;
++      ulint           i;
++      mtr_t           mtr;
++      mem_heap_t*     heap;
++
++      heap = mem_heap_create(n_cols * sizeof(dfield_t)
++                              + sizeof(dtuple_t));
++
++      /* make tuple1 (NULL,NULL,,,) from n_cols */
++      tuple1 = dtuple_create(heap, n_cols);
++      dict_index_copy_types(tuple1, index, n_cols);
++
++      for (i = 0; i < n_cols; i++) {
++              dfield_set_null(dtuple_get_nth_field(tuple1, i));
++      }
++
++      mtr_start(&mtr);
++
++      cursor.path_arr = path1;
++
++      btr_cur_search_to_nth_level(index, 0, tuple1, PAGE_CUR_G,
++                                  BTR_SEARCH_LEAF | BTR_ESTIMATE,
++                                  &cursor, 0, __FILE__, __LINE__, &mtr);
++
++      mtr_commit(&mtr);
++
++
++
++      mtr_start(&mtr);
++
++      cursor.path_arr = path2;
++
++      btr_cur_open_at_index_side(FALSE, index,
++                                 BTR_SEARCH_LEAF | BTR_ESTIMATE,
++                                 &cursor, &mtr);
++
++      mtr_commit(&mtr);
++
++      mem_heap_free(heap);
++
++      /* We have the path information for the range in path1 and path2 */
++
++      n_pages = 1;
++      diverged = FALSE;           /* This becomes true when the path is not
++                                  the same any more */
++      diverged_lot = FALSE;       /* This becomes true when the paths are
++                                  not the same or adjacent any more */
++      divergence_level = 1000000; /* This is the level where paths diverged
++                                  a lot */
++      for (i = 0; ; i++) {
++              ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
++
++              slot1 = path1 + i;
++              slot2 = path2 + i;
++
++              if ((slot1 + 1)->nth_rec == ULINT_UNDEFINED
++                  || (slot2 + 1)->nth_rec == ULINT_UNDEFINED) {
++
++                      if (i > divergence_level + 1) {
++                              /* In trees whose height is > 1 our algorithm
++                              tends to underestimate: multiply the estimate
++                              by 2: */
++
++                              n_pages = n_pages * 2;
++                      }
++
++                      /* Do not estimate the number of pages in the range
++                      to over 1 / 2 of the leaf pages recorded in
++                      index->stat_n_leaf_pages */
++
++                      if (n_pages > index->stat_n_leaf_pages / 2) {
++                              n_pages = index->stat_n_leaf_pages / 2;
++
++                              /* If that half rounds down to 0, fall back
++                              to the full stat_n_leaf_pages value */
++
++                              if (n_pages == 0) {
++                                      n_pages = index->stat_n_leaf_pages;
++                              }
++                      }
++
++                      return(n_pages);
++              }
++
++              if (!diverged && slot1->nth_rec != slot2->nth_rec) {
++
++                      diverged = TRUE;
++
++                      if (slot1->nth_rec < slot2->nth_rec) {
++                              n_pages = slot2->nth_rec - slot1->nth_rec;
++
++                              if (n_pages > 1) {
++                                      diverged_lot = TRUE;
++                                      divergence_level = i;
++                              }
++                      } else {
++                              /* Maybe the tree has changed between
++                              searches */
++
++                              return(10);
++                      }
++
++              } else if (diverged && !diverged_lot) {
++
++                      if (slot1->nth_rec < slot1->n_recs
++                          || slot2->nth_rec > 1) {
++
++                              diverged_lot = TRUE;
++                              divergence_level = i;
++
++                              n_pages = 0;
++
++                              if (slot1->nth_rec < slot1->n_recs) {
++                                      n_pages += slot1->n_recs
++                                              - slot1->nth_rec;
++                              }
++
++                              if (slot2->nth_rec > 1) {
++                                      n_pages += slot2->nth_rec - 1;
++                              }
++                      }
++              } else if (diverged_lot) {
++
++                      n_pages = (n_pages * (slot1->n_recs + slot2->n_recs))
++                              / 2;
++              }
++      }
++}
++
++/*******************************************************************//**
+ Estimates the number of different key values in a given index, for
+ each n-column prefix of the index where n <= dict_index_get_n_unique(index).
+ The estimates are stored in the array index->stat_n_diff_key_vals. */
+@@ -3516,18 +3765,38 @@
+       ulint           offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
+       ulint*          offsets_rec     = offsets_rec_;
+       ulint*          offsets_next_rec= offsets_next_rec_;
++      ulint           stats_method    = srv_stats_method;
++      btr_path_t      first_rec_path[BTR_PATH_ARRAY_N_SLOTS];
++      ulint           effective_pages; /* effective leaf pages */
+       rec_offs_init(offsets_rec_);
+       rec_offs_init(offsets_next_rec_);
+       n_cols = dict_index_get_n_unique(index);
++      if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) {
++              /* estimate effective pages and path for the first effective record */
++              /* TODO: make it work also for n_cols > 1. */
++              effective_pages = btr_estimate_n_pages_not_null(index, 1 /*k*/, first_rec_path);
++
++              if (!effective_pages) {
++                      for (j = 0; j <= n_cols; j++) {
++                              index->stat_n_diff_key_vals[j] = (ib_int64_t)index->stat_n_leaf_pages;
++                      }
++                      return;
++              } else if (effective_pages > index->stat_n_leaf_pages) {
++                      effective_pages = index->stat_n_leaf_pages;
++              }
++      } else {
++              effective_pages = index->stat_n_leaf_pages;
++      }
++
+       n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
+       /* It makes no sense to test more pages than are contained
+       in the index, thus we lower the number if it is too high */
+-      if (srv_stats_sample_pages > index->stat_index_size) {
+-              if (index->stat_index_size > 0) {
+-                      n_sample_pages = index->stat_index_size;
++      if (srv_stats_sample_pages > effective_pages) {
++              if (effective_pages > 0) {
++                      n_sample_pages = effective_pages;
+               } else {
+                       n_sample_pages = 1;
+               }
+@@ -3539,9 +3808,15 @@
+       for (i = 0; i < n_sample_pages; i++) {
+               rec_t*  supremum;
++              ibool   is_first_page = TRUE;
+               mtr_start(&mtr);
++              if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) {
++                      is_first_page = btr_cur_open_at_rnd_pos_after_path(index, BTR_SEARCH_LEAF,
++                                                                      first_rec_path, &cursor, &mtr);
++              } else {
+               btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
++              }
+               /* Count the number of different key values for each prefix of
+               the key on this index page. If the prefix does not determine
+@@ -3552,7 +3827,13 @@
+               page = btr_cur_get_page(&cursor);
+               supremum = page_get_supremum_rec(page);
++              if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS && is_first_page) {
++                      /* the cursor should be the first record of the page. */
++                      /* Counting should be started from here. */
++                      rec = btr_cur_get_rec(&cursor);
++              } else {
+               rec = page_rec_get_next(page_get_infimum_rec(page));
++              }
+               if (rec != supremum) {
+                       not_empty_flag = 1;
+@@ -3561,7 +3842,8 @@
+               }
+               while (rec != supremum) {
+-                      rec_t*  next_rec = page_rec_get_next(rec);
++                      rec_t*  next_rec;
++                      next_rec = page_rec_get_next(rec);
+                       if (next_rec == supremum) {
+                               break;
+                       }
+@@ -3575,7 +3857,10 @@
+                       cmp_rec_rec_with_match(rec, next_rec,
+                                              offsets_rec, offsets_next_rec,
+                                              index, &matched_fields,
+-                                             &matched_bytes);
++                                             &matched_bytes,
++                              (stats_method==SRV_STATS_METHOD_NULLS_NOT_EQUAL) ?
++                              SRV_STATS_METHOD_NULLS_NOT_EQUAL :
++                              SRV_STATS_METHOD_NULLS_EQUAL);
+                       for (j = matched_fields + 1; j <= n_cols; j++) {
+                               /* We add one if this index record has
+@@ -3636,7 +3921,7 @@
+       for (j = 0; j <= n_cols; j++) {
+               index->stat_n_diff_key_vals[j]
+                       = ((n_diff[j]
+-                          * (ib_int64_t)index->stat_n_leaf_pages
++                          * (ib_int64_t)effective_pages
+                           + n_sample_pages - 1
+                           + total_external_size
+                           + not_empty_flag)
+@@ -3651,7 +3936,7 @@
+               different key values, or even more. Let us try to approximate
+               that: */
+-              add_on = index->stat_n_leaf_pages
++              add_on = effective_pages
+                       / (10 * (n_sample_pages
+                                + total_external_size));
+@@ -3660,6 +3945,15 @@
+               }
+               index->stat_n_diff_key_vals[j] += add_on;
++
++              if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) {
++                      /* index->stat_n_diff_key_vals[k] is used for calc rec_per_key,
++                      as "stats.records / index->stat_n_diff_key_vals[x]".
++                      So it should be adjusted to the value which is based on whole of the index. */
++                      index->stat_n_diff_key_vals[j] =
++                              index->stat_n_diff_key_vals[j] * (ib_int64_t)index->stat_n_leaf_pages
++                                      / (ib_int64_t)effective_pages;
++              }
+       }
+       mem_free(n_diff);
+diff -ruN a/storage/innobase/dict/dict0boot.c b/storage/innobase/dict/dict0boot.c
+--- a/storage/innobase/dict/dict0boot.c        2010-12-03 15:48:03.034036843 +0900
++++ b/storage/innobase/dict/dict0boot.c        2010-12-03 17:19:24.835112632 +0900
+@@ -266,6 +266,29 @@
+       /* Get the dictionary header */
+       dict_hdr = dict_hdr_get(&mtr);
++      if (mach_read_from_8(dict_hdr + DICT_HDR_XTRADB_MARK)
++          != DICT_HDR_XTRADB_FLAG) {
++              /* not extended yet by XtraDB, need to be extended */
++              ulint   root_page_no;
++
++              root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
++                                        DICT_HDR_SPACE, 0, DICT_STATS_ID,
++                                        dict_ind_redundant, &mtr);
++              if (root_page_no == FIL_NULL) {
++                      fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n");
++                      srv_use_sys_stats_table = FALSE;
++              } else {
++                      mlog_write_ulint(dict_hdr + DICT_HDR_STATS, root_page_no,
++                                       MLOG_4BYTES, &mtr);
++                      mlog_write_ull(dict_hdr + DICT_HDR_XTRADB_MARK,
++                                        DICT_HDR_XTRADB_FLAG, &mtr);
++              }
++              mtr_commit(&mtr);
++              /* restart mtr */
++              mtr_start(&mtr);
++              dict_hdr = dict_hdr_get(&mtr);
++      }
++
+       /* Because we only write new row ids to disk-based data structure
+       (dictionary header) when it is divisible by
+       DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
+@@ -425,7 +448,7 @@
+       table->id = DICT_FIELDS_ID;
+       dict_table_add_to_cache(table, heap);
+       dict_sys->sys_fields = table;
+-      mem_heap_free(heap);
++      mem_heap_empty(heap);
+       index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
+                                     DICT_HDR_SPACE,
+@@ -442,6 +465,41 @@
+                                       FALSE);
+       ut_a(error == DB_SUCCESS);
++      /*-------------------------*/
++      table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 3, 0);
++      table->n_mysql_handles_opened = 1; /* for pin */
++
++      dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
++      dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4);
++      dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0);
++
++      /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */
++#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2
++#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2"
++#endif
++
++      table->id = DICT_STATS_ID;
++      dict_table_add_to_cache(table, heap);
++      dict_sys->sys_stats = table;
++      mem_heap_empty(heap);
++
++      index = dict_mem_index_create("SYS_STATS", "CLUST_IND",
++                                    DICT_HDR_SPACE,
++                                    DICT_UNIQUE | DICT_CLUSTERED, 2);
++
++      dict_mem_index_add_field(index, "INDEX_ID", 0);
++      dict_mem_index_add_field(index, "KEY_COLS", 0);
++
++      index->id = DICT_STATS_ID;
++      error = dict_index_add_to_cache(table, index,
++                                      mtr_read_ulint(dict_hdr
++                                                     + DICT_HDR_STATS,
++                                                     MLOG_4BYTES, &mtr),
++                                      FALSE);
++      ut_a(error == DB_SUCCESS);
++
++      mem_heap_free(heap);
++
+       mtr_commit(&mtr);
+       /*-------------------------*/
+@@ -455,6 +513,7 @@
+       dict_load_sys_table(dict_sys->sys_columns);
+       dict_load_sys_table(dict_sys->sys_indexes);
+       dict_load_sys_table(dict_sys->sys_fields);
++      dict_load_sys_table(dict_sys->sys_stats);
+       mutex_exit(&(dict_sys->mutex));
+ }
+diff -ruN a/storage/innobase/dict/dict0crea.c b/storage/innobase/dict/dict0crea.c
+--- a/storage/innobase/dict/dict0crea.c        2010-12-03 15:48:03.036081059 +0900
++++ b/storage/innobase/dict/dict0crea.c        2010-12-03 17:19:24.836964976 +0900
+@@ -508,6 +508,51 @@
+ }
+ /*****************************************************************//**
++Builds the SYS_STATS entry (INDEX_ID, KEY_COLS, DIFF_VALS) for an index;
++INDEX_ID is index->id, KEY_COLS is i and DIFF_VALS is initialized to 0.
++@return       the tuple which should be inserted */
++static
++dtuple_t*
++dict_create_sys_stats_tuple(
++/*========================*/
++      const dict_index_t*     index,  /*!< in: index; its id is stored */
++      ulint                   i,      /*!< in: value stored in KEY_COLS */
++      mem_heap_t*             heap)   /*!< in: heap for tuple and fields */
++{
++      dict_table_t*   sys_stats;
++      dtuple_t*       entry;
++      dfield_t*       dfield;
++      byte*           ptr;
++
++      ut_ad(index);
++      ut_ad(heap);
++
++      sys_stats = dict_sys->sys_stats;
++
++      entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
++
++      dict_table_copy_types(entry, sys_stats);
++
++      /* 0: INDEX_ID -----------------------*/
++      dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/);
++      ptr = mem_heap_alloc(heap, 8);
++      mach_write_to_8(ptr, index->id);
++      dfield_set_data(dfield, ptr, 8);
++      /* 1: KEY_COLS -----------------------*/
++      dfield = dtuple_get_nth_field(entry, 1/*KEY_COLS*/);
++      ptr = mem_heap_alloc(heap, 4);
++      mach_write_to_4(ptr, i);
++      dfield_set_data(dfield, ptr, 4);
++      /* 2: DIFF_VALS ----------------------*/
++      dfield = dtuple_get_nth_field(entry, 2/*DIFF_VALS*/);
++      ptr = mem_heap_alloc(heap, 8);
++      mach_write_to_8(ptr, 0); /* initial value is 0 */
++      dfield_set_data(dfield, ptr, 8);
++
++      return(entry);
++}
++
++/*****************************************************************//**
+ Creates the tuple with which the index entry is searched for writing the index
+ tree root page number, if such a tree is created.
+ @return       the tuple for search */
+@@ -617,6 +662,27 @@
+ }
+ /***************************************************************//**
++Creates the SYS_STATS row for node->stats_no and sets it as the new
++row of the stats insert node. @return DB_SUCCESS */
++static
++ulint
++dict_build_stats_def_step(
++/*======================*/
++      ind_node_t*     node)   /*!< in: index node */
++{
++      dtuple_t*       stats_row;
++
++      /* The tuple is allocated from node->heap. */
++      stats_row = dict_create_sys_stats_tuple(node->index,
++                                              node->stats_no,
++                                              node->heap);
++
++      ins_node_set_new_row(node->stats_def, stats_row);
++
++      return(DB_SUCCESS);
++}
++
++/***************************************************************//**
+ Creates an index tree for the index if it is not a member of a cluster.
+ @return       DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+ static
+@@ -937,6 +1003,49 @@
+                                         dict_sys->sys_fields, heap);
+       node->field_def->common.parent = node;
++      if (srv_use_sys_stats_table) {
++              node->stats_def = ins_node_create(INS_DIRECT,
++                                                dict_sys->sys_stats, heap);
++              node->stats_def->common.parent = node;
++      } else {
++              node->stats_def = NULL;
++      }
++
++      node->commit_node = commit_node_create(heap);
++      node->commit_node->common.parent = node;
++
++      return(node);
++}
++
++/*********************************************************************//**
++*/
++UNIV_INTERN
++ind_node_t*
++ind_insert_stats_graph_create(
++/*==========================*/
++      dict_index_t*   index,
++      mem_heap_t*     heap)
++{
++      ind_node_t*     node;
++
++      node = mem_heap_alloc(heap, sizeof(ind_node_t));
++
++      node->common.type = QUE_NODE_INSERT_STATS;
++
++      node->index = index;
++
++      node->state = INDEX_BUILD_STATS_COLS;
++      node->page_no = FIL_NULL;
++      node->heap = mem_heap_create(256);
++
++      node->ind_def = NULL;
++      node->field_def = NULL;
++
++      node->stats_def = ins_node_create(INS_DIRECT,
++                                        dict_sys->sys_stats, heap);
++      node->stats_def->common.parent = node;
++      node->stats_no = 0;
++
+       node->commit_node = commit_node_create(heap);
+       node->commit_node->common.parent = node;
+@@ -1087,6 +1196,7 @@
+               node->state = INDEX_BUILD_FIELD_DEF;
+               node->field_no = 0;
++              node->stats_no = 0;
+               thr->run_node = node->ind_def;
+@@ -1132,7 +1242,31 @@
+                       goto function_exit;
+               }
+-              node->state = INDEX_CREATE_INDEX_TREE;
++              if (srv_use_sys_stats_table
++                  && !((node->table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) {
++                      node->state = INDEX_BUILD_STATS_COLS;
++              } else {
++                      node->state = INDEX_CREATE_INDEX_TREE;
++              }
++      }
++      if (node->state == INDEX_BUILD_STATS_COLS) {
++              if (node->stats_no <= dict_index_get_n_unique(node->index)) {
++
++                      err = dict_build_stats_def_step(node);
++
++                      if (err != DB_SUCCESS) {
++
++                              goto function_exit;
++                      }
++
++                      node->stats_no++;
++
++                      thr->run_node = node->stats_def;
++
++                      return(thr);
++              } else {
++                      node->state = INDEX_CREATE_INDEX_TREE;
++              }
+       }
+       if (node->state == INDEX_CREATE_INDEX_TREE) {
+@@ -1178,6 +1312,66 @@
+               return(NULL);
+       }
++      thr->run_node = que_node_get_parent(node);
++
++      return(thr);
++}
++
++/****************************************************************//**
++*/
++UNIV_INTERN
++que_thr_t*
++dict_insert_stats_step(
++/*===================*/
++      que_thr_t*      thr)    /*!< in: query thread */
++{
++      ind_node_t*     node;
++      ulint           err     = DB_ERROR;
++      trx_t*          trx;
++
++      ut_ad(thr);
++
++      trx = thr_get_trx(thr);
++
++      node = thr->run_node;
++
++      if (thr->prev_node == que_node_get_parent(node)) {
++              node->state = INDEX_BUILD_STATS_COLS;
++      }
++
++      if (node->state == INDEX_BUILD_STATS_COLS) {
++              if (node->stats_no <= dict_index_get_n_unique(node->index)) {
++
++                      err = dict_build_stats_def_step(node);
++
++                      if (err != DB_SUCCESS) {
++
++                              goto function_exit;
++                      }
++
++                      node->stats_no++;
++
++                      thr->run_node = node->stats_def;
++
++                      return(thr);
++              } else {
++                      node->state = INDEX_COMMIT_WORK;
++              }
++      }
++
++      if (node->state == INDEX_COMMIT_WORK) {
++
++              /* do not commit transaction here for now */
++      }
++
++function_exit:
++      trx->error_state = err;
++
++      if (err == DB_SUCCESS) {
++      } else {
++              return(NULL);
++      }
++
+       thr->run_node = que_node_get_parent(node);
+       return(thr);
+diff -ruN a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
+--- a/storage/innobase/dict/dict0dict.c        2010-12-03 15:48:03.040222428 +0900
++++ b/storage/innobase/dict/dict0dict.c        2010-12-03 17:19:24.841947690 +0900
+@@ -754,7 +754,7 @@
+               print an error message and return without doing
+               anything. */
+               dict_update_statistics(table, TRUE /* only update stats
+-                                     if they have not been initialized */);
++                                     if they have not been initialized */, FALSE);
+       }
+       return(table);
+@@ -4291,6 +4291,240 @@
+ }
+ /*********************************************************************//**
++Reloads the index statistics of a table using SYS_STATS. @return TRUE if all rows were found */
++static
++ibool
++dict_reload_statistics(
++/*===================*/
++      dict_table_t*   table,                  /*!< in/out: table whose index stats are set */
++      ulint*          sum_of_index_sizes)     /*!< in/out: incremented by each index size */
++{
++      dict_index_t*   index;
++      ulint           size;
++      mem_heap_t*     heap;
++
++      index = dict_table_get_first_index(table);
++
++      if (index == NULL) {
++              /* Table definition is corrupt */
++
++              return(FALSE);
++      }
++
++      heap = mem_heap_create(1000);
++
++      while (index) {
++              size = btr_get_size(index, BTR_TOTAL_SIZE);
++
++              index->stat_index_size = size;
++
++              *sum_of_index_sizes += size;
++
++              size = btr_get_size(index, BTR_N_LEAF_PAGES);
++
++              if (size == 0) {
++                      /* The root node of the tree is a leaf */
++                      size = 1;
++              }
++
++              index->stat_n_leaf_pages = size;
++
++/*===========================================*/
++{
++      dict_table_t*   sys_stats;
++      dict_index_t*   sys_index;
++      btr_pcur_t      pcur;
++      dtuple_t*       tuple;
++      dfield_t*       dfield;
++      ulint           key_cols;
++      ulint           n_cols;
++      const rec_t*    rec;
++      const byte*     field;
++      ulint           len;
++      ib_int64_t*     stat_n_diff_key_vals_tmp;
++      byte*           buf;
++      ulint           i;
++      mtr_t           mtr;
++
++      n_cols = dict_index_get_n_unique(index);
++      stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t));
++
++      sys_stats = dict_sys->sys_stats;
++      sys_index = UT_LIST_GET_FIRST(sys_stats->indexes);
++      ut_a(!dict_table_is_comp(sys_stats));
++      /* Search tuple: only the 8-byte INDEX_ID of this index. */
++      tuple = dtuple_create(heap, 1);
++      dfield = dtuple_get_nth_field(tuple, 0);
++
++      buf = mem_heap_alloc(heap, 8);
++      mach_write_to_8(buf, index->id);
++
++      dfield_set_data(dfield, buf, 8);
++      dict_index_copy_types(tuple, sys_index, 1);
++
++      mtr_start(&mtr);
++
++      btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
++                                BTR_SEARCH_LEAF, &pcur, &mtr);
++      for (i = 0; i <= n_cols; i++) {
++              rec = btr_pcur_get_rec(&pcur);
++
++              if (!btr_pcur_is_on_user_rec(&pcur)
++                  || mach_read_from_8(rec_get_nth_field_old(rec, 0, &len))
++                      != index->id) {
++                      /* not found: even one missing row is not allowed */
++                      fprintf(stderr, "InnoDB: Warning: stats for %s/%s (%lu/%lu)"
++                                      " not found in SYS_STATS\n",
++                                      index->table_name, index->name, (unsigned long) i, (unsigned long) n_cols);
++                      btr_pcur_close(&pcur);
++                      mtr_commit(&mtr);
++                      mem_heap_free(heap);
++                      return(FALSE);
++              }
++
++              if (rec_get_deleted_flag(rec, 0)) {
++                      goto next_rec;
++              }
++
++              field = rec_get_nth_field_old(rec, 1, &len);
++              ut_a(len == 4);
++
++              key_cols = mach_read_from_4(field);
++              /* the KEY_COLS of the row must equal the loop counter */
++              ut_a(i == key_cols);
++
++              field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len);
++              ut_a(len == 8);
++
++              stat_n_diff_key_vals_tmp[i] = mach_read_from_8(field);
++next_rec:
++              btr_pcur_move_to_next_user_rec(&pcur, &mtr);
++      }
++
++      btr_pcur_close(&pcur);
++      mtr_commit(&mtr);
++      /* publish the staged values only after every row was read */
++      for (i = 0; i <= n_cols; i++) {
++              index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i];
++      }
++}
++/*===========================================*/
++
++              index = dict_table_get_next_index(index);
++      }
++
++      mem_heap_free(heap);
++      return(TRUE);
++}
++/* Stores the in-memory n_diff statistics of each index of the table in SYS_STATS. */
++static
++void
++dict_store_statistics(
++/*==================*/
++      dict_table_t*   table)  /*!< in: table whose index stats are stored */
++{
++      dict_index_t*   index;
++      mem_heap_t*     heap;
++
++      index = dict_table_get_first_index(table);
++
++      ut_a(index);
++
++      heap = mem_heap_create(1000);
++
++      while (index) {
++/*===========================================*/
++{
++      dict_table_t*   sys_stats;
++      dict_index_t*   sys_index;
++      btr_pcur_t      pcur;
++      dtuple_t*       tuple;
++      dfield_t*       dfield;
++      ulint           key_cols;
++      ulint           n_cols;
++      ulint           rests;
++      const rec_t*    rec;
++      const byte*     field;
++      ulint           len;
++      ib_int64_t*     stat_n_diff_key_vals_tmp;
++      byte*           buf;
++      ulint           i;
++      mtr_t           mtr;
++
++      n_cols = dict_index_get_n_unique(index);
++      stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t));
++
++      for (i = 0; i <= n_cols; i++) {
++              stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i];
++      }
++
++      sys_stats = dict_sys->sys_stats;
++      sys_index = UT_LIST_GET_FIRST(sys_stats->indexes);
++      ut_a(!dict_table_is_comp(sys_stats));
++
++      tuple = dtuple_create(heap, 1);
++      dfield = dtuple_get_nth_field(tuple, 0);
++
++      buf = mem_heap_alloc(heap, 8);
++      mach_write_to_8(buf, index->id);
++
++      dfield_set_data(dfield, buf, 8);
++      dict_index_copy_types(tuple, sys_index, 1);
++
++      mtr_start(&mtr);
++
++      btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
++                                BTR_MODIFY_LEAF, &pcur, &mtr);
++      rests = n_cols + 1;
++      for (i = 0; i <= n_cols; i++) {
++              rec = btr_pcur_get_rec(&pcur);
++
++              if (!btr_pcur_is_on_user_rec(&pcur)
++                  || mach_read_from_8(rec_get_nth_field_old(rec, 0, &len))
++                      != index->id) {
++                      /* not found: just leave the loop; the cursor is
++                      closed and the mtr committed exactly once, below,
++                      so it must not be done here as well */
++                      break;
++              }
++
++              if (rec_get_deleted_flag(rec, 0)) {
++                      goto next_rec;
++              }
++
++              field = rec_get_nth_field_old(rec, 1, &len);
++              ut_a(len == 4);
++
++              key_cols = mach_read_from_4(field);
++              if (key_cols > n_cols) {
++                      /* no such slot in the copied array: skip the row */
++                      goto next_rec;
++              }
++              field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len);
++              ut_a(len == 8);
++              mlog_write_ull((byte*)field, stat_n_diff_key_vals_tmp[key_cols], &mtr);
++              rests--;
++next_rec:
++              btr_pcur_move_to_next_user_rec(&pcur, &mtr);
++      }
++      btr_pcur_close(&pcur);
++      mtr_commit(&mtr);
++
++      if (rests) {
++              fprintf(stderr, "InnoDB: Warning: failed to store %lu stats entries"
++                              " of %s/%s to SYS_STATS system table.\n",
++                              (unsigned long) rests, index->table_name, index->name);
++      }
++}
++/*===========================================*/
++
++              index = dict_table_get_next_index(index);
++      }
++
++      mem_heap_free(heap);
++}
++
++/*********************************************************************//**
+ Calculates new estimates for table and index statistics. The statistics
+ are used in query optimization. */
+ UNIV_INTERN
+@@ -4298,10 +4532,11 @@
+ dict_update_statistics(
+ /*===================*/
+       dict_table_t*   table,          /*!< in/out: table */
+-      ibool           only_calc_if_missing_stats)/*!< in: only
++      ibool           only_calc_if_missing_stats,/*!< in: only
+                                       update/recalc the stats if they have
+                                       not been initialized yet, otherwise
+                                       do nothing */
++      ibool           sync)           /*!< in: TRUE if must update SYS_STATS */
+ {
+       dict_index_t*   index;
+       ulint           sum_of_index_sizes      = 0;
+@@ -4318,6 +4553,27 @@
+               return;
+       }
++      if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) && !sync) {
++              dict_table_stats_lock(table, RW_X_LATCH);
++
++              /* reload statistics from SYS_STATS table */
++              if (dict_reload_statistics(table, &sum_of_index_sizes)) {
++                      /* success */
++#ifdef UNIV_DEBUG
++                      fprintf(stderr, "InnoDB: DEBUG: reload_statistics is scceeded for %s.\n",
++                                      table->name);
++#endif
++                      goto end;
++              }
++
++              dict_table_stats_unlock(table, RW_X_LATCH);
++      }
++#ifdef UNIV_DEBUG
++      fprintf(stderr, "InnoDB: DEBUG: update_statistics for %s.\n",
++                      table->name);
++#endif
++      sum_of_index_sizes = 0;
++
+       /* Find out the sizes of the indexes and how many different values
+       for the key they approximately have */
+@@ -4378,6 +4634,11 @@
+               index = dict_table_get_next_index(index);
+       } while (index);
++      if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) {
++              /* store statistics to SYS_STATS table */
++              dict_store_statistics(table);
++      }
++end:
+       index = dict_table_get_first_index(table);
+       table->stat_n_rows = index->stat_n_diff_key_vals[
+@@ -4472,7 +4733,8 @@
+       ut_ad(mutex_own(&(dict_sys->mutex)));
+-      dict_update_statistics(table, FALSE /* update even if initialized */);
++      if (srv_stats_auto_update)
++              dict_update_statistics(table, FALSE /* update even if initialized */, FALSE);
+       dict_table_stats_lock(table, RW_S_LATCH);
+diff -ruN a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
+--- a/storage/innobase/dict/dict0load.c        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/dict/dict0load.c        2010-12-03 17:19:24.845947460 +0900
+@@ -49,7 +49,8 @@
+       "SYS_COLUMNS",
+       "SYS_FIELDS",
+       "SYS_FOREIGN",
+-      "SYS_FOREIGN_COLS"
++      "SYS_FOREIGN_COLS",
++      "SYS_STATS"
+ };
+ /****************************************************************//**
+ Compare the name of an index column.
+@@ -342,12 +343,13 @@
+       }
+       if ((status & DICT_TABLE_UPDATE_STATS)
++          && srv_stats_auto_update
+           && dict_table_get_first_index(*table)) {
+               /* Update statistics if DICT_TABLE_UPDATE_STATS
+               is set */
+               dict_update_statistics(*table, FALSE /* update even if
+-                                     initialized */);
++                                     initialized */, FALSE);
+       }
+       return(NULL);
+@@ -565,6 +567,61 @@
+       return(NULL);
+ }
++/********************************************************************//**
++Validates a SYS_STATS record (not delete-marked, 5 fields, expected
++field lengths) and extracts INDEX_ID, KEY_COLS and DIFF_VALS from it.
++@return error message, or NULL on success */
++UNIV_INTERN
++const char*
++dict_process_sys_stats_rec(
++/*=============================*/
++      mem_heap_t*     heap,           /*!< in/out: heap memory; not referenced in this function */
++      const rec_t*    rec,            /*!< in: current SYS_STATS rec */
++      index_id_t*     index_id,       /*!< out: INDEX_ID */
++      ulint*          key_cols,       /*!< out: KEY_COLS */
++      ib_uint64_t*    diff_vals)      /*!< out: DIFF_VALS */
++{
++      ulint           len;
++      const byte*     field;
++
++      if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) {
++              return("delete-marked record in SYS_STATS");
++      }
++      /* INDEX_ID, KEY_COLS, DB_TRX_ID, DB_ROLL_PTR, DIFF_VALS */
++      if (UNIV_UNLIKELY(rec_get_n_fields_old(rec) != 5)) {
++              return("wrong number of columns in SYS_STATS record");
++      }
++
++      field = rec_get_nth_field_old(rec, 0/*INDEX_ID*/, &len);
++      if (UNIV_UNLIKELY(len != 8)) {
++err_len:      /* also the goto target of the length checks below */
++              return("incorrect column length in SYS_STATS");
++      }
++      *index_id = mach_read_from_8(field);
++
++      field = rec_get_nth_field_old(rec, 1/*KEY_COLS*/, &len);
++      if (UNIV_UNLIKELY(len != 4)) {
++              goto err_len;
++      }
++      *key_cols = mach_read_from_4(field);
++      /* System columns: only their lengths are checked. */
++      rec_get_nth_field_offs_old(rec, 2/*DB_TRX_ID*/, &len);
++      if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) {
++              goto err_len;
++      }
++      rec_get_nth_field_offs_old(rec, 3/*DB_ROLL_PTR*/, &len);
++      if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) {
++              goto err_len;
++      }
++
++      field = rec_get_nth_field_old(rec, 4/*DIFF_VALS*/, &len);
++      if (UNIV_UNLIKELY(len != 8)) {
++              goto err_len;
++      }
++      *diff_vals = mach_read_from_8(field);
++
++      return(NULL);
++}
+ /********************************************************************//**
+ Determine the flags of a table described in SYS_TABLES.
+ @return compressed page size in kilobytes; or 0 if the tablespace is
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:17:03.665960357 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:22:21.586939783 +0900
+@@ -187,6 +187,7 @@
+ static my_bool        innobase_rollback_on_timeout            = FALSE;
+ static my_bool        innobase_create_status_file             = FALSE;
+ static my_bool        innobase_stats_on_metadata              = TRUE;
++static my_bool        innobase_use_sys_stats_table            = FALSE;
+ static char*  internal_innobase_data_file_path        = NULL;
+@@ -2387,6 +2388,8 @@
+               goto error;
+       }
++      srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table;
++
+       /* -------------- Log files ---------------------------*/
+       /* The default dir for log files is the datadir of MySQL */
+@@ -5190,6 +5193,10 @@
+       error = row_insert_for_mysql((byte*) record, prebuilt);
++#ifdef EXTENDED_FOR_USERSTAT
++      if (error == DB_SUCCESS) rows_changed++;
++#endif
++
+       /* Handle duplicate key errors */
+       if (auto_inc_used) {
+               ulint           err;
+@@ -5526,6 +5533,10 @@
+               }
+       }
++#ifdef EXTENDED_FOR_USERSTAT
++      if (error == DB_SUCCESS) rows_changed++;
++#endif
++
+       innodb_srv_conc_exit_innodb(trx);
+       error = convert_error_code_to_mysql(error,
+@@ -5579,6 +5590,10 @@
+       error = row_update_for_mysql((byte*) record, prebuilt);
++#ifdef EXTENDED_FOR_USERSTAT
++      if (error == DB_SUCCESS) rows_changed++;
++#endif
++
+       innodb_srv_conc_exit_innodb(trx);
+       error = convert_error_code_to_mysql(
+@@ -6106,6 +6121,11 @@
+       case DB_SUCCESS:
+               error = 0;
+               table->status = 0;
++#ifdef EXTENDED_FOR_USERSTAT
++              rows_read++;
++              if (active_index >= 0 && active_index < MAX_KEY)
++                      index_rows_read[active_index]++;
++#endif
+               break;
+       case DB_RECORD_NOT_FOUND:
+               error = HA_ERR_END_OF_FILE;
+@@ -8000,11 +8020,31 @@
+                       /* In sql_show we call with this flag: update
+                       then statistics so that they are up-to-date */
++                      if (srv_use_sys_stats_table && !((ib_table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)
++                          && called_from_analyze) {
++                              /* If the indexes on the table don't have enough rows in SYS_STATS system table, */
++                              /* they need to be created. */
++                              dict_index_t*   index;
++
++                              prebuilt->trx->op_info = "confirming rows of SYS_STATS to store statistics";
++
++                              ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED);
++
++                              for (index = dict_table_get_first_index(ib_table);
++                                   index != NULL;
++                                   index = dict_table_get_next_index(index)) {
++                                      row_insert_stats_for_mysql(index, prebuilt->trx);
++                                      innobase_commit_low(prebuilt->trx);
++                              }
++
++                              ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED);
++                      }
++
+                       prebuilt->trx->op_info = "updating table statistics";
+                       dict_update_statistics(ib_table,
+                                              FALSE /* update even if stats
+-                                                   are initialized */);
++                                                   are initialized */, called_from_analyze);
+                       prebuilt->trx->op_info = "returning various info to MySQL";
+               }
+@@ -8082,7 +8122,7 @@
+               are asked by MySQL to avoid locking. Another reason to
+               avoid the call is that it uses quite a lot of CPU.
+               See Bug#38185. */
+-              if (flag & HA_STATUS_NO_LOCK) {
++              if (flag & HA_STATUS_NO_LOCK || !srv_stats_update_need_lock) {
+                       /* We do not update delete_length if no
+                       locking is requested so the "old" value can
+                       remain. delete_length is initialized to 0 in
+@@ -11283,6 +11323,45 @@
+   "The number of index pages to sample when calculating statistics (default 8)",
+   NULL, NULL, 8, 1, ~0ULL, 0);
++const char *innobase_stats_method_names[]=      /* value names offered by the stats_method system variable */
++{
++  "nulls_equal",
++  "nulls_unequal",
++  "nulls_ignored",
++  NullS                                         /* terminator; excluded from the typelib count below */
++};
++TYPELIB innobase_stats_method_typelib=
++{
++  array_elements(innobase_stats_method_names) - 1, "innobase_stats_method_typelib",
++  innobase_stats_method_names, NULL
++};
++static MYSQL_SYSVAR_ENUM(stats_method, srv_stats_method,      /* value is kept in srv_stats_method */
++  PLUGIN_VAR_RQCMDARG,
++  "Specifies how InnoDB index statistics collection code should treat NULLs. "
++  "Possible values are the same as for 'myisam_stats_method'. "
++  "This is startup parameter.",
++  NULL, NULL, 0, &innobase_stats_method_typelib);      /* presumably default 0 selects "nulls_equal" - confirm against MYSQL_SYSVAR_ENUM */
++
++static MYSQL_SYSVAR_ULONG(stats_auto_update, srv_stats_auto_update,   /* value is kept in srv_stats_auto_update */
++  PLUGIN_VAR_RQCMDARG,
++  "Enable/Disable InnoDB's auto update statistics of indexes. "
++  "(except for ANALYZE TABLE command) 0:disable 1:enable",
++  NULL, NULL, 1, 0, 1, 0);      /* presumably default 1, min 0, max 1 - confirm against MYSQL_SYSVAR_ULONG */
++
++static MYSQL_SYSVAR_ULONG(stats_update_need_lock, srv_stats_update_need_lock, /* value is kept in srv_stats_update_need_lock */
++  PLUGIN_VAR_RQCMDARG,
++  "Enable/Disable InnoDB's update statistics which needs to lock dictionary. "
++  "e.g. Data_free.",
++  NULL, NULL, 1, 0, 1, 0);      /* presumably default 1, min 0, max 1 - confirm against MYSQL_SYSVAR_ULONG */
++
++static MYSQL_SYSVAR_BOOL(use_sys_stats_table, innobase_use_sys_stats_table,   /* copied into srv_use_sys_stats_table elsewhere in this file */
++  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++  "Enable to use SYS_STATS system table to store statistics statically, "
++  "And avoids to calculate statistics at every first open of the tables. "
++  "This option may make the opportunities of update statistics less. "
++  "So you should use ANALYZE TABLE command intentionally.",
++  NULL, NULL, FALSE);   /* presumably the default value (disabled) - confirm against MYSQL_SYSVAR_BOOL */
++
+ static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
+   PLUGIN_VAR_OPCMDARG,
+   "Enable InnoDB adaptive hash index (enabled by default).  "
+@@ -11611,6 +11690,10 @@
+   MYSQL_SYSVAR(overwrite_relay_log_info),
+   MYSQL_SYSVAR(rollback_on_timeout),
+   MYSQL_SYSVAR(stats_on_metadata),
++  MYSQL_SYSVAR(stats_method),
++  MYSQL_SYSVAR(stats_auto_update),
++  MYSQL_SYSVAR(stats_update_need_lock),
++  MYSQL_SYSVAR(use_sys_stats_table),
+   MYSQL_SYSVAR(stats_sample_pages),
+   MYSQL_SYSVAR(adaptive_hash_index),
+   MYSQL_SYSVAR(replication_delay),
+@@ -11680,7 +11763,10 @@
+ i_s_innodb_sys_columns,
+ i_s_innodb_sys_fields,
+ i_s_innodb_sys_foreign,
+-i_s_innodb_sys_foreign_cols
++i_s_innodb_sys_foreign_cols,
++i_s_innodb_sys_stats,
++i_s_innodb_table_stats,
++i_s_innodb_index_stats
+ mysql_declare_plugin_end;
+ /** @brief Initialize the default value of innodb_commit_concurrency.
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc  2010-12-03 17:17:03.666956117 +0900
++++ b/storage/innobase/handler/i_s.cc  2010-12-03 17:19:24.880964526 +0900
+@@ -49,6 +49,7 @@
+ #include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */
+ #include "trx0rseg.h" /* for trx_rseg_struct */
+ #include "trx0sys.h" /* for trx_sys */
++#include "dict0dict.h" /* for dict_sys */
+ }
+ static const char plugin_author[] = "Innobase Oy";
+@@ -3458,6 +3459,203 @@
+       STRUCT_FLD(__reserved1, NULL)
+ };
++/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_stats; the SYS_STATS_* macros are the column positions used by i_s_dict_fill_sys_stats() */
++static ST_FIELD_INFO  innodb_sys_stats_fields_info[] =
++{
++#define SYS_STATS_INDEX_ID    0
++      {STRUCT_FLD(field_name,         "INDEX_ID"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_STATS_KEY_COLS    1
++      {STRUCT_FLD(field_name,         "KEY_COLS"),
++       STRUCT_FLD(field_length,       MY_INT32_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++#define SYS_STATS_DIFF_VALS   2
++      {STRUCT_FLD(field_name,         "DIFF_VALS"),
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++/**********************************************************************//**
++Function to fill information_schema.innodb_sys_stats: stores one (INDEX_ID, KEY_COLS, DIFF_VALS) triple into the fields of table_to_fill and appends it as a record.
++@return 0 on success (each step is wrapped in OK(), which presumably returns early with non-zero on failure - macro defined elsewhere) */
++static
++int
++i_s_dict_fill_sys_stats(
++/*====================*/
++      THD*            thd,            /*!< in: thread */
++      index_id_t      index_id,       /*!< in: INDEX_ID */
++      ulint           key_cols,       /*!< in: KEY_COLS */
++      ib_uint64_t     diff_vals,      /*!< in: DIFF_VALS */
++      TABLE*          table_to_fill)  /*!< in/out: fill this table */
++{
++      Field**         fields;
++
++      DBUG_ENTER("i_s_dict_fill_sys_stats");
++
++      fields = table_to_fill->field;
++
++      OK(fields[SYS_STATS_INDEX_ID]->store(longlong(index_id), TRUE));  /* TRUE presumably marks the value as unsigned - confirm against Field::store() */
++
++      OK(fields[SYS_STATS_KEY_COLS]->store(key_cols));
++
++      OK(fields[SYS_STATS_DIFF_VALS]->store(longlong(diff_vals), TRUE));
++
++      OK(schema_table_store_record(thd, table_to_fill));        /* append the row assembled above */
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Function to populate INFORMATION_SCHEMA.innodb_sys_stats table.
++@return 0 on success */
++static
++int
++i_s_sys_stats_fill_table(
++/*=====================*/
++      THD*            thd,    /*!< in: thread */
++      TABLE_LIST*     tables, /*!< in/out: tables to fill */
++      COND*           cond)   /*!< in: condition (not used) */
++{
++        btr_pcur_t    pcur;
++      const rec_t*    rec;
++      mem_heap_t*     heap;
++      mtr_t           mtr;
++
++      DBUG_ENTER("i_s_sys_stats_fill_table");
++
++      /* deny access to non-superusers */
++      if (check_global_access(thd, PROCESS_ACL)) {
++                DBUG_RETURN(0);
++      }
++
++        heap = mem_heap_create(1000);
++        mutex_enter(&dict_sys->mutex);
++        mtr_start(&mtr);
++
++      rec = dict_startscan_system(&pcur, &mtr, SYS_STATS);
++
++      while (rec) {
++              const char*     err_msg;
++              index_id_t      index_id;
++              ulint           key_cols;
++              ib_uint64_t     diff_vals;
++
++              /* Extract necessary information from a SYS_FOREIGN_COLS row */
++              err_msg = dict_process_sys_stats_rec(
++                      heap, rec, &index_id, &key_cols, &diff_vals);
++
++              mtr_commit(&mtr);
++              mutex_exit(&dict_sys->mutex);
++
++              if (!err_msg) {
++                      i_s_dict_fill_sys_stats(
++                              thd, index_id, key_cols, diff_vals,
++                              tables->table);
++              } else {
++                      push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++                                          ER_CANT_FIND_SYSTEM_REC,
++                                          err_msg);
++              }
++
++              mem_heap_empty(heap);
++
++              /* Get the next record */
++              mutex_enter(&dict_sys->mutex);
++              mtr_start(&mtr);
++              rec = dict_getnext_system(&pcur, &mtr);
++      }
++
++      mtr_commit(&mtr);
++      mutex_exit(&dict_sys->mutex);
++      mem_heap_free(heap);
++
++      DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_stats: installs its column definitions and its fill callback on the schema object.
++@return 0 on success (no failure path exists in this function) */
++static
++int
++innodb_sys_stats_init(
++/*========================*/
++        void*   p)      /*!< in/out: table schema object */
++{
++        ST_SCHEMA_TABLE*        schema;
++
++        DBUG_ENTER("innodb_sys_stats_init");
++
++        schema = (ST_SCHEMA_TABLE*) p;
++
++        schema->fields_info = innodb_sys_stats_fields_info;     /* column layout */
++        schema->fill_table = i_s_sys_stats_fill_table;          /* row producer */
++
++        DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_sys_stats =
++{
++      /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++      /* int */
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++      /* pointer to type-specific plugin descriptor */
++      /* void* */
++      STRUCT_FLD(info, &i_s_info),
++
++      /* plugin name */
++      /* const char* */
++      STRUCT_FLD(name, "INNODB_SYS_STATS"),
++
++      /* plugin author (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(author, plugin_author),
++
++      /* general descriptive text (for SHOW PLUGINS) */
++      /* const char* */
++      STRUCT_FLD(descr, "XtraDB SYS_STATS table"),
++
++      /* the plugin license (PLUGIN_LICENSE_XXX) */
++      /* int */
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++      /* the function to invoke when plugin is loaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(init, innodb_sys_stats_init),
++
++      /* the function to invoke when plugin is unloaded */
++      /* int (*)(void*); */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++
++      /* plugin version (for SHOW PLUGINS) */
++      /* unsigned int */
++      STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++      /* struct st_mysql_show_var* */
++      STRUCT_FLD(status_vars, NULL),
++
++      /* struct st_mysql_sys_var** */
++      STRUCT_FLD(system_vars, NULL),
++
++      /* reserved for dependency checking */
++      /* void* */
++      STRUCT_FLD(__reserved1, NULL)
++};
++
+ /***********************************************************************
+ */
+ static ST_FIELD_INFO  i_s_innodb_rseg_fields_info[] =
+@@ -3620,3 +3818,347 @@
+       /* void* */
+       STRUCT_FLD(__reserved1, NULL)
+ };
++
++/***********************************************************************
++Fields of INFORMATION_SCHEMA.INNODB_TABLE_STATS; i_s_innodb_table_stats_fill() addresses them by position (field[0]..field[5]), so the order matters */
++static ST_FIELD_INFO  i_s_innodb_table_stats_info[] =
++{
++      {STRUCT_FLD(field_name,         "table_schema"),        /* field[0]: part of table->name before '/' */
++       STRUCT_FLD(field_length,       NAME_LEN),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "table_name"),          /* field[1]: part of table->name after '/' */
++       STRUCT_FLD(field_length,       NAME_LEN),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "rows"),                /* field[2]: table->stat_n_rows */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "clust_size"),          /* field[3]: table->stat_clustered_index_size */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "other_size"),          /* field[4]: table->stat_sum_of_other_index_sizes */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "modified"),            /* field[5]: table->stat_modified_counter */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++static ST_FIELD_INFO  i_s_innodb_index_stats_info[] =   /* columns of INNODB_INDEX_STATS; i_s_innodb_index_stats_fill() addresses them as field[0]..field[6] */
++{
++      {STRUCT_FLD(field_name,         "table_schema"),        /* field[0]: part of table->name before '/' */
++       STRUCT_FLD(field_length,       NAME_LEN),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "table_name"),          /* field[1]: part of table->name after '/' */
++       STRUCT_FLD(field_length,       NAME_LEN),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "index_name"),          /* field[2]: index->name */
++       STRUCT_FLD(field_length,       NAME_LEN),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "fields"),              /* field[3]: index->n_uniq */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "rows_per_key"),        /* field[4]: comma-separated list built by the fill function */
++       STRUCT_FLD(field_length,       256),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_STRING),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        0),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "index_total_pages"),   /* field[5]: index->stat_index_size */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      {STRUCT_FLD(field_name,         "index_leaf_pages"),    /* field[6]: index->stat_n_leaf_pages */
++       STRUCT_FLD(field_length,       MY_INT64_NUM_DECIMAL_DIGITS),
++       STRUCT_FLD(field_type,         MYSQL_TYPE_LONGLONG),
++       STRUCT_FLD(value,              0),
++       STRUCT_FLD(field_flags,        MY_I_S_UNSIGNED),
++       STRUCT_FLD(old_name,           ""),
++       STRUCT_FLD(open_method,        SKIP_OPEN_TABLE)},
++
++      END_OF_ST_FIELD_INFO
++};
++
++static        /* Fill INFORMATION_SCHEMA.INNODB_TABLE_STATS: one row per table on dict_sys->table_LRU whose clustered index size stat is non-zero */
++int   /* @return 0 on success or when access is denied, 1 if storing a row failed */
++i_s_innodb_table_stats_fill(
++/*========================*/
++      THD*            thd,    /*!< in: thread */
++      TABLE_LIST*     tables, /*!< in/out: tables->table receives the rows */
++      COND*           cond)   /*!< in: not referenced in this function */
++{
++      TABLE*  i_s_table       = (TABLE *) tables->table;
++      int     status  = 0;
++      dict_table_t*   table;
++
++      DBUG_ENTER("i_s_innodb_table_stats_fill");
++
++      /* deny access to users failing the PROCESS_ACL check; they get an empty result, not an error */
++      if (check_global_access(thd, PROCESS_ACL)) {
++              DBUG_RETURN(0);
++      }
++
++      mutex_enter(&(dict_sys->mutex));        /* held for the whole list walk, including the row stores */
++
++      table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
++
++      while (table) {
++              char    buf[NAME_LEN * 2 + 2];
++              char*   ptr;
++
++              if (table->stat_clustered_index_size == 0) {    /* skip; presumably statistics not calculated yet - confirm */
++                      table = UT_LIST_GET_NEXT(table_LRU, table);
++                      continue;
++              }
++
++              buf[NAME_LEN * 2 + 1] = 0;      /* terminator set first: strncpy() below writes at most NAME_LEN * 2 + 1 bytes, so this last byte stays 0 */
++              strncpy(buf, table->name, NAME_LEN * 2 + 1);
++              ptr = strchr(buf, '/');         /* split table->name at the first '/' */
++              if (ptr) {
++                      *ptr = '\0';
++                      ++ptr;
++              } else {
++                      ptr = buf;      /* no '/': both name columns receive the whole string */
++              }
++
++              field_store_string(i_s_table->field[0], buf);   /* table_schema */
++              field_store_string(i_s_table->field[1], ptr);   /* table_name */
++              i_s_table->field[2]->store(table->stat_n_rows);
++              i_s_table->field[3]->store(table->stat_clustered_index_size);
++              i_s_table->field[4]->store(table->stat_sum_of_other_index_sizes);
++              i_s_table->field[5]->store(table->stat_modified_counter);
++
++              if (schema_table_store_record(thd, i_s_table)) {
++                      status = 1;     /* stop at the first row that cannot be stored */
++                      break;
++              }
++
++              table = UT_LIST_GET_NEXT(table_LRU, table);
++      }
++
++      mutex_exit(&(dict_sys->mutex));
++
++      DBUG_RETURN(status);
++}
++
++static        /* Fill INFORMATION_SCHEMA.INNODB_INDEX_STATS: one row per index of every table on dict_sys->table_LRU whose clustered index size stat is non-zero */
++int   /* @return 0 on success or when access is denied, 1 if storing a row failed */
++i_s_innodb_index_stats_fill(
++/*========================*/
++      THD*            thd,    /*!< in: thread */
++      TABLE_LIST*     tables, /*!< in/out: tables->table receives the rows */
++      COND*           cond)   /*!< in: not referenced in this function */
++{
++      TABLE*  i_s_table       = (TABLE *) tables->table;
++      int     status  = 0;
++      dict_table_t*   table;
++      dict_index_t*   index;
++
++      DBUG_ENTER("i_s_innodb_index_stats_fill");
++
++      /* deny access to users failing the PROCESS_ACL check; they get an empty result, not an error */
++      if (check_global_access(thd, PROCESS_ACL)) {
++              DBUG_RETURN(0);
++      }
++
++      mutex_enter(&(dict_sys->mutex));        /* held for the whole walk over tables and indexes */
++
++      table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
++
++      while (table) {
++              if (table->stat_clustered_index_size == 0) {    /* skip; presumably statistics not calculated yet - confirm */
++                      table = UT_LIST_GET_NEXT(table_LRU, table);
++                      continue;
++              }
++
++              ib_int64_t      n_rows = table->stat_n_rows;
++
++              if (n_rows < 0) {       /* clamp so the divisions below never start from a negative row count */
++                      n_rows = 0;
++              }
++
++              index = dict_table_get_first_index(table);
++
++              while (index) {
++                      char    buff[256+1];            /* one formatted number */
++                      char    row_per_keys[256+1];    /* accumulated list stored into field[4] */
++                      char    buf[NAME_LEN * 2 + 2];
++                      char*   ptr;
++                      ulint   i;
++
++                      buf[NAME_LEN * 2 + 1] = 0;      /* terminator set first: strncpy() below writes at most NAME_LEN * 2 + 1 bytes */
++                      strncpy(buf, table->name, NAME_LEN * 2 + 1);
++                      ptr = strchr(buf, '/');         /* split table->name at the first '/' */
++                      if (ptr) {
++                              *ptr = '\0';
++                              ++ptr;
++                      } else {
++                              ptr = buf;      /* no '/': both name columns receive the whole string */
++                      }
++
++                      field_store_string(i_s_table->field[0], buf);
++                      field_store_string(i_s_table->field[1], ptr);
++                      field_store_string(i_s_table->field[2], index->name);
++                      i_s_table->field[3]->store(index->n_uniq);
++
++                      row_per_keys[0] = '\0';
++
++                      /* Still an optimistic read for now: the per-index stat mutex calls are left commented out, so only dict_sys->mutex is held here */
++                      //dict_index_stat_mutex_enter(index);
++                      if (index->stat_n_diff_key_vals) {
++                              for (i = 1; i <= index->n_uniq; i++) {  /* entries 1..n_uniq are read; entry 0 is not used here */
++                                      ib_int64_t      rec_per_key;
++                                      if (index->stat_n_diff_key_vals[i]) {
++                                              rec_per_key = n_rows / index->stat_n_diff_key_vals[i];
++                                      } else {
++                                              rec_per_key = n_rows;   /* avoid dividing by zero */
++                                      }
++                                      ut_snprintf(buff, 256, (i == index->n_uniq)?"%llu":"%llu, ",
++                                               rec_per_key);  /* ", " separator after every value except the last */
++                                      strncat(row_per_keys, buff, 256 - strlen(row_per_keys));        /* bound keeps the list within 256 chars plus terminator */
++                              }
++                      }
++                      //dict_index_stat_mutex_exit(index);
++
++                      field_store_string(i_s_table->field[4], row_per_keys);
++
++                      i_s_table->field[5]->store(index->stat_index_size);
++                      i_s_table->field[6]->store(index->stat_n_leaf_pages);
++
++                      if (schema_table_store_record(thd, i_s_table)) {
++                              status = 1;     /* stop at the first row that cannot be stored */
++                              break;
++                      }
++
++                      index = dict_table_get_next_index(index);
++              }
++
++              if (status == 1) {      /* propagate the inner break to the table loop */
++                      break;
++              }
++
++              table = UT_LIST_GET_NEXT(table_LRU, table);
++      }
++
++      mutex_exit(&(dict_sys->mutex));
++
++      DBUG_RETURN(status);
++}
++
++static        /* Bind INFORMATION_SCHEMA.INNODB_TABLE_STATS: install its column definitions and fill callback */
++int   /* @return 0 (no failure path exists in this function) */
++i_s_innodb_table_stats_init(
++/*========================*/
++      void*   p)      /*!< in/out: table schema object (cast to ST_SCHEMA_TABLE*) */
++{
++      DBUG_ENTER("i_s_innodb_table_stats_init");
++      ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++      schema->fields_info = i_s_innodb_table_stats_info;      /* column layout */
++      schema->fill_table = i_s_innodb_table_stats_fill;       /* row producer */
++
++      DBUG_RETURN(0);
++}
++
++static        /* Bind INFORMATION_SCHEMA.INNODB_INDEX_STATS: install its column definitions and fill callback */
++int   /* @return 0 (no failure path exists in this function) */
++i_s_innodb_index_stats_init(
++/*========================*/
++      void*   p)      /*!< in/out: table schema object (cast to ST_SCHEMA_TABLE*) */
++{
++      DBUG_ENTER("i_s_innodb_index_stats_init");
++      ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++      schema->fields_info = i_s_innodb_index_stats_info;      /* column layout */
++      schema->fill_table = i_s_innodb_index_stats_fill;       /* row producer */
++
++      DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_table_stats =        /* plugin descriptor for the INNODB_TABLE_STATS information schema table */
++{
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++      STRUCT_FLD(info, &i_s_info),
++      STRUCT_FLD(name, "INNODB_TABLE_STATS"),
++      STRUCT_FLD(author, plugin_author),
++      STRUCT_FLD(descr, "InnoDB table statistics in memory"),
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++      STRUCT_FLD(init, i_s_innodb_table_stats_init),  /* binds fields_info and fill_table */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++      STRUCT_FLD(version, 0x0100 /* 1.0 */),  /* NOTE(review): i_s_innodb_sys_stats uses INNODB_VERSION_SHORT here instead */
++      STRUCT_FLD(status_vars, NULL),
++      STRUCT_FLD(system_vars, NULL),
++      STRUCT_FLD(__reserved1, NULL)
++};
++
++UNIV_INTERN struct st_mysql_plugin    i_s_innodb_index_stats =        /* plugin descriptor for the INNODB_INDEX_STATS information schema table */
++{
++      STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++      STRUCT_FLD(info, &i_s_info),
++      STRUCT_FLD(name, "INNODB_INDEX_STATS"),
++      STRUCT_FLD(author, plugin_author),
++      STRUCT_FLD(descr, "InnoDB index statistics in memory"),
++      STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++      STRUCT_FLD(init, i_s_innodb_index_stats_init),  /* binds fields_info and fill_table */
++      STRUCT_FLD(deinit, i_s_common_deinit),
++      STRUCT_FLD(version, 0x0100 /* 1.0 */),  /* NOTE(review): i_s_innodb_sys_stats uses INNODB_VERSION_SHORT here instead */
++      STRUCT_FLD(status_vars, NULL),
++      STRUCT_FLD(system_vars, NULL),
++      STRUCT_FLD(__reserved1, NULL)
++};
+diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
+--- a/storage/innobase/handler/i_s.h   2010-12-03 17:17:03.668953884 +0900
++++ b/storage/innobase/handler/i_s.h   2010-12-03 17:19:24.882947826 +0900
+@@ -41,5 +41,8 @@
+ extern struct st_mysql_plugin   i_s_innodb_sys_foreign;
+ extern struct st_mysql_plugin   i_s_innodb_sys_foreign_cols;
+ extern struct st_mysql_plugin i_s_innodb_rseg;
++extern struct st_mysql_plugin i_s_innodb_sys_stats;
++extern struct st_mysql_plugin i_s_innodb_table_stats;
++extern struct st_mysql_plugin i_s_innodb_index_stats;
+ #endif /* i_s_h */
+diff -ruN a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
+--- a/storage/innobase/include/dict0boot.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0boot.h     2010-12-03 17:19:24.885947372 +0900
+@@ -104,6 +104,7 @@
+ #define DICT_COLUMNS_ID               2
+ #define DICT_INDEXES_ID               3
+ #define DICT_FIELDS_ID                4
++#define DICT_STATS_ID         6
+ /* The following is a secondary index on SYS_TABLES */
+ #define DICT_TABLE_IDS_ID     5
+@@ -131,10 +132,13 @@
+ #define       DICT_HDR_INDEXES        44      /* Root of the index index tree */
+ #define       DICT_HDR_FIELDS         48      /* Root of the index field
+                                       index tree */
++#define       DICT_HDR_STATS          52      /* Root of the stats tree */
+ #define DICT_HDR_FSEG_HEADER  56      /* Segment header for the tablespace
+                                       segment into which the dictionary
+                                       header is created */
++
++#define       DICT_HDR_XTRADB_MARK    256     /* Flag to distinguish expansion of XtraDB */
+ /*-------------------------------------------------------------*/
+ /* The field number of the page number field in the sys_indexes table
+@@ -144,11 +148,15 @@
+ #define DICT_SYS_INDEXES_TYPE_FIELD    6
+ #define DICT_SYS_INDEXES_NAME_FIELD    4
++#define DICT_SYS_STATS_DIFF_VALS_FIELD         4
++
+ /* When a row id which is zero modulo this number (which must be a power of
+ two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
+ updated */
+ #define DICT_HDR_ROW_ID_WRITE_MARGIN  256
++#define DICT_HDR_XTRADB_FLAG          0x5854524144425F31ULL   /* "XTRADB_1" */
++
+ #ifndef UNIV_NONINL
+ #include "dict0boot.ic"
+ #endif
+diff -ruN a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
+--- a/storage/innobase/include/dict0crea.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0crea.h     2010-12-03 17:19:24.886949643 +0900
+@@ -53,6 +53,14 @@
+       dict_index_t*   index,  /*!< in: index to create, built as a memory data
+                               structure */
+       mem_heap_t*     heap);  /*!< in: heap where created */
++/*********************************************************************//**
++*/
++UNIV_INTERN
++ind_node_t*
++ind_insert_stats_graph_create(
++/*==========================*/
++      dict_index_t*   index,
++      mem_heap_t*     heap);
+ /***********************************************************//**
+ Creates a table. This is a high-level function used in SQL execution graphs.
+ @return       query thread to run next or NULL */
+@@ -62,6 +70,13 @@
+ /*===================*/
+       que_thr_t*      thr);   /*!< in: query thread */
+ /***********************************************************//**
++*/
++UNIV_INTERN
++que_thr_t*
++dict_insert_stats_step(
++/*===================*/
++      que_thr_t*      thr);
++/***********************************************************//**
+ Creates an index. This is a high-level function used in SQL execution
+ graphs.
+ @return       query thread to run next or NULL */
+@@ -170,6 +185,7 @@
+       ins_node_t*     field_def; /* child node which does the inserts of
+                               the field definitions; the row to be inserted
+                               is built by the parent node  */
++      ins_node_t*     stats_def;
+       commit_node_t*  commit_node;
+                               /* child node which performs a commit after
+                               a successful index creation */
+@@ -180,6 +196,7 @@
+       dict_table_t*   table;  /*!< table which owns the index */
+       dtuple_t*       ind_row;/* index definition row built */
+       ulint           field_no;/* next field definition to insert */
++      ulint           stats_no;
+       mem_heap_t*     heap;   /*!< memory heap used as auxiliary storage */
+ };
+@@ -189,6 +206,7 @@
+ #define       INDEX_CREATE_INDEX_TREE 3
+ #define       INDEX_COMMIT_WORK       4
+ #define       INDEX_ADD_TO_CACHE      5
++#define       INDEX_BUILD_STATS_COLS  6
+ #ifndef UNIV_NONINL
+ #include "dict0crea.ic"
+diff -ruN a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
+--- a/storage/innobase/include/dict0dict.h     2010-12-03 15:48:03.073024387 +0900
++++ b/storage/innobase/include/dict0dict.h     2010-12-03 17:19:24.888965622 +0900
+@@ -1084,10 +1084,11 @@
+ dict_update_statistics(
+ /*===================*/
+       dict_table_t*   table,          /*!< in/out: table */
+-      ibool           only_calc_if_missing_stats);/*!< in: only
++      ibool           only_calc_if_missing_stats, /*!< in: only
+                                       update/recalc the stats if they have
+                                       not been initialized yet, otherwise
+                                       do nothing */
++      ibool           sync);
+ /********************************************************************//**
+ Reserves the dictionary system mutex for MySQL. */
+ UNIV_INTERN
+@@ -1202,6 +1203,7 @@
+       dict_table_t*   sys_columns;    /*!< SYS_COLUMNS table */
+       dict_table_t*   sys_indexes;    /*!< SYS_INDEXES table */
+       dict_table_t*   sys_fields;     /*!< SYS_FIELDS table */
++      dict_table_t*   sys_stats;      /*!< SYS_STATS table */
+ };
+ #endif /* !UNIV_HOTBACKUP */
+diff -ruN a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
+--- a/storage/innobase/include/dict0load.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0load.h     2010-12-03 17:19:24.889947481 +0900
+@@ -41,6 +41,7 @@
+       SYS_FIELDS,
+       SYS_FOREIGN,
+       SYS_FOREIGN_COLS,
++      SYS_STATS,
+       /* This must be last item. Defines the number of system tables. */
+       SYS_NUM_SYSTEM_TABLES
+@@ -319,6 +320,19 @@
+       const char**    ref_col_name,   /*!< out: referenced column name
+                                       in referenced table */
+       ulint*          pos);           /*!< out: column position */
++/********************************************************************//**
++This function parses a SYS_STATS record, extracts the necessary
++information from the record and returns it to the caller.
++@return error message, or NULL on success */
++UNIV_INTERN
++const char*
++dict_process_sys_stats_rec(
++/*=============================*/
++      mem_heap_t*     heap,           /*!< in/out: heap memory */
++      const rec_t*    rec,            /*!< in: current SYS_STATS rec */
++      index_id_t*     index_id,       /*!< out: INDEX_ID */
++      ulint*          key_cols,       /*!< out: KEY_COLS */
++      ib_uint64_t*    diff_vals);     /*!< out: DIFF_VALS */
+ #ifndef UNIV_NONINL
+ #include "dict0load.ic"
+ #endif
+diff -ruN a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
+--- a/storage/innobase/include/page0cur.h      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/page0cur.h      2010-12-03 17:19:24.891954511 +0900
+@@ -293,6 +293,22 @@
+ /*==========================*/
+       buf_block_t*    block,  /*!< in: page */
+       page_cur_t*     cursor);/*!< out: page cursor */
++
++UNIV_INTERN
++void
++page_cur_open_on_nth_user_rec(
++/*==========================*/
++      buf_block_t*    block,  /*!< in: page */
++      page_cur_t*     cursor, /*!< out: page cursor */
++      ulint           nth);
++
++UNIV_INTERN
++ibool
++page_cur_open_on_rnd_user_rec_after_nth(
++/*==========================*/
++      buf_block_t*    block,  /*!< in: page */
++      page_cur_t*     cursor, /*!< out: page cursor */
++      ulint           nth);
+ #endif /* !UNIV_HOTBACKUP */
+ /***********************************************************//**
+ Parses a log record of a record insert on a page.
+diff -ruN a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
+--- a/storage/innobase/include/que0que.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/que0que.h       2010-12-03 17:19:24.892947946 +0900
+@@ -492,6 +492,8 @@
+ #define QUE_NODE_CALL         31
+ #define QUE_NODE_EXIT         32
++#define QUE_NODE_INSERT_STATS 34
++
+ /* Query thread states */
+ #define QUE_THR_RUNNING               1
+ #define QUE_THR_PROCEDURE_WAIT        2
+diff -ruN a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
+--- a/storage/innobase/include/rem0cmp.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/rem0cmp.h       2010-12-03 17:19:24.893953395 +0900
+@@ -169,10 +169,11 @@
+                               matched fields; when the function returns,
+                               contains the value the for current
+                               comparison */
+-      ulint*          matched_bytes);/*!< in/out: number of already matched
++      ulint*          matched_bytes, /*!< in/out: number of already matched
+                               bytes within the first field not completely
+                               matched; when the function returns, contains
+                               the value for the current comparison */
++      ulint           stats_method);
+ /*************************************************************//**
+ This function is used to compare two physical records. Only the common
+ first fields are compared.
+diff -ruN a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic
+--- a/storage/innobase/include/rem0cmp.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/rem0cmp.ic      2010-12-03 17:19:24.902983425 +0900
+@@ -87,5 +87,5 @@
+       ulint   match_b         = 0;
+       return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
+-                                    &match_f, &match_b));
++                                    &match_f, &match_b, 0));
+ }
+diff -ruN a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
+--- a/storage/innobase/include/row0mysql.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/row0mysql.h     2010-12-03 17:19:24.904973020 +0900
+@@ -387,6 +387,14 @@
+                                       then checked for not being too
+                                       large. */
+ /*********************************************************************//**
++*/
++UNIV_INTERN
++int
++row_insert_stats_for_mysql(
++/*=======================*/
++      dict_index_t*   index,
++      trx_t*          trx);
++/*********************************************************************//**
+ Scans a table create SQL string and adds to the data dictionary
+ the foreign key constraints declared in the string. This function
+ should be called after the indexes for a table have been created.
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-03 15:53:54.622036720 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-03 17:19:24.906953188 +0900
+@@ -209,6 +209,13 @@
+ extern ibool  srv_innodb_status;
+ extern unsigned long long     srv_stats_sample_pages;
++extern ulint  srv_stats_method;
++#define SRV_STATS_METHOD_NULLS_EQUAL     0
++#define SRV_STATS_METHOD_NULLS_NOT_EQUAL 1
++#define SRV_STATS_METHOD_IGNORE_NULLS    2
++extern ulint  srv_stats_auto_update;
++extern ulint  srv_stats_update_need_lock;
++extern ibool  srv_use_sys_stats_table;
+ extern ibool  srv_use_doublewrite_buf;
+ extern ibool  srv_use_checksums;
+diff -ruN a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c
+--- a/storage/innobase/page/page0cur.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/page/page0cur.c 2010-12-03 17:19:24.908973357 +0900
+@@ -564,6 +564,74 @@
+       } while (rnd--);
+ }
++UNIV_INTERN
++void
++page_cur_open_on_nth_user_rec(
++/*==========================*/
++      buf_block_t*    block,  /*!< in: page */
++      page_cur_t*     cursor, /*!< out: page cursor */
++      ulint           nth)    /*!< in: number of moves to make from the before-first position; capped at n_recs */
++{
++      ulint   n_recs = page_get_n_recs(buf_block_get_frame(block));
++      /* Set the cursor before the first record, then advance it nth times (at most n_recs times). */
++      page_cur_set_before_first(block, cursor);
++
++      if (UNIV_UNLIKELY(n_recs == 0)) {
++              /* Empty page: leave the cursor at the before-first position. */
++              return;
++      }
++      /* Turn nth into a zero-based count; nth == 0 wraps (assuming ulint is unsigned) and is capped below. */
++      nth--;
++      /* Cap the count so that no more than n_recs moves are made in total. */
++      if (nth >= n_recs) {
++              nth = n_recs - 1;
++      }
++      /* The do/while body runs nth + 1 times because the test happens after each move. */
++      do {
++              page_cur_move_to_next(cursor);
++      } while (nth--);
++}
++
++UNIV_INTERN
++ibool
++page_cur_open_on_rnd_user_rec_after_nth(
++/*==========================*/
++      buf_block_t*    block,  /*!< in: page */
++      page_cur_t*     cursor, /*!< out: page cursor */
++      ulint           nth)    /*!< in: lowest position (counted in moves from before-first) that may be picked */
++{
++      ulint   rnd;            /* zero-based position picked from [nth - 1, n_recs - 1] */
++      ulint   n_recs = page_get_n_recs(buf_block_get_frame(block));
++      ibool   ret;            /* return value: TRUE only when the pick equals the lowest allowed position */
++      /* Set the cursor before the first record; FALSE is returned right away for an empty page. */
++      page_cur_set_before_first(block, cursor);
++
++      if (UNIV_UNLIKELY(n_recs == 0)) {
++
++              return (FALSE);
++      }
++      /* Turn nth into a zero-based count; nth == 0 wraps (assuming ulint is unsigned) and is capped below. */
++      nth--;
++      /* Cap nth so that n_recs - nth below is at least 1 (no modulo by zero). */
++      if (nth >= n_recs) {
++              nth = n_recs - 1;
++      }
++      /* Add an offset in [0, n_recs - nth - 1] taken from page_cur_lcg_prng(). */
++      rnd = (ulint) (nth + page_cur_lcg_prng() % (n_recs - nth));
++      /* Report whether the offset was 0, i.e. the pick is the lowest allowed position. */
++      if (rnd == nth) {
++              ret = TRUE;
++      } else {
++              ret = FALSE;
++      }
++      /* The do/while body runs rnd + 1 times because the test happens after each move. */
++      do {
++              page_cur_move_to_next(cursor);
++      } while (rnd--);
++
++      return (ret);
++}
++
+ /***********************************************************//**
+ Writes the log record of a record insert on a page. */
+ static
+diff -ruN a/storage/innobase/que/que0que.c b/storage/innobase/que/que0que.c
+--- a/storage/innobase/que/que0que.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/que/que0que.c   2010-12-03 17:19:24.910953422 +0900
+@@ -621,11 +621,21 @@
+               que_graph_free_recursive(cre_ind->ind_def);
+               que_graph_free_recursive(cre_ind->field_def);
++              if (srv_use_sys_stats_table)
++                      que_graph_free_recursive(cre_ind->stats_def);
+               que_graph_free_recursive(cre_ind->commit_node);
+               mem_heap_free(cre_ind->heap);
+               break;
++      case QUE_NODE_INSERT_STATS:
++              cre_ind = node;
++
++              que_graph_free_recursive(cre_ind->stats_def);
++              que_graph_free_recursive(cre_ind->commit_node);
++
++              mem_heap_free(cre_ind->heap);
++              break;
+       case QUE_NODE_PROC:
+               que_graph_free_stat_list(((proc_node_t*)node)->stat_list);
+@@ -1138,6 +1148,8 @@
+               str = "CREATE TABLE";
+       } else if (type == QUE_NODE_CREATE_INDEX) {
+               str = "CREATE INDEX";
++      } else if (type == QUE_NODE_INSERT_STATS) {
++              str = "INSERT TO SYS_STATS";
+       } else if (type == QUE_NODE_FOR) {
+               str = "FOR LOOP";
+       } else if (type == QUE_NODE_RETURN) {
+@@ -1255,6 +1267,8 @@
+               thr = dict_create_table_step(thr);
+       } else if (type == QUE_NODE_CREATE_INDEX) {
+               thr = dict_create_index_step(thr);
++      } else if (type == QUE_NODE_INSERT_STATS) {
++              thr = dict_insert_stats_step(thr);
+       } else if (type == QUE_NODE_ROW_PRINTF) {
+               thr = row_printf_step(thr);
+       } else {
+diff -ruN a/storage/innobase/rem/rem0cmp.c b/storage/innobase/rem/rem0cmp.c
+--- a/storage/innobase/rem/rem0cmp.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/rem/rem0cmp.c   2010-12-03 17:19:24.911953579 +0900
+@@ -866,10 +866,11 @@
+                               matched fields; when the function returns,
+                               contains the value the for current
+                               comparison */
+-      ulint*          matched_bytes) /*!< in/out: number of already matched
++      ulint*          matched_bytes, /*!< in/out: number of already matched
+                               bytes within the first field not completely
+                               matched; when the function returns, contains
+                               the value for the current comparison */
++      ulint           stats_method)
+ {
+       ulint           rec1_n_fields;  /* the number of fields in rec */
+       ulint           rec1_f_len;     /* length of current field in rec */
+@@ -962,7 +963,11 @@
+                               if (rec1_f_len == rec2_f_len) {
+-                                      goto next_field;
++                                      if (stats_method == SRV_STATS_METHOD_NULLS_EQUAL) {
++                                              goto next_field;
++                                      } else {
++                                              ret = -1;
++                                      }
+                               } else if (rec2_f_len == UNIV_SQL_NULL) {
+diff -ruN a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c
+--- a/storage/innobase/row/row0merge.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/row/row0merge.c 2010-12-03 17:19:24.914955391 +0900
+@@ -2020,6 +2020,8 @@
+               "UPDATE SYS_INDEXES SET NAME=CONCAT('"
+               TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n"
+               "COMMIT WORK;\n"
++              /* Drop the statistics of the index. */
++              "DELETE FROM SYS_STATS WHERE INDEX_ID = :indexid;\n"
+               /* Drop the field definitions of the index. */
+               "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
+               /* Drop the index definition and the B-tree. */
+diff -ruN a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
+--- a/storage/innobase/row/row0mysql.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/row/row0mysql.c 2010-12-03 17:19:24.918953476 +0900
+@@ -921,6 +921,9 @@
+       table->stat_modified_counter = counter + 1;
++      if (!srv_stats_auto_update)
++              return;
++
+       /* Calculate new statistics if 1 / 16 of table has been modified
+       since the last time a statistics batch was run, or if
+       stat_modified_counter > 2 000 000 000 (to avoid wrap-around).
+@@ -931,7 +934,7 @@
+           || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) {
+               dict_update_statistics(table, FALSE /* update even if stats
+-                                                  are initialized */);
++                                                  are initialized */, TRUE);
+       }
+ }
+@@ -2105,6 +2108,45 @@
+ }
+ /*********************************************************************//**
++*/
++UNIV_INTERN
++int
++row_insert_stats_for_mysql(
++/*=======================*/
++      dict_index_t*   index,  /*!< in: index handed to ind_insert_stats_graph_create() */
++      trx_t*          trx)    /*!< in/out: transaction; op_info and error_state are written */
++{
++      ind_node_t*     node;
++      mem_heap_t*     heap;
++      que_thr_t*      thr;
++      ulint           err;
++      /* Assert that the calling thread is the one recorded in trx. */
++      ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
++
++      trx->op_info = "try to insert rows to SYS_STATS";
++      /* Start the transaction if needed and reset error_state before running the graph. */
++      trx_start_if_not_started(trx);
++      trx->error_state = DB_SUCCESS;
++      /* Build the insert-stats graph for the index on a new heap, then execute it. */
++      heap = mem_heap_create(512);
++
++      node = ind_insert_stats_graph_create(index, heap);
++
++      thr = pars_complete_graph_for_exec(node, trx, heap);
++
++      ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
++      que_run_threads(thr);
++      /* Read the outcome from trx before the graph is freed; it becomes the return value. */
++      err = trx->error_state;
++      /* NOTE(review): heap is not freed explicitly here; presumably que_graph_free() releases it -- confirm. */
++      que_graph_free((que_t*) que_node_get_parent(thr));
++
++      trx->op_info = "";
++
++      return((int) err);
++}
++
++/*********************************************************************//**
+ Scans a table create SQL string and adds to the data dictionary
+ the foreign key constraints declared in the string. This function
+ should be called after the indexes for a table have been created.
+@@ -3024,7 +3066,7 @@
+       dict_table_autoinc_initialize(table, 1);
+       dict_table_autoinc_unlock(table);
+       dict_update_statistics(table, FALSE /* update even if stats are
+-                                          initialized */);
++                                          initialized */, TRUE);
+       trx_commit_for_mysql(trx);
+@@ -3326,6 +3368,8 @@
+                          "       IF (SQL % NOTFOUND) THEN\n"
+                          "               found := 0;\n"
+                          "       ELSE\n"
++                         "               DELETE FROM SYS_STATS\n"
++                         "               WHERE INDEX_ID = index_id;\n"
+                          "               DELETE FROM SYS_FIELDS\n"
+                          "               WHERE INDEX_ID = index_id;\n"
+                          "               DELETE FROM SYS_INDEXES\n"
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 15:53:54.625288512 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 17:19:24.922953561 +0900
+@@ -395,6 +395,10 @@
+ /* When estimating number of different key values in an index, sample
+ this many index pages */
+ UNIV_INTERN unsigned long long        srv_stats_sample_pages = 8;
++UNIV_INTERN ulint     srv_stats_method = 0;
++UNIV_INTERN ulint     srv_stats_auto_update = 1;
++UNIV_INTERN ulint     srv_stats_update_need_lock = 1;
++UNIV_INTERN ibool     srv_use_sys_stats_table = FALSE;
+ UNIV_INTERN ibool     srv_use_doublewrite_buf = TRUE;
+ UNIV_INTERN ibool     srv_use_checksums = TRUE;
diff --git a/innodb_thread_concurrency_timer_based.patch b/innodb_thread_concurrency_timer_based.patch
new file mode 100644 (file)
index 0000000..e3e613b
--- /dev/null
@@ -0,0 +1,191 @@
+# name       : innodb_thread_concurrency_timer_based.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:41:52.045404706 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:42:11.568959457 +0900
+@@ -148,6 +148,7 @@
+ static ulong innobase_write_io_threads;
+ static long innobase_buffer_pool_instances = 1;
++static my_bool innobase_thread_concurrency_timer_based;
+ static long long innobase_buffer_pool_size, innobase_log_file_size;
+ /** Percentage of the buffer pool to reserve for 'old' blocks.
+@@ -2496,6 +2497,9 @@
+       srv_n_log_files = (ulint) innobase_log_files_in_group;
+       srv_log_file_size = (ulint) innobase_log_file_size;
++      srv_thread_concurrency_timer_based =
++              (ibool) innobase_thread_concurrency_timer_based;
++
+ #ifdef UNIV_LOG_ARCHIVE
+       srv_log_archive_on = (ulint) innobase_log_archive;
+ #endif /* UNIV_LOG_ARCHIVE */
+@@ -11373,6 +11377,12 @@
+   "Maximum delay between polling for a spin lock (6 by default)",
+   NULL, NULL, 6L, 0L, ~0L, 0);
++static MYSQL_SYSVAR_BOOL(thread_concurrency_timer_based,
++  innobase_thread_concurrency_timer_based,
++  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++  "Use InnoDB timer based concurrency throttling. ",
++  NULL, NULL, FALSE);
++
+ static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
+   PLUGIN_VAR_RQCMDARG,
+   "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
+@@ -11586,6 +11596,7 @@
+   MYSQL_SYSVAR(spin_wait_delay),
+   MYSQL_SYSVAR(table_locks),
+   MYSQL_SYSVAR(thread_concurrency),
++  MYSQL_SYSVAR(thread_concurrency_timer_based),
+   MYSQL_SYSVAR(thread_sleep_delay),
+   MYSQL_SYSVAR(autoinc_lock_mode),
+   MYSQL_SYSVAR(show_verbose_locks),
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-03 15:37:45.543027751 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-03 15:42:11.571024631 +0900
+@@ -164,6 +164,8 @@
+ extern ulint  srv_mem_pool_size;
+ extern ulint  srv_lock_table_size;
++extern ibool  srv_thread_concurrency_timer_based;
++
+ extern ulint  srv_n_file_io_threads;
+ extern ulong  srv_read_ahead_threshold;
+ extern ulint  srv_n_read_io_threads;
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 15:37:45.546023493 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 15:42:11.574955879 +0900
+@@ -344,6 +344,7 @@
+ computer. Bigger computers need bigger values. Value 0 will disable the
+ concurrency check. */
++UNIV_INTERN ibool     srv_thread_concurrency_timer_based = FALSE;
+ UNIV_INTERN ulong     srv_thread_concurrency  = 0;
+ /* this mutex protects srv_conc data structures */
+@@ -1130,6 +1131,75 @@
+ /*********************************************************************//**
+ Puts an OS thread to wait if there are too many concurrent threads
+ (>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
++
++#ifdef HAVE_ATOMIC_BUILTINS
++static void
++enter_innodb_with_tickets(trx_t* trx)
++{      /* Flags trx as inside InnoDB and hands it tickets; srv_conc_n_threads is not touched here. */
++      trx->declared_to_be_inside_innodb = TRUE;
++      trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
++      return;
++}
++
++static void
++srv_conc_enter_innodb_timer_based(trx_t* trx)
++{
++      lint    conc_n_threads;
++      ibool   has_yielded = FALSE;    /* TRUE once the single yield-and-retry has been used */
++      ulint   has_slept = 0;          /* incremented after the sleep further down */
++      /* Admits trx using atomic updates of srv_conc_n_threads; every return path below has incremented the counter. */
++      if (trx->declared_to_be_inside_innodb) {        /* only reported; execution continues */
++              ut_print_timestamp(stderr);
++              fputs(
++"  InnoDB: Error: trying to declare trx to enter InnoDB, but\n"
++"InnoDB: it already is declared.\n", stderr);
++              trx_print(stderr, trx, 0);
++              putc('\n', stderr);
++      }
++retry:
++      if (srv_conc_n_threads < (lint) srv_thread_concurrency) {       /* plain read first, then atomic claim */
++              conc_n_threads = os_atomic_increment_lint(&srv_conc_n_threads, 1);
++              if (conc_n_threads <= (lint) srv_thread_concurrency) {
++                      enter_innodb_with_tickets(trx);
++                      return;
++              }
++              os_atomic_increment_lint(&srv_conc_n_threads, -1);      /* counter went over the limit: undo the claim */
++      }
++      if (!has_yielded)       /* yield once and try the limit check a second time */
++      {
++              has_yielded = TRUE;
++              os_thread_yield();
++              goto retry;
++      }
++      if (trx->has_search_latch
++          || NULL != UT_LIST_GET_FIRST(trx->trx_locks)) {
++              /* trx has the search latch flag set or a non-empty lock list: enter without sleeping. */
++              conc_n_threads = os_atomic_increment_lint(&srv_conc_n_threads, 1);
++              enter_innodb_with_tickets(trx);
++              return;
++      }
++      if (has_slept < 2)      /* NOTE(review): nothing jumps back to retry after this, so the sleep runs at most once per call */
++      {
++              trx->op_info = "sleeping before entering InnoDB";
++              os_thread_sleep(10000);
++              trx->op_info = "";
++              has_slept++;
++      }
++      conc_n_threads = os_atomic_increment_lint(&srv_conc_n_threads, 1);      /* enter without re-checking the limit */
++      enter_innodb_with_tickets(trx);
++      return;
++}
++
++static void
++srv_conc_exit_innodb_timer_based(trx_t* trx)
++{      /* Reverses the timer-based enter: decrements the counter and clears the trx flag and tickets. */
++      os_atomic_increment_lint(&srv_conc_n_threads, -1);
++      trx->declared_to_be_inside_innodb = FALSE;
++      trx->n_tickets_to_enter_innodb = 0;
++      return;
++}
++#endif
++
+ UNIV_INTERN
+ void
+ srv_conc_enter_innodb(
+@@ -1160,6 +1230,13 @@
+               return;
+       }
++#ifdef HAVE_ATOMIC_BUILTINS
++      if (srv_thread_concurrency_timer_based) {
++              srv_conc_enter_innodb_timer_based(trx);
++              return;
++      }
++#endif
++
+       os_fast_mutex_lock(&srv_conc_mutex);
+ retry:
+       if (trx->declared_to_be_inside_innodb) {
+@@ -1305,6 +1382,14 @@
+       }
+       ut_ad(srv_conc_n_threads >= 0);
++#ifdef HAVE_ATOMIC_BUILTINS
++      if (srv_thread_concurrency_timer_based) {
++              os_atomic_increment_lint(&srv_conc_n_threads, 1);
++              trx->declared_to_be_inside_innodb = TRUE;
++              trx->n_tickets_to_enter_innodb = 1;
++              return;
++      }
++#endif
+       os_fast_mutex_lock(&srv_conc_mutex);
+@@ -1338,6 +1423,13 @@
+               return;
+       }
++#ifdef HAVE_ATOMIC_BUILTINS
++      if (srv_thread_concurrency_timer_based) {
++              srv_conc_exit_innodb_timer_based(trx);
++              return;
++      }
++#endif
++
+       os_fast_mutex_lock(&srv_conc_mutex);
+       ut_ad(srv_conc_n_threads > 0);
diff --git a/log_connection_error.patch b/log_connection_error.patch
new file mode 100644 (file)
index 0000000..7bf06b7
--- /dev/null
@@ -0,0 +1,53 @@
+# name       : log_connection_error.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/log_connection_error.patch b/patch_info/log_connection_error.patch
+--- a/patch_info/log_connection_error.patch    1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/log_connection_error.patch    2010-07-28 16:47:47.634070367 +0400
+@@ -0,0 +1,6 @@
++File=log_connection_error.patch
++Name=logging abandoned connections
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc    2010-07-28 16:47:47.105319218 +0400
++++ b/sql/mysqld.cc    2010-07-28 16:47:47.644101813 +0400
+@@ -4995,6 +4995,10 @@
+     DBUG_PRINT("error",("Too many connections"));
+     close_connection(thd, ER_CON_COUNT_ERROR, 1);
++    if (global_system_variables.log_warnings)
++    {
++      sql_print_warning("%s", ER_DEFAULT(ER_CON_COUNT_ERROR));
++    }
+     delete thd;
+     DBUG_VOID_RETURN;
+   }
+@@ -5375,6 +5379,10 @@
+     if (!(thd->net.vio= vio_new_win32pipe(hConnectedPipe)) ||
+       my_net_init(&thd->net, thd->net.vio))
+     {
++      if (global_system_variables.log_warnings)
++      {
++        sql_print_warning("%s", ER_DEFAULT(ER_OUT_OF_RESOURCES));
++      }
+       close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+       delete thd;
+       continue;
+@@ -5570,6 +5578,10 @@
+                                                    event_conn_closed)) ||
+                         my_net_init(&thd->net, thd->net.vio))
+     {
++      if (global_system_variables.log_warnings)
++      {
++        sql_print_warning("%s", ER_DEFAULT(ER_OUT_OF_RESOURCES));
++      }
+       close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+       errmsg= 0;
+       goto errorconn;
diff --git a/log_warnings_silence.patch b/log_warnings_silence.patch
new file mode 100644 (file)
index 0000000..f6ffa6e
--- /dev/null
@@ -0,0 +1,85 @@
+# name       : log_warnings_silence.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/log_warnings_silence.patch b/patch_info/log_warnings_silence.patch
+--- a/patch_info/log_warnings_silence.patch    1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/log_warnings_silence.patch    2011-01-05 20:35:46.000000000 +0300
+@@ -0,0 +1,8 @@
++File=log_warnings_silence.patch
++Name=Disable log warnings for enumerated warnings (old name:suppress_log_warning_1592.patch)
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
++Changelog
++2011-01-05 rename patch suppress_log_warning_1592.patch to log_warnings_silence.patch. Also remove boolean system variable "suppress_log_warning_1592" and add set variable "log_warnings_silence" (possible values: 1592)
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc    2011-01-05 20:31:33.000000000 +0300
++++ b/sql/mysqld.cc    2011-01-05 20:32:34.000000000 +0300
+@@ -617,6 +617,8 @@
+ SHOW_COMP_OPTION have_crypt, have_compress;
+ SHOW_COMP_OPTION have_profiling;
++ulonglong opt_log_warnings_silence= 0;
++
+ /* Thread specific variables */
+ pthread_key(MEM_ROOT**,THR_MALLOC);
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h     2011-01-05 20:31:33.000000000 +0300
++++ b/sql/mysqld.h     2011-01-05 20:32:35.000000000 +0300
+@@ -224,6 +224,8 @@
+ extern TYPELIB thread_handling_typelib;
+ extern my_decimal decimal_zero;
++extern ulonglong opt_log_warnings_silence;
++
+ extern pthread_key(MEM_ROOT**,THR_MALLOC);
+ #ifdef HAVE_PSI_INTERFACE
+diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
+--- a/sql/sql_class.cc 2011-01-05 20:31:32.000000000 +0300
++++ b/sql/sql_class.cc 2011-01-05 20:32:34.000000000 +0300
+@@ -4544,7 +4544,7 @@
+                           ER_BINLOG_UNSAFE_STATEMENT,
+                           ER(ER_BINLOG_UNSAFE_STATEMENT),
+                           ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
+-      if (global_system_variables.log_warnings)
++      if (global_system_variables.log_warnings && ((opt_log_warnings_silence & (ULL(1) << log_warnings_silence_1592)) == 0))
+       {
+         char buf[MYSQL_ERRMSG_SIZE * 2];
+         sprintf(buf, ER(ER_BINLOG_UNSAFE_STATEMENT),
+diff -ruN a/sql/sql_class.h b/sql/sql_class.h
+--- a/sql/sql_class.h  2011-01-05 20:31:33.000000000 +0300
++++ b/sql/sql_class.h  2011-01-05 20:32:37.000000000 +0300
+@@ -83,6 +83,7 @@
+   SLOG_F_TMP_TABLE, SLOG_F_TMP_DISK, SLOG_F_FILESORT,
+   SLOG_F_FILESORT_DISK
+ };
++enum enum_log_warnings_silence { log_warnings_silence_1592 };
+ enum enum_slave_exec_mode { SLAVE_EXEC_MODE_STRICT,
+                             SLAVE_EXEC_MODE_IDEMPOTENT,
+                             SLAVE_EXEC_MODE_LAST_BIT};
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc  2011-01-05 20:31:33.000000000 +0300
++++ b/sql/sys_vars.cc  2011-01-05 20:32:38.000000000 +0300
+@@ -1408,6 +1408,15 @@
+        READ_ONLY GLOBAL_VAR(mysqld_port), CMD_LINE(REQUIRED_ARG, 'P'),
+        VALID_RANGE(0, UINT_MAX32), DEFAULT(0), BLOCK_SIZE(1));
++const char *log_warnings_silence_name[]= { "1592" };
++static Sys_var_set Sys_log_warnings_silence(
++       "log_warnings_silence",
++       "disable logging of enumerated warnings: "
++       "1592: unsafe statements for binary logging; "
++       "possible values : [1592]",
++       GLOBAL_VAR(opt_log_warnings_silence), CMD_LINE(REQUIRED_ARG),
++       log_warnings_silence_name, DEFAULT(0));
++
+ static Sys_var_ulong Sys_preload_buff_size(
+        "preload_buffer_size",
+        "The size of the buffer that is allocated when preloading indexes",
similarity index 74%
rename from mysql-microsec_process.patch
rename to microsec_process.patch
index cad8a8d2cd36cfa389f61428c261ba0e29f37dbc..00da221aa3de310fdddad88b4ace626cd04e3491 100644 (file)
@@ -5,9 +5,9 @@
 #!!! notice !!!
 # Any small change to this file in the main branch
 # should be done or reviewed by the maintainer!
-diff -ruN a/patch_info/microsec_process.info b/patch_info/microsec_process.info
+diff -ruN /dev/null b/patch_info/microsec_process.info
 --- /dev/null  1970-01-01 09:00:00.000000000 +0900
-+++ b/patch_info/microsec_process.info 2010-08-27 14:45:52.941058849 +0900
++++ b/patch_info/microsec_process.info 2010-12-02 20:41:41.616069579 +0900
 @@ -0,0 +1,8 @@
 +File=microsec_process.patch
 +Name=Adds INFOMATION_SCHEMA.PROCESSLIST with TIME_MS column
@@ -18,9 +18,9 @@ diff -ruN a/patch_info/microsec_process.info b/patch_info/microsec_process.info
 +2010-01
 +Ported to 5.1.42
 diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
---- a/sql/sql_show.cc  2010-08-27 14:38:08.781057666 +0900
-+++ b/sql/sql_show.cc  2010-08-27 14:45:52.946058726 +0900
-@@ -1919,7 +1919,8 @@
+--- a/sql/sql_show.cc  2010-12-02 19:22:40.054024541 +0900
++++ b/sql/sql_show.cc  2010-12-02 20:41:41.622941425 +0900
+@@ -1882,7 +1882,8 @@
    TABLE *table= tables->table;
    CHARSET_INFO *cs= system_charset_info;
    char *user;
@@ -30,9 +30,9 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
    DBUG_ENTER("fill_process_list");
  
    user= thd->security_ctx->master_access & PROCESS_ACL ?
-@@ -2024,6 +2025,10 @@
+@@ -1966,6 +1967,10 @@
        }
-       pthread_mutex_unlock(&tmp->LOCK_thd_data);
+       mysql_mutex_unlock(&tmp->LOCK_thd_data);
  
 +      /* TIME_MS */
 +      table->field[8]->store(((tmp->start_utime ?
@@ -40,8 +40,8 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +
        if (schema_table_store_record(thd, table))
        {
-         VOID(pthread_mutex_unlock(&LOCK_thread_count));
-@@ -6695,6 +6700,8 @@
+         mysql_mutex_unlock(&LOCK_thread_count);
+@@ -7220,6 +7225,8 @@
    {"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State", SKIP_OPEN_TABLE},
    {"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info",
     SKIP_OPEN_TABLE},
diff --git a/mysql-fix-dummy-thread-race-condition.patch b/mysql-fix-dummy-thread-race-condition.patch
deleted file mode 100644 (file)
index 6ed7380..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-## fix-dummy-thread-race-condition.dpatch by  <mkoegler@auto.tuwien.ac.at>
-##
-## Avoid dummy thread for pthread_exit workaround
-
---- mysql-5.1.40/mysys/my_thr_init.c~  2009-10-06 20:49:02.000000000 +0300
-+++ mysql-5.1.40/mysys/my_thr_init.c   2009-10-27 15:01:59.807275693 +0200
-@@ -47,24 +47,6 @@
- pthread_mutexattr_t my_errorcheck_mutexattr;
- #endif
--#ifdef TARGET_OS_LINUX
--
--/*
--  Dummy thread spawned in my_thread_global_init() below to avoid
--  race conditions in NPTL pthread_exit code.
--*/
--
--static pthread_handler_t
--nptl_pthread_exit_hack_handler(void *arg __attribute((unused)))
--{
--  /* Do nothing! */
--  pthread_exit(0);
--  return 0;
--}
--
--#endif /* TARGET_OS_LINUX */
--
--
- static uint get_thread_lib(void);
- /*
-@@ -89,33 +71,6 @@
-     return 1;
-   }
--#ifdef TARGET_OS_LINUX
--  /*
--    BUG#24507: Race conditions inside current NPTL pthread_exit()
--    implementation.
--
--    To avoid a possible segmentation fault during concurrent
--    executions of pthread_exit(), a dummy thread is spawned which
--    initializes internal variables of pthread lib. See bug description
--    for a full explanation.
--
--    TODO: Remove this code when fixed versions of glibc6 are in common
--    use.
--  */
--  if (thd_lib_detected == THD_LIB_NPTL)
--  {
--    pthread_t       dummy_thread;
--    pthread_attr_t  dummy_thread_attr;
--
--    pthread_attr_init(&dummy_thread_attr);
--    pthread_attr_setdetachstate(&dummy_thread_attr, PTHREAD_CREATE_JOINABLE);
--
--    if (pthread_create(&dummy_thread,&dummy_thread_attr,
--                       nptl_pthread_exit_hack_handler, NULL) == 0)
--      (void)pthread_join(dummy_thread, NULL);
--  }
--#endif /* TARGET_OS_LINUX */
--
- #ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
-   /*
-     Set mutex type to "fast" a.k.a "adaptive"
diff --git a/mysql-innodb_split_buf_pool_mutex.patch b/mysql-innodb_split_buf_pool_mutex.patch
deleted file mode 100644 (file)
index a4c1886..0000000
+++ /dev/null
@@ -1,4133 +0,0 @@
-# name       : innodb_split_buf_pool_mutex.patch
-# introduced : 11 or before
-# maintainer : Yasufumi
-#
-#!!! notice !!!
-# Any small change to this file in the main branch
-# should be done or reviewed by the maintainer!
-diff -ruN a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c
---- a/storage/innodb_plugin/btr/btr0cur.c      2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/btr/btr0cur.c      2010-08-27 16:11:40.593021205 +0900
-@@ -3764,7 +3764,8 @@
-       mtr_commit(mtr);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-       mutex_enter(&block->mutex);
-       /* Only free the block if it is still allocated to
-@@ -3775,17 +3776,22 @@
-           && buf_block_get_space(block) == space
-           && buf_block_get_page_no(block) == page_no) {
--              if (buf_LRU_free_block(&block->page, all, NULL)
-+              if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
-                   != BUF_LRU_FREED
--                  && all && block->page.zip.data) {
-+                  && all && block->page.zip.data
-+                  /* Now, buf_LRU_free_block() may release mutex temporarily */
-+                  && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
-+                  && buf_block_get_space(block) == space
-+                  && buf_block_get_page_no(block) == page_no) {
-                       /* Attempt to deallocate the uncompressed page
-                       if the whole block cannot be deallocted. */
--                      buf_LRU_free_block(&block->page, FALSE, NULL);
-+                      buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
-               }
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-       mutex_exit(&block->mutex);
- }
-diff -ruN a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c
---- a/storage/innodb_plugin/btr/btr0sea.c      2010-08-27 16:11:12.151975789 +0900
-+++ b/storage/innodb_plugin/btr/btr0sea.c      2010-08-27 16:11:40.593021205 +0900
-@@ -1199,7 +1199,7 @@
-       ulint*          offsets;
-       rw_lock_x_lock(&btr_search_latch);
--      buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-       table = btr_search_sys->hash_index;
-@@ -1285,7 +1285,7 @@
-               bpage = UT_LIST_GET_PREV(LRU, bpage);
-       }
--      buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-       rw_lock_x_unlock(&btr_search_latch);
-       if (UNIV_LIKELY_NULL(heap)) {
-@@ -1878,7 +1878,8 @@
-       rec_offs_init(offsets_);
-       rw_lock_x_lock(&btr_search_latch);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      rw_lock_x_lock(&page_hash_latch);
-       cell_count = hash_get_n_cells(btr_search_sys->hash_index);
-@@ -1886,11 +1887,13 @@
-               /* We release btr_search_latch every once in a while to
-               give other queries a chance to run. */
-               if ((i != 0) && ((i % chunk_size) == 0)) {
--                      buf_pool_mutex_exit();
-+                      //buf_pool_mutex_exit();
-+                      rw_lock_x_unlock(&page_hash_latch);
-                       rw_lock_x_unlock(&btr_search_latch);
-                       os_thread_yield();
-                       rw_lock_x_lock(&btr_search_latch);
--                      buf_pool_mutex_enter();
-+                      //buf_pool_mutex_enter();
-+                      rw_lock_x_lock(&page_hash_latch);
-               }
-               node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
-@@ -1997,11 +2000,13 @@
-               /* We release btr_search_latch every once in a while to
-               give other queries a chance to run. */
-               if (i != 0) {
--                      buf_pool_mutex_exit();
-+                      //buf_pool_mutex_exit();
-+                      rw_lock_x_unlock(&page_hash_latch);
-                       rw_lock_x_unlock(&btr_search_latch);
-                       os_thread_yield();
-                       rw_lock_x_lock(&btr_search_latch);
--                      buf_pool_mutex_enter();
-+                      //buf_pool_mutex_enter();
-+                      rw_lock_x_lock(&page_hash_latch);
-               }
-               if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
-@@ -2009,7 +2014,8 @@
-               }
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      rw_lock_x_unlock(&page_hash_latch);
-       rw_lock_x_unlock(&btr_search_latch);
-       if (UNIV_LIKELY_NULL(heap)) {
-               mem_heap_free(heap);
-diff -ruN a/storage/innodb_plugin/buf/buf0buddy.c b/storage/innodb_plugin/buf/buf0buddy.c
---- a/storage/innodb_plugin/buf/buf0buddy.c    2010-08-27 15:54:59.015990108 +0900
-+++ b/storage/innodb_plugin/buf/buf0buddy.c    2010-08-27 16:11:40.596022762 +0900
-@@ -82,10 +82,11 @@
-       if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
- #endif /* UNIV_DEBUG_VALGRIND */
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&zip_free_mutex));
-       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
-       ut_ad(buf_pool->zip_free[i].start != bpage);
--      UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
-+      UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
- #ifdef UNIV_DEBUG_VALGRIND
-       if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
-@@ -103,8 +104,8 @@
-       ulint           i)      /*!< in: index of buf_pool->zip_free[] */
- {
- #ifdef UNIV_DEBUG_VALGRIND
--      buf_page_t*     prev = UT_LIST_GET_PREV(list, bpage);
--      buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
-+      buf_page_t*     prev = UT_LIST_GET_PREV(zip_list, bpage);
-+      buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
-       if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
-       if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
-@@ -113,9 +114,10 @@
-       ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
- #endif /* UNIV_DEBUG_VALGRIND */
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&zip_free_mutex));
-       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
--      UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
-+      UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
- #ifdef UNIV_DEBUG_VALGRIND
-       if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
-@@ -134,12 +136,13 @@
- {
-       buf_page_t*     bpage;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&zip_free_mutex));
-       ut_a(i < BUF_BUDDY_SIZES);
- #ifndef UNIV_DEBUG_VALGRIND
-       /* Valgrind would complain about accessing free memory. */
--      ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
-+      ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
-                             ut_ad(buf_page_get_state(ut_list_node_313)
-                                   == BUF_BLOCK_ZIP_FREE)));
- #endif /* !UNIV_DEBUG_VALGRIND */
-@@ -182,16 +185,19 @@
- void
- buf_buddy_block_free(
- /*=================*/
--      void*   buf)    /*!< in: buffer frame to deallocate */
-+      void*   buf,    /*!< in: buffer frame to deallocate */
-+      ibool   have_page_hash_mutex)
- {
-       const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf);
-       buf_page_t*     bpage;
-       buf_block_t*    block;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       ut_ad(!mutex_own(&buf_pool_zip_mutex));
-       ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
-+      mutex_enter(&zip_hash_mutex);
-+
-       HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
-                   ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
-                         && bpage->in_zip_hash && !bpage->in_page_hash),
-@@ -203,12 +209,14 @@
-       ut_d(bpage->in_zip_hash = FALSE);
-       HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
-+      mutex_exit(&zip_hash_mutex);
-+
-       ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
-       UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
-       block = (buf_block_t*) bpage;
-       mutex_enter(&block->mutex);
--      buf_LRU_block_free_non_file_page(block);
-+      buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
-       mutex_exit(&block->mutex);
-       ut_ad(buf_buddy_n_frames > 0);
-@@ -224,7 +232,7 @@
-       buf_block_t*    block)  /*!< in: buffer frame to allocate */
- {
-       const ulint     fold = BUF_POOL_ZIP_FOLD(block);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       ut_ad(!mutex_own(&buf_pool_zip_mutex));
-       ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
-@@ -236,7 +244,10 @@
-       ut_ad(!block->page.in_page_hash);
-       ut_ad(!block->page.in_zip_hash);
-       ut_d(block->page.in_zip_hash = TRUE);
-+
-+      mutex_enter(&zip_hash_mutex);
-       HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
-+      mutex_exit(&zip_hash_mutex);
-       ut_d(buf_buddy_n_frames++);
- }
-@@ -270,7 +281,7 @@
-               bpage->state = BUF_BLOCK_ZIP_FREE;
- #ifndef UNIV_DEBUG_VALGRIND
-               /* Valgrind would complain about accessing free memory. */
--              ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
-+              ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
-                                     ut_ad(buf_page_get_state(
-                                                   ut_list_node_313)
-                                           == BUF_BLOCK_ZIP_FREE)));
-@@ -292,24 +303,28 @@
- /*================*/
-       ulint   i,      /*!< in: index of buf_pool->zip_free[],
-                       or BUF_BUDDY_SIZES */
--      ibool*  lru)    /*!< in: pointer to a variable that will be assigned
-+      ibool*  lru,    /*!< in: pointer to a variable that will be assigned
-                       TRUE if storage was allocated from the LRU list
-                       and buf_pool_mutex was temporarily released,
-                       or NULL if the LRU list should not be used */
-+      ibool   have_page_hash_mutex)
- {
-       buf_block_t*    block;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       ut_ad(!mutex_own(&buf_pool_zip_mutex));
-       if (i < BUF_BUDDY_SIZES) {
-               /* Try to allocate from the buddy system. */
-+              mutex_enter(&zip_free_mutex);
-               block = buf_buddy_alloc_zip(i);
-               if (block) {
-                       goto func_exit;
-               }
-+
-+              mutex_exit(&zip_free_mutex);
-       }
-       /* Try allocating from the buf_pool->free list. */
-@@ -326,18 +341,29 @@
-       }
-       /* Try replacing an uncompressed page in the buffer pool. */
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-+      if (have_page_hash_mutex) {
-+              rw_lock_x_unlock(&page_hash_latch);
-+      }
-       block = buf_LRU_get_free_block(0);
-       *lru = TRUE;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-+      if (have_page_hash_mutex) {
-+              rw_lock_x_lock(&page_hash_latch);
-+      }
- alloc_big:
-       buf_buddy_block_register(block);
-+      mutex_enter(&zip_free_mutex);
-       block = buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES);
- func_exit:
-       buf_buddy_stat[i].used++;
-+      mutex_exit(&zip_free_mutex);
-+
-       return(block);
- }
-@@ -353,7 +379,10 @@
- {
-       buf_page_t*     b;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+#ifdef UNIV_SYNC_DEBUG
-+      ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
-+#endif
-       switch (buf_page_get_state(bpage)) {
-       case BUF_BLOCK_ZIP_FREE:
-@@ -362,7 +391,7 @@
-       case BUF_BLOCK_FILE_PAGE:
-       case BUF_BLOCK_MEMORY:
-       case BUF_BLOCK_REMOVE_HASH:
--              ut_error;
-+              /* ut_error; */ /* optimistic */
-       case BUF_BLOCK_ZIP_DIRTY:
-               /* Cannot relocate dirty pages. */
-               return(FALSE);
-@@ -372,9 +401,17 @@
-       }
-       mutex_enter(&buf_pool_zip_mutex);
-+      mutex_enter(&zip_free_mutex);
-       if (!buf_page_can_relocate(bpage)) {
-               mutex_exit(&buf_pool_zip_mutex);
-+              mutex_exit(&zip_free_mutex);
-+              return(FALSE);
-+      }
-+
-+      if (bpage != buf_page_hash_get(bpage->space, bpage->offset)) {
-+              mutex_exit(&buf_pool_zip_mutex);
-+              mutex_exit(&zip_free_mutex);
-               return(FALSE);
-       }
-@@ -382,18 +419,21 @@
-       ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
-       /* relocate buf_pool->zip_clean */
--      b = UT_LIST_GET_PREV(list, dpage);
--      UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
-+      mutex_enter(&flush_list_mutex);
-+      b = UT_LIST_GET_PREV(zip_list, dpage);
-+      UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
-       if (b) {
--              UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
-+              UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
-       } else {
--              UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
-+              UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
-       }
-+      mutex_exit(&flush_list_mutex);
-       UNIV_MEM_INVALID(bpage, sizeof *bpage);
-       mutex_exit(&buf_pool_zip_mutex);
-+      mutex_exit(&zip_free_mutex);
-       return(TRUE);
- }
-@@ -406,13 +446,15 @@
- /*===============*/
-       void*   src,    /*!< in: block to relocate */
-       void*   dst,    /*!< in: free block to relocate to */
--      ulint   i)      /*!< in: index of buf_pool->zip_free[] */
-+      ulint   i,      /*!< in: index of buf_pool->zip_free[] */
-+      ibool   have_page_hash_mutex)
- {
-       buf_page_t*     bpage;
-       const ulint     size    = BUF_BUDDY_LOW << i;
-       ullint          usec    = ut_time_us(NULL);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&zip_free_mutex));
-       ut_ad(!mutex_own(&buf_pool_zip_mutex));
-       ut_ad(!ut_align_offset(src, size));
-       ut_ad(!ut_align_offset(dst, size));
-@@ -434,6 +476,12 @@
-               /* This is a compressed page. */
-               mutex_t*        mutex;
-+              if (!have_page_hash_mutex) {
-+                      mutex_exit(&zip_free_mutex);
-+                      mutex_enter(&LRU_list_mutex);
-+                      rw_lock_x_lock(&page_hash_latch);
-+              }
-+
-               /* The src block may be split into smaller blocks,
-               some of which may be free.  Thus, the
-               mach_read_from_4() calls below may attempt to read
-@@ -458,6 +506,11 @@
-                       added to buf_pool->page_hash yet.  Obviously,
-                       it cannot be relocated. */
-+                      if (!have_page_hash_mutex) {
-+                              mutex_enter(&zip_free_mutex);
-+                              mutex_exit(&LRU_list_mutex);
-+                              rw_lock_x_unlock(&page_hash_latch);
-+                      }
-                       return(FALSE);
-               }
-@@ -467,18 +520,27 @@
-                       For the sake of simplicity, give up. */
-                       ut_ad(page_zip_get_size(&bpage->zip) < size);
-+                      if (!have_page_hash_mutex) {
-+                              mutex_enter(&zip_free_mutex);
-+                              mutex_exit(&LRU_list_mutex);
-+                              rw_lock_x_unlock(&page_hash_latch);
-+                      }
-                       return(FALSE);
-               }
-+              /* To keep latch order */
-+              if (have_page_hash_mutex)
-+                      mutex_exit(&zip_free_mutex);
-+
-               /* The block must have been allocated, but it may
-               contain uninitialized data. */
-               UNIV_MEM_ASSERT_W(src, size);
--              mutex = buf_page_get_mutex(bpage);
-+              mutex = buf_page_get_mutex_enter(bpage);
--              mutex_enter(mutex);
-+              mutex_enter(&zip_free_mutex);
--              if (buf_page_can_relocate(bpage)) {
-+              if (mutex && buf_page_can_relocate(bpage)) {
-                       /* Relocate the compressed page. */
-                       ut_a(bpage->zip.data == src);
-                       memcpy(dst, src, size);
-@@ -493,10 +555,22 @@
-                               buddy_stat->relocated_usec
-                                       += ut_time_us(NULL) - usec;
-                       }
-+
-+                      if (!have_page_hash_mutex) {
-+                              mutex_exit(&LRU_list_mutex);
-+                              rw_lock_x_unlock(&page_hash_latch);
-+                      }
-                       return(TRUE);
-               }
--              mutex_exit(mutex);
-+              if (!have_page_hash_mutex) {
-+                      mutex_exit(&LRU_list_mutex);
-+                      rw_lock_x_unlock(&page_hash_latch);
-+              }
-+
-+              if (mutex) {
-+                      mutex_exit(mutex);
-+              }
-       } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
-               /* This must be a buf_page_t object. */
- #if UNIV_WORD_SIZE == 4
-@@ -505,10 +579,31 @@
-               about uninitialized pad bytes. */
-               UNIV_MEM_ASSERT_RW(src, size);
- #endif
-+
-+              mutex_exit(&zip_free_mutex);
-+
-+              if (!have_page_hash_mutex) {
-+                      mutex_enter(&LRU_list_mutex);
-+                      rw_lock_x_lock(&page_hash_latch);
-+              }
-+
-               if (buf_buddy_relocate_block(src, dst)) {
-+                      mutex_enter(&zip_free_mutex);
-+
-+                      if (!have_page_hash_mutex) {
-+                              mutex_exit(&LRU_list_mutex);
-+                              rw_lock_x_unlock(&page_hash_latch);
-+                      }
-                       goto success;
-               }
-+
-+              mutex_enter(&zip_free_mutex);
-+
-+              if (!have_page_hash_mutex) {
-+                      mutex_exit(&LRU_list_mutex);
-+                      rw_lock_x_unlock(&page_hash_latch);
-+              }
-       }
-       return(FALSE);
-@@ -522,13 +617,15 @@
- /*===============*/
-       void*   buf,    /*!< in: block to be freed, must not be
-                       pointed to by the buffer pool */
--      ulint   i)      /*!< in: index of buf_pool->zip_free[],
-+      ulint   i,      /*!< in: index of buf_pool->zip_free[],
-                       or BUF_BUDDY_SIZES */
-+      ibool   have_page_hash_mutex)
- {
-       buf_page_t*     bpage;
-       buf_page_t*     buddy;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&zip_free_mutex));
-       ut_ad(!mutex_own(&buf_pool_zip_mutex));
-       ut_ad(i <= BUF_BUDDY_SIZES);
-       ut_ad(buf_buddy_stat[i].used > 0);
-@@ -539,7 +636,9 @@
-       ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
-       if (i == BUF_BUDDY_SIZES) {
--              buf_buddy_block_free(buf);
-+              mutex_exit(&zip_free_mutex);
-+              buf_buddy_block_free(buf, have_page_hash_mutex);
-+              mutex_enter(&zip_free_mutex);
-               return;
-       }
-@@ -584,7 +683,7 @@
-               ut_a(bpage != buf);
-               {
--                      buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
-+                      buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
-                       UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
-                       bpage = next;
-               }
-@@ -593,13 +692,13 @@
- #ifndef UNIV_DEBUG_VALGRIND
- buddy_nonfree:
-       /* Valgrind would complain about accessing free memory. */
--      ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
-+      ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
-                             ut_ad(buf_page_get_state(ut_list_node_313)
-                                   == BUF_BLOCK_ZIP_FREE)));
- #endif /* UNIV_DEBUG_VALGRIND */
-       /* The buddy is not free. Is there a free block of this size? */
--      bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
-+      bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
-       if (bpage) {
-               /* Remove the block from the free list, because a successful
-@@ -609,7 +708,7 @@
-               buf_buddy_remove_from_free(bpage, i);
-               /* Try to relocate the buddy of buf to the free block. */
--              if (buf_buddy_relocate(buddy, bpage, i)) {
-+              if (buf_buddy_relocate(buddy, bpage, i, have_page_hash_mutex)) {
-                       ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
-                       goto buddy_free2;
-@@ -629,14 +728,14 @@
-               (Parts of the buddy can be free in
-               buf_pool->zip_free[j] with j < i.) */
--              ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
-+              ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
-                                     ut_ad(buf_page_get_state(
-                                                   ut_list_node_313)
-                                           == BUF_BLOCK_ZIP_FREE
-                                           && ut_list_node_313 != buddy)));
- #endif /* !UNIV_DEBUG_VALGRIND */
--              if (buf_buddy_relocate(buddy, buf, i)) {
-+              if (buf_buddy_relocate(buddy, buf, i, have_page_hash_mutex)) {
-                       buf = bpage;
-                       UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
-diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
---- a/storage/innodb_plugin/buf/buf0buf.c      2010-08-27 15:55:39.385322978 +0900
-+++ b/storage/innodb_plugin/buf/buf0buf.c      2010-08-27 16:11:40.603021006 +0900
-@@ -251,6 +251,12 @@
- /** mutex protecting the buffer pool struct and control blocks, except the
- read-write lock in them */
- UNIV_INTERN mutex_t           buf_pool_mutex;
-+UNIV_INTERN mutex_t           LRU_list_mutex;
-+UNIV_INTERN mutex_t           flush_list_mutex;
-+UNIV_INTERN rw_lock_t         page_hash_latch;
-+UNIV_INTERN mutex_t           free_list_mutex;
-+UNIV_INTERN mutex_t           zip_free_mutex;
-+UNIV_INTERN mutex_t           zip_hash_mutex;
- /** mutex protecting the control blocks of compressed-only pages
- (of type buf_page_t, not buf_block_t) */
- UNIV_INTERN mutex_t           buf_pool_zip_mutex;
-@@ -661,9 +667,9 @@
-       block->page.in_zip_hash = FALSE;
-       block->page.in_flush_list = FALSE;
-       block->page.in_free_list = FALSE;
--      block->in_unzip_LRU_list = FALSE;
- #endif /* UNIV_DEBUG */
-       block->page.in_LRU_list = FALSE;
-+      block->in_unzip_LRU_list = FALSE;
- #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-       block->n_pointers = 0;
- #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-@@ -748,8 +754,10 @@
-               memset(block->frame, '\0', UNIV_PAGE_SIZE);
- #endif
-               /* Add the block to the free list */
--              UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
-+              mutex_enter(&free_list_mutex);
-+              UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
-               ut_d(block->page.in_free_list = TRUE);
-+              mutex_exit(&free_list_mutex);
-               block++;
-               frame += UNIV_PAGE_SIZE;
-@@ -774,7 +782,7 @@
-       ulint           i;
-       ut_ad(buf_pool);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       block = chunk->blocks;
-@@ -826,7 +834,7 @@
-       ulint           i;
-       ut_ad(buf_pool);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own()); /*optimistic...*/
-       block = chunk->blocks;
-@@ -878,7 +886,7 @@
-       ulint                   i;
-       ut_ad(buf_pool);
--      ut_ad(buf_pool_mutex_own());
-+      ut_ad(buf_pool_mutex_own()); /* but we need all mutex here */
-       block = chunk->blocks;
-@@ -904,7 +912,7 @@
-       buf_block_t*            block;
-       const buf_block_t*      block_end;
--      ut_ad(buf_pool_mutex_own());
-+      ut_ad(buf_pool_mutex_own()); /* but we need all mutex here */
-       block_end = chunk->blocks + chunk->size;
-@@ -916,8 +924,10 @@
-               ut_ad(!block->in_unzip_LRU_list);
-               ut_ad(!block->page.in_flush_list);
-               /* Remove the block from the free list. */
-+              mutex_enter(&free_list_mutex);
-               ut_ad(block->page.in_free_list);
--              UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
-+              UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
-+              mutex_exit(&free_list_mutex);
-               /* Free the latches. */
-               mutex_free(&block->mutex);
-@@ -947,8 +957,17 @@
-       /* 1. Initialize general fields
-       ------------------------------- */
-       mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
-+      mutex_create(&LRU_list_mutex, SYNC_BUF_LRU_LIST);
-+      mutex_create(&flush_list_mutex, SYNC_BUF_FLUSH_LIST);
-+      rw_lock_create(&page_hash_latch, SYNC_BUF_PAGE_HASH);
-+      mutex_create(&free_list_mutex, SYNC_BUF_FREE_LIST);
-+      mutex_create(&zip_free_mutex, SYNC_BUF_ZIP_FREE);
-+      mutex_create(&zip_hash_mutex, SYNC_BUF_ZIP_HASH);
-+
-       mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
-+      mutex_enter(&LRU_list_mutex);
-+      rw_lock_x_lock(&page_hash_latch);
-       buf_pool_mutex_enter();
-       buf_pool->n_chunks = 1;
-@@ -983,6 +1002,8 @@
-       --------------------------- */
-       /* All fields are initialized by mem_zalloc(). */
-+      mutex_exit(&LRU_list_mutex);
-+      rw_lock_x_unlock(&page_hash_latch);
-       buf_pool_mutex_exit();
-       btr_search_sys_create(buf_pool->curr_size
-@@ -1120,7 +1141,11 @@
-       buf_page_t*     b;
-       ulint           fold;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-+#ifdef UNIV_SYNC_DEBUG
-+      ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
-+#endif
-       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-       ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
-       ut_a(bpage->buf_fix_count == 0);
-@@ -1204,7 +1229,8 @@
- try_again:
-       btr_search_disable(); /* Empty the adaptive hash index again */
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
- shrink_again:
-       if (buf_pool->n_chunks <= 1) {
-@@ -1275,7 +1301,7 @@
-                               buf_LRU_make_block_old(&block->page);
-                               dirty++;
--                      } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
-+                      } else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
-                                  != BUF_LRU_FREED) {
-                               nonfree++;
-                       }
-@@ -1283,7 +1309,8 @@
-                       mutex_exit(&block->mutex);
-               }
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-               /* Request for a flush of the chunk if it helps.
-               Do not flush if there are non-free blocks, since
-@@ -1332,7 +1359,8 @@
- func_done:
-       srv_buf_pool_old_size = srv_buf_pool_size;
- func_exit:
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-       btr_search_enable();
- }
-@@ -1350,7 +1378,11 @@
-       hash_table_t*   zip_hash;
-       buf_page_t*     b;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-+      rw_lock_x_lock(&page_hash_latch);
-+      mutex_enter(&flush_list_mutex);
-+      
-       /* Free, create, and populate the hash table. */
-       hash_table_free(buf_pool->page_hash);
-@@ -1392,7 +1424,7 @@
-       in buf_pool->flush_list. */
-       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
--           b = UT_LIST_GET_NEXT(list, b)) {
-+           b = UT_LIST_GET_NEXT(zip_list, b)) {
-               ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
-               ut_ad(!b->in_flush_list);
-               ut_ad(b->in_LRU_list);
-@@ -1404,7 +1436,7 @@
-       }
-       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
--           b = UT_LIST_GET_NEXT(list, b)) {
-+           b = UT_LIST_GET_NEXT(flush_list, b)) {
-               ut_ad(b->in_flush_list);
-               ut_ad(b->in_LRU_list);
-               ut_ad(b->in_page_hash);
-@@ -1430,7 +1462,10 @@
-               }
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-+      rw_lock_x_unlock(&page_hash_latch);
-+      mutex_exit(&flush_list_mutex);
- }
- /********************************************************************//**
-@@ -1440,17 +1475,20 @@
- buf_pool_resize(void)
- /*=================*/
- {
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-       if (srv_buf_pool_old_size == srv_buf_pool_size) {
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-               return;
-       }
-       if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-               /* Disable adaptive hash indexes and empty the index
-               in order to free up memory in the buffer pool chunks. */
-@@ -1484,7 +1522,8 @@
-               }
-               srv_buf_pool_old_size = srv_buf_pool_size;
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-       }
-       buf_pool_page_hash_rebuild();
-@@ -1500,13 +1539,15 @@
- /*================*/
-       buf_page_t*     bpage)  /*!< in: buffer block of a file page */
- {
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-       ut_a(buf_page_in_file(bpage));
-       buf_LRU_make_block_young(bpage);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
- }
- /********************************************************************//**
-@@ -1528,14 +1569,20 @@
-       ut_a(buf_page_in_file(bpage));
-       if (buf_page_peek_if_too_old(bpage)) {
--              buf_pool_mutex_enter();
-+              //buf_pool_mutex_enter();
-+              mutex_enter(&LRU_list_mutex);
-               buf_LRU_make_block_young(bpage);
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-       } else if (!access_time) {
-               ulint   time_ms = ut_time_ms();
--              buf_pool_mutex_enter();
-+              mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
-+              //buf_pool_mutex_enter();
-+              if (block_mutex) {
-               buf_page_set_accessed(bpage, time_ms);
--              buf_pool_mutex_exit();
-+              mutex_exit(block_mutex);
-+              }
-+              //buf_pool_mutex_exit();
-       }
- }
-@@ -1551,7 +1598,8 @@
- {
-       buf_block_t*    block;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      rw_lock_s_lock(&page_hash_latch);
-       block = (buf_block_t*) buf_page_hash_get(space, offset);
-@@ -1559,7 +1607,8 @@
-               block->check_index_page_at_flush = FALSE;
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      rw_lock_s_unlock(&page_hash_latch);
- }
- /********************************************************************//**
-@@ -1577,7 +1626,8 @@
-       buf_block_t*    block;
-       ibool           is_hashed;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      rw_lock_s_lock(&page_hash_latch);
-       block = (buf_block_t*) buf_page_hash_get(space, offset);
-@@ -1587,7 +1637,8 @@
-               is_hashed = block->is_hashed;
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      rw_lock_s_unlock(&page_hash_latch);
-       return(is_hashed);
- }
-@@ -1608,7 +1659,8 @@
- {
-       buf_page_t*     bpage;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      rw_lock_s_lock(&page_hash_latch);
-       bpage = buf_page_hash_get(space, offset);
-@@ -1616,7 +1668,8 @@
-               bpage->file_page_was_freed = TRUE;
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      rw_lock_s_unlock(&page_hash_latch);
-       return(bpage);
- }
-@@ -1636,7 +1689,8 @@
- {
-       buf_page_t*     bpage;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      rw_lock_s_lock(&page_hash_latch);
-       bpage = buf_page_hash_get(space, offset);
-@@ -1644,7 +1698,8 @@
-               bpage->file_page_was_freed = FALSE;
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      rw_lock_s_unlock(&page_hash_latch);
-       return(bpage);
- }
-@@ -1678,8 +1733,9 @@
-       buf_pool->stat.n_page_gets++;
-       for (;;) {
--              buf_pool_mutex_enter();
-+              //buf_pool_mutex_enter();
- lookup:
-+              rw_lock_s_lock(&page_hash_latch);
-               bpage = buf_page_hash_get(space, offset);
-               if (bpage) {
-                       break;
-@@ -1687,7 +1743,8 @@
-               /* Page not in buf_pool: needs to be read from file */
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              rw_lock_s_unlock(&page_hash_latch);
-               buf_read_page(space, zip_size, offset);
-@@ -1699,29 +1756,34 @@
-       if (UNIV_UNLIKELY(!bpage->zip.data)) {
-               /* There is no compressed page. */
- err_exit:
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              rw_lock_s_unlock(&page_hash_latch);
-               return(NULL);
-       }
-+      block_mutex = buf_page_get_mutex_enter(bpage);
-+
-+      rw_lock_s_unlock(&page_hash_latch);
-+
-       switch (buf_page_get_state(bpage)) {
-       case BUF_BLOCK_NOT_USED:
-       case BUF_BLOCK_READY_FOR_USE:
-       case BUF_BLOCK_MEMORY:
-       case BUF_BLOCK_REMOVE_HASH:
-       case BUF_BLOCK_ZIP_FREE:
-+              if (block_mutex)
-+                      mutex_exit(block_mutex);
-               break;
-       case BUF_BLOCK_ZIP_PAGE:
-       case BUF_BLOCK_ZIP_DIRTY:
--              block_mutex = &buf_pool_zip_mutex;
--              mutex_enter(block_mutex);
-+              ut_a(block_mutex == &buf_pool_zip_mutex);
-               bpage->buf_fix_count++;
-               goto got_block;
-       case BUF_BLOCK_FILE_PAGE:
--              block_mutex = &((buf_block_t*) bpage)->mutex;
--              mutex_enter(block_mutex);
-+              ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
-               /* Discard the uncompressed page frame if possible. */
--              if (buf_LRU_free_block(bpage, FALSE, NULL)
-+              if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
-                   == BUF_LRU_FREED) {
-                       mutex_exit(block_mutex);
-@@ -1740,7 +1802,7 @@
-       must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
-       access_time = buf_page_is_accessed(bpage);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-       mutex_exit(block_mutex);
-@@ -1995,7 +2057,7 @@
-       const buf_block_t*      block)  /*!< in: pointer to block,
-                                       not dereferenced */
- {
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
-               /* The pointer should be aligned. */
-@@ -2029,6 +2091,7 @@
-       ulint           fix_type;
-       ibool           must_read;
-       ulint           retries = 0;
-+      mutex_t*        block_mutex;
-       ut_ad(mtr);
-       ut_ad(mtr->state == MTR_ACTIVE);
-@@ -2046,9 +2109,11 @@
-       buf_pool->stat.n_page_gets++;
- loop:
-       block = guess;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-       if (block) {
-+              block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
-+
-               /* If the guess is a compressed page descriptor that
-               has been allocated by buf_buddy_alloc(), it may have
-               been invalidated by buf_buddy_relocate().  In that
-@@ -2057,11 +2122,15 @@
-               the guess may be pointing to a buffer pool chunk that
-               has been released when resizing the buffer pool. */
--              if (!buf_block_is_uncompressed(block)
-+              if (!block_mutex) {
-+                      block = guess = NULL;
-+              } else if (!buf_block_is_uncompressed(block)
-                   || offset != block->page.offset
-                   || space != block->page.space
-                   || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
-+                      mutex_exit(block_mutex);
-+
-                       block = guess = NULL;
-               } else {
-                       ut_ad(!block->page.in_zip_hash);
-@@ -2070,14 +2139,20 @@
-       }
-       if (block == NULL) {
-+              rw_lock_s_lock(&page_hash_latch);
-               block = (buf_block_t*) buf_page_hash_get(space, offset);
-+              if (block) {
-+                      block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
-+                      ut_a(block_mutex);
-+              }
-+              rw_lock_s_unlock(&page_hash_latch);
-       }
- loop2:
-       if (block == NULL) {
-               /* Page not in buf_pool: needs to be read from file */
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-               if (mode == BUF_GET_IF_IN_POOL) {
-@@ -2120,7 +2195,8 @@
-       if (must_read && mode == BUF_GET_IF_IN_POOL) {
-               /* The page is only being read to buffer */
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(block_mutex);
-               return(NULL);
-       }
-@@ -2130,38 +2206,50 @@
-               ibool           success;
-       case BUF_BLOCK_FILE_PAGE:
-+              if (block_mutex == &buf_pool_zip_mutex) {
-+                      /* it is the wrong mutex... */
-+                      mutex_exit(block_mutex);
-+                      goto loop;
-+              }
-               break;
-       case BUF_BLOCK_ZIP_PAGE:
-       case BUF_BLOCK_ZIP_DIRTY:
-+              ut_ad(block_mutex == &buf_pool_zip_mutex);
-               bpage = &block->page;
-               /* Protect bpage->buf_fix_count. */
--              mutex_enter(&buf_pool_zip_mutex);
-+              /* Already protected here. */
-+              //mutex_enter(&buf_pool_zip_mutex);
-               if (bpage->buf_fix_count
-                   || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
-                       /* This condition often occurs when the buffer
-                       is not buffer-fixed, but I/O-fixed by
-                       buf_page_init_for_read(). */
--                      mutex_exit(&buf_pool_zip_mutex);
-+                      //mutex_exit(&buf_pool_zip_mutex);
- wait_until_unfixed:
-                       /* The block is buffer-fixed or I/O-fixed.
-                       Try again later. */
--                      buf_pool_mutex_exit();
-+                      //buf_pool_mutex_exit();
-+                      mutex_exit(block_mutex);
-                       os_thread_sleep(WAIT_FOR_READ);
-                       goto loop;
-               }
-               /* Allocate an uncompressed page. */
--              buf_pool_mutex_exit();
--              mutex_exit(&buf_pool_zip_mutex);
-+              //buf_pool_mutex_exit();
-+              //mutex_exit(&buf_pool_zip_mutex);
-+              mutex_exit(block_mutex);
-               block = buf_LRU_get_free_block(0);
-               ut_a(block);
-+              block_mutex = &block->mutex;
--              buf_pool_mutex_enter();
--              mutex_enter(&block->mutex);
-+              //buf_pool_mutex_enter();
-+              mutex_enter(&LRU_list_mutex);
-+              rw_lock_x_lock(&page_hash_latch);
-+              mutex_enter(block_mutex);
-               {
-                       buf_page_t*     hash_bpage
-@@ -2172,35 +2260,49 @@
-                               while buf_pool_mutex was released.
-                               Free the block that was allocated. */
--                              buf_LRU_block_free_non_file_page(block);
--                              mutex_exit(&block->mutex);
-+                              buf_LRU_block_free_non_file_page(block, TRUE);
-+                              mutex_exit(block_mutex);
-                               block = (buf_block_t*) hash_bpage;
-+                              if (block) {
-+                                      block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
-+                                      ut_a(block_mutex);
-+                              }
-+                              rw_lock_x_unlock(&page_hash_latch);
-+                              mutex_exit(&LRU_list_mutex);
-                               goto loop2;
-                       }
-               }
-+              mutex_enter(&buf_pool_zip_mutex);
-+
-               if (UNIV_UNLIKELY
-                   (bpage->buf_fix_count
-                    || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
-+                      mutex_exit(&buf_pool_zip_mutex);
-                       /* The block was buffer-fixed or I/O-fixed
-                       while buf_pool_mutex was not held by this thread.
-                       Free the block that was allocated and try again.
-                       This should be extremely unlikely. */
--                      buf_LRU_block_free_non_file_page(block);
--                      mutex_exit(&block->mutex);
-+                      buf_LRU_block_free_non_file_page(block, TRUE);
-+                      //mutex_exit(&block->mutex);
-+                      rw_lock_x_unlock(&page_hash_latch);
-+                      mutex_exit(&LRU_list_mutex);
-                       goto wait_until_unfixed;
-               }
-               /* Move the compressed page from bpage to block,
-               and uncompress it. */
--              mutex_enter(&buf_pool_zip_mutex);
-+              mutex_enter(&flush_list_mutex);
-               buf_relocate(bpage, &block->page);
-+
-+              rw_lock_x_unlock(&page_hash_latch);
-+
-               buf_block_init_low(block);
-               block->lock_hash_val = lock_rec_hash(space, offset);
-@@ -2209,7 +2311,7 @@
-               if (buf_page_get_state(&block->page)
-                   == BUF_BLOCK_ZIP_PAGE) {
--                      UT_LIST_REMOVE(list, buf_pool->zip_clean,
-+                      UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
-                                      &block->page);
-                       ut_ad(!block->page.in_flush_list);
-               } else {
-@@ -2218,6 +2320,8 @@
-                                                        &block->page);
-               }
-+              mutex_exit(&flush_list_mutex);
-+
-               /* Buffer-fix, I/O-fix, and X-latch the block
-               for the duration of the decompression.
-               Also add the block to the unzip_LRU list. */
-@@ -2226,19 +2330,24 @@
-               /* Insert at the front of unzip_LRU list */
-               buf_unzip_LRU_add_block(block, FALSE);
-+              mutex_exit(&LRU_list_mutex);
-+
-               block->page.buf_fix_count = 1;
-               buf_block_set_io_fix(block, BUF_IO_READ);
-               rw_lock_x_lock_func(&block->lock, 0, file, line);
-               UNIV_MEM_INVALID(bpage, sizeof *bpage);
--              mutex_exit(&block->mutex);
-+              mutex_exit(block_mutex);
-               mutex_exit(&buf_pool_zip_mutex);
-+
-+              mutex_enter(&buf_pool_mutex);
-               buf_pool->n_pend_unzip++;
-+              mutex_exit(&buf_pool_mutex);
--              buf_buddy_free(bpage, sizeof *bpage);
-+              buf_buddy_free(bpage, sizeof *bpage, FALSE);
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-               /* Decompress the page and apply buffered operations
-               while not holding buf_pool_mutex or block->mutex. */
-@@ -2251,12 +2360,15 @@
-               }
-               /* Unfix and unlatch the block. */
--              buf_pool_mutex_enter();
--              mutex_enter(&block->mutex);
-+              //buf_pool_mutex_enter();
-+              block_mutex = &block->mutex;
-+              mutex_enter(block_mutex);
-               block->page.buf_fix_count--;
-               buf_block_set_io_fix(block, BUF_IO_NONE);
--              mutex_exit(&block->mutex);
-+
-+              mutex_enter(&buf_pool_mutex);
-               buf_pool->n_pend_unzip--;
-+              mutex_exit(&buf_pool_mutex);
-               rw_lock_x_unlock(&block->lock);
-               break;
-@@ -2271,7 +2383,7 @@
-       ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
--      mutex_enter(&block->mutex);
-+      //mutex_enter(&block->mutex);
- #if UNIV_WORD_SIZE == 4
-       /* On 32-bit systems, there is no padding in buf_page_t.  On
-       other systems, Valgrind could complain about uninitialized pad
-@@ -2305,13 +2417,14 @@
-       buf_block_buf_fix_inc(block, file, line);
--      mutex_exit(&block->mutex);
-+      //mutex_exit(&block->mutex);
-       /* Check if this is the first access to the page */
-       access_time = buf_page_is_accessed(&block->page);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(block_mutex);
-       buf_page_set_accessed_make_young(&block->page, access_time);
-@@ -2539,9 +2652,11 @@
-       mutex_exit(&block->mutex);
-       if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
--              buf_pool_mutex_enter();
-+              //buf_pool_mutex_enter();
-+              mutex_enter(&LRU_list_mutex);
-               buf_LRU_make_block_young(&block->page);
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-       } else if (!buf_page_is_accessed(&block->page)) {
-               /* Above, we do a dirty read on purpose, to avoid
-               mutex contention.  The field buf_page_t::access_time
-@@ -2549,9 +2664,11 @@
-               field must be protected by mutex, however. */
-               ulint   time_ms = ut_time_ms();
--              buf_pool_mutex_enter();
-+              //buf_pool_mutex_enter();
-+              mutex_enter(&block->mutex);
-               buf_page_set_accessed(&block->page, time_ms);
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&block->mutex);
-       }
-       ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
-@@ -2617,16 +2734,19 @@
-       ut_ad(mtr);
-       ut_ad(mtr->state == MTR_ACTIVE);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      rw_lock_s_lock(&page_hash_latch);
-       block = buf_block_hash_get(space_id, page_no);
-       if (!block) {
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              rw_lock_s_unlock(&page_hash_latch);
-               return(NULL);
-       }
-       mutex_enter(&block->mutex);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      rw_lock_s_unlock(&page_hash_latch);
- #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-       ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-@@ -2713,7 +2833,10 @@
- {
-       buf_page_t*     hash_page;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+#ifdef UNIV_SYNC_DEBUG
-+      ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
-+#endif
-       ut_ad(mutex_own(&(block->mutex)));
-       ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
-@@ -2746,7 +2869,8 @@
-                       (const void*) hash_page, (const void*) block);
- #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-               mutex_exit(&block->mutex);
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              rw_lock_x_unlock(&page_hash_latch);
-               buf_print();
-               buf_LRU_print();
-               buf_validate();
-@@ -2825,16 +2949,24 @@
-               ut_ad(block);
-       }
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-+      rw_lock_x_lock(&page_hash_latch);
-       if (buf_page_hash_get(space, offset)) {
-               /* The page is already in the buffer pool. */
- err_exit:
-               if (block) {
-                       mutex_enter(&block->mutex);
--                      buf_LRU_block_free_non_file_page(block);
-+                      mutex_exit(&LRU_list_mutex);
-+                      rw_lock_x_unlock(&page_hash_latch);
-+                      buf_LRU_block_free_non_file_page(block, FALSE);
-                       mutex_exit(&block->mutex);
-               }
-+              else {
-+                      mutex_exit(&LRU_list_mutex);
-+                      rw_lock_x_unlock(&page_hash_latch);
-+              }
-               bpage = NULL;
-               goto func_exit;
-@@ -2854,6 +2986,8 @@
-               mutex_enter(&block->mutex);
-               buf_page_init(space, offset, block);
-+              rw_lock_x_unlock(&page_hash_latch);
-+
-               /* The block must be put to the LRU list, to the old blocks */
-               buf_LRU_add_block(bpage, TRUE/* to old blocks */);
-@@ -2881,7 +3015,7 @@
-                       been added to buf_pool->LRU and
-                       buf_pool->page_hash. */
-                       mutex_exit(&block->mutex);
--                      data = buf_buddy_alloc(zip_size, &lru);
-+                      data = buf_buddy_alloc(zip_size, &lru, FALSE);
-                       mutex_enter(&block->mutex);
-                       block->page.zip.data = data;
-@@ -2894,6 +3028,7 @@
-                       buf_unzip_LRU_add_block(block, TRUE);
-               }
-+              mutex_exit(&LRU_list_mutex);
-               mutex_exit(&block->mutex);
-       } else {
-               /* Defer buf_buddy_alloc() until after the block has
-@@ -2905,8 +3040,8 @@
-               control block (bpage), in order to avoid the
-               invocation of buf_buddy_relocate_block() on
-               uninitialized data. */
--              data = buf_buddy_alloc(zip_size, &lru);
--              bpage = buf_buddy_alloc(sizeof *bpage, &lru);
-+              data = buf_buddy_alloc(zip_size, &lru, TRUE);
-+              bpage = buf_buddy_alloc(sizeof *bpage, &lru, TRUE);
-               /* If buf_buddy_alloc() allocated storage from the LRU list,
-               it released and reacquired buf_pool_mutex.  Thus, we must
-@@ -2915,8 +3050,11 @@
-                   && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
-                       /* The block was added by some other thread. */
--                      buf_buddy_free(bpage, sizeof *bpage);
--                      buf_buddy_free(data, zip_size);
-+                      buf_buddy_free(bpage, sizeof *bpage, TRUE);
-+                      buf_buddy_free(data, zip_size, TRUE);
-+
-+                      mutex_exit(&LRU_list_mutex);
-+                      rw_lock_x_unlock(&page_hash_latch);
-                       bpage = NULL;
-                       goto func_exit;
-@@ -2946,18 +3084,26 @@
-               HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
-                           buf_page_address_fold(space, offset), bpage);
-+              rw_lock_x_unlock(&page_hash_latch);
-+
-               /* The block must be put to the LRU list, to the old blocks */
-               buf_LRU_add_block(bpage, TRUE/* to old blocks */);
-+              mutex_enter(&flush_list_mutex);
-               buf_LRU_insert_zip_clean(bpage);
-+              mutex_exit(&flush_list_mutex);
-+
-+              mutex_exit(&LRU_list_mutex);
-               buf_page_set_io_fix(bpage, BUF_IO_READ);
-               mutex_exit(&buf_pool_zip_mutex);
-       }
-+      mutex_enter(&buf_pool_mutex);
-       buf_pool->n_pend_reads++;
-+      mutex_exit(&buf_pool_mutex);
- func_exit:
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-       if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-@@ -2995,7 +3141,9 @@
-       free_block = buf_LRU_get_free_block(0);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-+      rw_lock_x_lock(&page_hash_latch);
-       block = (buf_block_t*) buf_page_hash_get(space, offset);
-@@ -3008,7 +3156,9 @@
- #endif /* UNIV_DEBUG_FILE_ACCESSES */
-               /* Page can be found in buf_pool */
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-+              rw_lock_x_unlock(&page_hash_latch);
-               buf_block_free(free_block);
-@@ -3030,6 +3180,7 @@
-       mutex_enter(&block->mutex);
-       buf_page_init(space, offset, block);
-+      rw_lock_x_unlock(&page_hash_latch);
-       /* The block must be put to the LRU list */
-       buf_LRU_add_block(&block->page, FALSE);
-@@ -3056,7 +3207,7 @@
-               the reacquisition of buf_pool_mutex.  We also must
-               defer this operation until after the block descriptor
-               has been added to buf_pool->LRU and buf_pool->page_hash. */
--              data = buf_buddy_alloc(zip_size, &lru);
-+              data = buf_buddy_alloc(zip_size, &lru, FALSE);
-               mutex_enter(&block->mutex);
-               block->page.zip.data = data;
-@@ -3074,7 +3225,8 @@
-       buf_page_set_accessed(&block->page, time_ms);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-       mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
-@@ -3124,6 +3276,8 @@
-       enum buf_io_fix io_type;
-       const ibool     uncompressed = (buf_page_get_state(bpage)
-                                       == BUF_BLOCK_FILE_PAGE);
-+      enum buf_flush  flush_type;
-+      mutex_t*        block_mutex;
-       ut_a(buf_page_in_file(bpage));
-@@ -3257,8 +3411,17 @@
-               }
-       }
--      buf_pool_mutex_enter();
--      mutex_enter(buf_page_get_mutex(bpage));
-+      //buf_pool_mutex_enter();
-+      if (io_type == BUF_IO_WRITE) {
-+              flush_type = buf_page_get_flush_type(bpage);
-+              /* to keep consistency at buf_LRU_insert_zip_clean() */
-+              //if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
-+                      mutex_enter(&LRU_list_mutex);
-+              //}
-+      }
-+      block_mutex = buf_page_get_mutex_enter(bpage);
-+      ut_a(block_mutex);
-+      mutex_enter(&buf_pool_mutex);
- #ifdef UNIV_IBUF_COUNT_DEBUG
-       if (io_type == BUF_IO_WRITE || uncompressed) {
-@@ -3298,6 +3461,11 @@
-               buf_flush_write_complete(bpage);
-+              /* to keep consistency at buf_LRU_insert_zip_clean() */
-+              //if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
-+                      mutex_exit(&LRU_list_mutex);
-+              //}
-+
-               if (uncompressed) {
-                       rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
-                                            BUF_IO_WRITE);
-@@ -3320,8 +3488,9 @@
-       }
- #endif /* UNIV_DEBUG */
--      mutex_exit(buf_page_get_mutex(bpage));
--      buf_pool_mutex_exit();
-+      mutex_exit(&buf_pool_mutex);
-+      mutex_exit(block_mutex);
-+      //buf_pool_mutex_exit();
- }
- /*********************************************************************//**
-@@ -3368,7 +3537,8 @@
-               freed = buf_LRU_search_and_free_block(100);
-       }
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-       ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
-       ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
-@@ -3381,7 +3551,8 @@
-       memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
-       buf_refresh_io_stats();
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
- }
- #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-@@ -3406,7 +3577,10 @@
-       ut_ad(buf_pool);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-+      rw_lock_x_lock(&page_hash_latch);
-+      /* to keep the new latch order, it cannot validate correctly... */
-       chunk = buf_pool->chunks;
-@@ -3505,7 +3679,7 @@
-       /* Check clean compressed-only blocks. */
-       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
--           b = UT_LIST_GET_NEXT(list, b)) {
-+           b = UT_LIST_GET_NEXT(zip_list, b)) {
-               ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
-               switch (buf_page_get_io_fix(b)) {
-               case BUF_IO_NONE:
-@@ -3530,8 +3704,9 @@
-       /* Check dirty compressed-only blocks. */
-+      mutex_enter(&flush_list_mutex);
-       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
--           b = UT_LIST_GET_NEXT(list, b)) {
-+           b = UT_LIST_GET_NEXT(flush_list, b)) {
-               ut_ad(b->in_flush_list);
-               switch (buf_page_get_state(b)) {
-@@ -3576,6 +3751,7 @@
-               }
-               ut_a(buf_page_hash_get(b->space, b->offset) == b);
-       }
-+      mutex_exit(&flush_list_mutex);
-       mutex_exit(&buf_pool_zip_mutex);
-@@ -3587,19 +3763,27 @@
-       }
-       ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
-+      /* because of latching order with block->mutex, we cannot get free_list_mutex before that */
-+/*
-       if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
-               fprintf(stderr, "Free list len %lu, free blocks %lu\n",
-                       (ulong) UT_LIST_GET_LEN(buf_pool->free),
-                       (ulong) n_free);
-               ut_error;
-       }
-+*/
-+      /* because of latching order with block->mutex, we cannot get flush_list_mutex before that */
-+/*
-       ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
-       ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
-       ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
-       ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
-+*/
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-+      rw_lock_x_unlock(&page_hash_latch);
-       ut_a(buf_LRU_validate());
-       ut_a(buf_flush_validate());
-@@ -3633,7 +3817,10 @@
-       index_ids = mem_alloc(sizeof(dulint) * size);
-       counts = mem_alloc(sizeof(ulint) * size);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-+      mutex_enter(&free_list_mutex);
-+      mutex_enter(&flush_list_mutex);
-       fprintf(stderr,
-               "buf_pool size %lu\n"
-@@ -3700,7 +3887,10 @@
-               }
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-+      mutex_exit(&free_list_mutex);
-+      mutex_exit(&flush_list_mutex);
-       for (i = 0; i < n_found; i++) {
-               index = dict_index_get_if_in_cache(index_ids[i]);
-@@ -3739,7 +3929,7 @@
-       ulint           i;
-       ulint           fixed_pages_number = 0;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-       chunk = buf_pool->chunks;
-@@ -3773,7 +3963,7 @@
-       /* Traverse the lists of clean and dirty compressed-only blocks. */
-       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
--           b = UT_LIST_GET_NEXT(list, b)) {
-+           b = UT_LIST_GET_NEXT(zip_list, b)) {
-               ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
-               ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
-@@ -3783,8 +3973,9 @@
-               }
-       }
-+      mutex_enter(&flush_list_mutex);
-       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
--           b = UT_LIST_GET_NEXT(list, b)) {
-+           b = UT_LIST_GET_NEXT(flush_list, b)) {
-               ut_ad(b->in_flush_list);
-               switch (buf_page_get_state(b)) {
-@@ -3807,9 +3998,10 @@
-                       break;
-               }
-       }
-+      mutex_exit(&flush_list_mutex);
-       mutex_exit(&buf_pool_zip_mutex);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-       return(fixed_pages_number);
- }
-@@ -3867,7 +4059,11 @@
-       ut_ad(buf_pool);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-+      mutex_enter(&free_list_mutex);
-+      mutex_enter(&buf_pool_mutex);
-+      mutex_enter(&flush_list_mutex);
-       fprintf(file,
-               "Buffer pool size        %lu\n"
-@@ -3966,7 +4162,11 @@
-               buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
-       buf_refresh_io_stats();
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-+      mutex_exit(&free_list_mutex);
-+      mutex_exit(&buf_pool_mutex);
-+      mutex_exit(&flush_list_mutex);
- }
- /**********************************************************************//**
-@@ -3993,7 +4193,7 @@
-       ut_ad(buf_pool);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter(); /* optimistic */
-       chunk = buf_pool->chunks;
-@@ -4010,7 +4210,7 @@
-               }
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit(); /* optimistic */
-       return(TRUE);
- }
-@@ -4026,7 +4226,8 @@
- {
-       ibool   ret;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&buf_pool_mutex);
-       if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
-           + buf_pool->n_flush[BUF_FLUSH_LIST]
-@@ -4036,7 +4237,8 @@
-               ret = TRUE;
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&buf_pool_mutex);
-       return(ret);
- }
-@@ -4051,11 +4253,13 @@
- {
-       ulint   len;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&free_list_mutex);
-       len = UT_LIST_GET_LEN(buf_pool->free);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&free_list_mutex);
-       return(len);
- }
-diff -ruN a/storage/innodb_plugin/buf/buf0flu.c b/storage/innodb_plugin/buf/buf0flu.c
---- a/storage/innodb_plugin/buf/buf0flu.c      2010-08-27 15:54:59.022021357 +0900
-+++ b/storage/innodb_plugin/buf/buf0flu.c      2010-08-27 16:11:40.607020890 +0900
-@@ -102,7 +102,8 @@
-       const ib_rbt_node_t*    c_node;
-       const ib_rbt_node_t*    p_node;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&flush_list_mutex));
-       /* Insert this buffer into the rbt. */
-       c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
-@@ -132,7 +133,8 @@
-       ibool   ret = FALSE;
- #endif /* UNIV_DEBUG */
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&flush_list_mutex));
- #ifdef UNIV_DEBUG
-       ret =
- #endif /* UNIV_DEBUG */
-@@ -199,12 +201,14 @@
- buf_flush_init_flush_rbt(void)
- /*==========================*/
- {
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&flush_list_mutex);
-       /* Create red black tree for speedy insertions in flush list. */
-       buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
-                                        buf_flush_block_cmp);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&flush_list_mutex);
- }
- /********************************************************************//**
-@@ -214,7 +218,8 @@
- buf_flush_free_flush_rbt(void)
- /*==========================*/
- {
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&flush_list_mutex);
- #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-       ut_a(buf_flush_validate_low());
-@@ -223,7 +228,8 @@
-       rbt_free(buf_pool->flush_rbt);
-       buf_pool->flush_rbt = NULL;
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&flush_list_mutex);
- }
- /********************************************************************//**
-@@ -234,7 +240,9 @@
- /*=============================*/
-       buf_block_t*    block)  /*!< in/out: block which is modified */
- {
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&block->mutex));
-+      ut_ad(mutex_own(&flush_list_mutex));
-       ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
-             || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
-                 <= block->page.oldest_modification));
-@@ -252,7 +260,7 @@
-       ut_ad(!block->page.in_zip_hash);
-       ut_ad(!block->page.in_flush_list);
-       ut_d(block->page.in_flush_list = TRUE);
--      UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
-+      UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
- #ifdef UNIV_DEBUG_VALGRIND
-       {
-@@ -283,7 +291,9 @@
-       buf_page_t*     prev_b;
-       buf_page_t*     b;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&block->mutex));
-+      ut_ad(mutex_own(&flush_list_mutex));
-       ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-       ut_ad(block->page.in_LRU_list);
-@@ -324,14 +334,14 @@
-                      > block->page.oldest_modification) {
-                       ut_ad(b->in_flush_list);
-                       prev_b = b;
--                      b = UT_LIST_GET_NEXT(list, b);
-+                      b = UT_LIST_GET_NEXT(flush_list, b);
-               }
-       }
-       if (prev_b == NULL) {
--              UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
-+              UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
-       } else {
--              UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
-+              UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
-                                    prev_b, &block->page);
-       }
-@@ -352,7 +362,7 @@
-                               buf_page_in_file(bpage) and in the LRU list */
- {
-       //ut_ad(buf_pool_mutex_own());
--      //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-+      ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-       //ut_ad(bpage->in_LRU_list); /* optimistic use */
-       if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
-@@ -387,12 +397,12 @@
-                               buf_page_in_file(bpage) */
-       enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
- {
--      ut_a(buf_page_in_file(bpage));
--      ut_ad(buf_pool_mutex_own());
-+      //ut_a(buf_page_in_file(bpage));
-+      //ut_ad(buf_pool_mutex_own()); /*optimistic...*/
-       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-       ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
--      if (bpage->oldest_modification != 0
-+      if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
-           && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
-               ut_ad(bpage->in_flush_list);
-@@ -421,8 +431,11 @@
- /*=============*/
-       buf_page_t*     bpage)  /*!< in: pointer to the block in question */
- {
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-+
-+      mutex_enter(&flush_list_mutex);
-+
-       ut_ad(bpage->in_flush_list);
-       switch (buf_page_get_state(bpage)) {
-@@ -433,15 +446,16 @@
-       case BUF_BLOCK_READY_FOR_USE:
-       case BUF_BLOCK_MEMORY:
-       case BUF_BLOCK_REMOVE_HASH:
-+              mutex_exit(&flush_list_mutex);
-               ut_error;
-               return;
-       case BUF_BLOCK_ZIP_DIRTY:
-               buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
--              UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
-+              UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
-               buf_LRU_insert_zip_clean(bpage);
-               break;
-       case BUF_BLOCK_FILE_PAGE:
--              UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
-+              UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
-               break;
-       }
-@@ -456,8 +470,9 @@
-       bpage->oldest_modification = 0;
--      ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
-+      ut_d(UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
-                             ut_ad(ut_list_node_313->in_flush_list)));
-+      mutex_exit(&flush_list_mutex);
- }
- /********************************************************************//**
-@@ -474,7 +489,8 @@
-       buf_page_t* prev;
-       buf_page_t* prev_b = NULL;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&flush_list_mutex));
-       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-@@ -492,18 +508,18 @@
-       because we assert on in_flush_list in comparison function. */
-       ut_d(bpage->in_flush_list = FALSE);
--      prev = UT_LIST_GET_PREV(list, bpage);
--      UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
-+      prev = UT_LIST_GET_PREV(flush_list, bpage);
-+      UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
-       if (prev) {
-               ut_ad(prev->in_flush_list);
-               UT_LIST_INSERT_AFTER(
--                      list,
-+                      flush_list,
-                       buf_pool->flush_list,
-                       prev, dpage);
-       } else {
-               UT_LIST_ADD_FIRST(
--                      list,
-+                      flush_list,
-                       buf_pool->flush_list,
-                       dpage);
-       }
-@@ -977,7 +993,9 @@
-       io_fixed and oldest_modification != 0.  Thus, it cannot be
-       relocated in the buffer pool or removed from flush_list or
-       LRU_list. */
--      ut_ad(!buf_pool_mutex_own());
-+      //ut_ad(!buf_pool_mutex_own());
-+      ut_ad(!mutex_own(&LRU_list_mutex));
-+      ut_ad(!mutex_own(&flush_list_mutex));
-       ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
-       ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
-       ut_ad(bpage->oldest_modification != 0);
-@@ -1137,12 +1155,19 @@
-       ibool           is_uncompressed;
-       ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+#ifdef UNIV_SYNC_DEBUG
-+      ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
-+            || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
-+#endif
-       ut_ad(buf_page_in_file(bpage));
-       block_mutex = buf_page_get_mutex(bpage);
-       ut_ad(mutex_own(block_mutex));
-+      mutex_enter(&buf_pool_mutex);
-+      rw_lock_s_unlock(&page_hash_latch);
-+
-       ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
-       buf_page_set_io_fix(bpage, BUF_IO_WRITE);
-@@ -1173,7 +1198,8 @@
-               }
-               mutex_exit(block_mutex);
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&buf_pool_mutex);
-               /* Even though bpage is not protected by any mutex at
-               this point, it is safe to access bpage, because it is
-@@ -1210,7 +1236,8 @@
-               immediately. */
-               mutex_exit(block_mutex);
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&buf_pool_mutex);
-               break;
-       default:
-@@ -1275,7 +1302,8 @@
-               high = fil_space_get_size(space);
-       }
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      rw_lock_s_lock(&page_hash_latch);
-       for (i = low; i < high; i++) {
-@@ -1294,11 +1322,9 @@
-               if (flush_type != BUF_FLUSH_LRU
-                   || i == offset
-                   || buf_page_is_old(bpage)) {
--                      mutex_t* block_mutex = buf_page_get_mutex(bpage);
--
--                      mutex_enter(block_mutex);
-+                      mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
--                      if (buf_flush_ready_for_flush(bpage, flush_type)
-+                      if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
-                           && (i == offset || !bpage->buf_fix_count)) {
-                               /* We only try to flush those
-                               neighbors != offset where the buf fix count is
-@@ -1312,14 +1338,16 @@
-                               ut_ad(!mutex_own(block_mutex));
-                               count++;
--                              buf_pool_mutex_enter();
--                      } else {
-+                              //buf_pool_mutex_enter();
-+                              rw_lock_s_lock(&page_hash_latch);
-+                      } else if (block_mutex) {
-                               mutex_exit(block_mutex);
-                       }
-               }
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      rw_lock_s_unlock(&page_hash_latch);
-       return(count);
- }
-@@ -1350,9 +1378,11 @@
-                                       min_n), otherwise ignored */
- {
-       buf_page_t*     bpage;
-+      buf_page_t*     prev_bpage      = NULL;
-       ulint           page_count      = 0;
-       ulint           space;
-       ulint           offset;
-+      ulint           remaining       = 0;
-       ut_ad((flush_type == BUF_FLUSH_LRU)
-             || (flush_type == BUF_FLUSH_LIST));
-@@ -1360,20 +1390,28 @@
-       ut_ad((flush_type != BUF_FLUSH_LIST)
-             || sync_thread_levels_empty_gen(TRUE));
- #endif /* UNIV_SYNC_DEBUG */
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&buf_pool_mutex);
-       if ((buf_pool->n_flush[flush_type] > 0)
-           || (buf_pool->init_flush[flush_type] == TRUE)) {
-               /* There is already a flush batch of the same type running */
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&buf_pool_mutex);
-               return(ULINT_UNDEFINED);
-       }
-       buf_pool->init_flush[flush_type] = TRUE;
-+      mutex_exit(&buf_pool_mutex);
-+
-+      if (flush_type == BUF_FLUSH_LRU) {
-+              mutex_enter(&LRU_list_mutex);
-+      }
-+
-       for (;;) {
- flush_next:
-               /* If we have flushed enough, leave the loop */
-@@ -1390,7 +1428,13 @@
-               } else {
-                       ut_ad(flush_type == BUF_FLUSH_LIST);
-+                      mutex_enter(&flush_list_mutex);
-+                      remaining = UT_LIST_GET_LEN(buf_pool->flush_list);
-                       bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-+                      if (bpage) {
-+                              prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
-+                      }
-+                      mutex_exit(&flush_list_mutex);
-                       if (!bpage
-                           || bpage->oldest_modification >= lsn_limit) {
-                               /* We have flushed enough */
-@@ -1407,26 +1451,35 @@
-               function a pointer to a block in the list! */
-               do {
--                      mutex_t*block_mutex = buf_page_get_mutex(bpage);
-+                      mutex_t*block_mutex = buf_page_get_mutex_enter(bpage);
-                       ibool   ready;
--                      ut_a(buf_page_in_file(bpage));
-+                      //ut_a(buf_page_in_file(bpage));
--                      mutex_enter(block_mutex);
--                      ready = buf_flush_ready_for_flush(bpage, flush_type);
--                      mutex_exit(block_mutex);
-+                      if (block_mutex) {
-+                              ready = buf_flush_ready_for_flush(bpage, flush_type);
-+                              mutex_exit(block_mutex);
-+                      } else {
-+                              ready = FALSE;
-+                      }
-                       if (ready) {
-                               space = buf_page_get_space(bpage);
-                               offset = buf_page_get_page_no(bpage);
--                              buf_pool_mutex_exit();
-+                              //buf_pool_mutex_exit();
-+                              if (flush_type == BUF_FLUSH_LRU) {
-+                                      mutex_exit(&LRU_list_mutex);
-+                              }
-                               /* Try to flush also all the neighbors */
-                               page_count += buf_flush_try_neighbors(
-                                       space, offset, flush_type, srv_flush_neighbor_pages);
--                              buf_pool_mutex_enter();
-+                              //buf_pool_mutex_enter();
-+                              if (flush_type == BUF_FLUSH_LRU) {
-+                                      mutex_enter(&LRU_list_mutex);
-+                              }
-                               goto flush_next;
-                       } else if (flush_type == BUF_FLUSH_LRU) {
-@@ -1434,16 +1487,35 @@
-                       } else {
-                               ut_ad(flush_type == BUF_FLUSH_LIST);
--                              bpage = UT_LIST_GET_PREV(list, bpage);
--                              ut_ad(!bpage || bpage->in_flush_list);
-+                              mutex_enter(&flush_list_mutex);
-+                              bpage = UT_LIST_GET_PREV(flush_list, bpage);
-+                              //ut_ad(!bpage || bpage->in_flush_list); /* optimistic */
-+                              if (bpage != prev_bpage) {
-+                                      /* the search may warp.. retrying */
-+                                      bpage = NULL;
-+                              }
-+                              if (bpage) {
-+                                      prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
-+                              }
-+                              mutex_exit(&flush_list_mutex);
-+                              remaining--;
-                       }
-               } while (bpage != NULL);
-+              if (remaining)
-+                      goto flush_next;
-+
-               /* If we could not find anything to flush, leave the loop */
-               break;
-       }
-+      if (flush_type == BUF_FLUSH_LRU) {
-+              mutex_exit(&LRU_list_mutex);
-+      }
-+
-+      mutex_enter(&buf_pool_mutex);
-+
-       buf_pool->init_flush[flush_type] = FALSE;
-       if (buf_pool->n_flush[flush_type] == 0) {
-@@ -1453,7 +1525,8 @@
-               os_event_set(buf_pool->no_flush[flush_type]);
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&buf_pool_mutex);
-       buf_flush_buffered_writes();
-@@ -1514,7 +1587,7 @@
- retry:
-       //buf_pool_mutex_enter();
-       if (have_LRU_mutex)
--              buf_pool_mutex_enter();
-+              mutex_enter(&LRU_list_mutex);
-       n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
-@@ -1531,15 +1604,15 @@
-                       bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-                       continue;
-               }
--              block_mutex = buf_page_get_mutex(bpage);
--
--              mutex_enter(block_mutex);
-+              block_mutex = buf_page_get_mutex_enter(bpage);
--              if (buf_flush_ready_for_replace(bpage)) {
-+              if (block_mutex && buf_flush_ready_for_replace(bpage)) {
-                       n_replaceable++;
-               }
--              mutex_exit(block_mutex);
-+              if (block_mutex) {
-+                      mutex_exit(block_mutex);
-+              }
-               distance++;
-@@ -1548,7 +1621,7 @@
-       //buf_pool_mutex_exit();
-       if (have_LRU_mutex)
--              buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-       if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
-@@ -1715,7 +1788,7 @@
-       buf_page_t*             bpage;
-       const ib_rbt_node_t*    rnode = NULL;
--      UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
-+      UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
-                        ut_ad(ut_list_node_313->in_flush_list));
-       bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-@@ -1730,7 +1803,7 @@
-       while (bpage != NULL) {
-               const ib_uint64_t om = bpage->oldest_modification;
-               ut_ad(bpage->in_flush_list);
--              ut_a(buf_page_in_file(bpage));
-+              //ut_a(buf_page_in_file(bpage)); /* optimistic */
-               ut_a(om > 0);
-               if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
-@@ -1742,7 +1815,7 @@
-                       rnode = rbt_next(buf_pool->flush_rbt, rnode);
-               }
--              bpage = UT_LIST_GET_NEXT(list, bpage);
-+              bpage = UT_LIST_GET_NEXT(flush_list, bpage);
-               ut_a(!bpage || om >= bpage->oldest_modification);
-       }
-@@ -1764,11 +1837,13 @@
- {
-       ibool   ret;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&flush_list_mutex);
-       ret = buf_flush_validate_low();
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&flush_list_mutex);
-       return(ret);
- }
-diff -ruN a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c
---- a/storage/innodb_plugin/buf/buf0lru.c      2010-08-27 15:54:59.025058614 +0900
-+++ b/storage/innodb_plugin/buf/buf0lru.c      2010-08-27 16:11:40.611021077 +0900
-@@ -145,8 +145,9 @@
- void
- buf_LRU_block_free_hashed_page(
- /*===========================*/
--      buf_block_t*    block); /*!< in: block, must contain a file page and
-+      buf_block_t*    block,  /*!< in: block, must contain a file page and
-                               be in a state where it can be freed */
-+      ibool           have_page_hash_mutex);
- /******************************************************************//**
- Determines if the unzip_LRU list should be used for evicting a victim
-@@ -154,16 +155,21 @@
- @return       TRUE if should use unzip_LRU */
- UNIV_INLINE
- ibool
--buf_LRU_evict_from_unzip_LRU(void)
-+buf_LRU_evict_from_unzip_LRU(
-+      ibool           have_LRU_mutex)
- /*==============================*/
- {
-       ulint   io_avg;
-       ulint   unzip_avg;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      if (!have_LRU_mutex)
-+              mutex_enter(&LRU_list_mutex);
-       /* If the unzip_LRU list is empty, we can only use the LRU. */
-       if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
-+              if (!have_LRU_mutex)
-+                      mutex_exit(&LRU_list_mutex);
-               return(FALSE);
-       }
-@@ -172,14 +178,20 @@
-       decompressed pages in the buffer pool. */
-       if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
-           <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
-+              if (!have_LRU_mutex)
-+                      mutex_exit(&LRU_list_mutex);
-               return(FALSE);
-       }
-       /* If eviction hasn't started yet, we assume by default
-       that a workload is disk bound. */
-       if (buf_pool->freed_page_clock == 0) {
-+              if (!have_LRU_mutex)
-+                      mutex_exit(&LRU_list_mutex);
-               return(TRUE);
-       }
-+      if (!have_LRU_mutex)
-+              mutex_exit(&LRU_list_mutex);
-       /* Calculate the average over past intervals, and add the values
-       of the current interval. */
-@@ -245,19 +257,23 @@
-       page_arr = ut_malloc(sizeof(ulint)
-                            * BUF_LRU_DROP_SEARCH_HASH_SIZE);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
- scan_again:
-       num_entries = 0;
-       bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-       while (bpage != NULL) {
--              mutex_t*        block_mutex = buf_page_get_mutex(bpage);
-+              mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
-               buf_page_t*     prev_bpage;
--              mutex_enter(block_mutex);
-               prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-+              if (!block_mutex) {
-+                      goto next_page;
-+              }
-+
-               ut_a(buf_page_in_file(bpage));
-               if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
-@@ -285,12 +301,14 @@
-                       }
-                       /* Array full. We release the buf_pool_mutex to
-                       obey the latching order. */
--                      buf_pool_mutex_exit();
-+                      //buf_pool_mutex_exit();
-+                      mutex_exit(&LRU_list_mutex);
-                       buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
-                                                    num_entries);
-                       num_entries = 0;
--                      buf_pool_mutex_enter();
-+                      //buf_pool_mutex_enter();
-+                      mutex_enter(&LRU_list_mutex);
-               } else {
-                       mutex_exit(block_mutex);
-               }
-@@ -315,7 +333,8 @@
-               }
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-       /* Drop any remaining batch of search hashed pages. */
-       buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
-@@ -343,7 +362,9 @@
-       buf_LRU_drop_page_hash_for_tablespace(id);
- scan_again:
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-+      rw_lock_x_lock(&page_hash_latch);
-       all_freed = TRUE;
-@@ -371,8 +392,16 @@
-                       all_freed = FALSE;
-               } else {
--                      mutex_t* block_mutex = buf_page_get_mutex(bpage);
--                      mutex_enter(block_mutex);
-+                      mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
-+
-+                      if (!block_mutex) {
-+                              /* It may be impossible case...
-+                              Something wrong, so will be scan_again */
-+
-+                              all_freed = FALSE;
-+
-+                              goto next_page_no_mutex;
-+                      }
-                       if (bpage->buf_fix_count > 0) {
-@@ -431,7 +460,9 @@
-                               ulint   page_no;
-                               ulint   zip_size;
--                              buf_pool_mutex_exit();
-+                              //buf_pool_mutex_exit();
-+                              mutex_exit(&LRU_list_mutex);
-+                              rw_lock_x_unlock(&page_hash_latch);
-                               zip_size = buf_page_get_zip_size(bpage);
-                               page_no = buf_page_get_page_no(bpage);
-@@ -456,7 +487,7 @@
-                       if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
-                           != BUF_BLOCK_ZIP_FREE) {
-                               buf_LRU_block_free_hashed_page((buf_block_t*)
--                                                             bpage);
-+                                                             bpage, TRUE);
-                       } else {
-                               /* The block_mutex should have been
-                               released by buf_LRU_block_remove_hashed_page()
-@@ -488,7 +519,9 @@
-               bpage = prev_bpage;
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-+      rw_lock_x_unlock(&page_hash_latch);
-       if (!all_freed) {
-               os_thread_sleep(20000);
-@@ -507,7 +540,9 @@
- {
-       buf_page_t*     b;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-+      ut_ad(mutex_own(&flush_list_mutex));
-       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
-       /* Find the first successor of bpage in the LRU list
-@@ -515,17 +550,17 @@
-       b = bpage;
-       do {
-               b = UT_LIST_GET_NEXT(LRU, b);
--      } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
-+      } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
-       /* Insert bpage before b, i.e., after the predecessor of b. */
-       if (b) {
--              b = UT_LIST_GET_PREV(list, b);
-+              b = UT_LIST_GET_PREV(zip_list, b);
-       }
-       if (b) {
--              UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
-+              UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
-       } else {
--              UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
-+              UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
-       }
- }
-@@ -537,16 +572,17 @@
- ibool
- buf_LRU_free_from_unzip_LRU_list(
- /*=============================*/
--      ulint   n_iterations)   /*!< in: how many times this has been called
-+      ulint   n_iterations,   /*!< in: how many times this has been called
-                               repeatedly without result: a high value means
-                               that we should search farther; we will search
-                               n_iterations / 5 of the unzip_LRU list,
-                               or nothing if n_iterations >= 5 */
-+      ibool   have_LRU_mutex)
- {
-       buf_block_t*    block;
-       ulint           distance;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own()); /* optimistic */
-       /* Theoratically it should be much easier to find a victim
-       from unzip_LRU as we can choose even a dirty block (as we'll
-@@ -556,7 +592,7 @@
-       if we have done five iterations so far. */
-       if (UNIV_UNLIKELY(n_iterations >= 5)
--          || !buf_LRU_evict_from_unzip_LRU()) {
-+          || !buf_LRU_evict_from_unzip_LRU(have_LRU_mutex)) {
-               return(FALSE);
-       }
-@@ -564,18 +600,25 @@
-       distance = 100 + (n_iterations
-                         * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
-+restart:
-       for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
-            UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
-            block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
-               enum buf_lru_free_block_status  freed;
-+              mutex_enter(&block->mutex);
-+              if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
-+                  || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
-+                      mutex_exit(&block->mutex);
-+                      goto restart;
-+              }
-+
-               ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-               ut_ad(block->in_unzip_LRU_list);
-               ut_ad(block->page.in_LRU_list);
--              mutex_enter(&block->mutex);
--              freed = buf_LRU_free_block(&block->page, FALSE, NULL);
-+              freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
-               mutex_exit(&block->mutex);
-               switch (freed) {
-@@ -608,20 +651,22 @@
- ibool
- buf_LRU_free_from_common_LRU_list(
- /*==============================*/
--      ulint   n_iterations)   /*!< in: how many times this has been called
-+      ulint   n_iterations,   /*!< in: how many times this has been called
-                               repeatedly without result: a high value means
-                               that we should search farther; if
-                               n_iterations < 10, then we search
-                               n_iterations / 10 * buf_pool->curr_size
-                               pages from the end of the LRU list */
-+      ibool   have_LRU_mutex)
- {
-       buf_page_t*     bpage;
-       ulint           distance;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own()); /* optimistic */
-       distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
-+restart:
-       for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-            UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
-            bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
-@@ -629,14 +674,23 @@
-               enum buf_lru_free_block_status  freed;
-               unsigned                        accessed;
-               mutex_t*                        block_mutex
--                      = buf_page_get_mutex(bpage);
-+                      = buf_page_get_mutex_enter(bpage);
-+
-+              if (!block_mutex) {
-+                      goto restart;
-+              }
-+
-+              if (!bpage->in_LRU_list
-+                  || !buf_page_in_file(bpage)) {
-+                      mutex_exit(block_mutex);
-+                      goto restart;
-+              }
-               ut_ad(buf_page_in_file(bpage));
-               ut_ad(bpage->in_LRU_list);
--              mutex_enter(block_mutex);
-               accessed = buf_page_is_accessed(bpage);
--              freed = buf_LRU_free_block(bpage, TRUE, NULL);
-+              freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
-               mutex_exit(block_mutex);
-               switch (freed) {
-@@ -685,22 +739,33 @@
-                               n_iterations / 5 of the unzip_LRU list. */
- {
-       ibool   freed = FALSE;
-+      ibool   have_LRU_mutex = FALSE;
-+
-+      if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
-+              have_LRU_mutex = TRUE;
--      buf_pool_mutex_enter();
-+      /* optimistic search... */
-+      //buf_pool_mutex_enter();
-+      if (have_LRU_mutex)
-+              mutex_enter(&LRU_list_mutex);
--      freed = buf_LRU_free_from_unzip_LRU_list(n_iterations);
-+      freed = buf_LRU_free_from_unzip_LRU_list(n_iterations, have_LRU_mutex);
-       if (!freed) {
--              freed = buf_LRU_free_from_common_LRU_list(n_iterations);
-+              freed = buf_LRU_free_from_common_LRU_list(n_iterations, have_LRU_mutex);
-       }
-+      mutex_enter(&buf_pool_mutex);
-       if (!freed) {
-               buf_pool->LRU_flush_ended = 0;
-       } else if (buf_pool->LRU_flush_ended > 0) {
-               buf_pool->LRU_flush_ended--;
-       }
-+      mutex_exit(&buf_pool_mutex);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      if (have_LRU_mutex)
-+              mutex_exit(&LRU_list_mutex);
-       return(freed);
- }
-@@ -718,18 +783,22 @@
- buf_LRU_try_free_flushed_blocks(void)
- /*=================================*/
- {
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&buf_pool_mutex);
-       while (buf_pool->LRU_flush_ended > 0) {
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&buf_pool_mutex);
-               buf_LRU_search_and_free_block(1);
--              buf_pool_mutex_enter();
-+              //buf_pool_mutex_enter();
-+              mutex_enter(&buf_pool_mutex);
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&buf_pool_mutex);
- }
- /******************************************************************//**
-@@ -744,7 +813,9 @@
- {
-       ibool   ret     = FALSE;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-+      mutex_enter(&free_list_mutex);
-       if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
-           + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 4) {
-@@ -752,7 +823,9 @@
-               ret = TRUE;
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-+      mutex_exit(&free_list_mutex);
-       return(ret);
- }
-@@ -768,9 +841,10 @@
- {
-       buf_block_t*    block;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
--      block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
-+      mutex_enter(&free_list_mutex);
-+      block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
-       if (block) {
-               ut_ad(block->page.in_free_list);
-@@ -778,7 +852,9 @@
-               ut_ad(!block->page.in_flush_list);
-               ut_ad(!block->page.in_LRU_list);
-               ut_a(!buf_page_in_file(&block->page));
--              UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
-+              UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
-+
-+              mutex_exit(&free_list_mutex);
-               mutex_enter(&block->mutex);
-@@ -786,6 +862,8 @@
-               UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
-               mutex_exit(&block->mutex);
-+      } else {
-+              mutex_exit(&free_list_mutex);
-       }
-       return(block);
-@@ -809,7 +887,7 @@
-       ibool           mon_value_was   = FALSE;
-       ibool           started_monitor = FALSE;
- loop:
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-       if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
-           + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
-@@ -889,14 +967,16 @@
-               if (UNIV_UNLIKELY(zip_size)) {
-                       ibool   lru;
-                       page_zip_set_size(&block->page.zip, zip_size);
--                      block->page.zip.data = buf_buddy_alloc(zip_size, &lru);
-+                      mutex_enter(&LRU_list_mutex);
-+                      block->page.zip.data = buf_buddy_alloc(zip_size, &lru, FALSE);
-+                      mutex_exit(&LRU_list_mutex);
-                       UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
-               } else {
-                       page_zip_set_size(&block->page.zip, 0);
-                       block->page.zip.data = NULL;
-               }
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-               if (started_monitor) {
-                       srv_print_innodb_monitor = mon_value_was;
-@@ -908,7 +988,7 @@
-       /* If no block was in the free list, search from the end of the LRU
-       list and try to free a block there */
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-       freed = buf_LRU_search_and_free_block(n_iterations);
-@@ -957,18 +1037,21 @@
-       os_aio_simulated_wake_handler_threads();
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&buf_pool_mutex);
-       if (buf_pool->LRU_flush_ended > 0) {
-               /* We have written pages in an LRU flush. To make the insert
-               buffer more efficient, we try to move these pages to the free
-               list. */
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&buf_pool_mutex);
-               buf_LRU_try_free_flushed_blocks();
-       } else {
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&buf_pool_mutex);
-       }
-       if (n_iterations > 10) {
-@@ -993,7 +1076,8 @@
-       ulint   new_len;
-       ut_a(buf_pool->LRU_old);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-       ut_ad(buf_LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
-       ut_ad(buf_LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
- #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
-@@ -1058,7 +1142,8 @@
- {
-       buf_page_t*     bpage;
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-       ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
-       /* We first initialize all blocks in the LRU list as old and then use
-@@ -1091,13 +1176,14 @@
-       ut_ad(buf_pool);
-       ut_ad(bpage);
-       ut_ad(buf_page_in_file(bpage));
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-       if (buf_page_belongs_to_unzip_LRU(bpage)) {
-               buf_block_t*    block = (buf_block_t*) bpage;
-               ut_ad(block->in_unzip_LRU_list);
--              ut_d(block->in_unzip_LRU_list = FALSE);
-+              block->in_unzip_LRU_list = FALSE;
-               UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
-       }
-@@ -1113,7 +1199,8 @@
- {
-       ut_ad(buf_pool);
-       ut_ad(bpage);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-       ut_a(buf_page_in_file(bpage));
-@@ -1188,12 +1275,13 @@
- {
-       ut_ad(buf_pool);
-       ut_ad(block);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-       ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
-       ut_ad(!block->in_unzip_LRU_list);
--      ut_d(block->in_unzip_LRU_list = TRUE);
-+      block->in_unzip_LRU_list = TRUE;
-       if (old) {
-               UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
-@@ -1212,7 +1300,8 @@
- {
-       ut_ad(buf_pool);
-       ut_ad(bpage);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-       ut_a(buf_page_in_file(bpage));
-@@ -1261,7 +1350,8 @@
- {
-       ut_ad(buf_pool);
-       ut_ad(bpage);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-       ut_a(buf_page_in_file(bpage));
-       ut_ad(!bpage->in_LRU_list);
-@@ -1338,7 +1428,8 @@
- /*=====================*/
-       buf_page_t*     bpage)  /*!< in: control block */
- {
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-       if (bpage->old) {
-               buf_pool->stat.n_pages_made_young++;
-@@ -1380,18 +1471,19 @@
-       buf_page_t*     bpage,  /*!< in: block to be freed */
-       ibool           zip,    /*!< in: TRUE if should remove also the
-                               compressed page of an uncompressed page */
--      ibool*          buf_pool_mutex_released)
-+      ibool*          buf_pool_mutex_released,
-                               /*!< in: pointer to a variable that will
-                               be assigned TRUE if buf_pool_mutex
-                               was temporarily released, or NULL */
-+      ibool           have_LRU_mutex)
- {
-       buf_page_t*     b = NULL;
-       mutex_t*        block_mutex = buf_page_get_mutex(bpage);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       ut_ad(mutex_own(block_mutex));
-       ut_ad(buf_page_in_file(bpage));
--      ut_ad(bpage->in_LRU_list);
-+      //ut_ad(bpage->in_LRU_list);
-       ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
- #if UNIV_WORD_SIZE == 4
-       /* On 32-bit systems, there is no padding in buf_page_t.  On
-@@ -1400,7 +1492,7 @@
-       UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
- #endif
--      if (!buf_page_can_relocate(bpage)) {
-+      if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
-               /* Do not free buffer-fixed or I/O-fixed blocks. */
-               return(BUF_LRU_NOT_FREED);
-@@ -1432,15 +1524,15 @@
-               If it cannot be allocated (without freeing a block
-               from the LRU list), refuse to free bpage. */
- alloc:
--              buf_pool_mutex_exit_forbid();
--              b = buf_buddy_alloc(sizeof *b, NULL);
--              buf_pool_mutex_exit_allow();
-+              //buf_pool_mutex_exit_forbid();
-+              b = buf_buddy_alloc(sizeof *b, NULL, FALSE);
-+              //buf_pool_mutex_exit_allow();
-               if (UNIV_UNLIKELY(!b)) {
-                       return(BUF_LRU_CANNOT_RELOCATE);
-               }
--              memcpy(b, bpage, sizeof *b);
-+              //memcpy(b, bpage, sizeof *b);
-       }
- #ifdef UNIV_DEBUG
-@@ -1451,6 +1543,39 @@
-       }
- #endif /* UNIV_DEBUG */
-+      /* not to break latch order, must re-enter block_mutex */
-+      mutex_exit(block_mutex);
-+
-+      if (!have_LRU_mutex)
-+              mutex_enter(&LRU_list_mutex); /* optimistic */
-+      rw_lock_x_lock(&page_hash_latch);
-+      mutex_enter(block_mutex);
-+
-+      /* recheck states of block */
-+      if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
-+          || !buf_page_can_relocate(bpage)) {
-+not_freed:
-+              if (b) {
-+                      buf_buddy_free(b, sizeof *b, TRUE);
-+              }
-+              if (!have_LRU_mutex)
-+                      mutex_exit(&LRU_list_mutex);
-+              rw_lock_x_unlock(&page_hash_latch);
-+              return(BUF_LRU_NOT_FREED);
-+      } else if (zip || !bpage->zip.data) {
-+              if (bpage->oldest_modification)
-+                      goto not_freed;
-+      } else if (bpage->oldest_modification) {
-+              if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
-+                      ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
-+                      goto not_freed;
-+              }
-+      }
-+
-+      if (b) {
-+              memcpy(b, bpage, sizeof *b);
-+      }
-+
-       if (buf_LRU_block_remove_hashed_page(bpage, zip)
-           != BUF_BLOCK_ZIP_FREE) {
-               ut_a(bpage->buf_fix_count == 0);
-@@ -1462,6 +1587,10 @@
-                       ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
-+                      while (prev_b && !prev_b->in_LRU_list) {
-+                              prev_b = UT_LIST_GET_PREV(LRU, prev_b);
-+                      }
-+
-                       b->state = b->oldest_modification
-                               ? BUF_BLOCK_ZIP_DIRTY
-                               : BUF_BLOCK_ZIP_PAGE;
-@@ -1537,12 +1666,14 @@
-                               buf_LRU_add_block_low(b, buf_page_is_old(b));
-                       }
-+                      mutex_enter(&flush_list_mutex);
-                       if (b->state == BUF_BLOCK_ZIP_PAGE) {
-                               buf_LRU_insert_zip_clean(b);
-                       } else {
-                               /* Relocate on buf_pool->flush_list. */
-                               buf_flush_relocate_on_flush_list(bpage, b);
-                       }
-+                      mutex_exit(&flush_list_mutex);
-                       bpage->zip.data = NULL;
-                       page_zip_set_size(&bpage->zip, 0);
-@@ -1558,7 +1689,9 @@
-                       *buf_pool_mutex_released = TRUE;
-               }
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-+              rw_lock_x_unlock(&page_hash_latch);
-               mutex_exit(block_mutex);
-               /* Remove possible adaptive hash index on the page.
-@@ -1590,7 +1723,9 @@
-                               : BUF_NO_CHECKSUM_MAGIC);
-               }
--              buf_pool_mutex_enter();
-+              //buf_pool_mutex_enter();
-+              if (have_LRU_mutex)
-+                      mutex_enter(&LRU_list_mutex);
-               mutex_enter(block_mutex);
-               if (b) {
-@@ -1600,13 +1735,17 @@
-                       mutex_exit(&buf_pool_zip_mutex);
-               }
--              buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
-+              buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
-       } else {
-               /* The block_mutex should have been released by
-               buf_LRU_block_remove_hashed_page() when it returns
-               BUF_BLOCK_ZIP_FREE. */
-               ut_ad(block_mutex == &buf_pool_zip_mutex);
-               mutex_enter(block_mutex);
-+
-+              if (!have_LRU_mutex)
-+                      mutex_exit(&LRU_list_mutex);
-+              rw_lock_x_unlock(&page_hash_latch);
-       }
-       return(BUF_LRU_FREED);
-@@ -1618,12 +1757,13 @@
- void
- buf_LRU_block_free_non_file_page(
- /*=============================*/
--      buf_block_t*    block)  /*!< in: block, must not contain a file page */
-+      buf_block_t*    block,  /*!< in: block, must not contain a file page */
-+      ibool           have_page_hash_mutex)
- {
-       void*   data;
-       ut_ad(block);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       ut_ad(mutex_own(&block->mutex));
-       switch (buf_block_get_state(block)) {
-@@ -1657,15 +1797,17 @@
-       if (data) {
-               block->page.zip.data = NULL;
-               mutex_exit(&block->mutex);
--              buf_pool_mutex_exit_forbid();
--              buf_buddy_free(data, page_zip_get_size(&block->page.zip));
--              buf_pool_mutex_exit_allow();
-+              //buf_pool_mutex_exit_forbid();
-+              buf_buddy_free(data, page_zip_get_size(&block->page.zip), have_page_hash_mutex);
-+              //buf_pool_mutex_exit_allow();
-               mutex_enter(&block->mutex);
-               page_zip_set_size(&block->page.zip, 0);
-       }
--      UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
-+      mutex_enter(&free_list_mutex);
-+      UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
-       ut_d(block->page.in_free_list = TRUE);
-+      mutex_exit(&free_list_mutex);
-       UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
- }
-@@ -1692,7 +1834,11 @@
- {
-       const buf_page_t*       hashed_bpage;
-       ut_ad(bpage);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-+#ifdef UNIV_SYNC_DEBUG
-+      ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
-+#endif
-       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-       ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
-@@ -1798,7 +1944,9 @@
- #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-               mutex_exit(buf_page_get_mutex(bpage));
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-+              rw_lock_x_unlock(&page_hash_latch);
-               buf_print();
-               buf_LRU_print();
-               buf_validate();
-@@ -1821,14 +1969,14 @@
-               ut_a(bpage->zip.data);
-               ut_a(buf_page_get_zip_size(bpage));
--              UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
-+              UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
-               mutex_exit(&buf_pool_zip_mutex);
--              buf_pool_mutex_exit_forbid();
-+              //buf_pool_mutex_exit_forbid();
-               buf_buddy_free(bpage->zip.data,
--                             page_zip_get_size(&bpage->zip));
--              buf_buddy_free(bpage, sizeof(*bpage));
--              buf_pool_mutex_exit_allow();
-+                             page_zip_get_size(&bpage->zip), TRUE);
-+              buf_buddy_free(bpage, sizeof(*bpage), TRUE);
-+              //buf_pool_mutex_exit_allow();
-               UNIV_MEM_UNDESC(bpage);
-               return(BUF_BLOCK_ZIP_FREE);
-@@ -1850,9 +1998,9 @@
-                       ut_ad(!bpage->in_flush_list);
-                       ut_ad(!bpage->in_LRU_list);
-                       mutex_exit(&((buf_block_t*) bpage)->mutex);
--                      buf_pool_mutex_exit_forbid();
--                      buf_buddy_free(data, page_zip_get_size(&bpage->zip));
--                      buf_pool_mutex_exit_allow();
-+                      //buf_pool_mutex_exit_forbid();
-+                      buf_buddy_free(data, page_zip_get_size(&bpage->zip), TRUE);
-+                      //buf_pool_mutex_exit_allow();
-                       mutex_enter(&((buf_block_t*) bpage)->mutex);
-                       page_zip_set_size(&bpage->zip, 0);
-               }
-@@ -1878,15 +2026,16 @@
- void
- buf_LRU_block_free_hashed_page(
- /*===========================*/
--      buf_block_t*    block)  /*!< in: block, must contain a file page and
-+      buf_block_t*    block,  /*!< in: block, must contain a file page and
-                               be in a state where it can be freed */
-+      ibool           have_page_hash_mutex)
- {
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       ut_ad(mutex_own(&block->mutex));
-       buf_block_set_state(block, BUF_BLOCK_MEMORY);
--      buf_LRU_block_free_non_file_page(block);
-+      buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
- }
- /**********************************************************************//**
-@@ -1912,7 +2061,8 @@
-       }
-       if (adjust) {
--              buf_pool_mutex_enter();
-+              //buf_pool_mutex_enter();
-+              mutex_enter(&LRU_list_mutex);
-               if (ratio != buf_LRU_old_ratio) {
-                       buf_LRU_old_ratio = ratio;
-@@ -1923,7 +2073,8 @@
-                       }
-               }
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&LRU_list_mutex);
-       } else {
-               buf_LRU_old_ratio = ratio;
-       }
-@@ -1948,7 +2099,8 @@
-               goto func_exit;
-       }
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&buf_pool_mutex);
-       /* Update the index. */
-       item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
-@@ -1962,7 +2114,8 @@
-       /* Put current entry in the array. */
-       memcpy(item, &buf_LRU_stat_cur, sizeof *item);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&buf_pool_mutex);
- func_exit:
-       /* Clear the current entry. */
-@@ -1984,7 +2137,8 @@
-       ulint           new_len;
-       ut_ad(buf_pool);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-       if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
-@@ -2044,16 +2198,22 @@
-       ut_a(buf_pool->LRU_old_len == old_len);
--      UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
-+      mutex_exit(&LRU_list_mutex);
-+      mutex_enter(&free_list_mutex);
-+
-+      UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
-                        ut_ad(ut_list_node_313->in_free_list));
-       for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
-            bpage != NULL;
--           bpage = UT_LIST_GET_NEXT(list, bpage)) {
-+           bpage = UT_LIST_GET_NEXT(free, bpage)) {
-               ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
-       }
-+      mutex_exit(&free_list_mutex);
-+      mutex_enter(&LRU_list_mutex);
-+
-       UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
-                        ut_ad(ut_list_node_313->in_unzip_LRU_list
-                              && ut_list_node_313->page.in_LRU_list));
-@@ -2067,7 +2227,8 @@
-               ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
-       return(TRUE);
- }
- #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-@@ -2083,7 +2244,8 @@
-       const buf_page_t*       bpage;
-       ut_ad(buf_pool);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&LRU_list_mutex);
-       bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
-@@ -2140,6 +2302,7 @@
-               bpage = UT_LIST_GET_NEXT(LRU, bpage);
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&LRU_list_mutex);
- }
- #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0rea.c
---- a/storage/innodb_plugin/buf/buf0rea.c      2010-08-27 15:54:59.027059378 +0900
-+++ b/storage/innodb_plugin/buf/buf0rea.c      2010-08-27 16:11:40.614021339 +0900
-@@ -290,10 +290,12 @@
-       tablespace_version = fil_space_get_version(space);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&buf_pool_mutex);
-       if (high > fil_space_get_size(space)) {
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&buf_pool_mutex);
-               /* The area is not whole, return */
-               return(0);
-@@ -301,10 +303,12 @@
-       if (buf_pool->n_pend_reads
-           > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              mutex_exit(&buf_pool_mutex);
-               return(0);
-       }
-+      mutex_exit(&buf_pool_mutex);
-       /* Check that almost all pages in the area have been accessed; if
-       offset == low, the accesses must be in a descending order, otherwise,
-@@ -323,6 +327,7 @@
-       fail_count = 0;
-+      rw_lock_s_lock(&page_hash_latch);
-       for (i = low; i < high; i++) {
-               bpage = buf_page_hash_get(space, i);
-@@ -350,7 +355,8 @@
-               if (fail_count > threshold) {
-                       /* Too many failures: return */
--                      buf_pool_mutex_exit();
-+                      //buf_pool_mutex_exit();
-+                      rw_lock_s_unlock(&page_hash_latch);
-                       return(0);
-               }
-@@ -365,7 +371,8 @@
-       bpage = buf_page_hash_get(space, offset);
-       if (bpage == NULL) {
--              buf_pool_mutex_exit();
-+              //buf_pool_mutex_exit();
-+              rw_lock_s_unlock(&page_hash_latch);
-               return(0);
-       }
-@@ -391,7 +398,8 @@
-       pred_offset = fil_page_get_prev(frame);
-       succ_offset = fil_page_get_next(frame);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      rw_lock_s_unlock(&page_hash_latch);
-       if ((offset == low) && (succ_offset == offset + 1)) {
-diff -ruN a/storage/innodb_plugin/handler/i_s.cc b/storage/innodb_plugin/handler/i_s.cc
---- a/storage/innodb_plugin/handler/i_s.cc     2010-08-27 15:59:21.753412068 +0900
-+++ b/storage/innodb_plugin/handler/i_s.cc     2010-08-27 16:11:40.617020805 +0900
-@@ -2230,7 +2230,8 @@
-       RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&zip_free_mutex);
-       for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
-               buf_buddy_stat_t*       buddy_stat = &buf_buddy_stat[x];
-@@ -2256,7 +2257,8 @@
-               }
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&zip_free_mutex);
-       DBUG_RETURN(status);
- }
-diff -ruN a/storage/innodb_plugin/handler/innodb_patch_info.h b/storage/innodb_plugin/handler/innodb_patch_info.h
---- a/storage/innodb_plugin/handler/innodb_patch_info.h        2010-08-27 16:11:12.167183642 +0900
-+++ b/storage/innodb_plugin/handler/innodb_patch_info.h        2010-08-27 16:11:40.614990183 +0900
-@@ -33,5 +33,6 @@
- {"innodb_overwrite_relay_log_info","overwrite relay-log.info when slave recovery","Building as plugin, it is not used.","http://www.percona.com/docs/wiki/percona-xtradb:innodb_overwrite_relay_log_info"},
- {"innodb_thread_concurrency_timer_based","use InnoDB timer based concurrency throttling (backport from MySQL 5.4.0)","",""},
- {"innodb_dict_size_limit","Limit dictionary cache size","Variable innodb_dict_size_limit in bytes","http://www.percona.com/docs/wiki/percona-xtradb"},
-+{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
- {NULL, NULL, NULL, NULL}
- };
-diff -ruN a/storage/innodb_plugin/include/buf0buddy.h b/storage/innodb_plugin/include/buf0buddy.h
---- a/storage/innodb_plugin/include/buf0buddy.h        2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/buf0buddy.h        2010-08-27 16:11:40.618988049 +0900
-@@ -49,10 +49,11 @@
- buf_buddy_alloc(
- /*============*/
-       ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
--      ibool*  lru)    /*!< in: pointer to a variable that will be assigned
-+      ibool*  lru,    /*!< in: pointer to a variable that will be assigned
-                       TRUE if storage was allocated from the LRU list
-                       and buf_pool_mutex was temporarily released,
-                       or NULL if the LRU list should not be used */
-+      ibool   have_page_hash_mutex)
-       __attribute__((malloc));
- /**********************************************************************//**
-@@ -63,7 +64,8 @@
- /*===========*/
-       void*   buf,    /*!< in: block to be freed, must not be
-                       pointed to by the buffer pool */
--      ulint   size)   /*!< in: block size, up to UNIV_PAGE_SIZE */
-+      ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
-+      ibool   have_page_hash_mutex)
-       __attribute__((nonnull));
- /** Statistics of buddy blocks of a given size. */
-diff -ruN a/storage/innodb_plugin/include/buf0buddy.ic b/storage/innodb_plugin/include/buf0buddy.ic
---- a/storage/innodb_plugin/include/buf0buddy.ic       2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/buf0buddy.ic       2010-08-27 16:11:40.619989772 +0900
-@@ -44,10 +44,11 @@
- /*================*/
-       ulint   i,      /*!< in: index of buf_pool->zip_free[],
-                       or BUF_BUDDY_SIZES */
--      ibool*  lru)    /*!< in: pointer to a variable that will be assigned
-+      ibool*  lru,    /*!< in: pointer to a variable that will be assigned
-                       TRUE if storage was allocated from the LRU list
-                       and buf_pool_mutex was temporarily released,
-                       or NULL if the LRU list should not be used */
-+      ibool   have_page_hash_mutex)
-       __attribute__((malloc));
- /**********************************************************************//**
-@@ -58,8 +59,9 @@
- /*===============*/
-       void*   buf,    /*!< in: block to be freed, must not be
-                       pointed to by the buffer pool */
--      ulint   i)      /*!< in: index of buf_pool->zip_free[],
-+      ulint   i,      /*!< in: index of buf_pool->zip_free[],
-                       or BUF_BUDDY_SIZES */
-+      ibool   have_page_hash_mutex)
-       __attribute__((nonnull));
- /**********************************************************************//**
-@@ -96,14 +98,15 @@
- buf_buddy_alloc(
- /*============*/
-       ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
--      ibool*  lru)    /*!< in: pointer to a variable that will be assigned
-+      ibool*  lru,    /*!< in: pointer to a variable that will be assigned
-                       TRUE if storage was allocated from the LRU list
-                       and buf_pool_mutex was temporarily released,
-                       or NULL if the LRU list should not be used */
-+      ibool   have_page_hash_mutex)
- {
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
--      return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
-+      return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru, have_page_hash_mutex));
- }
- /**********************************************************************//**
-@@ -114,11 +117,24 @@
- /*===========*/
-       void*   buf,    /*!< in: block to be freed, must not be
-                       pointed to by the buffer pool */
--      ulint   size)   /*!< in: block size, up to UNIV_PAGE_SIZE */
-+      ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
-+      ibool   have_page_hash_mutex)
- {
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
--      buf_buddy_free_low(buf, buf_buddy_get_slot(size));
-+      if (!have_page_hash_mutex) {
-+              mutex_enter(&LRU_list_mutex);
-+              rw_lock_x_lock(&page_hash_latch);
-+      }
-+
-+      mutex_enter(&zip_free_mutex);
-+      buf_buddy_free_low(buf, buf_buddy_get_slot(size), TRUE);
-+      mutex_exit(&zip_free_mutex);
-+
-+      if (!have_page_hash_mutex) {
-+              mutex_exit(&LRU_list_mutex);
-+              rw_lock_x_unlock(&page_hash_latch);
-+      }
- }
- #ifdef UNIV_MATERIALIZE
-diff -ruN a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h
---- a/storage/innodb_plugin/include/buf0buf.h  2010-08-27 15:55:39.399063353 +0900
-+++ b/storage/innodb_plugin/include/buf0buf.h  2010-08-27 16:11:40.622020552 +0900
-@@ -713,6 +713,15 @@
-       const buf_page_t*       bpage)  /*!< in: pointer to control block */
-       __attribute__((pure));
-+/*************************************************************************
-+Gets the mutex of a block and enter the mutex with consistency. */
-+UNIV_INLINE
-+mutex_t*
-+buf_page_get_mutex_enter(
-+/*=========================*/
-+      const buf_page_t*       bpage)  /*!< in: pointer to control block */
-+      __attribute__((pure));
-+
- /*********************************************************************//**
- Get the flush type of a page.
- @return       flush type */
-@@ -1066,7 +1075,7 @@
-       All these are protected by buf_pool_mutex. */
-       /* @{ */
--      UT_LIST_NODE_T(buf_page_t) list;
-+      /* UT_LIST_NODE_T(buf_page_t) list; */
-                                       /*!< based on state, this is a
-                                       list node, protected only by
-                                       buf_pool_mutex, in one of the
-@@ -1086,6 +1095,10 @@
-                                       BUF_BLOCK_REMOVE_HASH or
-                                       BUF_BLOCK_READY_IN_USE. */
-+      /* resplit for optimistic use */
-+      UT_LIST_NODE_T(buf_page_t) free;
-+      UT_LIST_NODE_T(buf_page_t) flush_list;
-+      UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
- #ifdef UNIV_DEBUG
-       ibool           in_flush_list;  /*!< TRUE if in buf_pool->flush_list;
-                                       when buf_pool_mutex is free, the
-@@ -1166,11 +1179,11 @@
-                                       a block is in the unzip_LRU list
-                                       if page.state == BUF_BLOCK_FILE_PAGE
-                                       and page.zip.data != NULL */
--#ifdef UNIV_DEBUG
-+//#ifdef UNIV_DEBUG
-       ibool           in_unzip_LRU_list;/*!< TRUE if the page is in the
-                                       decompressed LRU list;
-                                       used in debugging */
--#endif /* UNIV_DEBUG */
-+//#endif /* UNIV_DEBUG */
-       mutex_t         mutex;          /*!< mutex protecting this block:
-                                       state (also protected by the buffer
-                                       pool mutex), io_fix, buf_fix_count,
-@@ -1446,6 +1459,12 @@
- /** mutex protecting the buffer pool struct and control blocks, except the
- read-write lock in them */
- extern mutex_t        buf_pool_mutex;
-+extern mutex_t        LRU_list_mutex;
-+extern mutex_t        flush_list_mutex;
-+extern rw_lock_t      page_hash_latch;
-+extern mutex_t        free_list_mutex;
-+extern mutex_t        zip_free_mutex;
-+extern mutex_t        zip_hash_mutex;
- /** mutex protecting the control blocks of compressed-only pages
- (of type buf_page_t, not buf_block_t) */
- extern mutex_t        buf_pool_zip_mutex;
-diff -ruN a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic
---- a/storage/innodb_plugin/include/buf0buf.ic 2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/buf0buf.ic 2010-08-27 16:11:40.624990413 +0900
-@@ -121,7 +121,9 @@
-       buf_page_t*     bpage;
-       ib_uint64_t     lsn;
--      buf_pool_mutex_enter();
-+try_again:
-+      //buf_pool_mutex_enter();
-+      mutex_enter(&flush_list_mutex);
-       bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-@@ -130,9 +132,14 @@
-       } else {
-               ut_ad(bpage->in_flush_list);
-               lsn = bpage->oldest_modification;
-+              if (lsn == 0) {
-+                      mutex_exit(&flush_list_mutex);
-+                      goto try_again;
-+              }
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      mutex_exit(&flush_list_mutex);
-       /* The returned answer may be out of date: the flush_list can
-       change after the mutex has been released. */
-@@ -252,7 +259,7 @@
-       case BUF_BLOCK_ZIP_FREE:
-               /* This is a free page in buf_pool->zip_free[].
-               Such pages should only be accessed by the buddy allocator. */
--              ut_error;
-+              /* ut_error; */ /* optimistic */
-               break;
-       case BUF_BLOCK_ZIP_PAGE:
-       case BUF_BLOCK_ZIP_DIRTY:
-@@ -295,7 +302,7 @@
- {
-       switch (buf_page_get_state(bpage)) {
-       case BUF_BLOCK_ZIP_FREE:
--              ut_error;
-+              /* ut_error; */ /* optimistic */
-               return(NULL);
-       case BUF_BLOCK_ZIP_PAGE:
-       case BUF_BLOCK_ZIP_DIRTY:
-@@ -305,6 +312,28 @@
-       }
- }
-+/*************************************************************************
-+Gets the mutex of a block and enter the mutex with consistency. */
-+UNIV_INLINE
-+mutex_t*
-+buf_page_get_mutex_enter(
-+/*=========================*/
-+      const buf_page_t*       bpage)  /*!< in: pointer to control block */
-+{
-+      mutex_t*        block_mutex;
-+
-+      while(1) {
-+              block_mutex = buf_page_get_mutex(bpage);
-+              if (!block_mutex)
-+                      return block_mutex;
-+
-+              mutex_enter(block_mutex);
-+              if (block_mutex == buf_page_get_mutex(bpage))
-+                      return block_mutex;
-+              mutex_exit(block_mutex);
-+      }
-+}
-+
- /*********************************************************************//**
- Get the flush type of a page.
- @return       flush type */
-@@ -400,7 +429,7 @@
-       buf_page_t*     bpage,  /*!< in/out: control block */
-       enum buf_io_fix io_fix) /*!< in: io_fix state */
- {
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-       bpage->io_fix = io_fix;
-@@ -428,12 +457,13 @@
- /*==================*/
-       const buf_page_t*       bpage)  /*!< control block being relocated */
- {
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-       ut_ad(buf_page_in_file(bpage));
--      ut_ad(bpage->in_LRU_list);
-+      /* optimistic */
-+      //ut_ad(bpage->in_LRU_list);
--      return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
-+      return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
-              && bpage->buf_fix_count == 0);
- }
-@@ -447,7 +477,7 @@
-       const buf_page_t*       bpage)  /*!< in: control block */
- {
-       ut_ad(buf_page_in_file(bpage));
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own()); /* This is used in optimistic */
-       return(bpage->old);
- }
-@@ -462,7 +492,8 @@
-       ibool           old)    /*!< in: old */
- {
-       ut_a(buf_page_in_file(bpage));
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(&LRU_list_mutex));
-       ut_ad(bpage->in_LRU_list);
- #ifdef UNIV_LRU_DEBUG
-@@ -509,7 +540,8 @@
-       ulint           time_ms)        /*!< in: ut_time_ms() */
- {
-       ut_a(buf_page_in_file(bpage));
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+      ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-       if (!bpage->access_time) {
-               /* Make this the time of the first access. */
-@@ -741,17 +773,17 @@
- /*===========*/
-       buf_block_t*    block)  /*!< in, own: block to be freed */
- {
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-       mutex_enter(&block->mutex);
-       ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
--      buf_LRU_block_free_non_file_page(block);
-+      buf_LRU_block_free_non_file_page(block, FALSE);
-       mutex_exit(&block->mutex);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
- }
- #endif /* !UNIV_HOTBACKUP */
-@@ -799,17 +831,17 @@
-                                       page frame */
- {
-       ib_uint64_t     lsn;
--      mutex_t*        block_mutex = buf_page_get_mutex(bpage);
-+      mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
--      mutex_enter(block_mutex);
--
--      if (buf_page_in_file(bpage)) {
-+      if (block_mutex && buf_page_in_file(bpage)) {
-               lsn = bpage->newest_modification;
-       } else {
-               lsn = 0;
-       }
--      mutex_exit(block_mutex);
-+      if (block_mutex) {
-+              mutex_exit(block_mutex);
-+      }
-       return(lsn);
- }
-@@ -825,7 +857,7 @@
-       buf_block_t*    block)  /*!< in: block */
- {
- #ifdef UNIV_SYNC_DEBUG
--      ut_ad((buf_pool_mutex_own()
-+      ut_ad((mutex_own(&LRU_list_mutex)
-              && (block->page.buf_fix_count == 0))
-             || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
- #endif /* UNIV_SYNC_DEBUG */
-@@ -917,7 +949,11 @@
-       ulint           fold;
-       ut_ad(buf_pool);
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-+#ifdef UNIV_SYNC_DEBUG
-+      ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
-+            || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
-+#endif
-       /* Look for the page in the hash table */
-@@ -972,11 +1008,13 @@
- {
-       const buf_page_t*       bpage;
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-+      rw_lock_s_lock(&page_hash_latch);
-       bpage = buf_page_hash_get(space, offset);
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      rw_lock_s_unlock(&page_hash_latch);
-       return(bpage != NULL);
- }
-@@ -1038,11 +1076,14 @@
-       ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-       ut_a(block->page.buf_fix_count > 0);
-+      /* buf_flush_note_modification() should be called before this function. */
-+/*
-       if (rw_latch == RW_X_LATCH && mtr->modifications) {
-               buf_pool_mutex_enter();
-               buf_flush_note_modification(block, mtr);
-               buf_pool_mutex_exit();
-       }
-+*/
-       mutex_enter(&block->mutex);
-diff -ruN a/storage/innodb_plugin/include/buf0flu.ic b/storage/innodb_plugin/include/buf0flu.ic
---- a/storage/innodb_plugin/include/buf0flu.ic 2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/buf0flu.ic 2010-08-27 16:11:40.625993554 +0900
-@@ -55,13 +55,23 @@
-       buf_block_t*    block,  /*!< in: block which is modified */
-       mtr_t*          mtr)    /*!< in: mtr */
- {
-+      ibool   use_LRU_mutex = FALSE;
-+
-+      if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
-+              use_LRU_mutex = TRUE;
-+
-+      if (use_LRU_mutex)
-+              mutex_enter(&LRU_list_mutex);
-+
-+      mutex_enter(&block->mutex);
-+
-       ut_ad(block);
-       ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-       ut_ad(block->page.buf_fix_count > 0);
- #ifdef UNIV_SYNC_DEBUG
-       ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
- #endif /* UNIV_SYNC_DEBUG */
--      ut_ad(buf_pool_mutex_own());
-+      //ut_ad(buf_pool_mutex_own());
-       ut_ad(mtr->start_lsn != 0);
-       ut_ad(mtr->modifications);
-@@ -70,16 +80,23 @@
-       block->page.newest_modification = mtr->end_lsn;
-       if (!block->page.oldest_modification) {
-+              mutex_enter(&flush_list_mutex);
-               block->page.oldest_modification = mtr->start_lsn;
-               ut_ad(block->page.oldest_modification != 0);
-               buf_flush_insert_into_flush_list(block);
-+              mutex_exit(&flush_list_mutex);
-       } else {
-               ut_ad(block->page.oldest_modification <= mtr->start_lsn);
-       }
-+      mutex_exit(&block->mutex);
-+
-       ++srv_buf_pool_write_requests;
-+
-+      if (use_LRU_mutex)
-+              mutex_exit(&LRU_list_mutex);
- }
- /********************************************************************//**
-@@ -94,6 +111,16 @@
-       ib_uint64_t     end_lsn)        /*!< in: end lsn of the last mtr in the
-                                       set of mtr's */
- {
-+      ibool   use_LRU_mutex = FALSE;
-+
-+      if(UT_LIST_GET_LEN(buf_pool->unzip_LRU))
-+              use_LRU_mutex = TRUE;
-+
-+      if (use_LRU_mutex)
-+              mutex_enter(&LRU_list_mutex);
-+
-+      mutex_enter(&(block->mutex));
-+
-       ut_ad(block);
-       ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-       ut_ad(block->page.buf_fix_count > 0);
-@@ -101,23 +128,28 @@
-       ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
- #endif /* UNIV_SYNC_DEBUG */
--      buf_pool_mutex_enter();
-+      //buf_pool_mutex_enter();
-       ut_ad(block->page.newest_modification <= end_lsn);
-       block->page.newest_modification = end_lsn;
-       if (!block->page.oldest_modification) {
-+              mutex_enter(&flush_list_mutex);
-               block->page.oldest_modification = start_lsn;
-               ut_ad(block->page.oldest_modification != 0);
-               buf_flush_insert_sorted_into_flush_list(block);
-+              mutex_exit(&flush_list_mutex);
-       } else {
-               ut_ad(block->page.oldest_modification <= start_lsn);
-       }
--      buf_pool_mutex_exit();
-+      //buf_pool_mutex_exit();
-+      if (use_LRU_mutex)
-+              mutex_exit(&LRU_list_mutex);
-+      mutex_exit(&(block->mutex));
- }
- #endif /* !UNIV_HOTBACKUP */
-diff -ruN a/storage/innodb_plugin/include/buf0lru.h b/storage/innodb_plugin/include/buf0lru.h
---- a/storage/innodb_plugin/include/buf0lru.h  2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/buf0lru.h  2010-08-27 16:11:40.627990038 +0900
-@@ -112,10 +112,11 @@
-       buf_page_t*     bpage,  /*!< in: block to be freed */
-       ibool           zip,    /*!< in: TRUE if should remove also the
-                               compressed page of an uncompressed page */
--      ibool*          buf_pool_mutex_released);
-+      ibool*          buf_pool_mutex_released,
-                               /*!< in: pointer to a variable that will
-                               be assigned TRUE if buf_pool_mutex
-                               was temporarily released, or NULL */
-+      ibool           have_LRU_mutex);
- /******************************************************************//**
- Try to free a replaceable block.
- @return       TRUE if found and freed */
-@@ -157,7 +158,8 @@
- void
- buf_LRU_block_free_non_file_page(
- /*=============================*/
--      buf_block_t*    block); /*!< in: block, must not contain a file page */
-+      buf_block_t*    block,  /*!< in: block, must not contain a file page */
-+      ibool           have_page_hash_mutex);
- /******************************************************************//**
- Adds a block to the LRU list. */
- UNIV_INTERN
-diff -ruN a/storage/innodb_plugin/include/sync0sync.h b/storage/innodb_plugin/include/sync0sync.h
---- a/storage/innodb_plugin/include/sync0sync.h        2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/sync0sync.h        2010-08-27 16:11:40.628990180 +0900
-@@ -487,8 +487,14 @@
-                                       SYNC_SEARCH_SYS, as memory allocation
-                                       can call routines there! Otherwise
-                                       the level is SYNC_MEM_HASH. */
-+#define SYNC_BUF_LRU_LIST     157
-+#define SYNC_BUF_PAGE_HASH    156
-+#define       SYNC_BUF_BLOCK          155
-+#define SYNC_BUF_FREE_LIST    153
-+#define SYNC_BUF_ZIP_FREE     152
-+#define SYNC_BUF_ZIP_HASH     151
- #define       SYNC_BUF_POOL           150
--#define       SYNC_BUF_BLOCK          149
-+#define SYNC_BUF_FLUSH_LIST   149
- #define SYNC_DOUBLEWRITE      140
- #define       SYNC_ANY_LATCH          135
- #define SYNC_THR_LOCAL                133
-@@ -519,7 +525,7 @@
-               os_fast_mutex;  /*!< We use this OS mutex in place of lock_word
-                               when atomic operations are not enabled */
- #endif
--      ulint   waiters;        /*!< This ulint is set to 1 if there are (or
-+      volatile ulint  waiters;        /*!< This ulint is set to 1 if there are (or
-                               may be) threads waiting in the global wait
-                               array for this mutex to be released.
-                               Otherwise, this is 0. */
-diff -ruN a/storage/innodb_plugin/mtr/mtr0mtr.c b/storage/innodb_plugin/mtr/mtr0mtr.c
---- a/storage/innodb_plugin/mtr/mtr0mtr.c      2010-08-04 02:24:20.000000000 +0900
-+++ b/storage/innodb_plugin/mtr/mtr0mtr.c      2010-08-27 16:11:40.631020912 +0900
-@@ -105,6 +105,38 @@
-       }
- }
-+UNIV_INLINE
-+void
-+mtr_memo_note_modification_all(
-+/*===========================*/
-+      mtr_t*  mtr)    /* in: mtr */
-+{
-+      mtr_memo_slot_t* slot;
-+      dyn_array_t*    memo;
-+      ulint           offset;
-+
-+      ut_ad(mtr);
-+      ut_ad(mtr->magic_n == MTR_MAGIC_N);
-+      ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
-+                                           commit */
-+      ut_ad(mtr->modifications);
-+
-+      memo = &(mtr->memo);
-+
-+      offset = dyn_array_get_data_size(memo);
-+
-+      while (offset > 0) {
-+              offset -= sizeof(mtr_memo_slot_t);
-+              slot = dyn_array_get_element(memo, offset);
-+
-+              if (UNIV_LIKELY(slot->object != NULL) &&
-+                  slot->type == MTR_MEMO_PAGE_X_FIX) {
-+                      buf_flush_note_modification(
-+                              (buf_block_t*)slot->object, mtr);
-+              }
-+      }
-+}
-+
- /************************************************************//**
- Writes the contents of a mini-transaction log, if any, to the database log. */
- static
-@@ -188,6 +220,8 @@
-       if (write_log) {
-               mtr_log_reserve_and_write(mtr);
-+
-+              mtr_memo_note_modification_all(mtr);
-       }
-       /* We first update the modification info to buffer pages, and only
-@@ -198,11 +232,13 @@
-       required when we insert modified buffer pages in to the flush list
-       which must be sorted on oldest_modification. */
--      mtr_memo_pop_all(mtr);
--
-       if (write_log) {
-               log_release();
-       }
-+
-+      /* All unlocking has been moved here, after log_sys mutex release. */
-+      mtr_memo_pop_all(mtr);
-+
- #endif /* !UNIV_HOTBACKUP */
-       ut_d(mtr->state = MTR_COMMITTED);
-@@ -273,6 +309,12 @@
-               slot = dyn_array_get_element(memo, offset);
-               if ((object == slot->object) && (type == slot->type)) {
-+                      if (mtr->modifications &&
-+                          UNIV_LIKELY(slot->object != NULL) &&
-+                          slot->type == MTR_MEMO_PAGE_X_FIX) {
-+                              buf_flush_note_modification(
-+                                      (buf_block_t*)slot->object, mtr);
-+                      }
-                       mtr_memo_slot_release(mtr, slot);
-diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
---- a/storage/innodb_plugin/srv/srv0srv.c      2010-08-27 16:11:12.194989878 +0900
-+++ b/storage/innodb_plugin/srv/srv0srv.c      2010-08-27 16:11:40.634022489 +0900
-@@ -2829,7 +2829,7 @@
-                                       mutex_exit(&(log_sys->mutex));
--                                      buf_pool_mutex_enter();
-+                                      mutex_enter(&flush_list_mutex);
-                                       level = 0;
-                                       bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-@@ -2851,7 +2851,7 @@
-                                               bpl = 0;
-                                       }
--                                      buf_pool_mutex_exit();
-+                                      mutex_exit(&flush_list_mutex);
-                                       if (!srv_use_doublewrite_buf) {
-                                               /* flush is faster than when doublewrite */
-diff -ruN a/storage/innodb_plugin/sync/sync0sync.c b/storage/innodb_plugin/sync/sync0sync.c
---- a/storage/innodb_plugin/sync/sync0sync.c   2010-08-04 02:24:20.000000000 +0900
-+++ b/storage/innodb_plugin/sync/sync0sync.c   2010-08-27 16:11:40.636021261 +0900
-@@ -254,7 +254,7 @@
-       mutex->lock_word = 0;
- #endif
-       mutex->event = os_event_create(NULL);
--      mutex_set_waiters(mutex, 0);
-+      mutex->waiters = 0;
- #ifdef UNIV_DEBUG
-       mutex->magic_n = MUTEX_MAGIC_N;
- #endif /* UNIV_DEBUG */
-@@ -432,6 +432,15 @@
-       mutex_t*        mutex,  /*!< in: mutex */
-       ulint           n)      /*!< in: value to set */
- {
-+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
-+      ut_ad(mutex);
-+
-+      if (n) {
-+              os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
-+      } else {
-+              os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
-+      }
-+#else
-       volatile ulint* ptr;            /* declared volatile to ensure that
-                                       the value is stored to memory */
-       ut_ad(mutex);
-@@ -440,6 +449,7 @@
-       *ptr = n;               /* Here we assume that the write of a single
-                               word in memory is atomic */
-+#endif
- }
- /******************************************************************//**
-@@ -1153,6 +1163,12 @@
-       case SYNC_TRX_SYS_HEADER:
-       case SYNC_FILE_FORMAT_TAG:
-       case SYNC_DOUBLEWRITE:
-+      case SYNC_BUF_LRU_LIST:
-+      case SYNC_BUF_FLUSH_LIST:
-+      case SYNC_BUF_PAGE_HASH:
-+      case SYNC_BUF_FREE_LIST:
-+      case SYNC_BUF_ZIP_FREE:
-+      case SYNC_BUF_ZIP_HASH:
-       case SYNC_BUF_POOL:
-       case SYNC_SEARCH_SYS:
-       case SYNC_SEARCH_SYS_CONF:
-@@ -1181,7 +1197,7 @@
-               buffer block (block->mutex or buf_pool_zip_mutex). */
-               if (!sync_thread_levels_g(array, level, FALSE)) {
-                       ut_a(sync_thread_levels_g(array, level - 1, TRUE));
--                      ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
-+                      ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
-               }
-               break;
-       case SYNC_REC_LOCK:
diff --git a/mysql-libs.patch b/mysql-libs.patch
deleted file mode 100644 (file)
index bbca817..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
---- mysql-5.1.21-beta/configure.in~    2007-08-29 19:00:29.000000000 +0300
-+++ mysql-5.1.21-beta/configure.in     2007-08-29 19:00:35.000000000 +0300
-@@ -836,8 +836,9 @@
- AC_CHECK_LIB(m, floor, [], AC_CHECK_LIB(m, __infinity))
--AC_CHECK_LIB(nsl_r, gethostbyname_r, [],
--  AC_CHECK_LIB(nsl, gethostbyname_r))
-+AC_CHECK_FUNC(gethostbyname_r, [], [
-+ AC_CHECK_LIB(nsl_r, gethostbyname_r, [],
-+  AC_CHECK_LIB(nsl, gethostbyname_r)) ])
- AC_CHECK_FUNC(gethostbyname_r)
- AC_SEARCH_LIBS(setsockopt, socket)
-@@ -845,6 +846,11 @@
- AC_SEARCH_LIBS(bind, bind)
- # Check if crypt() exists in libc or libcrypt, sets LIBS if needed
- AC_SEARCH_LIBS(crypt, crypt, AC_DEFINE(HAVE_CRYPT, 1, [crypt]))
-+save_LIBS="$LIBS"
-+AC_CHECK_LIB(crypt, crypt, [LIBS="-lcrypt $LIBS"; LIBCRYPT="-lcrypt"])
-+AC_SUBST(LIBCRYPT)
-+AC_CHECK_FUNC(crypt, AC_DEFINE([HAVE_CRYPT], [1], [crypt]))
-+LIBS="$save_LIBS"
- # See if we need a library for address lookup.
- AC_SEARCH_LIBS(inet_aton, [socket nsl resolv])
---- mysql-5.1.21-beta/sql/Makefile.am~ 2007-08-29 18:58:33.000000000 +0300
-+++ mysql-5.1.21-beta/sql/Makefile.am  2007-08-29 19:01:33.000000000 +0300
-@@ -43,7 +43,7 @@
-                       @pstack_libs@ \
-                       @mysql_plugin_libs@ \
-                       $(LDADD)  $(CXXLDFLAGS) $(WRAPLIBS) @LIBDL@ \
--                      $(yassl_libs) $(openssl_libs) @MYSQLD_EXTRA_LIBS@
-+                      $(yassl_libs) $(openssl_libs) @LIBCRYPT@ @MYSQLD_EXTRA_LIBS@
- noinst_HEADERS =      item.h item_func.h item_sum.h item_cmpfunc.h \
-                       item_strfunc.h item_timefunc.h \
---- mysql-5.1.26-rc/config/ac-macros/ssl.m4.orig       2008-07-01 00:35:01.000000000 +0200
-+++ mysql-5.1.26-rc/config/ac-macros/ssl.m4    2008-08-25 19:53:00.258254465 +0200
-@@ -102,7 +102,12 @@
-   #
-   # Try to link with openSSL libs in <location>
-   #
--  openssl_libs="-L$location/lib/ -lssl -lcrypto"
-+  if test "$location" != "/usr"
-+  then
-+  openssl_libs="-L$location/lib -lssl -lcrypto"
-+  else
-+  openssl_libs="-lssl -lcrypto"
-+  fi
-   MYSQL_CHECK_SSL_DIR([$openssl_includes], [$openssl_libs])
-   if test "$mysql_ssl_found" == "no"
diff --git a/mysql-libwrap.patch b/mysql-libwrap.patch
deleted file mode 100644 (file)
index 78c7959..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
---- mysql-4.1.9/sql/mysqld.cc.orig     2005-01-11 23:06:00.000000000 +0100
-+++ mysql-4.1.9/sql/mysqld.cc  2005-02-06 17:21:26.238717200 +0100
-@@ -133,7 +133,16 @@
- #endif /* __WIN__ */
- #ifdef HAVE_LIBWRAP
-+#define hosts_access hosts_access_old
-+#define sock_host sock_host_old
-+#define eval_client eval_client_old
- #include <tcpd.h>
-+#undef hosts_access
-+#undef sock_host
-+#undef eval_client
-+extern int hosts_access(struct request_info *);
-+extern int sock_host(struct request_info *);
-+extern int eval_client(struct request_info *);
- #include <syslog.h>
- #ifdef NEED_SYS_SYSLOG_H
- #include <sys/syslog.h>
diff --git a/mysql-noproc.patch b/mysql-noproc.patch
deleted file mode 100644 (file)
index 90b7878..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
---- mysql-4.0.13/configure.in.orig     Fri Jul 11 11:59:51 2003
-+++ mysql-4.0.13/configure.in  Fri Jul 11 14:06:50 2003
-@@ -431,6 +431,7 @@
- # Lock for PS
- AC_PATH_PROG(PS, ps, ps)
- AC_MSG_CHECKING("how to check if pid exists")
-+if test -z "$FIND_PROC"; then
- PS=$ac_cv_path_PS
- # Linux style
- if $PS p $$ 2> /dev/null | grep $0 > /dev/null
-@@ -467,12 +468,14 @@
-       AC_MSG_ERROR([Could not find the right ps switches. Which OS is this ?. See the Installation chapter in the Reference Manual.])
-   esac
- fi
-+fi
- AC_SUBST(FIND_PROC)
- AC_MSG_RESULT("$FIND_PROC")
- # Check if a pid is valid
- AC_PATH_PROG(KILL, kill, kill)
- AC_MSG_CHECKING("for kill switches")
-+if test -z "$CHECK_PID"; then
- if $ac_cv_path_KILL -0 $$
- then
-   CHECK_PID="$ac_cv_path_KILL -0 \$\$PID > /dev/null 2> /dev/null"
-@@ -483,6 +486,7 @@
-   AC_MSG_WARN([kill -0 to check for pid seems to fail])
-     CHECK_PID="$ac_cv_path_KILL -s SIGCONT \$\$PID > /dev/null 2> /dev/null"
- fi
-+fi
- AC_SUBST(CHECK_PID)
- AC_MSG_RESULT("$CHECK_PID")
diff --git a/mysql-test.diff b/mysql-test.diff
new file mode 100644 (file)
index 0000000..2d7b9bd
--- /dev/null
@@ -0,0 +1,875 @@
+# name       : mysql-test.diff
+# introduced : ???
+# maintainer : ???
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/mysql-test/include/default_mysqld.cnf b/mysql-test/include/default_mysqld.cnf
+--- a/mysql-test/include/default_mysqld.cnf    2010-11-03 07:01:11.000000000 +0900
++++ b/mysql-test/include/default_mysqld.cnf    2010-12-10 16:48:10.996387047 +0900
+@@ -29,7 +29,7 @@
+ max_heap_table_size=        1M
+ loose-innodb_data_file_path=      ibdata1:10M:autoextend
+-loose-innodb_buffer_pool_size=    8M
++loose-innodb_buffer_pool_size=    32M
+ loose-innodb_write_io_threads=    2
+ loose-innodb_read_io_threads=     2
+ loose-innodb_log_buffer_size=     1M
+diff -ruN a/mysql-test/r/connect.result b/mysql-test/r/connect.result
+--- a/mysql-test/r/connect.result      2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/r/connect.result      2010-12-10 16:48:10.997386982 +0900
+@@ -1,3 +1,4 @@
++set global log_warnings=0;
+ drop table if exists t1,t2;
+ show tables;
+ Tables_in_mysql
+@@ -221,3 +222,4 @@
+ # ------------------------------------------------------------------
+ # -- End of 5.1 tests
+ # ------------------------------------------------------------------
++set global log_warnings=1;
+diff -ruN a/mysql-test/r/create.result b/mysql-test/r/create.result
+--- a/mysql-test/r/create.result       2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/r/create.result       2010-12-10 16:48:11.001009442 +0900
+@@ -1741,7 +1741,8 @@
+   `COMMAND` varchar(16) NOT NULL DEFAULT '',
+   `TIME` int(7) NOT NULL DEFAULT '0',
+   `STATE` varchar(64) DEFAULT NULL,
+-  `INFO` longtext
++  `INFO` longtext,
++  `TIME_MS` bigint(21) NOT NULL DEFAULT '0'
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8
+ drop table t1;
+ create temporary table t1 like information_schema.processlist;
+@@ -1755,7 +1756,8 @@
+   `COMMAND` varchar(16) NOT NULL DEFAULT '',
+   `TIME` int(7) NOT NULL DEFAULT '0',
+   `STATE` varchar(64) DEFAULT NULL,
+-  `INFO` longtext
++  `INFO` longtext,
++  `TIME_MS` bigint(21) NOT NULL DEFAULT '0'
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8
+ drop table t1;
+ create table t1 like information_schema.character_sets;
+diff -ruN a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result
+--- a/mysql-test/r/information_schema.result   2010-12-03 20:58:24.000000000 +0300
++++ b/mysql-test/r/information_schema.result   2011-01-10 23:02:09.000000000 +0300
+@@ -46,14 +46,17 @@
+ select * from v1;
+ c
+ CHARACTER_SETS
++CLIENT_STATISTICS
+ COLLATIONS
+ COLLATION_CHARACTER_SET_APPLICABILITY
+ COLUMNS
+ COLUMN_PRIVILEGES
++INDEX_STATISTICS
+ ENGINES
+ EVENTS
+ FILES
+ GLOBAL_STATUS
++GLOBAL_TEMPORARY_TABLES
+ GLOBAL_VARIABLES
+ KEY_COLUMN_USAGE
+ PARAMETERS
+@@ -63,6 +66,7 @@
+ PROFILING
+ REFERENTIAL_CONSTRAINTS
+ ROUTINES
++QUERY_RESPONSE_TIME
+ SCHEMATA
+ SCHEMA_PRIVILEGES
+ SESSION_STATUS
+@@ -72,9 +76,14 @@
+ TABLESPACES
+ TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES
++TABLE_STATISTICS
++TEMPORARY_TABLES
++THREAD_STATISTICS
+ TRIGGERS
+ USER_PRIVILEGES
++USER_STATISTICS
+ VIEWS
++XTRADB_ADMIN_COMMAND
+ columns_priv
+ db
+ event
+@@ -112,6 +121,9 @@
+ TABLESPACES   TABLESPACES
+ TABLE_CONSTRAINTS     TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES      TABLE_PRIVILEGES
++TABLE_STATISTICS      TABLE_STATISTICS
++TEMPORARY_TABLES      TEMPORARY_TABLES
++THREAD_STATISTICS     THREAD_STATISTICS
+ TRIGGERS      TRIGGERS
+ tables_priv   tables_priv
+ time_zone     time_zone
+@@ -132,6 +144,9 @@
+ TABLESPACES   TABLESPACES
+ TABLE_CONSTRAINTS     TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES      TABLE_PRIVILEGES
++TABLE_STATISTICS      TABLE_STATISTICS
++TEMPORARY_TABLES      TEMPORARY_TABLES
++THREAD_STATISTICS     THREAD_STATISTICS
+ TRIGGERS      TRIGGERS
+ tables_priv   tables_priv
+ time_zone     time_zone
+@@ -152,6 +167,9 @@
+ TABLESPACES   TABLESPACES
+ TABLE_CONSTRAINTS     TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES      TABLE_PRIVILEGES
++TABLE_STATISTICS      TABLE_STATISTICS
++TEMPORARY_TABLES      TEMPORARY_TABLES
++THREAD_STATISTICS     THREAD_STATISTICS
+ TRIGGERS      TRIGGERS
+ tables_priv   tables_priv
+ time_zone     time_zone
+@@ -634,13 +652,16 @@
+ where table_schema='information_schema' limit 2;
+ TABLE_NAME    TABLE_TYPE      ENGINE
+ CHARACTER_SETS        SYSTEM VIEW     MEMORY
+-COLLATIONS    SYSTEM VIEW     MEMORY
++CLIENT_STATISTICS     SYSTEM VIEW     MEMORY
+ show tables from information_schema like "T%";
+ Tables_in_information_schema (T%)
+ TABLES
+ TABLESPACES
+ TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES
++TABLE_STATISTICS
++TEMPORARY_TABLES
++THREAD_STATISTICS
+ TRIGGERS
+ create database information_schema;
+ ERROR 42000: Access denied for user 'root'@'localhost' to database 'information_schema'
+@@ -651,6 +672,9 @@
+ TABLESPACES   SYSTEM VIEW
+ TABLE_CONSTRAINTS     SYSTEM VIEW
+ TABLE_PRIVILEGES      SYSTEM VIEW
++TABLE_STATISTICS      SYSTEM VIEW
++TEMPORARY_TABLES      SYSTEM VIEW
++THREAD_STATISTICS     SYSTEM VIEW
+ TRIGGERS      SYSTEM VIEW
+ create table t1(a int);
+ ERROR 42000: Access denied for user 'root'@'localhost' to database 'information_schema'
+@@ -664,6 +688,9 @@
+ TABLESPACES
+ TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES
++TABLE_STATISTICS
++TEMPORARY_TABLES
++THREAD_STATISTICS
+ TRIGGERS
+ select table_name from tables where table_name='user';
+ table_name
+@@ -806,6 +833,8 @@
+ FILES CREATE_TIME     datetime
+ FILES UPDATE_TIME     datetime
+ FILES CHECK_TIME      datetime
++GLOBAL_TEMPORARY_TABLES       CREATE_TIME     datetime
++GLOBAL_TEMPORARY_TABLES       UPDATE_TIME     datetime
+ PARTITIONS    CREATE_TIME     datetime
+ PARTITIONS    UPDATE_TIME     datetime
+ PARTITIONS    CHECK_TIME      datetime
+@@ -814,6 +843,8 @@
+ TABLES        CREATE_TIME     datetime
+ TABLES        UPDATE_TIME     datetime
+ TABLES        CHECK_TIME      datetime
++TEMPORARY_TABLES      CREATE_TIME     datetime
++TEMPORARY_TABLES      UPDATE_TIME     datetime
+ TRIGGERS      CREATED datetime
+ event execute_at      datetime
+ event last_executed   datetime
+@@ -854,7 +885,9 @@
+ TABLE_NAME    COLUMN_NAME     PRIVILEGES
+ COLUMNS       TABLE_NAME      select
+ COLUMN_PRIVILEGES     TABLE_NAME      select
++INDEX_STATISTICS      TABLE_NAME      select
+ FILES TABLE_NAME      select
++GLOBAL_TEMPORARY_TABLES       TABLE_NAME      select
+ KEY_COLUMN_USAGE      TABLE_NAME      select
+ PARTITIONS    TABLE_NAME      select
+ REFERENTIAL_CONSTRAINTS       TABLE_NAME      select
+@@ -862,7 +895,11 @@
+ TABLES        TABLE_NAME      select
+ TABLE_CONSTRAINTS     TABLE_NAME      select
+ TABLE_PRIVILEGES      TABLE_NAME      select
++TABLE_STATISTICS      TABLE_NAME      select
++TEMPORARY_TABLES      TABLE_NAME      select
+ VIEWS TABLE_NAME      select
++INNODB_TABLE_STATS    table_name      select
++INNODB_INDEX_STATS    table_name      select
+ delete from mysql.user where user='mysqltest_4';
+ delete from mysql.db where user='mysqltest_4';
+ flush privileges;
+@@ -871,7 +908,7 @@
+ AND table_name not like 'ndb%' AND table_name not like 'innodb_%'
+ GROUP BY TABLE_SCHEMA;
+ table_schema  count(*)
+-information_schema    30
++information_schema    39
+ mysql 23
+ create table t1 (i int, j int);
+ create trigger trg1 before insert on t1 for each row
+@@ -1245,12 +1282,12 @@
+ DROP USER mysql_bug20230@localhost;
+ SELECT MAX(table_name) FROM information_schema.tables WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test');
+ MAX(table_name)
+-VIEWS
++XTRADB_ADMIN_COMMAND
+ SELECT table_name from information_schema.tables
+ WHERE table_name=(SELECT MAX(table_name)
+ FROM information_schema.tables WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test'));
+ table_name
+-VIEWS
++XTRADB_ADMIN_COMMAND
+ DROP TABLE IF EXISTS bug23037;
+ DROP FUNCTION IF EXISTS get_value;
+ SELECT COLUMN_NAME, MD5(COLUMN_DEFAULT), LENGTH(COLUMN_DEFAULT) FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME='bug23037';
+@@ -1311,6 +1348,7 @@
+ group by t.table_name order by num1, t.table_name;
+ table_name    group_concat(t.table_schema, '.', t.table_name) num1
+ CHARACTER_SETS        information_schema.CHARACTER_SETS       1
++CLIENT_STATISTICS     information_schema.CLIENT_STATISTICS    1
+ COLLATIONS    information_schema.COLLATIONS   1
+ COLLATION_CHARACTER_SET_APPLICABILITY information_schema.COLLATION_CHARACTER_SET_APPLICABILITY        1
+ COLUMNS       information_schema.COLUMNS      1
+@@ -1319,13 +1357,16 @@
+ EVENTS        information_schema.EVENTS       1
+ FILES information_schema.FILES        1
+ GLOBAL_STATUS information_schema.GLOBAL_STATUS        1
++GLOBAL_TEMPORARY_TABLES       information_schema.GLOBAL_TEMPORARY_TABLES      1
+ GLOBAL_VARIABLES      information_schema.GLOBAL_VARIABLES     1
++INDEX_STATISTICS      information_schema.INDEX_STATISTICS     1
+ KEY_COLUMN_USAGE      information_schema.KEY_COLUMN_USAGE     1
+ PARAMETERS    information_schema.PARAMETERS   1
+ PARTITIONS    information_schema.PARTITIONS   1
+ PLUGINS       information_schema.PLUGINS      1
+ PROCESSLIST   information_schema.PROCESSLIST  1
+ PROFILING     information_schema.PROFILING    1
++QUERY_RESPONSE_TIME   information_schema.QUERY_RESPONSE_TIME  1
+ REFERENTIAL_CONSTRAINTS       information_schema.REFERENTIAL_CONSTRAINTS      1
+ ROUTINES      information_schema.ROUTINES     1
+ SCHEMATA      information_schema.SCHEMATA     1
+@@ -1337,8 +1378,12 @@
+ TABLESPACES   information_schema.TABLESPACES  1
+ TABLE_CONSTRAINTS     information_schema.TABLE_CONSTRAINTS    1
+ TABLE_PRIVILEGES      information_schema.TABLE_PRIVILEGES     1
++TABLE_STATISTICS      information_schema.TABLE_STATISTICS     1
++TEMPORARY_TABLES      information_schema.TEMPORARY_TABLES     1
++THREAD_STATISTICS     information_schema.THREAD_STATISTICS    1
+ TRIGGERS      information_schema.TRIGGERS     1
+ USER_PRIVILEGES       information_schema.USER_PRIVILEGES      1
++USER_STATISTICS       information_schema.USER_STATISTICS      1
+ VIEWS information_schema.VIEWS        1
+ create table t1(f1 int);
+ create view v1 as select f1+1 as a from t1;
+diff -ruN a/mysql-test/r/information_schema_db.result b/mysql-test/r/information_schema_db.result
+--- a/mysql-test/r/information_schema_db.result        2010-12-03 20:58:24.000000000 +0300
++++ b/mysql-test/r/information_schema_db.result        2011-01-10 23:06:43.000000000 +0300
+@@ -6,14 +6,17 @@
+ show tables where Tables_in_information_schema NOT LIKE 'Innodb%';
+ Tables_in_information_schema
+ CHARACTER_SETS
++CLIENT_STATISTICS
+ COLLATIONS
+ COLLATION_CHARACTER_SET_APPLICABILITY
+ COLUMNS
+ COLUMN_PRIVILEGES
++INDEX_STATISTICS
+ ENGINES
+ EVENTS
+ FILES
+ GLOBAL_STATUS
++GLOBAL_TEMPORARY_TABLES
+ GLOBAL_VARIABLES
+ KEY_COLUMN_USAGE
+ PARAMETERS
+@@ -23,6 +26,7 @@
+ PROFILING
+ REFERENTIAL_CONSTRAINTS
+ ROUTINES
++QUERY_RESPONSE_TIME
+ SCHEMATA
+ SCHEMA_PRIVILEGES
+ SESSION_STATUS
+@@ -32,15 +36,23 @@
+ TABLESPACES
+ TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES
++TABLE_STATISTICS
++TEMPORARY_TABLES
++THREAD_STATISTICS
+ TRIGGERS
+ USER_PRIVILEGES
++USER_STATISTICS
+ VIEWS
++XTRADB_ADMIN_COMMAND
+ show tables from INFORMATION_SCHEMA like 'T%';
+ Tables_in_information_schema (T%)
+ TABLES
+ TABLESPACES
+ TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES
++TABLE_STATISTICS
++TEMPORARY_TABLES
++THREAD_STATISTICS
+ TRIGGERS
+ create database `inf%`;
+ create database mbase;
+diff -ruN a/mysql-test/r/mysqld--help-notwin.result b/mysql-test/r/mysqld--help-notwin.result
+--- a/mysql-test/r/mysqld--help-notwin.result  2010-12-03 20:58:24.000000000 +0300
++++ b/mysql-test/r/mysqld--help-notwin.result  2011-01-10 23:34:28.000000000 +0300
+@@ -125,6 +125,9 @@
+  --div-precision-increment=# 
+  Precision of the result of '/' operator will be increased
+  on that value
++ --enable-query-response-time-stats 
++ Enable or disable query response time statisics
++ collecting
+  --engine-condition-pushdown 
+  Push supported query conditions to the storage engine.
+  Deprecated, use --optimizer-switch instead.
+@@ -244,21 +247,43 @@
+  --log-slow-admin-statements 
+  Log slow OPTIMIZE, ANALYZE, ALTER and other
+  administrative statements to the slow log if it is open.
++ --log-slow-filter=name 
++ Log only the queries that followed certain execution
++ plan. Multiple flags allowed in a comma-separated string.
++ [qc_miss, full_scan, full_join, tmp_table,
++ tmp_table_on_disk, filesort, filesort_on_disk]
+  --log-slow-queries[=name] 
+  Log slow queries to a table or log file. Defaults logging
+  to table mysql.slow_log or hostname-slow.log if
+  --log-output=file is used. Must be enabled to activate
+  other slow log options. Deprecated option, use
+  --slow-query-log/--slow-query-log-file instead.
++ --log-slow-rate-limit=# 
++ Rate limit statement writes to slow log to only those
++ from every (1/log_slow_rate_limit) session.
+  --log-slow-slave-statements 
+- Log slow statements executed by slave thread to the slow
+- log if it is open.
++ Log queries replayed be the slave SQL thread
++ --log-slow-sp-statements 
++ Log slow statements executed by stored procedure to the
++ slow log if it is open.
++ (Defaults to on; use --skip-log-slow-sp-statements to disable.)
++ --log-slow-timestamp-every 
++ Timestamp is printed for all records of the slow log even
++ if they are same time.
++ --log-slow-verbosity=name 
++ Choose how verbose the messages to your slow log will be.
++ Multiple flags allowed in a comma-separated string.
++ [microtime, query_plan, innodb, profiling,
++ profiling_get_rusage]
+  --log-tc=name       Path to transaction coordinator log (used for
+  transactions that affect more than one storage engine,
+  when binary log is disabled).
+  --log-tc-size=#     Size of transaction coordinator log.
+  -W, --log-warnings[=#] 
+  Log some not critical warnings to the log file
++ --log-warnings-silence=name 
++ disable logging of enumerated warnings: 1592: unsafe
++ statements for binary logging; possible values : [1592]
+  --long-query-time=# Log all queries that have taken more than long_query_time
+  seconds to execute to file. The argument will be treated
+  as a decimal value with microsecond precision
+@@ -464,6 +489,10 @@
+  The minimum size for blocks allocated by the query cache
+  --query-cache-size=# 
+  The memory allocated to store results from old queries
++ --query-cache-strip-comments 
++ Enable and disable optimisation "strip comment for query
++ cache" - optimisation strip all comments from query while
++ search query result in query cache
+  --query-cache-type=name 
+  OFF = Don't cache or retrieve results. ON = Cache all
+  results except SELECT SQL_NO_CACHE ... queries. DEMAND =
+@@ -472,6 +501,9 @@
+  Invalidate queries in query cache on LOCK for write
+  --query-prealloc-size=# 
+  Persistent buffer for query parsing and execution
++ --query-response-time-range-base=# 
++ Select base of log for query_response_time ranges.
++ WARNING: variable change affect only after flush
+  --range-alloc-block-size=# 
+  Allocation block size for storing ranges during
+  optimization
+@@ -649,6 +681,9 @@
+  Log slow queries to given log file. Defaults logging to
+  hostname-slow.log. Must be enabled to activate other slow
+  log options
++ --slow-query-log-microseconds-timestamp 
++ Log slow statements executed by stored procedure to the
++ slow log if it is open.
+  --socket=name       Socket file to use for connection
+  --sort-buffer-size=# 
+  Each thread that needs to do a sort allocates a buffer of
+@@ -691,6 +726,8 @@
+  Define threads usage for handling queries, one of
+  one-thread-per-connection, no-threads, loaded-dynamically
+  --thread-stack=#    The stack size for each thread
++ --thread-statistics Control TABLE_STATISTICS running, when userstat_running
++ is enabled
+  --time-format=name  The TIME format (ignored)
+  --timed-mutexes     Specify whether to time mutexes (only InnoDB mutexes are
+  currently supported)
+@@ -715,11 +752,24 @@
+  Prohibit update of a VIEW, which does not contain a key
+  of the underlying table and the query uses a LIMIT clause
+  (usually get from GUI tools)
++ --use-global-log-slow-control=name 
++ Choose flags, wich always use the global variables.
++ Multiple flags allowed in a comma-separated string.
++ [none, log_slow_filter, log_slow_rate_limit,
++ log_slow_verbosity, long_query_time,
++ min_examined_row_limit, all]
+  -u, --user=name     Run mysqld daemon as user.
++ --userstat-running  Control USER_STATISTICS, CLIENT_STATISTICS,
++ THREAD_STATISTICS, INDEX_STATISTICS and TABLE_STATISTICS
++ running
+  -v, --verbose       Used with --help option for detailed help.
+  -V, --version       Output version information and exit.
+  --wait-timeout=#    The number of seconds the server waits for activity on a
+  connection before closing it
++ --xtradb-admin-command[=name] 
++ Enable or disable XTRADB_ADMIN_COMMAND plugin. Possible
++ values are ON, OFF, FORCE (don't start if the plugin
++ fails to load).
+ Variables (--variable-name=value)
+ abort-slave-event-count 0
+@@ -756,6 +806,7 @@
+ delayed-queue-size 1000
+ disconnect-slave-event-count 0
+ div-precision-increment 4
++enable-query-response-time-stats FALSE
+ engine-condition-pushdown TRUE
+ event-scheduler OFF
+ expire-logs-days 0
+@@ -800,10 +851,16 @@
+ log-short-format FALSE
+ log-slave-updates FALSE
+ log-slow-admin-statements FALSE
++log-slow-filter 
++log-slow-rate-limit 1
+ log-slow-slave-statements FALSE
++log-slow-sp-statements TRUE
++log-slow-timestamp-every FALSE
++log-slow-verbosity 
+ log-tc tc.log
+ log-tc-size 24576
+ log-warnings 1
++log-warnings-silence 
+ long-query-time 10
+ low-priority-updates FALSE
+ lower-case-table-names 1
+@@ -877,9 +934,11 @@
+ query-cache-limit 1048576
+ query-cache-min-res-unit 4096
+ query-cache-size 0
++query-cache-strip-comments FALSE
+ query-cache-type ON
+ query-cache-wlock-invalidate FALSE
+ query-prealloc-size 8192
++query-response-time-range-base 10
+ range-alloc-block-size 4096
+ read-buffer-size 131072
+ read-only FALSE
+@@ -914,6 +973,7 @@
+ slave-type-conversions 
+ slow-launch-time 2
+ slow-query-log FALSE
++slow-query-log-microseconds-timestamp FALSE
+ sort-buffer-size 2097152
+ sporadic-binlog-dump-fail FALSE
+ sql-mode 
+@@ -931,6 +991,7 @@
+ thread-cache-size 0
+ thread-handling one-thread-per-connection
+ thread-stack 262144
++thread-statistics FALSE
+ time-format %H:%i:%s
+ timed-mutexes FALSE
+ tmp-table-size 16777216
+@@ -938,8 +999,11 @@
+ transaction-isolation REPEATABLE-READ
+ transaction-prealloc-size 4096
+ updatable-views-with-limit YES
++use-global-log-slow-control 
++userstat-running FALSE
+ verbose TRUE
+ wait-timeout 28800
++xtradb-admin-command ON
+ To see what values a running MySQL server is using, type
+ 'mysqladmin variables' instead of 'mysqld --verbose --help'.
+diff -ruN a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result
+--- a/mysql-test/r/mysqldump.result    2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/r/mysqldump.result    2010-12-10 16:48:11.013968901 +0900
+@@ -1832,7 +1832,7 @@
+ # Bug#21288 mysqldump segmentation fault when using --where
+ #
+ create table t1 (a int);
+-mysqldump: Couldn't execute 'SELECT /*!40001 SQL_NO_CACHE */ * FROM `t1` WHERE xx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx': You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' at line 1 (1064)
++mysqldump: Couldn't execute 'SELECT /*!40001 SQL_NO_CACHE */ /*!50084 SQL_NO_FCACHE */ * FROM `t1` WHERE xx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx': You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' at line 1 (1064)
+ mysqldump: Got error: 1064: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' at line 1 when retrieving data from server
+ /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+diff -ruN a/mysql-test/r/mysqlshow.result b/mysql-test/r/mysqlshow.result
+--- a/mysql-test/r/mysqlshow.result    2010-12-03 20:58:24.000000000 +0300
++++ b/mysql-test/r/mysqlshow.result    2011-01-11 16:41:03.000000000 +0300
+@@ -80,14 +80,17 @@
+ |                Tables                 |
+ +---------------------------------------+
+ | CHARACTER_SETS                        |
++| CLIENT_STATISTICS                     |
+ | COLLATIONS                            |
+ | COLLATION_CHARACTER_SET_APPLICABILITY |
+ | COLUMNS                               |
+ | COLUMN_PRIVILEGES                     |
++| INDEX_STATISTICS                      |
+ | ENGINES                               |
+ | EVENTS                                |
+ | FILES                                 |
+ | GLOBAL_STATUS                         |
++| GLOBAL_TEMPORARY_TABLES               |
+ | GLOBAL_VARIABLES                      |
+ | KEY_COLUMN_USAGE                      |
+ | PARAMETERS                            |
+@@ -97,6 +100,7 @@
+ | PROFILING                             |
+ | REFERENTIAL_CONSTRAINTS               |
+ | ROUTINES                              |
++| QUERY_RESPONSE_TIME                   |
+ | SCHEMATA                              |
+ | SCHEMA_PRIVILEGES                     |
+ | SESSION_STATUS                        |
+@@ -106,30 +110,52 @@
+ | TABLESPACES                           |
+ | TABLE_CONSTRAINTS                     |
+ | TABLE_PRIVILEGES                      |
++| TABLE_STATISTICS                      |
++| TEMPORARY_TABLES                      |
++| THREAD_STATISTICS                     |
+ | TRIGGERS                              |
+ | USER_PRIVILEGES                       |
++| USER_STATISTICS                       |
+ | VIEWS                                 |
+-| INNODB_CMP_RESET                      |
++| INNODB_SYS_COLUMNS                    |
++| INNODB_RSEG                           |
++| INNODB_CMP                            |
+ | INNODB_TRX                            |
+-| INNODB_CMPMEM_RESET                   |
++| INNODB_SYS_TABLESTATS                 |
+ | INNODB_LOCK_WAITS                     |
+-| INNODB_CMPMEM                         |
+-| INNODB_CMP                            |
++| XTRADB_ADMIN_COMMAND                  |
+ | INNODB_LOCKS                          |
++| INNODB_SYS_FOREIGN_COLS               |
++| INNODB_CMP_RESET                      |
++| INNODB_BUFFER_POOL_PAGES              |
++| INNODB_SYS_TABLES                     |
++| INNODB_BUFFER_POOL_PAGES_INDEX        |
++| INNODB_CMPMEM                         |
++| INNODB_BUFFER_POOL_PAGES_BLOB         |
++| INNODB_CMPMEM_RESET                   |
++| INNODB_SYS_FIELDS                     |
++| INNODB_TABLE_STATS                    |
++| INNODB_SYS_STATS                      |
++| INNODB_SYS_FOREIGN                    |
++| INNODB_SYS_INDEXES                    |
++| INNODB_INDEX_STATS                    |
+ +---------------------------------------+
+ Database: INFORMATION_SCHEMA
+ +---------------------------------------+
+ |                Tables                 |
+ +---------------------------------------+
+ | CHARACTER_SETS                        |
++| CLIENT_STATISTICS                     |
+ | COLLATIONS                            |
+ | COLLATION_CHARACTER_SET_APPLICABILITY |
+ | COLUMNS                               |
+ | COLUMN_PRIVILEGES                     |
++| INDEX_STATISTICS                      |
+ | ENGINES                               |
+ | EVENTS                                |
+ | FILES                                 |
+ | GLOBAL_STATUS                         |
++| GLOBAL_TEMPORARY_TABLES               |
+ | GLOBAL_VARIABLES                      |
+ | KEY_COLUMN_USAGE                      |
+ | PARAMETERS                            |
+@@ -139,6 +165,7 @@
+ | PROFILING                             |
+ | REFERENTIAL_CONSTRAINTS               |
+ | ROUTINES                              |
++| QUERY_RESPONSE_TIME                   |
+ | SCHEMATA                              |
+ | SCHEMA_PRIVILEGES                     |
+ | SESSION_STATUS                        |
+@@ -148,16 +175,35 @@
+ | TABLESPACES                           |
+ | TABLE_CONSTRAINTS                     |
+ | TABLE_PRIVILEGES                      |
++| TABLE_STATISTICS                      |
++| TEMPORARY_TABLES                      |
++| THREAD_STATISTICS                     |
+ | TRIGGERS                              |
+ | USER_PRIVILEGES                       |
++| USER_STATISTICS                       |
+ | VIEWS                                 |
+-| INNODB_CMP_RESET                      |
++| INNODB_SYS_COLUMNS                    |
++| INNODB_RSEG                           |
++| INNODB_CMP                            |
+ | INNODB_TRX                            |
+-| INNODB_CMPMEM_RESET                   |
++| INNODB_SYS_TABLESTATS                 |
+ | INNODB_LOCK_WAITS                     |
+-| INNODB_CMPMEM                         |
+-| INNODB_CMP                            |
++| XTRADB_ADMIN_COMMAND                  |
+ | INNODB_LOCKS                          |
++| INNODB_SYS_FOREIGN_COLS               |
++| INNODB_CMP_RESET                      |
++| INNODB_BUFFER_POOL_PAGES              |
++| INNODB_SYS_TABLES                     |
++| INNODB_BUFFER_POOL_PAGES_INDEX        |
++| INNODB_CMPMEM                         |
++| INNODB_BUFFER_POOL_PAGES_BLOB         |
++| INNODB_CMPMEM_RESET                   |
++| INNODB_SYS_FIELDS                     |
++| INNODB_TABLE_STATS                    |
++| INNODB_SYS_STATS                      |
++| INNODB_SYS_FOREIGN                    |
++| INNODB_SYS_INDEXES                    |
++| INNODB_INDEX_STATS                    |
+ +---------------------------------------+
+ Wildcard: inf_rmation_schema
+ +--------------------+
+diff -ruN a/mysql-test/r/select.result b/mysql-test/r/select.result
+--- a/mysql-test/r/select.result       2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/r/select.result       2010-12-10 16:48:11.023052909 +0900
+@@ -2196,10 +2196,10 @@
+ select * from (t1 as t2 left join t1 as t3 using (a)) inner join t1 on t1.a>1;
+ a     a
+ 1     2
+-2     2
+-3     2
+ 1     3
++2     2
+ 2     3
++3     2
+ 3     3
+ select * from t1 inner join (t1 as t2 left join t1 as t3 using (a)) on t1.a>1;
+ a     a
+diff -ruN a/mysql-test/suite/innodb/r/innodb.result b/mysql-test/suite/innodb/r/innodb.result
+--- a/mysql-test/suite/innodb/r/innodb.result  2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/suite/innodb/r/innodb.result  2010-12-10 16:48:11.026994635 +0900
+@@ -1661,7 +1661,7 @@
+ drop table t1;
+ SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total';
+ variable_value
+-511
++2047
+ SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size';
+ variable_value
+ 16384
+diff -ruN a/mysql-test/suite/sys_vars/r/all_vars.result b/mysql-test/suite/sys_vars/r/all_vars.result
+--- a/mysql-test/suite/sys_vars/r/all_vars.result      2010-12-03 20:58:25.000000000 +0300
++++ b/mysql-test/suite/sys_vars/r/all_vars.result      2011-01-10 23:09:13.000000000 +0300
+@@ -11,7 +11,99 @@
+ select variable_name as `There should be *no* variables listed below:` from t2
+ left join t1 on variable_name=test_name where test_name is null;
+ There should be *no* variables listed below:
++INNODB_BUFFER_POOL_SHM_KEY
++LOG_SLOW_SP_STATEMENTS
++INNODB_DOUBLEWRITE_FILE
++INNODB_THREAD_CONCURRENCY_TIMER_BASED
++LOG_SLOW_TIMESTAMP_EVERY
++INNODB_READ_AHEAD
++INNODB_PASS_CORRUPT_TABLE
++INNODB_RECOVERY_STATS
++INNODB_FLUSH_NEIGHBOR_PAGES
++INNODB_BUFFER_POOL_SHM_CHECKSUM
++INNODB_FLUSH_LOG_AT_TRX_COMMIT_SESSION
++LOG_SLOW_VERBOSITY
++ENABLE_QUERY_RESPONSE_TIME_STATS
++OPTIMIZER_FIX
++INNODB_ADAPTIVE_CHECKPOINT
++INNODB_SHOW_LOCKS_HELD
++INNODB_IBUF_ACCEL_RATE
++INNODB_EXPAND_IMPORT
++QUERY_CACHE_STRIP_COMMENTS
++INNODB_CHECKPOINT_AGE_TARGET
++INNODB_STATS_METHOD
++LOG_SLOW_RATE_LIMIT
++INNODB_PAGE_SIZE
++USERSTAT_RUNNING
++FAST_INDEX_CREATION
++USE_GLOBAL_LOG_SLOW_CONTROL
++INNODB_ENABLE_UNSAFE_GROUP_COMMIT
++INNODB_SHOW_VERBOSE_LOCKS
++INNODB_ADAPTIVE_HASH_INDEX_PARTITIONS
++SLOW_QUERY_LOG_MICROSECONDS_TIMESTAMP
++INNODB_LOG_BLOCK_SIZE
++INNODB_USE_SYS_STATS_TABLE
++INNODB_STATS_UPDATE_NEED_LOCK
++QUERY_RESPONSE_TIME_RANGE_BASE
++INNODB_STATS_AUTO_UPDATE
++INNODB_IBUF_ACTIVE_CONTRACT
+ INNODB_FILE_FORMAT_MAX
++INNODB_AUTO_LRU_DUMP
++LOG_WARNINGS_SILENCE
++LOG_SLOW_FILTER
++INNODB_DICT_SIZE_LIMIT
++INNODB_IBUF_MAX_SIZE
++INNODB_FAST_CHECKSUM
++LOG_SLOW_SLAVE_STATEMENTS
++INNODB_OVERWRITE_RELAY_LOG_INFO
++INNODB_EXTRA_RSEGMENTS
++THREAD_STATISTICS
++INNODB_BUFFER_POOL_SHM_KEY
++LOG_SLOW_SP_STATEMENTS
++INNODB_DOUBLEWRITE_FILE
++INNODB_THREAD_CONCURRENCY_TIMER_BASED
++LOG_SLOW_TIMESTAMP_EVERY
++INNODB_READ_AHEAD
++INNODB_PASS_CORRUPT_TABLE
++INNODB_RECOVERY_STATS
++INNODB_FLUSH_NEIGHBOR_PAGES
++INNODB_BUFFER_POOL_SHM_CHECKSUM
++INNODB_FLUSH_LOG_AT_TRX_COMMIT_SESSION
++LOG_SLOW_VERBOSITY
++ENABLE_QUERY_RESPONSE_TIME_STATS
++OPTIMIZER_FIX
++INNODB_ADAPTIVE_CHECKPOINT
++INNODB_SHOW_LOCKS_HELD
++INNODB_IBUF_ACCEL_RATE
++INNODB_EXPAND_IMPORT
++QUERY_CACHE_STRIP_COMMENTS
++INNODB_CHECKPOINT_AGE_TARGET
++INNODB_STATS_METHOD
++LOG_SLOW_RATE_LIMIT
++INNODB_PAGE_SIZE
++USERSTAT_RUNNING
++FAST_INDEX_CREATION
++USE_GLOBAL_LOG_SLOW_CONTROL
++INNODB_ENABLE_UNSAFE_GROUP_COMMIT
++INNODB_SHOW_VERBOSE_LOCKS
++INNODB_ADAPTIVE_HASH_INDEX_PARTITIONS
++SLOW_QUERY_LOG_MICROSECONDS_TIMESTAMP
++INNODB_LOG_BLOCK_SIZE
++INNODB_USE_SYS_STATS_TABLE
++INNODB_STATS_UPDATE_NEED_LOCK
++QUERY_RESPONSE_TIME_RANGE_BASE
++INNODB_STATS_AUTO_UPDATE
++INNODB_IBUF_ACTIVE_CONTRACT
+ INNODB_FILE_FORMAT_MAX
++INNODB_AUTO_LRU_DUMP
++LOG_WARNINGS_SILENCE
++LOG_SLOW_FILTER
++INNODB_DICT_SIZE_LIMIT
++INNODB_IBUF_MAX_SIZE
++INNODB_FAST_CHECKSUM
++LOG_SLOW_SLAVE_STATEMENTS
++INNODB_OVERWRITE_RELAY_LOG_INFO
++INNODB_EXTRA_RSEGMENTS
++THREAD_STATISTICS
+ drop table t1;
+ drop table t2;
+diff -ruN a/mysql-test/suite/sys_vars/r/innodb_adaptive_flushing_basic.result b/mysql-test/suite/sys_vars/r/innodb_adaptive_flushing_basic.result
+--- a/mysql-test/suite/sys_vars/r/innodb_adaptive_flushing_basic.result        2010-11-03 07:01:13.000000000 +0900
++++ b/mysql-test/suite/sys_vars/r/innodb_adaptive_flushing_basic.result        2010-12-10 16:48:11.031065741 +0900
+@@ -1,28 +1,28 @@
+ SET @start_global_value = @@global.innodb_adaptive_flushing;
+ SELECT @start_global_value;
+ @start_global_value
+-1
++0
+ Valid values are 'ON' and 'OFF' 
+ select @@global.innodb_adaptive_flushing in (0, 1);
+ @@global.innodb_adaptive_flushing in (0, 1)
+ 1
+ select @@global.innodb_adaptive_flushing;
+ @@global.innodb_adaptive_flushing
+-1
++0
+ select @@session.innodb_adaptive_flushing;
+ ERROR HY000: Variable 'innodb_adaptive_flushing' is a GLOBAL variable
+ show global variables like 'innodb_adaptive_flushing';
+ Variable_name Value
+-innodb_adaptive_flushing      ON
++innodb_adaptive_flushing      OFF
+ show session variables like 'innodb_adaptive_flushing';
+ Variable_name Value
+-innodb_adaptive_flushing      ON
++innodb_adaptive_flushing      OFF
+ select * from information_schema.global_variables where variable_name='innodb_adaptive_flushing';
+ VARIABLE_NAME VARIABLE_VALUE
+-INNODB_ADAPTIVE_FLUSHING      ON
++INNODB_ADAPTIVE_FLUSHING      OFF
+ select * from information_schema.session_variables where variable_name='innodb_adaptive_flushing';
+ VARIABLE_NAME VARIABLE_VALUE
+-INNODB_ADAPTIVE_FLUSHING      ON
++INNODB_ADAPTIVE_FLUSHING      OFF
+ set global innodb_adaptive_flushing='OFF';
+ select @@global.innodb_adaptive_flushing;
+ @@global.innodb_adaptive_flushing
+@@ -89,4 +89,4 @@
+ SET @@global.innodb_adaptive_flushing = @start_global_value;
+ SELECT @@global.innodb_adaptive_flushing;
+ @@global.innodb_adaptive_flushing
+-1
++0
+diff -ruN a/mysql-test/suite/sys_vars/r/plugin_dir_basic.result b/mysql-test/suite/sys_vars/r/plugin_dir_basic.result
+--- a/mysql-test/suite/sys_vars/r/plugin_dir_basic.result      2010-11-03 07:01:13.000000000 +0900
++++ b/mysql-test/suite/sys_vars/r/plugin_dir_basic.result      2010-12-10 16:48:11.033057415 +0900
+@@ -1,20 +1,20 @@
+ select @@global.plugin_dir;
+ @@global.plugin_dir
+-MYSQL_LIBDIR/plugin
++MYSQL_LIBDIR64/plugin
+ select @@session.plugin_dir;
+ ERROR HY000: Variable 'plugin_dir' is a GLOBAL variable
+ show global variables like 'plugin_dir';
+ Variable_name Value
+-plugin_dir    MYSQL_LIBDIR/plugin
++plugin_dir    MYSQL_LIBDIR64/plugin
+ show session variables like 'plugin_dir';
+ Variable_name Value
+-plugin_dir    MYSQL_LIBDIR/plugin
++plugin_dir    MYSQL_LIBDIR64/plugin
+ select * from information_schema.global_variables where variable_name='plugin_dir';
+ VARIABLE_NAME VARIABLE_VALUE
+-PLUGIN_DIR    MYSQL_LIBDIR/plugin
++PLUGIN_DIR    MYSQL_LIBDIR64/plugin
+ select * from information_schema.session_variables where variable_name='plugin_dir';
+ VARIABLE_NAME VARIABLE_VALUE
+-PLUGIN_DIR    MYSQL_LIBDIR/plugin
++PLUGIN_DIR    MYSQL_LIBDIR64/plugin
+ set global plugin_dir=1;
+ ERROR HY000: Variable 'plugin_dir' is a read only variable
+ set session plugin_dir=1;
+diff -ruN a/mysql-test/t/connect.test b/mysql-test/t/connect.test
+--- a/mysql-test/t/connect.test        2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/t/connect.test        2010-12-10 16:48:11.034065111 +0900
+@@ -1,3 +1,5 @@
++set global log_warnings=0;
++
+ # This test is to check various cases of connections
+ # with right and wrong password, with and without database
+ # Unfortunately the check is incomplete as we can't connect without database
+@@ -300,3 +302,4 @@
+ # Wait till all disconnects are completed
+ --source include/wait_until_count_sessions.inc
++set global log_warnings=1;
index f817799eb8f5fad9a075336dffd9f2f248092857..b6572949491418d4c8e456805942da00366428b3 100644 (file)
@@ -36,7 +36,7 @@ Summary(uk.UTF-8):    MySQL - швидкий SQL-сервер
 Summary(zh_CN.UTF-8):  MySQL数据库服务器
 Name:          mysql
 Version:       5.5.8
-Release:       0.2
+Release:       0.5
 License:       GPL + MySQL FLOSS Exception
 Group:         Applications/Databases
 # Source0Download: http://dev.mysql.com/downloads/mysql/5.5.html#downloads
@@ -57,12 +57,9 @@ Source11:    %{name}-ndb-cpc.init
 Source12:      %{name}-ndb-cpc.sysconfig
 Source13:      %{name}-client.conf
 Source14:      my.cnf
-Patch0:                %{name}-libs.patch
-Patch1:                %{name}-libwrap.patch
 Patch2:                %{name}-c++.patch
 Patch3:                %{name}-info.patch
 Patch4:                %{name}-sql-cxx-pic.patch
-Patch5:                %{name}-noproc.patch
 Patch6:                %{name}-system-users.patch
 Patch7:                %{name}-bug-34192.patch
 Patch8:                %{name}-client-config.patch
@@ -71,14 +68,55 @@ Patch10:    %{name}-alpha.patch
 Patch11:       %{name}-upgrade.patch
 Patch12:       %{name}-config.patch
 Patch14:       %{name}-bug-43594.patch
-Patch15:       plugin-avoid-version.patch
-Patch16:       %{name}-fix-dummy-thread-race-condition.patch
 Patch18:       %{name}-sphinx.patch
-# <percona patches, http://bazaar.launchpad.net/~percona-dev/percona-server/5.5.7/files>
-Patch100:      %{name}-userstat.patch
-Patch101:      %{name}-innodb_extend_slow.patch
-Patch102:      %{name}-microsec_process.patch
-Patch103:      %{name}-innodb_split_buf_pool_mutex.patch
+# <percona patches, http://bazaar.launchpad.net/~percona-dev/percona-server/5.5.8/files>
+# series file shows the order of patches
+Patch100:      microsec_process.patch
+Patch101:      optimizer_fix.patch
+Patch102:      mysql_dump_ignore_ct.patch
+Patch103:      control_online_alter_index.patch
+Patch104:      show_temp.patch
+Patch105:      innodb_show_status.patch
+Patch106:      innodb_io_patches.patch
+Patch107:      innodb_opt_lru_count.patch
+Patch108:      innodb_extra_rseg.patch
+Patch109:      innodb_overwrite_relay_log_info.patch
+Patch110:      innodb_thread_concurrency_timer_based.patch
+Patch111:      innodb_dict_size_limit.patch
+Patch112:      innodb_split_buf_pool_mutex.patch
+Patch113:      innodb_expand_import.patch
+Patch114:      innodb_show_sys_tables.patch
+Patch115:      innodb_stats.patch
+Patch116:      innodb_recovery_patches.patch
+Patch117:      innodb_admin_command_base.patch
+Patch118:      innodb_show_lock_name.patch
+Patch119:      innodb_extend_slow.patch
+Patch120:      innodb_lru_dump_restore.patch
+Patch121:      innodb_separate_doublewrite.patch
+Patch122:      innodb_pass_corrupt_table.patch
+Patch123:      innodb_fast_checksum.patch
+Patch124:      innodb_files_extend.patch
+Patch125:      innodb_fix_misc.patch
+Patch126:      innodb_deadlock_count.patch
+Patch127:      innodb_adaptive_hash_index_partitions.patch
+Patch128:      innodb_buffer_pool_pages_i_s.patch
+Patch129:      innodb_buffer_pool_shm.patch
+Patch130:      innodb_show_status_extend.patch
+Patch131:      slow_extended.patch
+Patch132:      percona_support.patch
+Patch133:      query_cache_enhance.patch
+Patch134:      log_connection_error.patch
+Patch135:      mysql_syslog.patch
+Patch136:      response_time_distribution.patch
+Patch137:      error_pad.patch
+Patch138:      remove_fcntl_excessive_calls.patch
+Patch139:      sql_no_fcache.patch
+Patch140:      show_slave_status_nolock.patch
+Patch141:      log_warnings_silence.patch
+Patch142:      userstat.patch
+Patch143:      bug580324.patch
+Patch144:      mysql_remove_eol_carret.patch
+Patch145:      mysql-test.diff
 # </percona>
 URL:           http://www.mysql.com/products/community/
 BuildRequires: bison
@@ -497,9 +535,6 @@ Ten pakiet zawiera standardowego demona MySQL NDB CPC.
 mv sphinx-*/mysqlse storage/sphinx
 %patch18 -p1
 %endif
-# CHECK ME, seems obsolete
-#%patch0 -p1
-#%{?with_tcpd:%patch1 -p1}  # WHATS PURPOSE OF THIS PATCH?
 #%patch2 -p1 # NEEDS CHECK, which exact program needs -lc++
 %patch3 -p1
 %ifarch alpha
@@ -510,8 +545,6 @@ mv sphinx-*/mysqlse storage/sphinx
 # gcc 3.3.x ICE
 %patch10 -p1
 %endif
-# CHECK ME, obsolete
-#%patch5 -p1
 %patch6 -p1
 %patch7 -p1
 %patch8 -p1
@@ -519,19 +552,56 @@ mv sphinx-*/mysqlse storage/sphinx
 %patch11 -p1
 %patch12 -p1
 %patch14 -p0
-# CHECK ME, obsolete
-#%patch15 -p1
-# OBSOLETE, YES
-#%patch16 -p1
 # <percona %patches>
-# CHECK ME
-#%patch100 -p1
-# CHECK ME
-#%patch101 -p1
-# CHECK ME
+%patch100 -p1
+%patch101 -p1
 #%patch102 -p1
-# CHECK ME
-#%patch103 -p1
+%patch103 -p1
+%patch104 -p1
+%patch105 -p1
+%patch106 -p1
+%patch107 -p1
+%patch108 -p1
+%patch109 -p1
+%patch110 -p1
+%patch111 -p1
+%patch112 -p1
+%patch113 -p1
+%patch114 -p1
+%patch115 -p1
+%patch116 -p1
+%patch117 -p1
+%patch118 -p1
+%patch119 -p1
+%patch120 -p1
+%patch121 -p1
+%patch122 -p1
+%patch123 -p1
+%patch124 -p1
+%patch125 -p1
+%patch126 -p1
+%patch127 -p1
+%patch128 -p1
+%patch129 -p1
+%patch130 -p1
+%patch131 -p1
+%patch132 -p1
+%patch133 -p1
+%patch134 -p1
+%patch135 -p1
+%patch136 -p1
+%patch137 -p1
+%patch138 -p1
+%patch139 -p1
+%patch140 -p1
+%patch141 -p1
+%patch142 -p1
+%patch143 -p1
+%patch144 -p1
+%patch145 -p1
+# remove these generated files so that they get rebuilt
+rm sql/sql_yacc.cc
+rm sql/sql_yacc.h
 # </percona>
 
 %build
diff --git a/mysql_dump_ignore_ct.patch b/mysql_dump_ignore_ct.patch
new file mode 100644 (file)
index 0000000..daaf2b9
--- /dev/null
@@ -0,0 +1,62 @@
+# name       : mysql_dump_ignore_ct.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/client/client_priv.h b/client/client_priv.h
+--- a/client/client_priv.h     2010-11-03 07:01:14.000000000 +0900
++++ b/client/client_priv.h     2010-12-03 13:39:32.317046060 +0900
+@@ -58,6 +58,7 @@
+   OPT_MYSQL_LOCK_DIRECTORY,
+   OPT_USE_THREADS,
+   OPT_IMPORT_USE_THREADS,
++  OPT_IGNORE_CREATE_ERROR,
+   OPT_MYSQL_NUMBER_OF_QUERY,
+   OPT_IGNORE_TABLE,OPT_INSERT_IGNORE,OPT_SHOW_WARNINGS,OPT_DROP_DATABASE,
+   OPT_TZ_UTC, OPT_CREATE_SLAP_SCHEMA,
+diff -ruN a/client/mysqldump.c b/client/mysqldump.c
+--- a/client/mysqldump.c       2010-11-03 07:01:14.000000000 +0900
++++ b/client/mysqldump.c       2010-12-03 13:44:55.000069761 +0900
+@@ -101,7 +101,7 @@
+                 opt_dump_triggers= 0, opt_routines=0, opt_tz_utc=1,
+                 opt_slave_apply= 0, 
+                 opt_include_master_host_port= 0,
+-                opt_events= 0,
++                opt_events= 0, opt_ignore_show_create_table_error=0,
+                 opt_alltspcs=0, opt_notspcs= 0;
+ static my_bool insert_pat_inited= 0, debug_info_flag= 0, debug_check_flag= 0;
+ static ulong opt_max_allowed_packet, opt_net_buffer_length;
+@@ -349,6 +349,9 @@
+   {"insert-ignore", OPT_INSERT_IGNORE, "Insert rows with INSERT IGNORE.",
+    &opt_ignore, &opt_ignore, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0,
+    0, 0},
++  {"ignore-create-error", OPT_IGNORE_CREATE_ERROR, "Don't exit on show create table errors.",
++   (uchar**) &opt_ignore_show_create_table_error, (uchar**) &opt_ignore_show_create_table_error, 0, GET_BOOL,
++   NO_ARG, 0, 0, 0, 0, 0, 0},
+   {"lines-terminated-by", OPT_LTB,
+    "Lines in the output file are terminated by the given string.",
+    &lines_terminated, &lines_terminated, 0, GET_STR,
+@@ -2278,13 +2281,21 @@
+       /* Make an sql-file, if path was given iow. option -T was given */
+       char buff[20+FN_REFLEN];
+       MYSQL_FIELD *field;
++   
++      my_bool old_ignore_errors=ignore_errors;
++      //fprintf(stderr, "ignore create table %d\n", opt_ignore_show_create_table_error);
++      if (opt_ignore_show_create_table_error)
++         ignore_errors=1;
+       my_snprintf(buff, sizeof(buff), "show create table %s", result_table);
+       if (switch_character_set_results(mysql, "binary") ||
+           mysql_query_with_error_report(mysql, &result, buff) ||
+           switch_character_set_results(mysql, default_charset))
++      {
++        ignore_errors=old_ignore_errors;
+         DBUG_RETURN(0);
++      }
+       if (path)
+       {
diff --git a/mysql_remove_eol_carret.patch b/mysql_remove_eol_carret.patch
new file mode 100644 (file)
index 0000000..4c3655f
--- /dev/null
@@ -0,0 +1,73 @@
+# name       : mysql_remove_eol_carret.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/client/client_priv.h b/client/client_priv.h
+--- a/client/client_priv.h     2010-07-06 15:06:50.000000000 -0700
++++ b/client/client_priv.h     2010-07-06 15:07:18.000000000 -0700
+@@ -89,6 +89,7 @@
+   OPT_SYSLOG,
+ #endif
+   OPT_PLUGIN_DIR,
++  OPT_NO_REMOVE_EOL_CARRET,
+   OPT_DEFAULT_PLUGIN,
+   OPT_MAX_CLIENT_OPTION
+ };
+diff -ruN a/client/mysql.cc b/client/mysql.cc
+--- a/client/mysql.cc  2010-06-03 08:50:02.000000000 -0700
++++ b/client/mysql.cc  2010-07-06 15:07:18.000000000 -0700
+@@ -133,6 +133,8 @@
+ enum enum_info_type { INFO_INFO,INFO_ERROR,INFO_RESULT};
+ typedef enum enum_info_type INFO_TYPE;
++my_bool opt_no_remove_eol_carret=0;
++
+ static MYSQL mysql;                   /* The connection */
+ static my_bool ignore_errors=0,wait_flag=0,quick=0,
+                connected=0,opt_raw_data=0,unbuffered=0,output_tables=0,
+@@ -1450,6 +1452,10 @@
+    NO_ARG, 1, 0, 0, 0, 0, 0},
+   {"skip-line-numbers", 'L', "Don't write line number for errors.", 0, 0, 0, GET_NO_ARG,
+    NO_ARG, 0, 0, 0, 0, 0, 0},
++  {"no-remove-eol-carret", OPT_NO_REMOVE_EOL_CARRET, "Do not remove \\r before \\n in batch mode", 
++  (uchar**)&opt_no_remove_eol_carret , (uchar**)&opt_no_remove_eol_carret, 0, 
++   GET_BOOL,
++   NO_ARG, 0, 0, 0, 0, 0, 0},
+   {"unbuffered", 'n', "Flush buffer after each query.", &unbuffered,
+    &unbuffered, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+   {"column-names", OPT_COLUMN_NAMES, "Write column names in results.",
+diff -ruN a/client/readline.cc b/client/readline.cc
+--- a/client/readline.cc       2010-06-03 08:50:23.000000000 -0700
++++ b/client/readline.cc       2010-07-06 15:07:18.000000000 -0700
+@@ -20,6 +20,8 @@
+ #include <m_string.h>
+ #include "my_readline.h"
++extern my_bool opt_no_remove_eol_carret;
++
+ static bool init_line_buffer(LINE_BUFFER *buffer,File file,ulong size,
+                           ulong max_size);
+ static bool init_line_buffer_from_string(LINE_BUFFER *buffer,char * str);
+@@ -51,7 +53,7 @@
+   if (!(pos=intern_read_line(line_buff,&out_length, truncated)))
+     return 0;
+   if (out_length && pos[out_length-1] == '\n')
+-    if (--out_length && pos[out_length-1] == '\r')  /* Remove '\n' */
++    if (--out_length && !opt_no_remove_eol_carret && pos[out_length-1] == '\r')  /* Remove '\n' */
+       out_length--;                                 /* Remove '\r' */
+   line_buff->read_length=out_length;
+   pos[out_length]=0;
+diff -ruN a/patch_info/mysql_remove_eol_carret.patch b/patch_info/mysql_remove_eol_carret.patch
+--- a/patch_info/mysql_remove_eol_carret.patch 1969-12-31 16:00:00.000000000 -0800
++++ b/patch_info/mysql_remove_eol_carret.patch 2010-07-06 15:10:10.000000000 -0700
+@@ -0,0 +1,7 @@
++File=mysql_remove_eol_carret.patch
++Name=
++Version=1.1
++Author=Percona <info@percona.com>
++License=GPL
++Comment=Do not remove the carriage return (\r) before end of line if --no-remove-eol-carret is enabled in the MySQL client.
++Changelog
diff --git a/mysql_syslog.patch b/mysql_syslog.patch
new file mode 100644 (file)
index 0000000..6ed843a
--- /dev/null
@@ -0,0 +1,128 @@
+# name       : mysql_syslog.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/client/client_priv.h b/client/client_priv.h
+--- a/client/client_priv.h     2011-01-13 18:35:59.000000000 +0300
++++ b/client/client_priv.h     2011-01-13 18:38:21.000000000 +0300
+@@ -85,6 +85,9 @@
+   OPT_DEBUG_INFO, OPT_DEBUG_CHECK, OPT_COLUMN_TYPES, OPT_ERROR_LOG_FILE,
+   OPT_WRITE_BINLOG, OPT_DUMP_DATE,
+   OPT_INIT_COMMAND,
++#ifndef __WIN__
++  OPT_SYSLOG,
++#endif
+   OPT_PLUGIN_DIR,
+   OPT_DEFAULT_PLUGIN,
+   OPT_MAX_CLIENT_OPTION
+diff -ruN a/client/mysql.cc b/client/mysql.cc
+--- a/client/mysql.cc  2010-12-03 20:58:26.000000000 +0300
++++ b/client/mysql.cc  2011-01-13 18:38:21.000000000 +0300
+@@ -38,6 +38,11 @@
+ #include "my_readline.h"
+ #include <signal.h>
+ #include <violite.h>
++#ifndef __WIN__
++#include "syslog.h"
++#endif
++
++#define MAX_SYSLOG_MESSAGE 900
+ #if defined(USE_LIBEDIT_INTERFACE) && defined(HAVE_LOCALE_H)
+ #include <locale.h>
+@@ -140,7 +145,7 @@
+                default_pager_set= 0, opt_sigint_ignore= 0,
+                auto_vertical_output= 0,
+                show_warnings= 0, executing_query= 0, interrupted_query= 0,
+-               ignore_spaces= 0;
++               ignore_spaces= 0, opt_syslog= 0;
+ static my_bool debug_info_flag, debug_check_flag;
+ static my_bool column_types_flag;
+ static my_bool preserve_comments= 0;
+@@ -198,6 +203,7 @@
+ void tee_fputs(const char *s, FILE *file);
+ void tee_puts(const char *s, FILE *file);
+ void tee_putc(int c, FILE *file);
++void write_syslog(String *buffer);
+ static void tee_print_sized_data(const char *, unsigned int, unsigned int, bool);
+ /* The names of functions that actually do the manipulation. */
+ static int get_options(int argc,char **argv);
+@@ -1561,6 +1567,10 @@
+   {"show-warnings", OPT_SHOW_WARNINGS, "Show warnings after every statement.",
+     &show_warnings, &show_warnings, 0, GET_BOOL, NO_ARG,
+     0, 0, 0, 0, 0, 0},
++#ifndef __WIN__
++  {"syslog", OPT_SYSLOG, "Logs all queries to syslog", 0, 0, 0, GET_NO_ARG,
++   NO_ARG, 0, 0, 0, 0, 0, 0},
++#endif
+   {"plugin_dir", OPT_PLUGIN_DIR, "Directory for client-side plugins.",
+    (uchar**) &opt_plugin_dir, (uchar**) &opt_plugin_dir, 0,
+    GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+@@ -1665,6 +1675,11 @@
+                                     opt->name);
+ #endif
+     break;
++#ifndef __WIN__
++  case OPT_SYSLOG:
++    opt_syslog = 1;
++    break;
++#endif
+   case OPT_SERVER_ARG:
+ #ifdef EMBEDDED_LIBRARY
+     /*
+@@ -2014,6 +2029,40 @@
+   DBUG_RETURN((COMMANDS *) 0);
+ }
++void write_syslog(String *line){  // Log the text in *line to syslog, one message per chunk of at most MAX_SYSLOG_MESSAGE bytes.
++#ifndef __WIN__  // the whole body is compiled out when __WIN__ is defined, leaving an empty function
++  uint length= line->length();  // number of bytes still to be logged
++  uint chunk_len= min(MAX_SYSLOG_MESSAGE, length);  // size of the chunk logged in the current iteration
++  char *ptr= line->c_ptr_safe();  // start of the current chunk
++  char buff[MAX_SYSLOG_MESSAGE + 1];  // scratch copy of a non-final chunk; +1 for the terminating NUL
++
++  for (;  // one syslog message per iteration, until no bytes remain
++       length;
++       length-= chunk_len, ptr+= chunk_len, chunk_len= min(MAX_SYSLOG_MESSAGE,
++                                                           length))
++  {
++    char *str;  // NUL-terminated text passed to syslog for this chunk
++    if (length == chunk_len)
++      str= ptr;                                 // last chunk => skip copy (presumably c_ptr_safe() guarantees NUL termination -- TODO confirm)
++    else
++    {
++      memcpy(buff, ptr, chunk_len);  // non-final chunk: copy it so it can be terminated for "%s"
++      buff[chunk_len]= '\0';
++      str= buff;
++    }
++    syslog(LOG_INFO,  // every chunk repeats the same user/connection/database context
++           "SYSTEM_USER:'%s', MYSQL_USER:'%s', CONNECTION_ID:%lu, "
++           "DB_SERVER:'%s', DB:'%s', QUERY:'%s'",
++           getenv("SUDO_USER") ? getenv("SUDO_USER") : 
++           getenv("USER") ? getenv("USER") : "--",  // system user: SUDO_USER if set, else USER, else "--"
++           current_user ? current_user : "--",  // unset client-side values are logged as "--"
++           mysql_thread_id(&mysql),
++           current_host ? current_host : "--",
++           current_db ? current_db : "--",
++           str);
++  }
++#endif
++}
+ static bool add_line(String &buffer,char *line,char *in_string,
+                      bool *ml_comment, bool truncated)
+@@ -2986,6 +3035,11 @@
+     fix_history(buffer);
+   }
+ #endif
++#ifndef __WIN__
++  if (opt_syslog && buffer->length() && connect_flag == CLIENT_INTERACTIVE){
++    write_syslog(buffer);
++  }
++#endif
+   buffer->length(0);
diff --git a/optimizer_fix.patch b/optimizer_fix.patch
new file mode 100644 (file)
index 0000000..135c0da
--- /dev/null
@@ -0,0 +1,159 @@
+# name       : optimizer_fix.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN /dev/null b/patch_info/optimizer_fix.info
+--- /dev/null  1970-01-01 09:00:00.000000000 +0900
++++ b/patch_info/optimizer_fix.info    2010-12-02 20:47:55.781968475 +0900
+@@ -0,0 +1,8 @@
++File=optimizer_fix.patch
++Name=Unofficial optimizer fixes
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
++2010-01
++Ported to 5.1.42
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc    2010-12-02 19:22:40.027024953 +0900
++++ b/sql/mysqld.cc    2010-12-02 20:51:50.811356434 +0900
+@@ -427,6 +427,7 @@
+ uint    opt_debug_sync_timeout= 0;
+ #endif /* defined(ENABLED_DEBUG_SYNC) */
+ my_bool opt_old_style_user_limits= 0, trust_function_creators= 0;
++my_bool opt_optimizer_fix= 0;
+ /*
+   True if there is at least one per-hour limit for some user, so we should
+   check them before each query (and possibly reset counters when hour is
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h     2010-11-03 07:01:14.000000000 +0900
++++ b/sql/mysqld.h     2010-12-02 20:51:10.392070356 +0900
+@@ -109,6 +109,7 @@
+ extern ulonglong slave_type_conversions_options;
+ extern my_bool read_only, opt_readonly;
+ extern my_bool lower_case_file_system;
++extern my_bool opt_optimizer_fix;
+ extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
+ extern my_bool opt_secure_auth;
+ extern char* opt_secure_file_priv;
+diff -ruN a/sql/opt_range.cc b/sql/opt_range.cc
+--- a/sql/opt_range.cc 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/opt_range.cc 2010-12-02 20:47:55.795969853 +0900
+@@ -727,7 +727,7 @@
+ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
+                                        bool index_read_must_be_used,
+                                        bool update_tbl_stats,
+-                                       double read_time);
++                                       double read_time, ha_rows *estimated_records);
+ static
+ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
+                                           double read_time,
+@@ -2151,6 +2151,7 @@
+                                 ha_rows limit, bool force_quick_range)
+ {
+   uint idx;
++  ha_rows estimated_records=0;
+   double scan_time;
+   DBUG_ENTER("SQL_SELECT::test_quick_select");
+   DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
+@@ -2319,12 +2320,17 @@
+         /* Get best 'range' plan and prepare data for making other plans */
+         if ((range_trp= get_key_scans_params(&param, tree, FALSE, TRUE,
+-                                             best_read_time)))
++                                             best_read_time, &estimated_records)))
+         {
+           best_trp= range_trp;
+           best_read_time= best_trp->read_cost;
+         }
++        if (opt_optimizer_fix && estimated_records)
++        {
++          records = estimated_records;
++        }
++
+         /*
+           Simultaneous key scans and row deletes on several handler
+           objects are not allowed so don't use ROR-intersection for
+@@ -3820,7 +3826,7 @@
+   {
+     DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
+                                         "tree in SEL_IMERGE"););
+-    if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE, read_time)))
++    if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE, read_time, NULL)))
+     {
+       /*
+         One of index scans in this index_merge is more expensive than entire
+@@ -4923,11 +4929,12 @@
+ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
+                                        bool index_read_must_be_used, 
+                                        bool update_tbl_stats,
+-                                       double read_time)
++                                       double read_time, ha_rows *estimated_records)
+ {
+   int idx;
+   SEL_ARG **key,**end, **key_to_read= NULL;
+   ha_rows UNINIT_VAR(best_records);              /* protected by key_to_read */
++  ha_rows min_records= HA_POS_ERROR;
+   TRP_RANGE* read_plan= NULL;
+   bool pk_is_clustered= param->table->file->primary_key_is_clustered();
+   DBUG_ENTER("get_key_scans_params");
+@@ -4998,6 +5005,11 @@
+         key_to_read=  key;
+       }
++      if (estimated_records && found_records
++          && min_records > found_records)
++      {
++        min_records = found_records;
++      }
+     }
+   }
+@@ -5020,6 +5032,12 @@
+   else
+     DBUG_PRINT("info", ("No 'range' table read plan found"));
++  /* minimum number of records (not 0) as estimated number of records */
++  if (estimated_records && min_records != HA_POS_ERROR)
++  {
++    *estimated_records = min_records;
++  }
++
+   DBUG_RETURN(read_plan);
+ }
+diff -ruN a/sql/sql_select.cc b/sql/sql_select.cc
+--- a/sql/sql_select.cc        2010-11-03 07:01:14.000000000 +0900
++++ b/sql/sql_select.cc        2010-12-02 20:47:55.813953789 +0900
+@@ -2610,6 +2610,11 @@
+       table->reginfo.impossible_range=1;
+       DBUG_RETURN(0);
+     }
++    if (opt_optimizer_fix && error == 0)
++    {
++      /* Quick select is not effective here, but its estimated row count is still used. */
++      DBUG_RETURN(select->records);
++    }
+     DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
+   }
+   DBUG_RETURN(HA_POS_ERROR);                  /* This shouldn't happend */
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc  2010-12-02 20:31:56.208023606 +0900
++++ b/sql/sys_vars.cc  2010-12-02 21:17:44.618120277 +0900
+@@ -2118,6 +2118,12 @@
+        VALID_RANGE(1, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT)),
+        DEFAULT(NET_WAIT_TIMEOUT), BLOCK_SIZE(1));
++static Sys_var_mybool Sys_optimizer_fix(
++       "optimizer_fix",
++       "Enable unofficial optimizer fixes.",
++       GLOBAL_VAR(opt_optimizer_fix),
++       NO_CMD_LINE, DEFAULT(TRUE));
++
+ /** propagates changes to the relevant flag of @@optimizer_switch */
+ static bool fix_engine_condition_pushdown(sys_var *self, THD *thd,
+                                           enum_var_type type)
diff --git a/percona.sh b/percona.sh
deleted file mode 100644 (file)
index 0b6472e..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/bin/sh
-# updates percona patches
-# http://www.percona.com/docs/wiki/release:start
-
-version=release-5.1.53-11
-bzr_branch=lp:percona-server/$version
-branch=MYSQL_5_1
-
-filter_names() {
-       # mysql_dump_ignore_ct.patch is broken, therefore we skip
-       grep -v 'mysql_dump_ignore_ct.patch' | \
-       grep -v 'percona-support.patch' | \
-       grep -v 'mysqld_safe_syslog.patch' | \
-       grep -v 'mysql-test.diff'
-}
-
-filter_files() {
-       filterdiff -x '*/configure'
-}
-
-if [ -d $version ]; then
-       cd $version
-       bzr up
-       cd ..
-else
-       bzr branch $bzr_branch $version
-fi
-
-> .percona.spec
-> .patch.spec
-i=100
-for patch in $(cat $version/series | filter_names); do
-       file=mysql-$patch
-       cat $version/$patch | filter_files > $file
-
-       if [ -z "$(awk -vfile=$file -F/ '$2 == file{print}' CVS/Entries)" ]; then
-               cvs add $file
-               ${branch:+cvs up -r $branch $file}
-       fi
-
-       printf "Patch%d:\t%s\n" $i %{name}-$patch >> .percona.spec
-       printf "%%patch%d -p1\n" $i >> .patch.spec
-       i=$((i+1))
-done
-
-# update PatchX section
-sed -i -e '
-/^# <percona patches/,/^# <\/percona>/ {
-       /^ <\/percona>/b
-       /^# <percona patches/ {
-               p # print header
-               r .percona.spec
-               a# </percona>
-       }
-       d
-}
-' mysql.spec
-
-# update %patchX section
-sed -i -e '
-/^# <percona %patches/,/^# <\/percona>/ {
-       /^ <\/percona>/b
-       /^# <percona %patches/ {
-               p # print header
-               r .patch.spec
-               a# </percona>
-       }
-       d
-}
-' mysql.spec
diff --git a/percona_support.patch b/percona_support.patch
new file mode 100644 (file)
index 0000000..ee622cf
--- /dev/null
@@ -0,0 +1,19 @@
+# name       : percona_support.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/scripts/mysql_install_db.sh b/scripts/mysql_install_db.sh
+--- a/scripts/mysql_install_db.sh       2009-08-08 09:20:07.000000000 +0000
++++ b/scripts/mysql_install_db.sh       2009-08-08 09:29:23.000000000 +0000
+@@ -475,6 +475,8 @@
+   echo
+   echo "Please report any problems with the $scriptdir/mysqlbug script!"
+   echo
++  echo "For commercial support please contact Percona at http://www.percona.com/support/"
++  echo
+ fi
+ exit 0
diff --git a/plugin-avoid-version.patch b/plugin-avoid-version.patch
deleted file mode 100644 (file)
index 9d72205..0000000
+++ /dev/null
@@ -1,126 +0,0 @@
-diff -ur mysql-5.1.50.org/plugin/daemon_example/Makefile.am mysql-5.1.50/plugin/daemon_example/Makefile.am
---- mysql-5.1.50.org/plugin/daemon_example/Makefile.am 2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/plugin/daemon_example/Makefile.am     2010-09-23 20:52:58.514057048 +0200
-@@ -26,7 +26,7 @@
- EXTRA_LTLIBRARIES =   libdaemon_example.la
- pkgplugin_LTLIBRARIES =       @plugin_daemon_example_shared_target@
--libdaemon_example_la_LDFLAGS =        -module -rpath $(pkgplugindir)
-+libdaemon_example_la_LDFLAGS =        -module -avoid-version -rpath $(pkgplugindir)
- libdaemon_example_la_CXXFLAGS=        $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- libdaemon_example_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- libdaemon_example_la_SOURCES =        daemon_example.cc
-diff -ur mysql-5.1.50.org/storage/archive/Makefile.am mysql-5.1.50/storage/archive/Makefile.am
---- mysql-5.1.50.org/storage/archive/Makefile.am       2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/storage/archive/Makefile.am   2010-09-23 20:53:04.487687594 +0200
-@@ -35,7 +35,7 @@
- EXTRA_LTLIBRARIES =   ha_archive.la
- pkgplugin_LTLIBRARIES =       @plugin_archive_shared_target@
--ha_archive_la_LDFLAGS =       -module -rpath $(pkgplugindir)
-+ha_archive_la_LDFLAGS =       -module -avoid-version -rpath $(pkgplugindir)
- ha_archive_la_CXXFLAGS=       $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_archive_la_CFLAGS =        $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_archive_la_SOURCES =       ha_archive.cc azio.c
-diff -ur mysql-5.1.50.org/storage/blackhole/Makefile.am mysql-5.1.50/storage/blackhole/Makefile.am
---- mysql-5.1.50.org/storage/blackhole/Makefile.am     2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/storage/blackhole/Makefile.am 2010-09-23 20:53:04.487687594 +0200
-@@ -34,7 +34,7 @@
- EXTRA_LTLIBRARIES =   ha_blackhole.la
- pkgplugin_LTLIBRARIES =       @plugin_blackhole_shared_target@
--ha_blackhole_la_LDFLAGS=-module -rpath $(pkgplugindir)
-+ha_blackhole_la_LDFLAGS=-module -avoid-version -rpath $(pkgplugindir)
- ha_blackhole_la_CXXFLAGS=$(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_blackhole_la_CFLAGS=       $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_blackhole_la_SOURCES=ha_blackhole.cc
-diff -ur mysql-5.1.50.org/storage/csv/Makefile.am mysql-5.1.50/storage/csv/Makefile.am
---- mysql-5.1.50.org/storage/csv/Makefile.am   2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/storage/csv/Makefile.am       2010-09-23 20:53:04.487687596 +0200
-@@ -31,7 +31,7 @@
- EXTRA_LTLIBRARIES =   ha_csv.la
- pkglib_LTLIBRARIES =  @plugin_csv_shared_target@
--ha_csv_la_LDFLAGS =   -module -rpath $(MYSQLLIBdir)
-+ha_csv_la_LDFLAGS =   -module -avoid-version -rpath $(MYSQLLIBdir)
- ha_csv_la_CXXFLAGS =  $(AM_CXXFLAGS) -DMYSQL_PLUGIN
- ha_csv_la_SOURCES =   transparent_file.cc ha_tina.cc 
-diff -ur mysql-5.1.50.org/storage/example/Makefile.am mysql-5.1.50/storage/example/Makefile.am
---- mysql-5.1.50.org/storage/example/Makefile.am       2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/storage/example/Makefile.am   2010-09-23 20:53:04.487687596 +0200
-@@ -34,7 +34,7 @@
- EXTRA_LTLIBRARIES =   ha_example.la
- pkgplugin_LTLIBRARIES =       @plugin_example_shared_target@
--ha_example_la_LDFLAGS =       -module -rpath $(pkgplugindir)
-+ha_example_la_LDFLAGS =       -module -avoid-version -rpath $(pkgplugindir)
- ha_example_la_CXXFLAGS=       $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_example_la_CFLAGS =        $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_example_la_SOURCES =       ha_example.cc
-diff -ur mysql-5.1.50.org/storage/federated/Makefile.am mysql-5.1.50/storage/federated/Makefile.am
---- mysql-5.1.50.org/storage/federated/Makefile.am     2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/storage/federated/Makefile.am 2010-09-23 20:53:04.487687596 +0200
-@@ -34,7 +34,7 @@
- EXTRA_LTLIBRARIES =   ha_federated.la
- pkgplugin_LTLIBRARIES =       @plugin_federated_shared_target@
--ha_federated_la_LDFLAGS =     -module -rpath $(pkgplugindir)
-+ha_federated_la_LDFLAGS =     -module -avoid-version -rpath $(pkgplugindir)
- ha_federated_la_CXXFLAGS=     $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_federated_la_CFLAGS =      $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_federated_la_SOURCES =     ha_federated.cc
-diff -ur mysql-5.1.50.org/storage/ibmdb2i/Makefile.am mysql-5.1.50/storage/ibmdb2i/Makefile.am
---- mysql-5.1.50.org/storage/ibmdb2i/Makefile.am       2010-08-03 19:24:22.000000000 +0200
-+++ mysql-5.1.50/storage/ibmdb2i/Makefile.am   2010-09-23 20:53:04.491021090 +0200
-@@ -33,7 +33,7 @@
- EXTRA_LTLIBRARIES =   ha_ibmdb2i.la
- pkgplugin_LTLIBRARIES =       @plugin_ibmdb2i_shared_target@
- ha_ibmdb2i_la_LIBADD =  -liconv
--ha_ibmdb2i_la_LDFLAGS =       -module -rpath $(MYSQLLIBdir)
-+ha_ibmdb2i_la_LDFLAGS =       -module -avoid-version -rpath $(MYSQLLIBdir)
- ha_ibmdb2i_la_CXXFLAGS=       $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_ibmdb2i_la_CFLAGS =        $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_ibmdb2i_la_SOURCES =       ha_ibmdb2i.cc db2i_ileBridge.cc db2i_conversion.cc \
-diff -ur mysql-5.1.50.org/storage/innobase/Makefile.am mysql-5.1.50/storage/innobase/Makefile.am
---- mysql-5.1.50.org/storage/innobase/Makefile.am      2010-08-03 19:24:20.000000000 +0200
-+++ mysql-5.1.50/storage/innobase/Makefile.am  2010-09-23 20:53:04.494354584 +0200
-@@ -162,7 +162,7 @@
- EXTRA_LTLIBRARIES=    ha_innodb.la
- pkgplugin_LTLIBRARIES=        @plugin_innobase_shared_target@
--ha_innodb_la_LDFLAGS= -module -rpath $(pkgplugindir)
-+ha_innodb_la_LDFLAGS= -module -avoid-version -rpath $(pkgplugindir)
- ha_innodb_la_CXXFLAGS=        $(AM_CXXFLAGS) $(INNODB_DYNAMIC_CFLAGS)
- ha_innodb_la_CFLAGS=  $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
- ha_innodb_la_SOURCES= $(libinnobase_a_SOURCES)
-diff -ur mysql-5.1.50.org/storage/innodb_plugin/Makefile.am mysql-5.1.50/storage/innodb_plugin/Makefile.am
---- mysql-5.1.50.org/storage/innodb_plugin/Makefile.am 2010-08-03 19:24:19.000000000 +0200
-+++ mysql-5.1.50/storage/innodb_plugin/Makefile.am     2010-09-23 20:53:04.494354584 +0200
-@@ -331,7 +331,7 @@
- EXTRA_LTLIBRARIES=    ha_innodb_plugin.la
- pkgplugin_LTLIBRARIES=        @plugin_innodb_plugin_shared_target@
--ha_innodb_plugin_la_LDFLAGS=  -module -rpath $(pkgplugindir)
-+ha_innodb_plugin_la_LDFLAGS=  -module -avoid-version -rpath $(pkgplugindir)
- ha_innodb_plugin_la_CXXFLAGS= $(AM_CXXFLAGS) $(INNODB_DYNAMIC_CFLAGS)
- ha_innodb_plugin_la_CFLAGS=   $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
- ha_innodb_plugin_la_SOURCES=  $(libinnobase_a_SOURCES)
-diff -ur mysql-5.1.50.org/storage/sphinx/Makefile.am mysql-5.1.50/storage/sphinx/Makefile.am
---- mysql-5.1.50.org/storage/sphinx/Makefile.am        2010-09-23 20:51:48.660581537 +0200
-+++ mysql-5.1.50/storage/sphinx/Makefile.am    2010-09-23 20:53:05.261059401 +0200
-@@ -38,12 +38,12 @@
- EXTRA_LTLIBRARIES =   ha_sphinx.la
- pkgplugin_LTLIBRARIES = @plugin_sphinx_shared_target@ sphinx.la
--ha_sphinx_la_LDFLAGS =        -module -avoid-version -rpath $(MYSQLLIBdir)
-+ha_sphinx_la_LDFLAGS =        -module -avoid-version -avoid-version -rpath $(MYSQLLIBdir)
- ha_sphinx_la_CXXFLAGS=        $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_sphinx_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_sphinx_la_SOURCES =        ha_sphinx.cc
--sphinx_la_LDFLAGS = -module -avoid-version -rpath $(MYSQLLIBdir)
-+sphinx_la_LDFLAGS = -module -avoid-version -avoid-version -rpath $(MYSQLLIBdir)
- sphinx_la_CXXFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- sphinx_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- sphinx_la_SOURCES = snippets_udf.cc
diff --git a/query_cache_enhance.patch b/query_cache_enhance.patch
new file mode 100644 (file)
index 0000000..e7fd078
--- /dev/null
@@ -0,0 +1,491 @@
+# name       : query_cache_with_comments.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/query_cache_enhance.patch b/patch_info/query_cache_enhance.patch
+--- a/patch_info/query_cache_enhance.patch     1970-01-01 05:00:00.000000000 +0500
++++ b/patch_info/query_cache_enhance.patch     2010-11-12 17:24:47.000000000 +0500
+@@ -0,0 +1,15 @@
++File=query_cache_enhance.patch
++Name= query cache Percona's cumulative patch
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment= 1) Add new status - Waiting on query cache mutex (status_wait_query_cache_mutex.patch)
++         2) Remove comments from query (need for cache hit) (query_cache_with_comments.patch)
++         3) Totally disable query cache (query_cache_totally_disable.info)
++2010-05 - First version avaliable (query_cache_with_comments.patch)
++2010-07 - First version avaliable (status_wait_query_cache_mutex.patch
++2010-07 - First version avaliable (query_cache_totally_disable.info)
++2010-07 - Fix crash (query_cache_with_comments.patch)
++2010-07 - Fix incorrect behavior diff (query_cache_with_comments.patch)
++2010-09 - Merge patches to one
++2010-11 - Ported to 5.5
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc    2010-11-03 03:01:14.000000000 +0500
++++ b/sql/mysqld.cc    2010-11-13 15:34:40.000000000 +0500
+@@ -893,6 +893,7 @@
+ #endif
+ #ifdef HAVE_QUERY_CACHE
+ ulong query_cache_min_res_unit= QUERY_CACHE_MIN_RESULT_DATA_SIZE;
++my_bool opt_query_cache_strip_comments= FALSE;
+ Query_cache query_cache;
+ #endif
+ #ifdef HAVE_SMEM
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h     2010-11-03 03:01:14.000000000 +0500
++++ b/sql/mysqld.h     2010-11-13 15:34:36.000000000 +0500
+@@ -91,6 +91,7 @@
+ extern my_bool opt_log, opt_slow_log;
+ extern my_bool opt_backup_history_log;
+ extern my_bool opt_backup_progress_log;
++extern my_bool opt_query_cache_strip_comments;
+ extern ulonglong log_output_options;
+ extern ulong log_backup_output_options;
+ extern my_bool opt_log_queries_not_using_indexes;
+diff -ruN a/sql/query_strip_comments.h b/sql/query_strip_comments.h
+--- a/sql/query_strip_comments.h       1970-01-01 05:00:00.000000000 +0500
++++ b/sql/query_strip_comments.h       2010-11-12 17:24:47.000000000 +0500
+@@ -0,0 +1,37 @@
++#ifndef _SQL_QUERY_STRIPC_COMMENTS_H_
++#define _SQL_QUERY_STRIPC_COMMENTS_H_
++#ifdef HAVE_QUERY_CACHE
++
++// implemented in sql_cache.cc
++class QueryStripComments
++{
++private:
++  QueryStripComments(const QueryStripComments&);
++  QueryStripComments& operator=(const QueryStripComments&);
++public:
++  QueryStripComments();
++  ~QueryStripComments();
++  void set(const char* a_query, uint a_query_length, uint a_additional_length);
++  
++  char* query()        { return buffer; }
++  uint  query_length() { return length; }
++private:
++  void cleanup();
++private:
++  char* buffer;
++  uint  length /*query length, not buffer length*/;
++  uint  buffer_length;
++};
++class QueryStripComments_Backup
++{
++public:
++  QueryStripComments_Backup(THD* a_thd,QueryStripComments* qsc);
++  ~QueryStripComments_Backup();
++private:
++  THD*  thd;
++  char* query;
++  uint  length;
++};
++
++#endif // HAVE_QUERY_CACHE
++#endif // _SQL_QUERY_STRIPC_COMMENTS_H_
+diff -ruN a/sql/sql_cache.cc b/sql/sql_cache.cc
+--- a/sql/sql_cache.cc 2010-11-03 03:01:14.000000000 +0500
++++ b/sql/sql_cache.cc 2010-11-12 17:24:47.000000000 +0500
+@@ -344,6 +344,181 @@
+ #include "probes_mysql.h"
+ #include "transaction.h"
++#include "query_strip_comments.h"
++
++QueryStripComments::QueryStripComments()
++{
++  buffer = 0;
++  length = 0;
++  buffer_length = 0;
++}
++QueryStripComments::~QueryStripComments()
++{
++  cleanup();
++}
++
++inline bool query_strip_comments_is_white_space(char c)
++{
++  return ((' ' == c) || ('\t' == c) || ('\r' == c) || ('\n' ==c ));
++}
++void QueryStripComments::set(const char* query, uint query_length, uint additional_length)
++{
++  uint new_buffer_length = query_length + additional_length;
++  if(new_buffer_length > buffer_length)
++  {
++    cleanup();
++    buffer = (char*)my_malloc(new_buffer_length,MYF(0));
++  }
++  uint query_position = 0;
++  uint position = 0;
++  // Skip whitespaces from begin
++  while((query_position < query_length) && query_strip_comments_is_white_space(query[query_position]))
++  {
++    ++query_position;
++  }
++  long int last_space = -1;
++  while(query_position < query_length)
++  {
++    char current = query[query_position];
++    bool insert_space = false; // insert space to buffer, (IMPORTANT) don't update query_position
++    switch(current)
++    {
++    case '\'':
++    case '"':
++      {
++        buffer[position++] = query[query_position++]; // copy current symbol
++        while(query_position < query_length)
++        {
++          if(current == query[query_position]) // found pair quote
++          {
++            break;
++          }
++          buffer[position++] = query[query_position++]; // copy current symbol
++        }
++        break;
++      }
++    case '/':
++      {
++        if(((query_position + 2) < query_length) && ('*' == query[query_position+1]) && ('!' != query[query_position+2]))
++        {
++          query_position += 2; // skip "/*"
++          do
++          {
++            if('*' == query[query_position] && '/' == query[query_position+1]) // check for "*/"
++            {
++              query_position += 2; // skip "*/"
++              insert_space = true;
++              break;
++            }
++            else
++            {
++              ++query_position;
++            }
++          }
++          while(query_position < query_length);
++          if(!insert_space)
++          {
++            continue;
++          }
++        }
++        break;
++      }
++    case '-':
++      {
++        if(query[query_position+1] == '-')
++        {
++          ++query_position; // skip "-", and go to search of "\n"
++        }
++        else
++        {
++          break;
++        }
++      }
++    case '#':
++      {
++        do
++        {
++          ++query_position; // skip current symbol (# or -)
++          if('\n' == query[query_position])  // check for '\n'
++          {
++            ++query_position; // skip '\n'
++            insert_space = true;
++            break;
++          }
++        }
++        while(query_position < query_length);
++        if(insert_space)
++        {
++          break;
++        }
++        else
++        {
++          continue;
++        }
++      }
++    default:
++      if(query_strip_comments_is_white_space(current))
++      {
++        insert_space = true;
++        ++query_position;
++      }
++      break; // make gcc happy
++    }
++    if(insert_space)
++    {
++      if((last_space + 1) != position)
++      {
++        last_space = position;
++        buffer[position++] = ' ';
++      }
++    }
++    else
++    {
++      buffer[position++] = query[query_position++];
++    }
++  }
++  while((0 < position) && query_strip_comments_is_white_space(buffer[position - 1]))
++  {
++    --position;
++  }
++  buffer[position] = 0;
++  length = position;
++}
++void QueryStripComments::cleanup()
++{
++  if(buffer)
++  {
++    my_free(buffer);
++  }
++  buffer        = 0;
++  length        = 0;
++  buffer_length = 0;
++}
++QueryStripComments_Backup::QueryStripComments_Backup(THD* a_thd,QueryStripComments* qsc)
++{
++  if(opt_query_cache_strip_comments)
++  {
++    thd = a_thd;
++    query = thd->query();
++    length = thd->query_length();
++    qsc->set(query,length,thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE);
++    thd->set_query(qsc->query(),qsc->query_length());
++  }
++  else
++  {
++    thd = 0;
++    query = 0;
++    length = 0;
++  }
++}
++QueryStripComments_Backup::~QueryStripComments_Backup()
++{
++  if(thd)
++  {
++    thd->set_query(query,length);
++  }
++}
++
+ #ifdef EMBEDDED_LIBRARY
+ #include "emb_qcache.h"
+ #endif
+@@ -454,7 +629,12 @@
+   Query_cache_wait_state wait_state(thd, __func__, __FILE__, __LINE__);
+   DBUG_ENTER("Query_cache::try_lock");
++  const char* old_proc_info= thd->proc_info;
++  thd_proc_info(thd,"Waiting on query cache mutex");
+   mysql_mutex_lock(&structure_guard_mutex);
++  DBUG_EXECUTE_IF("status_wait_query_cache_mutex_sleep", {
++      sleep(5);
++    });
+   while (1)
+   {
+     if (m_cache_lock_status == Query_cache::UNLOCKED)
+@@ -501,6 +681,7 @@
+     }
+   }
+   mysql_mutex_unlock(&structure_guard_mutex);
++  thd->proc_info = old_proc_info;
+   DBUG_RETURN(interrupt);
+ }
+@@ -1274,6 +1455,8 @@
+       unlock();
+       DBUG_VOID_RETURN;
+     }
++    QueryStripComments *query_strip_comments = &(thd->query_strip_comments);
++    QueryStripComments_Backup backup(thd,query_strip_comments);
+     /* Key is query + database + flag */
+     if (thd->db_length)
+@@ -1451,6 +1634,9 @@
+   Query_cache_block_table *block_table, *block_table_end;
+   ulong tot_length;
+   Query_cache_query_flags flags;
++  QueryStripComments *query_strip_comments = &(thd->query_strip_comments);
++  char *sql_backup          = sql;
++  uint  query_length_backup = query_length;
+   DBUG_ENTER("Query_cache::send_result_to_client");
+   /*
+@@ -1472,21 +1658,103 @@
+   {
+     uint i= 0;
+-    /*
+-      Skip '(' characters in queries like following:
+-      (select a from t1) union (select a from t1);
+-    */
+-    while (sql[i]=='(')
+-      i++;
++    if(opt_query_cache_strip_comments)
++    {
++      /* Skip all comments and non-letter symbols */
++      uint& query_position = i;
++      char* query = sql;
++      while(query_position < query_length)
++      {
++        bool check = false;
++        char current = query[query_position];
++        switch(current)
++        {
++        case '/':
++          if(((query_position + 2) < query_length) && ('*' == query[query_position+1]) && ('!' != query[query_position+2]))
++          {
++            query_position += 2; // skip "/*"
++            do
++            {
++              if('*' == query[query_position] && '/' == query[query_position+1]) // check for "*/" (without space)
++              {
++                query_position += 2; // skip "*/" (without space)
++                break;
++              }
++              else
++              {
++                ++query_position;
++              }
++            }
++            while(query_position < query_length);
++            continue; // analyze current symbol
++          }
++          break;
++        case '-':
++          if(query[query_position+1] == '-')
++          {
++            ++query_position; // skip "-"
++          }
++          else
++          {
++            break;
++          }
++        case '#':
++          do
++          {
++            ++query_position; // skip current symbol
++            if('\n' == query[query_position])  // check for '\n'
++            {
++              ++query_position; // skip '\n'
++              break;
++            }
++          }
++          while(query_position < query_length);
++          continue; // analyze current symbol
++        case '\r':
++        case '\n':
++        case '\t':
++        case ' ':
++        case '(':
++        case ')':
++          break;
++        default:
++          check = true;
++          break; // make gcc happy
++        } // switch(current)
++        if(check)
++        {
++          if(query_position + 2 < query_length)
++          {
++            // cacheable
++            break;
++          }
++          else
++          {
++            DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached"));
++            goto err;
++          }
++        } // if(check)
++        ++query_position;
++      } // while(query_position < query_length)
++    }
++    else // if(opt_query_cache_strip_comments)
++    {
++      /*
++        Skip '(' characters in queries like following:
++        (select a from t1) union (select a from t1);
++      */
++      while (sql[i]=='(')
++        i++;
+-    /*
+-      Test if the query is a SELECT
+-      (pre-space is removed in dispatch_command).
++    } // if(opt_query_cache_strip_comments)    
++      /*
++        Test if the query is a SELECT
++        (pre-space is removed in dispatch_command).
+-      First '/' looks like comment before command it is not
+-      frequently appeared in real life, consequently we can
+-      check all such queries, too.
+-    */
++        First '/' looks like comment before command it is not
++        frequently appeared in real life, consequently we can
++        check all such queries, too.
++      */
+     if ((my_toupper(system_charset_info, sql[i])     != 'S' ||
+          my_toupper(system_charset_info, sql[i + 1]) != 'E' ||
+          my_toupper(system_charset_info, sql[i + 2]) != 'L') &&
+@@ -1521,6 +1789,12 @@
+     goto err_unlock;
+   Query_cache_block *query_block;
++  if(opt_query_cache_strip_comments)
++  {
++    query_strip_comments->set(sql, query_length, thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE);
++    sql          = query_strip_comments->query();
++    query_length = query_strip_comments->query_length();
++  }
+   tot_length= query_length + thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE;
+   if (thd->db_length)
+@@ -1587,6 +1861,8 @@
+        (uchar*) &flags, QUERY_CACHE_FLAGS_SIZE);
+   query_block = (Query_cache_block *)  my_hash_search(&queries, (uchar*) sql,
+                                                       tot_length);
++  sql          = sql_backup;
++  query_length = query_length_backup;
+   /* Quick abort on unlocked data */
+   if (query_block == 0 ||
+       query_block->query()->result() == 0 ||
+diff -ruN a/sql/sql_class.h b/sql/sql_class.h
+--- a/sql/sql_class.h  2010-11-03 03:01:14.000000000 +0500
++++ b/sql/sql_class.h  2010-11-13 15:34:25.000000000 +0500
+@@ -40,6 +40,9 @@
+ #include "thr_lock.h"             /* thr_lock_type, THR_LOCK_DATA,
+                                      THR_LOCK_INFO */
++#ifdef HAVE_QUERY_CACHE
++#include "query_strip_comments.h"
++#endif // HAVE_QUERY_CACHE
+ class Reprepare_observer;
+ class Relay_log_info;
+@@ -758,6 +761,9 @@
+     statement lifetime. FIXME: must be const
+   */
+    ulong id;
++#ifdef HAVE_QUERY_CACHE
++  QueryStripComments query_strip_comments; // see sql_cache.cc
++#endif //HAVE_QUERY_CACHE
+   /*
+     MARK_COLUMNS_NONE:  Means mark_used_colums is not set and no indicator to
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc  2010-11-03 03:01:14.000000000 +0500
++++ b/sql/sys_vars.cc  2010-11-13 15:34:59.000000000 +0500
+@@ -1724,6 +1724,11 @@
+        NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+        ON_UPDATE(fix_query_cache_size));
++static Sys_var_mybool Sys_query_cache_strip_comments(
++       "query_cache_strip_comments", "Enable and disable optimisation \"strip comment for query cache\" - optimisation strip all comments from query while search query result in query cache",
++       GLOBAL_VAR(opt_query_cache_strip_comments), CMD_LINE(OPT_ARG),
++       DEFAULT(FALSE));
++
+ static Sys_var_ulong Sys_query_cache_limit(
+        "query_cache_limit",
+        "Don't cache results that are bigger than this",
diff --git a/remove_fcntl_excessive_calls.patch b/remove_fcntl_excessive_calls.patch
new file mode 100644 (file)
index 0000000..e48bbdf
--- /dev/null
@@ -0,0 +1,82 @@
+# name       : remove_fcntl_excessive_calls.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/remove_fcntl_excessive_calls.info b/patch_info/remove_fcntl_excessive_calls.info
+--- a/patch_info/remove_fcntl_excessive_calls.info     1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/remove_fcntl_excessive_calls.info     2010-07-22 21:42:08.560424001 +0400
+@@ -0,0 +1,6 @@
++File=remove_fcntl_excessive_calls.patch
++Name=remove fcntl excessive calls
++Version=1.0
++Author=This is a port of the official fix.
++License=GPL
++Comment=
+diff -ruN a/sql/net_serv.cc b/sql/net_serv.cc
+--- a/sql/net_serv.cc  2010-06-03 19:50:27.000000000 +0400
++++ b/sql/net_serv.cc  2010-07-22 21:40:30.680424001 +0400
+@@ -133,7 +133,7 @@
+   if (vio != 0)                                       /* If real connection */
+   {
+     net->fd  = vio_fd(vio);                   /* For perl DBI/DBD */
+-#if defined(MYSQL_SERVER) && !defined(__WIN__)
++#if defined(MYSQL_SERVER) && !defined(__WIN__) && !defined(NO_ALARM)
+     if (!(test_flags & TEST_BLOCKING))
+     {
+       my_bool old_mode;
+@@ -642,7 +642,7 @@
+     if ((long) (length= vio_write(net->vio,pos,(size_t) (end-pos))) <= 0)
+     {
+       my_bool interrupted = vio_should_retry(net->vio);
+-#if !defined(__WIN__)
++#if !defined(NO_ALARM) && !defined(__WIN__)
+       if ((interrupted || length == 0) && !thr_alarm_in_use(&alarmed))
+       {
+         if (!thr_alarm(&alarmed, net->write_timeout, &alarm_buff))
+@@ -680,7 +680,7 @@
+                 my_progname);
+ #endif /* EXTRA_DEBUG */
+       }
+-#if defined(THREAD_SAFE_CLIENT) && !defined(MYSQL_SERVER)
++#if defined(THREAD_SAFE_CLIENT) && defined(NO_ALARM)
+       if (vio_errno(net->vio) == SOCKET_EINTR)
+       {
+       DBUG_PRINT("warning",("Interrupted write. Retrying..."));
+@@ -698,7 +698,7 @@
+     pos+=length;
+     update_statistics(thd_increment_bytes_sent(length));
+   }
+-#ifndef __WIN__
++#if !defined(NO_ALARM) && !defined(__WIN__)
+  end:
+ #endif
+ #ifdef HAVE_COMPRESS
+@@ -830,6 +830,7 @@
+     thr_alarm(&alarmed,net->read_timeout,&alarm_buff);
+ #else
+   /* Read timeout is set in my_net_set_read_timeout */
++  DBUG_ASSERT(net_blocking);
+ #endif /* NO_ALARM */
+     pos = net->buff + net->where_b;           /* net->packet -4 */
+@@ -844,7 +845,7 @@
+         DBUG_PRINT("info",("vio_read returned %ld  errno: %d",
+                            (long) length, vio_errno(net->vio)));
+-#if !defined(__WIN__) || defined(MYSQL_SERVER)
++#if !defined(NO_ALARM) && (!defined(__WIN__) || defined(MYSQL_SERVER))
+         /*
+           We got an error that there was no data on the socket. We now set up
+           an alarm to not 'read forever', change the socket to non blocking
+@@ -891,7 +892,7 @@
+                   my_progname,vio_errno(net->vio));
+ #endif /* EXTRA_DEBUG */
+         }
+-#if defined(THREAD_SAFE_CLIENT) && !defined(MYSQL_SERVER)
++#if defined(THREAD_SAFE_CLIENT) && defined(NO_ALARM)
+         if (vio_errno(net->vio) == SOCKET_EINTR)
+         {
+           DBUG_PRINT("warning",("Interrupted read. Retrying..."));
diff --git a/response_time_distribution.patch b/response_time_distribution.patch
new file mode 100644 (file)
index 0000000..def263e
--- /dev/null
@@ -0,0 +1,858 @@
+# name       : response-time-distribution.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/CMakeLists.txt b/CMakeLists.txt
+--- a/CMakeLists.txt   2010-12-03 20:58:24.000000000 +0300
++++ b/CMakeLists.txt   2011-01-16 20:11:28.000000000 +0300
+@@ -171,7 +171,12 @@
+ OPTION (WITH_UNIT_TESTS "Compile MySQL with unit tests" ON)
+ MARK_AS_ADVANCED(CYBOZU BACKUP_TEST WITHOUT_SERVER DISABLE_SHARED)
+- 
++# Response time distribution is compiled in unless it is explicitly switched off.
++OPTION(WITHOUT_RESPONSE_TIME_DISTRIBUTION "Build without response_time_distribution support" OFF)
++IF(NOT WITHOUT_RESPONSE_TIME_DISTRIBUTION)
++  ADD_DEFINITIONS(-DHAVE_RESPONSE_TIME_DISTRIBUTION)
++ENDIF()
++                                                                                                                                                             
+ OPTION(ENABLE_DEBUG_SYNC "Enable debug sync (debug builds only)" ON) 
+ IF(ENABLE_DEBUG_SYNC) 
+   SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DENABLED_DEBUG_SYNC") 
+diff -ruN a/include/mysql_com.h b/include/mysql_com.h
+--- a/include/mysql_com.h      2010-12-03 20:58:24.000000000 +0300
++++ b/include/mysql_com.h      2011-01-16 18:53:58.000000000 +0300
+@@ -137,10 +137,11 @@
+ #define REFRESH_FAST          32768   /* Intern flag */
+ /* RESET (remove all queries) from query cache */
+-#define REFRESH_QUERY_CACHE   65536
+-#define REFRESH_QUERY_CACHE_FREE 0x20000L /* pack query cache */
+-#define REFRESH_DES_KEY_FILE  0x40000L
+-#define REFRESH_USER_RESOURCES        0x80000L
++#define REFRESH_QUERY_CACHE        65536
++#define REFRESH_QUERY_CACHE_FREE    0x20000L /* pack query cache */
++#define REFRESH_DES_KEY_FILE       0x40000L
++#define REFRESH_USER_RESOURCES             0x80000L
++#define REFRESH_QUERY_RESPONSE_TIME 0x100000L /* response time distribution */
+ #define CLIENT_LONG_PASSWORD  1       /* new more secure passwords */
+ #define CLIENT_FOUND_ROWS     2       /* Found instead of affected rows */
+diff -ruN a/patch_info/response-time-distribution.info b/patch_info/response-time-distribution.info
+--- a/patch_info/response-time-distribution.info       1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/response-time-distribution.info       2011-01-16 18:53:59.000000000 +0300
+@@ -0,0 +1,9 @@
++File=response-time-distribution.patch
++Name=Response time distribution
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
++Changelog
++2010-07-02 first version available
++2010-09-15 add column 'total'
+diff -ruN a/sql/CMakeLists.txt b/sql/CMakeLists.txt
+--- a/sql/CMakeLists.txt       2010-12-03 20:58:26.000000000 +0300
++++ b/sql/CMakeLists.txt       2011-01-16 18:53:59.000000000 +0300
+@@ -51,7 +51,7 @@
+                message.h mf_iocache.cc my_decimal.cc ../sql-common/my_time.c
+                mysqld.cc net_serv.cc  keycaches.cc
+                ../sql-common/client_plugin.c
+-               opt_range.cc opt_range.h opt_sum.cc 
++               opt_range.cc opt_range.h query_response_time.h opt_sum.cc 
+                ../sql-common/pack.c parse_file.cc password.c procedure.cc 
+                protocol.cc records.cc repl_failsafe.cc rpl_filter.cc set_var.cc 
+                slave.cc sp.cc sp_cache.cc sp_head.cc sp_pcontext.cc 
+@@ -59,7 +59,7 @@
+                sql_cache.cc sql_class.cc sql_client.cc sql_crypt.cc sql_crypt.h 
+                sql_cursor.cc sql_db.cc sql_delete.cc sql_derived.cc sql_do.cc 
+                sql_error.cc sql_handler.cc sql_help.cc sql_insert.cc sql_lex.cc 
+-               sql_list.cc sql_load.cc sql_manager.cc sql_parse.cc
++               sql_list.cc sql_load.cc sql_manager.cc sql_parse.cc query_response_time.cc
+                sql_partition.cc sql_plugin.cc sql_prepare.cc sql_rename.cc 
+                debug_sync.cc debug_sync.h
+                sql_repl.cc sql_select.cc sql_show.cc sql_state.c sql_string.cc 
+diff -ruN a/sql/handler.h b/sql/handler.h
+--- a/sql/handler.h    2011-01-16 18:53:33.000000000 +0300
++++ b/sql/handler.h    2011-01-16 18:54:00.000000000 +0300
+@@ -580,6 +580,7 @@
+   SCH_PROFILES,
+   SCH_REFERENTIAL_CONSTRAINTS,
+   SCH_PROCEDURES,
++  SCH_QUERY_RESPONSE_TIME,
+   SCH_SCHEMATA,
+   SCH_SCHEMA_PRIVILEGES,
+   SCH_SESSION_STATUS,
+diff -ruN a/sql/lex.h b/sql/lex.h
+--- a/sql/lex.h        2010-12-03 20:58:26.000000000 +0300
++++ b/sql/lex.h        2011-01-16 18:54:01.000000000 +0300
+@@ -426,6 +426,7 @@
+   { "PURGE",          SYM(PURGE)},
+   { "QUARTER",          SYM(QUARTER_SYM)},
+   { "QUERY",          SYM(QUERY_SYM)},
++  { "QUERY_RESPONSE_TIME", SYM(QUERY_RESPONSE_TIME_SYM)},
+   { "QUICK",          SYM(QUICK)},
+   { "RANGE",            SYM(RANGE_SYM)},
+   { "READ",           SYM(READ_SYM)},
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc    2011-01-16 18:53:35.000000000 +0300
++++ b/sql/mysqld.cc    2011-01-17 02:22:27.000000000 +0300
+@@ -69,6 +69,8 @@
+ #include "debug_sync.h"
+ #include "sql_callback.h"
++#include "query_response_time.h"
++
+ #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
+ #include "../storage/perfschema/pfs_server.h"
+ #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
+@@ -600,7 +602,7 @@
+ MY_LOCALE *my_default_lc_messages;
+ MY_LOCALE *my_default_lc_time_names;
+-SHOW_COMP_OPTION have_ssl, have_symlink, have_dlopen, have_query_cache;
++SHOW_COMP_OPTION have_ssl, have_symlink, have_dlopen, have_query_cache, have_response_time_distribution;
+ SHOW_COMP_OPTION have_geometry, have_rtree_keys;
+ SHOW_COMP_OPTION have_crypt, have_compress;
+ SHOW_COMP_OPTION have_profiling;
+@@ -901,6 +903,10 @@
+ my_bool opt_enable_shared_memory;
+ HANDLE smem_event_connect_request= 0;
+ #endif
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ulong   opt_query_response_time_range_base  = QRT_DEFAULT_BASE;
++my_bool opt_enable_query_response_time_stats= 0;
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ my_bool opt_use_ssl  = 0;
+ char *opt_ssl_ca= NULL, *opt_ssl_capath= NULL, *opt_ssl_cert= NULL,
+@@ -1469,6 +1475,9 @@
+   my_free(opt_bin_logname);
+   bitmap_free(&temp_pool);
+   free_max_user_conn();
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++  query_response_time_free();
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ #ifdef HAVE_REPLICATION
+   end_slave_list();
+ #endif
+@@ -3930,6 +3939,9 @@
+   if (!DEFAULT_ERRMSGS[0][0])
+     unireg_abort(1);  
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++  query_response_time_init();
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+   /* We have to initialize the storage engines before CSV logging */
+   if (ha_init())
+   {
+@@ -6800,6 +6812,11 @@
+ #else
+   have_query_cache=SHOW_OPTION_NO;
+ #endif
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++  have_response_time_distribution= SHOW_OPTION_YES;
++#else // HAVE_RESPONSE_TIME_DISTRIBUTION
++  have_response_time_distribution= SHOW_OPTION_NO;
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ #ifdef HAVE_SPATIAL
+   have_geometry=SHOW_OPTION_YES;
+ #else
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h     2011-01-16 18:53:35.000000000 +0300
++++ b/sql/mysqld.h     2011-01-17 02:08:20.000000000 +0300
+@@ -98,6 +98,10 @@
+ extern bool opt_disable_networking, opt_skip_show_db;
+ extern bool opt_skip_name_resolve;
+ extern bool opt_ignore_builtin_innodb;
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++extern ulong   opt_query_response_time_range_base;
++extern my_bool opt_enable_query_response_time_stats;
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ extern my_bool opt_character_set_client_handshake;
+ extern bool volatile abort_loop;
+ extern bool in_bootstrap;
+diff -ruN a/sql/query_response_time.cc b/sql/query_response_time.cc
+--- a/sql/query_response_time.cc       1970-01-01 03:00:00.000000000 +0300
++++ b/sql/query_response_time.cc       2011-01-17 02:13:08.000000000 +0300
+@@ -0,0 +1,372 @@
++#include "mysql_version.h"
++#ifdef __FreeBSD__
++#include <sys/types.h>
++#include <machine/atomic.h>
++#endif // __FreeBSD__
++#include "my_global.h"
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++#include "mysql_com.h"
++#include "rpl_tblmap.h"
++#include "table.h"
++#include "field.h"
++#include "sql_show.h"
++#include "query_response_time.h"
++
++#define TIME_STRING_POSITIVE_POWER_LENGTH QRT_TIME_STRING_POSITIVE_POWER_LENGTH
++#define TIME_STRING_NEGATIVE_POWER_LENGTH 6
++#define TOTAL_STRING_POSITIVE_POWER_LENGTH QRT_TOTAL_STRING_POSITIVE_POWER_LENGTH
++#define TOTAL_STRING_NEGATIVE_POWER_LENGTH 6
++#define MINIMUM_BASE 2
++#define MAXIMUM_BASE QRT_MAXIMUM_BASE
++#define POSITIVE_POWER_FILLER QRT_POSITIVE_POWER_FILLER
++#define NEGATIVE_POWER_FILLER QRT_NEGATIVE_POWER_FILLER
++#define STRING_OVERFLOW QRT_STRING_OVERFLOW
++#define TIME_OVERFLOW   QRT_TIME_OVERFLOW
++#define DEFAULT_BASE    QRT_DEFAULT_BASE
++
++#define do_xstr(s) do_str(s)
++#define do_str(s) #s
++#define do_format(filler,width) "%" filler width "lld"
++/*
++  Format strings for snprintf. Generated from:
++  POSITIVE_POWER_FILLER and TIME_STRING_POSITIVE_POWER_LENGTH
++  NEGATIVE_POWER_FILLER and TIME_STRING_NEGATIVE_POWER_LENGTH
++*/
++#define TIME_STRING_POSITIVE_POWER_FORMAT do_format(POSITIVE_POWER_FILLER,do_xstr(TIME_STRING_POSITIVE_POWER_LENGTH))
++#define TIME_STRING_NEGATIVE_POWER_FORMAT do_format(NEGATIVE_POWER_FILLER,do_xstr(TIME_STRING_NEGATIVE_POWER_LENGTH))
++#define TIME_STRING_FORMAT                  TIME_STRING_POSITIVE_POWER_FORMAT "." TIME_STRING_NEGATIVE_POWER_FORMAT
++
++#define TOTAL_STRING_POSITIVE_POWER_FORMAT do_format(POSITIVE_POWER_FILLER,do_xstr(TOTAL_STRING_POSITIVE_POWER_LENGTH))
++#define TOTAL_STRING_NEGATIVE_POWER_FORMAT do_format(NEGATIVE_POWER_FILLER,do_xstr(TOTAL_STRING_NEGATIVE_POWER_LENGTH))
++#define TOTAL_STRING_FORMAT                 TOTAL_STRING_POSITIVE_POWER_FORMAT "." TOTAL_STRING_NEGATIVE_POWER_FORMAT
++
++#define TIME_STRING_LENGTH    QRT_TIME_STRING_LENGTH
++#define TIME_STRING_BUFFER_LENGTH     (TIME_STRING_LENGTH + 1 /* '\0' */)
++
++#define TOTAL_STRING_LENGTH   QRT_TOTAL_STRING_LENGTH
++#define TOTAL_STRING_BUFFER_LENGTH    (TOTAL_STRING_LENGTH + 1 /* '\0' */)
++
++/*
++  Calculate length of "log linear"
++  1)
++  (MINIMUM_BASE ^ result) <= (10 ^ STRING_POWER_LENGTH) < (MINIMUM_BASE ^ (result + 1))
++
++  2)
++  (MINIMUM_BASE ^ result) <= (10 ^ STRING_POWER_LENGTH)
++  and
++  (MINIMUM_BASE ^ (result + 1)) > (10 ^ STRING_POWER_LENGTH)
++
++  3)
++  result     <= LOG(MINIMUM_BASE, 10 ^ STRING_POWER_LENGTH)= STRING_POWER_LENGTH * LOG(MINIMUM_BASE,10)
++  result + 1 >  LOG(MINIMUM_BASE, 10 ^ STRING_POWER_LENGTH)= STRING_POWER_LENGTH * LOG(MINIMUM_BASE,10)
++
++  4) STRING_POWER_LENGTH * LOG(MINIMUM_BASE,10) - 1 < result <= STRING_POWER_LENGTH * LOG(MINIMUM_BASE,10)
++
++  MINIMUM_BASE= 2 always, LOG(MINIMUM_BASE,10)= 3.3219280948873626, result= (int)(3.3219280948873626 * STRING_POWER_LENGTH)
++
++  The last counter is always used for time overflow
++*/
++#define POSITIVE_POWER_COUNT ((int)(3.32192809 * TIME_STRING_POSITIVE_POWER_LENGTH))
++#define NEGATIVE_POWER_COUNT ((int)(3.32192809 * TIME_STRING_NEGATIVE_POWER_LENGTH))
++#define OVERALL_POWER_COUNT (NEGATIVE_POWER_COUNT + 1 + POSITIVE_POWER_COUNT)
++
++#define MILLION ((unsigned long)1000 * 1000)
++
++namespace query_response_time
++{
++
++class utility // precomputes the table of bounds: a million divided and multiplied by successive powers of the base
++{
++public:
++  utility() : m_base(0)
++  {
++    m_max_dec_value= MILLION;
++    for(int i= 0; TIME_STRING_POSITIVE_POWER_LENGTH > i; ++i)
++      m_max_dec_value *= 10;
++    setup(DEFAULT_BASE); // m_base starts at 0, so this first call always builds the table
++  }
++public:
++  uint      base()            const { return m_base; }
++  uint      negative_count()  const { return m_negative_count; }
++  uint      positive_count()  const { return m_positive_count; }
++  uint      bound_count()     const { return m_bound_count; }
++  ulonglong max_dec_value()   const { return m_max_dec_value; }
++  ulonglong bound(uint index) const { return m_bound[ index ]; } // no range check on index
++public:
++  void setup(uint base) // rebuilds the counts and m_bound for a new base; does nothing when base is unchanged
++  {
++    if(base != m_base)
++    {
++      m_base= base; // NOTE(review): base 0 or 1 would break the loops below - presumably validated by the caller
++
++      const ulonglong million= 1000 * 1000;
++      ulonglong value= million;
++      m_negative_count= 0;
++      while(value > 0) // count integer divisions of a million by the base until the value reaches 0
++      {
++      m_negative_count += 1;
++      value /= m_base;
++      }
++      m_negative_count -= 1; // the division that produced 0 is not a bound
++
++      value= million;
++      m_positive_count= 0;
++      while(value < m_max_dec_value) // count multiplications by the base while below m_max_dec_value
++      {
++      m_positive_count += 1;
++      value *= m_base;
++      }
++      m_bound_count= m_negative_count + m_positive_count;
++
++      value= million;
++      for(uint i= 0; i < m_negative_count; ++i) // bounds below a million, stored in ascending order
++      {
++      value /= m_base;
++      m_bound[m_negative_count - i - 1]= value;
++      }
++      value= million;
++      for(uint i= 0; i < m_positive_count;  ++i) // bounds from a million upwards
++      {
++      m_bound[m_negative_count + i]= value;
++      value *= m_base;
++      }
++    }
++  }
++private:
++  uint      m_base;
++  uint      m_negative_count;
++  uint      m_positive_count;
++  uint      m_bound_count;
++  ulonglong m_max_dec_value; /* MILLION * 10^TIME_STRING_POSITIVE_POWER_LENGTH; for length 7 this is 10000000 * MILLION */
++  ulonglong m_bound[OVERALL_POWER_COUNT];
++};
++
++void print_time(char* buffer, std::size_t buffer_size, std::size_t string_positive_power_length, const char* format, uint64 value)
++{
++  memset(buffer,'X',buffer_size);
++  buffer[string_positive_power_length]= '.';
++  ulonglong second=      (value / MILLION);
++  ulonglong microsecond= (value % MILLION);
++  std::size_t result_length= snprintf(buffer,buffer_size,format,second,microsecond);
++  if(result_length < 0)
++  {
++    assert(sizeof(STRING_OVERFLOW) <= buffer_size);
++    memcpy(buffer, STRING_OVERFLOW, sizeof(STRING_OVERFLOW));
++    return;
++  }
++  buffer[result_length]= 0;
++}
++#ifdef __x86_64__
++typedef uint64 TimeCounter;
++void add_time_atomic(TimeCounter* counter, uint64 time)
++{
++  __sync_fetch_and_add(counter,time);  
++}
++#endif // __x86_64__
++#ifdef __i386__
++inline uint32 get_high(uint64 value)
++{
++  return static_cast<uint32>(value >> 32); /* upper 32 bits; shifting back left before narrowing to uint32 always gave 0 */
++}
++inline uint32 get_low(uint64 value)
++{
++  return static_cast<uint32>(value & 0xFFFFFFFFULL); /* lower 32 bits of value */
++}
++#ifdef __FreeBSD__
++inline bool compare_and_swap(volatile uint32 *target, uint32 old, uint32 new_value)
++{
++  return atomic_cmpset_32(target,old,new_value);
++}
++#else // __FreeBSD__
++inline bool compare_and_swap(volatile uint32* target, uint32 old, uint32 new_value)
++{
++  return __sync_bool_compare_and_swap(target,old,new_value);
++}
++#endif // __FreeBSD__
++class TimeCounter /* 64-bit counter kept as two uint32 halves (this variant is compiled under __i386__) */
++{
++public:
++  TimeCounter& operator=(uint64 time) /* plain stores of both halves, no atomic operation involved */
++  {
++    this->m_high= get_high(time);
++    this->m_low=  get_low(time);
++    return *this;
++  }
++  operator uint64() const /* recombines the halves; the two reads are separate, not one atomic load */
++  {
++    return ((static_cast<uint64>(m_high) << 32) + static_cast<uint64>(m_low));
++  }
++  void add(uint64 time) /* adds time: compare-and-swap loop on the low half, then an atomic add on the high half */
++  {
++    uint32 time_high = get_high(time);
++    uint32 time_low  = get_low(time);
++    uint64 time_low64= time_low;
++    while(true)
++    {
++      uint32 old_low= this->m_low;
++      uint64 old_low64= old_low;
++
++      uint64 new_low64= old_low64 + time_low64; /* 64-bit sum so that a carry out of the low half stays visible */
++      uint32 new_low= (get_low(new_low64));
++      bool add_high= (get_high(new_low64) != 0); /* carry flag; relies on get_high() returning the upper 32 bits */
++
++      if(!compare_and_swap(&m_low,old_low,new_low))
++      {
++      continue; /* m_low changed since it was read: recompute from the new value */
++      }
++      if(add_high)
++      {
++      ++time_high;
++      }
++      if(time_high > 0)
++      {
++        __sync_fetch_and_add(&m_high,time_high); /* separate step from the low-half update above */
++      }
++      break;
++    }
++  }
++private:
++  uint32 m_low;
++  uint32 m_high;
++};
++void add_time_atomic(TimeCounter* counter, uint64 time)
++{
++  counter->add(time);
++}
++#endif // __i386__
++
++class time_collector
++{
++public:
++  time_collector(utility& u) : m_utility(&u)
++  {
++  }
++  uint32 count(uint index) const { return m_count[index]; }
++  uint64 total(uint index) const { return m_total[index]; }
++public:
++  void flush()
++  {
++    memset(&m_count,0,sizeof(m_count));
++    memset((void*)&m_total,0,sizeof(m_total));
++  }
++  void collect(uint64 time)
++  {
++    bool no_collect= false;
++    DBUG_EXECUTE_IF("response_time_distribution_log_only_more_300_milliseconds", {   \
++        no_collect= time < 300 * 1000; \
++      });
++    if(no_collect) return;
++    int i= 0;
++    for(int count= m_utility->bound_count(); count > i; ++i)
++    {
++      if(m_utility->bound(i) > time)
++      {
++        __sync_fetch_and_add(&(m_count[i]),(uint32)1);
++       add_time_atomic(&(m_total[i]),time);
++        break;
++      }
++    }
++  }
++private:
++  utility* m_utility;
++  uint32   m_count[OVERALL_POWER_COUNT + 1];
++  TimeCounter m_total[OVERALL_POWER_COUNT + 1];
++};
++
++class collector
++{
++public:
++  collector() : m_time(m_utility)
++  {
++    m_utility.setup(DEFAULT_BASE);
++    m_time.flush();
++  }
++public:
++  void flush()
++  {
++    m_utility.setup(opt_query_response_time_range_base);
++    m_time.flush();
++  }
++  int fill(THD* thd, TABLE_LIST *tables, COND *cond)
++  {
++    DBUG_ENTER("fill_schema_query_response_time");
++    TABLE        *table= static_cast<TABLE*>(tables->table);
++    Field        **fields= table->field;
++    for(uint i= 0, count= bound_count() + 1 /* with overflow */; count > i; ++i)
++    {
++      char time[TIME_STRING_BUFFER_LENGTH];
++      char total[TOTAL_STRING_BUFFER_LENGTH];
++      if(i == bound_count())
++      {        
++        assert(sizeof(TIME_OVERFLOW) <= TIME_STRING_BUFFER_LENGTH);
++        assert(sizeof(TIME_OVERFLOW) <= TOTAL_STRING_BUFFER_LENGTH);
++        memcpy(time,TIME_OVERFLOW,sizeof(TIME_OVERFLOW));
++        memcpy(total,TIME_OVERFLOW,sizeof(TIME_OVERFLOW));
++      }
++      else
++      {
++        print_time(time,sizeof(time),TIME_STRING_POSITIVE_POWER_LENGTH,TIME_STRING_FORMAT,this->bound(i));
++        print_time(total,sizeof(total),TOTAL_STRING_POSITIVE_POWER_LENGTH,TOTAL_STRING_FORMAT,this->total(i));
++      }
++      fields[0]->store(time,strlen(time),system_charset_info);
++      fields[1]->store(this->count(i));
++      fields[2]->store(total,strlen(total),system_charset_info);
++      if (schema_table_store_record(thd, table))
++      {
++      DBUG_RETURN(1);
++      }
++    }
++    DBUG_RETURN(0);
++  }
++  void collect(ulonglong time)
++  {
++    m_time.collect(time);
++  }
++  uint bound_count() const
++  {
++    return m_utility.bound_count();
++  }
++  ulonglong bound(uint index)
++  {
++    return m_utility.bound(index);
++  }
++  ulonglong count(uint index)
++  {
++    return m_time.count(index);
++  }
++  ulonglong total(uint index)
++  {
++    return m_time.total(index);
++  }
++private:
++  utility          m_utility;
++  time_collector   m_time;
++};
++
++static collector g_collector;
++
++} // namespace query_response_time
++
++void query_response_time_init()
++{
++}
++
++void query_response_time_free()
++{
++  query_response_time::g_collector.flush();
++}
++
++void query_response_time_flush()
++{
++  query_response_time::g_collector.flush();
++}
++void query_response_time_collect(ulonglong query_time)
++{
++  query_response_time::g_collector.collect(query_time);
++}
++
++int query_response_time_fill(THD* thd, TABLE_LIST *tables, COND *cond)
++{
++  return query_response_time::g_collector.fill(thd,tables,cond);
++}
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+diff -ruN a/sql/query_response_time.h b/sql/query_response_time.h
+--- a/sql/query_response_time.h        1970-01-01 03:00:00.000000000 +0300
++++ b/sql/query_response_time.h        2011-01-17 02:13:34.000000000 +0300
+@@ -0,0 +1,71 @@
++#ifndef QUERY_RESPONSE_TIME_H
++#define QUERY_RESPONSE_TIME_H
++
++/*
++  Settings for query response time
++*/
++
++/*
++  Maximum string length for (10 ^ (-1 * QRT_STRING_NEGATIVE_POWER_LENGTH)) in text representation.
++  Example: for 6 is 0.000001
++  Always 2
++
++  Maximum string length for (10 ^ (QRT_STRING_POSITIVE_POWER_LENGTH + 1) - 1) in text representation.
++  Example: for 7 is 9999999.0
++*/
++#define QRT_TIME_STRING_POSITIVE_POWER_LENGTH 7
++#define QRT_TOTAL_STRING_POSITIVE_POWER_LENGTH 7
++
++/*
++  Minimum base for log - ALWAYS 2
++  Maximum base for log:
++*/
++#define QRT_MAXIMUM_BASE 1000
++
++/*
++  Filler for whole number (positive power)
++  Example: for
++  QRT_POSITIVE_POWER_FILLER ' '
++  QRT_POSITIVE_POWER_LENGTH 7
++  and number 7234 result is:
++  '   7234'
++*/
++#define QRT_POSITIVE_POWER_FILLER " "
++/*
++  Filler for the fractional part. Works like the whole-number filler above
++*/
++#define QRT_NEGATIVE_POWER_FILLER "0"
++
++/*
++  Message shown on string overflow (an internal error: seeing this string indicates a bug in QRT)
++*/
++#define QRT_STRING_OVERFLOW "TOO BIG STRING"
++
++/*
++  Message if time too big for statistic collecting (very long query)
++*/
++#define QRT_TIME_OVERFLOW "TOO LONG"
++
++#define QRT_DEFAULT_BASE 10
++
++#define QRT_TIME_STRING_LENGTH                                \
++  max( (QRT_TIME_STRING_POSITIVE_POWER_LENGTH + 1 /* '.' */ + 6 /*QRT_TIME_STRING_NEGATIVE_POWER_LENGTH*/), \
++       max( (sizeof(QRT_TIME_OVERFLOW) - 1),          \
++          (sizeof(QRT_STRING_OVERFLOW) - 1) ) )
++
++#define QRT_TOTAL_STRING_LENGTH                               \
++  max( (QRT_TOTAL_STRING_POSITIVE_POWER_LENGTH + 1 /* '.' */ + 6 /*QRT_TOTAL_STRING_NEGATIVE_POWER_LENGTH*/), \
++       max( (sizeof(QRT_TIME_OVERFLOW) - 1),          \
++          (sizeof(QRT_STRING_OVERFLOW) - 1) ) )
++
++extern ST_SCHEMA_TABLE query_response_time_table;
++
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++extern void query_response_time_init   ();
++extern void query_response_time_free   ();
++extern void query_response_time_flush  ();
++extern void query_response_time_collect(ulonglong query_time);
++extern int  query_response_time_fill   (THD* thd, TABLE_LIST *tables, COND *cond);
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++
++#endif // QUERY_RESPONSE_TIME_H
+diff -ruN a/sql/set_var.h b/sql/set_var.h
+--- a/sql/set_var.h    2010-12-03 20:58:26.000000000 +0300
++++ b/sql/set_var.h    2011-01-17 02:20:59.000000000 +0300
+@@ -293,6 +293,7 @@
+ extern SHOW_COMP_OPTION have_ssl, have_symlink, have_dlopen;
+ extern SHOW_COMP_OPTION have_query_cache;
++extern SHOW_COMP_OPTION have_response_time_distribution;
+ extern SHOW_COMP_OPTION have_geometry, have_rtree_keys;
+ extern SHOW_COMP_OPTION have_crypt;
+ extern SHOW_COMP_OPTION have_compress;
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2011-01-16 18:53:35.000000000 +0300
++++ b/sql/sql_parse.cc 2011-01-17 02:15:09.000000000 +0300
+@@ -88,6 +88,7 @@
+ #include "sp_cache.h"
+ #include "events.h"
+ #include "sql_trigger.h"
++#include "query_response_time.h"
+ #include "transaction.h"
+ #include "sql_audit.h"
+ #include "sql_prepare.h"
+@@ -1476,22 +1477,36 @@
+     Do not log administrative statements unless the appropriate option is
+     set.
+   */
++  #ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++  if (opt_enable_query_response_time_stats || thd->enable_slow_log)
++#else // HAVE_RESPONSE_TIME_DISTRIBUTION
+   if (thd->enable_slow_log)
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+   {
+-    ulonglong end_utime_of_query= thd->current_utime();
+-    thd_proc_info(thd, "logging slow query");
+-
+-    if (((thd->server_status & SERVER_QUERY_WAS_SLOW) ||
+-         ((thd->server_status &
+-           (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) &&
+-          opt_log_queries_not_using_indexes &&
+-           !(sql_command_flags[thd->lex->sql_command] & CF_STATUS_COMMAND))) &&
+-        thd->examined_row_count >= thd->variables.min_examined_row_limit)
++    ulonglong end_utime_of_query   = thd->current_utime();
++    ulonglong query_execution_time = end_utime_of_query - thd->utime_after_lock;
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++    if(opt_enable_query_response_time_stats)
++    {
++      query_response_time_collect(query_execution_time);
++    }
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++    if (thd->enable_slow_log)
+     {
+       thd_proc_info(thd, "logging slow query");
+-      thd->status_var.long_query_count++;
+-      slow_log_print(thd, thd->query(), thd->query_length(), 
+-                     end_utime_of_query);
++
++      if (((thd->server_status & SERVER_QUERY_WAS_SLOW) ||
++           ((thd->server_status &
++             (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) &&
++            opt_log_queries_not_using_indexes &&
++             !(sql_command_flags[thd->lex->sql_command] & CF_STATUS_COMMAND))) &&
++          thd->examined_row_count >= thd->variables.min_examined_row_limit)
++      {
++        thd_proc_info(thd, "logging slow query");
++        thd->status_var.long_query_count++;
++        slow_log_print(thd, thd->query(), thd->query_length(), 
++                       end_utime_of_query);
++      }
+     }
+   }
+   DBUG_VOID_RETURN;
+@@ -1610,6 +1625,7 @@
+   case SCH_CHARSETS:
+   case SCH_ENGINES:
+   case SCH_COLLATIONS:
++  case SCH_QUERY_RESPONSE_TIME:
+   case SCH_COLLATION_CHARACTER_SET_APPLICABILITY:
+   case SCH_USER_PRIVILEGES:
+   case SCH_SCHEMA_PRIVILEGES:
+diff -ruN a/sql/sql_reload.cc b/sql/sql_reload.cc
+--- a/sql/sql_reload.cc        2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_reload.cc        2011-01-17 02:16:19.000000000 +0300
+@@ -25,7 +25,7 @@
+ #include "hostname.h"    // hostname_cache_refresh
+ #include "sql_repl.h"    // reset_master, reset_slave
+ #include "debug_sync.h"
+-
++#include "query_response_time.h"
+ /**
+   Reload/resets privileges and the different caches.
+@@ -274,6 +274,12 @@
+ #endif
+  if (options & REFRESH_USER_RESOURCES)
+    reset_mqh((LEX_USER *) NULL, 0);             /* purecov: inspected */
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ if (options & REFRESH_QUERY_RESPONSE_TIME)
++ {
++   query_response_time_flush();
++ }
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+  *write_to_binlog= tmp_write_to_binlog;
+  /*
+    If the query was killed then this function must fail.
+diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
+--- a/sql/sql_show.cc  2011-01-16 18:53:35.000000000 +0300
++++ b/sql/sql_show.cc  2011-01-17 02:17:44.000000000 +0300
+@@ -50,6 +50,7 @@
+ #include "event_data_objects.h"
+ #endif
+ #include <my_dir.h>
++#include "query_response_time.h"
+ #include "lock.h"                           // MYSQL_OPEN_IGNORE_FLUSH
+ #include "debug_sync.h"
+ #include "datadict.h"   // dd_frm_type()
+@@ -7641,6 +7642,14 @@
+ */
++ST_FIELD_INFO query_response_time_fields_info[] =
++  {
++    {"time",  QRT_TIME_STRING_LENGTH,      MYSQL_TYPE_STRING,  0, 0,            "", SKIP_OPEN_TABLE },
++    {"count", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, MY_I_S_UNSIGNED, "", SKIP_OPEN_TABLE },
++    {"total",  QRT_TIME_STRING_LENGTH,     MYSQL_TYPE_STRING,  0, 0,            "", SKIP_OPEN_TABLE },
++    {0,       0,                           MYSQL_TYPE_STRING,  0, 0,             0, SKIP_OPEN_TABLE }
++  };
++
+ ST_SCHEMA_TABLE schema_tables[]=
+ {
+   {"CHARACTER_SETS", charsets_fields_info, create_schema_table, 
+@@ -7694,6 +7703,13 @@
+    1, 9, 0, OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY},
+   {"ROUTINES", proc_fields_info, create_schema_table, 
+    fill_schema_proc, make_proc_old_format, 0, -1, -1, 0, 0},
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++  {"QUERY_RESPONSE_TIME", query_response_time_fields_info, create_schema_table, 
++   query_response_time_fill, make_old_format, 0, -1, -1, 0, 0},
++#else 
++  {"QUERY_RESPONSE_TIME", query_response_time_fields_info, create_schema_table, 
++   0, make_old_format, 0, -1, -1, 0, 0},
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+   {"SCHEMATA", schema_fields_info, create_schema_table,
+    fill_schema_schemata, make_schemata_old_format, 0, 1, -1, 0, 0},
+   {"SCHEMA_PRIVILEGES", schema_privileges_fields_info, create_schema_table,
+diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy  2011-01-16 18:53:34.000000000 +0300
++++ b/sql/sql_yacc.yy  2011-01-17 02:19:03.000000000 +0300
+@@ -1193,6 +1193,7 @@
+ %token  PURGE
+ %token  QUARTER_SYM
+ %token  QUERY_SYM
++%token  QUERY_RESPONSE_TIME_SYM
+ %token  QUICK
+ %token  RANGE_SYM                     /* SQL-2003-R */
+ %token  READS_SYM                     /* SQL-2003-R */
+@@ -11089,6 +11090,15 @@
+           {
+             Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
+           }
++        | QUERY_RESPONSE_TIME_SYM wild_and_where
++         {
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++           LEX *lex= Lex;
++           lex->sql_command= SQLCOM_SELECT;
++           if (prepare_schema_table(YYTHD, lex, 0, SCH_QUERY_RESPONSE_TIME))
++             MYSQL_YYABORT;
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++         }
+         | CREATE PROCEDURE_SYM sp_name
+           {
+             LEX *lex= Lex;
+@@ -11325,6 +11335,12 @@
+           { Lex->type|= REFRESH_STATUS; }
+         | SLAVE
+           { Lex->type|= REFRESH_SLAVE; }
++        | QUERY_RESPONSE_TIME_SYM
++          { 
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++            Lex->type|= REFRESH_QUERY_RESPONSE_TIME;
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++          }
+         | MASTER_SYM
+           { Lex->type|= REFRESH_MASTER; }
+         | DES_KEY_FILE
+@@ -12610,6 +12626,7 @@
+         | PROXY_SYM                {}
+         | QUARTER_SYM              {}
+         | QUERY_SYM                {}
++        | QUERY_RESPONSE_TIME_SYM  {}
+         | QUICK                    {}
+         | READ_ONLY_SYM            {}
+         | REBUILD_SYM              {}
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc  2011-01-16 18:53:35.000000000 +0300
++++ b/sql/sys_vars.cc  2011-01-17 02:19:53.000000000 +0300
+@@ -49,6 +49,7 @@
+ #include "../storage/perfschema/pfs_server.h"
+ #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
++#include "query_response_time.h" 
+ /*
+   This forward declaration is needed because including sql_base.h
+   causes further includes.  [TODO] Eliminate this forward declaration
+@@ -1775,6 +1776,26 @@
+        DEFAULT(FALSE));
+ #endif /* HAVE_QUERY_CACHE */
++
++static Sys_var_have Sys_have_response_time_distribution(
++       "have_response_time_distribution", "have_response_time_distribution",
++       READ_ONLY GLOBAL_VAR(have_response_time_distribution), NO_CMD_LINE);
++
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++static Sys_var_mybool Sys_enable_query_response_time_stats(
++       "enable_query_response_time_stats", "Enable or disable query response time statisics collecting",
++       GLOBAL_VAR(opt_enable_query_response_time_stats), CMD_LINE(OPT_ARG),
++       DEFAULT(FALSE));
++
++static Sys_var_ulong Sys_query_response_time_range_base(
++       "query_response_time_range_base",
++       "Select base of log for query_response_time ranges. WARNING: variable change affect only after flush",
++       GLOBAL_VAR(opt_query_response_time_range_base),
++       CMD_LINE(REQUIRED_ARG), VALID_RANGE(2, QRT_MAXIMUM_BASE),
++       DEFAULT(QRT_DEFAULT_BASE),
++       BLOCK_SIZE(1));
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++
+ static Sys_var_mybool Sys_secure_auth(
+        "secure_auth",
+        "Disallow authentication for accounts that have old (pre-4.1) "
diff --git a/show_slave_status_nolock.patch b/show_slave_status_nolock.patch
new file mode 100644 (file)
index 0000000..3c1ed42
--- /dev/null
@@ -0,0 +1,114 @@
+# name       : show_slave_status_nolock.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/show_slave_status_nolock.patch b/patch_info/show_slave_status_nolock.patch
+--- a/patch_info/show_slave_status_nolock.patch        1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/show_slave_status_nolock.patch        2010-12-29 20:38:13.000000000 +0300
+@@ -0,0 +1,6 @@
++File=show_slave_status_nolock.patch
++Name= SHOW SLAVE STATUS NOLOCK
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment= Implement SHOW SLAVE STATUS without lock (STOP SLAVE lock the same mutex what lock SHOW SLAVE STATUS)
+diff -ruN a/sql/lex.h b/sql/lex.h
+--- a/sql/lex.h        2010-12-29 20:27:19.000000000 +0300
++++ b/sql/lex.h        2010-12-29 20:28:57.000000000 +0300
+@@ -378,6 +378,7 @@
+   { "NONE",           SYM(NONE_SYM)},
+   { "NOT",            SYM(NOT_SYM)},
+   { "NO_WRITE_TO_BINLOG",  SYM(NO_WRITE_TO_BINLOG)},
++  { "NOLOCK",           SYM(NOLOCK_SYM)},
+   { "NULL",           SYM(NULL_SYM)},
+   { "NUMERIC",                SYM(NUMERIC_SYM)},
+   { "NVARCHAR",               SYM(NVARCHAR_SYM)},
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc    2010-12-29 20:27:19.000000000 +0300
++++ b/sql/mysqld.cc    2010-12-29 20:29:19.000000000 +0300
+@@ -3058,6 +3058,7 @@
+   {"show_relaylog_events", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_RELAYLOG_EVENTS]), SHOW_LONG_STATUS},
+   {"show_slave_hosts",     (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_HOSTS]), SHOW_LONG_STATUS},
+   {"show_slave_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_STAT]), SHOW_LONG_STATUS},
++  {"show_slave_status_nolock", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_NOLOCK_STAT]), SHOW_LONG_STATUS},
+   {"show_status",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS]), SHOW_LONG_STATUS},
+   {"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS},
+   {"show_table_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS},
+diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
+--- a/sql/sql_lex.h    2010-12-29 20:27:19.000000000 +0300
++++ b/sql/sql_lex.h    2010-12-29 20:32:26.000000000 +0300
+@@ -190,6 +190,8 @@
+   SQLCOM_SHOW_PROFILE, SQLCOM_SHOW_PROFILES,
+   SQLCOM_SIGNAL, SQLCOM_RESIGNAL,
+   SQLCOM_SHOW_RELAYLOG_EVENTS, 
++  /* SHOW SLAVE STATUS NOLOCK */
++  SQLCOM_SHOW_SLAVE_NOLOCK_STAT,
+   /*
+     When a command is added here, be sure it's also added in mysqld.cc
+     in "struct show_var_st status_vars[]= {" ...
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2010-12-29 20:27:19.000000000 +0300
++++ b/sql/sql_parse.cc 2010-12-29 20:34:50.000000000 +0300
+@@ -336,6 +336,7 @@
+   sql_command_flags[SQLCOM_SHOW_CREATE]=  CF_STATUS_COMMAND;
+   sql_command_flags[SQLCOM_SHOW_MASTER_STAT]= CF_STATUS_COMMAND;
+   sql_command_flags[SQLCOM_SHOW_SLAVE_STAT]=  CF_STATUS_COMMAND;
++  sql_command_flags[SQLCOM_SHOW_SLAVE_NOLOCK_STAT]=  CF_STATUS_COMMAND;
+   sql_command_flags[SQLCOM_SHOW_CREATE_PROC]= CF_STATUS_COMMAND;
+   sql_command_flags[SQLCOM_SHOW_CREATE_FUNC]= CF_STATUS_COMMAND;
+   sql_command_flags[SQLCOM_SHOW_CREATE_TRIGGER]=  CF_STATUS_COMMAND;
+@@ -2262,12 +2263,16 @@
+     mysql_mutex_unlock(&LOCK_active_mi);
+     break;
+   }
++  case SQLCOM_SHOW_SLAVE_NOLOCK_STAT:
+   case SQLCOM_SHOW_SLAVE_STAT:
+   {
+     /* Accept one of two privileges */
+     if (check_global_access(thd, SUPER_ACL | REPL_CLIENT_ACL))
+       goto error;
+-    mysql_mutex_lock(&LOCK_active_mi);
++    if(SQLCOM_SHOW_SLAVE_NOLOCK_STAT != lex->sql_command)
++    {
++      mysql_mutex_lock(&LOCK_active_mi);
++    }
+     if (active_mi != NULL)
+     {
+       res = show_master_info(thd, active_mi);
+@@ -2278,7 +2283,10 @@
+                    WARN_NO_MASTER_INFO, ER(WARN_NO_MASTER_INFO));
+       my_ok(thd);
+     }
+-    mysql_mutex_unlock(&LOCK_active_mi);
++    if(SQLCOM_SHOW_SLAVE_NOLOCK_STAT != lex->sql_command)
++    {
++      mysql_mutex_unlock(&LOCK_active_mi);
++    }
+     break;
+   }
+   case SQLCOM_SHOW_MASTER_STAT:
+diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy  2010-12-29 20:27:19.000000000 +0300
++++ b/sql/sql_yacc.yy  2010-12-29 20:36:40.000000000 +0300
+@@ -1292,6 +1292,7 @@
+ %token  STARTS_SYM
+ %token  START_SYM                     /* SQL-2003-R */
+ %token  STATUS_SYM
++%token  NOLOCK_SYM                    /* SHOW SLAVE STATUS NOLOCK */
+ %token  STDDEV_SAMP_SYM               /* SQL-2003-N */
+ %token  STD_SYM
+ %token  STOP_SYM
+@@ -11095,6 +11096,10 @@
+           {
+             Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
+           }
++        | SLAVE STATUS_SYM NOLOCK_SYM
++          {
++          Lex->sql_command = SQLCOM_SHOW_SLAVE_NOLOCK_STAT; //SQLCOM_SHOW_SLAVE_NOLOCK_STAT;
++          }
+         | QUERY_RESPONSE_TIME_SYM wild_and_where
+          {
+ #ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
diff --git a/show_temp.patch b/show_temp.patch
new file mode 100644 (file)
index 0000000..ecdbf6a
--- /dev/null
@@ -0,0 +1,393 @@
+# name       : show_temp.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/sql/handler.h b/sql/handler.h
+--- a/sql/handler.h    2010-12-03 14:09:14.406955791 +0900
++++ b/sql/handler.h    2010-12-03 14:29:16.533356953 +0900
+@@ -569,6 +569,7 @@
+   SCH_EVENTS,
+   SCH_FILES,
+   SCH_GLOBAL_STATUS,
++  SCH_GLOBAL_TEMPORARY_TABLES,
+   SCH_GLOBAL_VARIABLES,
+   SCH_KEY_COLUMN_USAGE,
+   SCH_OPEN_TABLES,
+@@ -590,6 +591,7 @@
+   SCH_TABLE_CONSTRAINTS,
+   SCH_TABLE_NAMES,
+   SCH_TABLE_PRIVILEGES,
++  SCH_TEMPORARY_TABLES,
+   SCH_TRIGGERS,
+   SCH_USER_PRIVILEGES,
+   SCH_VARIABLES,
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc    2010-12-02 21:23:05.495293844 +0900
++++ b/sql/mysqld.cc    2010-12-03 14:25:40.317039327 +0900
+@@ -3038,6 +3038,7 @@
+   {"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS},
+   {"show_table_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS},
+   {"show_tables",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLES]), SHOW_LONG_STATUS},
++  {"show_temporary_tables",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TEMPORARY_TABLES]), SHOW_LONG_STATUS},
+   {"show_triggers",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TRIGGERS]), SHOW_LONG_STATUS},
+   {"show_variables",       (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_VARIABLES]), SHOW_LONG_STATUS},
+   {"show_warnings",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_WARNS]), SHOW_LONG_STATUS},
+diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
+--- a/sql/sql_lex.h    2010-12-02 19:22:40.040023288 +0900
++++ b/sql/sql_lex.h    2010-12-03 14:09:53.465292483 +0900
+@@ -186,7 +186,7 @@
+   SQLCOM_CREATE_EVENT, SQLCOM_ALTER_EVENT, SQLCOM_DROP_EVENT,
+   SQLCOM_SHOW_CREATE_EVENT, SQLCOM_SHOW_EVENTS,
+   SQLCOM_SHOW_CREATE_TRIGGER,
+-  SQLCOM_ALTER_DB_UPGRADE,
++  SQLCOM_ALTER_DB_UPGRADE, SQLCOM_SHOW_TEMPORARY_TABLES,
+   SQLCOM_SHOW_PROFILE, SQLCOM_SHOW_PROFILES,
+   SQLCOM_SIGNAL, SQLCOM_RESIGNAL,
+   SQLCOM_SHOW_RELAYLOG_EVENTS, 
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2010-12-02 19:22:40.046023936 +0900
++++ b/sql/sql_parse.cc 2010-12-03 14:09:53.471950455 +0900
+@@ -349,6 +349,9 @@
+    sql_command_flags[SQLCOM_SHOW_TABLES]=       (CF_STATUS_COMMAND |
+                                                  CF_SHOW_TABLE_COMMAND |
+                                                  CF_REEXECUTION_FRAGILE);
++   sql_command_flags[SQLCOM_SHOW_TEMPORARY_TABLES]=       (CF_STATUS_COMMAND |
++                                                 CF_SHOW_TABLE_COMMAND |
++                                                 CF_REEXECUTION_FRAGILE);
+   sql_command_flags[SQLCOM_SHOW_TABLE_STATUS]= (CF_STATUS_COMMAND |
+                                                 CF_SHOW_TABLE_COMMAND |
+                                                 CF_REEXECUTION_FRAGILE);
+@@ -1504,6 +1507,8 @@
+   case SCH_TABLE_NAMES:
+   case SCH_TABLES:
++  case SCH_TEMPORARY_TABLES:
++  case SCH_GLOBAL_TEMPORARY_TABLES:
+   case SCH_VIEWS:
+   case SCH_TRIGGERS:
+   case SCH_EVENTS:
+@@ -2011,6 +2016,7 @@
+   }
+   case SQLCOM_SHOW_DATABASES:
+   case SQLCOM_SHOW_TABLES:
++  case SQLCOM_SHOW_TEMPORARY_TABLES:
+   case SQLCOM_SHOW_TRIGGERS:
+   case SQLCOM_SHOW_TABLE_STATUS:
+   case SQLCOM_SHOW_OPEN_TABLES:
+@@ -4787,6 +4793,8 @@
+   case SCH_TABLE_NAMES:
+   case SCH_TABLES:
++  case SCH_TEMPORARY_TABLES:
++  case SCH_GLOBAL_TEMPORARY_TABLES:
+   case SCH_VIEWS:
+   case SCH_TRIGGERS:
+   case SCH_EVENTS:
+diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
+--- a/sql/sql_show.cc  2010-12-03 13:38:47.493070606 +0900
++++ b/sql/sql_show.cc  2010-12-03 14:27:04.590939717 +0900
+@@ -2685,6 +2685,7 @@
+     break;
+   case SQLCOM_SHOW_TABLES:
+   case SQLCOM_SHOW_TABLE_STATUS:
++  case SQLCOM_SHOW_TEMPORARY_TABLES:
+   case SQLCOM_SHOW_TRIGGERS:
+   case SQLCOM_SHOW_EVENTS:
+     thd->make_lex_string(&lookup_field_values->db_value, 
+@@ -3173,6 +3174,228 @@
+   return (uint) OPEN_FULL_TABLE;
+ }
++/**
++  @brief          Change I_S table item list for SHOW [GLOBAL] TEMPORARY TABLES [FROM/IN db]
++
++  @param[in]      thd                      thread handler
++  @param[in]      schema_table             I_S table
++
++  @return         Operation status
++    @retval       0                        success
++    @retval       1                        error
++*/
++int make_temporary_tables_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table)
++{
++  char tmp[128];
++  String buffer(tmp,sizeof(tmp), thd->charset());
++  LEX *lex= thd->lex;
++  Name_resolution_context *context= &lex->select_lex.context;
++
++  if (thd->lex->option_type == OPT_GLOBAL) {
++    ST_FIELD_INFO *field_info= &schema_table->fields_info[0];
++    Item_field *field= new Item_field(context, NullS, NullS, field_info->field_name);
++    if (add_item_to_list(thd, field))
++      return 1;
++    field->set_name(field_info->old_name, strlen(field_info->old_name), system_charset_info);
++  }
++
++  ST_FIELD_INFO *field_info= &schema_table->fields_info[2];
++  buffer.length(0);
++  buffer.append(field_info->old_name);
++  buffer.append(lex->select_lex.db);
++
++  if (lex->wild && lex->wild->ptr())
++  {
++    buffer.append(STRING_WITH_LEN(" ("));
++    buffer.append(lex->wild->ptr());
++    buffer.append(')');
++  }
++
++  Item_field *field= new Item_field(context, NullS, NullS, field_info->field_name);    
++  if (add_item_to_list(thd, field))
++    return 1;
++
++  field->set_name(buffer.ptr(), buffer.length(), system_charset_info);
++  return 0;
++}
++
++/**
++  @brief          Fill records for temporary tables by reading info from table object
++
++  @param[in]      thd                      thread handler
++  @param[in]      table                    I_S table
++  @param[in]      tmp_table                temporary table
++  @param[in]      db                       database name
++
++  @return         Operation status
++    @retval       0                        success
++    @retval       1                        error
++*/
++
++static int store_temporary_table_record(THD *thd, TABLE *table, TABLE *tmp_table, const char *db, bool table_name_only)
++{
++  CHARSET_INFO *cs= system_charset_info;
++  DBUG_ENTER("store_temporary_table_record");
++
++  if (db && my_strcasecmp(cs, db, tmp_table->s->db.str))
++    DBUG_RETURN(0);
++
++  restore_record(table, s->default_values);
++
++  //session_id
++  table->field[0]->store((longlong) thd->thread_id, TRUE);
++
++  //database
++  table->field[1]->store(tmp_table->s->db.str, tmp_table->s->db.length, cs);
++
++  //table
++  table->field[2]->store(tmp_table->s->table_name.str, tmp_table->s->table_name.length, cs);
++
++  if (table_name_only)
++    DBUG_RETURN(schema_table_store_record(thd, table));
++
++  //engine
++  handler *handle= tmp_table->file;
++  char *engineType = (char *)(handle ? handle->table_type() : "UNKNOWN");
++  table->field[3]->store(engineType, strlen(engineType), cs);
++
++  //name
++  if (tmp_table->s->path.str) {
++    char *p=strstr(tmp_table->s->path.str, "#sql");
++    int len=tmp_table->s->path.length-(p-tmp_table->s->path.str);
++    table->field[4]->store(p, min(FN_REFLEN, len), cs);
++  }
++
++  // file stats
++  handler *file= tmp_table->file;
++
++  if (file) {
++
++    MYSQL_TIME time;
++
++    /**
++        TODO: InnoDB stat(file) checks file on short names within data dictionary
++        rather than using full path, because of that, temp files created in
++        TMPDIR will not have access/create time as it will not find the file
++
++        The fix is to patch InnoDB to use full path
++    */
++    file->info(HA_STATUS_VARIABLE | HA_STATUS_TIME | HA_STATUS_NO_LOCK);
++
++    table->field[5]->store((longlong) file->stats.records, TRUE);
++    table->field[5]->set_notnull();
++
++    table->field[6]->store((longlong) file->stats.mean_rec_length, TRUE);
++    table->field[7]->store((longlong) file->stats.data_file_length, TRUE);
++    table->field[8]->store((longlong) file->stats.index_file_length, TRUE);
++    if (file->stats.create_time)
++    {
++      thd->variables.time_zone->gmt_sec_to_TIME(&time,
++                                                (my_time_t) file->stats.create_time);
++      table->field[9]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
++      table->field[9]->set_notnull();
++    }
++    if (file->stats.update_time)
++    {
++      thd->variables.time_zone->gmt_sec_to_TIME(&time,
++                                                (my_time_t) file->stats.update_time);
++      table->field[10]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
++      table->field[10]->set_notnull();
++    }
++  }
++
++  DBUG_RETURN(schema_table_store_record(thd, table));
++}
++
++/**
++  @brief          Fill I_S tables with the temporary tables of every thread
++                  in the global thread list (LOCK_thread_count is held throughout)
++  @param[in]      thd                      thread handler (its security context drives the ACL checks)
++  @param[in]      tables                   I_S table
++  @param[in]      cond                     'WHERE' condition (not referenced in the body)
++
++  @return         Operation status
++    @retval       0                        success
++    @retval       1                        error
++*/
++
++static int fill_global_temporary_tables(THD *thd, TABLE_LIST *tables, COND *cond)
++{
++  DBUG_ENTER("fill_global_temporary_tables");
++
++  mysql_mutex_lock(&LOCK_thread_count);  // released on both return paths below
++
++  bool table_names_only= (thd->lex->sql_command == SQLCOM_SHOW_TEMPORARY_TABLES) ? 1 : 0;  // SHOW TEMPORARY TABLES
++  I_List_iterator<THD> it(threads);
++  THD *thd_item;
++  TABLE *tmp;
++
++#ifndef NO_EMBEDDED_ACCESS_CHECKS
++  Security_context *sctx= thd->security_ctx;
++  uint db_access;
++#endif
++ 
++  while ((thd_item=it++)) {  // every thread in the list
++    for (tmp=thd_item->temporary_tables; tmp; tmp=tmp->next) {  // every temporary table of that thread
++
++#ifndef NO_EMBEDDED_ACCESS_CHECKS
++      if (test_all_bits(sctx->master_access, DB_ACLS))  // all DB_ACLS bits already set in master_access
++        db_access=DB_ACLS;
++      else  // otherwise combine the per-database ACL with the master_access bits
++        db_access= (acl_get(sctx->host, sctx->ip, sctx->priv_user, tmp->s->db.str, 0) | sctx->master_access);
++
++      if (!(db_access & DB_ACLS) && check_grant_db(thd,tmp->s->db.str)) {
++        // the requesting user has no access to temporary tables in this db: skip the table
++        continue;
++      }
++#endif
++
++      THD *t= tmp->in_use;  // remember the current value so it can be restored
++      tmp->in_use= thd;  // NOTE(review): presumably so the stats read runs on behalf of the current thread - confirm
++
++      if (store_temporary_table_record(thd_item, tables->table, tmp, thd->lex->select_lex.db, table_names_only)) {
++        tmp->in_use= t;  // restore before bailing out
++        mysql_mutex_unlock(&LOCK_thread_count); 
++        DBUG_RETURN(1);
++      }
++
++      tmp->in_use= t;  // restore after a successful store
++    }
++  }
++
++  mysql_mutex_unlock(&LOCK_thread_count); 
++  DBUG_RETURN(0);
++}
++
++/**
++  @brief          Fill I_S tables with session temporary tables
++                  (an OPT_GLOBAL request is handed to fill_global_temporary_tables())
++  @param[in]      thd                      thread handler
++  @param[in]      tables                   I_S table
++  @param[in]      cond                     'WHERE' condition (only forwarded to the global variant)
++
++  @return         Operation status
++    @retval       0                        success
++    @retval       1                        error
++*/
++
++int fill_temporary_tables(THD *thd, TABLE_LIST *tables, COND *cond)
++{
++  DBUG_ENTER("fill_temporary_tables");
++
++  if (thd->lex->option_type == OPT_GLOBAL)  // global scope requested: list every thread's tables instead
++    DBUG_RETURN(fill_global_temporary_tables(thd, tables, cond));
++
++  bool table_names_only= (thd->lex->sql_command == SQLCOM_SHOW_TEMPORARY_TABLES) ? 1 : 0;  // SHOW TEMPORARY TABLES
++  TABLE *tmp;
++
++  for (tmp=thd->temporary_tables; tmp; tmp=tmp->next) {  // only the calling thread's own temporary tables
++    if (store_temporary_table_record(thd, tables->table, tmp, thd->lex->select_lex.db, table_names_only)) {
++      DBUG_RETURN(1);  // stop at the first record that fails to store
++    }
++  }
++  DBUG_RETURN(0);
++}
+ /**
+    Try acquire high priority share metadata lock on a table (with
+@@ -6802,6 +7025,25 @@
+   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
+ };
++ST_FIELD_INFO temporary_table_fields_info[]=  /* columns shared by TEMPORARY_TABLES and GLOBAL_TEMPORARY_TABLES */
++{
++  {"SESSION_ID", 4, MYSQL_TYPE_LONGLONG, 0, 0, "Session", SKIP_OPEN_TABLE},
++  {"TABLE_SCHEMA", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, "Db", SKIP_OPEN_TABLE},
++  {"TABLE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, "Temp_tables_in_", SKIP_OPEN_TABLE},  /* field[2]: share table_name */
++  {"ENGINE", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, "Engine", OPEN_FRM_ONLY},  /* field[3]: handler table_type() or "UNKNOWN" */
++  {"NAME", FN_REFLEN, MYSQL_TYPE_STRING, 0, 0, "Name", SKIP_OPEN_TABLE},  /* field[4]: share path from "#sql" onwards */
++  {"TABLE_ROWS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
++   MY_I_S_UNSIGNED, "Rows", OPEN_FULL_TABLE},  /* field[5]: stats.records */
++  {"AVG_ROW_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 
++   MY_I_S_UNSIGNED, "Avg Row", OPEN_FULL_TABLE},  /* field[6]: stats.mean_rec_length */
++  {"DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 
++   MY_I_S_UNSIGNED, "Data Length", OPEN_FULL_TABLE},  /* field[7]: stats.data_file_length */
++  {"INDEX_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 
++   MY_I_S_UNSIGNED, "Index Size", OPEN_FULL_TABLE},  /* field[8]: stats.index_file_length */
++  {"CREATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Create Time", OPEN_FULL_TABLE},  /* field[9]: set only when stats.create_time is non-zero */
++  {"UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Update Time", OPEN_FULL_TABLE},  /* field[10]: set only when stats.update_time is non-zero */
++  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}  /* terminator entry */
++};
+ ST_FIELD_INFO columns_fields_info[]=
+ {
+@@ -7416,6 +7658,9 @@
+    hton_fill_schema_table, 0, 0, -1, -1, 0, 0},
+   {"GLOBAL_STATUS", variables_fields_info, create_schema_table,
+    fill_status, make_old_format, 0, 0, -1, 0, 0},
++  {"GLOBAL_TEMPORARY_TABLES", temporary_table_fields_info, create_schema_table, 
++   fill_global_temporary_tables, make_temporary_tables_old_format, 0, 2, 3, 0,
++   OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE},
+   {"GLOBAL_VARIABLES", variables_fields_info, create_schema_table,
+    fill_variables, make_old_format, 0, 0, -1, 0, 0},
+   {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table,
+@@ -7465,6 +7710,9 @@
+    get_all_tables, make_table_names_old_format, 0, 1, 2, 1, 0},
+   {"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table,
+    fill_schema_table_privileges, 0, 0, -1, -1, 0, 0},
++  {"TEMPORARY_TABLES", temporary_table_fields_info, create_schema_table,
++   fill_temporary_tables, make_temporary_tables_old_format, 0, 2, 3, 0,
++   OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE},
+   {"TRIGGERS", triggers_fields_info, create_schema_table,
+    get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0,
+    OPEN_TRIGGER_ONLY|OPTIMIZE_I_S_TABLE},
+diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy  2010-12-02 19:22:40.077024170 +0900
++++ b/sql/sql_yacc.yy  2010-12-03 14:09:53.496023791 +0900
+@@ -10869,6 +10869,15 @@
+              if (prepare_schema_table(YYTHD, lex, 0, SCH_TABLE_NAMES))
+                MYSQL_YYABORT;
+            }
++         | opt_var_type TEMPORARY TABLES opt_db
++           {
++             LEX *lex= Lex;
++             lex->sql_command= SQLCOM_SHOW_TEMPORARY_TABLES;
++             lex->option_type= $1;
++             lex->select_lex.db= $4;
++             if (prepare_schema_table(YYTHD, lex, 0, SCH_TEMPORARY_TABLES))
++               MYSQL_YYABORT;
++           }
+          | opt_full TRIGGERS_SYM opt_db wild_and_where
+            {
+              LEX *lex= Lex;
diff --git a/slow_extended.patch b/slow_extended.patch
new file mode 100644 (file)
index 0000000..a7a16b1
--- /dev/null
@@ -0,0 +1,1243 @@
+# name       : slow_extended.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/include/mysql/plugin_audit.h.pp b/include/mysql/plugin_audit.h.pp
+--- a/include/mysql/plugin_audit.h.pp  2010-11-03 01:01:11.000000000 +0300
++++ b/include/mysql/plugin_audit.h.pp  2010-12-16 04:27:46.000000000 +0300
+@@ -178,6 +178,16 @@
+ char *thd_security_context(void* thd, char *buffer, unsigned int length,
+                            unsigned int max_query_len);
+ void thd_inc_row_count(void* thd);
++void increment_thd_innodb_stats(void* thd,
++                    unsigned long long trx_id,
++                    long io_reads,
++                    long long io_read,
++                    long io_reads_wait_timer,
++                    long lock_que_wait_timer,
++                    long que_wait_timer,
++                    long page_access);
++unsigned long thd_log_slow_verbosity(const void* thd);
++int thd_opt_slow_log();
+ int mysql_tmpfile(const char *prefix);
+ int thd_killed(const void* thd);
+ unsigned long thd_get_thread_id(const void* thd);
+diff -ruN a/include/mysql/plugin_auth.h.pp b/include/mysql/plugin_auth.h.pp
+--- a/include/mysql/plugin_auth.h.pp   2010-11-03 01:01:11.000000000 +0300
++++ b/include/mysql/plugin_auth.h.pp   2010-12-16 04:27:46.000000000 +0300
+@@ -178,6 +178,16 @@
+ char *thd_security_context(void* thd, char *buffer, unsigned int length,
+                            unsigned int max_query_len);
+ void thd_inc_row_count(void* thd);
++void increment_thd_innodb_stats(void* thd,
++                    unsigned long long trx_id,
++                    long io_reads,
++                    long long io_read,
++                    long io_reads_wait_timer,
++                    long lock_que_wait_timer,
++                    long que_wait_timer,
++                    long page_access);
++unsigned long thd_log_slow_verbosity(const void* thd);
++int thd_opt_slow_log();
+ int mysql_tmpfile(const char *prefix);
+ int thd_killed(const void* thd);
+ unsigned long thd_get_thread_id(const void* thd);
+diff -ruN a/include/mysql/plugin_ftparser.h.pp b/include/mysql/plugin_ftparser.h.pp
+--- a/include/mysql/plugin_ftparser.h.pp       2010-11-03 01:01:11.000000000 +0300
++++ b/include/mysql/plugin_ftparser.h.pp       2010-12-16 04:27:46.000000000 +0300
+@@ -131,6 +131,16 @@
+ char *thd_security_context(void* thd, char *buffer, unsigned int length,
+                            unsigned int max_query_len);
+ void thd_inc_row_count(void* thd);
++void increment_thd_innodb_stats(void* thd,
++                    unsigned long long trx_id,
++                    long io_reads,
++                    long long io_read,
++                    long io_reads_wait_timer,
++                    long lock_que_wait_timer,
++                    long que_wait_timer,
++                    long page_access);
++unsigned long thd_log_slow_verbosity(const void* thd);
++int thd_opt_slow_log();
+ int mysql_tmpfile(const char *prefix);
+ int thd_killed(const void* thd);
+ unsigned long thd_get_thread_id(const void* thd);
+diff -ruN a/include/mysql/plugin.h b/include/mysql/plugin.h
+--- a/include/mysql/plugin.h   2010-11-03 01:01:11.000000000 +0300
++++ b/include/mysql/plugin.h   2010-12-16 04:27:46.000000000 +0300
+@@ -536,6 +536,17 @@
+ /* Increments the row counter, see THD::row_count */
+ void thd_inc_row_count(MYSQL_THD thd);
++void increment_thd_innodb_stats(MYSQL_THD thd,
++                    unsigned long long trx_id,
++                    long io_reads,
++                    long long io_read,
++                    long io_reads_wait_timer,
++                    long lock_que_wait_timer,
++                    long que_wait_timer,
++                    long page_access);
++unsigned long thd_log_slow_verbosity(const MYSQL_THD thd);
++int thd_opt_slow_log();
++#define EXTENDED_SLOWLOG
+ /**
+   Create a temporary file.
+diff -ruN a/patch_info/slow_extended.info b/patch_info/slow_extended.info
+--- a/patch_info/slow_extended.info    1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/slow_extended.info    2010-12-16 04:27:46.000000000 +0300
+@@ -0,0 +1,25 @@
++File=slow_extended.patch
++Name=Extended statistics in slow.log (not InnoDB part)
++Version=1.3
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
++Changelog
++2008-11-26
++YK: Fix inefficient determination of trx. Avoid useless gettimeofday calls when the slow log is not in use. Make log_slow_queries dynamic (bool).
++
++2008-11-07
++VT: Moved log_slow_rate_limit in SHOW VARIABLE into right place
++
++2008-11
++Arjen Lentz: Fixups (backward compatibility) by Arjen Lentz <arjen@openquery.com.au>
++
++2010-07
++1) Fix overflow of query time and lock time (Bug 600360) (slow_extended_fix_overflow.patch merged)
++2) Control global slow feature merged (control_global_slow.patch merged)
++3) Microseconds in slow query log merged (microseconds_in_slow_query_log.patch merged)
++4) Now use_global_long_query_time and use_global_log_slow_control are synonyms. Add value "all" for use_global_log_slow_control (contol-global_slow-2.patch merged)
++5) Fix innodb_stats on replication (Bug 600684)
++6) Change variable types (system/command-line)
++2010-01
++Patch profiling_slow.patch was merged
+diff -ruN a/scripts/mysqldumpslow.sh b/scripts/mysqldumpslow.sh
+--- a/scripts/mysqldumpslow.sh 2010-11-03 01:01:13.000000000 +0300
++++ b/scripts/mysqldumpslow.sh 2010-12-16 04:27:46.000000000 +0300
+@@ -83,8 +83,8 @@
+     s/^#? Time: \d{6}\s+\d+:\d+:\d+.*\n//;
+     my ($user,$host) = s/^#? User\@Host:\s+(\S+)\s+\@\s+(\S+).*\n// ? ($1,$2) : ('','');
+-    s/^# Query_time: ([0-9.]+)\s+Lock_time: ([0-9.]+)\s+Rows_sent: ([0-9.]+).*\n//;
+-    my ($t, $l, $r) = ($1, $2, $3);
++    s/^# Query_time: (\d+(\.\d+)?)\s+Lock_time: (\d+(\.\d+)?)\s+Rows_sent: (\d+(\.\d+)?).*\n//;
++    my ($t, $l, $r) = ($1, $3, $5);  # \s+ accepts both the stock (one space before Rows_sent) and the extended layout
+     $t -= $l unless $opt{l};
+     # remove fluff that mysqld writes to log when it (re)starts:
+diff -ruN a/sql/event_scheduler.cc b/sql/event_scheduler.cc
+--- a/sql/event_scheduler.cc   2010-11-03 01:01:14.000000000 +0300
++++ b/sql/event_scheduler.cc   2010-12-16 04:27:46.000000000 +0300
+@@ -195,6 +195,7 @@
+   thd->client_capabilities|= CLIENT_MULTI_RESULTS;
+   mysql_mutex_lock(&LOCK_thread_count);
+   thd->thread_id= thd->variables.pseudo_thread_id= thread_id++;
++  thd->write_to_slow_log = TRUE;
+   mysql_mutex_unlock(&LOCK_thread_count);
+   /*
+diff -ruN a/sql/filesort.cc b/sql/filesort.cc
+--- a/sql/filesort.cc  2010-11-03 01:01:14.000000000 +0300
++++ b/sql/filesort.cc  2010-12-16 04:27:46.000000000 +0300
+@@ -197,6 +197,7 @@
+   {
+     status_var_increment(thd->status_var.filesort_scan_count);
+   }
++  thd->query_plan_flags|= QPLAN_FILESORT;
+ #ifdef CAN_TRUST_RANGE
+   if (select && select->quick && select->quick->records > 0L)
+   {
+@@ -262,6 +263,7 @@
+   }
+   else
+   {
++    thd->query_plan_flags|= QPLAN_FILESORT_DISK;
+     if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer)
+     {
+       my_free(table_sort.buffpek);
+@@ -1201,6 +1203,7 @@
+   DBUG_ENTER("merge_buffers");
+   status_var_increment(current_thd->status_var.filesort_merge_passes);
++  current_thd->query_plan_fsort_passes++;
+   if (param->not_killable)
+   {
+     killed= &not_killable;
+diff -ruN a/sql/log.cc b/sql/log.cc
+--- a/sql/log.cc       2010-11-03 07:01:14.000000000 +0900
++++ b/sql/log.cc       2010-12-02 19:28:31.337989417 +0900
+@@ -630,11 +630,13 @@
+ */
+ bool Log_to_csv_event_handler::
+-  log_slow(THD *thd, time_t current_time, time_t query_start_arg,
++  log_slow(THD *thd, ulonglong current_utime, time_t query_start_arg,
+            const char *user_host, uint user_host_len,
+            ulonglong query_utime, ulonglong lock_utime, bool is_command,
+            const char *sql_text, uint sql_text_len)
+ {
++  time_t current_time= my_time_possible_from_micro(current_utime);
++
+   TABLE_LIST table_list;
+   TABLE *table;
+   bool result= TRUE;
+@@ -850,14 +852,14 @@
+ /** Wrapper around MYSQL_LOG::write() for slow log. */
+ bool Log_to_file_event_handler::
+-  log_slow(THD *thd, time_t current_time, time_t query_start_arg,
++  log_slow(THD *thd, ulonglong current_utime, time_t query_start_arg,
+            const char *user_host, uint user_host_len,
+            ulonglong query_utime, ulonglong lock_utime, bool is_command,
+            const char *sql_text, uint sql_text_len)
+ {
+   Silence_log_table_errors error_handler;
+   thd->push_internal_handler(&error_handler);
+-  bool retval= mysql_slow_log.write(thd, current_time, query_start_arg,
++  bool retval= mysql_slow_log.write(thd, current_utime, query_start_arg,
+                                     user_host, user_host_len,
+                                     query_utime, lock_utime, is_command,
+                                     sql_text, sql_text_len);
+@@ -1131,7 +1133,7 @@
+     /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
+     user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
+                              sctx->priv_user ? sctx->priv_user : "", "[",
+-                             sctx->user ? sctx->user : "", "] @ ",
++                             sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ",
+                              sctx->host ? sctx->host : "", " [",
+                              sctx->ip ? sctx->ip : "", "]", NullS) -
+                     user_host_buff);
+@@ -1139,8 +1141,22 @@
+     current_time= my_time_possible_from_micro(current_utime);
+     if (thd->start_utime)
+     {
+-      query_utime= (current_utime - thd->start_utime);
+-      lock_utime=  (thd->utime_after_lock - thd->start_utime);
++      if(current_utime < thd->start_utime)
++      {
++        query_utime= 0;
++      }
++      else
++      {
++        query_utime= (current_utime - thd->start_utime);
++      }
++      if(thd->utime_after_lock < thd->start_utime)
++      {
++        lock_utime= 0;
++      }
++      else
++      {
++        lock_utime= (thd->utime_after_lock - thd->start_utime);
++      }
+     }
+     else
+     {
+@@ -1154,8 +1170,20 @@
+       query_length= command_name[thd->command].length;
+     }
++    if (!query_length)  /* empty query text: zero the per-statement slow-log counters before logging */
++    {
++      thd->sent_row_count= 0;
++      thd->examined_row_count= 0;
++      thd->bytes_sent_old= thd->status_var.bytes_sent;  /* makes the logged Bytes_sent delta 0 */
++      thd->tmp_tables_used= thd->tmp_tables_disk_used= 0;
++      thd->tmp_tables_size= 0;
++      thd->innodb_was_used= FALSE;
++      thd->query_plan_flags= QPLAN_NONE;
++      thd->query_plan_fsort_passes= 0;
++    }
++
+     for (current_handler= slow_log_handler_list; *current_handler ;)
+-      error= (*current_handler++)->log_slow(thd, current_time, thd->start_time,
++      error= (*current_handler++)->log_slow(thd, current_utime, thd->start_time,
+                                             user_host_buff, user_host_len,
+                                             query_utime, lock_utime, is_command,
+                                             query, query_length) || error;
+@@ -2539,12 +2567,13 @@
+     TRUE - error occured
+ */
+-bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
++bool MYSQL_QUERY_LOG::write(THD *thd, ulonglong current_utime,
+                             time_t query_start_arg, const char *user_host,
+                             uint user_host_len, ulonglong query_utime,
+                             ulonglong lock_utime, bool is_command,
+                             const char *sql_text, uint sql_text_len)
+ {
++  time_t current_time= my_time_possible_from_micro(current_utime);
+   bool error= 0;
+   DBUG_ENTER("MYSQL_QUERY_LOG::write");
+@@ -2566,17 +2595,28 @@
+     if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
+     {
+-      if (current_time != last_time)
++      if (opt_log_slow_timestamp_every || current_time != last_time)
+       {
+         last_time= current_time;
+         struct tm start;
+         localtime_r(&current_time, &start);
+-
+-        buff_len= my_snprintf(buff, sizeof buff,
+-                              "# Time: %02d%02d%02d %2d:%02d:%02d\n",
+-                              start.tm_year % 100, start.tm_mon + 1,
+-                              start.tm_mday, start.tm_hour,
+-                              start.tm_min, start.tm_sec);
++      if(opt_slow_query_log_microseconds_timestamp)
++      {
++        ulonglong microsecond = current_utime % (1000 * 1000);
++        buff_len= snprintf(buff, sizeof buff,
++          "# Time: %02d%02d%02d %2d:%02d:%02d.%010lld\n",
++            start.tm_year % 100, start.tm_mon + 1,
++          start.tm_mday, start.tm_hour,
++          start.tm_min, start.tm_sec,microsecond);
++      }
++      else
++      {
++        buff_len= my_snprintf(buff, sizeof buff,
++          "# Time: %02d%02d%02d %2d:%02d:%02d\n",
++            start.tm_year % 100, start.tm_mon + 1,
++          start.tm_mday, start.tm_hour,
++          start.tm_min, start.tm_sec);
++      }
+         /* Note that my_b_write() assumes it knows the length for this */
+         if (my_b_write(&log_file, (uchar*) buff, buff_len))
+@@ -2594,12 +2634,69 @@
+     sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
+     sprintf(lock_time_buff,  "%.6f", ulonglong2double(lock_utime)/1000000.0);
+     if (my_b_printf(&log_file,
+-                    "# Query_time: %s  Lock_time: %s"
+-                    " Rows_sent: %lu  Rows_examined: %lu\n",
++                    "# Thread_id: %lu  Schema: %s  Last_errno: %u  Killed: %u\n" \
++                    "# Query_time: %s  Lock_time: %s  Rows_sent: %lu  Rows_examined: %lu  Rows_affected: %lu  Rows_read: %lu\n"
++                    "# Bytes_sent: %lu  Tmp_tables: %lu  Tmp_disk_tables: %lu  Tmp_table_sizes: %lu\n",
++                    (ulong) thd->thread_id, (thd->db ? thd->db : ""),
++                    thd->last_errno, (uint) thd->killed,
+                     query_time_buff, lock_time_buff,
+                     (ulong) thd->sent_row_count,
+-                    (ulong) thd->examined_row_count) == (uint) -1)
++                    (ulong) thd->examined_row_count,  /* Rows_examined */
++                    ((long) thd->get_row_count_func() > 0 ) ? (ulong) thd->get_row_count_func() : 0,  /* Rows_affected: non-positive counts are logged as 0 */
++                    (ulong) thd->sent_row_count,  /* Rows_read - NOTE(review): same value as Rows_sent above; confirm this is intended */
++                    (ulong) (thd->status_var.bytes_sent - thd->bytes_sent_old),  /* Bytes_sent: delta against bytes_sent_old */
++                    (ulong) thd->tmp_tables_used,  /* Tmp_tables */
++                    (ulong) thd->tmp_tables_disk_used,  /* Tmp_disk_tables */
++                    (ulong) thd->tmp_tables_size) == (uint) -1)  /* Tmp_table_sizes */
+       tmp_errno= errno;
++
++#if defined(ENABLED_PROFILING)
++    thd->profiling.print_current(&log_file);
++#endif
++    if (thd->innodb_was_used)
++    {
++      char buf[20];
++      snprintf(buf, 20, "%llX", thd->innodb_trx_id);
++      if (my_b_printf(&log_file,
++                    "# InnoDB_trx_id: %s\n", buf) == (uint) -1)
++        tmp_errno=errno;
++    }
++    if ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_QUERY_PLAN)) &&
++         my_b_printf(&log_file,
++                    "# QC_Hit: %s  Full_scan: %s  Full_join: %s  Tmp_table: %s  Tmp_table_on_disk: %s\n" \
++                    "# Filesort: %s  Filesort_on_disk: %s  Merge_passes: %lu\n",
++                    ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
++                    ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
++                    ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
++                    ((thd->query_plan_flags & QPLAN_TMP_TABLE) ? "Yes" : "No"),
++                    ((thd->query_plan_flags & QPLAN_TMP_DISK) ? "Yes" : "No"),
++                    ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
++                    ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ? "Yes" : "No"),
++                    thd->query_plan_fsort_passes) == (uint) -1)
++      tmp_errno=errno;
++    if ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_INNODB)) && thd->innodb_was_used)
++    {
++      char buf[3][20];
++      snprintf(buf[0], 20, "%.6f", thd->innodb_io_reads_wait_timer / 1000000.0);
++      snprintf(buf[1], 20, "%.6f", thd->innodb_lock_que_wait_timer / 1000000.0);
++      snprintf(buf[2], 20, "%.6f", thd->innodb_innodb_que_wait_timer / 1000000.0);
++      if (my_b_printf(&log_file,
++                      "#   InnoDB_IO_r_ops: %lu  InnoDB_IO_r_bytes: %lu  InnoDB_IO_r_wait: %s\n" \
++                      "#   InnoDB_rec_lock_wait: %s  InnoDB_queue_wait: %s\n" \
++                      "#   InnoDB_pages_distinct: %lu\n",
++                      (ulong) thd->innodb_io_reads,
++                      (ulong) thd->innodb_io_read,
++                      buf[0], buf[1], buf[2],
++                      (ulong) thd->innodb_page_access) == (uint) -1)
++        tmp_errno=errno;
++    } 
++    else
++    {
++      if ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_INNODB)) &&
++          my_b_printf(&log_file,"# No InnoDB statistics available for this query\n") == (uint) -1)
++        tmp_errno=errno;
++    }
++
+     if (thd->db && strcmp(thd->db, db))
+     {                                         // Database changed
+       if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1)
+diff -ruN a/sql/log.h b/sql/log.h
+--- a/sql/log.h        2010-11-03 01:01:14.000000000 +0300
++++ b/sql/log.h        2010-12-16 04:27:46.000000000 +0300
+@@ -242,7 +242,7 @@
+              uint user_host_len, int thread_id,
+              const char *command_type, uint command_type_len,
+              const char *sql_text, uint sql_text_len);
+-  bool write(THD *thd, time_t current_time, time_t query_start_arg,
++  bool write(THD *thd, ulonglong current_time, time_t query_start_arg,
+              const char *user_host, uint user_host_len,
+              ulonglong query_utime, ulonglong lock_utime, bool is_command,
+              const char *sql_text, uint sql_text_len);
+@@ -492,7 +492,7 @@
+   virtual bool init()= 0;
+   virtual void cleanup()= 0;
+-  virtual bool log_slow(THD *thd, time_t current_time,
++  virtual bool log_slow(THD *thd, ulonglong current_time,
+                         time_t query_start_arg, const char *user_host,
+                         uint user_host_len, ulonglong query_utime,
+                         ulonglong lock_utime, bool is_command,
+@@ -521,7 +521,7 @@
+   virtual bool init();
+   virtual void cleanup();
+-  virtual bool log_slow(THD *thd, time_t current_time,
++  virtual bool log_slow(THD *thd, ulonglong current_utime,
+                         time_t query_start_arg, const char *user_host,
+                         uint user_host_len, ulonglong query_utime,
+                         ulonglong lock_utime, bool is_command,
+@@ -553,7 +553,7 @@
+   virtual bool init();
+   virtual void cleanup();
+-  virtual bool log_slow(THD *thd, time_t current_time,
++  virtual bool log_slow(THD *thd, ulonglong current_utime,
+                         time_t query_start_arg, const char *user_host,
+                         uint user_host_len, ulonglong query_utime,
+                         ulonglong lock_utime, bool is_command,
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc    2010-12-16 04:27:10.000000000 +0300
++++ b/sql/mysqld.cc    2010-12-16 04:27:46.000000000 +0300
+@@ -418,6 +418,10 @@
+ char* opt_secure_file_priv;
+ my_bool opt_log_slow_admin_statements= 0;
+ my_bool opt_log_slow_slave_statements= 0;
++my_bool opt_log_slow_sp_statements= 0;
++my_bool opt_log_slow_timestamp_every= 0;
++ulonglong opt_use_global_log_slow_control= 0;
++my_bool opt_slow_query_log_microseconds_timestamp= 0;
+ my_bool lower_case_file_system= 0;
+ my_bool opt_large_pages= 0;
+ my_bool opt_super_large_pages= 0;
+@@ -5789,10 +5793,10 @@
+    "Log slow OPTIMIZE, ANALYZE, ALTER and other administrative statements to "
+    "the slow log if it is open.", &opt_log_slow_admin_statements,
+    &opt_log_slow_admin_statements, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+- {"log-slow-slave-statements", 0,
++ /*{"log-slow-slave-statements", 0,
+   "Log slow statements executed by slave thread to the slow log if it is open.",
+   &opt_log_slow_slave_statements, &opt_log_slow_slave_statements,
+-  0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
++  0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},*/
+   {"log-slow-queries", OPT_SLOW_QUERY_LOG,
+    "Log slow queries to a table or log file. Defaults logging to table "
+    "mysql.slow_log or hostname-slow.log if --log-output=file is used. "
+@@ -7182,6 +7186,10 @@
+ C_MODE_END
++/* defined in sys_vars.cc */
++extern void init_log_slow_verbosity();
++extern void init_use_global_log_slow_control();
++
+ /**
+   Get server options from the command line,
+   and perform related server initializations.
+@@ -7321,6 +7329,8 @@
+   global_system_variables.long_query_time= (ulonglong)
+     (global_system_variables.long_query_time_double * 1e6);
++  init_log_slow_verbosity();
++  init_use_global_log_slow_control();
+   if (opt_short_log_format)
+     opt_specialflag|= SPECIAL_SHORT_LOG_FORMAT;
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h     2010-12-16 04:27:10.000000000 +0300
++++ b/sql/mysqld.h     2010-12-16 04:27:46.000000000 +0300
+@@ -116,6 +116,10 @@
+ extern char* opt_secure_backup_file_priv;
+ extern size_t opt_secure_backup_file_priv_len;
+ extern my_bool opt_log_slow_admin_statements, opt_log_slow_slave_statements;
++extern my_bool opt_log_slow_sp_statements;
++extern my_bool opt_log_slow_timestamp_every;
++extern ulonglong opt_use_global_log_slow_control;
++extern my_bool opt_slow_query_log_microseconds_timestamp;
+ extern my_bool sp_automatic_privileges, opt_noacl;
+ extern my_bool opt_old_style_user_limits, trust_function_creators;
+ extern uint opt_crash_binlog_innodb;
+diff -ruN a/sql/slave.cc b/sql/slave.cc
+--- a/sql/slave.cc     2010-11-03 01:01:14.000000000 +0300
++++ b/sql/slave.cc     2010-12-16 04:27:46.000000000 +0300
+@@ -2037,6 +2037,7 @@
+     + MAX_LOG_EVENT_HEADER;  /* note, incr over the global not session var */
+   thd->slave_thread = 1;
+   thd->enable_slow_log= opt_log_slow_slave_statements;
++  thd->write_to_slow_log= opt_log_slow_slave_statements;
+   set_slave_thread_options(thd);
+   thd->client_capabilities = CLIENT_LOCAL_FILES;
+   mysql_mutex_lock(&LOCK_thread_count);
+diff -ruN a/sql/sp_head.cc b/sql/sp_head.cc
+--- a/sql/sp_head.cc   2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sp_head.cc   2010-12-16 04:27:46.000000000 +0300
+@@ -2151,7 +2151,7 @@
+     DBUG_PRINT("info",(" %.*s: eval args done", (int) m_name.length, 
+                        m_name.str));
+   }
+-  if (!(m_flags & LOG_SLOW_STATEMENTS) && thd->enable_slow_log)
++  if (!(m_flags & LOG_SLOW_STATEMENTS || opt_log_slow_sp_statements) && thd->enable_slow_log)
+   {
+     DBUG_PRINT("info", ("Disabling slow log for the execution"));
+     save_enable_slow_log= true;
+diff -ruN a/sql/sql_cache.cc b/sql/sql_cache.cc
+--- a/sql/sql_cache.cc 2010-11-03 01:01:14.000000000 +0300
++++ b/sql/sql_cache.cc 2010-12-16 04:27:46.000000000 +0300
+@@ -1756,6 +1756,7 @@
+     response, we can't handle it anyway.
+   */
+   (void) trans_commit_stmt(thd);
++  thd->query_plan_flags|= QPLAN_QC;
+   if (!thd->stmt_da->is_set())
+     thd->stmt_da->disable_status();
+@@ -1766,6 +1767,7 @@
+ err_unlock:
+   unlock();
+ err:
++  thd->query_plan_flags|= QPLAN_QC_NO;
+   MYSQL_QUERY_CACHE_MISS(thd->query());
+   DBUG_RETURN(0);                             // Query was not cached
+ }
+diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
+--- a/sql/sql_class.cc 2010-11-03 01:01:14.000000000 +0300
++++ b/sql/sql_class.cc 2010-12-16 04:27:46.000000000 +0300
+@@ -367,6 +367,37 @@
+   thd->warning_info->inc_current_row_for_warning();
+ }
++extern "C"  /* C linkage: also declared for plugins in include/mysql/plugin.h */
++void increment_thd_innodb_stats(THD* thd,  /* folds one batch of InnoDB counters into thd */
++                    unsigned long long trx_id,
++                    long io_reads,
++                    long long  io_read,
++                    long      io_reads_wait_timer,
++                    long      lock_que_wait_timer,
++                    long      que_wait_timer,
++                    long      page_access)
++{
++  thd->innodb_was_used = TRUE;  /* the slow-log writer prints its InnoDB lines only when this is set */
++  thd->innodb_trx_id = trx_id;  /* overwritten on every call, not accumulated */
++  thd->innodb_io_reads += io_reads;  /* logged as InnoDB_IO_r_ops */
++  thd->innodb_io_read += io_read;  /* logged as InnoDB_IO_r_bytes */
++  thd->innodb_io_reads_wait_timer += io_reads_wait_timer;  /* logged divided by 1000000.0 as InnoDB_IO_r_wait */
++  thd->innodb_lock_que_wait_timer += lock_que_wait_timer;  /* logged divided by 1000000.0 as InnoDB_rec_lock_wait */
++  thd->innodb_innodb_que_wait_timer += que_wait_timer;  /* logged divided by 1000000.0 as InnoDB_queue_wait */
++  thd->innodb_page_access += page_access;  /* logged as InnoDB_pages_distinct */
++}
++
++extern "C"  /* accessor for the session's log_slow_verbosity value, declared in include/mysql/plugin.h */
++unsigned long thd_log_slow_verbosity(const THD *thd)
++{
++  return (unsigned long) thd->variables.log_slow_verbosity;  /* tested elsewhere as ULL(1) << SLOG_V_* bits */
++}
++
++extern "C"  /* accessor for the global opt_slow_log flag, declared in include/mysql/plugin.h */
++int thd_opt_slow_log()
++{
++  return (int) opt_slow_log;  /* takes no THD: the value is read from a global */
++}
+ /**
+   Dumps a text description of a thread, its security context
+@@ -661,6 +692,7 @@
+     *cond_hdl= NULL;
+     return FALSE;
+   }
++  last_errno= sql_errno;
+   for (Internal_error_handler *error_handler= m_internal_handler;
+        error_handler;
+@@ -3355,6 +3387,12 @@
+     first_successful_insert_id_in_prev_stmt;
+   backup->first_successful_insert_id_in_cur_stmt= 
+     first_successful_insert_id_in_cur_stmt;
++  backup->innodb_io_reads= innodb_io_reads;
++  backup->innodb_io_read= innodb_io_read;
++  backup->innodb_io_reads_wait_timer= innodb_io_reads_wait_timer;
++  backup->innodb_lock_que_wait_timer= innodb_lock_que_wait_timer;
++  backup->innodb_innodb_que_wait_timer= innodb_innodb_que_wait_timer;
++  backup->innodb_page_access= innodb_page_access;
+   if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) &&
+       !is_current_stmt_binlog_format_row())
+@@ -3375,6 +3413,14 @@
+   cuted_fields= 0;
+   transaction.savepoints= 0;
+   first_successful_insert_id_in_cur_stmt= 0;
++  last_errno= 0;
++  innodb_trx_id= 0;
++  innodb_io_reads= 0;
++  innodb_io_read= 0;
++  innodb_io_reads_wait_timer= 0;
++  innodb_lock_que_wait_timer= 0;
++  innodb_innodb_que_wait_timer= 0;
++  innodb_page_access= 0;
+ }
+@@ -3437,6 +3483,12 @@
+   */
+   examined_row_count+= backup->examined_row_count;
+   cuted_fields+=       backup->cuted_fields;
++  innodb_io_reads+= backup->innodb_io_reads;
++  innodb_io_read+= backup->innodb_io_read;
++  innodb_io_reads_wait_timer+= backup->innodb_io_reads_wait_timer;
++  innodb_lock_que_wait_timer+= backup->innodb_lock_que_wait_timer;
++  innodb_innodb_que_wait_timer+= backup->innodb_innodb_que_wait_timer;
++  innodb_page_access+= backup->innodb_page_access;
+   DBUG_VOID_RETURN;
+ }
+diff -ruN a/sql/sql_class.h b/sql/sql_class.h
+--- a/sql/sql_class.h  2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sql_class.h  2010-12-16 04:27:46.000000000 +0300
+@@ -60,6 +60,26 @@
+ enum enum_duplicates { DUP_ERROR, DUP_REPLACE, DUP_UPDATE };
+ enum enum_delay_key_write { DELAY_KEY_WRITE_NONE, DELAY_KEY_WRITE_ON,
+                           DELAY_KEY_WRITE_ALL };
++/*
++  Entries of the use_global_log_slow_control option.  Each enumerator is a
++  bit position: the option value is tested with (ULL(1) << SLOG_UG_xxx).
++*/
++enum enum_use_global_log_slow_control { SLOG_UG_LOG_SLOW_FILTER, SLOG_UG_LOG_SLOW_RATE_LIMIT, SLOG_UG_LOG_SLOW_VERBOSITY, SLOG_UG_LONG_QUERY_TIME, SLOG_UG_MIN_EXAMINED_ROW_LIMIT, SLOG_UG_ALL };
++/*
++  Entries of the log_slow_verbosity option.  Each enumerator is a bit
++  position: the option value is tested with (ULL(1) << SLOG_V_xxx).
++  SLOG_V_MINIMAL, SLOG_V_STANDARD and SLOG_V_FULL are aliases that
++  update_log_slow_verbosity() replaces with combinations of the
++  microtime/query_plan/innodb bits.
++*/
++enum enum_log_slow_verbosity { 
++  SLOG_V_MICROTIME, SLOG_V_QUERY_PLAN, SLOG_V_INNODB, 
++  SLOG_V_PROFILING, SLOG_V_PROFILING_USE_GETRUSAGE,
++  SLOG_V_MINIMAL, SLOG_V_STANDARD, SLOG_V_FULL
++};
++/*
++  Query plan flags that are OR-ed into THD::query_plan_flags.
++  Every expansion is parenthesized so that the shift cannot re-associate
++  with a higher-precedence operator at the use site (for example
++  "~QPLAN_QC", "QPLAN_QC + 1" or "x * QPLAN_QC").
++*/
++#define QPLAN_NONE            0
++#define QPLAN_QC              (1 << 0)
++#define QPLAN_QC_NO           (1 << 1)
++#define QPLAN_FULL_SCAN       (1 << 2)
++#define QPLAN_FULL_JOIN       (1 << 3)
++#define QPLAN_TMP_TABLE       (1 << 4)
++#define QPLAN_TMP_DISK        (1 << 5)
++#define QPLAN_FILESORT        (1 << 6)
++#define QPLAN_FILESORT_DISK   (1 << 7)
++/*
++  Entries of the log_slow_filter option, in the same order as
++  log_slow_filter_name[] in sys_vars.cc.
++
++  NOTE(review): log_slow_statement() ANDs log_slow_filter directly with
++  THD::query_plan_flags and with SLOG_F_QC_NO.  SLOG_F_QC_NO is 0, so that
++  mask test can never be true.  Also, if the option stores entry i as
++  bit i, the QPLAN_* bits are shifted by one relative to these entries
++  (QPLAN_QC is bit 0, QPLAN_QC_NO is bit 1) -- confirm and align.
++*/
++enum enum_log_slow_filter {
++  SLOG_F_QC_NO, SLOG_F_FULL_SCAN, SLOG_F_FULL_JOIN,
++  SLOG_F_TMP_TABLE, SLOG_F_TMP_DISK, SLOG_F_FILESORT,
++  SLOG_F_FILESORT_DISK
++};
+ enum enum_slave_exec_mode { SLAVE_EXEC_MODE_STRICT,
+                             SLAVE_EXEC_MODE_IDEMPOTENT,
+                             SLAVE_EXEC_MODE_LAST_BIT};
+@@ -508,6 +528,17 @@
+   my_bool sysdate_is_now;
++  ulong log_slow_rate_limit;
++  ulonglong log_slow_filter;
++  ulonglong log_slow_verbosity;
++
++  ulong      innodb_io_reads;
++  ulonglong  innodb_io_read;
++  ulong      innodb_io_reads_wait_timer;
++  ulong      innodb_lock_que_wait_timer;
++  ulong      innodb_innodb_que_wait_timer;
++  ulong      innodb_page_access;
++
+   double long_query_time_double;
+ } SV;
+@@ -1140,6 +1171,14 @@
+   uint in_sub_stmt;
+   bool enable_slow_log;
+   bool last_insert_id_used;
++
++  ulong      innodb_io_reads;
++  ulonglong  innodb_io_read;
++  ulong      innodb_io_reads_wait_timer;
++  ulong      innodb_lock_que_wait_timer;
++  ulong      innodb_innodb_que_wait_timer;
++  ulong      innodb_page_access;
++
+   SAVEPOINT *savepoints;
+   enum enum_check_fields count_cuted_fields;
+ };
+@@ -1575,6 +1614,26 @@
+   thr_lock_type update_lock_default;
+   Delayed_insert *di;
++  bool       write_to_slow_log;
++
++  ulonglong  bytes_sent_old;
++  ulong      tmp_tables_used;
++  ulong      tmp_tables_disk_used;
++  ulonglong  tmp_tables_size;
++  bool       innodb_was_used;
++  ulonglong  innodb_trx_id;
++  ulong      innodb_io_reads;
++  ulonglong  innodb_io_read;
++  ulong      innodb_io_reads_wait_timer;
++  ulong      innodb_lock_que_wait_timer;
++  ulong      innodb_innodb_que_wait_timer;
++  ulong      innodb_page_access;
++
++  ulong      query_plan_flags;
++  ulong      query_plan_fsort_passes;
++
++  uint       last_errno;
++
+   /* <> 0 if we are inside of trigger or stored function. */
+   uint in_sub_stmt;
+diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
+--- a/sql/sql_connect.cc       2010-11-03 01:01:14.000000000 +0300
++++ b/sql/sql_connect.cc       2010-12-16 04:27:46.000000000 +0300
+@@ -738,6 +738,15 @@
+     prepare_new_connection_state(thd);
++    /* 
++      If rate limiting of slow log writes is enabled, decide whether to log this 
++      new thread's queries or not. Uses extremely simple algorithm. :) 
++    */ 
++    thd->write_to_slow_log= FALSE; 
++    if (thd->variables.log_slow_rate_limit <= 1 ||  
++        (thd->thread_id % thd->variables.log_slow_rate_limit) == 0) 
++         thd->write_to_slow_log= TRUE; 
++
+     while (!net->error && net->vio != 0 &&
+            !(thd->killed == THD::KILL_CONNECTION))
+     {
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sql_parse.cc 2010-12-16 04:47:41.000000000 +0300
+@@ -1424,7 +1424,6 @@
+   DBUG_RETURN(error);
+ }
+-
+ void log_slow_statement(THD *thd)
+ {
+   DBUG_ENTER("log_slow_statement");
+@@ -1437,6 +1436,42 @@
+   if (unlikely(thd->in_sub_stmt))
+     DBUG_VOID_RETURN;                           // Don't set time for sub stmt
++  /* Follow the slow log filter configuration. */
++  if (thd->variables.log_slow_filter != 0 &&
++      (!(thd->variables.log_slow_filter & thd->query_plan_flags) ||
++       ((thd->variables.log_slow_filter & SLOG_F_QC_NO) &&
++        (thd->query_plan_flags & QPLAN_QC))))
++    DBUG_VOID_RETURN;
++
++  /*
++    Low long_query_time value most likely means user is debugging stuff and even
++    though some thread's queries are not supposed to be logged b/c of the rate
++    limit, if one of them takes long enough (>= 1 second) it will be sensible
++    to make an exception and write to slow log anyway.
++  */
++
++  ulonglong end_utime_of_query= thd->current_utime();
++#define USE_GLOBAL_UPDATE(variable_name,enum_value_name)                \
++  if (opt_use_global_log_slow_control & (ULL(1) << enum_value_name))    \
++  {                                                                     \
++    thd->variables. variable_name=                                      \
++      global_system_variables. variable_name;                           \
++  }
++  USE_GLOBAL_UPDATE(log_slow_filter,SLOG_UG_LOG_SLOW_FILTER);
++  USE_GLOBAL_UPDATE(log_slow_rate_limit,SLOG_UG_LOG_SLOW_RATE_LIMIT);
++  USE_GLOBAL_UPDATE(log_slow_verbosity,SLOG_UG_LOG_SLOW_VERBOSITY);
++  USE_GLOBAL_UPDATE(long_query_time,SLOG_UG_LONG_QUERY_TIME);
++  USE_GLOBAL_UPDATE(long_query_time_double,SLOG_UG_LONG_QUERY_TIME);
++  USE_GLOBAL_UPDATE(min_examined_row_limit,SLOG_UG_MIN_EXAMINED_ROW_LIMIT);
++#undef USE_GLOBAL_UPDATE
++
++  /* Do not log this thread's queries due to rate limiting. */
++  if (thd->write_to_slow_log != TRUE
++      && (thd->variables.long_query_time >= 1000000
++          || (ulong) (end_utime_of_query - thd->utime_after_lock) < 1000000))
++    DBUG_VOID_RETURN;
++
++
+   /*
+     Do not log administrative statements unless the appropriate option is
+     set.
+@@ -1812,6 +1847,9 @@
+     context.resolve_in_table_list_only(select_lex->
+                                        table_list.first);
++  /* Reset the counter in all cases for the extended slow query log */
++  thd->sent_row_count= 0;
++
+   /*
+     Reset warning count for each query that uses tables
+     A better approach would be to reset this for any commands
+@@ -5238,6 +5276,21 @@
+   thd->rand_used= 0;
+   thd->sent_row_count= thd->examined_row_count= 0;
++  thd->bytes_sent_old= thd->status_var.bytes_sent;
++  thd->tmp_tables_used= thd->tmp_tables_disk_used= 0;
++  thd->tmp_tables_size= 0;
++  thd->innodb_was_used= FALSE;
++  thd->innodb_trx_id= 0;
++  thd->innodb_io_reads= 0;
++  thd->innodb_io_read= 0;
++  thd->innodb_io_reads_wait_timer= 0;
++  thd->innodb_lock_que_wait_timer= 0;
++  thd->innodb_innodb_que_wait_timer= 0;
++  thd->innodb_page_access= 0;
++  thd->query_plan_flags= QPLAN_NONE;
++  thd->query_plan_fsort_passes= 0;
++  thd->last_errno= 0;
++
+   thd->reset_current_stmt_binlog_format_row();
+   thd->binlog_unsafe_warning_flags= 0;
+diff -ruN a/sql/sql_select.cc b/sql/sql_select.cc
+--- a/sql/sql_select.cc        2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sql_select.cc        2010-12-16 04:27:47.000000000 +0300
+@@ -6870,7 +6870,10 @@
+         {
+           join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
+           if (statistics)
++            {
+             status_var_increment(join->thd->status_var.select_scan_count);
++              join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
++            }
+         }
+       }
+       else
+@@ -6884,7 +6887,10 @@
+         {
+           join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
+           if (statistics)
++            {
+             status_var_increment(join->thd->status_var.select_full_join_count);
++              join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
++            }
+         }
+       }
+       if (!table->no_keyread)
+@@ -10210,6 +10216,7 @@
+               (ulong) rows_limit,test(group)));
+   status_var_increment(thd->status_var.created_tmp_tables);
++  thd->query_plan_flags|= QPLAN_TMP_TABLE;
+   if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
+     temp_pool_slot = bitmap_lock_set_next(&temp_pool);
+@@ -11107,6 +11114,7 @@
+     goto err;
+   }
+   status_var_increment(table->in_use->status_var.created_tmp_disk_tables);
++  table->in_use->query_plan_flags|= QPLAN_TMP_DISK;
+   share->db_record_offset= 1;
+   DBUG_RETURN(0);
+  err:
+@@ -11125,6 +11133,14 @@
+   save_proc_info=thd->proc_info;
+   thd_proc_info(thd, "removing tmp table");
++  thd->tmp_tables_used++;
++  if (entry->file)
++  {
++    thd->tmp_tables_size += entry->file->stats.data_file_length;
++    if (entry->file->ht->db_type != DB_TYPE_HEAP)
++      thd->tmp_tables_disk_used++;
++  }
++
+   // Release latches since this can take a long time
+   ha_release_temporary_latches(thd);
+diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
+--- a/sql/sql_show.cc  2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sql_show.cc  2010-12-16 04:27:47.000000000 +0300
+@@ -1942,8 +1942,17 @@
+         table->field[4]->store(command_name[tmp->command].str,
+                                command_name[tmp->command].length, cs);
+       /* MYSQL_TIME */
+-      table->field[5]->store((longlong)(tmp->start_time ?
+-                                      now - tmp->start_time : 0), FALSE);
++      longlong value_in_time_column= 0;
++      if(tmp->start_time)
++      {
++        value_in_time_column = (now - tmp->start_time);
++        if(value_in_time_column > now)
++        {
++          value_in_time_column= 0;
++        }
++      }
++      table->field[5]->store(value_in_time_column, FALSE);
++
+       /* STATE */
+       if ((val= thread_state_info(tmp)))
+       {
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc  2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sys_vars.cc  2010-12-16 04:36:12.000000000 +0300
+@@ -2836,6 +2836,116 @@
+        DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+        ON_UPDATE(fix_log_state));
++/*
++  Value names accepted by log_slow_filter, terminated by 0.  Listed in the
++  same order as the enumerators of enum_log_slow_filter.
++*/
++const char *log_slow_filter_name[]= { "qc_miss", "full_scan", "full_join",
++                                      "tmp_table", "tmp_table_on_disk", "filesort", "filesort_on_disk", 0};
++/*
++  Session variable log_slow_filter: a set built from log_slow_filter_name.
++  The default of 0 selects no flags; log_slow_statement() only applies the
++  filter when the value is non-zero.
++*/
++static Sys_var_set Sys_log_slow_filter(
++       "log_slow_filter",
++       "Log only the queries that followed certain execution plan. "
++       "Multiple flags allowed in a comma-separated string. "
++       "[qc_miss, full_scan, full_join, tmp_table, tmp_table_on_disk, "
++       "filesort, filesort_on_disk]",
++       SESSION_VAR(log_slow_filter), CMD_LINE(REQUIRED_ARG),
++       log_slow_filter_name, DEFAULT(0));
++/*
++  Session variable log_slow_rate_limit, range 1..ULONG_MAX, default 1.
++  The connection loop in sql_connect.cc marks a session for slow logging
++  when the limit is <= 1 or its thread_id is a multiple of the limit.
++*/
++static Sys_var_ulong sys_log_slow_rate_limit(
++       "log_slow_rate_limit","Rate limit statement writes to slow log to only those from every (1/log_slow_rate_limit) session.",
++       SESSION_VAR(log_slow_rate_limit), CMD_LINE(REQUIRED_ARG),
++       VALID_RANGE(1, ULONG_MAX), DEFAULT(1), BLOCK_SIZE(1));
++/*
++  Value names accepted by log_slow_verbosity, terminated by 0.  The order
++  must stay in step with enum_log_slow_verbosity, whose enumerators are used
++  as bit positions for these entries.
++  "profiling_use_getrusage" is spelled as in the option's help text; it was
++  previously misspelled "profling_use_getrusage", so the documented name
++  was not accepted.
++*/
++const char* log_slow_verbosity_name[] = {
++  "microtime", "query_plan", "innodb",
++  "profiling", "profiling_use_getrusage",
++  "minimal", "standard", "full", 0
++};
++/**
++  Substitute one group of bits for another.
++
++  @param value  bit set to transform
++  @param what   bits that must all be present for the substitution to happen
++  @param by     bits to set in place of @a what
++
++  @return @a value with @a what cleared and @a by set when every bit of
++          @a what was present, otherwise @a value unchanged
++*/
++static ulonglong update_log_slow_verbosity_replace(ulonglong value, ulonglong what, ulonglong by)
++{
++  const bool all_present= ((value & what) == what);
++  return all_present ? ((value & ~what) | by) : value;
++}
++/**
++  Expand the alias entries of a log_slow_verbosity value in place.
++
++  minimal  -> microtime
++  standard -> microtime | query_plan
++  full     -> microtime | query_plan | innodb
++
++  @param value_ptr  points to the value to rewrite
++*/
++void update_log_slow_verbosity(ulonglong* value_ptr)
++{
++  const ulonglong microtime_bit=  ULL(1) << SLOG_V_MICROTIME;
++  const ulonglong query_plan_bit= ULL(1) << SLOG_V_QUERY_PLAN;
++  const ulonglong innodb_bit=     ULL(1) << SLOG_V_INNODB;
++
++  /* { alias bit, bits it stands for }, applied in this order */
++  const ulonglong aliases[3][2]=
++  {
++    { ULL(1) << SLOG_V_MINIMAL,  microtime_bit },
++    { ULL(1) << SLOG_V_STANDARD, microtime_bit | query_plan_bit },
++    { ULL(1) << SLOG_V_FULL,     microtime_bit | query_plan_bit | innodb_bit }
++  };
++
++  for (uint i= 0; i < 3; i++)
++    *value_ptr= update_log_slow_verbosity_replace(*value_ptr,
++                                                  aliases[i][0],
++                                                  aliases[i][1]);
++}
++/**
++  ON_UPDATE hook of log_slow_verbosity: expands the alias entries in the
++  value that was just assigned.
++
++  @param thd   session whose value is rewritten when type is OPT_SESSION
++  @param type  OPT_SESSION selects the session value, anything else the
++               global one
++
++  @return false always
++*/
++static bool update_log_slow_verbosity_helper(sys_var */*self*/, THD *thd,
++                                          enum_var_type type)
++{
++  ulonglong *target= (type == OPT_SESSION)
++    ? &(thd->variables.log_slow_verbosity)
++    : &(global_system_variables.log_slow_verbosity);
++
++  update_log_slow_verbosity(target);
++  return false;
++}
++/*
++  Expands the minimal/standard/full aliases in the global
++  log_slow_verbosity value.
++  NOTE(review): despite its name this function does not touch
++  use_global_log_slow_control; its body and the body of
++  init_log_slow_verbosity() look swapped -- confirm against the callers
++  before renaming.
++*/
++void init_use_global_log_slow_control()
++{
++  update_log_slow_verbosity(&(global_system_variables.log_slow_verbosity));
++}
++/*
++  Session variable log_slow_verbosity: a set built from
++  log_slow_verbosity_name; update_log_slow_verbosity_helper runs after each
++  assignment.  The help text does not list the minimal/standard/full
++  entries that log_slow_verbosity_name also contains.
++  NOTE(review): SLOG_V_MICROTIME is the first enumerator, i.e. 0.  If
++  DEFAULT() takes the bit mask rather than a bit position this default
++  selects no flags instead of "microtime" -- confirm.
++*/
++static Sys_var_set Sys_log_slow_verbosity(
++        "log_slow_verbosity",
++        "Choose how verbose the messages to your slow log will be. "
++        "Multiple flags allowed in a comma-separated string. [microtime, query_plan, innodb, profiling, profiling_use_getrusage]",
++        SESSION_VAR(log_slow_verbosity), CMD_LINE(REQUIRED_ARG),
++        log_slow_verbosity_name, DEFAULT(SLOG_V_MICROTIME),
++        NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
++        ON_UPDATE(update_log_slow_verbosity_helper));
++/*
++  Global boolean log_slow_slave_statements, stored in
++  opt_log_slow_slave_statements, default FALSE.
++*/
++static Sys_var_mybool Sys_log_slow_slave_statements(
++       "log_slow_slave_statements",
++       "Log queries replayed by the slave SQL thread",
++       GLOBAL_VAR(opt_log_slow_slave_statements), CMD_LINE(OPT_ARG),
++       DEFAULT(FALSE));
++/*
++  Global boolean log_slow_sp_statements, stored in
++  opt_log_slow_sp_statements, default TRUE.
++*/
++static Sys_var_mybool Sys_log_slow_sp_statements(
++       "log_slow_sp_statements",
++       "Log slow statements executed by stored procedure to the slow log if it is open.",
++       GLOBAL_VAR(opt_log_slow_sp_statements), CMD_LINE(OPT_ARG),
++       DEFAULT(TRUE));
++/*
++  Global boolean log_slow_timestamp_every, stored in
++  opt_log_slow_timestamp_every, default FALSE.
++*/
++static Sys_var_mybool Sys_log_slow_timestamp_every(
++       "log_slow_timestamp_every",
++       "Timestamp is printed for all records of the slow log even if they are same time.",
++       GLOBAL_VAR(opt_log_slow_timestamp_every), CMD_LINE(OPT_ARG),
++       DEFAULT(FALSE));
++/*
++  Value names accepted by use_global_log_slow_control, terminated by 0.
++  Listed in the same order as enum_use_global_log_slow_control.
++*/
++const char *use_global_log_slow_control_name[]= { "log_slow_filter", "log_slow_rate_limit", "log_slow_verbosity", "long_query_time", "min_examined_row_limit", "all", 0};
++/**
++  ON_UPDATE hook of use_global_log_slow_control.
++
++  When the "all" entry is selected, the option value is replaced by the
++  union of the five individual entries.  The SLOG_UG_* enumerators are bit
++  positions (the option is tested with ULL(1) << SLOG_UG_xxx), so each one
++  has to be shifted into a mask before being OR-ed.  OR-ing the raw
++  enumerators gave 0|1|2|3|4 == 7, which selected only the first three
++  entries and left long_query_time and min_examined_row_limit out.
++
++  @return false always
++*/
++static bool update_use_global_log_slow_control(sys_var */*self*/, THD */*thd*/,
++                                               enum_var_type /*type*/)
++{
++  if(opt_use_global_log_slow_control & (ULL(1) << SLOG_UG_ALL))
++  {
++    opt_use_global_log_slow_control=
++      (ULL(1) << SLOG_UG_LOG_SLOW_FILTER) |
++      (ULL(1) << SLOG_UG_LOG_SLOW_RATE_LIMIT) |
++      (ULL(1) << SLOG_UG_LOG_SLOW_VERBOSITY) |
++      (ULL(1) << SLOG_UG_LONG_QUERY_TIME) |
++      (ULL(1) << SLOG_UG_MIN_EXAMINED_ROW_LIMIT);
++  }
++  return false;
++}
++/*
++  Runs the use_global_log_slow_control update hook once, so that an "all"
++  entry in opt_use_global_log_slow_control is expanded.
++  NOTE(review): despite its name this function does not touch
++  log_slow_verbosity; its body and the body of
++  init_use_global_log_slow_control() look swapped -- confirm against the
++  callers before renaming.
++*/
++void init_log_slow_verbosity()
++{
++  update_use_global_log_slow_control(0,0,OPT_GLOBAL);
++}
++/*
++  Global variable use_global_log_slow_control: a set built from
++  use_global_log_slow_control_name, default 0 (no entry selected).
++  update_use_global_log_slow_control runs after each assignment.
++*/
++static Sys_var_set Sys_use_global_log_slow_control(
++       "use_global_log_slow_control",
++       "Choose flags, which always use the global variables. Multiple flags allowed in a comma-separated string. [none, log_slow_filter, log_slow_rate_limit, log_slow_verbosity, long_query_time, min_examined_row_limit, all]",
++       GLOBAL_VAR(opt_use_global_log_slow_control), CMD_LINE(REQUIRED_ARG),
++       use_global_log_slow_control_name, DEFAULT(0),
++       NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
++       ON_UPDATE(update_use_global_log_slow_control));
++/*
++  Global boolean slow_query_log_microseconds_timestamp, stored in
++  opt_slow_query_log_microseconds_timestamp, default FALSE.
++  The help text used to be a copy of the log_slow_sp_statements one.
++  NOTE(review): the new wording is derived from the variable name --
++  confirm against the code that reads the option.
++*/
++static Sys_var_mybool Sys_slow_query_log_microseconds_timestamp(
++       "slow_query_log_microseconds_timestamp",
++       "Use microsecond precision for the timestamps written to the slow query log.",
++       GLOBAL_VAR(opt_slow_query_log_microseconds_timestamp), CMD_LINE(OPT_ARG),
++       DEFAULT(FALSE));
++ 
+ /* Synonym of "slow_query_log" for consistency with SHOW VARIABLES output */
+ static Sys_var_mybool Sys_log_slow(
+        "log_slow_queries",
+diff -ruN a/sql/sql_profile.cc b/sql/sql_profile.cc
+--- a/sql/sql_profile.cc       2010-11-03 07:01:14.000000000 +0900
++++ b/sql/sql_profile.cc       2010-12-02 20:26:35.448357413 +0900
+@@ -243,7 +243,8 @@
+ {
+   time_usecs= (double) my_getsystime() / 10.0;  /* 1 sec was 1e7, now is 1e6 */
+ #ifdef HAVE_GETRUSAGE
+-  getrusage(RUSAGE_SELF, &rusage);
++  if ((profile->get_profiling())->enabled_getrusage())
++    getrusage(RUSAGE_SELF, &rusage);
+ #elif defined(_WIN32)
+   FILETIME ftDummy;
+   // NOTE: Get{Process|Thread}Times has a granularity of the clock interval,
+@@ -251,6 +252,19 @@
+   // measurable by this function.
+   GetProcessTimes(GetCurrentProcess(), &ftDummy, &ftDummy, &ftKernel, &ftUser);
+ #endif
++
++#ifdef HAVE_CLOCK_GETTIME
++  struct timespec tp;
++
++  if (!(clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++  {
++    cpu_time_usecs= tp.tv_sec*1000000000.0 + tp.tv_nsec;
++  }
++  else
++#endif
++  {
++    cpu_time_usecs= 0;
++  }
+ }
+@@ -366,7 +380,8 @@
+     finish_current_query();
+   }
+-  enabled= ((thd->variables.option_bits & OPTION_PROFILING) != 0);
++  enabled= ((thd->variables.option_bits & OPTION_PROFILING) != 0) ||
++            ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_PROFILING)) != 0);
+   if (! enabled) DBUG_VOID_RETURN;
+@@ -404,7 +419,8 @@
+     status_change("ending", NULL, NULL, 0);
+     if ((enabled) &&                                    /* ON at start? */
+-        ((thd->variables.option_bits & OPTION_PROFILING) != 0) &&   /* and ON at end? */
++        (((thd->variables.option_bits & OPTION_PROFILING) != 0) ||
++          ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_PROFILING)) != 0)) &&   /* and ON at end? */
+         (current->query_source != NULL) &&
+         (! current->entries.is_empty()))
+     {
+@@ -505,6 +521,118 @@
+   DBUG_VOID_RETURN;
+ }
++/**
++  Tell whether the SLOG_V_PROFILING_USE_GETRUSAGE bit is set in the
++  session's log_slow_verbosity value.
++*/
++bool PROFILING::enabled_getrusage()
++{
++  const ulonglong getrusage_bit= ULL(1) << SLOG_V_PROFILING_USE_GETRUSAGE;
++  const ulonglong verbosity= thd->variables.log_slow_verbosity;
++  return (verbosity & getrusage_bit) != 0;
++}
++
++/**
++   Write one profile entry to the log as two "name: value " pairs.
++
++   The name is "Profile_" followed by the status text with every space
++   replaced by an underscore.  The first pair carries the wall clock time
++   between the two measurements, the second (name suffixed with "_cpu")
++   the CPU time; both are printed with six decimals.
++
++   @param log_file  cache to write to, must not be NULL
++   @param status    entry name, must not be NULL
++   @param start     measurement taken at the start of the span
++   @param stop      measurement taken at the end of the span
++*/
++
++static void my_b_print_status(IO_CACHE *log_file, const char *status,
++                              PROF_MEASUREMENT *start, PROF_MEASUREMENT *stop)
++{
++  DBUG_ENTER("my_b_print_status");
++  DBUG_ASSERT(log_file != NULL && status != NULL);
++  char formatted[22+7];
++  const double wall_time=
++    (stop->time_usecs - start->time_usecs) / (1000.0 * 1000);
++  const double cpu_time=
++    (stop->cpu_time_usecs - start->cpu_time_usecs) /
++    (1000.0 * 1000 * 1000);
++
++  /* Pass 0 writes the wall clock pair, pass 1 the CPU pair. */
++  for (int pass= 0; pass < 2; pass++)
++  {
++    my_b_printf(log_file, "Profile_");
++    for (const char *cursor= status; *cursor; cursor++)
++      my_b_write_byte(log_file, *cursor == ' ' ? '_' : *cursor);
++
++    if (pass == 0)
++    {
++      snprintf(formatted, sizeof(formatted), "%.6f", wall_time);
++      my_b_printf(log_file, ": %s ", formatted);
++    }
++    else
++    {
++      my_b_printf(log_file, "_cpu: ");
++      snprintf(formatted, sizeof(formatted), "%.6f", cpu_time);
++      my_b_printf(log_file, "%s ", formatted);
++    }
++  }
++
++  DBUG_VOID_RETURN;
++}
++
++/**
++  Print the profile of the current query to a log file.
++
++  Writes a "# " line holding one my_b_print_status() record per span
++  between consecutive measurements, followed by a "# " line with a
++  "total" record spanning the first and the last measurement.  Nothing is
++  written when there is no current query profile.
++
++  @param log_file  cache to write to
++
++  @return 0 always
++*/
++
++int PROFILING::print_current(IO_CACHE *log_file)
++{
++  DBUG_ENTER("PROFILING::print_current");
++  /* Incremented once per loop step below; never read in this function. */
++  ulonglong row_number= 0;
++
++  QUERY_PROFILE *query;
++  /* Get current query */
++  if (current == NULL)
++  {
++    DBUG_RETURN(0);
++  }
++
++  query= current;
++
++  my_b_printf(log_file, "# ");
++
++    void *entry_iterator;
++    PROF_MEASUREMENT *entry= NULL, *previous= NULL, *first= NULL;
++    /*
++      Go through all state-change steps of the query.  The increment
++      clause also runs after a "continue", so "previous" always trails
++      "entry" by exactly one step.
++    */
++    for (entry_iterator= query->entries.new_iterator();
++         entry_iterator != NULL;
++         entry_iterator= query->entries.iterator_next(entry_iterator),
++         previous=entry, row_number++)
++    {
++      entry= query->entries.iterator_value(entry_iterator);
++
++      /* Skip the first.  We count spans of fence, not fence-posts. */
++      if (previous == NULL) {first= entry; continue;}
++
++      if (thd->lex->sql_command == SQLCOM_SHOW_PROFILE)
++      {
++        /*
++          We got here via a SHOW command.  That means that we stored
++          information about the query we wish to show and that isn't
++          in a WHERE clause at a higher level to filter out rows we
++          wish to exclude.
++
++          Because that functionality isn't available in the server yet,
++          we must filter here, at the wrong level.  Once one can con-
++          struct where and having conditions at the SQL layer, then this
++          condition should be ripped out.
++
++          NOTE(review): this filter looks carried over from the
++          INFORMATION_SCHEMA fill code; here it only decides whether the
++          spans of the current query are skipped while a SHOW PROFILE
++          statement is being logged -- confirm it is intended.
++        */
++        if (thd->lex->profile_query_id == 0) /* 0 == show final query */
++        {
++          if (query != last)
++            continue;
++        }
++        else
++        {
++          if (thd->lex->profile_query_id != query->profiling_query_id)
++            continue;
++        }
++      }
++
++      my_b_print_status(log_file, previous->status, previous, entry);
++    }
++
++    /* Terminate the per-span line, then add the total if there was data. */
++    my_b_write_byte(log_file, '\n');
++    if ((entry != NULL) && (first != NULL))
++    {
++      my_b_printf(log_file, "# ");
++      my_b_print_status(log_file, "total", first, entry);
++      my_b_write_byte(log_file, '\n');
++    }
++
++  DBUG_RETURN(0);
++}
++
+ /**
+   Fill the information schema table, "query_profile", as defined in show.cc .
+   There are two ways to get to this function:  Selecting from the information
+diff -ruN a/sql/sql_profile.h b/sql/sql_profile.h
+--- a/sql/sql_profile.h        2010-11-03 07:01:14.000000000 +0900
++++ b/sql/sql_profile.h        2010-12-02 19:23:07.823955510 +0900
+@@ -164,11 +164,15 @@
+ */
+ class PROF_MEASUREMENT
+ {
+-private:
+-  friend class QUERY_PROFILE;
+-  friend class PROFILING;
+-
+   QUERY_PROFILE *profile;
++
++  char *allocated_status_memory;
++
++  void set_label(const char *status_arg, const char *function_arg, 
++                  const char *file_arg, unsigned int line_arg);
++  void clean_up();
++
++public:
+   char *status;
+ #ifdef HAVE_GETRUSAGE
+   struct rusage rusage;
+@@ -181,12 +185,7 @@
+   unsigned int line;
+   double time_usecs;
+-  char *allocated_status_memory;
+-
+-  void set_label(const char *status_arg, const char *function_arg, 
+-                  const char *file_arg, unsigned int line_arg);
+-  void clean_up();
+-  
++  double cpu_time_usecs;
+   PROF_MEASUREMENT();
+   PROF_MEASUREMENT(QUERY_PROFILE *profile_arg, const char *status_arg);
+   PROF_MEASUREMENT(QUERY_PROFILE *profile_arg, const char *status_arg,
+@@ -231,6 +230,11 @@
+   /* Show this profile.  This is called by PROFILING. */
+   bool show(uint options);
++
++public:
++
++  inline PROFILING * get_profiling() { return profiling; };
++
+ };
+@@ -276,9 +280,11 @@
+   /* SHOW PROFILES */
+   bool show_profiles();
++  bool enabled_getrusage();
+   /* ... from INFORMATION_SCHEMA.PROFILING ... */
+   int fill_statistics_info(THD *thd, TABLE_LIST *tables, Item *cond);
++  int print_current(IO_CACHE *log_file);
+ };
+ #  endif /* HAVE_PROFILING */
diff --git a/sql_no_fcache.patch b/sql_no_fcache.patch
new file mode 100644 (file)
index 0000000..ed14f09
--- /dev/null
@@ -0,0 +1,401 @@
+# name       : sql_no_fcache.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/client/mysqldump.c b/client/mysqldump.c
+--- a/client/mysqldump.c       2010-07-28 16:47:58.264067653 +0400
++++ b/client/mysqldump.c       2010-07-28 16:47:59.604985656 +0400
+@@ -138,6 +138,8 @@
+ #endif
+ static uint opt_protocol= 0;
++static my_bool server_supports_sql_no_fcache= FALSE;
++
+ /*
+ Dynamic_string wrapper functions. In this file use these
+ wrappers, they will terminate the process if there is
+@@ -1471,6 +1473,17 @@
+     /* Don't switch charsets for 4.1 and earlier.  (bug#34192). */
+     server_supports_switching_charsets= FALSE;
+   } 
++  
++  /* Check to see if we support SQL_NO_FCACHE on this server. */ 
++  if (mysql_query(mysql, "SELECT SQL_NO_FCACHE NOW()") == 0)
++  {
++    MYSQL_RES *res = mysql_store_result(mysql);
++    if (res)
++    {
++      mysql_free_result(res);
++    }
++    server_supports_sql_no_fcache= TRUE;
++  }
+   /*
+     As we're going to set SQL_MODE, it would be lost on reconnect, so we
+     cannot reconnect.
+@@ -3143,7 +3156,12 @@
+     /* now build the query string */
+-    dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ * INTO OUTFILE '");
++    dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ ");
++    if (server_supports_sql_no_fcache)
++    {
++      dynstr_append_checked(&query_string, "/*!50084 SQL_NO_FCACHE */ ");
++    }
++    dynstr_append_checked(&query_string, "* INTO OUTFILE '");
+     dynstr_append_checked(&query_string, filename);
+     dynstr_append_checked(&query_string, "'");
+@@ -3193,7 +3211,12 @@
+       check_io(md_result_file);
+     }
+     
+-    dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ * FROM ");
++    dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ ");
++    if (server_supports_sql_no_fcache)
++    {
++      dynstr_append_checked(&query_string, "/*!50084 SQL_NO_FCACHE */ ");
++    }
++    dynstr_append_checked(&query_string, "* FROM ");
+     dynstr_append_checked(&query_string, result_table);
+     if (where)
+diff -ruN a/include/flashcache_ioctl.h b/include/flashcache_ioctl.h
+--- a/include/flashcache_ioctl.h       1970-01-01 03:00:00.000000000 +0300
++++ b/include/flashcache_ioctl.h       2010-07-28 16:47:59.744079911 +0400
+@@ -0,0 +1,53 @@
++/****************************************************************************
++ *  flashcache_ioctl.h
++ *  FlashCache: Device mapper target for block-level disk caching
++ *
++ *  Copyright 2010 Facebook, Inc.
++ *  Author: Mohan Srinivasan (mohan@facebook.com)
++ *
++ *  Based on DM-Cache:
++ *   Copyright (C) International Business Machines Corp., 2006
++ *   Author: Ming Zhao (mingzhao@ufl.edu)
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License as published by
++ *  the Free Software Foundation; under version 2 of the License.
++ *
++ *  This program is distributed in the hope that it will be useful,
++ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
++ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ *  GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
++ ****************************************************************************/
++
++#ifndef FLASHCACHE_IOCTL_H
++#define FLASHCACHE_IOCTL_H
++
++#include <linux/types.h>
++
++#define FLASHCACHE_IOCTL 0xfe
++
++/*
++ * Command numbers of the flashcache ioctls, starting at 200.  They are
++ * combined with FLASHCACHE_IOCTL by the _IOW() definitions that follow.
++ */
++enum {
++       FLASHCACHEADDNCPID_CMD=200,
++       FLASHCACHEDELNCPID_CMD,
++       FLASHCACHEDELNCALL_CMD,
++       FLASHCACHEADDWHITELIST_CMD,
++       FLASHCACHEDELWHITELIST_CMD,
++       FLASHCACHEDELWHITELISTALL_CMD,
++};
++
++#define FLASHCACHEADDNCPID     _IOW(FLASHCACHE_IOCTL, FLASHCACHEADDNCPID_CMD, pid_t)
++#define FLASHCACHEDELNCPID     _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELNCPID_CMD, pid_t)
++#define FLASHCACHEDELNCALL     _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELNCALL_CMD, pid_t)
++
++#define FLASHCACHEADDBLACKLIST         FLASHCACHEADDNCPID
++#define FLASHCACHEDELBLACKLIST         FLASHCACHEDELNCPID
++#define FLASHCACHEDELALLBLACKLIST      FLASHCACHEDELNCALL
++
++#define FLASHCACHEADDWHITELIST         _IOW(FLASHCACHE_IOCTL, FLASHCACHEADDWHITELIST_CMD, pid_t)
++#define FLASHCACHEDELWHITELIST         _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELWHITELIST_CMD, pid_t)
++#define FLASHCACHEDELALLWHITELIST      _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELWHITELISTALL_CMD, pid_t)
++
++#endif
+diff -ruN a/patch_info/sql_no_fcache.info b/patch_info/sql_no_fcache.info
+--- a/patch_info/sql_no_fcache.info    1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/sql_no_fcache.info    2010-07-28 16:47:59.915439165 +0400
+@@ -0,0 +1,6 @@
++File=sql_no_fcache.patch
++Name=Support for flashcache including the SQL_NO_FCACHE option that prevents blocks from being cached during a query.
++Version=1.0
++Author=Facebook
++License=GPL
++Comment=
+diff -ruN a/sql/lex.h b/sql/lex.h
+--- a/sql/lex.h        2010-07-28 16:47:58.575318748 +0400
++++ b/sql/lex.h        2010-07-28 16:48:00.134078469 +0400
+@@ -516,6 +516,7 @@
+   { "SQL_CACHE",        SYM(SQL_CACHE_SYM)},
+   { "SQL_CALC_FOUND_ROWS", SYM(SQL_CALC_FOUND_ROWS)},
+   { "SQL_NO_CACHE",   SYM(SQL_NO_CACHE_SYM)},
++  { "SQL_NO_FCACHE",   SYM(SQL_NO_FCACHE_SYM)},
+   { "SQL_SMALL_RESULT", SYM(SQL_SMALL_RESULT)},
+   { "SQL_THREAD",     SYM(SQL_THREAD)},
+   { "SQL_TSI_SECOND",   SYM(SECOND_SYM)},
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h     2010-07-28 16:47:58.565318871 +0400
++++ b/sql/mysqld.h     2010-07-28 16:48:03.004544367 +0400
+@@ -190,6 +190,8 @@
+ extern ulong back_log;
+ extern char language[FN_REFLEN];
+ extern ulong server_id, concurrency;
++/* flashcache */
++extern int cachedev_fd;
+ extern time_t server_start_time, flush_status_time;
+ extern char *opt_mysql_tmpdir, mysql_charsets_dir[];
+ extern int mysql_unpacked_real_data_home_len;
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc    2010-07-28 16:47:58.565318871 +0400
++++ b/sql/mysqld.cc    2010-07-28 16:48:03.004544367 +0400
+@@ -86,6 +86,11 @@
+ #ifdef HAVE_SYS_PRCTL_H
+ #include <sys/prctl.h>
+ #endif
++#if defined(__linux__)
++#include <mntent.h>
++#include <sys/statfs.h>
++#include "flashcache_ioctl.h"
++#endif//__linux__
+ #include <thr_alarm.h>
+ #include <ft_global.h>
+@@ -481,6 +486,11 @@
+ ulong specialflag=0;
+ ulong binlog_cache_use= 0, binlog_cache_disk_use= 0;
+ ulong max_connections, max_connect_errors;
++
++/* flashcache */
++int cachedev_fd;
++my_bool cachedev_enabled= FALSE;
++
+ /**
+   Limit of the total number of prepared statements in the server.
+   Is necessary to protect the server against out-of-memory attacks.
+@@ -4172,6 +4182,97 @@
+ #define decrement_handler_count()
+ #endif /* defined(_WIN32) || defined(HAVE_SMEM) */
++#if defined(__linux__)
++/**
++  Auto detect if we support flash cache on the host system.
++  This needs to be called before we setuid away from root
++  to avoid permission problems on opening the device node.
++
++  Looks up the /etc/mtab entry whose filesystem id equals that of
++  mysql_data_home, opens its device node read-only, removes previous
++  whitelistings with FLASHCACHEDELALLWHITELIST and then whitelists the
++  current pid with FLASHCACHEADDWHITELIST.
++
++  On success cachedev_fd holds the open descriptor and cachedev_enabled is
++  TRUE.  On any failure cachedev_fd is negative, cachedev_enabled is FALSE
++  and the reason is written to the error log as an information message.
++*/
++static void init_cachedev(void)
++{
++  struct statfs stfs_data_home_dir;
++  struct statfs stfs;
++  struct mntent *ent;
++  pid_t pid = getpid();
++  FILE *mounts;
++  const char *error_message= NULL;
++  my_bool mount_found= FALSE;
++
++  // disabled by default
++  cachedev_fd = -1;
++  cachedev_enabled= FALSE;
++
++  if (!mysql_data_home)
++  {
++    error_message= "mysql_data_home not set";
++    goto epilogue;
++  }
++
++  if (statfs(mysql_data_home, &stfs_data_home_dir) < 0)
++  {
++    error_message= "statfs failed";
++    goto epilogue;
++  }
++
++  mounts = setmntent("/etc/mtab", "r");
++  if (mounts == NULL)
++  {
++    error_message= "setmntent failed";
++    goto epilogue;
++  }
++
++  while ((ent = getmntent(mounts)) != NULL)
++  {
++    if (statfs(ent->mnt_dir, &stfs) < 0)
++      continue;
++    if (memcmp(&stfs.f_fsid, &stfs_data_home_dir.f_fsid, sizeof(fsid_t)) == 0)
++    {
++      /*
++        Open the device here: the entry returned by getmntent() must not
++        be relied upon once the stream has been closed with endmntent().
++      */
++      mount_found= TRUE;
++      cachedev_fd = open(ent->mnt_fsname, O_RDONLY);
++      break;
++    }
++  }
++  endmntent(mounts);
++
++  if (!mount_found)
++  {
++    error_message= "getmntent loop failed";
++    goto epilogue;
++  }
++
++  if (cachedev_fd < 0)
++  {
++    error_message= "open flash device failed";
++    goto epilogue;
++  }
++
++  /* cleanup previous whitelistings */
++  if (ioctl(cachedev_fd, FLASHCACHEDELALLWHITELIST, &pid) < 0)
++  {
++    close(cachedev_fd);
++    cachedev_fd = -1;
++    error_message= "ioctl failed";
++  } else {
++    /* Best effort, as before: the result of the whitelisting is ignored. */
++    ioctl(cachedev_fd, FLASHCACHEADDWHITELIST, &pid);
++  }
++
++epilogue:
++  /* 0 is a valid descriptor, so only a negative value means "disabled". */
++  sql_print_information("Flashcache bypass: %s",
++      (cachedev_fd >= 0) ? "enabled" : "disabled");
++  if (error_message)
++    sql_print_information("Flashcache setup error is : %s\n", error_message);
++  else
++    cachedev_enabled= TRUE;
++
++}
++
++static void cleanup_cachedev(void)
++{
++  pid_t pid= getpid();
++  if (!cachedev_enabled)        /* init_cachedev() did not enable the bypass */
++    return;
++  ioctl(cachedev_fd, FLASHCACHEDELWHITELIST, &pid); /* result not checked */
++  close(cachedev_fd);
++  cachedev_fd= -1;
++  cachedev_enabled= FALSE;      /* a repeated call becomes a no-op */
++}
++#endif//__linux__
+ #ifndef EMBEDDED_LIBRARY
+ #ifndef DBUG_OFF
+@@ -4426,6 +4527,10 @@
+   test_lc_time_sz();
+ #endif
++#if defined(__linux__)
++  init_cachedev();
++#endif//__linux__
++
+   /*
+     We have enough space for fiddling with the argv, continue
+   */
+@@ -4629,6 +4734,10 @@
+   }
+ #endif
+   clean_up(1);
++#if defined(__linux__)
++  cleanup_cachedev();
++#endif//__linux__
++
+   mysqld_exit(0);
+ }
+@@ -6448,6 +6557,7 @@
+   {"Delayed_errors",           (char*) &delayed_insert_errors,  SHOW_LONG},
+   {"Delayed_insert_threads",   (char*) &delayed_insert_threads, SHOW_LONG_NOFLUSH},
+   {"Delayed_writes",           (char*) &delayed_insert_writes,  SHOW_LONG},
++  {"Flashcache_enabled",       (char*) &cachedev_enabled,       SHOW_BOOL },
+   {"Flush_commands",           (char*) &refresh_version,        SHOW_LONG_NOFLUSH},
+   {"Handler_commit",           (char*) offsetof(STATUS_VAR, ha_commit_count), SHOW_LONG_STATUS},
+   {"Handler_delete",           (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS},
+diff -ruN a/sql/sql_lex.cc b/sql/sql_lex.cc
+--- a/sql/sql_lex.cc   2010-07-28 16:47:58.555318714 +0400
++++ b/sql/sql_lex.cc   2010-07-28 16:48:07.794069239 +0400
+@@ -384,6 +384,7 @@
+   lex->describe= 0;
+   lex->subqueries= FALSE;
+   lex->view_prepare_mode= FALSE;
++  lex->disable_flashcache= FALSE;
+   lex->derived_tables= 0;
+   lex->safe_to_cache_query= 1;
+   lex->leaf_tables_insert= 0;
+diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
+--- a/sql/sql_lex.h    2010-07-28 16:47:58.575318748 +0400
++++ b/sql/sql_lex.h    2010-07-28 16:48:08.405691834 +0400
+@@ -2300,6 +2300,7 @@
+   */
+   bool view_prepare_mode;
+   bool safe_to_cache_query;
++  bool disable_flashcache;
+   bool subqueries, ignore;
+   st_parsing_options parsing_options;
+   Alter_info alter_info;
+diff -ruN a/sql/sql_select.cc b/sql/sql_select.cc
+--- a/sql/sql_select.cc        2010-07-28 16:47:58.555318714 +0400
++++ b/sql/sql_select.cc        2010-07-28 16:48:13.414069437 +0400
+@@ -55,6 +55,12 @@
+ #define PREV_BITS(type,A)     ((type) (((type) 1 << (A)) -1))
++#if defined(__linux__)
++#include <sys/syscall.h>
++#include <sys/ioctl.h>
++#include "flashcache_ioctl.h"
++#endif//__linux__
++
+ const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
+                             "MAYBE_REF","ALL","range","index","fulltext",
+                             "ref_or_null","unique_subquery","index_subquery",
+@@ -266,8 +272,17 @@
+                    ulong setup_tables_done_option)
+ {
+   bool res;
++  pid_t pid;
+   register SELECT_LEX *select_lex = &lex->select_lex;
+   DBUG_ENTER("handle_select");
++#if defined(__linux__)
++  if(lex->disable_flashcache && cachedev_fd > 0)
++  {
++    pid = syscall(SYS_gettid);
++    ioctl(cachedev_fd, FLASHCACHEADDNCPID, &pid);
++  }
++#endif//__linux__
++ 
+   MYSQL_SELECT_START(thd->query());
+   if (select_lex->master_unit()->is_union() || 
+@@ -302,6 +317,12 @@
+   if (unlikely(res))
+     result->abort_result_set();
++#if defined(__linux__)
++  if (lex->disable_flashcache && cachedev_fd > 0)
++  {
++    ioctl(cachedev_fd, FLASHCACHEDELNCPID, &pid);
++  }
++#endif//__linux__ 
+   MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows);
+   DBUG_RETURN(res);
+ }
+diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy  2010-07-28 16:47:58.565318871 +0400
++++ b/sql/sql_yacc.yy  2010-07-28 16:48:14.205317990 +0400
+@@ -1283,6 +1283,7 @@
+ %token  SQL_CACHE_SYM
+ %token  SQL_CALC_FOUND_ROWS
+ %token  SQL_NO_CACHE_SYM
++%token  SQL_NO_FCACHE_SYM
+ %token  SQL_SMALL_RESULT
+ %token  SQL_SYM                       /* SQL-2003-R */
+ %token  SQL_THREAD
+@@ -7349,6 +7350,10 @@
+               Lex->select_lex.sql_cache= SELECT_LEX::SQL_NO_CACHE;
+             }
+           }
++      | SQL_NO_FCACHE_SYM
++        {
++          Lex->disable_flashcache= TRUE;
++        }
+         | SQL_CACHE_SYM
+           {
+             /* 
similarity index 63%
rename from mysql-userstat.patch
rename to userstat.patch
index faf6fb41375b375d6c3a1eb65d2b77dac9566e04..f04cfaeb48553528cef6e7a07f390fe70455c8dc 100644 (file)
@@ -1,78 +1,14 @@
 # name       : userstat.patch
 # introduced : 11 or before
-# maintainer : Yasufumi
+# maintainer : Oleg
 #
 #!!! notice !!!
 # Any small change to this file in the main branch
 # should be done or reviewed by the maintainer!
-diff -ruN a/configure b/configure
-diff -ruN a/configure.in b/configure.in
---- a/configure.in     2010-10-12 00:34:15.000000000 +0400
-+++ b/configure.in     2010-11-24 18:00:58.000000000 +0300
-@@ -2095,13 +2095,16 @@
-   realpath rename rint rwlock_init setupterm \
-   shmget shmat shmdt shmctl sigaction sigemptyset sigaddset \
-   sighold sigset sigthreadmask port_create sleep \
--  snprintf socket stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
-+  snprintf socket strsep stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
-   strtol strtoll strtoul strtoull tell tempnam thr_setconcurrency vidattr \
-   posix_fallocate backtrace backtrace_symbols backtrace_symbols_fd printstack)
- #
- #
- #
-+# The following change can be safely null-merged to 5.5
-+# since configure.cmake in 5.5 does the same check
-+AC_CHECK_LIB(rt, clock_gettime)
- case "$target" in
-  *-*-aix4* | *-*-sco*)
-       # (grr) aix 4.3 has a stub for clock_gettime, (returning ENOSYS)
-diff -ruN a/include/config.h.in b/include/config.h.in
---- a/include/config.h.in      2010-10-12 00:39:59.000000000 +0400
-+++ b/include/config.h.in      2010-11-24 17:53:34.000000000 +0300
-@@ -498,6 +498,9 @@
- /* Define to 1 if you have the `pthread' library (-lpthread). */
- #undef HAVE_LIBPTHREAD
-+/* Define to 1 if you have the `rt' library (-lrt). */
-+#undef HAVE_LIBRT
-+
- /* Define if have -lwrap */
- #undef HAVE_LIBWRAP
-@@ -842,6 +845,9 @@
- /* Define to 1 if you have the `strpbrk' function. */
- #undef HAVE_STRPBRK
-+/* Define to 1 if you have the `strsep' function. */
-+#undef HAVE_STRSEP
-+
- /* Define to 1 if you have the `strsignal' function. */
- #undef HAVE_STRSIGNAL
-@@ -863,7 +869,7 @@
- /* Define to 1 if you have the `strtoull' function. */
- #undef HAVE_STRTOULL
--/* Define to 1 if `st_rdev' is member of `struct stat'. */
-+/* Define to 1 if `st_rdev' is a member of `struct stat'. */
- #undef HAVE_STRUCT_STAT_ST_RDEV
- /* Define to 1 if your `struct stat' has `st_rdev'. Deprecated, use
-@@ -1158,6 +1164,9 @@
- /* Define to the one symbol short name of this package. */
- #undef PACKAGE_TARNAME
-+/* Define to the home page for this package. */
-+#undef PACKAGE_URL
-+
- /* Define to the version of this package. */
- #undef PACKAGE_VERSION
 diff -ruN a/include/mysql/plugin.h b/include/mysql/plugin.h
---- a/include/mysql/plugin.h   2010-11-24 17:24:51.000000000 +0300
-+++ b/include/mysql/plugin.h   2010-11-24 17:24:52.000000000 +0300
-@@ -705,6 +705,9 @@
+--- a/include/mysql/plugin.h   2010-12-03 20:58:24.000000000 +0300
++++ b/include/mysql/plugin.h   2010-12-31 06:06:43.000000000 +0300
+@@ -547,6 +547,9 @@
  unsigned long thd_log_slow_verbosity(const MYSQL_THD thd);
  int thd_opt_slow_log();
  #define EXTENDED_SLOWLOG
@@ -83,33 +19,32 @@ diff -ruN a/include/mysql/plugin.h b/include/mysql/plugin.h
    Create a temporary file.
  
 diff -ruN a/include/mysql_com.h b/include/mysql_com.h
---- a/include/mysql_com.h      2010-10-12 00:34:28.000000000 +0400
-+++ b/include/mysql_com.h      2010-11-24 17:28:26.000000000 +0300
-@@ -29,6 +29,7 @@
+--- a/include/mysql_com.h      2010-12-03 20:58:24.000000000 +0300
++++ b/include/mysql_com.h      2010-12-31 06:12:05.000000000 +0300
+@@ -31,6 +31,7 @@
  
  #define SERVER_VERSION_LENGTH 60
  #define SQLSTATE_LENGTH 5
 +#define LIST_PROCESS_HOST_LEN 64
  
  /*
-   USER_HOST_BUFF_SIZE -- length of string buffer, that is enough to contain
-@@ -115,6 +116,12 @@
-                                          thread */
- #define REFRESH_MASTER          128     /* Remove all bin logs in the index
-                                          and truncate the index */
-+#define REFRESH_TABLE_STATS     256     /* Refresh table stats hash table */
-+#define REFRESH_INDEX_STATS     512     /* Refresh index stats hash table */
-+#define REFRESH_USER_STATS      1024    /* Refresh user stats hash table */
-+#define REFRESH_SLOW_QUERY_LOG  2048    /* Flush slow query log and rotate*/
-+#define REFRESH_CLIENT_STATS    4096    /* Refresh client stats hash table */
-+#define REFRESH_THREAD_STATS    8192    /* Refresh thread stats hash table */
- /* The following can't be set with mysql_refresh() */
- #define REFRESH_READ_LOCK     16384   /* Lock tables for read */
-diff -ruN /dev/null b/patch_info/userstats.info
---- /dev/null  1970-01-01 00:00:00.000000000 +0000
-+++ b/patch_info/userstats.info        2010-11-24 17:24:52.000000000 +0300
-@@ -0,0 +1,11 @@
+   Maximum length of comments
+@@ -142,6 +143,11 @@
+ #define REFRESH_DES_KEY_FILE       0x40000L
+ #define REFRESH_USER_RESOURCES             0x80000L
+ #define REFRESH_QUERY_RESPONSE_TIME 0x100000L /* response time distibution */
++#define REFRESH_TABLE_STATS    0x200000L /* Refresh table stats my_hash table */
++#define REFRESH_INDEX_STATS    0x400000L /* Refresh index stats my_hash table */
++#define REFRESH_USER_STATS     0x800000L /* Refresh user stats my_hash table */
++#define REFRESH_CLIENT_STATS   0x1000000L /* Refresh client stats my_hash table */
++#define REFRESH_THREAD_STATS   0x2000000L /* Refresh thread stats my_hash table */
+ #define CLIENT_LONG_PASSWORD  1       /* new more secure passwords */
+ #define CLIENT_FOUND_ROWS     2       /* Found instead of affected rows */
+diff -ruN a/patch_info/userstats.patch b/patch_info/userstats.patch
+--- a/patch_info/userstats.patch       1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/userstats.patch       2010-12-30 00:45:46.000000000 +0300
+@@ -0,0 +1,15 @@
 +File=userstats.patch
 +Name=SHOW USER/TABLE/INDEX statistics
 +Version=V2
@@ -121,28 +56,32 @@ diff -ruN /dev/null b/patch_info/userstats.info
 +
 +2008-11-26
 +YK: add switch variable "userstat_running" to control INFORMATION_SCHEMA.*_STATISTICS (default:OFF)
++2010-12-31
++Ported to 5.5.8
++2011-1-5
++Fix porting
 diff -ruN a/sql/handler.cc b/sql/handler.cc
---- a/sql/handler.cc   2010-10-12 00:34:25.000000000 +0400
-+++ b/sql/handler.cc   2010-11-24 17:24:52.000000000 +0300
-@@ -1194,6 +1194,8 @@
+--- a/sql/handler.cc   2010-12-03 20:58:26.000000000 +0300
++++ b/sql/handler.cc   2010-12-30 00:59:23.000000000 +0300
+@@ -1239,6 +1239,8 @@
      if (cookie)
        tc_log->unlog(cookie, xid);
      DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
 +    if (is_real_trans)
 +      thd->diff_commit_trans++;
+     RUN_HOOK(transaction, after_commit, (thd, FALSE));
  end:
-     if (rw_trans)
-       start_waiting_global_read_lock(thd);
-@@ -1324,6 +1326,8 @@
-   /* Always cleanup. Even if there nht==0. There may be savepoints. */
+     if (rw_trans && mdl_request.ticket)
+@@ -1393,6 +1395,8 @@
+   /* Always cleanup. Even if nht==0. There may be savepoints. */
    if (is_real_trans)
      thd->transaction.cleanup();
 +
 +  thd->diff_rollback_trans++;
- #endif /* USING_TRANSACTIONS */
    if (all)
      thd->transaction_rollback_request= FALSE;
-@@ -1762,6 +1766,7 @@
+@@ -1796,6 +1800,7 @@
      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trans->ha_list= sv->ha_list;
@@ -150,84 +89,97 @@ diff -ruN a/sql/handler.cc b/sql/handler.cc
    DBUG_RETURN(error);
  }
  
-@@ -2122,6 +2127,8 @@
+@@ -2165,6 +2170,8 @@
        dup_ref=ref+ALIGN_SIZE(ref_length);
      cached_table_flags= table_flags();
    }
-+  rows_read = rows_changed = 0;
++  rows_read= rows_changed= 0;
 +  memset(index_rows_read, 0, sizeof(index_rows_read));
    DBUG_RETURN(error);
  }
  
-@@ -3571,6 +3578,111 @@
+@@ -3596,6 +3603,127 @@
    return;
  }
  
 +// Updates the global table stats with the TABLE this handler represents.
-+void handler::update_global_table_stats() {
-+  if (!opt_userstat_running) {
-+    rows_read = rows_changed = 0;
++void handler::update_global_table_stats()
++{
++  if (!opt_userstat_running)
++  {
++    rows_read= rows_changed= 0;
 +    return;
 +  }
 +
-+  if (!rows_read && !rows_changed) return;  // Nothing to update.
++  if (!rows_read && !rows_changed)
++    return;  // Nothing to update.
 +  // table_cache_key is db_name + '\0' + table_name + '\0'.
-+  if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str) return;
++  if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str)
++    return;
 +
 +  TABLE_STATS* table_stats;
 +  char key[NAME_LEN * 2 + 2];
 +  // [db] + '.' + [table]
 +  sprintf(key, "%s.%s", table->s->table_cache_key.str, table->s->table_name.str);
 +
-+  pthread_mutex_lock(&LOCK_global_table_stats);
++  mysql_mutex_lock(&LOCK_global_table_stats);
 +  // Gets the global table stats, creating one if necessary.
-+  if (!(table_stats = (TABLE_STATS*)hash_search(&global_table_stats,
-+                                                (uchar*)key,
-+                                                strlen(key)))) {
-+    if (!(table_stats = ((TABLE_STATS*)
-+                         my_malloc(sizeof(TABLE_STATS), MYF(MY_WME | MY_ZEROFILL))))) {
++  if (!(table_stats = (TABLE_STATS *) my_hash_search(&global_table_stats,
++                                                     (uchar*)key,
++                                                     strlen(key))))
++  {
++    if (!(table_stats = ((TABLE_STATS *)
++                         my_malloc(sizeof(TABLE_STATS), MYF(MY_WME | MY_ZEROFILL)))))
++    {
 +      // Out of memory.
 +      sql_print_error("Allocating table stats failed.");
 +      goto end;
 +    }
 +    strncpy(table_stats->table, key, sizeof(table_stats->table));
-+    table_stats->rows_read = 0;
-+    table_stats->rows_changed = 0;
-+    table_stats->rows_changed_x_indexes = 0;
-+    table_stats->engine_type = (int) ht->db_type;
++    table_stats->rows_read=              0;
++    table_stats->rows_changed=           0;
++    table_stats->rows_changed_x_indexes= 0;
++    table_stats->engine_type=            (int) ht->db_type;
 +
-+    if (my_hash_insert(&global_table_stats, (uchar*)table_stats)) {
++    if (my_hash_insert(&global_table_stats, (uchar *) table_stats))
++    {
 +      // Out of memory.
 +      sql_print_error("Inserting table stats failed.");
-+      my_free((char*)table_stats, 0);
++      my_free((char *) table_stats);
 +      goto end;
 +    }
 +  }
 +  // Updates the global table stats.
-+  table_stats->rows_read += rows_read;
-+  table_stats->rows_changed += rows_changed;
-+  table_stats->rows_changed_x_indexes +=
-+      rows_changed * (table->s->keys ? table->s->keys : 1);
-+  current_thd->diff_total_read_rows += rows_read;
-+  rows_read = rows_changed = 0;
++  table_stats->rows_read+=              rows_read;
++  table_stats->rows_changed+=           rows_changed;
++  table_stats->rows_changed_x_indexes+=
++    rows_changed * (table->s->keys ? table->s->keys : 1);
++  current_thd->diff_total_read_rows+=   rows_read;
++  rows_read= rows_changed=              0;
 +end:
-+  pthread_mutex_unlock(&LOCK_global_table_stats);
++  mysql_mutex_unlock(&LOCK_global_table_stats);
 +}
 +
 +// Updates the global index stats with this handler's accumulated index reads.
-+void handler::update_global_index_stats() {
++void handler::update_global_index_stats()
++{
 +  // table_cache_key is db_name + '\0' + table_name + '\0'.
-+  if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str) return;
++  if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str)
++    return;
 +
-+  if (!opt_userstat_running) {
-+    for (uint x = 0; x < table->s->keys; x++) {
-+      index_rows_read[x] = 0;
++  if (!opt_userstat_running)
++  {
++    for (uint x= 0; x < table->s->keys; ++x)
++    {
++      index_rows_read[x]= 0;
 +    }
 +    return;
 +  }
 +
-+  for (uint x = 0; x < table->s->keys; x++) {
-+    if (index_rows_read[x]) {
++  for (uint x = 0; x < table->s->keys; ++x)
++  {
++    if (index_rows_read[x])
++    {
 +      // Rows were read using this index.
 +      KEY* key_info = &table->key_info[x];
 +
@@ -239,32 +191,35 @@ diff -ruN a/sql/handler.cc b/sql/handler.cc
 +      sprintf(key, "%s.%s.%s",  table->s->table_cache_key.str,
 +              table->s->table_name.str, key_info->name);
 +
-+      pthread_mutex_lock(&LOCK_global_index_stats);
++      mysql_mutex_lock(&LOCK_global_index_stats);
 +      // Gets the global index stats, creating one if necessary.
-+      if (!(index_stats = (INDEX_STATS*)hash_search(&global_index_stats,
-+                                                    (uchar*)key,
-+                                                    strlen(key)))) {
-+        if (!(index_stats = ((INDEX_STATS*)
-+                             my_malloc(sizeof(INDEX_STATS), MYF(MY_WME | MY_ZEROFILL))))) {
++      if (!(index_stats = (INDEX_STATS *) my_hash_search(&global_index_stats,
++                                                         (uchar *) key,
++                                                         strlen(key))))
++      {
++        if (!(index_stats = ((INDEX_STATS *)
++                             my_malloc(sizeof(INDEX_STATS), MYF(MY_WME | MY_ZEROFILL)))))
++        {
 +          // Out of memory.
 +          sql_print_error("Allocating index stats failed.");
 +          goto end;
 +        }
 +        strncpy(index_stats->index, key, sizeof(index_stats->index));
-+        index_stats->rows_read = 0;
++        index_stats->rows_read= 0;
 +
-+        if (my_hash_insert(&global_index_stats, (uchar*)index_stats)) {
++        if (my_hash_insert(&global_index_stats, (uchar *) index_stats))
++        {
 +          // Out of memory.
 +          sql_print_error("Inserting index stats failed.");
-+          my_free((char*)index_stats, 0);
++          my_free((char *) index_stats);
 +          goto end;
 +        }
 +      }
 +      // Updates the global index stats.
-+      index_stats->rows_read += index_rows_read[x];
-+      index_rows_read[x] = 0;
-+end:
-+      pthread_mutex_unlock(&LOCK_global_index_stats);
++      index_stats->rows_read+= index_rows_read[x];
++      index_rows_read[x]=      0;
++  end:
++      mysql_mutex_unlock(&LOCK_global_index_stats);
 +    }
 +  }
 +}
@@ -272,11 +227,11 @@ diff -ruN a/sql/handler.cc b/sql/handler.cc
  /****************************************************************************
  ** Some general functions that isn't in the handler class
 diff -ruN a/sql/handler.h b/sql/handler.h
---- a/sql/handler.h    2010-10-12 00:34:25.000000000 +0400
-+++ b/sql/handler.h    2010-11-24 17:28:49.000000000 +0300
-@@ -30,6 +30,10 @@
- #define USING_TRANSACTIONS
+--- a/sql/handler.h    2010-12-03 20:58:26.000000000 +0300
++++ b/sql/handler.h    2010-12-31 05:10:00.000000000 +0300
+@@ -33,6 +33,10 @@
+ #include <ft_global.h>
+ #include <keycache.h>
  
 +#if MAX_KEY > 128
 +#error MAX_KEY is too large.  Values up to 128 are supported.
@@ -285,7 +240,33 @@ diff -ruN a/sql/handler.h b/sql/handler.h
  // the following is for checking tables
  
  #define HA_ADMIN_ALREADY_DONE   1
-@@ -1121,6 +1125,9 @@
+@@ -561,10 +565,12 @@
+ enum enum_schema_tables
+ {
+   SCH_CHARSETS= 0,
++  SCH_CLIENT_STATS,
+   SCH_COLLATIONS,
+   SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
+   SCH_COLUMNS,
+   SCH_COLUMN_PRIVILEGES,
++  SCH_INDEX_STATS,
+   SCH_ENGINES,
+   SCH_EVENTS,
+   SCH_FILES,
+@@ -592,9 +598,12 @@
+   SCH_TABLE_CONSTRAINTS,
+   SCH_TABLE_NAMES,
+   SCH_TABLE_PRIVILEGES,
++  SCH_TABLE_STATS,
+   SCH_TEMPORARY_TABLES,
++  SCH_THREAD_STATS,
+   SCH_TRIGGERS,
+   SCH_USER_PRIVILEGES,
++  SCH_USER_STATS,
+   SCH_VARIABLES,
+   SCH_VIEWS
+ };
+@@ -1209,6 +1218,9 @@
    bool locked;
    bool implicit_emptied;                /* Can be !=0 only if HEAP */
    const COND *pushed_cond;
@@ -295,13 +276,14 @@ diff -ruN a/sql/handler.h b/sql/handler.h
    /**
      next_insert_id is the next value which should be inserted into the
      auto_increment column: in a inserting-multi-row statement (like INSERT
-@@ -1158,9 +1165,11 @@
+@@ -1260,10 +1272,12 @@
      ref_length(sizeof(my_off_t)),
      ft_handler(0), inited(NONE),
      locked(FALSE), implicit_emptied(0),
 -    pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
 +    pushed_cond(0), rows_read(0), rows_changed(0), next_insert_id(0), insert_id_for_cur_row(0),
-     auto_inc_intervals_count(0)
+     auto_inc_intervals_count(0),
+     m_psi(NULL)
 -    {}
 +    {
 +      memset(index_rows_read, 0, sizeof(index_rows_read));
@@ -309,16 +291,16 @@ diff -ruN a/sql/handler.h b/sql/handler.h
    virtual ~handler(void)
    {
      DBUG_ASSERT(locked == FALSE);
-@@ -1284,6 +1293,8 @@
+@@ -1386,6 +1400,8 @@
    {
      table= table_arg;
      table_share= share;
-+    rows_read = rows_changed = 0;
++    rows_read = rows_changed= 0;
 +    memset(index_rows_read, 0, sizeof(index_rows_read));
    }
    virtual double scan_time()
    { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
-@@ -1628,6 +1639,8 @@
+@@ -1753,6 +1769,8 @@
    virtual bool is_crashed() const  { return 0; }
    virtual bool auto_repair() const { return 0; }
  
@@ -328,17 +310,17 @@ diff -ruN a/sql/handler.h b/sql/handler.h
  #define CHF_CREATE_FLAG 0
  #define CHF_DELETE_FLAG 1
 diff -ruN a/sql/lex.h b/sql/lex.h
---- a/sql/lex.h        2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/lex.h        2010-11-24 17:28:26.000000000 +0300
-@@ -106,6 +106,7 @@
-   { "CHECKSUM",               SYM(CHECKSUM_SYM)},
+--- a/sql/lex.h        2010-12-03 20:58:26.000000000 +0300
++++ b/sql/lex.h        2010-12-30 01:25:40.000000000 +0300
+@@ -111,6 +111,7 @@
    { "CIPHER",         SYM(CIPHER_SYM)},
+   { "CLASS_ORIGIN",     SYM(CLASS_ORIGIN_SYM)},
    { "CLIENT",         SYM(CLIENT_SYM)},
 +  { "CLIENT_STATISTICS",      SYM(CLIENT_STATS_SYM)},
    { "CLOSE",          SYM(CLOSE_SYM)},
    { "COALESCE",               SYM(COALESCE)},
    { "CODE",             SYM(CODE_SYM)},
-@@ -245,6 +246,7 @@
+@@ -257,6 +258,7 @@
    { "IN",             SYM(IN_SYM)},
    { "INDEX",          SYM(INDEX_SYM)},
    { "INDEXES",                SYM(INDEXES)},
@@ -346,15 +328,7 @@ diff -ruN a/sql/lex.h b/sql/lex.h
    { "INFILE",         SYM(INFILE)},
    { "INITIAL_SIZE",   SYM(INITIAL_SIZE_SYM)},
    { "INNER",          SYM(INNER_SYM)},
-@@ -478,6 +480,7 @@
-   { "SIGNED",         SYM(SIGNED_SYM)},
-   { "SIMPLE",         SYM(SIMPLE_SYM)},
-   { "SLAVE",            SYM(SLAVE)},
-+  { "SLOW",             SYM(SLOW_SYM)},
-   { "SNAPSHOT",         SYM(SNAPSHOT_SYM)},
-   { "SMALLINT",               SYM(SMALLINT)},
-   { "SOCKET",         SYM(SOCKET_SYM)},
-@@ -527,12 +530,14 @@
+@@ -550,12 +552,14 @@
    { "TABLES",         SYM(TABLES)},
    { "TABLESPACE",             SYM(TABLESPACE)},
    { "TABLE_CHECKSUM", SYM(TABLE_CHECKSUM_SYM)},
@@ -369,7 +343,7 @@ diff -ruN a/sql/lex.h b/sql/lex.h
    { "TIME",           SYM(TIME_SYM)},
    { "TIMESTAMP",      SYM(TIMESTAMP)},
    { "TIMESTAMPADD",     SYM(TIMESTAMP_ADD)},
-@@ -568,6 +573,7 @@
+@@ -591,6 +595,7 @@
    { "USE",            SYM(USE_SYM)},
    { "USER",           SYM(USER)},
    { "USER_RESOURCES", SYM(RESOURCES)},
@@ -378,9 +352,9 @@ diff -ruN a/sql/lex.h b/sql/lex.h
    { "USING",          SYM(USING)},
    { "UTC_DATE",         SYM(UTC_DATE_SYM)},
 diff -ruN a/sql/log.cc b/sql/log.cc
---- a/sql/log.cc       2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/log.cc       2010-11-24 17:24:52.000000000 +0300
-@@ -826,6 +826,13 @@
+--- a/sql/log.cc       2010-12-03 20:58:26.000000000 +0300
++++ b/sql/log.cc       2010-12-30 01:55:35.000000000 +0300
+@@ -922,6 +922,13 @@
      mysql_slow_log.reopen_file();
  }
  
@@ -394,43 +368,21 @@ diff -ruN a/sql/log.cc b/sql/log.cc
  /*
    Log error with all enabled log event handlers
  
-@@ -937,6 +944,21 @@
-   return rc;
- }
-+bool LOGGER::flush_slow_log(THD *thd)
-+{
-+  /*
-+    Now we lock logger, as nobody should be able to use logging routines while
-+    log tables are closed
-+  */
-+  logger.lock_exclusive();
-+
-+  /* reopen log files */
-+  file_log_handler->flush_slow_log();
-+
-+  /* end of log flush */
-+  logger.unlock();
-+  return 0;
-+}
- /*
-   Log slow query with all enabled log event handlers
-@@ -4495,6 +4517,8 @@
+@@ -4843,6 +4850,8 @@
                               thd->first_successful_insert_id_in_prev_stmt_for_binlog);
            if (e.write(file))
              goto err;
 +          if (file == &log_file)
-+            thd->binlog_bytes_written += e.data_written;
++            thd->binlog_bytes_written+= e.data_written;
          }
          if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
          {
-@@ -4506,12 +4530,16 @@
+@@ -4854,12 +4863,16 @@
                               minimum());
            if (e.write(file))
              goto err;
 +          if (file == &log_file)
-+            thd->binlog_bytes_written += e.data_written;
++            thd->binlog_bytes_written+= e.data_written;
          }
          if (thd->rand_used)
          {
@@ -438,59 +390,59 @@ diff -ruN a/sql/log.cc b/sql/log.cc
            if (e.write(file))
              goto err;
 +          if (file == &log_file)
-+            thd->binlog_bytes_written += e.data_written;
++            thd->binlog_bytes_written+= e.data_written;
          }
          if (thd->user_var_events.elements)
          {
-@@ -4527,6 +4555,8 @@
-                                  user_var_event->charset_number);
+@@ -4882,6 +4895,8 @@
+                                  flags);
              if (e.write(file))
                goto err;
 +            if (file == &log_file)
-+              thd->binlog_bytes_written += e.data_written;
++              thd->binlog_bytes_written+= e.data_written;
            }
          }
        }
-@@ -4539,6 +4569,8 @@
-     if (event_info->write(file) || 
+@@ -4893,6 +4908,8 @@
+     if (event_info->write(file) ||
          DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
        goto err;
 +    if (file == &log_file)
-+      thd->binlog_bytes_written += event_info->data_written;
++      thd->binlog_bytes_written+= event_info->data_written;
  
-     if (file == &log_file) // we are writing to the real log (disk)
-     {
-@@ -4684,7 +4716,7 @@
+     error= 0;
+@@ -5056,7 +5073,8 @@
      be reset as a READ_CACHE to be able to read the contents from it.
   */
  
 -int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
-+int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log, bool sync_log)
++int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache,
++                               bool lock_log, bool sync_log)
  {
    Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
  
-@@ -4732,6 +4764,7 @@
+@@ -5103,6 +5121,7 @@
        /* write the first half of the split header */
        if (my_b_write(&log_file, header, carry))
          return ER_ERROR_ON_WRITE;
-+      thd->binlog_bytes_written += carry;
++      thd->binlog_bytes_written+= carry;
  
        /*
          copy fixed second half of header to cache so the correct
-@@ -4800,6 +4833,7 @@
+@@ -5171,6 +5190,7 @@
      /* Write data to the binary log file */
      if (my_b_write(&log_file, cache->read_pos, length))
        return ER_ERROR_ON_WRITE;
-+    thd->binlog_bytes_written += length;
++    thd->binlog_bytes_written+= length;
      cache->read_pos=cache->read_end;          // Mark buffer used up
    } while ((length= my_b_fill(cache)));
  
-@@ -4922,21 +4956,24 @@
-       */
+@@ -5281,20 +5301,23 @@
+       Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0);
        if (qinfo.write(&log_file))
          goto err;
-+      thd->binlog_bytes_written += qinfo.data_written;
++      thd->binlog_bytes_written+= qinfo.data_written;
        DBUG_EXECUTE_IF("crash_before_writing_xid",
                        {
 -                        if ((write_error= write_cache(cache, false, true)))
@@ -508,23 +460,24 @@ diff -ruN a/sql/log.cc b/sql/log.cc
        if (commit_event && commit_event->write(&log_file))
          goto err;
 +      if (commit_event)
-+        thd->binlog_bytes_written += commit_event->data_written;
++        thd->binlog_bytes_written+= commit_event->data_written;
  
        if (incident && write_incident(thd, FALSE))
          goto err;
 diff -ruN a/sql/log.h b/sql/log.h
---- a/sql/log.h        2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/log.h        2010-11-24 17:24:52.000000000 +0300
-@@ -361,7 +361,7 @@
+--- a/sql/log.h        2010-12-03 20:58:26.000000000 +0300
++++ b/sql/log.h        2010-12-30 01:56:04.000000000 +0300
+@@ -414,7 +414,8 @@
    bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
    bool write_incident(THD *thd, bool lock);
 -  int  write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
-+  int  write_cache(THD *thd, IO_CACHE *cache, bool lock_log, bool flush_and_sync);
++  int  write_cache(THD *thd, IO_CACHE *cache,
++                   bool lock_log, bool flush_and_sync);
    void set_write_error(THD *thd);
    bool check_write_error(THD *thd);
  
-@@ -499,6 +499,7 @@
+@@ -566,6 +567,7 @@
                             const char *sql_text, uint sql_text_len,
                             CHARSET_INFO *client_cs);
    void flush();
@@ -532,101 +485,40 @@ diff -ruN a/sql/log.h b/sql/log.h
    void init_pthread_objects();
    MYSQL_QUERY_LOG *get_mysql_slow_log() { return &mysql_slow_log; }
    MYSQL_QUERY_LOG *get_mysql_log() { return &mysql_log; }
-@@ -543,6 +544,7 @@
-   void init_base();
-   void init_log_tables();
-   bool flush_logs(THD *thd);
-+  bool flush_slow_log(THD *thd);
-   /* Perform basic logger cleanup. this will leave e.g. error log open. */
-   void cleanup_base();
-   /* Free memory. Nothing could be logged after this function is called */
-diff -ruN a/sql/mysql_priv.h b/sql/mysql_priv.h
---- a/sql/mysql_priv.h 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/mysql_priv.h 2010-11-24 17:31:34.000000000 +0300
-@@ -1139,7 +1139,17 @@
- bool multi_delete_set_locks_and_link_aux_tables(LEX *lex);
- void init_max_user_conn(void);
- void init_update_queries(void);
-+void init_global_user_stats(void);
-+void init_global_table_stats(void);
-+void init_global_index_stats(void);
-+void init_global_client_stats(void);
-+void init_global_thread_stats(void);
- void free_max_user_conn(void);
-+void free_global_user_stats(void);
-+void free_global_table_stats(void);
-+void free_global_index_stats(void);
-+void free_global_client_stats(void);
-+void free_global_thread_stats(void);
- pthread_handler_t handle_bootstrap(void *arg);
- int mysql_execute_command(THD *thd);
- bool do_command(THD *thd);
-@@ -2015,6 +2025,7 @@
- extern ulong max_connect_errors, connect_timeout;
- extern ulong slave_net_timeout, slave_trans_retries;
- extern uint max_user_connections;
-+extern ulonglong denied_connections;
- extern ulong what_to_log,flush_time;
- extern ulong query_buff_size;
- extern ulong max_prepared_stmt_count, prepared_stmt_count;
-@@ -2068,6 +2079,7 @@
- extern my_bool opt_slave_compressed_protocol, use_temp_pool;
- extern ulong slave_exec_mode_options;
- extern my_bool opt_readonly, lower_case_file_system;
-+extern my_bool opt_userstat_running, opt_thread_statistics;
- extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
- extern my_bool opt_secure_auth;
- extern char* opt_secure_file_priv;
-@@ -2132,6 +2144,15 @@
- extern struct system_variables max_system_variables;
- extern struct system_status_var global_status_var;
- extern struct rand_struct sql_rand;
-+extern HASH global_user_stats;
-+extern HASH global_client_stats;
-+extern HASH global_thread_stats;
-+extern pthread_mutex_t LOCK_global_user_client_stats;
-+extern HASH global_table_stats;
-+extern pthread_mutex_t LOCK_global_table_stats;
-+extern HASH global_index_stats;
-+extern pthread_mutex_t LOCK_global_index_stats;
-+extern pthread_mutex_t LOCK_stats;
- extern const char *opt_date_time_formats[];
- extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[];
 diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
---- a/sql/mysqld.cc    2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/mysqld.cc    2010-11-24 17:31:34.000000000 +0300
-@@ -533,6 +533,7 @@
+--- a/sql/mysqld.cc    2010-12-03 20:58:26.000000000 +0300
++++ b/sql/mysqld.cc    2010-12-30 02:04:50.000000000 +0300
+@@ -438,6 +438,7 @@
  uint    opt_debug_sync_timeout= 0;
  #endif /* defined(ENABLED_DEBUG_SYNC) */
  my_bool opt_old_style_user_limits= 0, trust_function_creators= 0;
 +my_bool opt_userstat_running= 0, opt_thread_statistics= 0;
+ my_bool opt_optimizer_fix= 0;
  /*
    True if there is at least one per-hour limit for some user, so we should
-   check them before each query (and possibly reset counters when hour is
-@@ -581,6 +582,7 @@
+@@ -486,6 +487,7 @@
+ ulong specialflag=0;
  ulong binlog_cache_use= 0, binlog_cache_disk_use= 0;
  ulong max_connections, max_connect_errors;
- uint  max_user_connections= 0;
-+ulonglong denied_connections = 0;
- /**
-   Limit of the total number of prepared statements in the server.
-   Is necessary to protect the server against out-of-memory attacks.
-@@ -682,6 +684,10 @@
-               LOCK_global_system_variables,
-               LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
-                 LOCK_connection_count;
-+pthread_mutex_t LOCK_stats;
-+pthread_mutex_t LOCK_global_user_client_stats;
-+pthread_mutex_t LOCK_global_table_stats;
-+pthread_mutex_t LOCK_global_index_stats;
++ulonglong denied_connections= 0;
+ /* flashcache */
+ int cachedev_fd;
+@@ -630,7 +632,9 @@
+   LOCK_crypt,
+   LOCK_global_system_variables,
+   LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
+-  LOCK_connection_count, LOCK_error_messages;
++  LOCK_connection_count, LOCK_error_messages,
++  LOCK_stats, LOCK_global_user_client_stats,
++  LOCK_global_table_stats, LOCK_global_index_stats;
  /**
    The below lock protects access to two global server variables:
    max_prepared_stmt_count and prepared_stmt_count. These variables
-@@ -1367,6 +1373,11 @@
-   x_free(opt_secure_file_priv);
-   bitmap_free(&temp_pool);
-   free_max_user_conn();
+@@ -1490,6 +1494,11 @@
+ #ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
+   query_response_time_free();
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
 +  free_global_user_stats();
 +  free_global_client_stats();
 +  free_global_thread_stats();
@@ -635,26 +527,26 @@ diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
  #ifdef HAVE_REPLICATION
    end_slave_list();
  #endif
-@@ -1483,6 +1494,10 @@
-   (void) pthread_cond_destroy(&COND_thread_cache);
-   (void) pthread_cond_destroy(&COND_flush_thread_cache);
-   (void) pthread_cond_destroy(&COND_manager);
-+  (void) pthread_mutex_destroy(&LOCK_stats);
-+  (void) pthread_mutex_destroy(&LOCK_global_user_client_stats);
-+  (void) pthread_mutex_destroy(&LOCK_global_table_stats);
-+  (void) pthread_mutex_destroy(&LOCK_global_index_stats);
+@@ -1593,6 +1602,10 @@
+   mysql_cond_destroy(&COND_thread_cache);
+   mysql_cond_destroy(&COND_flush_thread_cache);
+   mysql_cond_destroy(&COND_manager);
++  mysql_mutex_destroy(&LOCK_stats);
++  mysql_mutex_destroy(&LOCK_global_user_client_stats);
++  mysql_mutex_destroy(&LOCK_global_table_stats);
++  mysql_mutex_destroy(&LOCK_global_index_stats);
  }
  #endif /*EMBEDDED_LIBRARY*/
-@@ -3172,6 +3187,7 @@
+@@ -3024,6 +3037,7 @@
    {"show_binlog_events",   (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOG_EVENTS]), SHOW_LONG_STATUS},
    {"show_binlogs",         (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOGS]), SHOW_LONG_STATUS},
    {"show_charsets",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CHARSETS]), SHOW_LONG_STATUS},
 +  {"show_client_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CLIENT_STATS]), SHOW_LONG_STATUS},
    {"show_collations",      (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLLATIONS]), SHOW_LONG_STATUS},
-   {"show_column_types",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLUMN_TYPES]), SHOW_LONG_STATUS},
    {"show_contributors",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CONTRIBUTORS]), SHOW_LONG_STATUS},
-@@ -3193,6 +3209,7 @@
+   {"show_create_db",       (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE_DB]), SHOW_LONG_STATUS},
+@@ -3044,6 +3058,7 @@
  #endif
    {"show_function_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS_FUNC]), SHOW_LONG_STATUS},
    {"show_grants",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GRANTS]), SHOW_LONG_STATUS},
@@ -662,41 +554,45 @@ diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
    {"show_keys",            (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_KEYS]), SHOW_LONG_STATUS},
    {"show_master_status",   (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_MASTER_STAT]), SHOW_LONG_STATUS},
    {"show_new_master",      (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NEW_MASTER]), SHOW_LONG_STATUS},
-@@ -3211,9 +3228,12 @@
-   {"show_slave_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_STAT]), SHOW_LONG_STATUS},
+@@ -3063,10 +3078,13 @@
+   {"show_slave_status_nolock", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_NOLOCK_STAT]), SHOW_LONG_STATUS},
    {"show_status",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS]), SHOW_LONG_STATUS},
    {"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS},
 +  {"show_table_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATS]), SHOW_LONG_STATUS},
    {"show_table_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS},
    {"show_tables",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLES]), SHOW_LONG_STATUS},
+   {"show_temporary_tables",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TEMPORARY_TABLES]), SHOW_LONG_STATUS},
 +  {"show_thread_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_THREAD_STATS]), SHOW_LONG_STATUS},
    {"show_triggers",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TRIGGERS]), SHOW_LONG_STATUS},
 +  {"show_user_statistics", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_USER_STATS]), SHOW_LONG_STATUS},
    {"show_variables",       (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_VARIABLES]), SHOW_LONG_STATUS},
    {"show_warnings",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_WARNS]), SHOW_LONG_STATUS},
    {"slave_start",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SLAVE_START]), SHOW_LONG_STATUS},
-@@ -3652,6 +3672,10 @@
- #endif
-   (void) pthread_mutex_init(&LOCK_server_started, MY_MUTEX_INIT_FAST);
-   (void) pthread_cond_init(&COND_server_started,NULL);
-+  (void) pthread_mutex_init(&LOCK_stats, MY_MUTEX_INIT_FAST);
-+  (void) pthread_mutex_init(&LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST);
-+  (void) pthread_mutex_init(&LOCK_global_table_stats, MY_MUTEX_INIT_FAST);
-+  (void) pthread_mutex_init(&LOCK_global_index_stats, MY_MUTEX_INIT_FAST);
+@@ -3587,6 +3605,13 @@
+   mysql_mutex_init(key_LOCK_server_started,
+                    &LOCK_server_started, MY_MUTEX_INIT_FAST);
+   mysql_cond_init(key_COND_server_started, &COND_server_started, NULL);
++  mysql_mutex_init(key_LOCK_stats, &LOCK_stats, MY_MUTEX_INIT_FAST);
++  mysql_mutex_init(key_LOCK_global_user_client_stats,
++    &LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST);
++  mysql_mutex_init(key_LOCK_global_table_stats,
++    &LOCK_global_table_stats, MY_MUTEX_INIT_FAST);
++  mysql_mutex_init(key_LOCK_global_index_stats,
++    &LOCK_global_index_stats, MY_MUTEX_INIT_FAST);
    sp_cache_init();
  #ifdef HAVE_EVENT_SCHEDULER
    Events::init_mutexes();
-@@ -4053,6 +4077,9 @@
-   if (!errmesg[0][0])
-     unireg_abort(1);
+@@ -3956,6 +3981,9 @@
+   query_response_time_init();
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+   /* We have to initialize the storage engines before CSV logging */
 +  init_global_table_stats();
 +  init_global_index_stats();
 +
-   /* We have to initialize the storage engines before CSV logging */
    if (ha_init())
    {
-@@ -4199,6 +4226,9 @@
+     sql_print_error("Can't init databases");
+@@ -4092,6 +4120,9 @@
  
    init_max_user_conn();
    init_update_queries();
@@ -706,69 +602,129 @@ diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
    DBUG_RETURN(0);
  }
  
-@@ -5016,6 +5046,7 @@
-     DBUG_PRINT("error",("Too many connections"));
-     close_connection(thd, ER_CON_COUNT_ERROR, 1);
+@@ -5123,6 +5154,7 @@
+     {
+       sql_print_warning("%s", ER_DEFAULT(ER_CON_COUNT_ERROR));
+     }
 +    statistic_increment(denied_connections, &LOCK_status);
      delete thd;
      DBUG_VOID_RETURN;
    }
-@@ -5800,6 +5831,8 @@
-   OPT_SLAVE_EXEC_MODE,
-   OPT_GENERAL_LOG_FILE,
-   OPT_SLOW_QUERY_LOG_FILE,
-+  OPT_USERSTAT_RUNNING,
-+  OPT_THREAD_STATISTICS,
-   OPT_USE_GLOBAL_LONG_QUERY_TIME,
-   OPT_USE_GLOBAL_LOG_SLOW_CONTROL,
-   OPT_SLOW_QUERY_LOG_MICROSECONDS_TIMESTAMP,
-@@ -7292,6 +7325,14 @@
-    &max_system_variables.net_wait_timeout, 0, GET_ULONG,
-    REQUIRED_ARG, NET_WAIT_TIMEOUT, 1, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT),
-    0, 1, 0},
-+  {"userstat_running", OPT_USERSTAT_RUNNING,
-+   "Control USER_STATISTICS, CLIENT_STATISTICS, THREAD_STATISTICS, INDEX_STATISTICS and TABLE_STATISTICS running",
-+   (uchar**) &opt_userstat_running, (uchar**) &opt_userstat_running,
-+   0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0},
-+  {"thread_statistics", OPT_THREAD_STATISTICS,
-+   "Control TABLE_STATISTICS running, when userstat_running is enabled",
-+   (uchar**) &opt_thread_statistics, (uchar**) &opt_thread_statistics,
-+   0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0},
-   {"binlog-direct-non-transactional-updates", OPT_BINLOG_DIRECT_NON_TRANS_UPDATE,
-    "Causes updates to non-transactional engines using statement format to be "
-    "written directly to binary log. Before using this option, make sure that "
-diff -ruN a/sql/set_var.cc b/sql/set_var.cc
---- a/sql/set_var.cc   2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/set_var.cc   2010-11-24 17:31:34.000000000 +0300
-@@ -556,6 +556,10 @@
- static sys_var_thd_ulong      sys_read_buff_size(&vars, "read_buffer_size",
-                                          &SV::read_buff_size);
- static sys_var_opt_readonly   sys_readonly(&vars, "read_only", &opt_readonly);
-+static sys_var_bool_ptr               sys_userstat_running(&vars, "userstat_running",
-+                                                   &opt_userstat_running);
-+static sys_var_bool_ptr               sys_thread_statistics(&vars, "thread_statistics",
-+                                                    &opt_thread_statistics);
- static sys_var_thd_ulong      sys_read_rnd_buff_size(&vars, "read_rnd_buffer_size",
-                                              &SV::read_rnd_buff_size);
- static sys_var_thd_ulong      sys_div_precincrement(&vars, "div_precision_increment",
+@@ -7820,6 +7852,8 @@
+   key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
+   key_LOCK_connection_count, key_LOCK_crypt, key_LOCK_delayed_create,
+   key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
++  key_LOCK_stats, key_LOCK_global_user_client_stats,
++  key_LOCK_global_table_stats, key_LOCK_global_index_stats,
+   key_LOCK_gdl, key_LOCK_global_system_variables,
+   key_LOCK_manager,
+   key_LOCK_prepared_stmt_count,
+@@ -7857,6 +7891,13 @@
+   { &key_LOCK_delayed_insert, "LOCK_delayed_insert", PSI_FLAG_GLOBAL},
+   { &key_LOCK_delayed_status, "LOCK_delayed_status", PSI_FLAG_GLOBAL},
+   { &key_LOCK_error_log, "LOCK_error_log", PSI_FLAG_GLOBAL},
++  { &key_LOCK_stats, "LOCK_stats", PSI_FLAG_GLOBAL},
++  { &key_LOCK_global_user_client_stats,
++    "LOCK_global_user_client_stats", PSI_FLAG_GLOBAL},
++  { &key_LOCK_global_table_stats,
++     "LOCK_global_table_stats", PSI_FLAG_GLOBAL},
++  { &key_LOCK_global_index_stats,
++    "LOCK_global_index_stats", PSI_FLAG_GLOBAL},
+   { &key_LOCK_gdl, "LOCK_gdl", PSI_FLAG_GLOBAL},
+   { &key_LOCK_global_system_variables, "LOCK_global_system_variables", PSI_FLAG_GLOBAL},
+   { &key_LOCK_manager, "LOCK_manager", PSI_FLAG_GLOBAL},
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h     2010-12-03 20:58:26.000000000 +0300
++++ b/sql/mysqld.h     2010-12-31 06:04:59.000000000 +0300
+@@ -23,6 +23,7 @@
+ #include "my_atomic.h"                     /* my_atomic_rwlock_t */
+ #include "mysql/psi/mysql_file.h"          /* MYSQL_FILE */
+ #include "sql_list.h"                      /* I_List */
++#include "hash.h"
+ class THD;
+ struct handlerton;
+@@ -114,6 +115,7 @@
+ extern ulonglong slave_type_conversions_options;
+ extern my_bool read_only, opt_readonly;
+ extern my_bool lower_case_file_system;
++extern my_bool opt_userstat_running, opt_thread_statistics;
+ extern my_bool opt_optimizer_fix;
+ extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
+ extern my_bool opt_secure_auth;
+@@ -180,6 +182,7 @@
+ extern ulong slave_trans_retries;
+ extern uint  slave_net_timeout;
+ extern uint max_user_connections;
++extern ulonglong denied_connections;
+ extern ulong what_to_log,flush_time;
+ extern ulong max_prepared_stmt_count, prepared_stmt_count;
+ extern ulong binlog_cache_size, open_files_limit;
+@@ -205,6 +208,11 @@
+ extern struct system_variables max_system_variables;
+ extern struct system_status_var global_status_var;
+ extern struct rand_struct sql_rand;
++extern HASH global_user_stats;
++extern HASH global_client_stats;
++extern HASH global_thread_stats;
++extern HASH global_table_stats;
++extern HASH global_index_stats;
+ extern const char *opt_date_time_formats[];
+ extern handlerton *partition_hton;
+ extern handlerton *myisam_hton;
+@@ -242,6 +250,8 @@
+   key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
+   key_LOCK_connection_count, key_LOCK_crypt, key_LOCK_delayed_create,
+   key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
++  key_LOCK_stats, key_LOCK_global_user_client_stats,
++  key_LOCK_global_table_stats, key_LOCK_global_index_stats,
+   key_LOCK_gdl, key_LOCK_global_system_variables,
+   key_LOCK_logger, key_LOCK_manager,
+   key_LOCK_prepared_stmt_count,
+@@ -338,7 +348,9 @@
+        LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
+        LOCK_slave_list, LOCK_active_mi, LOCK_manager,
+        LOCK_global_system_variables, LOCK_user_conn,
+-       LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_connection_count;
++       LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_connection_count,
++       LOCK_stats, LOCK_global_user_client_stats,
++       LOCK_global_table_stats, LOCK_global_index_stats;
+ extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_thread_count;
+ #ifdef HAVE_OPENSSL
+ extern mysql_mutex_t LOCK_des_key_file;
+@@ -450,6 +462,16 @@
+   return id;
+ }
++void init_global_user_stats(void);
++void init_global_table_stats(void);
++void init_global_index_stats(void);
++void init_global_client_stats(void);
++void init_global_thread_stats(void);
++void free_global_user_stats(void);
++void free_global_table_stats(void);
++void free_global_index_stats(void);
++void free_global_client_stats(void);
++void free_global_thread_stats(void);
+ /*
+   TODO: Replace this with an inline function.
 diff -ruN a/sql/sql_base.cc b/sql/sql_base.cc
---- a/sql/sql_base.cc  2010-10-12 00:34:33.000000000 +0400
-+++ b/sql/sql_base.cc  2010-11-24 17:29:05.000000000 +0300
-@@ -1382,6 +1382,12 @@
-   DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", table->s->db.str,
-                         table->s->table_name.str, (long) table));
+--- a/sql/sql_base.cc  2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_base.cc  2010-12-30 02:33:17.000000000 +0300
+@@ -1524,6 +1524,11 @@
+   table->mdl_ticket= NULL;
  
+   mysql_mutex_lock(&thd->LOCK_thd_data);
 +  if(table->file)
 +  {
 +    table->file->update_global_table_stats();
 +    table->file->update_global_index_stats();
 +  }
-+
    *table_ptr=table->next;
-   /*
-     When closing a MERGE parent or child table, detach the children first.
-@@ -1922,6 +1928,8 @@
+   mysql_mutex_unlock(&thd->LOCK_thd_data);
+@@ -2149,6 +2154,8 @@
    DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'",
                            table->s->db.str, table->s->table_name.str));
  
@@ -778,88 +734,97 @@ diff -ruN a/sql/sql_base.cc b/sql/sql_base.cc
    closefrm(table, 0);
    if (delete_table)
 diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
---- a/sql/sql_class.cc 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_class.cc 2010-11-24 17:31:33.000000000 +0300
-@@ -706,6 +706,13 @@
+--- a/sql/sql_class.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_class.cc 2010-12-30 02:41:40.000000000 +0300
+@@ -601,6 +601,13 @@
    mysys_var=0;
    binlog_evt_union.do_union= FALSE;
    enable_slow_log= 0;
-+  busy_time = 0;
-+  cpu_time = 0;
-+  bytes_received = 0;
-+  bytes_sent = 0;
-+  binlog_bytes_written = 0;
-+  updated_row_count = 0;
-+  sent_row_count_2 = 0;
++  busy_time=            0;
++  cpu_time=             0;
++  bytes_received=       0;
++  bytes_sent=           0;
++  binlog_bytes_written= 0;
++  updated_row_count=    0;
++  sent_row_count_2=     0;
  #ifndef DBUG_OFF
    dbug_sentry=THD_SENTRY_MAGIC;
  #endif
-@@ -909,6 +916,7 @@
-   reset_current_stmt_binlog_row_based();
-   bzero((char *) &status_var, sizeof(status_var));
-   sql_log_bin_toplevel= options & OPTION_BIN_LOG;
+@@ -981,6 +988,7 @@
+     variables.option_bits|= OPTION_BIN_LOG;
+   else
+     variables.option_bits&= ~OPTION_BIN_LOG;
 +  reset_stats();
  
  #if defined(ENABLED_DEBUG_SYNC)
    /* Initialize the Debug Sync Facility. See debug_sync.cc. */
-@@ -916,6 +924,84 @@
+@@ -988,6 +996,94 @@
  #endif /* defined(ENABLED_DEBUG_SYNC) */
  }
  
 +// Resets stats in a THD.
-+void THD::reset_stats(void) {
-+  current_connect_time = time(NULL);
-+  last_global_update_time = current_connect_time;
++void THD::reset_stats(void)
++{
++  current_connect_time=    time(NULL);
++  last_global_update_time= current_connect_time;
 +  reset_diff_stats();
 +}
 +
 +// Resets the 'diff' stats, which are used to update global stats.
-+void THD::reset_diff_stats(void) {
-+  diff_total_busy_time = 0;
-+  diff_total_cpu_time = 0;
-+  diff_total_bytes_received = 0;
-+  diff_total_bytes_sent = 0;
-+  diff_total_binlog_bytes_written = 0;
-+  diff_total_sent_rows = 0;
-+  diff_total_updated_rows = 0;
-+  diff_total_read_rows = 0;
-+  diff_select_commands = 0;
-+  diff_update_commands = 0;
-+  diff_other_commands = 0;
-+  diff_commit_trans = 0;
-+  diff_rollback_trans = 0;
-+  diff_denied_connections = 0;
-+  diff_lost_connections = 0;
-+  diff_access_denied_errors = 0;
-+  diff_empty_queries = 0;
++void THD::reset_diff_stats(void)
++{
++  diff_total_busy_time=            0;
++  diff_total_cpu_time=             0;
++  diff_total_bytes_received=       0;
++  diff_total_bytes_sent=           0;
++  diff_total_binlog_bytes_written= 0;
++  diff_total_sent_rows=            0;
++  diff_total_updated_rows=         0;
++  diff_total_read_rows=            0;
++  diff_select_commands=            0;
++  diff_update_commands=            0;
++  diff_other_commands=             0;
++  diff_commit_trans=               0;
++  diff_rollback_trans=             0;
++  diff_denied_connections=         0;
++  diff_lost_connections=           0;
++  diff_access_denied_errors=       0;
++  diff_empty_queries=              0;
 +}
 +
 +// Updates 'diff' stats of a THD.
-+void THD::update_stats(bool ran_command) {
-+  if (opt_userstat_running) {
-+  diff_total_busy_time += busy_time;
-+  diff_total_cpu_time += cpu_time;
-+  diff_total_bytes_received += bytes_received;
-+  diff_total_bytes_sent += bytes_sent;
-+  diff_total_binlog_bytes_written += binlog_bytes_written;
-+  diff_total_sent_rows += sent_row_count_2;
-+  diff_total_updated_rows += updated_row_count;
++void THD::update_stats(bool ran_command)
++{
++  if (opt_userstat_running)
++  {
++  diff_total_busy_time+=            busy_time;
++  diff_total_cpu_time+=             cpu_time;
++  diff_total_bytes_received+=       bytes_received;
++  diff_total_bytes_sent+=           bytes_sent;
++  diff_total_binlog_bytes_written+= binlog_bytes_written;
++  diff_total_sent_rows+=            sent_row_count_2;
++  diff_total_updated_rows+=         updated_row_count;
 +  // diff_total_read_rows is updated in handler.cc.
 +
-+  if (ran_command) {
++  if (ran_command)
++  {
 +    // The replication thread has the COM_CONNECT command.
 +    if ((old_command == COM_QUERY || command == COM_CONNECT) &&
-+        (lex->sql_command >= 0 && lex->sql_command < SQLCOM_END)) {
++        (lex->sql_command >= 0 && lex->sql_command < SQLCOM_END))
++    {
 +      // A SQL query.
-+      if (lex->sql_command == SQLCOM_SELECT) {
++      if (lex->sql_command == SQLCOM_SELECT)
++      {
 +        diff_select_commands++;
 +        if (!sent_row_count_2)
 +          diff_empty_queries++;
-+      } else if (! sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND) {
++      }
++      else if (!sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND)
++      {
 +        // 'SHOW ' commands become SQLCOM_SELECT.
 +        diff_other_commands++;
 +        // 'SHOW ' commands shouldn't inflate total sent row count.
-+        diff_total_sent_rows -= sent_row_count_2;
++        diff_total_sent_rows-= sent_row_count_2;
 +      } else if (is_update_query(lex->sql_command)) {
 +        diff_update_commands++;
 +      } else {
@@ -874,20 +839,21 @@ diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
 +  // diff_access_denied_errors is updated in sql_parse.cc.
 +
 +  /* reset counters to zero to avoid double-counting since values
-+     are already store in diff_total_*. */
++     are already stored in diff_total_*.
++  */
 +  }
-+  busy_time = 0;
-+  cpu_time = 0;
-+  bytes_received = 0;
-+  bytes_sent = 0;
-+  binlog_bytes_written = 0;
-+  updated_row_count = 0;
-+  sent_row_count_2 = 0;
++  busy_time=            0;
++  cpu_time=             0;
++  bytes_received=       0;
++  bytes_sent=           0;
++  binlog_bytes_written= 0;
++  updated_row_count=    0;
++  sent_row_count_2=     0;
 +}
  
  /*
    Init THD for query processing.
-@@ -1547,6 +1633,32 @@
+@@ -1688,6 +1784,32 @@
  }
  #endif
  
@@ -920,23 +886,23 @@ diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
  
  struct Item_change_record: public ilink
  {
-@@ -1734,6 +1846,7 @@
-     buffer.set(buff, sizeof(buff), &my_charset_bin);
+@@ -1864,6 +1986,7 @@
    }
    thd->sent_row_count++;
 +  thd->sent_row_count_2++;
-   if (thd->is_error())
-   {
-     protocol->remove_last_row();
-@@ -1838,6 +1951,7 @@
+   if (thd->vio_ok())
+     DBUG_RETURN(protocol->write());
+@@ -1956,6 +2079,7 @@
  select_export::~select_export()
  {
    thd->sent_row_count=row_count;
-+  thd->sent_row_count_2=row_count;
++  thd->sent_row_count_2= row_count;
  }
  
  
-@@ -2870,6 +2984,7 @@
+@@ -2979,6 +3103,7 @@
    if (likely(thd != 0))
    { /* current_thd==0 when close_connection() calls net_send_error() */
      thd->status_var.bytes_sent+= length;
@@ -944,7 +910,7 @@ diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
    }
  }
  
-@@ -2877,6 +2992,7 @@
+@@ -2986,6 +3111,7 @@
  void thd_increment_bytes_received(ulong length)
  {
    current_thd->status_var.bytes_received+= length;
@@ -953,27 +919,27 @@ diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
  
  
 diff -ruN a/sql/sql_class.h b/sql/sql_class.h
---- a/sql/sql_class.h  2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_class.h  2010-11-24 17:28:57.000000000 +0300
-@@ -1435,6 +1435,8 @@
-     first byte of the packet in do_command()
+--- a/sql/sql_class.h  2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_class.h  2010-12-31 05:15:57.000000000 +0300
+@@ -1610,6 +1610,8 @@
    */
    enum enum_server_command command;
+   uint32     server_id;
 +  // Used to save the command, before it is set to COM_SLEEP.
 +  enum enum_server_command old_command;
-   uint32     server_id;
    uint32     file_id;                 // for LOAD DATA INFILE
    /* remote (peer) port */
-@@ -1828,6 +1830,8 @@
-   /* variables.transaction_isolation is reset to this after each commit */
-   enum_tx_isolation session_tx_isolation;
+   uint16 peer_port;
+@@ -2081,6 +2083,8 @@
+   */
+   enum_tx_isolation tx_isolation;
    enum_check_fields count_cuted_fields;
 +  ha_rows    updated_row_count;
 +  ha_rows    sent_row_count_2; /* for userstat */
  
    DYNAMIC_ARRAY user_var_events;        /* For user variables replication */
    MEM_ROOT      *user_var_events_alloc; /* Allocate above array elements here */
-@@ -1916,6 +1920,49 @@
+@@ -2176,6 +2180,49 @@
    */
    LOG_INFO*  current_linfo;
    NET*       slave_net;                       // network connection from slave -> m.
@@ -982,7 +948,7 @@ diff -ruN a/sql/sql_class.h b/sql/sql_class.h
 +    Used to update global user stats.  The global user stats are updated
 +    occasionally with the 'diff' variables.  After the update, the 'diff'
 +    variables are reset to 0.
-+   */
++  */
 +  // Time when the current thread connected to MySQL.
 +  time_t current_connect_time;
 +  // Last time when THD stats were updated in global_user_stats.
@@ -1023,7 +989,7 @@ diff -ruN a/sql/sql_class.h b/sql/sql_class.h
    /* Used by the sys_var class to store temporary values */
    union
    {
-@@ -1981,6 +2028,11 @@
+@@ -2256,6 +2303,11 @@
      alloc_root. 
    */
    void init_for_queries();
@@ -1035,38 +1001,39 @@ diff -ruN a/sql/sql_class.h b/sql/sql_class.h
    void change_user(void);
    void cleanup(void);
    void cleanup_after_query();
-@@ -2351,9 +2403,15 @@
-     *p_db= strmake(db, db_length);
-     *p_db_length= db_length;
-     return FALSE;
-+
-+  // Returns string as 'IP:port' for the client-side of the connnection represented
-+  // by 'client' as displayed by SHOW PROCESSLIST. Allocates memory from the heap of
-+  // this THD and that is not reclaimed immediately, so use sparingly. May return NULL.
+@@ -2726,6 +2778,15 @@
    }
    thd_scheduler scheduler;
  
++  /* Returns string as 'IP:port' for the client-side
++     of the connection represented
++     by 'client' as displayed by SHOW PROCESSLIST.
++     Allocates memory from the heap of
++     this THD and that is not reclaimed
++     immediately, so use sparingly. May return NULL.
++  */
 +  char *get_client_host_port(THD *client);
 +
  public:
    inline Internal_error_handler *get_internal_handler()
    { return m_internal_handler; }
-@@ -2437,6 +2495,9 @@
+@@ -2913,6 +2974,10 @@
    LEX_STRING invoker_host;
  };
  
-+// Returns string as 'IP' for the client-side of the connection represented by
-+// 'client'. Does not allocate memory. May return "".
++/* Returns string as 'IP' for the client-side of the connection represented by
++   'client'. Does not allocate memory. May return "".
++*/
 +const char *get_client_host(THD *client);
  
- /** A short cut for thd->main_da.set_ok_status(). */
+ /** A short cut for thd->stmt_da->set_ok_status(). */
  
 diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
---- a/sql/sql_connect.cc       2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_connect.cc       2010-11-24 17:24:52.000000000 +0300
-@@ -42,6 +42,24 @@
- extern void win_install_sigabrt_handler();
- #endif
+--- a/sql/sql_connect.cc       2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_connect.cc       2010-12-31 03:53:28.000000000 +0300
+@@ -55,6 +55,24 @@
+ #define MIN_HANDSHAKE_SIZE      6
+ #endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */
  
 +// Increments connection count for user.
 +static int increment_connection_count(THD* thd, bool use_lock);
@@ -1078,43 +1045,43 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +HASH global_client_stats;
 +HASH global_thread_stats;
 +// Protects global_user_stats and global_client_stats
-+extern pthread_mutex_t LOCK_global_user_client_stats;
++extern mysql_mutex_t LOCK_global_user_client_stats;
 +
 +HASH global_table_stats;
-+extern pthread_mutex_t LOCK_global_table_stats;
++extern mysql_mutex_t LOCK_global_table_stats;
 +
 +HASH global_index_stats;
-+extern pthread_mutex_t LOCK_global_index_stats;
++extern mysql_mutex_t LOCK_global_index_stats;
 +
  /*
    Get structure for logging connection data for the current user
  */
-@@ -99,6 +117,563 @@
+@@ -112,6 +130,586 @@
  
  }
  
 +extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
 +                         my_bool not_used __attribute__((unused)))
 +{
-+  *length = strlen(user_stats->user);
-+  return (uchar*)user_stats->user;
++  *length= strlen(user_stats->user);
++  return (uchar*) user_stats->user;
 +}
 +
 +extern "C" uchar *get_key_thread_stats(THREAD_STATS *thread_stats, size_t *length,
 +                         my_bool not_used __attribute__((unused)))
 +{
-+  *length = sizeof(my_thread_id);
-+  return (uchar*)&(thread_stats->id);
++  *length= sizeof(my_thread_id);
++  return (uchar *) &(thread_stats->id);
 +}
 +
 +void free_user_stats(USER_STATS* user_stats)
 +{
-+  my_free((char*)user_stats, MYF(0));
++  my_free((char *) user_stats);
 +}
 +
 +void free_thread_stats(THREAD_STATS* thread_stats)
 +{
-+  my_free((char*)thread_stats, MYF(0));
++  my_free((char *) thread_stats);
 +}
 +
 +void init_user_stats(USER_STATS *user_stats,
@@ -1148,26 +1115,26 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +  strncpy(user_stats->user, user, sizeof(user_stats->user));
 +  strncpy(user_stats->priv_user, priv_user, sizeof(user_stats->priv_user));
 +
-+  user_stats->total_connections = total_connections;
-+  user_stats->concurrent_connections = concurrent_connections;
-+  user_stats->connected_time = connected_time;
-+  user_stats->busy_time = busy_time;
-+  user_stats->cpu_time = cpu_time;
-+  user_stats->bytes_received = bytes_received;
-+  user_stats->bytes_sent = bytes_sent;
-+  user_stats->binlog_bytes_written = binlog_bytes_written;
-+  user_stats->rows_fetched = rows_fetched;
-+  user_stats->rows_updated = rows_updated;
-+  user_stats->rows_read = rows_read;
-+  user_stats->select_commands = select_commands;
-+  user_stats->update_commands = update_commands;
-+  user_stats->other_commands = other_commands;
-+  user_stats->commit_trans = commit_trans;
-+  user_stats->rollback_trans = rollback_trans;
-+  user_stats->denied_connections = denied_connections;
-+  user_stats->lost_connections = lost_connections;
-+  user_stats->access_denied_errors = access_denied_errors;
-+  user_stats->empty_queries = empty_queries;
++  user_stats->total_connections=      total_connections;
++  user_stats->concurrent_connections= concurrent_connections;
++  user_stats->connected_time=         connected_time;
++  user_stats->busy_time=              busy_time;
++  user_stats->cpu_time=               cpu_time;
++  user_stats->bytes_received=         bytes_received;
++  user_stats->bytes_sent=             bytes_sent;
++  user_stats->binlog_bytes_written=   binlog_bytes_written;
++  user_stats->rows_fetched=           rows_fetched;
++  user_stats->rows_updated=           rows_updated;
++  user_stats->rows_read=              rows_read;
++  user_stats->select_commands=        select_commands;
++  user_stats->update_commands=        update_commands;
++  user_stats->other_commands=         other_commands;
++  user_stats->commit_trans=           commit_trans;
++  user_stats->rollback_trans=         rollback_trans;
++  user_stats->denied_connections=     denied_connections;
++  user_stats->lost_connections=       lost_connections;
++  user_stats->access_denied_errors=   access_denied_errors;
++  user_stats->empty_queries=          empty_queries;
 +  DBUG_VOID_RETURN;
 +}
 +
@@ -1198,28 +1165,28 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +  DBUG_PRINT("info",
 +             ("Add thread_stats entry for thread %lu",
 +              id));
-+  thread_stats->id = id;
-+
-+  thread_stats->total_connections = total_connections;
-+  thread_stats->concurrent_connections = concurrent_connections;
-+  thread_stats->connected_time = connected_time;
-+  thread_stats->busy_time = busy_time;
-+  thread_stats->cpu_time = cpu_time;
-+  thread_stats->bytes_received = bytes_received;
-+  thread_stats->bytes_sent = bytes_sent;
-+  thread_stats->binlog_bytes_written = binlog_bytes_written;
-+  thread_stats->rows_fetched = rows_fetched;
-+  thread_stats->rows_updated = rows_updated;
-+  thread_stats->rows_read = rows_read;
-+  thread_stats->select_commands = select_commands;
-+  thread_stats->update_commands = update_commands;
-+  thread_stats->other_commands = other_commands;
-+  thread_stats->commit_trans = commit_trans;
-+  thread_stats->rollback_trans = rollback_trans;
-+  thread_stats->denied_connections = denied_connections;
-+  thread_stats->lost_connections = lost_connections;
-+  thread_stats->access_denied_errors = access_denied_errors;
-+  thread_stats->empty_queries = empty_queries;
++  thread_stats->id= id;
++
++  thread_stats->total_connections=      total_connections;
++  thread_stats->concurrent_connections= concurrent_connections;
++  thread_stats->connected_time=         connected_time;
++  thread_stats->busy_time=              busy_time;
++  thread_stats->cpu_time=               cpu_time;
++  thread_stats->bytes_received=         bytes_received;
++  thread_stats->bytes_sent=             bytes_sent;
++  thread_stats->binlog_bytes_written=   binlog_bytes_written;
++  thread_stats->rows_fetched=           rows_fetched;
++  thread_stats->rows_updated=           rows_updated;
++  thread_stats->rows_read=              rows_read;
++  thread_stats->select_commands=        select_commands;
++  thread_stats->update_commands=        update_commands;
++  thread_stats->other_commands=         other_commands;
++  thread_stats->commit_trans=           commit_trans;
++  thread_stats->rollback_trans=         rollback_trans;
++  thread_stats->denied_connections=     denied_connections;
++  thread_stats->lost_connections=       lost_connections;
++  thread_stats->access_denied_errors=   access_denied_errors;
++  thread_stats->empty_queries=          empty_queries;
 +  DBUG_VOID_RETURN;
 +}
 +
@@ -1245,26 +1212,26 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +                    ulonglong access_denied_errors,
 +                    ulonglong empty_queries)
 +{
-+  user_stats->total_connections += total_connections;
-+  user_stats->concurrent_connections += concurrent_connections;
-+  user_stats->connected_time += connected_time;
-+  user_stats->busy_time += busy_time;
-+  user_stats->cpu_time += cpu_time;
-+  user_stats->bytes_received += bytes_received;
-+  user_stats->bytes_sent += bytes_sent;
-+  user_stats->binlog_bytes_written += binlog_bytes_written;
-+  user_stats->rows_fetched += rows_fetched;
-+  user_stats->rows_updated += rows_updated;
-+  user_stats->rows_read += rows_read;
-+  user_stats->select_commands += select_commands;
-+  user_stats->update_commands += update_commands;
-+  user_stats->other_commands += other_commands;
-+  user_stats->commit_trans += commit_trans;
-+  user_stats->rollback_trans += rollback_trans;
-+  user_stats->denied_connections += denied_connections;
-+  user_stats->lost_connections += lost_connections;
-+  user_stats->access_denied_errors += access_denied_errors;
-+  user_stats->empty_queries += empty_queries;
++  user_stats->total_connections+=      total_connections;
++  user_stats->concurrent_connections+= concurrent_connections;
++  user_stats->connected_time+=         connected_time;
++  user_stats->busy_time+=              busy_time;
++  user_stats->cpu_time+=               cpu_time;
++  user_stats->bytes_received+=         bytes_received;
++  user_stats->bytes_sent+=             bytes_sent;
++  user_stats->binlog_bytes_written+=   binlog_bytes_written;
++  user_stats->rows_fetched+=           rows_fetched;
++  user_stats->rows_updated+=           rows_updated;
++  user_stats->rows_read+=              rows_read;
++  user_stats->select_commands+=        select_commands;
++  user_stats->update_commands+=        update_commands;
++  user_stats->other_commands+=         other_commands;
++  user_stats->commit_trans+=           commit_trans;
++  user_stats->rollback_trans+=         rollback_trans;
++  user_stats->denied_connections+=     denied_connections;
++  user_stats->lost_connections+=       lost_connections;
++  user_stats->access_denied_errors+=   access_denied_errors;
++  user_stats->empty_queries+=          empty_queries;
 +}
 +
 +void add_thread_stats(THREAD_STATS *thread_stats,
@@ -1289,33 +1256,33 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +                    ulonglong access_denied_errors,
 +                    ulonglong empty_queries)
 +{
-+  thread_stats->total_connections += total_connections;
-+  thread_stats->concurrent_connections += concurrent_connections;
-+  thread_stats->connected_time += connected_time;
-+  thread_stats->busy_time += busy_time;
-+  thread_stats->cpu_time += cpu_time;
-+  thread_stats->bytes_received += bytes_received;
-+  thread_stats->bytes_sent += bytes_sent;
-+  thread_stats->binlog_bytes_written += binlog_bytes_written;
-+  thread_stats->rows_fetched += rows_fetched;
-+  thread_stats->rows_updated += rows_updated;
-+  thread_stats->rows_read += rows_read;
-+  thread_stats->select_commands += select_commands;
-+  thread_stats->update_commands += update_commands;
-+  thread_stats->other_commands += other_commands;
-+  thread_stats->commit_trans += commit_trans;
-+  thread_stats->rollback_trans += rollback_trans;
-+  thread_stats->denied_connections += denied_connections;
-+  thread_stats->lost_connections += lost_connections;
-+  thread_stats->access_denied_errors += access_denied_errors;
-+  thread_stats->empty_queries += empty_queries;
++  thread_stats->total_connections+=      total_connections;
++  thread_stats->concurrent_connections+= concurrent_connections;
++  thread_stats->connected_time+=         connected_time;
++  thread_stats->busy_time+=              busy_time;
++  thread_stats->cpu_time+=               cpu_time;
++  thread_stats->bytes_received+=         bytes_received;
++  thread_stats->bytes_sent+=             bytes_sent;
++  thread_stats->binlog_bytes_written+=   binlog_bytes_written;
++  thread_stats->rows_fetched+=           rows_fetched;
++  thread_stats->rows_updated+=           rows_updated;
++  thread_stats->rows_read+=              rows_read;
++  thread_stats->select_commands+=        select_commands;
++  thread_stats->update_commands+=        update_commands;
++  thread_stats->other_commands+=         other_commands;
++  thread_stats->commit_trans+=           commit_trans;
++  thread_stats->rollback_trans+=         rollback_trans;
++  thread_stats->denied_connections+=     denied_connections;
++  thread_stats->lost_connections+=       lost_connections;
++  thread_stats->access_denied_errors+=   access_denied_errors;
++  thread_stats->empty_queries+=          empty_queries;
 +}
 +
 +void init_global_user_stats(void)
 +{
-+  if (hash_init(&global_user_stats, system_charset_info, max_connections,
-+                0, 0, (hash_get_key)get_key_user_stats,
-+                (hash_free_key)free_user_stats, 0)) {
++  if (my_hash_init(&global_user_stats, system_charset_info, max_connections,
++                0, 0, (my_hash_get_key)get_key_user_stats,
++                (my_hash_free_key)free_user_stats, 0)) {
 +    sql_print_error("Initializing global_user_stats failed.");
 +    exit(1);
 +  }
@@ -1323,9 +1290,9 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +
 +void init_global_client_stats(void)
 +{
-+  if (hash_init(&global_client_stats, system_charset_info, max_connections,
-+                0, 0, (hash_get_key)get_key_user_stats,
-+                (hash_free_key)free_user_stats, 0)) {
++  if (my_hash_init(&global_client_stats, system_charset_info, max_connections,
++                0, 0, (my_hash_get_key)get_key_user_stats,
++                (my_hash_free_key)free_user_stats, 0)) {
 +    sql_print_error("Initializing global_client_stats failed.");
 +    exit(1);
 +  }
@@ -1333,9 +1300,10 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +
 +void init_global_thread_stats(void)
 +{
-+  if (hash_init(&global_thread_stats, &my_charset_bin, max_connections,
-+                0, 0, (hash_get_key)get_key_thread_stats,
-+                (hash_free_key)free_thread_stats, 0)) {
++  if (my_hash_init(&global_thread_stats, &my_charset_bin, max_connections,
++                0, 0, (my_hash_get_key) get_key_thread_stats,
++                (my_hash_free_key) free_thread_stats, 0))
++  {
 +    sql_print_error("Initializing global_client_stats failed.");
 +    exit(1);
 +  }
@@ -1344,20 +1312,20 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +extern "C" uchar *get_key_table_stats(TABLE_STATS *table_stats, size_t *length,
 +                                     my_bool not_used __attribute__((unused)))
 +{
-+  *length = strlen(table_stats->table);
-+  return (uchar*)table_stats->table;
++  *length= strlen(table_stats->table);
++  return (uchar*) table_stats->table;
 +}
 +
 +extern "C" void free_table_stats(TABLE_STATS* table_stats)
 +{
-+  my_free((char*)table_stats, MYF(0));
++  my_free((char*) table_stats);
 +}
 +
 +void init_global_table_stats(void)
 +{
-+  if (hash_init(&global_table_stats, system_charset_info, max_connections,
-+                0, 0, (hash_get_key)get_key_table_stats,
-+                (hash_free_key)free_table_stats, 0)) {
++  if (my_hash_init(&global_table_stats, system_charset_info, max_connections,
++                0, 0, (my_hash_get_key)get_key_table_stats,
++                (my_hash_free_key)free_table_stats, 0)) {
 +    sql_print_error("Initializing global_table_stats failed.");
 +    exit(1);
 +  }
@@ -1366,20 +1334,20 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +extern "C" uchar *get_key_index_stats(INDEX_STATS *index_stats, size_t *length,
 +                                     my_bool not_used __attribute__((unused)))
 +{
-+  *length = strlen(index_stats->index);
-+  return (uchar*)index_stats->index;
++  *length= strlen(index_stats->index);
++  return (uchar*) index_stats->index;
 +}
 +
 +extern "C" void free_index_stats(INDEX_STATS* index_stats)
 +{
-+  my_free((char*)index_stats, MYF(0));
++  my_free((char*) index_stats);
 +}
 +
 +void init_global_index_stats(void)
 +{
-+  if (hash_init(&global_index_stats, system_charset_info, max_connections,
-+                0, 0, (hash_get_key)get_key_index_stats,
-+                (hash_free_key)free_index_stats, 0)) {
++  if (my_hash_init(&global_index_stats, system_charset_info, max_connections,
++                0, 0, (my_hash_get_key)get_key_index_stats,
++                (my_hash_free_key)free_index_stats, 0)) {
 +    sql_print_error("Initializing global_index_stats failed.");
 +    exit(1);
 +  }
@@ -1387,27 +1355,27 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +
 +void free_global_user_stats(void)
 +{
-+  hash_free(&global_user_stats);
++  my_hash_free(&global_user_stats);
 +}
 +
 +void free_global_thread_stats(void)
 +{
-+  hash_free(&global_thread_stats);
++  my_hash_free(&global_thread_stats);
 +}
 +
 +void free_global_table_stats(void)
 +{
-+  hash_free(&global_table_stats);
++  my_hash_free(&global_table_stats);
 +}
 +
 +void free_global_index_stats(void)
 +{
-+  hash_free(&global_index_stats);
++  my_hash_free(&global_index_stats);
 +}
 +
 +void free_global_client_stats(void)
 +{
-+  hash_free(&global_client_stats);
++  my_hash_free(&global_client_stats);
 +}
 +
 +// 'mysql_system_user' is used for when the user is not defined for a THD.
@@ -1426,11 +1394,12 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +{
 +  USER_STATS* user_stats;
 +
-+  if (!(user_stats = (USER_STATS*)hash_search(users_or_clients, (uchar*) name,
-+                                              strlen(name))))
++  if (!(user_stats = (USER_STATS *) my_hash_search(users_or_clients,
++                                                   (uchar*) name,
++                                                   strlen(name))))
 +  {
 +    // First connection for this user or client
-+    if (!(user_stats = ((USER_STATS*)
++    if (!(user_stats = ((USER_STATS *)
 +                        my_malloc(sizeof(USER_STATS), MYF(MY_WME | MY_ZEROFILL)))))
 +    {
 +      return 1; // Out of memory
@@ -1448,9 +1417,9 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +                    0,         // access denied errors
 +                    0);        // empty queries
 +
-+    if (my_hash_insert(users_or_clients, (uchar*)user_stats))
++    if (my_hash_insert(users_or_clients, (uchar *) user_stats))
 +    {
-+      my_free((char*)user_stats, 0);
++      my_free((char *) user_stats);
 +      return 1; // Out of memory
 +    }
 +  }
@@ -1463,11 +1432,12 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +{
 +  THREAD_STATS* thread_stats;
 +
-+  if (!(thread_stats = (THREAD_STATS*)hash_search(users_or_clients, (uchar*) &id,
-+                                              sizeof(my_thread_id))))
++  if (!(thread_stats = (THREAD_STATS *) my_hash_search(users_or_clients,
++                                                       (uchar*) &id,
++                                                       sizeof(my_thread_id))))
 +  {
 +    // First connection for this user or client
-+    if (!(thread_stats = ((THREAD_STATS*)
++    if (!(thread_stats = ((THREAD_STATS *)
 +                        my_malloc(sizeof(THREAD_STATS), MYF(MY_WME | MY_ZEROFILL)))))
 +    {
 +      return 1; // Out of memory
@@ -1485,9 +1455,9 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +                    0,         // access denied errors
 +                    0);        // empty queries
 +
-+    if (my_hash_insert(users_or_clients, (uchar*)thread_stats))
++    if (my_hash_insert(users_or_clients, (uchar *) thread_stats))
 +    {
-+      my_free((char*)thread_stats, 0);
++      my_free((char *) thread_stats);
 +      return 1; // Out of memory
 +    }
 +  }
@@ -1495,43 +1465,47 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +  return 0;
 +}
 +
-+// Increments the global user and client stats connection count.  If 'use_lock'
-+// is true, LOCK_global_user_client_stats will be locked/unlocked.  Returns
-+// 0 on success, 1 on error.
++/* Increments the global user and client stats connection count.  If 'use_lock'
++   is true, LOCK_global_user_client_stats will be locked/unlocked.  Returns
++   0 on success, 1 on error.
++*/
 +static int increment_connection_count(THD* thd, bool use_lock)
 +{
-+  char* user_string = get_valid_user_string(thd->main_security_ctx.user);
-+  const char* client_string = get_client_host(thd);
-+  int return_value = 0;
++  char* user_string=         get_valid_user_string(thd->main_security_ctx.user);
++  const char* client_string= get_client_host(thd);
++  int return_value=          0;
 +
 +  if (!opt_userstat_running)
 +    return return_value;
 +
-+  if (use_lock) pthread_mutex_lock(&LOCK_global_user_client_stats);
++  if (use_lock)
++    mysql_mutex_lock(&LOCK_global_user_client_stats);
 +
 +  if (increment_count_by_name(user_string, user_string,
 +                              &global_user_stats, thd))
 +  {
-+    return_value = 1;
++    return_value= 1;
 +    goto end;
 +  }
 +  if (increment_count_by_name(client_string,
 +                              user_string,
 +                              &global_client_stats, thd))
 +  {
-+    return_value = 1;
++    return_value= 1;
 +    goto end;
 +  }
-+  if (opt_thread_statistics) {
++  if (opt_thread_statistics)
++  {
 +    if (increment_count_by_id(thd->thread_id, &global_thread_stats, thd))
 +    {
-+      return_value = 1;
++      return_value= 1;
 +      goto end;
 +    }
-+  }
++ }
 +
 +end:
-+  if (use_lock) pthread_mutex_unlock(&LOCK_global_user_client_stats);
++  if (use_lock)
++    mysql_mutex_unlock(&LOCK_global_user_client_stats);
 +  return return_value;
 +}
 +
@@ -1540,139 +1514,144 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +                                               USER_STATS* user_stats,
 +                                               time_t now)
 +{
-+  user_stats->connected_time += now - thd->last_global_update_time;
-+//  thd->last_global_update_time = now;
-+  user_stats->busy_time += thd->diff_total_busy_time;
-+  user_stats->cpu_time += thd->diff_total_cpu_time;
-+  user_stats->bytes_received += thd->diff_total_bytes_received;
-+  user_stats->bytes_sent += thd->diff_total_bytes_sent;
-+  user_stats->binlog_bytes_written += thd->diff_total_binlog_bytes_written;
-+  user_stats->rows_fetched += thd->diff_total_sent_rows;
-+  user_stats->rows_updated += thd->diff_total_updated_rows;
-+  user_stats->rows_read += thd->diff_total_read_rows;
-+  user_stats->select_commands += thd->diff_select_commands;
-+  user_stats->update_commands += thd->diff_update_commands;
-+  user_stats->other_commands += thd->diff_other_commands;
-+  user_stats->commit_trans += thd->diff_commit_trans;
-+  user_stats->rollback_trans += thd->diff_rollback_trans;
-+  user_stats->denied_connections += thd->diff_denied_connections;
-+  user_stats->lost_connections += thd->diff_lost_connections;
-+  user_stats->access_denied_errors += thd->diff_access_denied_errors;
-+  user_stats->empty_queries += thd->diff_empty_queries;
++  user_stats->connected_time+=       now - thd->last_global_update_time;
++//thd->last_global_update_time=      now;
++  user_stats->busy_time+=            thd->diff_total_busy_time;
++  user_stats->cpu_time+=             thd->diff_total_cpu_time;
++  user_stats->bytes_received+=       thd->diff_total_bytes_received;
++  user_stats->bytes_sent+=           thd->diff_total_bytes_sent;
++  user_stats->binlog_bytes_written+= thd->diff_total_binlog_bytes_written;
++  user_stats->rows_fetched+=         thd->diff_total_sent_rows;
++  user_stats->rows_updated+=         thd->diff_total_updated_rows;
++  user_stats->rows_read+=            thd->diff_total_read_rows;
++  user_stats->select_commands+=      thd->diff_select_commands;
++  user_stats->update_commands+=      thd->diff_update_commands;
++  user_stats->other_commands+=       thd->diff_other_commands;
++  user_stats->commit_trans+=         thd->diff_commit_trans;
++  user_stats->rollback_trans+=       thd->diff_rollback_trans;
++  user_stats->denied_connections+=   thd->diff_denied_connections;
++  user_stats->lost_connections+=     thd->diff_lost_connections;
++  user_stats->access_denied_errors+= thd->diff_access_denied_errors;
++  user_stats->empty_queries+=        thd->diff_empty_queries;
 +}
 +
 +static void update_global_thread_stats_with_thread(THD* thd,
 +                                               THREAD_STATS* thread_stats,
 +                                               time_t now)
 +{
-+  thread_stats->connected_time += now - thd->last_global_update_time;
-+//  thd->last_global_update_time = now;
-+  thread_stats->busy_time += thd->diff_total_busy_time;
-+  thread_stats->cpu_time += thd->diff_total_cpu_time;
-+  thread_stats->bytes_received += thd->diff_total_bytes_received;
-+  thread_stats->bytes_sent += thd->diff_total_bytes_sent;
-+  thread_stats->binlog_bytes_written += thd->diff_total_binlog_bytes_written;
-+  thread_stats->rows_fetched += thd->diff_total_sent_rows;
-+  thread_stats->rows_updated += thd->diff_total_updated_rows;
-+  thread_stats->rows_read += thd->diff_total_read_rows;
-+  thread_stats->select_commands += thd->diff_select_commands;
-+  thread_stats->update_commands += thd->diff_update_commands;
-+  thread_stats->other_commands += thd->diff_other_commands;
-+  thread_stats->commit_trans += thd->diff_commit_trans;
-+  thread_stats->rollback_trans += thd->diff_rollback_trans;
-+  thread_stats->denied_connections += thd->diff_denied_connections;
-+  thread_stats->lost_connections += thd->diff_lost_connections;
-+  thread_stats->access_denied_errors += thd->diff_access_denied_errors;
-+  thread_stats->empty_queries += thd->diff_empty_queries;
++  thread_stats->connected_time+=       now - thd->last_global_update_time;
++//thd->last_global_update_time=        now;
++  thread_stats->busy_time+=            thd->diff_total_busy_time;
++  thread_stats->cpu_time+=             thd->diff_total_cpu_time;
++  thread_stats->bytes_received+=       thd->diff_total_bytes_received;
++  thread_stats->bytes_sent+=           thd->diff_total_bytes_sent;
++  thread_stats->binlog_bytes_written+= thd->diff_total_binlog_bytes_written;
++  thread_stats->rows_fetched+=         thd->diff_total_sent_rows;
++  thread_stats->rows_updated+=         thd->diff_total_updated_rows;
++  thread_stats->rows_read+=            thd->diff_total_read_rows;
++  thread_stats->select_commands+=      thd->diff_select_commands;
++  thread_stats->update_commands+=      thd->diff_update_commands;
++  thread_stats->other_commands+=       thd->diff_other_commands;
++  thread_stats->commit_trans+=         thd->diff_commit_trans;
++  thread_stats->rollback_trans+=       thd->diff_rollback_trans;
++  thread_stats->denied_connections+=   thd->diff_denied_connections;
++  thread_stats->lost_connections+=     thd->diff_lost_connections;
++  thread_stats->access_denied_errors+= thd->diff_access_denied_errors;
++  thread_stats->empty_queries+=        thd->diff_empty_queries;
 +}
 +
 +// Updates the global stats of a user or client
 +void update_global_user_stats(THD* thd, bool create_user, time_t now)
 +{
-+  if (opt_userstat_running) {
-+  char* user_string = get_valid_user_string(thd->main_security_ctx.user);
-+  const char* client_string = get_client_host(thd);
++  if (opt_userstat_running)
++  {
++    char* user_string=         get_valid_user_string(thd->main_security_ctx.user);
++    const char* client_string= get_client_host(thd);
 +
-+  USER_STATS* user_stats;
-+  THREAD_STATS* thread_stats;
-+  pthread_mutex_lock(&LOCK_global_user_client_stats);
-+
-+  // Update by user name
-+  if ((user_stats = (USER_STATS*)hash_search(&global_user_stats,
-+                                             (uchar*)user_string,
-+                                             strlen(user_string)))) {
-+    // Found user.
-+    update_global_user_stats_with_user(thd, user_stats, now);
-+  } else {
-+    // Create the entry
-+    if (create_user) {
-+      increment_count_by_name(user_string, user_string,
-+                              &global_user_stats, thd);
-+    }
-+  }
++    USER_STATS* user_stats;
++    THREAD_STATS* thread_stats;
++    mysql_mutex_lock(&LOCK_global_user_client_stats);
 +
-+  // Update by client IP
-+  if ((user_stats = (USER_STATS*)hash_search(&global_client_stats,
-+                                             (uchar*)client_string,
-+                                             strlen(client_string)))) {
-+    // Found by client IP
-+    update_global_user_stats_with_user(thd, user_stats, now);
-+  } else {
-+    // Create the entry
-+    if (create_user) {
-+      increment_count_by_name(client_string,
-+                              user_string,
-+                              &global_client_stats, thd);
++    // Update by user name
++    if ((user_stats = (USER_STATS *) my_hash_search(&global_user_stats,
++                                                    (uchar *) user_string,
++                                                    strlen(user_string))))
++    {
++      // Found user.
++      update_global_user_stats_with_user(thd, user_stats, now);
++    }
++    else
++    {
++      // Create the entry
++      if (create_user)
++      {
++        increment_count_by_name(user_string, user_string,
++                                &global_user_stats, thd);
++      }
 +    }
-+  }
 +
-+  if (opt_thread_statistics) {
-+    // Update by thread ID
-+    if ((thread_stats = (THREAD_STATS*)hash_search(&global_thread_stats,
-+                                             (uchar*) &(thd->thread_id),
-+                                             sizeof(my_thread_id)))) {
-+      // Found by thread ID
-+      update_global_thread_stats_with_thread(thd, thread_stats, now);
-+    } else {
++    // Update by client IP
++    if ((user_stats = (USER_STATS *) my_hash_search(&global_client_stats,
++                                                    (uchar *) client_string,
++                                                    strlen(client_string))))
++    {
++      // Found by client IP
++      update_global_user_stats_with_user(thd, user_stats, now);
++    }
++    else
++    {
 +      // Create the entry
-+      if (create_user) {
-+        increment_count_by_id(thd->thread_id,
-+                              &global_thread_stats, thd);
++      if (create_user)
++      {
++        increment_count_by_name(client_string,
++                                user_string,
++                                &global_client_stats, thd);
 +      }
 +    }
-+  }
 +
-+  thd->last_global_update_time = now;
-+  thd->reset_diff_stats();
++    if (opt_thread_statistics)
++    {
++      // Update by thread ID
++      if ((thread_stats = (THREAD_STATS *) my_hash_search(&global_thread_stats,
++                                                          (uchar *) &(thd->thread_id),
++                                                          sizeof(my_thread_id))))
++      {
++        // Found by thread ID
++        update_global_thread_stats_with_thread(thd, thread_stats, now);
++      }
++      else
++      {
++        // Create the entry
++        if (create_user)
++        {
++          increment_count_by_id(thd->thread_id,
++                                &global_thread_stats, thd);
++        }
++      }
++    }
 +
-+  pthread_mutex_unlock(&LOCK_global_user_client_stats);
-+  } else {
-+  thd->reset_diff_stats();
++    thd->last_global_update_time = now;
++    thd->reset_diff_stats();
++
++    mysql_mutex_unlock(&LOCK_global_user_client_stats);
++  }
++  else
++  {
++    thd->reset_diff_stats();
 +  }
 +}
  
  /*
    check if user has already too many connections
-@@ -154,7 +729,10 @@
- end:
+@@ -169,6 +767,7 @@
    if (error)
-+  {
+   {
      uc->connections--; // no need for decrease_user_connections() here
 +    statistic_increment(denied_connections, &LOCK_status);
-+  }
-   (void) pthread_mutex_unlock(&LOCK_user_conn);
-   DBUG_RETURN(error);
- }
-@@ -490,6 +1068,7 @@
-     general_log_print(thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
-     DBUG_RETURN(1);
-   }
-+  thd->diff_access_denied_errors++;
-   my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
-            thd->main_security_ctx.user,
-            thd->main_security_ctx.host_or_ip,
-@@ -971,11 +1550,20 @@
+     /*
+       The thread may returned back to the pool and assigned to a user
+       that doesn't have a limit. Ensure the user is not using resources
+@@ -565,11 +1164,18 @@
        my_sleep(1000);                         /* must wait after eof() */
  #endif
      statistic_increment(aborted_connects,&LOCK_status);
@@ -1685,15 +1664,13 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
 +
 +  thd->reset_stats();
 +  // Updates global user connection stats.
-+  if (increment_connection_count(thd, true)) {
-+    net_send_error(thd, ER_OUTOFMEMORY);  // Out of memory
++  if (increment_connection_count(thd, true))
 +    DBUG_RETURN(1);
-+  }
 +
    DBUG_RETURN(0);
  }
  
-@@ -997,6 +1585,7 @@
+@@ -599,6 +1205,7 @@
    if (thd->killed || (net->error && net->vio != 0))
    {
      statistic_increment(aborted_threads,&LOCK_status);
@@ -1701,7 +1678,7 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
    }
  
    if (net->error && net->vio != 0)
-@@ -1123,10 +1712,14 @@
+@@ -728,10 +1335,14 @@
    for (;;)
    {
      NET *net= &thd->net;
@@ -1714,59 +1691,81 @@ diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
        goto end_thread;
 +    }
  
-     prepare_new_connection_state(thd);
-@@ -1149,6 +1742,8 @@
+     MYSQL_CONNECTION_START(thd->thread_id, thd->security_ctx->priv_user,
+                            (char *) thd->security_ctx->host_or_ip);
+@@ -758,6 +1369,8 @@
     
  end_thread:
      close_connection(thd, 0, 1);
 +    thd->update_stats(false);
 +    update_global_user_stats(thd, create_user, time(NULL));
-     if (thread_scheduler.end_thread(thd,1))
-       return 0;                                 // Probably no-threads
+     if (MYSQL_CALLBACK_ELSE(thread_scheduler, end_thread, (thd, 1), 0))
+       return;                                 // Probably no-threads
  
 diff -ruN a/sql/sql_delete.cc b/sql/sql_delete.cc
---- a/sql/sql_delete.cc        2010-10-12 00:34:33.000000000 +0400
-+++ b/sql/sql_delete.cc        2010-11-24 17:24:52.000000000 +0300
-@@ -452,6 +452,7 @@
-     my_ok(thd, (ha_rows) thd->row_count_func);
+--- a/sql/sql_delete.cc        2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_delete.cc        2010-12-31 03:58:22.000000000 +0300
+@@ -411,6 +411,7 @@
+     my_ok(thd, deleted);
      DBUG_PRINT("info",("%ld records deleted",(long) deleted));
    }
-+  thd->updated_row_count += deleted;
++  thd->updated_row_count+= deleted;
    DBUG_RETURN(error >= 0 || thd->is_error());
  }
  
-@@ -1059,6 +1060,7 @@
-     thd->row_count_func= deleted;
-     ::my_ok(thd, (ha_rows) thd->row_count_func);
+@@ -1005,6 +1006,7 @@
+   {
+     ::my_ok(thd, deleted);
    }
-+  thd->updated_row_count += deleted;
++  thd->updated_row_count+= deleted;
    return 0;
  }
  
 diff -ruN a/sql/sql_insert.cc b/sql/sql_insert.cc
---- a/sql/sql_insert.cc        2010-10-12 00:34:16.000000000 +0400
-+++ b/sql/sql_insert.cc        2010-11-24 17:24:52.000000000 +0300
-@@ -981,6 +981,7 @@
-     thd->row_count_func= info.copied + info.deleted + updated;
-     ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
+--- a/sql/sql_insert.cc        2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_insert.cc        2010-12-31 04:12:35.000000000 +0300
+@@ -1073,13 +1073,14 @@
+   if (error)
+     goto abort;
++  ha_rows row_count;
+   if (values_list.elements == 1 && (!(thd->variables.option_bits & OPTION_WARNINGS) ||
+                                   !thd->cuted_fields))
+   {
+-    my_ok(thd, info.copied + info.deleted +
++    row_count= info.copied + info.deleted +
+                ((thd->client_capabilities & CLIENT_FOUND_ROWS) ?
+-                info.touched : info.updated),
+-          id);
++                info.touched : info.updated);
++    my_ok(thd, row_count, id);
    }
-+  thd->updated_row_count += thd->row_count_func;
+   else
+   {
+@@ -1095,8 +1096,10 @@
+       sprintf(buff, ER(ER_INSERT_INFO), (ulong) info.records,
+             (ulong) (info.deleted + updated),
+               (ulong) thd->warning_info->statement_warn_count());
+-    ::my_ok(thd, info.copied + info.deleted + updated, id, buff);
++    row_count= info.copied + info.deleted + updated;
++    ::my_ok(thd, row_count, id, buff);
+   }
++  thd->updated_row_count+= row_count;
    thd->abort_on_warning= 0;
    DBUG_RETURN(FALSE);
  
-@@ -3309,6 +3310,7 @@
+@@ -3585,6 +3588,7 @@
       thd->first_successful_insert_id_in_prev_stmt :
       (info.copied ? autoinc_value_of_last_inserted_row : 0));
-   ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
-+  thd->updated_row_count += thd->row_count_func;
+   ::my_ok(thd, row_count, id, buff);
++  thd->updated_row_count+= row_count;
    DBUG_RETURN(0);
  }
  
 diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
---- a/sql/sql_lex.h    2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_lex.h    2010-11-24 17:31:33.000000000 +0300
-@@ -124,6 +124,9 @@
+--- a/sql/sql_lex.h    2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_lex.h    2010-12-31 05:07:18.000000000 +0300
+@@ -196,6 +196,9 @@
      When a command is added here, be sure it's also added in mysqld.cc
      in "struct show_var_st status_vars[]= {" ...
    */
@@ -1777,11 +1776,11 @@ diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
    SQLCOM_END
  };
 diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
---- a/sql/sql_parse.cc 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_parse.cc 2010-11-24 17:45:19.000000000 +0300
-@@ -47,6 +47,9 @@
+--- a/sql/sql_parse.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_parse.cc 2010-12-31 04:57:45.000000000 +0300
+@@ -116,6 +116,9 @@
  static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables);
- static bool check_show_create_table_access(THD *thd, TABLE_LIST *table);
+ static void sql_kill(THD *thd, ulong id, bool only_kill_query);
  
 +// Uses the THD to update the global stats by user name and client IP
 +void update_global_user_stats(THD* thd, bool create_user, time_t now);
@@ -1789,30 +1788,31 @@ diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
  const char *any_db="*any*";   // Special symbol for check_access
  
  const LEX_STRING command_name[]={
-@@ -825,6 +828,12 @@
+@@ -701,6 +704,12 @@
    */
    thd->clear_error();                         // Clear error message
-   thd->main_da.reset_diagnostics_area();
-+  thd->updated_row_count=0;
-+  thd->busy_time=0;
-+  thd->cpu_time=0;
-+  thd->bytes_received=0;
-+  thd->bytes_sent=0;
-+  thd->binlog_bytes_written=0;
+   thd->stmt_da->reset_diagnostics_area();
++  thd->updated_row_count=    0;
++  thd->busy_time=            0;
++  thd->cpu_time=             0;
++  thd->bytes_received=       0;
++  thd->bytes_sent=           0;
++  thd->binlog_bytes_written= 0;
  
    net_new_transaction(net);
  
-@@ -994,6 +1003,9 @@
-   DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command));
+@@ -886,6 +895,10 @@
+                       (char *) thd->security_ctx->host_or_ip);
+   
    thd->command=command;
-+  // To increment the corrent command counter for user stats, 'command' must
-+  // be saved because it is set to COM_SLEEP at the end of this function.
-+  thd->old_command = command;
++  /* To increment the correct command counter for user stats, 'command' must
++     be saved because it is set to COM_SLEEP at the end of this function.
++  */
++  thd->old_command= command;
    /*
      Commands which always take a long time are logged into
      the slow log only if opt_log_slow_admin_statements is set.
-@@ -1865,6 +1877,13 @@
+@@ -1619,6 +1632,13 @@
      thd->profiling.discard_current_query();
  #endif
      break;
@@ -1826,7 +1826,7 @@ diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
    case SCH_OPEN_TABLES:
    case SCH_VARIABLES:
    case SCH_STATUS:
-@@ -2021,6 +2040,7 @@
+@@ -1776,6 +1796,7 @@
                         thd->security_ctx->priv_host)) &&
          check_global_access(thd, SUPER_ACL))
      {
@@ -1834,47 +1834,23 @@ diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
        my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "SUPER");
        DBUG_RETURN(TRUE);
      }
-@@ -5348,6 +5368,7 @@
-       if (!no_errors)
-       {
-         const char *db_name= db ? db : thd->db;
-+        thd->diff_access_denied_errors++;
-         my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
-                  sctx->priv_user, sctx->priv_host, db_name);
-       }
-@@ -5380,12 +5401,15 @@
-   {                                           // We can never grant this
+@@ -4705,6 +4726,7 @@
+       case ACL_INTERNAL_ACCESS_DENIED:
+         if (! no_errors)
+         {
++          thd->diff_access_denied_errors++;
+           my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+                    sctx->priv_user, sctx->priv_host, db);
+         }
+@@ -4755,6 +4777,7 @@
      DBUG_PRINT("error",("No possible access"));
      if (!no_errors)
-+    {
+     {
 +      thd->diff_access_denied_errors++;
-       my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
-                sctx->priv_user,
-                sctx->priv_host,
-                (thd->password ?
-                 ER(ER_YES) :
-                 ER(ER_NO)));                    /* purecov: tested */
-+    }
-     DBUG_RETURN(TRUE);                                /* purecov: tested */
-   }
-@@ -5411,11 +5435,15 @@
-   DBUG_PRINT("error",("Access denied"));
-   if (!no_errors)
-+  {
-+    // increment needs !no_errors condition, otherwise double counting.
-+    thd->diff_access_denied_errors++;
-     my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
-              sctx->priv_user, sctx->priv_host,
-              (db ? db : (thd->db ?
-                          thd->db :
-                          "unknown")));          /* purecov: tested */
-+  }
-   DBUG_RETURN(TRUE);                          /* purecov: tested */
- }
-@@ -5444,6 +5472,7 @@
+       if (thd->password == 2)
+         my_error(ER_ACCESS_DENIED_NO_PASSWORD_ERROR, MYF(0),
+                  sctx->priv_user,
+@@ -4871,6 +4894,7 @@
  
      if (!thd->col_access && check_grant_db(thd, dst_db_name))
      {
@@ -1882,20 +1858,7 @@ diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
        my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
                 thd->security_ctx->priv_user,
                 thd->security_ctx->priv_host,
-@@ -5525,9 +5554,12 @@
-         (want_access & ~(SELECT_ACL | EXTRA_ACL | FILE_ACL)))
-     {
-       if (!no_errors)
-+      {
-+        thd->diff_access_denied_errors++;
-         my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
-                  sctx->priv_user, sctx->priv_host,
-                  INFORMATION_SCHEMA_NAME.str);
-+      }
-       return TRUE;
-     }
-     /*
-@@ -5690,6 +5722,7 @@
+@@ -5141,6 +5165,7 @@
    if ((thd->security_ctx->master_access & want_access))
      return 0;
    get_privilege_desc(command, sizeof(command), want_access);
@@ -1903,22 +1866,23 @@ diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
    my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), command);
    return 1;
  #else
-@@ -6071,6 +6104,30 @@
+@@ -5529,6 +5554,32 @@
    lex_start(thd);
    mysql_reset_thd_for_next_command(thd);
  
-+  int start_time_error = 0;
-+  int end_time_error = 0;
++  int start_time_error=   0;
++  int end_time_error=     0;
 +  struct timeval start_time, end_time;
-+  double start_usecs = 0;
-+  double end_usecs = 0;
++  double start_usecs=     0;
++  double end_usecs=       0;
 +  /* cpu time */
-+  int cputime_error = 0;
++  int cputime_error=      0;
 +  struct timespec tp;
-+  double start_cpu_nsecs = 0;
-+  double end_cpu_nsecs = 0;
++  double start_cpu_nsecs= 0;
++  double end_cpu_nsecs=   0;
 +
-+  if (opt_userstat_running) {
++  if (opt_userstat_running)
++  {
 +#ifdef HAVE_CLOCK_GETTIME
 +    /* get start cputime */
 +    if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
@@ -1926,7 +1890,8 @@ diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
 +#endif
 +
 +    // Gets the start time, in order to measure how long this command takes.
-+    if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++    if (!(start_time_error = gettimeofday(&start_time, NULL)))
++    {
 +      start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
 +    }
 +  }
@@ -1934,26 +1899,32 @@ diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
    if (query_cache_send_result_to_client(thd, rawbuf, length) <= 0)
    {
      LEX *lex= thd->lex;
-@@ -6151,6 +6208,43 @@
-     *found_semicolon= NULL;
+@@ -5597,6 +5648,52 @@
+     DBUG_ASSERT(thd->change_list.is_empty());
    }
  
-+  if (opt_userstat_running) {
++  if (opt_userstat_running)
++  {
 +    // Gets the end time.
-+    if (!(end_time_error = gettimeofday(&end_time, NULL))) {
-+      end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++    if (!(end_time_error= gettimeofday(&end_time, NULL)))
++    {
++      end_usecs= end_time.tv_sec * 1000000.0 + end_time.tv_usec;
 +    }
 +
 +    // Calculates the difference between the end and start times.
-+    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
-+      thd->busy_time = (end_usecs - start_usecs) / 1000000;
++    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error)
++    {
++      thd->busy_time= (end_usecs - start_usecs) / 1000000;
 +      // In case there are bad values, 2629743 is the #seconds in a month.
-+      if (thd->busy_time > 2629743) {
-+        thd->busy_time = 0;
++      if (thd->busy_time > 2629743)
++      {
++        thd->busy_time= 0;
 +      }
-+    } else {
++    }
++    else
++    {
 +      // end time went back in time, or gettimeofday() failed.
-+      thd->busy_time = 0;
++      thd->busy_time= 0;
 +    }
 +
 +#ifdef HAVE_CLOCK_GETTIME
@@ -1962,13 +1933,16 @@ diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
 +        !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
 +      end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
 +#endif
-+    if (start_cpu_nsecs && !cputime_error) {
++    if (start_cpu_nsecs && !cputime_error)
++    {
 +      thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
 +      // In case there are bad values, 2629743 is the #seconds in a month.
-+      if (thd->cpu_time > 2629743) {
++      if (thd->cpu_time > 2629743)
++      {
 +        thd->cpu_time = 0;
 +      }
-+    } else
++    }
++    else
 +      thd->cpu_time = 0;
 +  }
 +  // Updates THD stats and the global user stats.
@@ -1978,67 +1952,12 @@ diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
    DBUG_VOID_RETURN;
  }
  
-@@ -7016,6 +7110,13 @@
-     if (flush_error_log())
-       result=1;
-   }
-+  if (((options & (REFRESH_SLOW_QUERY_LOG | REFRESH_LOG)) ==
-+       REFRESH_SLOW_QUERY_LOG))
-+  {
-+    /* We are only flushing slow query log */
-+    logger.flush_slow_log(thd);
-+  }
-+
- #ifdef HAVE_QUERY_CACHE
-   if (options & REFRESH_QUERY_CACHE_FREE)
-   {
-@@ -7116,6 +7217,40 @@
- #endif
-  if (options & REFRESH_USER_RESOURCES)
-    reset_mqh((LEX_USER *) NULL, 0);             /* purecov: inspected */
-+  if (options & REFRESH_TABLE_STATS)
-+  {
-+    pthread_mutex_lock(&LOCK_global_table_stats);
-+    free_global_table_stats();
-+    init_global_table_stats();
-+    pthread_mutex_unlock(&LOCK_global_table_stats);
-+  }
-+  if (options & REFRESH_INDEX_STATS)
-+  {
-+    pthread_mutex_lock(&LOCK_global_index_stats);
-+    free_global_index_stats();
-+    init_global_index_stats();
-+    pthread_mutex_unlock(&LOCK_global_index_stats);
-+  }
-+  if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS | REFRESH_THREAD_STATS))
-+  {
-+    pthread_mutex_lock(&LOCK_global_user_client_stats);
-+    if (options & REFRESH_USER_STATS)
-+    {
-+      free_global_user_stats();
-+      init_global_user_stats();
-+    }
-+    if (options & REFRESH_CLIENT_STATS)
-+    {
-+      free_global_client_stats();
-+      init_global_client_stats();
-+    }
-+    if (options & REFRESH_THREAD_STATS)
-+    {
-+      free_global_thread_stats();
-+      init_global_thread_stats();
-+    }
-+    pthread_mutex_unlock(&LOCK_global_user_client_stats);
-+  }
-  *write_to_binlog= tmp_write_to_binlog;
-  /*
-    If the query was killed then this function must fail.
 diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
---- a/sql/sql_prepare.cc       2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_prepare.cc       2010-11-24 17:45:09.000000000 +0300
-@@ -96,6 +96,9 @@
- #include <mysql_com.h>
+--- a/sql/sql_prepare.cc       2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_prepare.cc       2010-12-31 04:25:04.000000000 +0300
+@@ -114,6 +114,9 @@
  #endif
+ #include "lock.h"                               // MYSQL_OPEN_FORCE_SHARED_MDL
  
 +// Uses the THD to update the global stats by user name and client IP
 +void update_global_user_stats(THD* thd, bool create_user, time_t now);
@@ -2046,31 +1965,33 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
  /**
    A result class used to send cursor rows using the binary protocol.
  */
-@@ -2103,8 +2106,32 @@
+@@ -2173,8 +2176,34 @@
    /* First of all clear possible warnings from the previous command */
    mysql_reset_thd_for_next_command(thd);
  
-+  int start_time_error = 0;
-+  int end_time_error = 0;
++  int start_time_error=   0;
++  int end_time_error=     0;
 +  struct timeval start_time, end_time;
-+  double start_usecs = 0;
-+  double end_usecs = 0;
++  double start_usecs=     0;
++  double end_usecs=       0;
 +  /* cpu time */
-+  int cputime_error = 0;
++  int cputime_error=      0;
 +  struct timespec tp;
-+  double start_cpu_nsecs = 0;
-+  double end_cpu_nsecs = 0;
++  double start_cpu_nsecs= 0;
++  double end_cpu_nsecs=   0;
 +
-+  if (opt_userstat_running) {
++  if (opt_userstat_running)
++  {
 +#ifdef HAVE_CLOCK_GETTIME
 +    /* get start cputime */
-+    if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+      start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++    if (!(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      start_cpu_nsecs= tp.tv_sec * 1000000000.0 + tp.tv_nsec;
 +#endif
 +
 +    // Gets the start time, in order to measure how long this command takes.
-+    if (!(start_time_error = gettimeofday(&start_time, NULL))) {
-+      start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++    if (!(start_time_error= gettimeofday(&start_time, NULL)))
++    {
++      start_usecs= start_time.tv_sec * 1000000.0 + start_time.tv_usec;
 +    }
 +  }
 +
@@ -2080,7 +2001,7 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
  
    if (thd->stmt_map.insert(thd, stmt))
    {
-@@ -2112,7 +2139,7 @@
+@@ -2182,7 +2211,7 @@
        The error is set in the insert. The statement itself
        will be also deleted there (this is how the hash works).
      */
@@ -2088,43 +2009,52 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
 +    goto end;
    }
  
-   /* Reset warnings from previous command */
-@@ -2139,6 +2166,44 @@
+   thd->protocol= &thd->protocol_binary;
+@@ -2196,6 +2225,53 @@
    thd->protocol= save_protocol;
  
    /* check_prepared_statemnt sends the metadata packet in case of success */
 +end:
-+  if (opt_userstat_running) {
++  if (opt_userstat_running)
++  {
 +    // Gets the end time.
-+    if (!(end_time_error = gettimeofday(&end_time, NULL))) {
-+      end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++    if (!(end_time_error= gettimeofday(&end_time, NULL)))
++    {
++      end_usecs= end_time.tv_sec * 1000000.0 + end_time.tv_usec;
 +    }
 +
 +    // Calculates the difference between the end and start times.
-+    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
-+      thd->busy_time = (end_usecs - start_usecs) / 1000000;
++    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error)
++    {
++      thd->busy_time= (end_usecs - start_usecs) / 1000000;
 +      // In case there are bad values, 2629743 is the #seconds in a month.
-+      if (thd->busy_time > 2629743) {
-+        thd->busy_time = 0;
++      if (thd->busy_time > 2629743)
++      {
++        thd->busy_time= 0;
 +      }
-+    } else {
++    }
++    else
++    {
 +      // end time went back in time, or gettimeofday() failed.
-+      thd->busy_time = 0;
++      thd->busy_time= 0;
 +    }
 +
 +#ifdef HAVE_CLOCK_GETTIME
 +    /* get end cputime */
 +    if (!cputime_error &&
-+        !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+      end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++        !(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      end_cpu_nsecs= tp.tv_sec*1000000000.0+tp.tv_nsec;
 +#endif
-+    if (start_cpu_nsecs && !cputime_error) {
-+      thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++    if (start_cpu_nsecs && !cputime_error)
++    {
++      thd->cpu_time= (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
 +      // In case there are bad values, 2629743 is the #seconds in a month.
-+      if (thd->cpu_time > 2629743) {
-+        thd->cpu_time = 0;
++      if (thd->cpu_time > 2629743)
++      {
++        thd->cpu_time= 0;
 +      }
-+    } else
++    }
++    else
 +      thd->cpu_time = 0;
 +  }
 +  // Updates THD stats and the global user stats.
@@ -2134,22 +2064,23 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
    DBUG_VOID_RETURN;
  }
  
-@@ -2489,12 +2554,36 @@
+@@ -2553,12 +2629,38 @@
    /* First of all clear possible warnings from the previous command */
    mysql_reset_thd_for_next_command(thd);
  
-+  int start_time_error = 0;
-+  int end_time_error = 0;
++  int start_time_error=   0;
++  int end_time_error=     0;
 +  struct timeval start_time, end_time;
-+  double start_usecs = 0;
-+  double end_usecs = 0;
++  double start_usecs=     0;
++  double end_usecs=       0;
 +  /* cpu time */
-+  int cputime_error = 0;
++  int cputime_error=      0;
 +  struct timespec tp;
-+  double start_cpu_nsecs = 0;
-+  double end_cpu_nsecs = 0;
++  double start_cpu_nsecs= 0;
++  double end_cpu_nsecs=   0;
 +
-+  if (opt_userstat_running) {
++  if (opt_userstat_running)
++  {
 +#ifdef HAVE_CLOCK_GETTIME
 +    /* get start cputime */
 +    if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
@@ -2157,7 +2088,8 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
 +#endif
 +
 +    // Gets the start time, in order to measure how long this command takes.
-+    if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++    if (!(start_time_error = gettimeofday(&start_time, NULL)))
++    {
 +      start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
 +    }
 +  }
@@ -2171,43 +2103,52 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
 +    goto end;
    }
  
- #if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER)
-@@ -2515,6 +2604,44 @@
+ #if defined(ENABLED_PROFILING)
+@@ -2576,6 +2678,53 @@
    /* Close connection socket; for use with client testing (Bug#43560). */
    DBUG_EXECUTE_IF("close_conn_after_stmt_execute", vio_close(thd->net.vio););
  
 +end:
-+  if (opt_userstat_running) {
++  if (opt_userstat_running)
++  {
 +    // Gets the end time.
-+    if (!(end_time_error = gettimeofday(&end_time, NULL))) {
-+      end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++    if (!(end_time_error= gettimeofday(&end_time, NULL)))
++    {
++      end_usecs= end_time.tv_sec * 1000000.0 + end_time.tv_usec;
 +    }
 +
 +    // Calculates the difference between the end and start times.
-+    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
-+      thd->busy_time = (end_usecs - start_usecs) / 1000000;
++    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error)
++    {
++      thd->busy_time= (end_usecs - start_usecs) / 1000000;
 +      // In case there are bad values, 2629743 is the #seconds in a month.
-+      if (thd->busy_time > 2629743) {
-+        thd->busy_time = 0;
++      if (thd->busy_time > 2629743)
++      {
++        thd->busy_time= 0;
 +      }
-+    } else {
++    }
++    else
++    {
 +      // end time went back in time, or gettimeofday() failed.
-+      thd->busy_time = 0;
++      thd->busy_time= 0;
 +    }
 +
 +#ifdef HAVE_CLOCK_GETTIME
 +    /* get end cputime */
 +    if (!cputime_error &&
-+        !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+      end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++        !(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      end_cpu_nsecs= tp.tv_sec*1000000000.0+tp.tv_nsec;
 +#endif
-+    if (start_cpu_nsecs && !cputime_error) {
-+      thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++    if (start_cpu_nsecs && !cputime_error)
++    {
++      thd->cpu_time= (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
 +      // In case there are bad values, 2629743 is the #seconds in a month.
-+      if (thd->cpu_time > 2629743) {
-+        thd->cpu_time = 0;
++      if (thd->cpu_time > 2629743)
++      {
++        thd->cpu_time= 0;
 +      }
-+    } else
++    }
++    else
 +      thd->cpu_time = 0;
 +  }
 +  // Updates THD stats and the global user stats.
@@ -2215,34 +2156,36 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
 +  update_global_user_stats(thd, true, time(NULL));
 +
    DBUG_VOID_RETURN;
  }
-@@ -2588,20 +2715,45 @@
+@@ -2648,20 +2797,47 @@
  
    /* First of all clear possible warnings from the previous command */
    mysql_reset_thd_for_next_command(thd);
 +
-+  int start_time_error = 0;
-+  int end_time_error = 0;
++  int start_time_error=   0;
++  int end_time_error=     0;
 +  struct timeval start_time, end_time;
-+  double start_usecs = 0;
-+  double end_usecs = 0;
++  double start_usecs=     0;
++  double end_usecs=       0;
 +  /* cpu time */
-+  int cputime_error = 0;
++  int cputime_error=      0;
 +  struct timespec tp;
-+  double start_cpu_nsecs = 0;
-+  double end_cpu_nsecs = 0;
++  double start_cpu_nsecs= 0;
++  double end_cpu_nsecs=   0;
 +
-+  if (opt_userstat_running) {
++  if (opt_userstat_running)
++  {
 +#ifdef HAVE_CLOCK_GETTIME
 +    /* get start cputime */
-+    if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+      start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++    if (!(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      start_cpu_nsecs= tp.tv_sec*1000000000.0+tp.tv_nsec;
 +#endif
 +
 +    // Gets the start time, in order to measure how long this command takes.
-+    if (!(start_time_error = gettimeofday(&start_time, NULL))) {
-+      start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++    if (!(start_time_error= gettimeofday(&start_time, NULL)))
++    {
++      start_usecs= start_time.tv_sec * 1000000.0 + start_time.tv_usec;
 +    }
 +  }
 +
@@ -2265,43 +2208,51 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
    }
  
    thd->stmt_arena= stmt;
-@@ -2625,6 +2777,44 @@
+@@ -2678,6 +2854,52 @@
    thd->restore_backup_statement(stmt, &stmt_backup);
    thd->stmt_arena= thd;
  
 +end:
-+  if (opt_userstat_running) {
++  if (opt_userstat_running)
++  {
 +    // Gets the end time.
-+    if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++    if (!(end_time_error = gettimeofday(&end_time, NULL)))
++    {
 +      end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
 +    }
 +
 +    // Calculates the difference between the end and start times.
-+    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
-+      thd->busy_time = (end_usecs - start_usecs) / 1000000;
++    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error)
++    {
++      thd->busy_time= (end_usecs - start_usecs) / 1000000;
 +      // In case there are bad values, 2629743 is the #seconds in a month.
-+      if (thd->busy_time > 2629743) {
-+        thd->busy_time = 0;
++      if (thd->busy_time > 2629743)
++      {
++        thd->busy_time= 0;
 +      }
-+    } else {
++    }
++    else
++    {
 +      // end time went back in time, or gettimeofday() failed.
-+      thd->busy_time = 0;
++      thd->busy_time= 0;
 +    }
 +
 +#ifdef HAVE_CLOCK_GETTIME
 +    /* get end cputime */
 +    if (!cputime_error &&
-+        !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+      end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++        !(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      end_cpu_nsecs= tp.tv_sec*1000000000.0+tp.tv_nsec;
 +#endif
-+    if (start_cpu_nsecs && !cputime_error) {
-+      thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++    if (start_cpu_nsecs && !cputime_error)
++    {
++      thd->cpu_time= (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
 +      // In case there are bad values, 2629743 is the #seconds in a month.
-+      if (thd->cpu_time > 2629743) {
-+        thd->cpu_time = 0;
++      if (thd->cpu_time > 2629743)
++      {
++        thd->cpu_time= 0;
 +      }
 +    } else
-+      thd->cpu_time = 0;
++      thd->cpu_time= 0;
 +  }
 +  // Updates THD stats and the global user stats.
 +  thd->update_stats(true);
@@ -2310,31 +2261,33 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
    DBUG_VOID_RETURN;
  }
  
-@@ -2655,13 +2845,37 @@
+@@ -2708,13 +2930,39 @@
    /* First of all clear possible warnings from the previous command */
    mysql_reset_thd_for_next_command(thd);
  
-+  int start_time_error = 0;
-+  int end_time_error = 0;
++  int start_time_error=   0;
++  int end_time_error=     0;
 +  struct timeval start_time, end_time;
-+  double start_usecs = 0;
-+  double end_usecs = 0;
++  double start_usecs=     0;
++  double end_usecs=       0;
 +  /* cpu time */
-+  int cputime_error = 0;
++  int cputime_error=      0;
 +  struct timespec tp;
-+  double start_cpu_nsecs = 0;
-+  double end_cpu_nsecs = 0;
++  double start_cpu_nsecs= 0;
++  double end_cpu_nsecs=   0;
 +
-+  if (opt_userstat_running) {
++  if (opt_userstat_running)
++  {
 +#ifdef HAVE_CLOCK_GETTIME
 +    /* get start cputime */
-+    if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+      start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++    if (!(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      start_cpu_nsecs= tp.tv_sec * 1000000000.0+tp.tv_nsec;
 +#endif
 +
 +    // Gets the start time, in order to measure how long this command takes.
-+    if (!(start_time_error = gettimeofday(&start_time, NULL))) {
-+      start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++    if (!(start_time_error= gettimeofday(&start_time, NULL)))
++    {
++      start_usecs= start_time.tv_sec * 1000000.0 + start_time.tv_usec;
 +    }
 +  }
 +
@@ -2349,27 +2302,33 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
    }
  
    stmt->close_cursor();
-@@ -2678,6 +2892,44 @@
+@@ -2731,6 +2979,53 @@
  
    my_ok(thd);
  
 +end:
-+  if (opt_userstat_running) {
++  if (opt_userstat_running)
++  {
 +    // Gets the end time.
-+    if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++    if (!(end_time_error = gettimeofday(&end_time, NULL)))
++    {
 +      end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
 +    }
 +
 +    // Calculates the difference between the end and start times.
-+    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
-+      thd->busy_time = (end_usecs - start_usecs) / 1000000;
++    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error)
++    {
++      thd->busy_time= (end_usecs - start_usecs) / 1000000;
 +      // In case there are bad values, 2629743 is the #seconds in a month.
-+      if (thd->busy_time > 2629743) {
-+        thd->busy_time = 0;
++      if (thd->busy_time > 2629743)
++      {
++        thd->busy_time= 0;
 +      }
-+    } else {
++    }
++    else
++    {
 +      // end time went back in time, or gettimeofday() failed.
-+      thd->busy_time = 0;
++      thd->busy_time= 0;
 +    }
 +
 +#ifdef HAVE_CLOCK_GETTIME
@@ -2378,14 +2337,17 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
 +        !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
 +      end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
 +#endif
-+    if (start_cpu_nsecs && !cputime_error) {
++    if (start_cpu_nsecs && !cputime_error)
++    {
 +      thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
 +      // In case there are bad values, 2629743 is the #seconds in a month.
-+      if (thd->cpu_time > 2629743) {
-+        thd->cpu_time = 0;
++      if (thd->cpu_time > 2629743)
++      {
++        thd->cpu_time= 0;
 +      }
-+    } else
-+      thd->cpu_time = 0;
++    }
++    else
++      thd->cpu_time= 0;
 +  }
 +  // Updates THD stats and the global user stats.
 +  thd->update_stats(true);
@@ -2394,34 +2356,91 @@ diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
    DBUG_VOID_RETURN;
  }
  
+diff -ruN a/sql/sql_reload.cc b/sql/sql_reload.cc
+--- a/sql/sql_reload.cc        2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_reload.cc        2010-12-31 05:00:59.000000000 +0300
+@@ -272,14 +272,48 @@
+    mysql_mutex_unlock(&LOCK_active_mi);
+  }
+ #endif
+- if (options & REFRESH_USER_RESOURCES)
+-   reset_mqh((LEX_USER *) NULL, 0);             /* purecov: inspected */
+ #ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
+  if (options & REFRESH_QUERY_RESPONSE_TIME)
+  {
+    query_response_time_flush();
+  }
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++  if (options & REFRESH_USER_RESOURCES)
++    reset_mqh((LEX_USER *) NULL, 0);             /* purecov: inspected */
++  if (options & REFRESH_TABLE_STATS)
++  {
++    mysql_mutex_lock(&LOCK_global_table_stats);
++    free_global_table_stats();
++    init_global_table_stats();
++    mysql_mutex_unlock(&LOCK_global_table_stats);
++  }
++  if (options & REFRESH_INDEX_STATS)
++  {
++    mysql_mutex_lock(&LOCK_global_index_stats);
++    free_global_index_stats();
++    init_global_index_stats();
++    mysql_mutex_unlock(&LOCK_global_index_stats);
++  }
++  if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS | REFRESH_THREAD_STATS))
++  {
++    mysql_mutex_lock(&LOCK_global_user_client_stats);
++    if (options & REFRESH_USER_STATS)
++    {
++      free_global_user_stats();
++      init_global_user_stats();
++    }
++    if (options & REFRESH_CLIENT_STATS)
++    {
++      free_global_client_stats();
++      init_global_client_stats();
++    }
++    if (options & REFRESH_THREAD_STATS)
++    {
++      free_global_thread_stats();
++      init_global_thread_stats();
++    }
++    mysql_mutex_unlock(&LOCK_global_user_client_stats);
++  }
+  *write_to_binlog= tmp_write_to_binlog;
+  /*
+    If the query was killed then this function must fail.
 diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
---- a/sql/sql_show.cc  2010-11-24 17:24:52.000000000 +0300
-+++ b/sql/sql_show.cc  2010-11-24 17:31:33.000000000 +0300
-@@ -84,6 +84,40 @@
+--- a/sql/sql_show.cc  2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_show.cc  2010-12-31 04:39:23.000000000 +0300
+@@ -114,6 +114,43 @@
  
  static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table);
  
 +/*
-+ * Solaris 10 does not have strsep(). 
-+ * 
++ * Solaris 10 does not have strsep().
++ *
 + * based on getToken from http://www.winehq.org/pipermail/wine-patches/2001-November/001322.html
 + *
-+ */
++*/
 +
 +#ifndef HAVE_STRSEP
 +static char* strsep(char** str, const char* delims)
 +{
 +  char *token;
 +
-+  if (*str == NULL) {
++  if (*str == NULL)
++  {
 +    /* No more tokens */
 +    return NULL;
 +  }
 +
-+  token = *str;
-+  while (**str != '\0') {
-+    if (strchr(delims, **str) != NULL) {
-+      **str = '\0';
++  token= *str;
++  while (**str != '\0')
++  {
++    if (strchr(delims, **str) != NULL)
++    {
++      **str= '\0';
 +      (*str)++;
 +      return token;
 +    }
@@ -2429,7 +2448,7 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +  }
 +
 +  /* There is not another token */
-+  *str = NULL;
++  *str= NULL;
 +
 +  return token;
 +}
@@ -2438,7 +2457,7 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
  /***************************************************************************
  ** List all table types supported
  ***************************************************************************/
-@@ -832,6 +866,7 @@
+@@ -799,6 +836,7 @@
                sctx->master_access);
    if (!(db_access & DB_ACLS) && check_grant_db(thd,dbname))
    {
@@ -2446,7 +2465,7 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
      my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
               sctx->priv_user, sctx->host_or_ip, dbname);
      general_log_print(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR),
-@@ -2386,6 +2421,279 @@
+@@ -2351,6 +2389,284 @@
    DBUG_RETURN(res);
  }
  
@@ -2461,13 +2480,14 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +   RETURN
 +     0 - OK
 +     1 - error
-+ */
++*/
 +int send_user_stats(THD* thd, HASH *all_user_stats, TABLE *table)
 +{
 +  DBUG_ENTER("send_user_stats");
-+  for (uint i = 0; i < all_user_stats->records; ++i) {
++  for (uint i = 0; i < all_user_stats->records; ++i)
++  {
 +    restore_record(table, s->default_values);
-+    USER_STATS *user_stats = (USER_STATS*)hash_element(all_user_stats, i);
++    USER_STATS *user_stats = (USER_STATS *) my_hash_element(all_user_stats, i);
 +      table->field[0]->store(user_stats->user, strlen(user_stats->user), system_charset_info);
 +      table->field[1]->store((longlong)user_stats->total_connections);
 +      table->field[2]->store((longlong)user_stats->concurrent_connections);
@@ -2501,9 +2521,10 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +int send_thread_stats(THD* thd, HASH *all_thread_stats, TABLE *table)
 +{
 +  DBUG_ENTER("send_thread_stats");
-+  for (uint i = 0; i < all_thread_stats->records; ++i) {
++  for (uint i = 0; i < all_thread_stats->records; ++i)
++  {
 +    restore_record(table, s->default_values);
-+    THREAD_STATS *user_stats = (THREAD_STATS*)hash_element(all_thread_stats, i);
++    THREAD_STATS *user_stats = (THREAD_STATS *) my_hash_element(all_thread_stats, i);
 +      table->field[0]->store((longlong)user_stats->id);
 +      table->field[1]->store((longlong)user_stats->total_connections);
 +      table->field[2]->store((longlong)user_stats->concurrent_connections);
@@ -2546,7 +2567,7 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +   RETURN
 +     0 - OK
 +     1 - error
-+ */
++*/
 +
 +
 +int fill_schema_user_stats(THD* thd, TABLE_LIST* tables, COND* cond)
@@ -2560,9 +2581,9 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +  // Iterates through all the global stats and sends them to the client.
 +  // Pattern matching on the client IP is supported.
 +
-+  pthread_mutex_lock(&LOCK_global_user_client_stats);
++  mysql_mutex_lock(&LOCK_global_user_client_stats);
 +  int result= send_user_stats(thd, &global_user_stats, table);
-+  pthread_mutex_unlock(&LOCK_global_user_client_stats);
++  mysql_mutex_unlock(&LOCK_global_user_client_stats);
 +  if (result)
 +    goto err;
 +
@@ -2585,7 +2606,7 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +   RETURN
 +     0 - OK
 +     1 - error
-+ */
++*/
 +
 +
 +int fill_schema_client_stats(THD* thd, TABLE_LIST* tables, COND* cond)
@@ -2599,9 +2620,9 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +  // Iterates through all the global stats and sends them to the client.
 +  // Pattern matching on the client IP is supported.
 +
-+  pthread_mutex_lock(&LOCK_global_user_client_stats);
++  mysql_mutex_lock(&LOCK_global_user_client_stats);
 +  int result= send_user_stats(thd, &global_client_stats, table);
-+  pthread_mutex_unlock(&LOCK_global_user_client_stats);
++  mysql_mutex_unlock(&LOCK_global_user_client_stats);
 +  if (result)
 +    goto err;
 +
@@ -2624,9 +2645,9 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +  // Iterates through all the global stats and sends them to the client.
 +  // Pattern matching on the client IP is supported.
 +
-+  pthread_mutex_lock(&LOCK_global_user_client_stats);
++  mysql_mutex_lock(&LOCK_global_user_client_stats);
 +  int result= send_thread_stats(thd, &global_thread_stats, table);
-+  pthread_mutex_unlock(&LOCK_global_user_client_stats);
++  mysql_mutex_unlock(&LOCK_global_user_client_stats);
 +  if (result)
 +    goto err;
 +
@@ -2645,23 +2666,24 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +  DBUG_ENTER("fill_schema_table_stats");
 +  char *table_full_name, *table_schema;
 +
-+  pthread_mutex_lock(&LOCK_global_table_stats);
-+  for (uint i = 0; i < global_table_stats.records; ++i) {
++  mysql_mutex_lock(&LOCK_global_table_stats);
++  for (uint i = 0; i < global_table_stats.records; ++i)
++  {
 +    restore_record(table, s->default_values);
-+    TABLE_STATS *table_stats = 
-+      (TABLE_STATS*)hash_element(&global_table_stats, i);
++    TABLE_STATS *table_stats =
++      (TABLE_STATS *) my_hash_element(&global_table_stats, i);
 +
 +    table_full_name= thd->strdup(table_stats->table);
 +    table_schema= strsep(&table_full_name, ".");
 +
 +    TABLE_LIST tmp_table;
-+    bzero((char*) &tmp_table,sizeof(tmp_table));
++    bzero((char *) &tmp_table,sizeof(tmp_table));
 +    tmp_table.table_name= table_full_name;
 +    tmp_table.db= table_schema;
 +    tmp_table.grant.privilege= 0;
-+    if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db,
++    if (check_access(thd, SELECT_ACL, tmp_table.db,
 +                      &tmp_table.grant.privilege, 0, 0,
-+                      is_schema_db(table_schema)) ||
++                      is_infoschema_db(table_schema)) ||
 +         check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1))
 +        continue;
 +
@@ -2673,11 +2695,11 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +
 +    if (schema_table_store_record(thd, table))
 +    {
-+      VOID(pthread_mutex_unlock(&LOCK_global_table_stats));
++      mysql_mutex_unlock(&LOCK_global_table_stats);
 +      DBUG_RETURN(1);
 +    }
 +  }
-+  pthread_mutex_unlock(&LOCK_global_table_stats);
++  mysql_mutex_unlock(&LOCK_global_table_stats);
 +  DBUG_RETURN(0);
 +}
 +
@@ -2688,24 +2710,25 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +  DBUG_ENTER("fill_schema_index_stats");
 +  char *index_full_name, *table_schema, *table_name;
 +
-+  pthread_mutex_lock(&LOCK_global_index_stats);
-+  for (uint i = 0; i < global_index_stats.records; ++i) {
++  mysql_mutex_lock(&LOCK_global_index_stats);
++  for (uint i = 0; i < global_index_stats.records; ++i)
++  {
 +    restore_record(table, s->default_values);
 +    INDEX_STATS *index_stats =
-+      (INDEX_STATS*)hash_element(&global_index_stats, i);
++      (INDEX_STATS *) my_hash_element(&global_index_stats, i);
 +
 +    index_full_name= thd->strdup(index_stats->index);
 +    table_schema= strsep(&index_full_name, ".");
 +    table_name= strsep(&index_full_name, ".");
 +
 +    TABLE_LIST tmp_table;
-+    bzero((char*) &tmp_table,sizeof(tmp_table));
++    bzero((char *) &tmp_table,sizeof(tmp_table));
 +    tmp_table.table_name= table_name;
 +    tmp_table.db= table_schema;
 +    tmp_table.grant.privilege= 0;
-+    if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db,
++    if (check_access(thd, SELECT_ACL, tmp_table.db,
 +                      &tmp_table.grant.privilege, 0, 0,
-+                      is_schema_db(table_schema)) ||
++                      is_infoschema_db(table_schema)) ||
 +         check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1))
 +        continue;
 +
@@ -2715,18 +2738,19 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
 +    table->field[3]->store((longlong)index_stats->rows_read, TRUE);
 +
 +    if (schema_table_store_record(thd, table))
-+    { 
-+      VOID(pthread_mutex_unlock(&LOCK_global_index_stats));
++    {
++      mysql_mutex_unlock(&LOCK_global_index_stats);
 +      DBUG_RETURN(1);
 +    }
 +  }
-+  pthread_mutex_unlock(&LOCK_global_index_stats);
++  mysql_mutex_unlock(&LOCK_global_index_stats);
 +  DBUG_RETURN(0);
 +}
++
  
  /* collect status for all running threads */
  
-@@ -6688,6 +6996,104 @@
+@@ -7465,6 +7781,104 @@
  };
  
  
@@ -2831,16 +2855,16 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
  ST_FIELD_INFO processlist_fields_info[]=
  {
    {"ID", 4, MYSQL_TYPE_LONGLONG, 0, 0, "Id", SKIP_OPEN_TABLE},
-@@ -6823,6 +7229,8 @@
+@@ -7654,6 +8068,8 @@
  {
    {"CHARACTER_SETS", charsets_fields_info, create_schema_table, 
     fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0, 0},
-+  {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table, 
++  {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table,
 +    fill_schema_client_stats, make_old_format, 0, -1, -1, 0, 0},
    {"COLLATIONS", collation_fields_info, create_schema_table, 
     fill_schema_collation, make_old_format, 0, -1, -1, 0, 0},
    {"COLLATION_CHARACTER_SET_APPLICABILITY", coll_charset_app_fields_info,
-@@ -6832,6 +7240,8 @@
+@@ -7663,6 +8079,8 @@
     OPTIMIZE_I_S_TABLE|OPEN_VIEW_FULL},
    {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table,
     fill_schema_column_privileges, 0, 0, -1, -1, 0, 0},
@@ -2849,54 +2873,67 @@ diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
    {"ENGINES", engines_fields_info, create_schema_table,
     fill_schema_engines, make_old_format, 0, -1, -1, 0, 0},
  #ifdef HAVE_EVENT_SCHEDULER
-@@ -6888,11 +7298,17 @@
+@@ -7735,14 +8153,20 @@
     get_all_tables, make_table_names_old_format, 0, 1, 2, 1, 0},
    {"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table,
     fill_schema_table_privileges, 0, 0, -1, -1, 0, 0},
 +  {"TABLE_STATISTICS", table_stats_fields_info, create_schema_table,
 +    fill_schema_table_stats, make_old_format, 0, -1, -1, 0, 0},
+   {"TEMPORARY_TABLES", temporary_table_fields_info, create_schema_table,
+    fill_temporary_tables, make_temporary_tables_old_format, 0, 2, 3, 0,
+    OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE},
 +  {"THREAD_STATISTICS", thread_stats_fields_info, create_schema_table,
 +    fill_schema_thread_stats, make_old_format, 0, -1, -1, 0, 0},
    {"TRIGGERS", triggers_fields_info, create_schema_table,
     get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0,
-    OPEN_TABLE_ONLY},
+    OPEN_TRIGGER_ONLY|OPTIMIZE_I_S_TABLE},
    {"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table, 
     fill_schema_user_privileges, 0, 0, -1, -1, 0, 0},
-+  {"USER_STATISTICS", user_stats_fields_info, create_schema_table, 
++  {"USER_STATISTICS", user_stats_fields_info, create_schema_table,
 +    fill_schema_user_stats, make_old_format, 0, -1, -1, 0, 0},
    {"VARIABLES", variables_fields_info, create_schema_table, fill_variables,
     make_old_format, 0, 0, -1, 1, 0},
    {"VIEWS", view_fields_info, create_schema_table, 
 diff -ruN a/sql/sql_update.cc b/sql/sql_update.cc
---- a/sql/sql_update.cc        2010-10-12 00:34:16.000000000 +0400
-+++ b/sql/sql_update.cc        2010-11-24 17:24:52.000000000 +0300
-@@ -890,6 +890,7 @@
-     thd->row_count_func=
-       (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
-     my_ok(thd, (ulong) thd->row_count_func, id, buff);
-+    thd->updated_row_count += thd->row_count_func;
+--- a/sql/sql_update.cc        2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_update.cc        2010-12-31 04:08:17.000000000 +0300
+@@ -894,8 +894,10 @@
+     my_snprintf(buff, sizeof(buff), ER(ER_UPDATE_INFO), (ulong) found,
+                 (ulong) updated,
+                 (ulong) thd->warning_info->statement_warn_count());
+-    my_ok(thd, (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated,
+-          id, buff);
++    ha_rows row_count=
++      (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
++    my_ok(thd, row_count, id, buff);
++    thd->updated_row_count += row_count;
      DBUG_PRINT("info",("%ld records updated", (long) updated));
    }
    thd->count_cuted_fields= CHECK_FIELD_IGNORE;                /* calc cuted fields */
-@@ -2176,5 +2177,6 @@
-   thd->row_count_func=
-     (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
-   ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
-+  thd->updated_row_count += thd->row_count_func;
+@@ -2136,7 +2138,9 @@
+     thd->first_successful_insert_id_in_prev_stmt : 0;
+   my_snprintf(buff, sizeof(buff), ER(ER_UPDATE_INFO),
+               (ulong) found, (ulong) updated, (ulong) thd->cuted_fields);
+-  ::my_ok(thd, (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated,
+-          id, buff);
++  ha_rows row_count=
++    (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
++  ::my_ok(thd, row_count, id, buff);
++  thd->updated_row_count+= row_count;
    DBUG_RETURN(FALSE);
  }
 diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
---- a/sql/sql_yacc.yy  2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_yacc.yy  2010-11-24 17:31:33.000000000 +0300
-@@ -757,6 +757,7 @@
- %token  CHECK_SYM                     /* SQL-2003-R */
+--- a/sql/sql_yacc.yy  2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_yacc.yy  2010-12-31 05:06:16.000000000 +0300
+@@ -864,6 +864,7 @@
  %token  CIPHER_SYM
+ %token  CLASS_ORIGIN_SYM              /* SQL-2003-N */
  %token  CLIENT_SYM
 +%token  CLIENT_STATS_SYM
  %token  CLOSE_SYM                     /* SQL-2003-R */
  %token  COALESCE                      /* SQL-2003-N */
  %token  CODE_SYM
-@@ -903,6 +904,7 @@
+@@ -1017,6 +1018,7 @@
  %token  IMPORT
  %token  INDEXES
  %token  INDEX_SYM
@@ -2904,23 +2941,15 @@ diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
  %token  INFILE
  %token  INITIAL_SIZE_SYM
  %token  INNER_SYM                     /* SQL-2003-R */
-@@ -1144,6 +1146,7 @@
- %token  SIGNED_SYM
- %token  SIMPLE_SYM                    /* SQL-2003-N */
- %token  SLAVE
-+%token  SLOW_SYM
- %token  SMALLINT                      /* SQL-2003-R */
- %token  SNAPSHOT_SYM
- %token  SOCKET_SYM
-@@ -1189,6 +1192,7 @@
+@@ -1315,6 +1317,7 @@
  %token  TABLESPACE
  %token  TABLE_REF_PRIORITY
  %token  TABLE_SYM                     /* SQL-2003-R */
 +%token  TABLE_STATS_SYM
  %token  TABLE_CHECKSUM_SYM
+ %token  TABLE_NAME_SYM                /* SQL-2003-N */
  %token  TEMPORARY                     /* SQL-2003-N */
- %token  TEMPTABLE_SYM
-@@ -1197,6 +1201,7 @@
+@@ -1324,6 +1327,7 @@
  %token  TEXT_SYM
  %token  THAN_SYM
  %token  THEN_SYM                      /* SQL-2003-R */
@@ -2928,7 +2957,7 @@ diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
  %token  TIMESTAMP                     /* SQL-2003-R */
  %token  TIMESTAMP_ADD
  %token  TIMESTAMP_DIFF
-@@ -1234,6 +1239,7 @@
+@@ -1361,6 +1365,7 @@
  %token  UPGRADE_SYM
  %token  USAGE                         /* SQL-2003-N */
  %token  USER                          /* SQL-2003-R */
@@ -2936,28 +2965,28 @@ diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
  %token  USE_FRM
  %token  USE_SYM
  %token  USING                         /* SQL-2003-R */
-@@ -10346,6 +10352,41 @@
-           {
-             Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
-           }
-+        | CLIENT_STATS_SYM wild_and_where 
+@@ -11109,6 +11114,41 @@
+              MYSQL_YYABORT;
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+          }
++        | CLIENT_STATS_SYM wild_and_where
 +          {
 +           LEX *lex= Lex;
-+           Lex->sql_command = SQLCOM_SELECT;
++           Lex->sql_command= SQLCOM_SELECT;
 +           if (prepare_schema_table(YYTHD, lex, 0, SCH_CLIENT_STATS))
 +             MYSQL_YYABORT;
 +          }
-+        | USER_STATS_SYM wild_and_where 
++        | USER_STATS_SYM wild_and_where
 +          {
 +           LEX *lex= Lex;
-+           lex->sql_command = SQLCOM_SELECT;
++           lex->sql_command= SQLCOM_SELECT;
 +           if (prepare_schema_table(YYTHD, lex, 0, SCH_USER_STATS))
 +             MYSQL_YYABORT;
 +          }
 +        | THREAD_STATS_SYM wild_and_where
 +          {
 +           LEX *lex= Lex;
-+           Lex->sql_command = SQLCOM_SELECT;
++           Lex->sql_command= SQLCOM_SELECT;
 +           if (prepare_schema_table(YYTHD, lex, 0, SCH_THREAD_STATS))
 +             MYSQL_YYABORT;
 +          }
@@ -2975,15 +3004,13 @@ diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
 +           if (prepare_schema_table(YYTHD, lex, 0, SCH_INDEX_STATS))
 +             MYSQL_YYABORT;
 +          }
-         | CREATE PROCEDURE sp_name
+         | CREATE PROCEDURE_SYM sp_name
            {
              LEX *lex= Lex;
-@@ -10554,6 +10595,18 @@
-           { Lex->type|= REFRESH_STATUS; }
-         | SLAVE
-           { Lex->type|= REFRESH_SLAVE; }
-+        | SLOW_SYM QUERY_SYM LOGS_SYM
-+          { Lex->type |= REFRESH_SLOW_QUERY_LOG; }
+@@ -11351,6 +11391,16 @@
+             Lex->type|= REFRESH_QUERY_RESPONSE_TIME;
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+           }
 +        | CLIENT_STATS_SYM
 +          { Lex->type|= REFRESH_CLIENT_STATS; }
 +        | USER_STATS_SYM
@@ -2997,47 +3024,31 @@ diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
          | MASTER_SYM
            { Lex->type|= REFRESH_MASTER; }
          | DES_KEY_FILE
-@@ -11677,6 +11730,7 @@
+@@ -12473,6 +12523,7 @@
          | CHAIN_SYM                {}
          | CHANGED                  {}
          | CIPHER_SYM               {}
 +        | CLIENT_STATS_SYM         {}
          | CLIENT_SYM               {}
+         | CLASS_ORIGIN_SYM         {}
          | COALESCE                 {}
-         | CODE_SYM                 {}
-@@ -11738,6 +11792,7 @@
+@@ -12541,6 +12592,7 @@
          | HOSTS_SYM                {}
          | HOUR_SYM                 {}
          | IDENTIFIED_SYM           {}
 +        | INDEX_STATS_SYM          {}
+         | IGNORE_SERVER_IDS_SYM    {}
          | INVOKER_SYM              {}
          | IMPORT                   {}
-         | INDEXES                  {}
-@@ -11862,6 +11917,7 @@
-         | SIMPLE_SYM               {}
-         | SHARE_SYM                {}
-         | SHUTDOWN                 {}
-+        | SLOW_SYM                 {}
-         | SNAPSHOT_SYM             {}
-         | SOUNDS_SYM               {}
-         | SOURCE_SYM               {}
-@@ -11881,6 +11937,7 @@
+@@ -12692,6 +12744,7 @@
          | SUSPEND_SYM              {}
          | SWAPS_SYM                {}
          | SWITCHES_SYM             {}
 +        | TABLE_STATS_SYM          {}
+         | TABLE_NAME_SYM           {}
          | TABLES                   {}
          | TABLE_CHECKSUM_SYM       {}
-         | TABLESPACE               {}
-@@ -11888,6 +11945,7 @@
-         | TEMPTABLE_SYM            {}
-         | TEXT_SYM                 {}
-         | THAN_SYM                 {}
-+        | THREAD_STATS_SYM         {}
-         | TRANSACTION_SYM          {}
-         | TRIGGERS_SYM             {}
-         | TIMESTAMP                {}
-@@ -11905,6 +11963,7 @@
+@@ -12717,6 +12770,7 @@
          | UNKNOWN_SYM              {}
          | UNTIL_SYM                {}
          | USER                     {}
@@ -3046,9 +3057,17 @@ diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
          | VARIABLES                {}
          | VIEW_SYM                 {}
 diff -ruN a/sql/structs.h b/sql/structs.h
---- a/sql/structs.h    2010-10-12 00:34:34.000000000 +0400
-+++ b/sql/structs.h    2010-11-24 17:24:52.000000000 +0300
-@@ -237,6 +237,171 @@
+--- a/sql/structs.h    2010-12-03 20:58:26.000000000 +0300
++++ b/sql/structs.h    2010-12-31 05:12:04.000000000 +0300
+@@ -25,6 +25,7 @@
+ #include "my_time.h"                   /* enum_mysql_timestamp_type */
+ #include "thr_lock.h"                  /* thr_lock_type */
+ #include "my_base.h"                   /* ha_rows, ha_key_alg */
++#include "mysql_com.h"
+ struct TABLE;
+ class Field;
+@@ -218,6 +219,171 @@
    USER_RESOURCES user_resources;
  } USER_CONN;
  
@@ -3073,11 +3092,11 @@ diff -ruN a/sql/structs.h b/sql/structs.h
 +  ulonglong empty_queries;
 +} USER_STATS;
 +
-+/* Lookup function for hash tables with USER_STATS entries */
++/* Lookup function for my_hash tables with USER_STATS entries */
 +extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
 +                                my_bool not_used __attribute__((unused)));
 +
-+/* Free all memory for a hash table with USER_STATS entries */
++/* Free all memory for a my_hash table with USER_STATS entries */
 +extern void free_user_stats(USER_STATS* user_stats);
 +
 +/* Intialize an instance of USER_STATS */
@@ -3148,11 +3167,11 @@ diff -ruN a/sql/structs.h b/sql/structs.h
 +  ulonglong empty_queries;
 +} THREAD_STATS;
 +
-+/* Lookup function for hash tables with THREAD_STATS entries */
++/* Lookup function for my_hash tables with THREAD_STATS entries */
 +extern "C" uchar *get_key_thread_stats(THREAD_STATS *thread_stats, size_t *length,
 +                                my_bool not_used __attribute__((unused)));
 +
-+/* Free all memory for a hash table with THREAD_STATS entries */
++/* Free all memory for a my_hash table with THREAD_STATS entries */
 +extern void free_thread_stats(THREAD_STATS* thread_stats);
 +
 +/* Intialize an instance of THREAD_STATS */
@@ -3220,78 +3239,31 @@ diff -ruN a/sql/structs.h b/sql/structs.h
        /* Bits in form->update */
  #define REG_MAKE_DUPP         1       /* Make a copy of record when read */
  #define REG_NEW_RECORD                2       /* Write a new record if not found */
-diff -ruN a/sql/table.h b/sql/table.h
---- a/sql/table.h      2010-10-12 00:34:16.000000000 +0400
-+++ b/sql/table.h      2010-11-24 17:31:33.000000000 +0300
-@@ -944,10 +944,12 @@
- enum enum_schema_tables
- {
-   SCH_CHARSETS= 0,
-+  SCH_CLIENT_STATS,
-   SCH_COLLATIONS,
-   SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
-   SCH_COLUMNS,
-   SCH_COLUMN_PRIVILEGES,
-+  SCH_INDEX_STATS,
-   SCH_ENGINES,
-   SCH_EVENTS,
-   SCH_FILES,
-@@ -971,8 +973,11 @@
-   SCH_TABLE_CONSTRAINTS,
-   SCH_TABLE_NAMES,
-   SCH_TABLE_PRIVILEGES,
-+  SCH_TABLE_STATS,
-+  SCH_THREAD_STATS,
-   SCH_TRIGGERS,
-   SCH_USER_PRIVILEGES,
-+  SCH_USER_STATS,
-   SCH_VARIABLES,
-   SCH_VIEWS
- };
-diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
---- a/storage/innobase/handler/ha_innodb.cc    2010-10-12 00:34:15.000000000 +0400
-+++ b/storage/innobase/handler/ha_innodb.cc    2010-11-24 17:24:52.000000000 +0300
-@@ -4055,6 +4055,8 @@
-       error = row_insert_for_mysql((byte*) record, prebuilt);
-+      if (error == DB_SUCCESS) rows_changed++;
-+
-       /* Handle duplicate key errors */
-       if (auto_inc_used) {
-               ulint           err;
-@@ -4392,6 +4394,8 @@
-               }
-       }
-+      if (error == DB_SUCCESS) rows_changed++;
-+
-       innodb_srv_conc_exit_innodb(trx);
-       error = convert_error_code_to_mysql(error, user_thd);
-@@ -4444,6 +4448,8 @@
-       error = row_update_for_mysql((byte*) record, prebuilt);
-+      if (error == DB_SUCCESS) rows_changed++;
-+
-       innodb_srv_conc_exit_innodb(trx);
-       error = convert_error_code_to_mysql(error, user_thd);
-@@ -4923,6 +4929,9 @@
-       if (ret == DB_SUCCESS) {
-               error = 0;
-               table->status = 0;
-+              rows_read++;
-+              if (active_index >= 0 && active_index < MAX_KEY)
-+                      index_rows_read[active_index]++;
-       } else if (ret == DB_RECORD_NOT_FOUND) {
-               error = HA_ERR_END_OF_FILE;
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc  2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sys_vars.cc  2010-12-30 02:22:25.000000000 +0300
+@@ -1547,6 +1547,17 @@
+        NO_MUTEX_GUARD, NOT_IN_BINLOG,
+        ON_CHECK(check_read_only), ON_UPDATE(fix_read_only));
++static Sys_var_mybool Sys_userstat_running( /* boolean server variable backed by opt_userstat_running */
++       "userstat_running",
++       "Control USER_STATISTICS, CLIENT_STATISTICS, THREAD_STATISTICS, "
++       "INDEX_STATISTICS and TABLE_STATISTICS running",
++       GLOBAL_VAR(opt_userstat_running), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); /* global scope, defaults to FALSE */
++
++static Sys_var_mybool Sys_thread_statistics( /* boolean server variable backed by opt_thread_statistics */
++       "thread_statistics",
++       "Control THREAD_STATISTICS running, when userstat_running is enabled",
++       GLOBAL_VAR(opt_thread_statistics), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); /* global scope, defaults to FALSE */
++
+ // Small lower limit to be able to test MRR
+ static Sys_var_ulong Sys_read_rnd_buff_size(
+        "read_rnd_buffer_size",
 diff -ruN a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
---- a/storage/myisam/ha_myisam.cc      2010-10-12 00:34:25.000000000 +0400
-+++ b/storage/myisam/ha_myisam.cc      2010-11-24 17:24:52.000000000 +0300
-@@ -761,6 +761,7 @@
+--- a/storage/myisam/ha_myisam.cc      2010-12-03 20:58:26.000000000 +0300
++++ b/storage/myisam/ha_myisam.cc      2010-12-31 05:58:01.000000000 +0300
+@@ -769,6 +769,7 @@
  
  int ha_myisam::write_row(uchar *buf)
  {
@@ -3299,7 +3271,7 @@ diff -ruN a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
    ha_statistic_increment(&SSV::ha_write_count);
  
    /* If we have a timestamp column, update it to the current time */
-@@ -773,11 +774,12 @@
+@@ -781,11 +782,13 @@
    */
    if (table->next_number_field && buf == table->record[0])
    {
@@ -3309,12 +3281,13 @@ diff -ruN a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
    }
 -  return mi_write(file,buf);
 +  error=mi_write(file,buf);
-+  if (!error) rows_changed++;
++  if (!error)
++    rows_changed++;
 +  return error;
  }
  
  int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
-@@ -1638,16 +1640,22 @@
+@@ -1536,16 +1539,24 @@
  
  int ha_myisam::update_row(const uchar *old_data, uchar *new_data)
  {
@@ -3324,7 +3297,8 @@ diff -ruN a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
      table->timestamp_field->set_time();
 -  return mi_update(file,old_data,new_data);
 +  error=mi_update(file,old_data,new_data);
-+  if (!error) rows_changed++;
++  if (!error)
++    rows_changed++;
 +  return error;
  }
  
@@ -3334,54 +3308,58 @@ diff -ruN a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
    ha_statistic_increment(&SSV::ha_delete_count);
 -  return mi_delete(file,buf);
 +  error=mi_delete(file,buf);
-+  if (!error) rows_changed++;
++  if (!error)
++    rows_changed++;
 +  return error;
  }
  
  int ha_myisam::index_read_map(uchar *buf, const uchar *key,
-@@ -1658,6 +1666,13 @@
+@@ -1557,6 +1568,14 @@
    ha_statistic_increment(&SSV::ha_read_key_count);
    int error=mi_rkey(file, buf, active_index, key, keypart_map, find_flag);
    table->status=error ? STATUS_NOT_FOUND: 0;
-+  if (!error) {
++  if (!error)
++  {
 +    rows_read++;
 +
 +    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
 +    if (inx >= 0 && inx < MAX_KEY)
 +      index_rows_read[inx]++;
 +  }
+   MYSQL_INDEX_READ_ROW_DONE(error);
    return error;
  }
-@@ -1668,6 +1683,13 @@
+@@ -1569,6 +1588,14 @@
    ha_statistic_increment(&SSV::ha_read_key_count);
    int error=mi_rkey(file, buf, index, key, keypart_map, find_flag);
    table->status=error ? STATUS_NOT_FOUND: 0;
-+  if (!error) {
++  if (!error)
++  {
 +    rows_read++;
 +
 +    int inx = index;
 +    if (inx >= 0 && inx < MAX_KEY)
 +      index_rows_read[inx]++;
 +  }
+   MYSQL_INDEX_READ_ROW_DONE(error);
    return error;
  }
-@@ -1680,6 +1702,13 @@
+@@ -1583,6 +1610,14 @@
    int error=mi_rkey(file, buf, active_index, key, keypart_map,
                      HA_READ_PREFIX_LAST);
    table->status=error ? STATUS_NOT_FOUND: 0;
-+  if (!error) {
++  if (!error)
++  {
 +    rows_read++;
 +
 +    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
 +    if (inx >= 0 && inx < MAX_KEY)
 +      index_rows_read[inx]++;
 +  }
+   MYSQL_INDEX_READ_ROW_DONE(error);
    DBUG_RETURN(error);
  }
-@@ -1689,6 +1718,13 @@
+@@ -1594,6 +1629,13 @@
    ha_statistic_increment(&SSV::ha_read_next_count);
    int error=mi_rnext(file,buf,active_index);
    table->status=error ? STATUS_NOT_FOUND: 0;
@@ -3392,10 +3370,10 @@ diff -ruN a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
 +    if (inx >= 0 && inx < MAX_KEY)
 +      index_rows_read[inx]++;
 +  }
+   MYSQL_INDEX_READ_ROW_DONE(error);
    return error;
  }
-@@ -1698,6 +1734,13 @@
+@@ -1605,6 +1647,13 @@
    ha_statistic_increment(&SSV::ha_read_prev_count);
    int error=mi_rprev(file,buf, active_index);
    table->status=error ? STATUS_NOT_FOUND: 0;
@@ -3406,64 +3384,69 @@ diff -ruN a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
 +    if (inx >= 0 && inx < MAX_KEY)
 +      index_rows_read[inx]++;
 +  }
+   MYSQL_INDEX_READ_ROW_DONE(error);
    return error;
  }
-@@ -1707,6 +1750,13 @@
+@@ -1616,6 +1665,14 @@
    ha_statistic_increment(&SSV::ha_read_first_count);
    int error=mi_rfirst(file, buf, active_index);
    table->status=error ? STATUS_NOT_FOUND: 0;
-+  if (!error) {
++  if (!error)
++  {
 +    rows_read++;
 +
 +    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
 +    if (inx >= 0 && inx < MAX_KEY)
 +      index_rows_read[inx]++;
 +  }
+   MYSQL_INDEX_READ_ROW_DONE(error);
    return error;
  }
-@@ -1716,6 +1766,13 @@
+@@ -1627,6 +1684,14 @@
    ha_statistic_increment(&SSV::ha_read_last_count);
    int error=mi_rlast(file, buf, active_index);
    table->status=error ? STATUS_NOT_FOUND: 0;
-+  if (!error) {
++  if (!error)
++  {
 +    rows_read++;
 +
 +    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
 +    if (inx >= 0 && inx < MAX_KEY)
 +      index_rows_read[inx]++;
 +  }
+   MYSQL_INDEX_READ_ROW_DONE(error);
    return error;
  }
-@@ -1731,6 +1788,13 @@
+@@ -1644,6 +1709,14 @@
      error= mi_rnext_same(file,buf);
    } while (error == HA_ERR_RECORD_DELETED);
    table->status=error ? STATUS_NOT_FOUND: 0;
-+  if (!error) {
++  if (!error)
++  {
 +    rows_read++;
 +
 +    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
 +    if (inx >= 0 && inx < MAX_KEY)
 +      index_rows_read[inx]++;
 +  }
+   MYSQL_INDEX_READ_ROW_DONE(error);
    return error;
  }
-@@ -1747,6 +1811,7 @@
+@@ -1663,6 +1736,8 @@
    ha_statistic_increment(&SSV::ha_read_rnd_next_count);
    int error=mi_scan(file, buf);
    table->status=error ? STATUS_NOT_FOUND: 0;
-+  if (!error) rows_read++;
++  if (!error)
++    rows_read++;
+   MYSQL_READ_ROW_DONE(error);
    return error;
  }
-@@ -1760,6 +1825,7 @@
+@@ -1679,6 +1754,8 @@
    ha_statistic_increment(&SSV::ha_read_rnd_count);
    int error=mi_rrnd(file, buf, my_get_ptr(pos,ref_length));
    table->status=error ? STATUS_NOT_FOUND: 0;
-+  if (!error) rows_read++;
++  if (!error)
++    rows_read++;
+   MYSQL_READ_ROW_DONE(error);
    return error;
  }
This page took 1.581539 seconds and 4 git commands to generate.