]> git.pld-linux.org Git - packages/percona-server.git/commitdiff
- new percona url, new patch names (but still have to wait for the patches to be...
authorArkadiusz Miśkiewicz <arekm@maven.pl>
Wed, 15 Dec 2010 22:22:51 +0000 (22:22 +0000)
committercvs2git <feedback@pld-linux.org>
Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)
Changed files:
    mysql-innodb_extend_slow.patch -> 1.1
    mysql-innodb_split_buf_pool_mutex.patch -> 1.2
    mysql-microsec_process.patch -> 1.2
    mysql-userstat.patch -> 1.1
    mysql.spec -> 1.503

mysql-innodb_extend_slow.patch [new file with mode: 0644]
mysql-innodb_split_buf_pool_mutex.patch [new file with mode: 0644]
mysql-microsec_process.patch [new file with mode: 0644]
mysql-userstat.patch [new file with mode: 0644]
mysql.spec

diff --git a/mysql-innodb_extend_slow.patch b/mysql-innodb_extend_slow.patch
new file mode 100644 (file)
index 0000000..4264943
--- /dev/null
@@ -0,0 +1,1044 @@
+# name       : innodb_extend_slow.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-03 15:49:59.175955882 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-03 17:42:42.074307123 +0900
+@@ -51,6 +51,40 @@
+ #include "dict0dict.h"
+ #include "log0recv.h"
+ #include "page0zip.h"
++#include "trx0trx.h"
++
++/* prototypes for new functions added to ha_innodb.cc */
++trx_t* innobase_get_trx();
++
++/* Records block's page in trx's distinct-page bitmap; distinct_page_access is bumped when
++the bit was clear. Does nothing unless innobase_get_slow_log() and trx->take_stats are set. */
++static inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
++{
++      ulint           block_hash;
++      ulint           block_hash_byte;
++      byte            block_hash_offset;
++
++      ut_ad(block);
++      if (!innobase_get_slow_log() || !trx || !trx->take_stats)
++              return;
++
++      if (!trx->distinct_page_access_hash) {  /* bitmap is allocated on first use */
++              trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
++              memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
++      }
++
++      block_hash = ut_hash_ulint((block->page.space << 20) + block->page.space +
++                                      block->page.offset, DPAH_SIZE << 3);
++      block_hash_byte = block_hash >> 3;              /* byte index into the bitmap */
++      block_hash_offset = (byte) block_hash & 0x07;   /* bit within that byte, 0..7 */
++      if (block_hash_byte >= DPAH_SIZE) {     /* report and skip rather than index past the bitmap */
++              fprintf(stderr, "!!! block_hash_byte = %lu  block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset);
++              return;
++      }
++      if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0)
++              trx->distinct_page_access++;
++      trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset;
++}
+ /*
+               IMPLEMENTATION OF THE BUFFER POOL
+@@ -2399,11 +2433,19 @@
+       mutex_t*        block_mutex;
+       ibool           must_read;
+       unsigned        access_time;
++      trx_t*          trx = NULL;
++      ulint           sec;
++      ulint           ms;
++      ib_uint64_t     start_time;
++      ib_uint64_t     finish_time;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+ #ifndef UNIV_LOG_DEBUG
+       ut_ad(!ibuf_inside());
+ #endif
++      if (innobase_get_slow_log()) {
++              trx = innobase_get_trx();
++      }
+       buf_pool->stat.n_page_gets++;
+       for (;;) {
+@@ -2421,7 +2463,7 @@
+               //buf_pool_mutex_exit(buf_pool);
+               rw_lock_s_unlock(&buf_pool->page_hash_latch);
+-              buf_read_page(space, zip_size, offset);
++              buf_read_page(space, zip_size, offset, trx);
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+               ut_a(++buf_dbg_counter % 37 || buf_validate());
+@@ -2499,6 +2541,13 @@
+               /* Let us wait until the read operation
+               completes */
++              if (innobase_get_slow_log() && trx && trx->take_stats)
++              {
++                      ut_usectime(&sec, &ms);
++                      start_time = (ib_uint64_t)sec * 1000000 + ms;
++              } else {
++                      start_time = 0;
++              }
+               for (;;) {
+                       enum buf_io_fix io_fix;
+@@ -2513,6 +2562,12 @@
+                               break;
+                       }
+               }
++              if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
++              {
++                      ut_usectime(&sec, &ms);
++                      finish_time = (ib_uint64_t)sec * 1000000 + ms;
++                      trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++              }
+       }
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+@@ -2825,6 +2880,11 @@
+       ibool           must_read;
+       ulint           retries = 0;
+       mutex_t*        block_mutex = NULL;
++      trx_t*          trx = NULL;
++      ulint           sec;
++      ulint           ms;
++      ib_uint64_t     start_time;
++      ib_uint64_t     finish_time;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+       ut_ad(mtr);
+@@ -2842,6 +2902,9 @@
+ #ifndef UNIV_LOG_DEBUG
+       ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
+ #endif
++      if (innobase_get_slow_log()) {
++              trx = innobase_get_trx();
++      }
+       buf_pool->stat.n_page_gets++;
+       fold = buf_page_address_fold(space, offset);
+ loop:
+@@ -2915,7 +2978,7 @@
+                       return(NULL);
+               }
+-              if (buf_read_page(space, zip_size, offset)) {
++              if (buf_read_page(space, zip_size, offset, trx)) {
+                       retries = 0;
+               } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
+                       ++retries;
+@@ -3178,6 +3241,13 @@
+                       /* Let us wait until the read operation
+                       completes */
++                      if (innobase_get_slow_log() && trx && trx->take_stats)
++                      {
++                              ut_usectime(&sec, &ms);
++                              start_time = (ib_uint64_t)sec * 1000000 + ms;
++                      } else {
++                              start_time = 0;
++                      }
+                       for (;;) {
+                               enum buf_io_fix io_fix;
+@@ -3192,6 +3262,12 @@
+                                       break;
+                               }
+                       }
++                      if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
++                      {
++                              ut_usectime(&sec, &ms);
++                              finish_time = (ib_uint64_t)sec * 1000000 + ms;
++                              trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++                      }
+               }
+               fix_type = MTR_MEMO_BUF_FIX;
+@@ -3217,13 +3293,17 @@
+               /* In the case of a first access, try to apply linear
+               read-ahead */
+-              buf_read_ahead_linear(space, zip_size, offset);
++              buf_read_ahead_linear(space, zip_size, offset, trx);
+       }
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+       ut_a(ibuf_count_get(buf_block_get_space(block),
+                           buf_block_get_page_no(block)) == 0);
+ #endif
++      if (innobase_get_slow_log()) {
++              _increment_page_get_statistics(block, trx);
++      }
++
+       return(block);
+ }
+@@ -3247,6 +3327,7 @@
+       unsigned        access_time;
+       ibool           success;
+       ulint           fix_type;
++      trx_t*          trx = NULL;
+       ut_ad(block);
+       ut_ad(mtr);
+@@ -3324,13 +3405,17 @@
+ #ifdef UNIV_DEBUG_FILE_ACCESSES
+       ut_a(block->page.file_page_was_freed == FALSE);
+ #endif
++      if (innobase_get_slow_log()) {
++              trx = innobase_get_trx();
++      }
++
+       if (UNIV_UNLIKELY(!access_time)) {
+               /* In the case of a first access, try to apply linear
+               read-ahead */
+               buf_read_ahead_linear(buf_block_get_space(block),
+                                     buf_block_get_zip_size(block),
+-                                    buf_block_get_page_no(block));
++                                    buf_block_get_page_no(block), trx);
+       }
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+@@ -3340,6 +3425,9 @@
+       buf_pool = buf_pool_from_block(block);
+       buf_pool->stat.n_page_gets++;
++      if (innobase_get_slow_log()) {
++              _increment_page_get_statistics(block, trx);
++      }
+       return(TRUE);
+ }
+@@ -3362,6 +3450,7 @@
+       buf_pool_t*     buf_pool;
+       ibool           success;
+       ulint           fix_type;
++      trx_t*          trx = NULL;
+       ut_ad(mtr);
+       ut_ad(mtr->state == MTR_ACTIVE);
+@@ -3448,6 +3537,11 @@
+ #endif
+       buf_pool->stat.n_page_gets++;
++      if (innobase_get_slow_log()) {
++              trx = innobase_get_trx();
++              _increment_page_get_statistics(block, trx);
++      }
++
+       return(TRUE);
+ }
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c   2010-12-03 17:32:15.617037263 +0900
++++ b/storage/innobase/buf/buf0rea.c   2010-12-03 17:42:42.075297193 +0900
+@@ -77,7 +77,8 @@
+                       treat the tablespace as dropped; this is a timestamp we
+                       use to stop dangling page reads from a tablespace
+                       which we have DISCARDed + IMPORTed back */
+-      ulint   offset) /*!< in: page number */
++      ulint   offset, /*!< in: page number */
++      trx_t*  trx)
+ {
+       buf_page_t*     bpage;
+       ulint           wake_later;
+@@ -179,15 +180,15 @@
+       thd_wait_begin(NULL, THD_WAIT_DISKIO);
+       if (zip_size) {
+-              *err = fil_io(OS_FILE_READ | wake_later,
++              *err = _fil_io(OS_FILE_READ | wake_later,
+                             sync, space, zip_size, offset, 0, zip_size,
+-                            bpage->zip.data, bpage);
++                            bpage->zip.data, bpage, trx);
+       } else {
+               ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+-              *err = fil_io(OS_FILE_READ | wake_later,
++              *err = _fil_io(OS_FILE_READ | wake_later,
+                             sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
+-                            ((buf_block_t*) bpage)->frame, bpage);
++                            ((buf_block_t*) bpage)->frame, bpage, trx);
+       }
+       thd_wait_end(NULL);
+       ut_a(*err == DB_SUCCESS);
+@@ -213,7 +214,8 @@
+ /*==========*/
+       ulint   space,  /*!< in: space id */
+       ulint   zip_size,/*!< in: compressed page size in bytes, or 0 */
+-      ulint   offset) /*!< in: page number */
++      ulint   offset, /*!< in: page number */
++      trx_t*  trx)
+ {
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+       ib_int64_t      tablespace_version;
+@@ -227,7 +229,7 @@
+       count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+                                 zip_size, FALSE,
+-                                tablespace_version, offset);
++                                tablespace_version, offset, trx);
+       srv_buf_pool_reads += count;
+       if (err == DB_TABLESPACE_DELETED) {
+               ut_print_timestamp(stderr);
+@@ -278,8 +280,9 @@
+ /*==================*/
+       ulint   space,  /*!< in: space id */
+       ulint   zip_size,/*!< in: compressed page size in bytes, or 0 */
+-      ulint   offset) /*!< in: page number of a page; NOTE: the current thread
++      ulint   offset, /*!< in: page number of a page; NOTE: the current thread
+                       must want access to this page (see NOTE 3 above) */
++      trx_t*  trx)
+ {
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+       ib_int64_t      tablespace_version;
+@@ -500,7 +503,7 @@
+                       count += buf_read_page_low(
+                               &err, FALSE,
+                               ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
+-                              space, zip_size, FALSE, tablespace_version, i);
++                              space, zip_size, FALSE, tablespace_version, i, trx);
+                       if (err == DB_TABLESPACE_DELETED) {
+                               ut_print_timestamp(stderr);
+                               fprintf(stderr,
+@@ -594,7 +597,7 @@
+               buf_read_page_low(&err, sync && (i + 1 == n_stored),
+                                 BUF_READ_ANY_PAGE, space_ids[i],
+                                 zip_size, TRUE, space_versions[i],
+-                                page_nos[i]);
++                                page_nos[i], NULL);
+               if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
+ tablespace_deleted:
+@@ -736,12 +739,12 @@
+               if ((i + 1 == n_stored) && sync) {
+                       buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+                                         zip_size, TRUE, tablespace_version,
+-                                        page_nos[i]);
++                                        page_nos[i], NULL);
+               } else {
+                       buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
+                                         | OS_AIO_SIMULATED_WAKE_LATER,
+                                         space, zip_size, TRUE,
+-                                        tablespace_version, page_nos[i]);
++                                        tablespace_version, page_nos[i], NULL);
+               }
+       }
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c   2010-12-03 15:53:54.610037199 +0900
++++ b/storage/innobase/fil/fil0fil.c   2010-12-03 17:42:42.079064198 +0900
+@@ -4349,7 +4349,7 @@
+                                node->name, node->handle, buf,
+                                offset_low, offset_high,
+                                page_size * n_pages,
+-                               NULL, NULL);
++                               NULL, NULL, NULL);
+ #endif
+               if (success) {
+                       node->size += n_pages;
+@@ -4676,7 +4676,7 @@
+ i/o on a tablespace which does not exist */
+ UNIV_INTERN
+ ulint
+-fil_io(
++_fil_io(
+ /*===*/
+       ulint   type,           /*!< in: OS_FILE_READ or OS_FILE_WRITE,
+                               ORed to OS_FILE_LOG, if a log i/o
+@@ -4701,8 +4701,9 @@
+       void*   buf,            /*!< in/out: buffer where to store read data
+                               or from where to write; in aio this must be
+                               appropriately aligned */
+-      void*   message)        /*!< in: message for aio handler if non-sync
++      void*   message,        /*!< in: message for aio handler if non-sync
+                               aio used, else ignored */
++      trx_t*  trx)
+ {
+       ulint           mode;
+       fil_space_t*    space;
+@@ -4872,7 +4873,7 @@
+ #else
+       /* Queue the aio request */
+       ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
+-                   offset_low, offset_high, len, node, message);
++                   offset_low, offset_high, len, node, message, trx);
+ #endif
+       ut_a(ret);
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:36:44.293955189 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 17:42:42.090024586 +0900
+@@ -1528,6 +1528,16 @@
+       trx->check_unique_secondary = !thd_test_options(
+               thd, OPTION_RELAXED_UNIQUE_CHECKS);
++#ifdef EXTENDED_SLOWLOG
++      if (thd_log_slow_verbosity(thd) & SLOG_V_INNODB) {
++              trx->take_stats = TRUE;
++      } else {
++              trx->take_stats = FALSE;
++      }
++#else
++      trx->take_stats = FALSE;
++#endif
++
+       DBUG_VOID_RETURN;
+ }
+@@ -1583,6 +1593,32 @@
+ }
++/*************************************************************************
++Returns the trx that thd_to_trx() yields for current_thd, or NULL when
++current_thd is not set. */
++extern "C"
++trx_t*
++innobase_get_trx()
++{
++      THD*    cur_thd = current_thd;
++
++      if (!likely(cur_thd != 0)) {
++              return(NULL);
++      }
++      return(thd_to_trx(cur_thd));
++}
++
++extern "C"
++ibool
++innobase_get_slow_log()
++{
++#ifndef EXTENDED_SLOWLOG
++      return(FALSE);  /* built without EXTENDED_SLOWLOG: always off */
++#else
++      return((ibool) thd_opt_slow_log());     /* value reported by thd_opt_slow_log() */
++#endif /* !EXTENDED_SLOWLOG */
++}
++
+ /*********************************************************************//**
+ Construct ha_innobase handler. */
+ UNIV_INTERN
+@@ -9179,6 +9215,25 @@
+       statement has ended */
+       if (trx->n_mysql_tables_in_use == 0) {
++#ifdef EXTENDED_SLOWLOG
++              increment_thd_innodb_stats(thd,
++                                      (unsigned long long) trx->id,
++                                      trx->io_reads,
++                                      trx->io_read,
++                                      trx->io_reads_wait_timer,
++                                      trx->lock_que_wait_timer,
++                                      trx->innodb_que_wait_timer,
++                                      trx->distinct_page_access);
++
++              trx->io_reads = 0;
++              trx->io_read = 0;
++              trx->io_reads_wait_timer = 0;
++              trx->lock_que_wait_timer = 0;
++              trx->innodb_que_wait_timer = 0;
++              trx->distinct_page_access = 0;
++              if (trx->distinct_page_access_hash)
++                      memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
++#endif
+               trx->mysql_n_tables_locked = 0;
+               prebuilt->used_in_HANDLER = FALSE;
+diff -ruN a/storage/innobase/handler/innodb_patch_info.h b/storage/innobase/handler/innodb_patch_info.h
+--- a/storage/innobase/handler/innodb_patch_info.h     2010-12-03 17:36:44.293955189 +0900
++++ b/storage/innobase/handler/innodb_patch_info.h     2010-12-03 17:42:42.094955866 +0900
+@@ -38,5 +38,6 @@
+ {"innodb_recovery_patches","Bugfixes and adjustments about recovery process","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_admin_command_base","XtraDB specific command interface through i_s","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_show_lock_name","Show mutex/lock name instead of crated file/line","","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_extend_slow","Extended statistics in slow.log","It is InnoDB-part only. It needs to patch also to mysqld.","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
+--- a/storage/innobase/include/buf0rea.h       2010-12-03 15:18:48.891024406 +0900
++++ b/storage/innobase/include/buf0rea.h       2010-12-03 17:42:42.096026873 +0900
+@@ -27,6 +27,7 @@
+ #define buf0rea_h
+ #include "univ.i"
++#include "trx0types.h"
+ #include "buf0types.h"
+ /********************************************************************//**
+@@ -41,7 +42,8 @@
+ /*==========*/
+       ulint   space,  /*!< in: space id */
+       ulint   zip_size,/*!< in: compressed page size in bytes, or 0 */
+-      ulint   offset);/*!< in: page number */
++      ulint   offset, /*!< in: page number */
++      trx_t*  trx);
+ /********************************************************************//**
+ Applies linear read-ahead if in the buf_pool the page is a border page of
+ a linear read-ahead area and all the pages in the area have been accessed.
+@@ -72,8 +74,9 @@
+ /*==================*/
+       ulint   space,  /*!< in: space id */
+       ulint   zip_size,/*!< in: compressed page size in bytes, or 0 */
+-      ulint   offset);/*!< in: page number of a page; NOTE: the current thread
++      ulint   offset, /*!< in: page number of a page; NOTE: the current thread
+                       must want access to this page (see NOTE 3 above) */
++      trx_t*  trx);
+ /********************************************************************//**
+ Issues read requests for pages which the ibuf module wants to read in, in
+ order to contract the insert buffer tree. Technically, this function is like
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h       2010-12-03 15:09:51.290958543 +0900
++++ b/storage/innobase/include/fil0fil.h       2010-12-03 17:42:42.097027548 +0900
+@@ -611,9 +611,12 @@
+ Reads or writes data. This operation is asynchronous (aio).
+ @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+ i/o on a tablespace which does not exist */
++#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \
++      _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL)
++
+ UNIV_INTERN
+ ulint
+-fil_io(
++_fil_io(
+ /*===*/
+       ulint   type,           /*!< in: OS_FILE_READ or OS_FILE_WRITE,
+                               ORed to OS_FILE_LOG, if a log i/o
+@@ -638,8 +641,9 @@
+       void*   buf,            /*!< in/out: buffer where to store read data
+                               or from where to write; in aio this must be
+                               appropriately aligned */
+-      void*   message);       /*!< in: message for aio handler if non-sync
++      void*   message,        /*!< in: message for aio handler if non-sync
+                               aio used, else ignored */
++      trx_t*  trx);
+ /**********************************************************************//**
+ Waits for an aio operation to complete. This function is used to write the
+ handler for completed requests. The aio array of pending requests is divided
+diff -ruN a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
+--- a/storage/innobase/include/os0file.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/os0file.h       2010-12-03 17:42:42.100023783 +0900
+@@ -36,6 +36,7 @@
+ #define os0file_h
+ #include "univ.i"
++#include "trx0types.h"
+ #ifndef __WIN__
+ #include <dirent.h>
+@@ -277,13 +278,17 @@
+       pfs_os_file_close_func(file, __FILE__, __LINE__)
+ # define os_aio(type, mode, name, file, buf, offset, offset_high,     \
+-              n, message1, message2)                                  \
++              n, message1, message2, trx)                             \
+       pfs_os_aio_func(type, mode, name, file, buf, offset,            \
+-                      offset_high, n, message1, message2,             \
++                      offset_high, n, message1, message2, trx,        \
+                       __FILE__, __LINE__)
+ # define os_file_read(file, buf, offset, offset_high, n)              \
+-      pfs_os_file_read_func(file, buf, offset, offset_high, n,        \
++      pfs_os_file_read_func(file, buf, offset, offset_high, n, NULL,  \
++                            __FILE__, __LINE__)
++
++# define os_file_read_trx(file, buf, offset, offset_high, n, trx)     \
++      pfs_os_file_read_func(file, buf, offset, offset_high, n, trx,   \
+                             __FILE__, __LINE__)
+ # define os_file_read_no_error_handling(file, buf, offset,            \
+@@ -319,12 +324,15 @@
+ # define os_file_close(file)  os_file_close_func(file)
+ # define os_aio(type, mode, name, file, buf, offset, offset_high,     \
+-             n, message1, message2)                                   \
++             n, message1, message2, trx)                              \
+       os_aio_func(type, mode, name, file, buf, offset, offset_high, n,\
+-                  message1, message2)
++                  message1, message2, trx)
+ # define os_file_read(file, buf, offset, offset_high, n)              \
+-      os_file_read_func(file, buf, offset, offset_high, n)
++      os_file_read_func(file, buf, offset, offset_high, n, NULL)
++
++# define os_file_read_trx(file, buf, offset, offset_high, n, trx)     \
++      os_file_read_func(file, buf, offset, offset_high, n, trx)
+ # define os_file_read_no_error_handling(file, buf, offset,            \
+                                      offset_high, n)                  \
+@@ -690,6 +698,7 @@
+       ulint           offset_high,/*!< in: most significant 32 bits of
+                               offset */
+       ulint           n,      /*!< in: number of bytes to read */
++      trx_t*          trx,
+       const char*     src_file,/*!< in: file name where func invoked */
+       ulint           src_line);/*!< in: line where the func invoked */
+@@ -744,6 +753,7 @@
+                               (can be used to identify a completed
+                               aio operation); ignored if mode is
+                                 OS_AIO_SYNC */
++      trx_t*          trx,
+       const char*     src_file,/*!< in: file name where func invoked */
+       ulint           src_line);/*!< in: line where the func invoked */
+ /*******************************************************************//**
+@@ -885,7 +895,8 @@
+                               offset where to read */
+       ulint           offset_high,/*!< in: most significant 32 bits of
+                               offset */
+-      ulint           n);     /*!< in: number of bytes to read */
++      ulint           n,      /*!< in: number of bytes to read */
++      trx_t*          trx);
+ /*******************************************************************//**
+ Rewind file to its start, read at most size - 1 bytes from it to str, and
+ NUL-terminate str. All errors are silently ignored. This function is
+@@ -1044,10 +1055,11 @@
+                               (can be used to identify a completed
+                               aio operation); ignored if mode is
+                               OS_AIO_SYNC */
+-      void*           message2);/*!< in: message for the aio handler
++      void*           message2,/*!< in: message for the aio handler
+                               (can be used to identify a completed
+                               aio operation); ignored if mode is
+                               OS_AIO_SYNC */
++      trx_t*          trx);
+ /************************************************************************//**
+ Wakes up all async i/o threads so that they know to exit themselves in
+ shutdown. */
+diff -ruN a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
+--- a/storage/innobase/include/os0file.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/os0file.ic      2010-12-03 17:42:42.102024458 +0900
+@@ -229,6 +229,7 @@
+                               (can be used to identify a completed
+                               aio operation); ignored if mode is
+                                 OS_AIO_SYNC */
++      trx_t*          trx,
+       const char*     src_file,/*!< in: file name where func invoked */
+       ulint           src_line)/*!< in: line where the func invoked */
+ {
+@@ -244,7 +245,7 @@
+                                  src_file, src_line);
+       result = os_aio_func(type, mode, name, file, buf, offset, offset_high,
+-                           n, message1, message2);
++                           n, message1, message2, trx);
+       register_pfs_file_io_end(locker, n);
+@@ -268,6 +269,7 @@
+       ulint           offset_high,/*!< in: most significant 32 bits of
+                               offset */
+       ulint           n,      /*!< in: number of bytes to read */
++      trx_t*          trx,
+       const char*     src_file,/*!< in: file name where func invoked */
+       ulint           src_line)/*!< in: line where the func invoked */
+ {
+@@ -278,7 +280,7 @@
+       register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
+                                  src_file, src_line);
+-      result = os_file_read_func(file, buf, offset, offset_high, n);
++      result = os_file_read_func(file, buf, offset, offset_high, n, trx);
+       register_pfs_file_io_end(locker, n);
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h       2010-12-03 17:32:15.634987408 +0900
++++ b/storage/innobase/include/srv0srv.h       2010-12-03 17:42:42.104028644 +0900
+@@ -62,6 +62,9 @@
+ #define SRV_AUTO_EXTEND_INCREMENT     \
+       (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
++/* prototypes for new functions added to ha_innodb.cc */
++ibool innobase_get_slow_log();
++
+ /* This is set to TRUE if the MySQL user has set it in MySQL */
+ extern ibool  srv_lower_case_table_names;
+diff -ruN a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
+--- a/storage/innobase/include/trx0trx.h       2010-12-03 15:41:52.049372966 +0900
++++ b/storage/innobase/include/trx0trx.h       2010-12-03 17:42:42.107024532 +0900
+@@ -728,6 +728,17 @@
+       /*------------------------------*/
+       char detailed_error[256];       /*!< detailed error message for last
+                                       error, or empty. */
++      /*------------------------------*/
++      ulint           io_reads;
++      ib_uint64_t     io_read;
++      ulint           io_reads_wait_timer;
++      ib_uint64_t     lock_que_wait_ustarted;
++      ulint           lock_que_wait_timer;
++      ulint           innodb_que_wait_timer;
++      ulint           distinct_page_access;
++#define       DPAH_SIZE       8192
++      byte*           distinct_page_access_hash;
++      ibool           take_stats;
+ };
+ #define TRX_MAX_N_THREADS     32      /* maximum number of
+diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
+--- a/storage/innobase/lock/lock0lock.c        2010-12-03 15:09:51.297986437 +0900
++++ b/storage/innobase/lock/lock0lock.c        2010-12-03 17:42:42.111024587 +0900
+@@ -1755,6 +1755,8 @@
+ {
+       lock_t* lock;
+       trx_t*  trx;
++      ulint   sec;
++      ulint   ms;
+       ut_ad(mutex_own(&kernel_mutex));
+@@ -1813,6 +1815,10 @@
+       trx->que_state = TRX_QUE_LOCK_WAIT;
+       trx->was_chosen_as_deadlock_victim = FALSE;
+       trx->wait_started = time(NULL);
++      if (innobase_get_slow_log() && trx->take_stats) {
++              ut_usectime(&sec, &ms);
++              trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
++      }
+       ut_a(que_thr_stop(thr));
+@@ -3692,6 +3698,8 @@
+ {
+       lock_t* lock;
+       trx_t*  trx;
++      ulint   sec;
++      ulint   ms;
+       ut_ad(mutex_own(&kernel_mutex));
+@@ -3747,6 +3755,10 @@
+               return(DB_SUCCESS);
+       }
++      if (innobase_get_slow_log() && trx->take_stats) {
++              ut_usectime(&sec, &ms);
++              trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
++      }
+       trx->que_state = TRX_QUE_LOCK_WAIT;
+       trx->was_chosen_as_deadlock_victim = FALSE;
+       trx->wait_started = time(NULL);
+diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
+--- a/storage/innobase/os/os0file.c    2010-12-03 17:32:15.644024974 +0900
++++ b/storage/innobase/os/os0file.c    2010-12-03 17:42:42.117023467 +0900
+@@ -43,6 +43,8 @@
+ #include "srv0start.h"
+ #include "fil0fil.h"
+ #include "buf0buf.h"
++#include "trx0sys.h"
++#include "trx0trx.h"
+ #include "log0recv.h"
+ #ifndef UNIV_HOTBACKUP
+ # include "os0sync.h"
+@@ -2175,13 +2177,18 @@
+       ulint           n,      /*!< in: number of bytes to read */
+       ulint           offset, /*!< in: least significant 32 bits of file
+                               offset from where to read */
+-      ulint           offset_high) /*!< in: most significant 32 bits of
++      ulint           offset_high, /*!< in: most significant 32 bits of
+                               offset */
++      trx_t*          trx)
+ {
+       off_t   offs;
+ #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
+       ssize_t n_bytes;
+ #endif /* HAVE_PREAD && !HAVE_BROKEN_PREAD */
++      ulint           sec;
++      ulint           ms;
++      ib_uint64_t     start_time;
++      ib_uint64_t     finish_time;
+       ut_a((offset & 0xFFFFFFFFUL) == offset);
+@@ -2202,6 +2209,15 @@
+       os_n_file_reads++;
++      if (innobase_get_slow_log() && trx && trx->take_stats)
++      {
++              trx->io_reads++;
++              trx->io_read += n;
++              ut_usectime(&sec, &ms);
++              start_time = (ib_uint64_t)sec * 1000000 + ms;
++      } else {
++              start_time = 0;
++      }
+ #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
+       os_mutex_enter(os_file_count_mutex);
+       os_file_n_pending_preads++;
+@@ -2215,6 +2231,13 @@
+       os_n_pending_reads--;
+       os_mutex_exit(os_file_count_mutex);
++      if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
++      {
++              ut_usectime(&sec, &ms);
++              finish_time = (ib_uint64_t)sec * 1000000 + ms;
++              trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++      }
++
+       return(n_bytes);
+ #else
+       {
+@@ -2251,6 +2274,13 @@
+               os_n_pending_reads--;
+               os_mutex_exit(os_file_count_mutex);
++              if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
++              {
++                      ut_usectime(&sec, &ms);
++                      finish_time = (ib_uint64_t)sec * 1000000 + ms;
++                      trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++              }
++
+               return(ret);
+       }
+ #endif
+@@ -2391,7 +2421,8 @@
+                               offset where to read */
+       ulint           offset_high, /*!< in: most significant 32 bits of
+                               offset */
+-      ulint           n)      /*!< in: number of bytes to read */
++      ulint           n,      /*!< in: number of bytes to read */
++      trx_t*          trx)
+ {
+ #ifdef __WIN__
+       BOOL            ret;
+@@ -2463,7 +2494,7 @@
+       os_bytes_read_since_printout += n;
+ try_again:
+-      ret = os_file_pread(file, buf, n, offset, offset_high);
++      ret = os_file_pread(file, buf, n, offset, offset_high, trx);
+       if ((ulint)ret == n) {
+@@ -2589,7 +2620,7 @@
+       os_bytes_read_since_printout += n;
+ try_again:
+-      ret = os_file_pread(file, buf, n, offset, offset_high);
++      ret = os_file_pread(file, buf, n, offset, offset_high, NULL);
+       if ((ulint)ret == n) {
+@@ -3608,7 +3639,8 @@
+                               offset */
+       ulint           offset_high, /*!< in: most significant 32 bits of
+                               offset */
+-      ulint           len)    /*!< in: length of the block to read or write */
++      ulint           len,    /*!< in: length of the block to read or write */
++      trx_t*          trx)
+ {
+       os_aio_slot_t*  slot = NULL;
+ #ifdef WIN_ASYNC_IO
+@@ -3976,10 +4008,11 @@
+                               (can be used to identify a completed
+                               aio operation); ignored if mode is
+                               OS_AIO_SYNC */
+-      void*           message2)/*!< in: message for the aio handler
++      void*           message2,/*!< in: message for the aio handler
+                               (can be used to identify a completed
+                               aio operation); ignored if mode is
+                               OS_AIO_SYNC */
++      trx_t*          trx)
+ {
+       os_aio_array_t* array;
+       os_aio_slot_t*  slot;
+@@ -4017,8 +4050,8 @@
+               wait in the Windows case. */
+               if (type == OS_FILE_READ) {
+-                      return(os_file_read(file, buf, offset,
+-                                          offset_high, n));
++                      return(os_file_read_trx(file, buf, offset,
++                                          offset_high, n, trx));
+               }
+               ut_a(type == OS_FILE_WRITE);
+@@ -4056,8 +4089,13 @@
+               ut_error;
+       }
++      if (trx && type == OS_FILE_READ)
++      {
++              trx->io_reads++;
++              trx->io_read += n;
++      }
+       slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
+-                                       name, buf, offset, offset_high, n);
++                                       name, buf, offset, offset_high, n, trx);
+       if (type == OS_FILE_READ) {
+               if (srv_use_native_aio) {
+                       os_n_file_reads++;
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 17:32:15.648024399 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 17:45:05.067023254 +0900
+@@ -87,6 +87,9 @@
+ #include "mysql/plugin.h"
+ #include "mysql/service_thd_wait.h"
++/* prototypes for new functions added to ha_innodb.cc */
++ibool innobase_get_slow_log();
++
+ /* This is set to TRUE if the MySQL user has set it in MySQL; currently
+ affects only FOREIGN KEY definition parsing */
+ UNIV_INTERN ibool     srv_lower_case_table_names      = FALSE;
+@@ -1204,6 +1207,10 @@
+       ibool                   has_slept = FALSE;
+       srv_conc_slot_t*        slot      = NULL;
+       ulint                   i;
++      ib_uint64_t             start_time = 0L;
++      ib_uint64_t             finish_time = 0L;
++      ulint                   sec;
++      ulint                   ms;
+       if (trx->mysql_thd != NULL
+           && thd_is_replication_slave_thread(trx->mysql_thd)) {
+@@ -1280,6 +1287,7 @@
+               switches. */
+               if (SRV_THREAD_SLEEP_DELAY > 0) {
+                       os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
++                      trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY;
+               }
+               trx->op_info = "";
+@@ -1335,6 +1343,13 @@
+       /* Go to wait for the event; when a thread leaves InnoDB it will
+       release this thread */
++      if (innobase_get_slow_log() && trx->take_stats) {
++              ut_usectime(&sec, &ms);
++              start_time = (ib_uint64_t)sec * 1000000 + ms;
++      } else {
++              start_time = 0;
++      }
++
+       trx->op_info = "waiting in InnoDB queue";
+       thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_TABLE_LOCK);
+@@ -1343,6 +1358,12 @@
+       trx->op_info = "";
++      if (innobase_get_slow_log() && trx->take_stats && start_time) {
++              ut_usectime(&sec, &ms);
++              finish_time = (ib_uint64_t)sec * 1000000 + ms;
++              trx->innodb_que_wait_timer += (ulint)(finish_time - start_time);
++      }
++
+       os_fast_mutex_lock(&srv_conc_mutex);
+       srv_conc_n_waiting_threads--;
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c   2010-12-03 15:41:52.053955669 +0900
++++ b/storage/innobase/trx/trx0trx.c   2010-12-03 17:42:42.127023410 +0900
+@@ -184,6 +184,15 @@
+       trx->global_read_view = NULL;
+       trx->read_view = NULL;
++      trx->io_reads = 0;
++      trx->io_read = 0;
++      trx->io_reads_wait_timer = 0;
++      trx->lock_que_wait_timer = 0;
++      trx->innodb_que_wait_timer = 0;
++      trx->distinct_page_access = 0;
++      trx->distinct_page_access_hash = NULL;
++      trx->take_stats = FALSE;
++
+       /* Set X/Open XA transaction identification to NULL */
+       memset(&trx->xid, 0, sizeof(trx->xid));
+       trx->xid.formatID = -1;
+@@ -221,6 +230,11 @@
+       trx->mysql_process_no = os_proc_get_number();
++      if (innobase_get_slow_log() && trx->take_stats) {
++              trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
++              memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
++      }
++
+       return(trx);
+ }
+@@ -352,6 +366,12 @@
+ /*===============*/
+       trx_t*  trx)    /*!< in, own: trx object */
+ {
++      if (trx->distinct_page_access_hash)
++      {
++              mem_free(trx->distinct_page_access_hash);
++              trx->distinct_page_access_hash= NULL;
++      }
++
+       mutex_enter(&kernel_mutex);
+       UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
+@@ -373,6 +393,12 @@
+ /*====================*/
+       trx_t*  trx)    /*!< in, own: trx object */
+ {
++      if (trx->distinct_page_access_hash)
++      {
++              mem_free(trx->distinct_page_access_hash);
++              trx->distinct_page_access_hash= NULL;
++      }
++
+       mutex_enter(&kernel_mutex);
+       trx_free(trx);
+@@ -1094,6 +1120,9 @@
+       trx_t*  trx)    /*!< in: transaction */
+ {
+       que_thr_t*      thr;
++      ulint           sec;
++      ulint           ms;
++      ib_uint64_t     now;
+       ut_ad(mutex_own(&kernel_mutex));
+       ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
+@@ -1108,6 +1137,11 @@
+               thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+       }
++      if (innobase_get_slow_log() && trx->take_stats) {
++              ut_usectime(&sec, &ms);
++              now = (ib_uint64_t)sec * 1000000 + ms;
++              trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
++      }
+       trx->que_state = TRX_QUE_RUNNING;
+ }
+@@ -1121,6 +1155,9 @@
+       trx_t*  trx)    /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */
+ {
+       que_thr_t*      thr;
++      ulint           sec;
++      ulint           ms;
++      ib_uint64_t     now;
+       ut_ad(mutex_own(&kernel_mutex));
+       ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
+@@ -1135,6 +1172,11 @@
+               thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+       }
++      if (innobase_get_slow_log() && trx->take_stats) {
++              ut_usectime(&sec, &ms);
++              now = (ib_uint64_t)sec * 1000000 + ms;
++              trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
++      }
+       trx->que_state = TRX_QUE_RUNNING;
+ }
diff --git a/mysql-innodb_split_buf_pool_mutex.patch b/mysql-innodb_split_buf_pool_mutex.patch
new file mode 100644 (file)
index 0000000..37f45b0
--- /dev/null
@@ -0,0 +1,3951 @@
+# name       : innodb_split_buf_pool_mutex.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
+--- a/storage/innobase/btr/btr0cur.c   2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/btr/btr0cur.c   2010-12-03 15:48:29.268957148 +0900
+@@ -4042,7 +4042,8 @@
+       mtr_commit(mtr);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+       mutex_enter(&block->mutex);
+       /* Only free the block if it is still allocated to
+@@ -4053,17 +4054,22 @@
+           && buf_block_get_space(block) == space
+           && buf_block_get_page_no(block) == page_no) {
+-              if (buf_LRU_free_block(&block->page, all, NULL)
++              if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
+                   != BUF_LRU_FREED
+-                  && all && block->page.zip.data) {
++                  && all && block->page.zip.data
++                  /* buf_LRU_free_block() may now release the mutex temporarily; re-check the block identity */
++                  && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
++                  && buf_block_get_space(block) == space
++                  && buf_block_get_page_no(block) == page_no) {
+                       /* Attempt to deallocate the uncompressed page
+                       if the whole block cannot be deallocted. */
+-                      buf_LRU_free_block(&block->page, FALSE, NULL);
++                      buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
+               }
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+       mutex_exit(&block->mutex);
+ }
+diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
+--- a/storage/innobase/btr/btr0sea.c   2010-12-03 15:48:03.033037049 +0900
++++ b/storage/innobase/btr/btr0sea.c   2010-12-03 15:48:29.271024260 +0900
+@@ -1211,7 +1211,7 @@
+       ulint*          offsets;
+       rw_lock_x_lock(&btr_search_latch);
+-      buf_pool_mutex_enter_all();
++      //buf_pool_mutex_enter_all();
+       table = btr_search_sys->hash_index;
+@@ -1220,6 +1220,8 @@
+               buf_pool = buf_pool_from_array(j);
++              mutex_enter(&buf_pool->LRU_list_mutex);
++
+               bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+               while (bpage != NULL) {
+@@ -1301,9 +1303,11 @@
+                       bpage = UT_LIST_GET_PREV(LRU, bpage);
+               }
++
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       }
+-      buf_pool_mutex_exit_all();
++      //buf_pool_mutex_exit_all();
+       rw_lock_x_unlock(&btr_search_latch);
+       if (UNIV_LIKELY_NULL(heap)) {
+@@ -1896,7 +1900,7 @@
+       rec_offs_init(offsets_);
+       rw_lock_x_lock(&btr_search_latch);
+-      buf_pool_mutex_enter_all();
++      buf_pool_page_hash_x_lock_all();
+       cell_count = hash_get_n_cells(btr_search_sys->hash_index);
+@@ -1904,11 +1908,11 @@
+               /* We release btr_search_latch every once in a while to
+               give other queries a chance to run. */
+               if ((i != 0) && ((i % chunk_size) == 0)) {
+-                      buf_pool_mutex_exit_all();
++                      buf_pool_page_hash_x_unlock_all();
+                       rw_lock_x_unlock(&btr_search_latch);
+                       os_thread_yield();
+                       rw_lock_x_lock(&btr_search_latch);
+-                      buf_pool_mutex_enter_all();
++                      buf_pool_page_hash_x_lock_all();
+               }
+               node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
+@@ -2019,11 +2023,11 @@
+               /* We release btr_search_latch every once in a while to
+               give other queries a chance to run. */
+               if (i != 0) {
+-                      buf_pool_mutex_exit_all();
++                      buf_pool_page_hash_x_unlock_all();
+                       rw_lock_x_unlock(&btr_search_latch);
+                       os_thread_yield();
+                       rw_lock_x_lock(&btr_search_latch);
+-                      buf_pool_mutex_enter_all();
++                      buf_pool_page_hash_x_lock_all();
+               }
+               if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
+@@ -2031,7 +2035,7 @@
+               }
+       }
+-      buf_pool_mutex_exit_all();
++      buf_pool_page_hash_x_unlock_all();
+       rw_lock_x_unlock(&btr_search_latch);
+       if (UNIV_LIKELY_NULL(heap)) {
+               mem_heap_free(heap);
+diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
+--- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
++++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
+@@ -73,10 +73,11 @@
+       if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
+ #endif /* UNIV_DEBUG_VALGRIND */
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+       ut_ad(buf_pool->zip_free[i].start != bpage);
+-      UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
++      UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
+ #ifdef UNIV_DEBUG_VALGRIND
+       if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
+@@ -96,8 +97,8 @@
+                                       buf_pool->zip_free[] */
+ {
+ #ifdef UNIV_DEBUG_VALGRIND
+-      buf_page_t*     prev = UT_LIST_GET_PREV(list, bpage);
+-      buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
++      buf_page_t*     prev = UT_LIST_GET_PREV(zip_list, bpage);
++      buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
+       if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
+       if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
+@@ -106,9 +107,10 @@
+       ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
+ #endif /* UNIV_DEBUG_VALGRIND */
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+-      UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
++      UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
+ #ifdef UNIV_DEBUG_VALGRIND
+       if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
+@@ -128,12 +130,13 @@
+ {
+       buf_page_t*     bpage;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       ut_a(i < BUF_BUDDY_SIZES);
+ #ifndef UNIV_DEBUG_VALGRIND
+       /* Valgrind would complain about accessing free memory. */
+-      ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++      ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+                             ut_ad(buf_page_get_state(ut_list_node_313)
+                                   == BUF_BLOCK_ZIP_FREE)));
+ #endif /* !UNIV_DEBUG_VALGRIND */
+@@ -177,16 +180,19 @@
+ buf_buddy_block_free(
+ /*=================*/
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+-      void*           buf)            /*!< in: buffer frame to deallocate */
++      void*           buf,            /*!< in: buffer frame to deallocate */
++      ibool           have_page_hash_mutex)
+ {
+       const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf);
+       buf_page_t*     bpage;
+       buf_block_t*    block;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
++      mutex_enter(&buf_pool->zip_hash_mutex);
++
+       HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
+                   ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
+                         && bpage->in_zip_hash && !bpage->in_page_hash),
+@@ -198,12 +204,14 @@
+       ut_d(bpage->in_zip_hash = FALSE);
+       HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
++      mutex_exit(&buf_pool->zip_hash_mutex);
++
+       ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
+       UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
+       block = (buf_block_t*) bpage;
+       mutex_enter(&block->mutex);
+-      buf_LRU_block_free_non_file_page(block);
++      buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+       mutex_exit(&block->mutex);
+       ut_ad(buf_pool->buddy_n_frames > 0);
+@@ -220,7 +228,7 @@
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_block(block);
+       const ulint     fold = BUF_POOL_ZIP_FOLD(block);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
+@@ -232,7 +240,10 @@
+       ut_ad(!block->page.in_page_hash);
+       ut_ad(!block->page.in_zip_hash);
+       ut_d(block->page.in_zip_hash = TRUE);
++
++      mutex_enter(&buf_pool->zip_hash_mutex);
+       HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
++      mutex_exit(&buf_pool->zip_hash_mutex);
+       ut_d(buf_pool->buddy_n_frames++);
+ }
+@@ -268,7 +279,7 @@
+               bpage->state = BUF_BLOCK_ZIP_FREE;
+ #ifndef UNIV_DEBUG_VALGRIND
+               /* Valgrind would complain about accessing free memory. */
+-              ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++              ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+                                     ut_ad(buf_page_get_state(
+                                                   ut_list_node_313)
+                                           == BUF_BLOCK_ZIP_FREE)));
+@@ -291,25 +302,29 @@
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+       ulint           i,              /*!< in: index of buf_pool->zip_free[],
+                                       or BUF_BUDDY_SIZES */
+-      ibool*          lru)            /*!< in: pointer to a variable that
++      ibool*          lru,            /*!< in: pointer to a variable that
+                                       will be assigned TRUE if storage was
+                                       allocated from the LRU list and
+                                       buf_pool->mutex was temporarily
+                                       released, or NULL if the LRU list
+                                       should not be used */
++      ibool           have_page_hash_mutex)
+ {
+       buf_block_t*    block;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       if (i < BUF_BUDDY_SIZES) {
+               /* Try to allocate from the buddy system. */
++              mutex_enter(&buf_pool->zip_free_mutex);
+               block = buf_buddy_alloc_zip(buf_pool, i);
+               if (block) {
+                       goto func_exit;
+               }
++              mutex_exit(&buf_pool->zip_free_mutex);
+       }
+       /* Try allocating from the buf_pool->free list. */
+@@ -326,19 +341,30 @@
+       }
+       /* Try replacing an uncompressed page in the buffer pool. */
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      if (have_page_hash_mutex) {
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++      }
+       block = buf_LRU_get_free_block(buf_pool, 0);
+       *lru = TRUE;
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      if (have_page_hash_mutex) {
++              rw_lock_x_lock(&buf_pool->page_hash_latch);
++      }
+ alloc_big:
+       buf_buddy_block_register(block);
++      mutex_enter(&buf_pool->zip_free_mutex);
+       block = buf_buddy_alloc_from(
+               buf_pool, block->frame, i, BUF_BUDDY_SIZES);
+ func_exit:
+       buf_pool->buddy_stat[i].used++;
++      mutex_exit(&buf_pool->zip_free_mutex);
++
+       return(block);
+ }
+@@ -355,7 +381,10 @@
+       buf_page_t*     b;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+       switch (buf_page_get_state(bpage)) {
+       case BUF_BLOCK_ZIP_FREE:
+@@ -364,7 +393,7 @@
+       case BUF_BLOCK_FILE_PAGE:
+       case BUF_BLOCK_MEMORY:
+       case BUF_BLOCK_REMOVE_HASH:
+-              ut_error;
++              /* ut_error; */ /* optimistic: no longer fatal; falls through to return(FALSE) */
+       case BUF_BLOCK_ZIP_DIRTY:
+               /* Cannot relocate dirty pages. */
+               return(FALSE);
+@@ -374,9 +403,18 @@
+       }
+       mutex_enter(&buf_pool->zip_mutex);
++      mutex_enter(&buf_pool->zip_free_mutex);
+       if (!buf_page_can_relocate(bpage)) {
+               mutex_exit(&buf_pool->zip_mutex);
++              mutex_exit(&buf_pool->zip_free_mutex);
++              return(FALSE);
++      }
++
++      if (bpage != buf_page_hash_get(buf_pool,
++                                     bpage->space, bpage->offset)) {
++              mutex_exit(&buf_pool->zip_mutex);
++              mutex_exit(&buf_pool->zip_free_mutex);
+               return(FALSE);
+       }
+@@ -384,18 +422,19 @@
+       ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
+       /* relocate buf_pool->zip_clean */
+-      b = UT_LIST_GET_PREV(list, dpage);
+-      UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
++      b = UT_LIST_GET_PREV(zip_list, dpage);
++      UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
+       if (b) {
+-              UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
++              UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
+       } else {
+-              UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
++              UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
+       }
+       UNIV_MEM_INVALID(bpage, sizeof *bpage);
+       mutex_exit(&buf_pool->zip_mutex);
++      mutex_exit(&buf_pool->zip_free_mutex);
+       return(TRUE);
+ }
+@@ -409,14 +448,16 @@
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+       void*           src,            /*!< in: block to relocate */
+       void*           dst,            /*!< in: free block to relocate to */
+-      ulint           i)              /*!< in: index of
++      ulint           i,              /*!< in: index of
+                                       buf_pool->zip_free[] */
++      ibool           have_page_hash_mutex)
+ {
+       buf_page_t*     bpage;
+       const ulint     size    = BUF_BUDDY_LOW << i;
+       ullint          usec    = ut_time_us(NULL);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       ut_ad(!ut_align_offset(src, size));
+       ut_ad(!ut_align_offset(dst, size));
+@@ -438,6 +479,12 @@
+               /* This is a compressed page. */
+               mutex_t*        mutex;
++              if (!have_page_hash_mutex) {
++                      mutex_exit(&buf_pool->zip_free_mutex);
++                      mutex_enter(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_lock(&buf_pool->page_hash_latch);
++              }
++
+               /* The src block may be split into smaller blocks,
+               some of which may be free.  Thus, the
+               mach_read_from_4() calls below may attempt to read
+@@ -462,6 +509,11 @@
+                       added to buf_pool->page_hash yet.  Obviously,
+                       it cannot be relocated. */
++                      if (!have_page_hash_mutex) {
++                              mutex_enter(&buf_pool->zip_free_mutex);
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      }
+                       return(FALSE);
+               }
+@@ -473,18 +525,27 @@
+                       For the sake of simplicity, give up. */
+                       ut_ad(page_zip_get_size(&bpage->zip) < size);
++                      if (!have_page_hash_mutex) {
++                              mutex_enter(&buf_pool->zip_free_mutex);
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      }
+                       return(FALSE);
+               }
++              /* Release zip_free_mutex (if still held) before taking the page mutex, to keep the latch order */
++              if (have_page_hash_mutex)
++                      mutex_exit(&buf_pool->zip_free_mutex);
++
+               /* The block must have been allocated, but it may
+               contain uninitialized data. */
+               UNIV_MEM_ASSERT_W(src, size);
+-              mutex = buf_page_get_mutex(bpage);
++              mutex = buf_page_get_mutex_enter(bpage);
+-              mutex_enter(mutex);
++              mutex_enter(&buf_pool->zip_free_mutex);
+-              if (buf_page_can_relocate(bpage)) {
++              if (mutex && buf_page_can_relocate(bpage)) {
+                       /* Relocate the compressed page. */
+                       ut_a(bpage->zip.data == src);
+                       memcpy(dst, src, size);
+@@ -499,10 +560,22 @@
+                               buddy_stat->relocated_usec
+                                       += ut_time_us(NULL) - usec;
+                       }
++
++                      if (!have_page_hash_mutex) {
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      }
+                       return(TRUE);
+               }
+-              mutex_exit(mutex);
++              if (!have_page_hash_mutex) {
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++              }
++
++              if (mutex) {
++                      mutex_exit(mutex);
++              }
+       } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
+               /* This must be a buf_page_t object. */
+ #if UNIV_WORD_SIZE == 4
+@@ -511,10 +584,31 @@
+               about uninitialized pad bytes. */
+               UNIV_MEM_ASSERT_RW(src, size);
+ #endif
++
++              mutex_exit(&buf_pool->zip_free_mutex);
++
++              if (!have_page_hash_mutex) {
++                      mutex_enter(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_lock(&buf_pool->page_hash_latch);
++              }
++
+               if (buf_buddy_relocate_block(src, dst)) {
++                      mutex_enter(&buf_pool->zip_free_mutex);
++
++                      if (!have_page_hash_mutex) {
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      }
+                       goto success;
+               }
++
++              mutex_enter(&buf_pool->zip_free_mutex);
++
++              if (!have_page_hash_mutex) {
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++              }
+       }
+       return(FALSE);
+@@ -529,13 +623,15 @@
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+       void*           buf,            /*!< in: block to be freed, must not be
+                                       pointed to by the buffer pool */
+-      ulint           i)              /*!< in: index of buf_pool->zip_free[],
++      ulint           i,              /*!< in: index of buf_pool->zip_free[],
+                                       or BUF_BUDDY_SIZES */
++      ibool           have_page_hash_mutex)
+ {
+       buf_page_t*     bpage;
+       buf_page_t*     buddy;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       ut_ad(!mutex_own(&buf_pool->zip_mutex));
+       ut_ad(i <= BUF_BUDDY_SIZES);
+       ut_ad(buf_pool->buddy_stat[i].used > 0);
+@@ -546,7 +642,9 @@
+       ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
+       if (i == BUF_BUDDY_SIZES) {
+-              buf_buddy_block_free(buf_pool, buf);
++              mutex_exit(&buf_pool->zip_free_mutex);
++              buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
++              mutex_enter(&buf_pool->zip_free_mutex);
+               return;
+       }
+@@ -591,7 +689,7 @@
+               ut_a(bpage != buf);
+               {
+-                      buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
++                      buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
+                       UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
+                       bpage = next;
+               }
+@@ -600,13 +698,13 @@
+ #ifndef UNIV_DEBUG_VALGRIND
+ buddy_nonfree:
+       /* Valgrind would complain about accessing free memory. */
+-      ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++      ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+                             ut_ad(buf_page_get_state(ut_list_node_313)
+                                   == BUF_BLOCK_ZIP_FREE)));
+ #endif /* UNIV_DEBUG_VALGRIND */
+       /* The buddy is not free. Is there a free block of this size? */
+-      bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
++      bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]); /* the candidate free block is now taken from the tail of zip_free[i]; upstream took the head */
+       if (bpage) {
+               /* Remove the block from the free list, because a successful
+@@ -616,7 +714,7 @@
+               buf_buddy_remove_from_free(buf_pool, bpage, i);
+               /* Try to relocate the buddy of buf to the free block. */
+-              if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
++              if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
+                       ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
+                       goto buddy_free2;
+@@ -636,14 +734,14 @@
+               (Parts of the buddy can be free in
+               buf_pool->zip_free[j] with j < i.) */
+-              ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++              ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+                                     ut_ad(buf_page_get_state(
+                                                   ut_list_node_313)
+                                           == BUF_BLOCK_ZIP_FREE
+                                           && ut_list_node_313 != buddy)));
+ #endif /* !UNIV_DEBUG_VALGRIND */
+-              if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
++              if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
+                       buf = bpage;
+                       UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c   2010-12-03 15:22:36.314943336 +0900
++++ b/storage/innobase/buf/buf0buf.c   2010-12-03 15:48:29.282947357 +0900
+@@ -263,6 +263,7 @@
+ #ifdef UNIV_PFS_RWLOCK
+ /* Keys to register buffer block related rwlocks and mutexes with
+ performance schema */
++UNIV_INTERN mysql_pfs_key_t   buf_pool_page_hash_key;
+ UNIV_INTERN mysql_pfs_key_t   buf_block_lock_key;
+ # ifdef UNIV_SYNC_DEBUG
+ UNIV_INTERN mysql_pfs_key_t   buf_block_debug_latch_key;
+@@ -273,6 +274,10 @@
+ UNIV_INTERN mysql_pfs_key_t   buffer_block_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t   buf_pool_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t   buf_pool_zip_mutex_key;
++UNIV_INTERN mysql_pfs_key_t   buf_pool_LRU_list_mutex_key;
++UNIV_INTERN mysql_pfs_key_t   buf_pool_free_list_mutex_key;
++UNIV_INTERN mysql_pfs_key_t   buf_pool_zip_free_mutex_key;
++UNIV_INTERN mysql_pfs_key_t   buf_pool_zip_hash_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t   flush_list_mutex_key;
+ #endif /* UNIV_PFS_MUTEX */
+@@ -881,9 +886,9 @@
+       block->page.in_zip_hash = FALSE;
+       block->page.in_flush_list = FALSE;
+       block->page.in_free_list = FALSE;
+-      block->in_unzip_LRU_list = FALSE;
+ #endif /* UNIV_DEBUG */
+       block->page.in_LRU_list = FALSE;
++      block->in_unzip_LRU_list = FALSE; /* moved out of the UNIV_DEBUG section: the flag is now initialized in every build, like in_LRU_list above */
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+       block->n_pointers = 0;
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+@@ -981,9 +986,11 @@
+               memset(block->frame, '\0', UNIV_PAGE_SIZE);
+ #endif
+               /* Add the block to the free list */
+-              UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
++              mutex_enter(&buf_pool->free_list_mutex); /* the free list is modified only while free_list_mutex is held */
++              UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page)); /* list node field is named "free" here instead of the generic "list" */
+               ut_d(block->page.in_free_list = TRUE);
++              mutex_exit(&buf_pool->free_list_mutex); /* released again for every block, so it is taken once per loop iteration */
+               ut_ad(buf_pool_from_block(block) == buf_pool);
+               block++;
+@@ -1038,7 +1045,8 @@
+       buf_chunk_t*    chunk = buf_pool->chunks;
+       ut_ad(buf_pool);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+       for (n = buf_pool->n_chunks; n--; chunk++) {
+               buf_block_t* block = buf_chunk_contains_zip(chunk, data);
+@@ -1138,7 +1146,7 @@
+       buf_block_t*            block;
+       const buf_block_t*      block_end;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool)); /* check disabled: holding buf_pool->mutex alone is not sufficient, all buf_pool mutexes are needed here */
+       block_end = chunk->blocks + chunk->size;
+@@ -1150,8 +1158,10 @@
+               ut_ad(!block->in_unzip_LRU_list);
+               ut_ad(!block->page.in_flush_list);
+               /* Remove the block from the free list. */
++              mutex_enter(&buf_pool->free_list_mutex);
+               ut_ad(block->page.in_free_list);
+-              UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
++              UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
++              mutex_exit(&buf_pool->free_list_mutex);
+               /* Free the latches. */
+               mutex_free(&block->mutex);
+@@ -1208,9 +1218,21 @@
+       ------------------------------- */
+       mutex_create(buf_pool_mutex_key,
+                    &buf_pool->mutex, SYNC_BUF_POOL);
++      mutex_create(buf_pool_LRU_list_mutex_key, /* the five locks created below are used in place of buf_pool->mutex by the rest of this patch */
++                   &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
++      rw_lock_create(buf_pool_page_hash_key, /* the only rw-lock of the group: taken in S or X mode around page hash access */
++                     &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
++      mutex_create(buf_pool_free_list_mutex_key,
++                   &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
++      mutex_create(buf_pool_zip_free_mutex_key,
++                   &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
++      mutex_create(buf_pool_zip_hash_mutex_key,
++                   &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
+       mutex_create(buf_pool_zip_mutex_key,
+                    &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
++      mutex_enter(&buf_pool->LRU_list_mutex); /* acquisition order used here: LRU_list_mutex, then page_hash_latch (X), then buf_pool->mutex */
++      rw_lock_x_lock(&buf_pool->page_hash_latch); /* both are released again on the error and on the success return path */
+       buf_pool_mutex_enter(buf_pool);
+       if (buf_pool_size > 0) {
+@@ -1223,6 +1245,8 @@
++                      mutex_exit(&buf_pool->LRU_list_mutex); /* released before the frees below: buf_pool must not be dereferenced after mem_free(buf_pool) */
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch); /* likewise dropped while buf_pool is still valid */
+                       mem_free(chunk);
+                       mem_free(buf_pool);
+                       buf_pool_mutex_exit(buf_pool);
+                       return(DB_ERROR);
+@@ -1253,6 +1277,8 @@
+       /* All fields are initialized by mem_zalloc(). */
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       buf_pool_mutex_exit(buf_pool);
+       return(DB_SUCCESS);
+@@ -1469,7 +1495,11 @@
+       ulint           fold;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); /* precondition: caller holds LRU_list_mutex ... */
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)); /* ... and the page hash latch in exclusive mode (only checked in UNIV_SYNC_DEBUG builds) */
++#endif /* UNIV_SYNC_DEBUG */
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+       ut_a(bpage->buf_fix_count == 0);
+@@ -1556,7 +1586,8 @@
+ try_again:
+       btr_search_disable(); /* Empty the adaptive hash index again */
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+ shrink_again:
+       if (buf_pool->n_chunks <= 1) {
+@@ -1627,7 +1658,7 @@
+                               buf_LRU_make_block_old(&block->page);
+                               dirty++;
+-                      } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
++                      } else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
+                                  != BUF_LRU_FREED) {
+                               nonfree++;
+                       }
+@@ -1635,7 +1666,8 @@
+                       mutex_exit(&block->mutex);
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+               /* Request for a flush of the chunk if it helps.
+               Do not flush if there are non-free blocks, since
+@@ -1685,7 +1717,8 @@
+ func_done:
+       buf_pool->old_pool_size = buf_pool->curr_pool_size;
+ func_exit:
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+       btr_search_enable();
+ }
+@@ -1726,7 +1759,9 @@
+       hash_table_t*   zip_hash;
+       hash_table_t*   page_hash;
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       /* Free, create, and populate the hash table. */
+       hash_table_free(buf_pool->page_hash);
+@@ -1767,8 +1802,9 @@
+       All such blocks are either in buf_pool->zip_clean or
+       in buf_pool->flush_list. */
++      mutex_enter(&buf_pool->zip_mutex);
+       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(zip_list, b)) {
+               ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+               ut_ad(!b->in_flush_list);
+               ut_ad(b->in_LRU_list);
+@@ -1778,10 +1814,11 @@
+               HASH_INSERT(buf_page_t, hash, page_hash,
+                           buf_page_address_fold(b->space, b->offset), b);
+       }
++      mutex_exit(&buf_pool->zip_mutex);
+       buf_flush_list_mutex_enter(buf_pool);
+       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(flush_list, b)) {
+               ut_ad(b->in_flush_list);
+               ut_ad(b->in_LRU_list);
+               ut_ad(b->in_page_hash);
+@@ -1808,7 +1845,9 @@
+       }
+       buf_flush_list_mutex_exit(buf_pool);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+ /********************************************************************
+@@ -1855,21 +1894,32 @@
+       buf_page_t*     bpage;
+       ulint           i;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
++      mutex_t*        block_mutex; /* assigned only when the hash lookup finds a page; not used on the bpage == NULL path */
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      rw_lock_x_lock(&buf_pool->page_hash_latch); /* the function now takes the page hash latch (X mode) itself instead of asserting buf_pool->mutex */
+       bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
++      if (bpage) {
++              block_mutex = buf_page_get_mutex_enter(bpage); /* presumably returns the page's mutex already entered -- TODO confirm against its definition */
++              ut_a(block_mutex);
++      }
+       if (UNIV_LIKELY_NULL(bpage)) {
+               if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
+                       /* The page was loaded meanwhile. */
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch); /* NOTE(review): block_mutex is not released on this return path; presumably it is handed over to the caller -- confirm */
+                       return(bpage);
+               }
+               /* Add to an existing watch. */
+               bpage->buf_fix_count++;
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++              mutex_exit(block_mutex); /* existing-watch path: both the hash latch and the page mutex are released before returning NULL */
+               return(NULL);
+       }
++      /* buf_pool->watch is protected by zip_mutex for now */
++      mutex_enter(&buf_pool->zip_mutex); /* taken while the X page hash latch from above is still held */
+       for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
+               bpage = &buf_pool->watch[i];
+@@ -1897,6 +1947,8 @@
+                       ut_d(bpage->in_page_hash = TRUE);
+                       HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+                                   fold, bpage);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      mutex_exit(&buf_pool->zip_mutex);
+                       return(NULL);
+               case BUF_BLOCK_ZIP_PAGE:
+                       ut_ad(bpage->in_page_hash);
+@@ -1914,6 +1966,8 @@
+       ut_error;
+       /* Fix compiler warning */
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++      mutex_exit(&buf_pool->zip_mutex);
+       return(NULL);
+ }
+@@ -1943,6 +1997,8 @@
+       buf_chunk_t*    chunks;
+       buf_chunk_t*    chunk;
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       buf_pool_mutex_enter(buf_pool);
+       chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
+@@ -1961,6 +2017,8 @@
+               buf_pool->n_chunks++;
+       }
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       buf_pool_mutex_exit(buf_pool);
+ }
+@@ -2048,7 +2106,11 @@
+                                       space, offset) */
+       buf_page_t*     watch)          /*!< in/out: sentinel for watch */
+ {
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
++      ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
+       HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
+       ut_d(watch->in_page_hash = FALSE);
+@@ -2070,28 +2132,31 @@
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+       ulint           fold = buf_page_address_fold(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_x_lock(&buf_pool->page_hash_latch); /* X mode: the sentinel branch below may remove an entry from the page hash */
+       bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+       /* The page must exist because buf_pool_watch_set()
+       increments buf_fix_count. */
+       ut_a(bpage);
+       if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
+-              mutex_t* mutex = buf_page_get_mutex(bpage);
++              mutex_t* mutex = buf_page_get_mutex_enter(bpage); /* takes over the job of the removed mutex_enter(mutex); presumably returns the mutex already entered -- TODO confirm */
+-              mutex_enter(mutex);
+               ut_a(bpage->buf_fix_count > 0);
+               bpage->buf_fix_count--;
+               mutex_exit(mutex);
+       } else {
++              mutex_enter(&buf_pool->zip_mutex); /* sentinel case: zip_mutex is held around the fix count update and buf_pool_watch_remove() */
+               ut_a(bpage->buf_fix_count > 0);
+               if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
+                       buf_pool_watch_remove(buf_pool, fold, bpage);
+               }
++              mutex_exit(&buf_pool->zip_mutex);
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+ /****************************************************************//**
+@@ -2111,14 +2176,16 @@
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+       ulint           fold    = buf_page_address_fold(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+       /* The page must exist because buf_pool_watch_set()
+       increments buf_fix_count. */
+       ut_a(bpage);
+       ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       return(ret);
+ }
+@@ -2135,13 +2202,15 @@
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+       ut_a(buf_page_in_file(bpage));
+       buf_LRU_make_block_young(bpage);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+ /********************************************************************//**
+@@ -2165,14 +2234,20 @@
+       ut_a(buf_page_in_file(bpage));
+       if (buf_page_peek_if_too_old(bpage)) {
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex); /* moving the block within the LRU list is done under LRU_list_mutex */
+               buf_LRU_make_block_young(bpage);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       } else if (!access_time) {
+               ulint   time_ms = ut_time_ms();
+-              buf_pool_mutex_enter(buf_pool);
++              mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage); /* the access time is now set under the page's own mutex rather than buf_pool->mutex */
++              //buf_pool_mutex_enter(buf_pool);
++              if (block_mutex) { /* when no mutex is returned the access time is simply left unset; the two statements below form the (unindented) body */
+               buf_page_set_accessed(bpage, time_ms);
+-              buf_pool_mutex_exit(buf_pool);
++              mutex_exit(block_mutex);
++              } /* if (block_mutex) */
++              //buf_pool_mutex_exit(buf_pool);
+       }
+ }
+@@ -2189,7 +2264,8 @@
+       buf_block_t*    block;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
+@@ -2198,7 +2274,8 @@
+               block->check_index_page_at_flush = FALSE;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ }
+ /********************************************************************//**
+@@ -2217,7 +2294,8 @@
+       ibool           is_hashed;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
+@@ -2228,7 +2306,8 @@
+               is_hashed = block->is_hashed;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       return(is_hashed);
+ }
+@@ -2250,7 +2329,8 @@
+       buf_page_t*     bpage;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       bpage = buf_page_hash_get(buf_pool, space, offset);
+@@ -2259,7 +2339,8 @@
+               bpage->file_page_was_freed = TRUE;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       return(bpage);
+ }
+@@ -2280,7 +2361,8 @@
+       buf_page_t*     bpage;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       bpage = buf_page_hash_get(buf_pool, space, offset);
+@@ -2289,7 +2371,8 @@
+               bpage->file_page_was_freed = FALSE;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       return(bpage);
+ }
+@@ -2324,8 +2407,9 @@
+       buf_pool->stat.n_page_gets++;
+       for (;;) {
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
+ lookup:
++              rw_lock_s_lock(&buf_pool->page_hash_latch);
+               bpage = buf_page_hash_get(buf_pool, space, offset);
+               if (bpage) {
+                       ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
+@@ -2334,7 +2418,8 @@
+               /* Page not in buf_pool: needs to be read from file */
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+               buf_read_page(space, zip_size, offset);
+@@ -2346,10 +2431,15 @@
+       if (UNIV_UNLIKELY(!bpage->zip.data)) {
+               /* There is no compressed page. */
+ err_exit:
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch); /* the error exit releases the shared page hash latch instead of buf_pool->mutex */
+               return(NULL);
+       }
++      block_mutex = buf_page_get_mutex_enter(bpage); /* obtained while the shared page hash latch is still held */
++
++      rw_lock_s_unlock(&buf_pool->page_hash_latch); /* hash latch dropped here; the state switch below runs without it */
++
+       ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
+       switch (buf_page_get_state(bpage)) {
+@@ -2358,19 +2448,19 @@
+       case BUF_BLOCK_MEMORY:
+       case BUF_BLOCK_REMOVE_HASH:
+       case BUF_BLOCK_ZIP_FREE:
++              if (block_mutex)
++                      mutex_exit(block_mutex);
+               break;
+       case BUF_BLOCK_ZIP_PAGE:
+       case BUF_BLOCK_ZIP_DIRTY:
+-              block_mutex = &buf_pool->zip_mutex;
+-              mutex_enter(block_mutex);
++              ut_a(block_mutex == &buf_pool->zip_mutex);
+               bpage->buf_fix_count++;
+               goto got_block;
+       case BUF_BLOCK_FILE_PAGE:
+-              block_mutex = &((buf_block_t*) bpage)->mutex;
+-              mutex_enter(block_mutex);
++              ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
+               /* Discard the uncompressed page frame if possible. */
+-              if (buf_LRU_free_block(bpage, FALSE, NULL)
++              if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
+                   == BUF_LRU_FREED) {
+                       mutex_exit(block_mutex);
+@@ -2389,7 +2479,7 @@
+       must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
+       access_time = buf_page_is_accessed(bpage);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
+       mutex_exit(block_mutex);
+@@ -2698,7 +2788,7 @@
+       const buf_block_t*      block)          /*!< in: pointer to block,
+                                               not dereferenced */
+ {
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
+               /* The pointer should be aligned. */
+@@ -2734,6 +2824,7 @@
+       ulint           fix_type;
+       ibool           must_read;
+       ulint           retries = 0;
++      mutex_t*        block_mutex = NULL;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+       ut_ad(mtr);
+@@ -2755,9 +2846,11 @@
+       fold = buf_page_address_fold(space, offset);
+ loop:
+       block = guess;
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
+       if (block) {
++              block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++
+               /* If the guess is a compressed page descriptor that
+               has been allocated by buf_buddy_alloc(), it may have
+               been invalidated by buf_buddy_relocate().  In that
+@@ -2766,11 +2859,15 @@
+               the guess may be pointing to a buffer pool chunk that
+               has been released when resizing the buffer pool. */
+-              if (!buf_block_is_uncompressed(buf_pool, block)
++              if (!block_mutex) {
++                      block = guess = NULL;
++              } else if (!buf_block_is_uncompressed(buf_pool, block)
+                   || offset != block->page.offset
+                   || space != block->page.space
+                   || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
++                      mutex_exit(block_mutex);
++
+                       block = guess = NULL;
+               } else {
+                       ut_ad(!block->page.in_zip_hash);
+@@ -2779,12 +2876,19 @@
+       }
+       if (block == NULL) {
++              rw_lock_s_lock(&buf_pool->page_hash_latch);
+               block = (buf_block_t*) buf_page_hash_get_low(
+                       buf_pool, space, offset, fold);
++              if (block) {
++                      block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++                      ut_a(block_mutex);
++              }
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       }
+ loop2:
+       if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
++              mutex_exit(block_mutex);
+               block = NULL;
+       }
+@@ -2796,12 +2900,14 @@
+                               space, offset, fold);
+                       if (UNIV_LIKELY_NULL(block)) {
+-
++                              block_mutex = buf_page_get_mutex((buf_page_t*)block);
++                              ut_a(block_mutex);
++                              ut_ad(mutex_own(block_mutex));
+                               goto got_block;
+                       }
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
+               if (mode == BUF_GET_IF_IN_POOL
+                   || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
+@@ -2849,7 +2955,8 @@
+               /* The page is being read to buffer pool,
+               but we cannot wait around for the read to
+               complete. */
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(block_mutex);
+               return(NULL);
+       }
+@@ -2859,38 +2966,49 @@
+               ibool           success;
+       case BUF_BLOCK_FILE_PAGE:
++              if (block_mutex == &buf_pool->zip_mutex) {
++                      /* The state is FILE_PAGE but the mutex we hold is zip_mutex, i.e. not this block's own mutex: release it and redo the lookup. */
++                      mutex_exit(block_mutex);
++                      goto loop;
++              }
+               break;
+       case BUF_BLOCK_ZIP_PAGE:
+       case BUF_BLOCK_ZIP_DIRTY:
++              ut_ad(block_mutex == &buf_pool->zip_mutex); /* zip_mutex is expected to be held already, hence the mutex_enter() below is disabled */
+               bpage = &block->page;
+               /* Protect bpage->buf_fix_count. */
+-              mutex_enter(&buf_pool->zip_mutex);
++              //mutex_enter(&buf_pool->zip_mutex);
+               if (bpage->buf_fix_count
+                   || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+                       /* This condition often occurs when the buffer
+                       is not buffer-fixed, but I/O-fixed by
+                       buf_page_init_for_read(). */
+-                      mutex_exit(&buf_pool->zip_mutex);
++                      //mutex_exit(&buf_pool->zip_mutex);
+ wait_until_unfixed:
+                       /* The block is buffer-fixed or I/O-fixed.
+                       Try again later. */
+-                      buf_pool_mutex_exit(buf_pool);
++                      //buf_pool_mutex_exit(buf_pool);
++                      mutex_exit(block_mutex); /* whichever mutex block_mutex currently names is released before sleeping */
+                       os_thread_sleep(WAIT_FOR_READ);
+  
+                       goto loop;
+               }
+               /* Allocate an uncompressed page. */
+-              buf_pool_mutex_exit(buf_pool);
+-              mutex_exit(&buf_pool->zip_mutex);
++              //buf_pool_mutex_exit(buf_pool);
++              //mutex_exit(&buf_pool->zip_mutex);
++              mutex_exit(block_mutex); /* nothing is held while buf_LRU_get_free_block() runs */
+               block = buf_LRU_get_free_block(buf_pool, 0);
+               ut_a(block);
++              block_mutex = &block->mutex; /* from here on block_mutex names the new uncompressed block's mutex */
+-              buf_pool_mutex_enter(buf_pool);
+-              mutex_enter(&block->mutex);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex); /* acquisition order: LRU_list_mutex, page_hash_latch (X), then the block mutex */
++              rw_lock_x_lock(&buf_pool->page_hash_latch);
++              mutex_enter(block_mutex);
+               {
+                       buf_page_t*     hash_bpage;
+@@ -2903,35 +3021,47 @@
+                               while buf_pool->mutex was released.
+                               Free the block that was allocated. */
+-                              buf_LRU_block_free_non_file_page(block);
+-                              mutex_exit(&block->mutex);
++                              buf_LRU_block_free_non_file_page(block, TRUE);
++                              mutex_exit(block_mutex);
+                               block = (buf_block_t*) hash_bpage;
++                              if (block) {
++                                      block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++                                      ut_a(block_mutex);
++                              }
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                              mutex_exit(&buf_pool->LRU_list_mutex);
+                               goto loop2;
+                       }
+               }
++              mutex_enter(&buf_pool->zip_mutex);
++
+               if (UNIV_UNLIKELY
+                   (bpage->buf_fix_count
+                    || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
++                      mutex_exit(&buf_pool->zip_mutex);
+                       /* The block was buffer-fixed or I/O-fixed
+                       while buf_pool->mutex was not held by this thread.
+                       Free the block that was allocated and try again.
+                       This should be extremely unlikely. */
+-                      buf_LRU_block_free_non_file_page(block);
+-                      mutex_exit(&block->mutex);
++                      buf_LRU_block_free_non_file_page(block, TRUE);
++                      //mutex_exit(&block->mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+                       goto wait_until_unfixed;
+               }
+               /* Move the compressed page from bpage to block,
+               and uncompress it. */
+-              mutex_enter(&buf_pool->zip_mutex);
+-
+               buf_relocate(bpage, &block->page);
++
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+               buf_block_init_low(block);
+               block->lock_hash_val = lock_rec_hash(space, offset);
+@@ -2940,7 +3070,7 @@
+               if (buf_page_get_state(&block->page)
+                   == BUF_BLOCK_ZIP_PAGE) {
+-                      UT_LIST_REMOVE(list, buf_pool->zip_clean,
++                      UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
+                                      &block->page);
+                       ut_ad(!block->page.in_flush_list);
+               } else {
+@@ -2957,19 +3087,24 @@
+               /* Insert at the front of unzip_LRU list */
+               buf_unzip_LRU_add_block(block, FALSE);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++
+               block->page.buf_fix_count = 1;
+               buf_block_set_io_fix(block, BUF_IO_READ);
+               rw_lock_x_lock_func(&block->lock, 0, file, line);
+               UNIV_MEM_INVALID(bpage, sizeof *bpage);
+-              mutex_exit(&block->mutex);
++              mutex_exit(block_mutex);
+               mutex_exit(&buf_pool->zip_mutex);
++
++              buf_pool_mutex_enter(buf_pool);
+               buf_pool->n_pend_unzip++;
++              buf_pool_mutex_exit(buf_pool);
+-              buf_buddy_free(buf_pool, bpage, sizeof *bpage);
++              buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
+               /* Decompress the page and apply buffered operations
+               while not holding buf_pool->mutex or block->mutex. */
+@@ -2982,12 +3117,15 @@
+               }
+               /* Unfix and unlatch the block. */
+-              buf_pool_mutex_enter(buf_pool);
+-              mutex_enter(&block->mutex);
++              //buf_pool_mutex_enter(buf_pool);
++              block_mutex = &block->mutex;
++              mutex_enter(block_mutex); /* not released in this case: the matching mutex_exit(&block->mutex) below is removed, so the mutex stays held after the break */
+               block->page.buf_fix_count--;
+               buf_block_set_io_fix(block, BUF_IO_NONE);
+-              mutex_exit(&block->mutex);
++
++              buf_pool_mutex_enter(buf_pool); /* n_pend_unzip is still updated under buf_pool->mutex, now taken only around the decrement */
+               buf_pool->n_pend_unzip--;
++              buf_pool_mutex_exit(buf_pool);
+               rw_lock_x_unlock(&block->lock);
+               break;
+@@ -3003,7 +3141,7 @@
+       ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+-      mutex_enter(&block->mutex);
++      //mutex_enter(&block->mutex);
+ #if UNIV_WORD_SIZE == 4
+       /* On 32-bit systems, there is no padding in buf_page_t.  On
+       other systems, Valgrind could complain about uninitialized pad
+@@ -3013,13 +3151,14 @@
+       buf_block_buf_fix_inc(block, file, line);
+-      mutex_exit(&block->mutex);
++      //mutex_exit(&block->mutex);
+       /* Check if this is the first access to the page */
+       access_time = buf_page_is_accessed(&block->page);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(block_mutex);
+       buf_page_set_accessed_make_young(&block->page, access_time);
+@@ -3252,9 +3391,11 @@
+       buf_pool = buf_pool_from_block(block);
+       if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex);
+               buf_LRU_make_block_young(&block->page);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       } else if (!buf_page_is_accessed(&block->page)) {
+               /* Above, we do a dirty read on purpose, to avoid
+               mutex contention.  The field buf_page_t::access_time
+@@ -3262,9 +3403,11 @@
+               field must be protected by mutex, however. */
+               ulint   time_ms = ut_time_ms();
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&block->mutex);
+               buf_page_set_accessed(&block->page, time_ms);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&block->mutex);
+       }
+       ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
+@@ -3331,18 +3474,21 @@
+       ut_ad(mtr);
+       ut_ad(mtr->state == MTR_ACTIVE);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       block = buf_block_hash_get(buf_pool, space_id, page_no);
+       if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+               return(NULL);
+       }
+       ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
+       mutex_enter(&block->mutex);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+       ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+@@ -3431,7 +3577,10 @@
+       buf_page_t*     hash_page;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+       ut_ad(mutex_own(&(block->mutex)));
+       ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+@@ -3460,11 +3609,14 @@
+       if (UNIV_LIKELY(!hash_page)) {
+       } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
+               /* Preserve the reference count. */
+-              ulint   buf_fix_count = hash_page->buf_fix_count;
++              ulint   buf_fix_count;
++              mutex_enter(&buf_pool->zip_mutex);
++              buf_fix_count = hash_page->buf_fix_count;
+               ut_a(buf_fix_count > 0);
+               block->page.buf_fix_count += buf_fix_count;
+               buf_pool_watch_remove(buf_pool, fold, hash_page);
++              mutex_exit(&buf_pool->zip_mutex);
+       } else {
+               fprintf(stderr,
+                       "InnoDB: Error: page %lu %lu already found"
+@@ -3474,7 +3626,8 @@
+                       (const void*) hash_page, (const void*) block);
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+               mutex_exit(&block->mutex);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+               buf_print();
+               buf_LRU_print();
+               buf_validate();
+@@ -3558,7 +3711,9 @@
+       fold = buf_page_address_fold(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
+       if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
+@@ -3567,9 +3722,15 @@
+ err_exit:
+               if (block) {
+                       mutex_enter(&block->mutex);
+-                      buf_LRU_block_free_non_file_page(block);
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++                      buf_LRU_block_free_non_file_page(block, FALSE);
+                       mutex_exit(&block->mutex);
+               }
++              else {
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++                      rw_lock_x_unlock(&buf_pool->page_hash_latch);
++              }
+               bpage = NULL;
+               goto func_exit;
+@@ -3592,6 +3753,8 @@
+               buf_page_init(space, offset, fold, block);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+               /* The block must be put to the LRU list, to the old blocks */
+               buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+@@ -3619,7 +3782,7 @@
+                       been added to buf_pool->LRU and
+                       buf_pool->page_hash. */
+                       mutex_exit(&block->mutex);
+-                      data = buf_buddy_alloc(buf_pool, zip_size, &lru);
++                      data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
+                       mutex_enter(&block->mutex);
+                       block->page.zip.data = data;
+@@ -3632,6 +3795,7 @@
+                       buf_unzip_LRU_add_block(block, TRUE);
+               }
++              mutex_exit(&buf_pool->LRU_list_mutex);
+               mutex_exit(&block->mutex);
+       } else {
+               /* Defer buf_buddy_alloc() until after the block has
+@@ -3643,8 +3807,8 @@
+               control block (bpage), in order to avoid the
+               invocation of buf_buddy_relocate_block() on
+               uninitialized data. */
+-              data = buf_buddy_alloc(buf_pool, zip_size, &lru);
+-              bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
++              data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
++              bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
+               /* Initialize the buf_pool pointer. */
+               bpage->buf_pool = buf_pool;
+@@ -3663,8 +3827,11 @@
+                               /* The block was added by some other thread. */
+                               watch_page = NULL;
+-                              buf_buddy_free(buf_pool, bpage, sizeof *bpage);
+-                              buf_buddy_free(buf_pool, data, zip_size);
++                              buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
++                              buf_buddy_free(buf_pool, data, zip_size, TRUE);
++
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+                               bpage = NULL;
+                               goto func_exit;
+@@ -3708,18 +3875,24 @@
+               HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
+                           bpage);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+               /* The block must be put to the LRU list, to the old blocks */
+               buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+               buf_LRU_insert_zip_clean(bpage);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++
+               buf_page_set_io_fix(bpage, BUF_IO_READ);
+               mutex_exit(&buf_pool->zip_mutex);
+       }
++      buf_pool_mutex_enter(buf_pool);
+       buf_pool->n_pend_reads++;
+-func_exit:
+       buf_pool_mutex_exit(buf_pool);
++func_exit:
++      //buf_pool_mutex_exit(buf_pool);
+       if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+@@ -3761,7 +3934,9 @@
+       fold = buf_page_address_fold(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       block = (buf_block_t*) buf_page_hash_get_low(
+               buf_pool, space, offset, fold);
+@@ -3777,7 +3952,9 @@
+ #endif /* UNIV_DEBUG_FILE_ACCESSES */
+               /* Page can be found in buf_pool */
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+               buf_block_free(free_block);
+@@ -3799,6 +3976,7 @@
+       mutex_enter(&block->mutex);
+       buf_page_init(space, offset, fold, block);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       /* The block must be put to the LRU list */
+       buf_LRU_add_block(&block->page, FALSE);
+@@ -3825,7 +4003,7 @@
+               the reacquisition of buf_pool->mutex.  We also must
+               defer this operation until after the block descriptor
+               has been added to buf_pool->LRU and buf_pool->page_hash. */
+-              data = buf_buddy_alloc(buf_pool, zip_size, &lru);
++              data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
+               mutex_enter(&block->mutex);
+               block->page.zip.data = data;
+@@ -3843,7 +4021,8 @@
+       buf_page_set_accessed(&block->page, time_ms);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+       mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+@@ -3894,6 +4073,8 @@
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+       const ibool     uncompressed = (buf_page_get_state(bpage)
+                                       == BUF_BLOCK_FILE_PAGE);
++      ibool           have_LRU_mutex = FALSE;
++      mutex_t*        block_mutex;
+       ut_a(buf_page_in_file(bpage));
+@@ -4027,8 +4208,26 @@
+               }
+       }
++      if (io_type == BUF_IO_WRITE
++          && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
++              || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
++              /* to keep consistency at buf_LRU_insert_zip_clean() */
++              have_LRU_mutex = TRUE; /* optimistic */
++      }
++retry_mutex:
++      if (have_LRU_mutex)
++              mutex_enter(&buf_pool->LRU_list_mutex);
++      block_mutex = buf_page_get_mutex_enter(bpage);
++      ut_a(block_mutex);
++      if (io_type == BUF_IO_WRITE
++          && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
++              || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
++          && !have_LRU_mutex) {
++              mutex_exit(block_mutex);
++              have_LRU_mutex = TRUE;
++              goto retry_mutex;
++      }
+       buf_pool_mutex_enter(buf_pool);
+-      mutex_enter(buf_page_get_mutex(bpage));
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+       if (io_type == BUF_IO_WRITE || uncompressed) {
+@@ -4051,6 +4250,7 @@
+               the x-latch to this OS thread: do not let this confuse you in
+               debugging! */
++              ut_a(!have_LRU_mutex);
+               ut_ad(buf_pool->n_pend_reads > 0);
+               buf_pool->n_pend_reads--;
+               buf_pool->stat.n_pages_read++;
+@@ -4068,6 +4268,9 @@
+               buf_flush_write_complete(bpage);
++              if (have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++
+               if (uncompressed) {
+                       rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
+                                            BUF_IO_WRITE);
+@@ -4090,8 +4293,8 @@
+       }
+ #endif /* UNIV_DEBUG */
+-      mutex_exit(buf_page_get_mutex(bpage));
+       buf_pool_mutex_exit(buf_pool);
++      mutex_exit(block_mutex);
+ }
+ /*********************************************************************//**
+@@ -4108,7 +4311,9 @@
+       ut_ad(buf_pool);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       chunk = buf_pool->chunks;
+@@ -4125,7 +4330,9 @@
+               }
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       return(TRUE);
+ }
+@@ -4173,7 +4380,8 @@
+               freed = buf_LRU_search_and_free_block(buf_pool, 100);
+       }
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+       ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+       ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
+@@ -4186,7 +4394,8 @@
+       memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
+       buf_refresh_io_stats(buf_pool);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+ /*********************************************************************//**
+@@ -4228,7 +4437,10 @@
+       ut_ad(buf_pool);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
++      /* for keep the new latch order, it cannot validate correctly... */
+       chunk = buf_pool->chunks;
+@@ -4323,7 +4535,7 @@
+       /* Check clean compressed-only blocks. */
+       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(zip_list, b)) {
+               ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+               switch (buf_page_get_io_fix(b)) {
+               case BUF_IO_NONE:
+@@ -4354,7 +4566,7 @@
+       buf_flush_list_mutex_enter(buf_pool);
+       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(flush_list, b)) {
+               ut_ad(b->in_flush_list);
+               ut_a(b->oldest_modification);
+               n_flush++;
+@@ -4413,6 +4625,8 @@
+       }
+       ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
++      /* because of latching order with block->mutex, we cannot get needed mutexes before that */
++/*
+       if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
+               fprintf(stderr, "Free list len %lu, free blocks %lu\n",
+                       (ulong) UT_LIST_GET_LEN(buf_pool->free),
+@@ -4423,8 +4637,11 @@
+       ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
+       ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
+       ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
++*/
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       ut_a(buf_LRU_validate());
+       ut_a(buf_flush_validate(buf_pool));
+@@ -4480,7 +4697,9 @@
+       index_ids = mem_alloc(size * sizeof *index_ids);
+       counts = mem_alloc(sizeof(ulint) * size);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      mutex_enter(&buf_pool->free_list_mutex);
+       buf_flush_list_mutex_enter(buf_pool);
+       fprintf(stderr,
+@@ -4549,7 +4768,9 @@
+               }
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      mutex_exit(&buf_pool->free_list_mutex);
+       for (i = 0; i < n_found; i++) {
+               index = dict_index_get_if_in_cache(index_ids[i]);
+@@ -4606,7 +4827,7 @@
+       buf_chunk_t*    chunk;
+       ulint           fixed_pages_number = 0;
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
+       chunk = buf_pool->chunks;
+@@ -4640,7 +4861,7 @@
+       /* Traverse the lists of clean and dirty compressed-only blocks. */
+       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(zip_list, b)) {
+               ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+               ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
+@@ -4652,7 +4873,7 @@
+       buf_flush_list_mutex_enter(buf_pool);
+       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+-           b = UT_LIST_GET_NEXT(list, b)) {
++           b = UT_LIST_GET_NEXT(flush_list, b)) {
+               ut_ad(b->in_flush_list);
+               switch (buf_page_get_state(b)) {
+@@ -4678,7 +4899,7 @@
+       buf_flush_list_mutex_exit(buf_pool);
+       mutex_exit(&buf_pool->zip_mutex);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
+       return(fixed_pages_number);
+ }
+@@ -4772,6 +4993,8 @@
+       ut_ad(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      mutex_enter(&buf_pool->free_list_mutex);
+       buf_pool_mutex_enter(buf_pool);
+       buf_flush_list_mutex_enter(buf_pool);
+@@ -4875,6 +5098,8 @@
+               buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
+       buf_refresh_io_stats(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      mutex_exit(&buf_pool->free_list_mutex);
+       buf_pool_mutex_exit(buf_pool);
+ }
+@@ -4994,11 +5219,13 @@
+ {
+       ulint   len;
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->free_list_mutex);
+       len = UT_LIST_GET_LEN(buf_pool->free);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->free_list_mutex);
+       return(len);
+ }
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c   2010-12-03 15:22:36.318955693 +0900
++++ b/storage/innobase/buf/buf0flu.c   2010-12-03 15:48:29.289024083 +0900
+@@ -279,7 +279,7 @@
+       ut_d(block->page.in_flush_list = TRUE);
+       block->page.oldest_modification = lsn;
+-      UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
++      UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
+ #ifdef UNIV_DEBUG_VALGRIND
+       {
+@@ -373,14 +373,14 @@
+                      > block->page.oldest_modification) {
+                       ut_ad(b->in_flush_list);
+                       prev_b = b;
+-                      b = UT_LIST_GET_NEXT(list, b);
++                      b = UT_LIST_GET_NEXT(flush_list, b);
+               }
+       }
+       if (prev_b == NULL) {
+-              UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
++              UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
+       } else {
+-              UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
++              UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
+                                    prev_b, &block->page);
+       }
+@@ -406,7 +406,7 @@
+       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
+       //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+-      //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
++      ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       //ut_ad(bpage->in_LRU_list);
+       if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
+@@ -442,14 +442,14 @@
+       enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+-      ut_a(buf_page_in_file(bpage));
++      //ut_a(buf_page_in_file(bpage));
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
+-      if (bpage->oldest_modification != 0
++      if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
+           && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
+               ut_ad(bpage->in_flush_list);
+@@ -480,7 +480,7 @@
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_ad(bpage->in_flush_list);
+@@ -498,11 +498,11 @@
+               return;
+       case BUF_BLOCK_ZIP_DIRTY:
+               buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
+-              UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++              UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+               buf_LRU_insert_zip_clean(bpage);
+               break;
+       case BUF_BLOCK_FILE_PAGE:
+-              UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++              UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+               break;
+       }
+@@ -546,7 +546,7 @@
+       buf_page_t*     prev_b = NULL;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       /* Must reside in the same buffer pool. */
+       ut_ad(buf_pool == buf_pool_from_bpage(dpage));
+@@ -575,18 +575,18 @@
+       because we assert on in_flush_list in comparison function. */
+       ut_d(bpage->in_flush_list = FALSE);
+-      prev = UT_LIST_GET_PREV(list, bpage);
+-      UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++      prev = UT_LIST_GET_PREV(flush_list, bpage);
++      UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+       if (prev) {
+               ut_ad(prev->in_flush_list);
+               UT_LIST_INSERT_AFTER(
+-                      list,
++                      flush_list,
+                       buf_pool->flush_list,
+                       prev, dpage);
+       } else {
+               UT_LIST_ADD_FIRST(
+-                      list,
++                      flush_list,
+                       buf_pool->flush_list,
+                       dpage);
+       }
+@@ -1055,7 +1055,7 @@
+ #ifdef UNIV_DEBUG
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(!buf_pool_mutex_own(buf_pool));
++      //ut_ad(!buf_pool_mutex_own(buf_pool));
+ #endif
+ #ifdef UNIV_LOG_DEBUG
+@@ -1069,7 +1069,8 @@
+       io_fixed and oldest_modification != 0.  Thus, it cannot be
+       relocated in the buffer pool or removed from flush_list or
+       LRU_list. */
+-      ut_ad(!buf_pool_mutex_own(buf_pool));
++      //ut_ad(!buf_pool_mutex_own(buf_pool));
++      ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
+       ut_ad(!buf_flush_list_mutex_own(buf_pool));
+       ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
+       ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+@@ -1155,12 +1156,18 @@
+       ibool           is_uncompressed;
+       ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
++#endif
+       ut_ad(buf_page_in_file(bpage));
+       block_mutex = buf_page_get_mutex(bpage);
+       ut_ad(mutex_own(block_mutex));
++      buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
++
+       ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
+       buf_page_set_io_fix(bpage, BUF_IO_WRITE);
+@@ -1322,14 +1329,16 @@
+               buf_pool = buf_pool_get(space, i);
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              rw_lock_s_lock(&buf_pool->page_hash_latch);
+               /* We only want to flush pages from this buffer pool. */
+               bpage = buf_page_hash_get(buf_pool, space, i);
+               if (!bpage) {
+-                      buf_pool_mutex_exit(buf_pool);
++                      //buf_pool_mutex_exit(buf_pool);
++                      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+                       continue;
+               }
+@@ -1341,11 +1350,9 @@
+               if (flush_type != BUF_FLUSH_LRU
+                   || i == offset
+                   || buf_page_is_old(bpage)) {
+-                      mutex_t* block_mutex = buf_page_get_mutex(bpage);
++                      mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
+-                      mutex_enter(block_mutex);
+-
+-                      if (buf_flush_ready_for_flush(bpage, flush_type)
++                      if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
+                           && (i == offset || !bpage->buf_fix_count)) {
+                               /* We only try to flush those
+                               neighbors != offset where the buf fix
+@@ -1361,11 +1368,12 @@
+                               ut_ad(!buf_pool_mutex_own(buf_pool));
+                               count++;
+                               continue;
+-                      } else {
++                      } else if (block_mutex) {
+                               mutex_exit(block_mutex);
+                       }
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       }
+       return(count);
+@@ -1398,21 +1406,25 @@
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+ #endif /* UNIV_DEBUG */
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(flush_type != BUF_FLUSH_LRU
++            || mutex_own(&buf_pool->LRU_list_mutex));
+-      block_mutex = buf_page_get_mutex(bpage);
+-      mutex_enter(block_mutex);
++      block_mutex = buf_page_get_mutex_enter(bpage);
+-      ut_a(buf_page_in_file(bpage));
++      //ut_a(buf_page_in_file(bpage));
+-      if (buf_flush_ready_for_flush(bpage, flush_type)) {
++      if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
+               ulint           space;
+               ulint           offset;
+               buf_pool_t*     buf_pool;
+               buf_pool = buf_pool_from_bpage(bpage);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              if (flush_type == BUF_FLUSH_LRU) {
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++              }
+               /* These fields are protected by both the
+               buffer pool mutex and block mutex. */
+@@ -1428,13 +1440,18 @@
+                                                 *count,
+                                                 n_to_flush);
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              if (flush_type == BUF_FLUSH_LRU) {
++                      mutex_enter(&buf_pool->LRU_list_mutex);
++              }
+               flushed = TRUE;
+-      } else {
++      } else if (block_mutex) {
+               mutex_exit(block_mutex);
+       }
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(flush_type != BUF_FLUSH_LRU
++            || mutex_own(&buf_pool->LRU_list_mutex));
+       return(flushed);
+ }
+@@ -1455,7 +1472,8 @@
+       buf_page_t*     bpage;
+       ulint           count = 0;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       do {
+               /* Start from the end of the list looking for a
+@@ -1477,7 +1495,8 @@
+       should be flushed, we factor in this value. */
+       buf_lru_flush_page_count += count;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       return(count);
+ }
+@@ -1505,9 +1524,10 @@
+ {
+       ulint           len;
+       buf_page_t*     bpage;
++      buf_page_t*     prev_bpage = NULL;
+       ulint           count = 0;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       /* If we have flushed enough, leave the loop */
+       do {
+@@ -1526,6 +1546,7 @@
+               if (bpage) {
+                       ut_a(bpage->oldest_modification > 0);
++                      prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
+               }
+               if (!bpage || bpage->oldest_modification >= lsn_limit) {
+@@ -1567,9 +1588,17 @@
+                               break;
+                       }
+-                      bpage = UT_LIST_GET_PREV(list, bpage);
++                      bpage = UT_LIST_GET_PREV(flush_list, bpage);
+-                      ut_ad(!bpage || bpage->in_flush_list);
++                      //ut_ad(!bpage || bpage->in_flush_list);
++                      if (bpage != prev_bpage) {
++                              /* the search might warp.. retrying */
++                              buf_flush_list_mutex_exit(buf_pool);
++                              break;
++                      }
++                      if (bpage) {
++                              prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
++                      }
+                       buf_flush_list_mutex_exit(buf_pool);
+@@ -1578,7 +1607,7 @@
+       } while (count < min_n && bpage != NULL && len > 0);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       return(count);
+ }
+@@ -1617,13 +1646,15 @@
+             || sync_thread_levels_empty_gen(TRUE));
+ #endif /* UNIV_SYNC_DEBUG */
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
+       /* Note: The buffer pool mutex is released and reacquired within
+       the flush functions. */
+       switch(flush_type) {
+       case BUF_FLUSH_LRU:
++              mutex_enter(&buf_pool->LRU_list_mutex);
+               count = buf_flush_LRU_list_batch(buf_pool, min_n);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+               break;
+       case BUF_FLUSH_LIST:
+               count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
+@@ -1632,7 +1663,7 @@
+               ut_error;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
+       buf_flush_buffered_writes();
+@@ -1888,7 +1919,7 @@
+ retry:
+       //buf_pool_mutex_enter(buf_pool);
+       if (have_LRU_mutex)
+-              buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex);
+       n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+@@ -1905,15 +1936,15 @@
+                       bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+                       continue;
+               }
+-              block_mutex = buf_page_get_mutex(bpage);
+-
+-              mutex_enter(block_mutex);
++              block_mutex = buf_page_get_mutex_enter(bpage);
+-              if (buf_flush_ready_for_replace(bpage)) {
++              if (block_mutex && buf_flush_ready_for_replace(bpage)) {
+                       n_replaceable++;
+               }
+-              mutex_exit(block_mutex);
++              if (block_mutex) {
++                      mutex_exit(block_mutex);
++              }
+               distance++;
+@@ -1922,7 +1953,7 @@
+       //buf_pool_mutex_exit(buf_pool);
+       if (have_LRU_mutex)
+-              buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
+@@ -2121,7 +2152,7 @@
+       ut_ad(buf_flush_list_mutex_own(buf_pool));
+-      UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
++      UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
+                        ut_ad(ut_list_node_313->in_flush_list));
+       bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+@@ -2161,7 +2192,7 @@
+                       rnode = rbt_next(buf_pool->flush_rbt, rnode);
+               }
+-              bpage = UT_LIST_GET_NEXT(list, bpage);
++              bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+               ut_a(!bpage || om >= bpage->oldest_modification);
+       }
+diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
+--- a/storage/innobase/buf/buf0lru.c   2010-12-03 15:22:36.321987250 +0900
++++ b/storage/innobase/buf/buf0lru.c   2010-12-03 15:48:29.293023197 +0900
+@@ -143,8 +143,9 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+-      buf_block_t*    block); /*!< in: block, must contain a file page and
++      buf_block_t*    block,  /*!< in: block, must contain a file page and
+                               be in a state where it can be freed */
++      ibool           have_page_hash_mutex);
+ /******************************************************************//**
+ Determines if the unzip_LRU list should be used for evicting a victim
+@@ -154,15 +155,20 @@
+ ibool
+ buf_LRU_evict_from_unzip_LRU(
+ /*=========================*/
+-      buf_pool_t*     buf_pool)
++      buf_pool_t*     buf_pool,
++      ibool           have_LRU_mutex)
+ {
+       ulint   io_avg;
+       ulint   unzip_avg;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      if (!have_LRU_mutex)
++              mutex_enter(&buf_pool->LRU_list_mutex);
+       /* If the unzip_LRU list is empty, we can only use the LRU. */
+       if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
++              if (!have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+               return(FALSE);
+       }
+@@ -171,14 +177,20 @@
+       decompressed pages in the buffer pool. */
+       if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
+           <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
++              if (!have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+               return(FALSE);
+       }
+       /* If eviction hasn't started yet, we assume by default
+       that a workload is disk bound. */
+       if (buf_pool->freed_page_clock == 0) {
++              if (!have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+               return(TRUE);
+       }
++      if (!have_LRU_mutex)
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       /* Calculate the average over past intervals, and add the values
+       of the current interval. */
+@@ -246,19 +258,23 @@
+       page_arr = ut_malloc(
+               sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+ scan_again:
+       num_entries = 0;
+       bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+       while (bpage != NULL) {
+-              mutex_t*        block_mutex = buf_page_get_mutex(bpage);
++              mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
+               buf_page_t*     prev_bpage;
+-              mutex_enter(block_mutex);
+               prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
++              if (!block_mutex) {
++                      goto next_page;
++              }
++
+               ut_a(buf_page_in_file(bpage));
+               if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
+@@ -287,14 +303,16 @@
+                       /* Array full. We release the buf_pool->mutex to
+                       obey the latching order. */
+-                      buf_pool_mutex_exit(buf_pool);
++                      //buf_pool_mutex_exit(buf_pool);
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+                       buf_LRU_drop_page_hash_batch(
+                               id, zip_size, page_arr, num_entries);
+                       num_entries = 0;
+-                      buf_pool_mutex_enter(buf_pool);
++                      //buf_pool_mutex_enter(buf_pool);
++                      mutex_enter(&buf_pool->LRU_list_mutex);
+               } else {
+                       mutex_exit(block_mutex);
+               }
+@@ -319,7 +337,8 @@
+               }
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+       /* Drop any remaining batch of search hashed pages. */
+       buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
+@@ -341,7 +360,9 @@
+       ibool           all_freed;
+ scan_again:
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
+       all_freed = TRUE;
+@@ -369,8 +390,16 @@
+                       all_freed = FALSE;
+               } else {
+-                      mutex_t* block_mutex = buf_page_get_mutex(bpage);
+-                      mutex_enter(block_mutex);
++                      mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
++
++                      if (!block_mutex) {
++                              /* This case should be impossible. If it
++                              happens anyway, force another scan_again pass. */
++
++                              all_freed = FALSE;
++
++                              goto next_page_no_mutex;
++                      }
+                       if (bpage->buf_fix_count > 0) {
+@@ -429,7 +458,9 @@
+                               ulint   page_no;
+                               ulint   zip_size;
+-                              buf_pool_mutex_exit(buf_pool);
++                              //buf_pool_mutex_exit(buf_pool);
++                              mutex_exit(&buf_pool->LRU_list_mutex);
++                              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+                               zip_size = buf_page_get_zip_size(bpage);
+                               page_no = buf_page_get_page_no(bpage);
+@@ -454,7 +485,7 @@
+                       if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+                           != BUF_BLOCK_ZIP_FREE) {
+                               buf_LRU_block_free_hashed_page((buf_block_t*)
+-                                                             bpage);
++                                                             bpage, TRUE);
+                       } else {
+                               /* The block_mutex should have been
+                               released by buf_LRU_block_remove_hashed_page()
+@@ -486,7 +517,9 @@
+               bpage = prev_bpage;
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       if (!all_freed) {
+               os_thread_sleep(20000);
+@@ -532,7 +565,9 @@
+       buf_page_t*     b;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++      ut_ad(mutex_own(&buf_pool->flush_list_mutex));
+       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
+       /* Find the first successor of bpage in the LRU list
+@@ -540,17 +575,17 @@
+       b = bpage;
+       do {
+               b = UT_LIST_GET_NEXT(LRU, b);
+-      } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
++      } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
+       /* Insert bpage before b, i.e., after the predecessor of b. */
+       if (b) {
+-              b = UT_LIST_GET_PREV(list, b);
++              b = UT_LIST_GET_PREV(zip_list, b);
+       }
+       if (b) {
+-              UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
++              UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
+       } else {
+-              UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
++              UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
+       }
+ }
+@@ -563,18 +598,19 @@
+ buf_LRU_free_from_unzip_LRU_list(
+ /*=============================*/
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+-      ulint           n_iterations)   /*!< in: how many times this has
++      ulint           n_iterations,   /*!< in: how many times this has
+                                       been called repeatedly without
+                                       result: a high value means that
+                                       we should search farther; we will
+                                       search n_iterations / 5 of the
+                                       unzip_LRU list, or nothing if
+                                       n_iterations >= 5 */
++      ibool           have_LRU_mutex)
+ {
+       buf_block_t*    block;
+       ulint           distance;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       /* Theoratically it should be much easier to find a victim
+       from unzip_LRU as we can choose even a dirty block (as we'll
+@@ -584,7 +620,7 @@
+       if we have done five iterations so far. */
+       if (UNIV_UNLIKELY(n_iterations >= 5)
+-          || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
++          || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
+               return(FALSE);
+       }
+@@ -592,18 +628,25 @@
+       distance = 100 + (n_iterations
+                         * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
++restart:
+       for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+            UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
+            block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
+               enum buf_lru_free_block_status  freed;
++              mutex_enter(&block->mutex);
++              if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
++                  || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
++                      mutex_exit(&block->mutex);
++                      goto restart;
++              }
++
+               ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+               ut_ad(block->in_unzip_LRU_list);
+               ut_ad(block->page.in_LRU_list);
+-              mutex_enter(&block->mutex);
+-              freed = buf_LRU_free_block(&block->page, FALSE, NULL);
++              freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
+               mutex_exit(&block->mutex);
+               switch (freed) {
+@@ -637,21 +680,23 @@
+ buf_LRU_free_from_common_LRU_list(
+ /*==============================*/
+       buf_pool_t*     buf_pool,
+-      ulint           n_iterations)
++      ulint           n_iterations,
+                               /*!< in: how many times this has been called
+                               repeatedly without result: a high value means
+                               that we should search farther; if
+                               n_iterations < 10, then we search
+                               n_iterations / 10 * buf_pool->curr_size
+                               pages from the end of the LRU list */
++      ibool           have_LRU_mutex)
+ {
+       buf_page_t*     bpage;
+       ulint           distance;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
++restart:
+       for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+            UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
+            bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
+@@ -659,14 +704,23 @@
+               enum buf_lru_free_block_status  freed;
+               unsigned                        accessed;
+               mutex_t*                        block_mutex
+-                      = buf_page_get_mutex(bpage);
++                      = buf_page_get_mutex_enter(bpage);
++
++              if (!block_mutex) {
++                      goto restart;
++              }
++
++              if (!bpage->in_LRU_list
++                  || !buf_page_in_file(bpage)) {
++                      mutex_exit(block_mutex);
++                      goto restart;
++              }
+               ut_ad(buf_page_in_file(bpage));
+               ut_ad(bpage->in_LRU_list);
+-              mutex_enter(block_mutex);
+               accessed = buf_page_is_accessed(bpage);
+-              freed = buf_LRU_free_block(bpage, TRUE, NULL);
++              freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
+               mutex_exit(block_mutex);
+               switch (freed) {
+@@ -718,16 +772,23 @@
+                               n_iterations / 5 of the unzip_LRU list. */
+ {
+       ibool   freed = FALSE;
++      ibool   have_LRU_mutex = FALSE;
+-      buf_pool_mutex_enter(buf_pool);
++      if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
++              have_LRU_mutex = TRUE;
++
++      //buf_pool_mutex_enter(buf_pool);
++      if (have_LRU_mutex)
++              mutex_enter(&buf_pool->LRU_list_mutex);
+-      freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
++      freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
+       if (!freed) {
+               freed = buf_LRU_free_from_common_LRU_list(
+-                      buf_pool, n_iterations);
++                      buf_pool, n_iterations, have_LRU_mutex);
+       }
++      buf_pool_mutex_enter(buf_pool);
+       if (!freed) {
+               buf_pool->LRU_flush_ended = 0;
+       } else if (buf_pool->LRU_flush_ended > 0) {
+@@ -735,6 +796,8 @@
+       }
+       buf_pool_mutex_exit(buf_pool);
++      if (have_LRU_mutex)
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       return(freed);
+ }
+@@ -795,7 +858,9 @@
+               buf_pool = buf_pool_from_array(i);
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex);
++              mutex_enter(&buf_pool->free_list_mutex);
+               if (!recv_recovery_on
+                   && UT_LIST_GET_LEN(buf_pool->free)
+@@ -805,7 +870,9 @@
+                       ret = TRUE;
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++              mutex_exit(&buf_pool->free_list_mutex);
+       }
+       return(ret);
+@@ -823,9 +890,10 @@
+ {
+       buf_block_t*    block;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+-      block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
++      mutex_enter(&buf_pool->free_list_mutex);
++      block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
+       if (block) {
+@@ -834,7 +902,9 @@
+               ut_ad(!block->page.in_flush_list);
+               ut_ad(!block->page.in_LRU_list);
+               ut_a(!buf_page_in_file(&block->page));
+-              UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
++              UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
++
++              mutex_exit(&buf_pool->free_list_mutex);
+               mutex_enter(&block->mutex);
+@@ -844,6 +914,8 @@
+               ut_ad(buf_pool_from_block(block) == buf_pool);
+               mutex_exit(&block->mutex);
++      } else {
++              mutex_exit(&buf_pool->free_list_mutex);
+       }
+       return(block);
+@@ -868,7 +940,7 @@
+       ibool           mon_value_was   = FALSE;
+       ibool           started_monitor = FALSE;
+ loop:
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
+       if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+           + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
+@@ -951,8 +1023,10 @@
+                       ibool   lru;
+                       page_zip_set_size(&block->page.zip, zip_size);
++                      mutex_enter(&buf_pool->LRU_list_mutex);
+                       block->page.zip.data = buf_buddy_alloc(
+-                              buf_pool, zip_size, &lru);
++                              buf_pool, zip_size, &lru, FALSE);
++                      mutex_exit(&buf_pool->LRU_list_mutex);
+                       UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
+               } else {
+@@ -960,7 +1034,7 @@
+                       block->page.zip.data = NULL;
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
+               if (started_monitor) {
+                       srv_print_innodb_monitor = mon_value_was;
+@@ -972,7 +1046,7 @@
+       /* If no block was in the free list, search from the end of the LRU
+       list and try to free a block there */
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
+       freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
+@@ -1058,7 +1132,8 @@
+       ulint   new_len;
+       ut_a(buf_pool->LRU_old);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
+       ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
+ #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
+@@ -1124,7 +1199,8 @@
+ {
+       buf_page_t*     bpage;
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+       /* We first initialize all blocks in the LRU list as old and then use
+@@ -1159,13 +1235,14 @@
+       ut_ad(buf_pool);
+       ut_ad(bpage);
+       ut_ad(buf_page_in_file(bpage));
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       if (buf_page_belongs_to_unzip_LRU(bpage)) {
+               buf_block_t*    block = (buf_block_t*) bpage;
+               ut_ad(block->in_unzip_LRU_list);
+-              ut_d(block->in_unzip_LRU_list = FALSE);
++              block->in_unzip_LRU_list = FALSE;
+               UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
+       }
+@@ -1183,7 +1260,8 @@
+       ut_ad(buf_pool);
+       ut_ad(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_a(buf_page_in_file(bpage));
+@@ -1260,12 +1338,13 @@
+       ut_ad(buf_pool);
+       ut_ad(block);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+       ut_ad(!block->in_unzip_LRU_list);
+-      ut_d(block->in_unzip_LRU_list = TRUE);
++      block->in_unzip_LRU_list = TRUE;
+       if (old) {
+               UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
+@@ -1286,7 +1365,8 @@
+       ut_ad(buf_pool);
+       ut_ad(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_a(buf_page_in_file(bpage));
+@@ -1337,7 +1417,8 @@
+       ut_ad(buf_pool);
+       ut_ad(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_a(buf_page_in_file(bpage));
+       ut_ad(!bpage->in_LRU_list);
+@@ -1416,7 +1497,8 @@
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       if (bpage->old) {
+               buf_pool->stat.n_pages_made_young++;
+@@ -1458,19 +1540,20 @@
+       buf_page_t*     bpage,  /*!< in: block to be freed */
+       ibool           zip,    /*!< in: TRUE if should remove also the
+                               compressed page of an uncompressed page */
+-      ibool*          buf_pool_mutex_released)
++      ibool*          buf_pool_mutex_released,
+                               /*!< in: pointer to a variable that will
+                               be assigned TRUE if buf_pool_mutex
+                               was temporarily released, or NULL */
++      ibool           have_LRU_mutex)
+ {
+       buf_page_t*     b = NULL;
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+       mutex_t*        block_mutex = buf_page_get_mutex(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(mutex_own(block_mutex));
+       ut_ad(buf_page_in_file(bpage));
+-      ut_ad(bpage->in_LRU_list);
++      //ut_ad(bpage->in_LRU_list);
+       ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+ #if UNIV_WORD_SIZE == 4
+       /* On 32-bit systems, there is no padding in buf_page_t.  On
+@@ -1479,7 +1562,7 @@
+       UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+ #endif
+-      if (!buf_page_can_relocate(bpage)) {
++      if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
+               /* Do not free buffer-fixed or I/O-fixed blocks. */
+               return(BUF_LRU_NOT_FREED);
+@@ -1511,15 +1594,15 @@
+               If it cannot be allocated (without freeing a block
+               from the LRU list), refuse to free bpage. */
+ alloc:
+-              buf_pool_mutex_exit_forbid(buf_pool);
+-              b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
+-              buf_pool_mutex_exit_allow(buf_pool);
++              //buf_pool_mutex_exit_forbid(buf_pool);
++              b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
++              //buf_pool_mutex_exit_allow(buf_pool);
+               if (UNIV_UNLIKELY(!b)) {
+                       return(BUF_LRU_CANNOT_RELOCATE);
+               }
+-              memcpy(b, bpage, sizeof *b);
++              //memcpy(b, bpage, sizeof *b);
+       }
+ #ifdef UNIV_DEBUG
+@@ -1530,6 +1613,39 @@
+       }
+ #endif /* UNIV_DEBUG */
++      /* release block_mutex and re-enter it below so that the latch order is not broken */
++      mutex_exit(block_mutex);
++
++      if (!have_LRU_mutex)
++              mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
++      rw_lock_x_lock(&buf_pool->page_hash_latch);
++      mutex_enter(block_mutex);
++
++      /* recheck states of block */
++      if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
++          || !buf_page_can_relocate(bpage)) {
++not_freed:
++              if (b) {
++                      buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
++              }
++              if (!have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++              return(BUF_LRU_NOT_FREED);
++      } else if (zip || !bpage->zip.data) {
++              if (bpage->oldest_modification)
++                      goto not_freed;
++      } else if (bpage->oldest_modification) {
++              if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
++                      ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
++                      goto not_freed;
++              }
++      }
++
++      if (b) {
++              memcpy(b, bpage, sizeof *b);
++      }
++
+       if (buf_LRU_block_remove_hashed_page(bpage, zip)
+           != BUF_BLOCK_ZIP_FREE) {
+               ut_a(bpage->buf_fix_count == 0);
+@@ -1546,6 +1662,10 @@
+                       ut_a(!hash_b);
++                      while (prev_b && !prev_b->in_LRU_list) {
++                              prev_b = UT_LIST_GET_PREV(LRU, prev_b);
++                      }
++
+                       b->state = b->oldest_modification
+                               ? BUF_BLOCK_ZIP_DIRTY
+                               : BUF_BLOCK_ZIP_PAGE;
+@@ -1642,7 +1762,9 @@
+                       *buf_pool_mutex_released = TRUE;
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+               mutex_exit(block_mutex);
+               /* Remove possible adaptive hash index on the page.
+@@ -1674,7 +1796,9 @@
+                               : BUF_NO_CHECKSUM_MAGIC);
+               }
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              if (have_LRU_mutex)
++                      mutex_enter(&buf_pool->LRU_list_mutex);
+               mutex_enter(block_mutex);
+               if (b) {
+@@ -1684,13 +1808,17 @@
+                       mutex_exit(&buf_pool->zip_mutex);
+               }
+-              buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
++              buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
+       } else {
+               /* The block_mutex should have been released by
+               buf_LRU_block_remove_hashed_page() when it returns
+               BUF_BLOCK_ZIP_FREE. */
+               ut_ad(block_mutex == &buf_pool->zip_mutex);
+               mutex_enter(block_mutex);
++
++              if (!have_LRU_mutex)
++                      mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+       }
+       return(BUF_LRU_FREED);
+@@ -1702,13 +1830,14 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+-      buf_block_t*    block)  /*!< in: block, must not contain a file page */
++      buf_block_t*    block,  /*!< in: block, must not contain a file page */
++      ibool           have_page_hash_mutex)
+ {
+       void*           data;
+       buf_pool_t*     buf_pool = buf_pool_from_block(block);
+       ut_ad(block);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+       ut_ad(mutex_own(&block->mutex));
+       switch (buf_block_get_state(block)) {
+@@ -1742,18 +1871,21 @@
+       if (data) {
+               block->page.zip.data = NULL;
+               mutex_exit(&block->mutex);
+-              buf_pool_mutex_exit_forbid(buf_pool);
++              //buf_pool_mutex_exit_forbid(buf_pool);
+               buf_buddy_free(
+-                      buf_pool, data, page_zip_get_size(&block->page.zip));
++                      buf_pool, data, page_zip_get_size(&block->page.zip),
++                      have_page_hash_mutex);
+-              buf_pool_mutex_exit_allow(buf_pool);
++              //buf_pool_mutex_exit_allow(buf_pool);
+               mutex_enter(&block->mutex);
+               page_zip_set_size(&block->page.zip, 0);
+       }
+-      UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
++      mutex_enter(&buf_pool->free_list_mutex);
++      UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
+       ut_d(block->page.in_free_list = TRUE);
++      mutex_exit(&buf_pool->free_list_mutex);
+       UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
+ }
+@@ -1783,7 +1915,11 @@
+       buf_pool_t*             buf_pool = buf_pool_from_bpage(bpage);
+       ut_ad(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+@@ -1891,7 +2027,9 @@
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+               mutex_exit(buf_page_get_mutex(bpage));
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
+               buf_print();
+               buf_LRU_print();
+               buf_validate();
+@@ -1912,17 +2050,17 @@
+               ut_a(bpage->zip.data);
+               ut_a(buf_page_get_zip_size(bpage));
+-              UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
++              UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
+               mutex_exit(&buf_pool->zip_mutex);
+-              buf_pool_mutex_exit_forbid(buf_pool);
++              //buf_pool_mutex_exit_forbid(buf_pool);
+               buf_buddy_free(
+                       buf_pool, bpage->zip.data,
+-                      page_zip_get_size(&bpage->zip));
++                      page_zip_get_size(&bpage->zip), TRUE);
+-              buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
+-              buf_pool_mutex_exit_allow(buf_pool);
++              buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
++              //buf_pool_mutex_exit_allow(buf_pool);
+               UNIV_MEM_UNDESC(bpage);
+               return(BUF_BLOCK_ZIP_FREE);
+@@ -1945,13 +2083,13 @@
+                       ut_ad(!bpage->in_flush_list);
+                       ut_ad(!bpage->in_LRU_list);
+                       mutex_exit(&((buf_block_t*) bpage)->mutex);
+-                      buf_pool_mutex_exit_forbid(buf_pool);
++                      //buf_pool_mutex_exit_forbid(buf_pool);
+                       buf_buddy_free(
+                               buf_pool, data,
+-                              page_zip_get_size(&bpage->zip));
++                              page_zip_get_size(&bpage->zip), TRUE);
+-                      buf_pool_mutex_exit_allow(buf_pool);
++                      //buf_pool_mutex_exit_allow(buf_pool);
+                       mutex_enter(&((buf_block_t*) bpage)->mutex);
+                       page_zip_set_size(&bpage->zip, 0);
+               }
+@@ -1977,18 +2115,19 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+-      buf_block_t*    block)  /*!< in: block, must contain a file page and
++      buf_block_t*    block,  /*!< in: block, must contain a file page and
+                               be in a state where it can be freed */
++      ibool           have_page_hash_mutex)
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_block(block);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_block(block);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+       ut_ad(mutex_own(&block->mutex));
+       buf_block_set_state(block, BUF_BLOCK_MEMORY);
+-      buf_LRU_block_free_non_file_page(block);
++      buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+ }
+ /**********************************************************************//**
+@@ -2015,7 +2154,8 @@
+       }
+       if (adjust) {
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->LRU_list_mutex);
+               if (ratio != buf_pool->LRU_old_ratio) {
+                       buf_pool->LRU_old_ratio = ratio;
+@@ -2027,7 +2167,8 @@
+                       }
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->LRU_list_mutex);
+       } else {
+               buf_pool->LRU_old_ratio = ratio;
+       }
+@@ -2124,7 +2265,8 @@
+       ulint           new_len;
+       ut_ad(buf_pool);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+       if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+@@ -2185,16 +2327,22 @@
+       ut_a(buf_pool->LRU_old_len == old_len);
+-      UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
++      mutex_exit(&buf_pool->LRU_list_mutex);
++      mutex_enter(&buf_pool->free_list_mutex);
++
++      UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
+                        ut_ad(ut_list_node_313->in_free_list));
+       for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
+            bpage != NULL;
+-           bpage = UT_LIST_GET_NEXT(list, bpage)) {
++           bpage = UT_LIST_GET_NEXT(free, bpage)) {
+               ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
+       }
++      mutex_exit(&buf_pool->free_list_mutex);
++      mutex_enter(&buf_pool->LRU_list_mutex);
++
+       UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
+                        ut_ad(ut_list_node_313->in_unzip_LRU_list
+                              && ut_list_node_313->page.in_LRU_list));
+@@ -2208,7 +2356,8 @@
+               ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+ /**********************************************************************//**
+@@ -2244,7 +2393,8 @@
+       const buf_page_t*       bpage;
+       ut_ad(buf_pool);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      mutex_enter(&buf_pool->LRU_list_mutex);
+       bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+@@ -2301,7 +2451,8 @@
+               bpage = UT_LIST_GET_NEXT(LRU, bpage);
+       }
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+ /**********************************************************************//**
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c   2010-12-03 15:22:36.323977308 +0900
++++ b/storage/innobase/buf/buf0rea.c   2010-12-03 15:48:29.296024468 +0900
+@@ -311,6 +311,7 @@
+               return(0);
+       }
++      buf_pool_mutex_exit(buf_pool);
+       /* Check that almost all pages in the area have been accessed; if
+       offset == low, the accesses must be in a descending order, otherwise,
+@@ -329,6 +330,7 @@
+       fail_count = 0;
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       for (i = low; i < high; i++) {
+               bpage = buf_page_hash_get(buf_pool, space, i);
+@@ -356,7 +358,8 @@
+               if (fail_count > threshold) {
+                       /* Too many failures: return */
+-                      buf_pool_mutex_exit(buf_pool);
++                      //buf_pool_mutex_exit(buf_pool);
++                      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+                       return(0);
+               }
+@@ -371,7 +374,8 @@
+       bpage = buf_page_hash_get(buf_pool, space, offset);
+       if (bpage == NULL) {
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+               return(0);
+       }
+@@ -397,7 +401,8 @@
+       pred_offset = fil_page_get_prev(frame);
+       succ_offset = fil_page_get_next(frame);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       if ((offset == low) && (succ_offset == offset + 1)) {
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:48:03.048955897 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:48:29.304024564 +0900
+@@ -250,6 +250,10 @@
+ #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
+       {&buf_pool_mutex_key, "buf_pool_mutex", 0},
+       {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
++      {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
++      {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
++      {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
++      {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
+       {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
+       {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
+       {&dict_sys_mutex_key, "dict_sys_mutex", 0},
+@@ -301,6 +305,7 @@
+       {&archive_lock_key, "archive_lock", 0},
+ #  endif /* UNIV_LOG_ARCHIVE */
+       {&btr_search_latch_key, "btr_search_latch", 0},
++      {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
+ #  ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
+       {&buf_block_lock_key, "buf_block_lock", 0},
+ #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc  2010-12-03 15:37:45.517105700 +0900
++++ b/storage/innobase/handler/i_s.cc  2010-12-03 15:48:29.331024462 +0900
+@@ -1725,7 +1725,8 @@
+               buf_pool = buf_pool_from_array(i);
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              mutex_enter(&buf_pool->zip_free_mutex);
+               for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
+                       buf_buddy_stat_t*       buddy_stat;
+@@ -1755,7 +1756,8 @@
+                       }
+               }
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              mutex_exit(&buf_pool->zip_free_mutex);
+               if (status) {
+                       break;
+diff -ruN a/storage/innobase/handler/innodb_patch_info.h b/storage/innobase/handler/innodb_patch_info.h
+--- a/storage/innobase/handler/innodb_patch_info.h     2010-12-03 15:48:03.064995674 +0900
++++ b/storage/innobase/handler/innodb_patch_info.h     2010-12-03 15:48:29.331955850 +0900
+@@ -31,5 +31,6 @@
+ {"innodb_overwrite_relay_log_info","overwrite relay-log.info when slave recovery","Building as plugin, it is not used.","http://www.percona.com/docs/wiki/percona-xtradb:innodb_overwrite_relay_log_info"},
+ {"innodb_thread_concurrency_timer_based","use InnoDB timer based concurrency throttling (backport from MySQL 5.4.0)","",""},
+ {"innodb_dict_size_limit","Limit dictionary cache size","Variable innodb_dict_size_limit in bytes","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
+--- a/storage/innobase/ibuf/ibuf0ibuf.c        2010-12-03 15:48:03.068954202 +0900
++++ b/storage/innobase/ibuf/ibuf0ibuf.c        2010-12-03 15:48:29.335988682 +0900
+@@ -3700,9 +3700,11 @@
+               ulint           fold = buf_page_address_fold(space, page_no);
+               buf_pool_t*     buf_pool = buf_pool_get(space, page_no);
+-              buf_pool_mutex_enter(buf_pool);
++              //buf_pool_mutex_enter(buf_pool);
++              rw_lock_s_lock(&buf_pool->page_hash_latch);
+               bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
+-              buf_pool_mutex_exit(buf_pool);
++              //buf_pool_mutex_exit(buf_pool);
++              rw_lock_s_unlock(&buf_pool->page_hash_latch);
+               if (UNIV_LIKELY_NULL(bpage)) {
+                       /* A buffer pool watch has been set or the
+diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
+--- a/storage/innobase/include/buf0buddy.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buddy.h     2010-12-03 15:48:29.338023826 +0900
+@@ -51,10 +51,11 @@
+       buf_pool_t*     buf_pool,
+                       /*!< buffer pool in which the block resides */
+       ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
+-      ibool*  lru)    /*!< in: pointer to a variable that will be assigned
++      ibool*  lru,    /*!< in: pointer to a variable that will be assigned
+                       TRUE if storage was allocated from the LRU list
+                       and buf_pool->mutex was temporarily released,
+                       or NULL if the LRU list should not be used */
++      ibool   have_page_hash_mutex)
+       __attribute__((malloc));
+ /**********************************************************************//**
+@@ -67,7 +68,8 @@
+                       /*!< buffer pool in which the block resides */
+       void*   buf,    /*!< in: block to be freed, must not be
+                       pointed to by the buffer pool */
+-      ulint   size)   /*!< in: block size, up to UNIV_PAGE_SIZE */
++      ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
++      ibool   have_page_hash_mutex)
+       __attribute__((nonnull));
+ #ifndef UNIV_NONINL
+diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
+--- a/storage/innobase/include/buf0buddy.ic    2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buddy.ic    2010-12-03 15:48:29.339040413 +0900
+@@ -46,10 +46,11 @@
+                       /*!< in: buffer pool in which the page resides */
+       ulint   i,      /*!< in: index of buf_pool->zip_free[],
+                       or BUF_BUDDY_SIZES */
+-      ibool*  lru)    /*!< in: pointer to a variable that will be assigned
++      ibool*  lru,    /*!< in: pointer to a variable that will be assigned
+                       TRUE if storage was allocated from the LRU list
+                       and buf_pool->mutex was temporarily released,
+                       or NULL if the LRU list should not be used */
++      ibool   have_page_hash_mutex)
+       __attribute__((malloc));
+ /**********************************************************************//**
+@@ -61,8 +62,9 @@
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+       void*           buf,            /*!< in: block to be freed, must not be
+                                       pointed to by the buffer pool */
+-      ulint           i)              /*!< in: index of buf_pool->zip_free[],
++      ulint           i,              /*!< in: index of buf_pool->zip_free[],
+                                       or BUF_BUDDY_SIZES */
++      ibool           have_page_hash_mutex)
+       __attribute__((nonnull));
+ /**********************************************************************//**
+@@ -102,16 +104,17 @@
+                                       the page resides */
+       ulint           size,           /*!< in: block size, up to
+                                       UNIV_PAGE_SIZE */
+-      ibool*          lru)            /*!< in: pointer to a variable
++      ibool*          lru,            /*!< in: pointer to a variable
+                                       that will be assigned TRUE if
+                                       storage was allocated from the
+                                       LRU list and buf_pool->mutex was
+                                       temporarily released, or NULL if
+                                       the LRU list should not be used */
++      ibool           have_page_hash_mutex)
+ {
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+-      return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
++      return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
+ }
+ /**********************************************************************//**
+@@ -123,12 +126,25 @@
+       buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+       void*           buf,            /*!< in: block to be freed, must not be
+                                       pointed to by the buffer pool */
+-      ulint           size)           /*!< in: block size, up to
++      ulint           size,           /*!< in: block size, up to
+                                       UNIV_PAGE_SIZE */
++      ibool           have_page_hash_mutex)
+ {
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++
++      if (!have_page_hash_mutex) {
++              mutex_enter(&buf_pool->LRU_list_mutex);
++              rw_lock_x_lock(&buf_pool->page_hash_latch);
++      }
+-      buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
++      mutex_enter(&buf_pool->zip_free_mutex);
++      buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
++      mutex_exit(&buf_pool->zip_free_mutex);
++
++      if (!have_page_hash_mutex) {
++              mutex_exit(&buf_pool->LRU_list_mutex);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++      }
+ }
+ #ifdef UNIV_MATERIALIZE
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h       2010-12-03 15:22:36.327954660 +0900
++++ b/storage/innobase/include/buf0buf.h       2010-12-03 15:48:29.343024683 +0900
+@@ -132,6 +132,20 @@
+ /*==========================*/
+ /********************************************************************//**
++X-latches page_hash_latch of every buffer pool instance. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_lock_all(void);
++/*================================*/
++
++/********************************************************************//**
++Releases the X-latch on page_hash_latch of every buffer pool instance. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_unlock_all(void);
++/*==================================*/
++
++/********************************************************************//**
+ Creates the buffer pool.
+ @return       own: buf_pool object, NULL if not enough memory or error */
+ UNIV_INTERN
+@@ -761,6 +775,15 @@
+       const buf_page_t*       bpage)  /*!< in: pointer to control block */
+       __attribute__((pure));
++/*************************************************************************
++Enters the mutex protecting bpage, re-checking it after acquisition. @return the held mutex, or NULL if bpage has none */
++UNIV_INLINE
++mutex_t*
++buf_page_get_mutex_enter(
++/*=========================*/
++      const buf_page_t*       bpage); /*!< in: pointer to control block */
++      /* deliberately not __attribute__((pure)): this call enters a mutex */
++
+ /*********************************************************************//**
+ Get the flush type of a page.
+ @return       flush type */
+@@ -1227,7 +1250,7 @@
+       All these are protected by buf_pool_mutex. */
+       /* @{ */
+-      UT_LIST_NODE_T(buf_page_t) list;
++      /* UT_LIST_NODE_T(buf_page_t) list; */
+                                       /*!< based on state, this is a
+                                       list node, protected either by
+                                       buf_pool_mutex or by
+@@ -1254,6 +1277,10 @@
+                                       BUF_BLOCK_REMOVE_HASH or
+                                       BUF_BLOCK_READY_IN_USE. */
++      /* resplit for optimistic use */
++      UT_LIST_NODE_T(buf_page_t) free;
++      UT_LIST_NODE_T(buf_page_t) flush_list;
++      UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
+ #ifdef UNIV_DEBUG
+       ibool           in_flush_list;  /*!< TRUE if in buf_pool->flush_list;
+                                       when flush_list_mutex is free, the
+@@ -1347,11 +1374,11 @@
+                                       a block is in the unzip_LRU list
+                                       if page.state == BUF_BLOCK_FILE_PAGE
+                                       and page.zip.data != NULL */
+-#ifdef UNIV_DEBUG
++//#ifdef UNIV_DEBUG
+       ibool           in_unzip_LRU_list;/*!< TRUE if the page is in the
+                                       decompressed LRU list;
+                                       used in debugging */
+-#endif /* UNIV_DEBUG */
++//#endif /* UNIV_DEBUG */
+       mutex_t         mutex;          /*!< mutex protecting this block:
+                                       state (also protected by the buffer
+                                       pool mutex), io_fix, buf_fix_count,
+@@ -1517,6 +1544,11 @@
+                                       pool instance, protects compressed
+                                       only pages (of type buf_page_t, not
+                                       buf_block_t */
++      mutex_t         LRU_list_mutex;
++      rw_lock_t       page_hash_latch;
++      mutex_t         free_list_mutex;
++      mutex_t         zip_free_mutex;
++      mutex_t         zip_hash_mutex;
+       ulint           instance_no;    /*!< Array index of this buffer
+                                       pool instance */
+       ulint           old_pool_size;  /*!< Old pool size in bytes */
+diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
+--- a/storage/innobase/include/buf0buf.ic      2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buf.ic      2010-12-03 15:48:29.345024524 +0900
+@@ -232,7 +232,7 @@
+       case BUF_BLOCK_ZIP_FREE:
+               /* This is a free page in buf_pool->zip_free[].
+               Such pages should only be accessed by the buddy allocator. */
+-              ut_error;
++              /* ut_error; */ /* optimistic */
+               break;
+       case BUF_BLOCK_ZIP_PAGE:
+       case BUF_BLOCK_ZIP_DIRTY:
+@@ -275,9 +275,14 @@
+ {
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
++      if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
++              /* TODO: this code is the interim. should be confirmed later. */
++              return(&buf_pool->zip_mutex);
++      }
++
+       switch (buf_page_get_state(bpage)) {
+       case BUF_BLOCK_ZIP_FREE:
+-              ut_error;
++              /* ut_error; */ /* optimistic */
+               return(NULL);
+       case BUF_BLOCK_ZIP_PAGE:
+       case BUF_BLOCK_ZIP_DIRTY:
+@@ -287,6 +292,28 @@
+       }
+ }
++/*************************************************************************
++Enters the mutex protecting bpage; retries if buf_page_get_mutex() returns a different mutex after entry. Returns the held mutex, or NULL if bpage has none. */
++UNIV_INLINE
++mutex_t*
++buf_page_get_mutex_enter(
++/*=========================*/
++      const buf_page_t*       bpage)  /*!< in: pointer to control block */
++{
++      mutex_t*        block_mutex;
++
++      while(1) {
++              block_mutex = buf_page_get_mutex(bpage);
++              if (!block_mutex)       /* no mutex for this page: nothing was entered */
++                      return block_mutex;
++
++              mutex_enter(block_mutex);
++              if (block_mutex == buf_page_get_mutex(bpage))   /* lookup unchanged: return with mutex held */
++                      return block_mutex;
++              mutex_exit(block_mutex);        /* lookup changed while waiting: drop it and retry */
++      }
++}
++
+ /*********************************************************************//**
+ Get the flush type of a page.
+ @return       flush type */
+@@ -383,8 +410,8 @@
+       enum buf_io_fix io_fix) /*!< in: io_fix state */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+@@ -414,14 +441,14 @@
+       const buf_page_t*       bpage)  /*!< control block being relocated */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_ad(buf_page_in_file(bpage));
+-      ut_ad(bpage->in_LRU_list);
++      //ut_ad(bpage->in_LRU_list);
+-      return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
++      return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
+              && bpage->buf_fix_count == 0);
+ }
+@@ -435,8 +462,8 @@
+       const buf_page_t*       bpage)  /*!< in: control block */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+       ut_ad(buf_page_in_file(bpage));
+@@ -456,7 +483,8 @@
+       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+ #endif /* UNIV_DEBUG */
+       ut_a(buf_page_in_file(bpage));
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++      ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+       ut_ad(bpage->in_LRU_list);
+ #ifdef UNIV_LRU_DEBUG
+@@ -503,9 +531,10 @@
+       ulint           time_ms)        /*!< in: ut_time_ms() */
+ {
+ #ifdef UNIV_DEBUG
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
++      //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
++      ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+       ut_a(buf_page_in_file(bpage));
+       if (!bpage->access_time) {
+@@ -719,19 +748,19 @@
+ /*===========*/
+       buf_block_t*    block)  /*!< in, own: block to be freed */
+ {
+-      buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
++      //buf_pool_t*   buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
+       mutex_enter(&block->mutex);
+       ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+-      buf_LRU_block_free_non_file_page(block);
++      buf_LRU_block_free_non_file_page(block, FALSE);
+       mutex_exit(&block->mutex);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
+ }
+ #endif /* !UNIV_HOTBACKUP */
+@@ -779,17 +808,17 @@
+                                       page frame */
+ {
+       ib_uint64_t     lsn;
+-      mutex_t*        block_mutex = buf_page_get_mutex(bpage);
+-
+-      mutex_enter(block_mutex);
++      mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
+-      if (buf_page_in_file(bpage)) {
++      if (block_mutex && buf_page_in_file(bpage)) {
+               lsn = bpage->newest_modification;
+       } else {
+               lsn = 0;
+       }
+-      mutex_exit(block_mutex);
++      if (block_mutex) {
++              mutex_exit(block_mutex);
++      }
+       return(lsn);
+ }
+@@ -807,7 +836,7 @@
+ #ifdef UNIV_SYNC_DEBUG
+       buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+-      ut_ad((buf_pool_mutex_own(buf_pool)
++      ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
+              && (block->page.buf_fix_count == 0))
+             || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -962,7 +991,11 @@
+       buf_page_t*     bpage;
+       ut_ad(buf_pool);
+-      ut_ad(buf_pool_mutex_own(buf_pool));
++      //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++      ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
++            || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
++#endif
+       ut_ad(fold == buf_page_address_fold(space, offset));
+       /* Look for the page in the hash table */
+@@ -1047,11 +1080,13 @@
+       const buf_page_t*       bpage;
+       buf_pool_t*             buf_pool = buf_pool_get(space, offset);
+-      buf_pool_mutex_enter(buf_pool);
++      //buf_pool_mutex_enter(buf_pool);
++      rw_lock_s_lock(&buf_pool->page_hash_latch);
+       bpage = buf_page_hash_get(buf_pool, space, offset);
+-      buf_pool_mutex_exit(buf_pool);
++      //buf_pool_mutex_exit(buf_pool);
++      rw_lock_s_unlock(&buf_pool->page_hash_latch);
+       return(bpage != NULL);
+ }
+@@ -1179,4 +1214,38 @@
+               buf_pool_mutex_exit(buf_pool);
+       }
+ }
++
++/********************************************************************//**
++X-latches page_hash_latch of every buffer pool instance, in array index order. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_lock_all(void)
++/*===============================*/
++{
++      ulint   i;
++
++      for (i = 0; i < srv_buf_pool_instances; i++) {
++              buf_pool_t*     buf_pool;
++
++              buf_pool = buf_pool_from_array(i);
++              rw_lock_x_lock(&buf_pool->page_hash_latch);
++      }
++}
++
++/********************************************************************//**
++Releases the X-latch on page_hash_latch of every buffer pool instance, in array index order. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_unlock_all(void)
++/*=================================*/
++{
++      ulint   i;
++
++      for (i = 0; i < srv_buf_pool_instances; i++) {
++              buf_pool_t*     buf_pool;
++
++              buf_pool = buf_pool_from_array(i);
++              rw_lock_x_unlock(&buf_pool->page_hash_latch);
++      }
++}
+ #endif /* !UNIV_HOTBACKUP */
+diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
+--- a/storage/innobase/include/buf0lru.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0lru.h       2010-12-03 15:48:29.349024701 +0900
+@@ -113,10 +113,11 @@
+       buf_page_t*     bpage,  /*!< in: block to be freed */
+       ibool           zip,    /*!< in: TRUE if should remove also the
+                               compressed page of an uncompressed page */
+-      ibool*          buf_pool_mutex_released);
++      ibool*          buf_pool_mutex_released,
+                               /*!< in: pointer to a variable that will
+                               be assigned TRUE if buf_pool->mutex
+                               was temporarily released, or NULL */
++      ibool           have_LRU_mutex);
+ /******************************************************************//**
+ Try to free a replaceable block.
+ @return       TRUE if found and freed */
+@@ -163,7 +164,8 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+-      buf_block_t*    block); /*!< in: block, must not contain a file page */
++      buf_block_t*    block,  /*!< in: block, must not contain a file page */
++      ibool           have_page_hash_mutex);
+ /******************************************************************//**
+ Adds a block to the LRU list. */
+ UNIV_INTERN
+diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
+--- a/storage/innobase/include/sync0rw.h       2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0rw.h       2010-12-03 15:48:29.349942993 +0900
+@@ -112,6 +112,7 @@
+ extern        mysql_pfs_key_t archive_lock_key;
+ # endif /* UNIV_LOG_ARCHIVE */
+ extern        mysql_pfs_key_t btr_search_latch_key;
++extern        mysql_pfs_key_t buf_pool_page_hash_key;
+ extern        mysql_pfs_key_t buf_block_lock_key;
+ # ifdef UNIV_SYNC_DEBUG
+ extern        mysql_pfs_key_t buf_block_debug_latch_key;
+diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
+--- a/storage/innobase/include/sync0sync.h     2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0sync.h     2010-12-03 15:48:29.352024614 +0900
+@@ -75,6 +75,10 @@
+ extern mysql_pfs_key_t        buffer_block_mutex_key;
+ extern mysql_pfs_key_t        buf_pool_mutex_key;
+ extern mysql_pfs_key_t        buf_pool_zip_mutex_key;
++extern mysql_pfs_key_t        buf_pool_LRU_list_mutex_key;
++extern mysql_pfs_key_t        buf_pool_free_list_mutex_key;
++extern mysql_pfs_key_t        buf_pool_zip_free_mutex_key;
++extern mysql_pfs_key_t        buf_pool_zip_hash_mutex_key;
+ extern mysql_pfs_key_t        cache_last_read_mutex_key;
+ extern mysql_pfs_key_t        dict_foreign_err_mutex_key;
+ extern mysql_pfs_key_t        dict_sys_mutex_key;
+@@ -661,7 +665,7 @@
+ #define       SYNC_TRX_LOCK_HEAP      298
+ #define SYNC_TRX_SYS_HEADER   290
+ #define SYNC_LOG              170
+-#define SYNC_LOG_FLUSH_ORDER  147
++#define SYNC_LOG_FLUSH_ORDER  156
+ #define SYNC_RECV             168
+ #define       SYNC_WORK_QUEUE         162
+ #define       SYNC_SEARCH_SYS_CONF    161     /* for assigning btr_search_enabled */
+@@ -671,8 +675,13 @@
+                                       SYNC_SEARCH_SYS, as memory allocation
+                                       can call routines there! Otherwise
+                                       the level is SYNC_MEM_HASH. */
++#define       SYNC_BUF_LRU_LIST       158
++#define       SYNC_BUF_PAGE_HASH      157
++#define       SYNC_BUF_BLOCK          155     /* Block mutex */
++#define       SYNC_BUF_FREE_LIST      153
++#define       SYNC_BUF_ZIP_FREE       152
++#define       SYNC_BUF_ZIP_HASH       151
+ #define       SYNC_BUF_POOL           150     /* Buffer pool mutex */
+-#define       SYNC_BUF_BLOCK          146     /* Block mutex */
+ #define       SYNC_BUF_FLUSH_LIST     145     /* Buffer flush list mutex */
+ #define SYNC_DOUBLEWRITE      140
+ #define       SYNC_ANY_LATCH          135
+@@ -704,7 +713,7 @@
+               os_fast_mutex;  /*!< We use this OS mutex in place of lock_word
+                               when atomic operations are not enabled */
+ #endif
+-      ulint   waiters;        /*!< This ulint is set to 1 if there are (or
++      volatile ulint  waiters;        /*!< This ulint is set to 1 if there are (or
+                               may be) threads waiting in the global wait
+                               array for this mutex to be released.
+                               Otherwise, this is 0. */
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c   2010-12-03 15:48:03.080956216 +0900
++++ b/storage/innobase/srv/srv0srv.c   2010-12-03 15:48:29.355023766 +0900
+@@ -3065,7 +3065,7 @@
+                                                               level += log_sys->max_checkpoint_age
+                                                                        - (lsn - oldest_modification);
+                                                       }
+-                                                      bpage = UT_LIST_GET_NEXT(list, bpage);
++                                                      bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+                                                       n_blocks++;
+                                               }
+@@ -3150,7 +3150,7 @@
+                                                       found = TRUE;
+                                                       break;
+                                               }
+-                                              bpage = UT_LIST_GET_NEXT(list, bpage);
++                                              bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+                                               new_blocks_num++;
+                                       }
+                                       if (!found) {
+diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
+--- a/storage/innobase/sync/sync0sync.c        2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/sync/sync0sync.c        2010-12-03 15:48:29.358023890 +0900
+@@ -265,7 +265,7 @@
+       mutex->lock_word = 0;
+ #endif
+       mutex->event = os_event_create(NULL);
+-      mutex_set_waiters(mutex, 0);
++      mutex->waiters = 0;
+ #ifdef UNIV_DEBUG
+       mutex->magic_n = MUTEX_MAGIC_N;
+ #endif /* UNIV_DEBUG */
+@@ -444,6 +444,15 @@
+       mutex_t*        mutex,  /*!< in: mutex */
+       ulint           n)      /*!< in: value to set */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++      ut_ad(mutex);
++
++      if (n) {
++              os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
++      } else {
++              os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
++      }
++#else
+       volatile ulint* ptr;            /* declared volatile to ensure that
+                                       the value is stored to memory */
+       ut_ad(mutex);
+@@ -452,6 +461,7 @@
+       *ptr = n;               /* Here we assume that the write of a single
+                               word in memory is atomic */
++#endif
+ }
+ /******************************************************************//**
+@@ -1193,7 +1203,12 @@
+                       ut_error;
+               }
+               break;
++      case SYNC_BUF_LRU_LIST:
+       case SYNC_BUF_FLUSH_LIST:
++      case SYNC_BUF_PAGE_HASH:
++      case SYNC_BUF_FREE_LIST:
++      case SYNC_BUF_ZIP_FREE:
++      case SYNC_BUF_ZIP_HASH:
+       case SYNC_BUF_POOL:
+               /* We can have multiple mutexes of this type therefore we
+               can only check whether the greater than condition holds. */
+@@ -1211,7 +1226,8 @@
+               buffer block (block->mutex or buf_pool_zip_mutex). */
+               if (!sync_thread_levels_g(array, level, FALSE)) {
+                       ut_a(sync_thread_levels_g(array, level - 1, TRUE));
+-                      ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
++                      /* the exact rule is not fixed yet, for now */
++                      //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
+               }
+               break;
+       case SYNC_REC_LOCK:
diff --git a/mysql-microsec_process.patch b/mysql-microsec_process.patch
new file mode 100644 (file)
index 0000000..e7a5717
--- /dev/null
@@ -0,0 +1,52 @@
+# name       : microsec_process.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/microsec_process.info b/patch_info/microsec_process.info
+--- /dev/null  1970-01-01 09:00:00.000000000 +0900
++++ b/patch_info/microsec_process.info 2010-12-02 20:41:41.616069579 +0900
+@@ -0,0 +1,8 @@
++File=microsec_process.patch
++Name=Adds INFORMATION_SCHEMA.PROCESSLIST with TIME_MS column
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
++2010-01
++Ported to 5.1.42
+diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
+--- a/sql/sql_show.cc  2010-12-02 19:22:40.054024541 +0900
++++ b/sql/sql_show.cc  2010-12-02 20:41:41.622941425 +0900
+@@ -1875,7 +1875,8 @@
+   TABLE *table= tables->table;
+   CHARSET_INFO *cs= system_charset_info;
+   char *user;
+-  time_t now= my_time(0);
++  time_t now;
++  ulonglong now_utime= my_micro_time_and_time(&now);
+   DBUG_ENTER("fill_process_list");
+   user= thd->security_ctx->master_access & PROCESS_ACL ?
+@@ -1959,6 +1960,10 @@
+       }
+       mysql_mutex_unlock(&tmp->LOCK_thd_data);
++      /* TIME_MS */
++      table->field[8]->store(((tmp->start_utime ?
++                               now_utime - tmp->start_utime : 0)/ 1000));
++
+       if (schema_table_store_record(thd, table))
+       {
+         mysql_mutex_unlock(&LOCK_thread_count);
+@@ -7202,6 +7207,8 @@
+   {"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State", SKIP_OPEN_TABLE},
+   {"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info",
+    SKIP_OPEN_TABLE},
++  {"TIME_MS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG,
++   0, 0, "Time_ms", SKIP_OPEN_TABLE},
+   {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
+ };
diff --git a/mysql-userstat.patch b/mysql-userstat.patch
new file mode 100644 (file)
index 0000000..153138c
--- /dev/null
@@ -0,0 +1,3450 @@
+# name       : userstat.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/configure b/configure
+--- a/configure        2010-08-27 14:28:05.621275596 +0900
++++ b/configure        2010-08-27 15:10:33.736074033 +0900
+@@ -38009,7 +38009,7 @@
+   realpath rename rint rwlock_init setupterm \
+   shmget shmat shmdt shmctl sigaction sigemptyset sigaddset \
+   sighold sigset sigthreadmask port_create sleep \
+-  snprintf socket stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
++  snprintf socket strsep stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
+   strtol strtoll strtoul strtoull tell tempnam thr_setconcurrency vidattr \
+   posix_fallocate backtrace backtrace_symbols backtrace_symbols_fd printstack
+ do
+diff -ruN a/configure.in b/configure.in
+--- a/configure.in     2010-08-04 02:24:24.000000000 +0900
++++ b/configure.in     2010-08-27 15:10:33.737073307 +0900
+@@ -2086,7 +2086,7 @@
+   realpath rename rint rwlock_init setupterm \
+   shmget shmat shmdt shmctl sigaction sigemptyset sigaddset \
+   sighold sigset sigthreadmask port_create sleep \
+-  snprintf socket stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
++  snprintf socket strsep stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
+   strtol strtoll strtoul strtoull tell tempnam thr_setconcurrency vidattr \
+   posix_fallocate backtrace backtrace_symbols backtrace_symbols_fd printstack)
+diff -ruN a/include/config.h.in b/include/config.h.in
+--- a/include/config.h.in      2010-08-04 02:28:40.000000000 +0900
++++ b/include/config.h.in      2010-08-27 15:10:33.740077919 +0900
+@@ -802,6 +802,9 @@
+ /* Define to 1 if you have the <stdlib.h> header file. */
+ #undef HAVE_STDLIB_H
++/* Define to 1 if you have the `strsep' function. */
++#undef HAVE_STRSEP
++
+ /* Define to 1 if you have the `stpcpy' function. */
+ #undef HAVE_STPCPY
+diff -ruN a/include/mysql/plugin.h b/include/mysql/plugin.h
+--- a/include/mysql/plugin.h   2010-08-27 14:38:08.682439958 +0900
++++ b/include/mysql/plugin.h   2010-08-27 15:10:33.742003842 +0900
+@@ -705,6 +705,9 @@
+ unsigned long thd_log_slow_verbosity(const MYSQL_THD thd);
+ int thd_opt_slow_log();
+ #define EXTENDED_SLOWLOG
++
++#define EXTENDED_FOR_USERSTAT
++
+ /**
+   Create a temporary file.
+diff -ruN a/include/mysql_com.h b/include/mysql_com.h
+--- a/include/mysql_com.h      2010-08-04 02:24:30.000000000 +0900
++++ b/include/mysql_com.h      2010-08-27 15:10:33.743072186 +0900
+@@ -29,6 +29,7 @@
+ #define SERVER_VERSION_LENGTH 60
+ #define SQLSTATE_LENGTH 5
++#define LIST_PROCESS_HOST_LEN 64
+ /*
+   USER_HOST_BUFF_SIZE -- length of string buffer, that is enough to contain
+@@ -115,6 +116,12 @@
+                                          thread */
+ #define REFRESH_MASTER          128     /* Remove all bin logs in the index
+                                          and truncate the index */
++#define REFRESH_TABLE_STATS     256     /* Refresh table stats hash table */
++#define REFRESH_INDEX_STATS     512     /* Refresh index stats hash table */
++#define REFRESH_USER_STATS      1024    /* Refresh user stats hash table */
++#define REFRESH_SLOW_QUERY_LOG  2048    /* Flush slow query log and rotate*/
++#define REFRESH_CLIENT_STATS    4096    /* Refresh client stats hash table */
++#define REFRESH_THREAD_STATS    8192    /* Refresh thread stats hash table */
+ /* The following can't be set with mysql_refresh() */
+ #define REFRESH_READ_LOCK     16384   /* Lock tables for read */
+diff -ruN a/patch_info/userstats.info b/patch_info/userstats.info
+--- /dev/null  1970-01-01 09:00:00.000000000 +0900
++++ b/patch_info/userstats.info        2010-08-27 15:10:33.744161257 +0900
+@@ -0,0 +1,11 @@
++File=userstats.patch
++Name=SHOW USER/TABLE/INDEX statistics
++Version=V2
++Author=Google
++License=GPL
++Comment=Added INFORMATION_SCHEMA.*_STATISTICS
++2008-12-01
++YK: fix behavior for prepared statements
++
++2008-11-26
++YK: add switch variable "userstat_running" to control INFORMATION_SCHEMA.*_STATISTICS (default:OFF)
+diff -ruN a/sql/handler.cc b/sql/handler.cc
+--- a/sql/handler.cc   2010-08-04 02:24:27.000000000 +0900
++++ b/sql/handler.cc   2010-08-27 15:10:33.749058856 +0900
+@@ -1194,6 +1194,8 @@
+     if (cookie)
+       tc_log->unlog(cookie, xid);
+     DBUG_EXECUTE_IF("crash_commit_after", abort(););
++    if (is_real_trans)
++      thd->diff_commit_trans++;
+ end:
+     if (rw_trans)
+       start_waiting_global_read_lock(thd);
+@@ -1324,6 +1326,8 @@
+   /* Always cleanup. Even if there nht==0. There may be savepoints. */
+   if (is_real_trans)
+     thd->transaction.cleanup();
++
++  thd->diff_rollback_trans++;
+ #endif /* USING_TRANSACTIONS */
+   if (all)
+     thd->transaction_rollback_request= FALSE;
+@@ -1762,6 +1766,7 @@
+     ha_info->reset(); /* keep it conveniently zero-filled */
+   }
+   trans->ha_list= sv->ha_list;
++  thd->diff_rollback_trans++;
+   DBUG_RETURN(error);
+ }
+@@ -2122,6 +2127,8 @@
+       dup_ref=ref+ALIGN_SIZE(ref_length);
+     cached_table_flags= table_flags();
+   }
++  rows_read = rows_changed = 0;
++  memset(index_rows_read, 0, sizeof(index_rows_read));
+   DBUG_RETURN(error);
+ }
+@@ -3571,6 +3578,111 @@
+   return;
+ }
++// Folds this handler's rows_read/rows_changed into global_table_stats, then zeroes them.
++void handler::update_global_table_stats() {
++  if (!opt_userstat_running) {
++    rows_read = rows_changed = 0;
++    return;
++  }
++
++  if (!rows_read && !rows_changed) return;  // Nothing to update.
++  // table_cache_key is db_name + '\0' + table_name + '\0'.
++  if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str) return;
++
++  TABLE_STATS* table_stats;
++  char key[NAME_LEN * 2 + 2];
++  // [db] + '.' + [table]; the write is bounded so it can never overrun key.
++  my_snprintf(key, sizeof(key), "%s.%s", table->s->table_cache_key.str, table->s->table_name.str);
++
++  pthread_mutex_lock(&LOCK_global_table_stats);
++  // Gets the global table stats, creating one if necessary.
++  if (!(table_stats = (TABLE_STATS*)hash_search(&global_table_stats,
++                                                (uchar*)key,
++                                                strlen(key)))) {
++    if (!(table_stats = ((TABLE_STATS*)
++                         my_malloc(sizeof(TABLE_STATS), MYF(MY_WME | MY_ZEROFILL))))) {
++      // Out of memory.
++      sql_print_error("Allocating table stats failed.");
++      goto end;
++    }
++    strncpy(table_stats->table, key, sizeof(table_stats->table) - 1);  // MY_ZEROFILL keeps the final NUL
++    table_stats->rows_read = 0;
++    table_stats->rows_changed = 0;
++    table_stats->rows_changed_x_indexes = 0;
++    table_stats->engine_type = (int) ht->db_type;
++
++    if (my_hash_insert(&global_table_stats, (uchar*)table_stats)) {
++      // Out of memory.
++      sql_print_error("Inserting table stats failed.");
++      my_free((char*)table_stats, 0);
++      goto end;
++    }
++  }
++  // Updates the global table stats.
++  table_stats->rows_read += rows_read;
++  table_stats->rows_changed += rows_changed;
++  table_stats->rows_changed_x_indexes +=
++      rows_changed * (table->s->keys ? table->s->keys : 1);
++  current_thd->diff_total_read_rows += rows_read;
++  rows_read = rows_changed = 0;
++end:
++  pthread_mutex_unlock(&LOCK_global_table_stats);
++}
++
++// Folds this handler's per-index index_rows_read[] into global_index_stats, zeroing each slot.
++void handler::update_global_index_stats() {
++  // table_cache_key is db_name + '\0' + table_name + '\0'.
++  if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str) return;
++
++  if (!opt_userstat_running) {
++    for (uint x = 0; x < table->s->keys; x++) {
++      index_rows_read[x] = 0;
++    }
++    return;
++  }
++
++  for (uint x = 0; x < table->s->keys; x++) {
++    if (index_rows_read[x]) {
++      // Rows were read using this index.
++      KEY* key_info = &table->key_info[x];
++
++      if (!key_info->name) continue;
++
++      INDEX_STATS* index_stats;
++      char key[NAME_LEN * 3 + 3];
++      // [db] + '.' + [table] + '.' + [index]; bounded so it can never overrun key.
++      my_snprintf(key, sizeof(key), "%s.%s.%s", table->s->table_cache_key.str,
++                  table->s->table_name.str, key_info->name);
++
++      pthread_mutex_lock(&LOCK_global_index_stats);
++      // Gets the global index stats, creating one if necessary.
++      if (!(index_stats = (INDEX_STATS*)hash_search(&global_index_stats,
++                                                    (uchar*)key,
++                                                    strlen(key)))) {
++        if (!(index_stats = ((INDEX_STATS*)
++                             my_malloc(sizeof(INDEX_STATS), MYF(MY_WME | MY_ZEROFILL))))) {
++          // Out of memory.
++          sql_print_error("Allocating index stats failed.");
++          goto end;
++        }
++        strncpy(index_stats->index, key, sizeof(index_stats->index) - 1);  // MY_ZEROFILL keeps the final NUL
++        index_stats->rows_read = 0;
++
++        if (my_hash_insert(&global_index_stats, (uchar*)index_stats)) {
++          // Out of memory.
++          sql_print_error("Inserting index stats failed.");
++          my_free((char*)index_stats, 0);
++          goto end;
++        }
++      }
++      // Updates the global index stats.
++      index_stats->rows_read += index_rows_read[x];
++      index_rows_read[x] = 0;
++end:
++      pthread_mutex_unlock(&LOCK_global_index_stats);
++    }
++  }
++}
+ /****************************************************************************
+ ** Some general functions that isn't in the handler class
+diff -ruN a/sql/handler.h b/sql/handler.h
+--- a/sql/handler.h    2010-08-04 02:24:27.000000000 +0900
++++ b/sql/handler.h    2010-08-27 15:10:33.753058869 +0900
+@@ -30,6 +30,10 @@
+ #define USING_TRANSACTIONS
++#if MAX_KEY > 128
++#error MAX_KEY is too large.  Values up to 128 are supported.
++#endif
++
+ // the following is for checking tables
+ #define HA_ADMIN_ALREADY_DONE   1
+@@ -1121,6 +1125,9 @@
+   bool locked;
+   bool implicit_emptied;                /* Can be !=0 only if HEAP */
+   const COND *pushed_cond;
++  ulonglong rows_read;
++  ulonglong rows_changed;
++  ulonglong index_rows_read[MAX_KEY];
+   /**
+     next_insert_id is the next value which should be inserted into the
+     auto_increment column: in a inserting-multi-row statement (like INSERT
+@@ -1158,9 +1165,11 @@
+     ref_length(sizeof(my_off_t)),
+     ft_handler(0), inited(NONE),
+     locked(FALSE), implicit_emptied(0),
+-    pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
++    pushed_cond(0), rows_read(0), rows_changed(0), next_insert_id(0), insert_id_for_cur_row(0),
+     auto_inc_intervals_count(0)
+-    {}
++    {
++      memset(index_rows_read, 0, sizeof(index_rows_read));
++    }
+   virtual ~handler(void)
+   {
+     DBUG_ASSERT(locked == FALSE);
+@@ -1284,6 +1293,8 @@
+   {
+     table= table_arg;
+     table_share= share;
++    rows_read = rows_changed = 0;
++    memset(index_rows_read, 0, sizeof(index_rows_read));
+   }
+   virtual double scan_time()
+   { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
+@@ -1628,6 +1639,8 @@
+   virtual bool is_crashed() const  { return 0; }
+   virtual bool auto_repair() const { return 0; }
++  void update_global_table_stats();
++  void update_global_index_stats();
+ #define CHF_CREATE_FLAG 0
+ #define CHF_DELETE_FLAG 1
+diff -ruN a/sql/lex.h b/sql/lex.h
+--- a/sql/lex.h        2010-08-27 14:29:26.009071592 +0900
++++ b/sql/lex.h        2010-08-27 15:10:33.755063742 +0900
+@@ -106,6 +106,7 @@
+   { "CHECKSUM",               SYM(CHECKSUM_SYM)},
+   { "CIPHER",         SYM(CIPHER_SYM)},
+   { "CLIENT",         SYM(CLIENT_SYM)},
++  { "CLIENT_STATISTICS",      SYM(CLIENT_STATS_SYM)},
+   { "CLOSE",          SYM(CLOSE_SYM)},
+   { "COALESCE",               SYM(COALESCE)},
+   { "CODE",             SYM(CODE_SYM)},
+@@ -245,6 +246,7 @@
+   { "IN",             SYM(IN_SYM)},
+   { "INDEX",          SYM(INDEX_SYM)},
+   { "INDEXES",                SYM(INDEXES)},
++  { "INDEX_STATISTICS",       SYM(INDEX_STATS_SYM)},
+   { "INFILE",         SYM(INFILE)},
+   { "INITIAL_SIZE",   SYM(INITIAL_SIZE_SYM)},
+   { "INNER",          SYM(INNER_SYM)},
+@@ -478,6 +480,7 @@
+   { "SIGNED",         SYM(SIGNED_SYM)},
+   { "SIMPLE",         SYM(SIMPLE_SYM)},
+   { "SLAVE",            SYM(SLAVE)},
++  { "SLOW",             SYM(SLOW_SYM)},
+   { "SNAPSHOT",         SYM(SNAPSHOT_SYM)},
+   { "SMALLINT",               SYM(SMALLINT)},
+   { "SOCKET",         SYM(SOCKET_SYM)},
+@@ -527,12 +530,14 @@
+   { "TABLES",         SYM(TABLES)},
+   { "TABLESPACE",             SYM(TABLESPACE)},
+   { "TABLE_CHECKSUM", SYM(TABLE_CHECKSUM_SYM)},
++  { "TABLE_STATISTICS",       SYM(TABLE_STATS_SYM)},
+   { "TEMPORARY",      SYM(TEMPORARY)},
+   { "TEMPTABLE",      SYM(TEMPTABLE_SYM)},
+   { "TERMINATED",     SYM(TERMINATED)},
+   { "TEXT",           SYM(TEXT_SYM)},
+   { "THAN",             SYM(THAN_SYM)},
+   { "THEN",           SYM(THEN_SYM)},
++  { "THREAD_STATISTICS",      SYM(THREAD_STATS_SYM)},
+   { "TIME",           SYM(TIME_SYM)},
+   { "TIMESTAMP",      SYM(TIMESTAMP)},
+   { "TIMESTAMPADD",     SYM(TIMESTAMP_ADD)},
+@@ -568,6 +573,7 @@
+   { "USE",            SYM(USE_SYM)},
+   { "USER",           SYM(USER)},
+   { "USER_RESOURCES", SYM(RESOURCES)},
++  { "USER_STATISTICS",        SYM(USER_STATS_SYM)},
+   { "USE_FRM",                SYM(USE_FRM)},
+   { "USING",          SYM(USING)},
+   { "UTC_DATE",         SYM(UTC_DATE_SYM)},
+diff -ruN a/sql/log.cc b/sql/log.cc
+--- a/sql/log.cc       2010-08-27 14:43:41.986138797 +0900
++++ b/sql/log.cc       2010-08-27 15:10:33.761058932 +0900
+@@ -826,6 +826,13 @@
+     mysql_slow_log.reopen_file();
+ }
++void Log_to_file_event_handler::flush_slow_log()
++{
++  /* reopen the slow log file; nothing to do when slow logging is off */
++  if (opt_slow_log)
++    mysql_slow_log.reopen_file();
++}
++
+ /*
+   Log error with all enabled log event handlers
+@@ -937,6 +944,21 @@
+   return rc;
+ }
++bool LOGGER::flush_slow_log(THD *thd)
++{
++  /*
++    Take the logger lock exclusively so that nobody can use the logging
++    routines while the slow log file is being reopened
++  */
++  logger.lock_exclusive();
++
++  /* reopen only the slow log file */
++  file_log_handler->flush_slow_log();
++
++  /* end of log flush; this function always returns 0 */
++  logger.unlock();
++  return 0;
++}
+ /*
+   Log slow query with all enabled log event handlers
+@@ -4491,6 +4513,8 @@
+                              thd->first_successful_insert_id_in_prev_stmt_for_binlog);
+           if (e.write(file))
+             goto err;
++          if (file == &log_file)
++            thd->binlog_bytes_written += e.data_written;
+         }
+         if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
+         {
+@@ -4502,12 +4526,16 @@
+                              minimum());
+           if (e.write(file))
+             goto err;
++          if (file == &log_file)
++            thd->binlog_bytes_written += e.data_written;
+         }
+         if (thd->rand_used)
+         {
+           Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
+           if (e.write(file))
+             goto err;
++          if (file == &log_file)
++            thd->binlog_bytes_written += e.data_written;
+         }
+         if (thd->user_var_events.elements)
+         {
+@@ -4523,6 +4551,8 @@
+                                  user_var_event->charset_number);
+             if (e.write(file))
+               goto err;
++            if (file == &log_file)
++              thd->binlog_bytes_written += e.data_written;
+           }
+         }
+       }
+@@ -4535,6 +4565,8 @@
+     if (event_info->write(file) || 
+         DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
+       goto err;
++    if (file == &log_file)
++      thd->binlog_bytes_written += event_info->data_written;
+     if (file == &log_file) // we are writing to the real log (disk)
+     {
+@@ -4680,7 +4712,7 @@
+     be reset as a READ_CACHE to be able to read the contents from it.
+  */
+-int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
++int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log, bool sync_log)
+ {
+   Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
+@@ -4728,6 +4760,7 @@
+       /* write the first half of the split header */
+       if (my_b_write(&log_file, header, carry))
+         return ER_ERROR_ON_WRITE;
++      thd->binlog_bytes_written += carry;
+       /*
+         copy fixed second half of header to cache so the correct
+@@ -4796,6 +4829,7 @@
+     /* Write data to the binary log file */
+     if (my_b_write(&log_file, cache->read_pos, length))
+       return ER_ERROR_ON_WRITE;
++    thd->binlog_bytes_written += length;
+     cache->read_pos=cache->read_end;          // Mark buffer used up
+   } while ((length= my_b_fill(cache)));
+@@ -4918,21 +4952,24 @@
+       */
+       if (qinfo.write(&log_file))
+         goto err;
++      thd->binlog_bytes_written += qinfo.data_written;
+       DBUG_EXECUTE_IF("crash_before_writing_xid",
+                       {
+-                        if ((write_error= write_cache(cache, false, true)))
++                        if ((write_error= write_cache(thd, cache, false, true)))
+                           DBUG_PRINT("info", ("error writing binlog cache: %d",
+                                                write_error));
+                         DBUG_PRINT("info", ("crashing before writing xid"));
+                         abort();
+                       });
+-      if ((write_error= write_cache(cache, false, false)))
++      if ((write_error= write_cache(thd, cache, false, false)))
+         goto err;
+       if (commit_event && commit_event->write(&log_file))
+         goto err;
++      if (commit_event)
++        thd->binlog_bytes_written += commit_event->data_written;
+       if (incident && write_incident(thd, FALSE))
+         goto err;
+diff -ruN a/sql/log.h b/sql/log.h
+--- a/sql/log.h        2010-08-27 14:38:08.690071101 +0900
++++ b/sql/log.h        2010-08-27 15:13:33.762976324 +0900
+@@ -361,7 +361,7 @@
+   bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
+   bool write_incident(THD *thd, bool lock);
+-  int  write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
++  int  write_cache(THD *thd, IO_CACHE *cache, bool lock_log, bool flush_and_sync);
+   void set_write_error(THD *thd);
+   bool check_write_error(THD *thd);
+@@ -499,6 +499,7 @@
+                            const char *sql_text, uint sql_text_len,
+                            CHARSET_INFO *client_cs);
+   void flush();
++  void flush_slow_log();
+   void init_pthread_objects();
+   MYSQL_QUERY_LOG *get_mysql_slow_log() { return &mysql_slow_log; }
+   MYSQL_QUERY_LOG *get_mysql_log() { return &mysql_log; }
+@@ -543,6 +544,7 @@
+   void init_base();
+   void init_log_tables();
+   bool flush_logs(THD *thd);
++  bool flush_slow_log(THD *thd);
+   /* Perform basic logger cleanup. this will leave e.g. error log open. */
+   void cleanup_base();
+   /* Free memory. Nothing could be logged after this function is called */
+diff -ruN a/sql/mysql_priv.h b/sql/mysql_priv.h
+--- a/sql/mysql_priv.h 2010-08-27 14:38:08.699057407 +0900
++++ b/sql/mysql_priv.h 2010-08-27 15:10:33.805058568 +0900
+@@ -1139,7 +1139,17 @@
+ bool multi_delete_set_locks_and_link_aux_tables(LEX *lex);
+ void init_max_user_conn(void);
+ void init_update_queries(void);
++void init_global_user_stats(void);
++void init_global_table_stats(void);
++void init_global_index_stats(void);
++void init_global_client_stats(void);
++void init_global_thread_stats(void);
+ void free_max_user_conn(void);
++void free_global_user_stats(void);
++void free_global_table_stats(void);
++void free_global_index_stats(void);
++void free_global_client_stats(void);
++void free_global_thread_stats(void);
+ pthread_handler_t handle_bootstrap(void *arg);
+ int mysql_execute_command(THD *thd);
+ bool do_command(THD *thd);
+@@ -2014,6 +2024,7 @@
+ extern ulong max_connect_errors, connect_timeout;
+ extern ulong slave_net_timeout, slave_trans_retries;
+ extern uint max_user_connections;
++extern ulonglong denied_connections;
+ extern ulong what_to_log,flush_time;
+ extern ulong query_buff_size;
+ extern ulong max_prepared_stmt_count, prepared_stmt_count;
+@@ -2067,6 +2078,7 @@
+ extern my_bool opt_slave_compressed_protocol, use_temp_pool;
+ extern ulong slave_exec_mode_options;
+ extern my_bool opt_readonly, lower_case_file_system;
++extern my_bool opt_userstat_running, opt_thread_statistics;
+ extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
+ extern my_bool opt_secure_auth;
+ extern char* opt_secure_file_priv;
+@@ -2131,6 +2143,15 @@
+ extern struct system_variables max_system_variables;
+ extern struct system_status_var global_status_var;
+ extern struct rand_struct sql_rand;
++extern HASH global_user_stats;
++extern HASH global_client_stats;
++extern HASH global_thread_stats;
++extern pthread_mutex_t LOCK_global_user_client_stats;
++extern HASH global_table_stats;
++extern pthread_mutex_t LOCK_global_table_stats;
++extern HASH global_index_stats;
++extern pthread_mutex_t LOCK_global_index_stats;
++extern pthread_mutex_t LOCK_stats;
+ extern const char *opt_date_time_formats[];
+ extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[];
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc    2010-08-27 14:43:41.996021369 +0900
++++ b/sql/mysqld.cc    2010-08-27 15:10:33.772058694 +0900
+@@ -533,6 +533,7 @@
+ uint    opt_debug_sync_timeout= 0;
+ #endif /* defined(ENABLED_DEBUG_SYNC) */
+ my_bool opt_old_style_user_limits= 0, trust_function_creators= 0;
++my_bool opt_userstat_running= 0, opt_thread_statistics= 0;
+ /*
+   True if there is at least one per-hour limit for some user, so we should
+   check them before each query (and possibly reset counters when hour is
+@@ -581,6 +582,7 @@
+ ulong binlog_cache_use= 0, binlog_cache_disk_use= 0;
+ ulong max_connections, max_connect_errors;
+ uint  max_user_connections= 0;
++ulonglong denied_connections = 0;
+ /**
+   Limit of the total number of prepared statements in the server.
+   Is necessary to protect the server against out-of-memory attacks.
+@@ -682,6 +684,10 @@
+               LOCK_global_system_variables,
+               LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
+                 LOCK_connection_count;
++pthread_mutex_t LOCK_stats;
++pthread_mutex_t LOCK_global_user_client_stats;
++pthread_mutex_t LOCK_global_table_stats;
++pthread_mutex_t LOCK_global_index_stats;
+ /**
+   The below lock protects access to two global server variables:
+   max_prepared_stmt_count and prepared_stmt_count. These variables
+@@ -1367,6 +1373,11 @@
+   x_free(opt_secure_file_priv);
+   bitmap_free(&temp_pool);
+   free_max_user_conn();
++  free_global_user_stats();
++  free_global_client_stats();
++  free_global_thread_stats();
++  free_global_table_stats();
++  free_global_index_stats();
+ #ifdef HAVE_REPLICATION
+   end_slave_list();
+ #endif
+@@ -1483,6 +1494,10 @@
+   (void) pthread_cond_destroy(&COND_thread_cache);
+   (void) pthread_cond_destroy(&COND_flush_thread_cache);
+   (void) pthread_cond_destroy(&COND_manager);
++  (void) pthread_mutex_destroy(&LOCK_stats);
++  (void) pthread_mutex_destroy(&LOCK_global_user_client_stats);
++  (void) pthread_mutex_destroy(&LOCK_global_table_stats);
++  (void) pthread_mutex_destroy(&LOCK_global_index_stats);
+ }
+ #endif /*EMBEDDED_LIBRARY*/
+@@ -3172,6 +3187,7 @@
+   {"show_binlog_events",   (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOG_EVENTS]), SHOW_LONG_STATUS},
+   {"show_binlogs",         (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOGS]), SHOW_LONG_STATUS},
+   {"show_charsets",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CHARSETS]), SHOW_LONG_STATUS},
++  {"show_client_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CLIENT_STATS]), SHOW_LONG_STATUS},
+   {"show_collations",      (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLLATIONS]), SHOW_LONG_STATUS},
+   {"show_column_types",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLUMN_TYPES]), SHOW_LONG_STATUS},
+   {"show_contributors",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CONTRIBUTORS]), SHOW_LONG_STATUS},
+@@ -3193,6 +3209,7 @@
+ #endif
+   {"show_function_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS_FUNC]), SHOW_LONG_STATUS},
+   {"show_grants",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GRANTS]), SHOW_LONG_STATUS},
++  {"show_index_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_INDEX_STATS]), SHOW_LONG_STATUS},
+   {"show_keys",            (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_KEYS]), SHOW_LONG_STATUS},
+   {"show_master_status",   (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_MASTER_STAT]), SHOW_LONG_STATUS},
+   {"show_new_master",      (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NEW_MASTER]), SHOW_LONG_STATUS},
+@@ -3211,9 +3228,12 @@
+   {"show_slave_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_STAT]), SHOW_LONG_STATUS},
+   {"show_status",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS]), SHOW_LONG_STATUS},
+   {"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS},
++  {"show_table_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATS]), SHOW_LONG_STATUS},
+   {"show_table_status",    (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS},
+   {"show_tables",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLES]), SHOW_LONG_STATUS},
++  {"show_thread_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_THREAD_STATS]), SHOW_LONG_STATUS},
+   {"show_triggers",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TRIGGERS]), SHOW_LONG_STATUS},
++  {"show_user_statistics", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_USER_STATS]), SHOW_LONG_STATUS},
+   {"show_variables",       (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_VARIABLES]), SHOW_LONG_STATUS},
+   {"show_warnings",        (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_WARNS]), SHOW_LONG_STATUS},
+   {"slave_start",          (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SLAVE_START]), SHOW_LONG_STATUS},
+@@ -3652,6 +3672,10 @@
+ #endif
+   (void) pthread_mutex_init(&LOCK_server_started, MY_MUTEX_INIT_FAST);
+   (void) pthread_cond_init(&COND_server_started,NULL);
++  (void) pthread_mutex_init(&LOCK_stats, MY_MUTEX_INIT_FAST);
++  (void) pthread_mutex_init(&LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST);
++  (void) pthread_mutex_init(&LOCK_global_table_stats, MY_MUTEX_INIT_FAST);
++  (void) pthread_mutex_init(&LOCK_global_index_stats, MY_MUTEX_INIT_FAST);
+   sp_cache_init();
+ #ifdef HAVE_EVENT_SCHEDULER
+   Events::init_mutexes();
+@@ -4053,6 +4077,9 @@
+   if (!errmesg[0][0])
+     unireg_abort(1);
++  init_global_table_stats();
++  init_global_index_stats();
++
+   /* We have to initialize the storage engines before CSV logging */
+   if (ha_init())
+   {
+@@ -4199,6 +4226,9 @@
+   init_max_user_conn();
+   init_update_queries();
++  init_global_user_stats();
++  init_global_client_stats();
++  init_global_thread_stats();
+   DBUG_RETURN(0);
+ }
+@@ -5016,6 +5046,7 @@
+     DBUG_PRINT("error",("Too many connections"));
+     close_connection(thd, ER_CON_COUNT_ERROR, 1);
++    statistic_increment(denied_connections, &LOCK_status);
+     delete thd;
+     DBUG_VOID_RETURN;
+   }
+@@ -5800,6 +5831,8 @@
+   OPT_SLAVE_EXEC_MODE,
+   OPT_GENERAL_LOG_FILE,
+   OPT_SLOW_QUERY_LOG_FILE,
++  OPT_USERSTAT_RUNNING,
++  OPT_THREAD_STATISTICS,
+   OPT_USE_GLOBAL_LONG_QUERY_TIME,
+   OPT_USE_GLOBAL_LOG_SLOW_CONTROL,
+   OPT_SLOW_QUERY_LOG_MICROSECONDS_TIMESTAMP,
+@@ -7292,6 +7325,14 @@
+    &max_system_variables.net_wait_timeout, 0, GET_ULONG,
+    REQUIRED_ARG, NET_WAIT_TIMEOUT, 1, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT),
+    0, 1, 0},
++  {"userstat_running", OPT_USERSTAT_RUNNING,
++   "Control USER_STATISTICS, CLIENT_STATISTICS, THREAD_STATISTICS, INDEX_STATISTICS and TABLE_STATISTICS running",
++   (uchar**) &opt_userstat_running, (uchar**) &opt_userstat_running,
++   0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0},
++  {"thread_statistics", OPT_THREAD_STATISTICS,
++   "Control THREAD_STATISTICS running, when userstat_running is enabled",
++   (uchar**) &opt_thread_statistics, (uchar**) &opt_thread_statistics,
++   0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0},
+   {"binlog-direct-non-transactional-updates", OPT_BINLOG_DIRECT_NON_TRANS_UPDATE,
+    "Causes updates to non-transactional engines using statement format to be "
+    "written directly to binary log. Before using this option, make sure that "
+diff -ruN a/sql/set_var.cc b/sql/set_var.cc
+--- a/sql/set_var.cc   2010-08-27 14:43:42.004008722 +0900
++++ b/sql/set_var.cc   2010-08-27 15:10:33.809988740 +0900
+@@ -554,6 +554,10 @@
+ static sys_var_thd_ulong      sys_read_buff_size(&vars, "read_buffer_size",
+                                          &SV::read_buff_size);
+ static sys_var_opt_readonly   sys_readonly(&vars, "read_only", &opt_readonly);
++static sys_var_bool_ptr               sys_userstat_running(&vars, "userstat_running",
++                                                   &opt_userstat_running);
++static sys_var_bool_ptr               sys_thread_statistics(&vars, "thread_statistics",
++                                                    &opt_thread_statistics);
+ static sys_var_thd_ulong      sys_read_rnd_buff_size(&vars, "read_rnd_buffer_size",
+                                              &SV::read_rnd_buff_size);
+ static sys_var_thd_ulong      sys_div_precincrement(&vars, "div_precision_increment",
+diff -ruN a/sql/sql_base.cc b/sql/sql_base.cc
+--- a/sql/sql_base.cc  2010-08-04 02:24:34.000000000 +0900
++++ b/sql/sql_base.cc  2010-08-27 15:10:33.818058934 +0900
+@@ -1382,6 +1382,12 @@
+   DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", table->s->db.str,
+                         table->s->table_name.str, (long) table));
++  if(table->file)
++  {
++    table->file->update_global_table_stats();
++    table->file->update_global_index_stats();
++  }
++
+   *table_ptr=table->next;
+   /*
+     When closing a MERGE parent or child table, detach the children first.
+@@ -1922,6 +1928,8 @@
+   DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'",
+                           table->s->db.str, table->s->table_name.str));
++  table->file->update_global_table_stats();
++  table->file->update_global_index_stats();
+   free_io_cache(table);
+   closefrm(table, 0);
+   if (delete_table)
+diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
+--- a/sql/sql_class.cc 2010-08-27 14:38:08.741990000 +0900
++++ b/sql/sql_class.cc 2010-08-27 15:10:33.825058007 +0900
+@@ -704,6 +704,13 @@
+   mysys_var=0;
+   binlog_evt_union.do_union= FALSE;
+   enable_slow_log= 0;
++  busy_time = 0;
++  cpu_time = 0;
++  bytes_received = 0;
++  bytes_sent = 0;
++  binlog_bytes_written = 0;
++  updated_row_count = 0;
++  sent_row_count_2 = 0;
+ #ifndef DBUG_OFF
+   dbug_sentry=THD_SENTRY_MAGIC;
+ #endif
+@@ -907,6 +914,7 @@
+   reset_current_stmt_binlog_row_based();
+   bzero((char *) &status_var, sizeof(status_var));
+   sql_log_bin_toplevel= options & OPTION_BIN_LOG;
++  reset_stats();
+ #if defined(ENABLED_DEBUG_SYNC)
+   /* Initialize the Debug Sync Facility. See debug_sync.cc. */
+@@ -914,6 +922,84 @@
+ #endif /* defined(ENABLED_DEBUG_SYNC) */
+ }
++// Restarts the stats of a THD: both timestamps become "now", diff stats become 0.
++void THD::reset_stats(void)
++{
++  last_global_update_time = current_connect_time = time(NULL);
++  reset_diff_stats();
++}
++
++// Zeroes every 'diff' accumulator, i.e. the per-THD deltas that are later
++// added to the global stats.
++void THD::reset_diff_stats(void)
++{
++  // busy/CPU time (double)
++  diff_total_busy_time = diff_total_cpu_time = 0;
++  // traffic in bytes (ulonglong)
++  diff_total_bytes_received = diff_total_bytes_sent = 0;
++  diff_total_binlog_bytes_written = 0;
++  // row counters (ha_rows)
++  diff_total_sent_rows = diff_total_updated_rows = diff_total_read_rows = 0;
++  // command counters
++  diff_select_commands = diff_update_commands = diff_other_commands = 0;
++  diff_empty_queries = 0;
++  // transaction counters
++  diff_commit_trans = diff_rollback_trans = 0;
++  // connection and access errors
++  diff_denied_connections = diff_lost_connections = 0;
++  diff_access_denied_errors = 0;
++}
++
++// Updates 'diff' stats of a THD.
++void THD::update_stats(bool ran_command) {
++  if (opt_userstat_running) {
++  diff_total_busy_time += busy_time;
++  diff_total_cpu_time += cpu_time;
++  diff_total_bytes_received += bytes_received;
++  diff_total_bytes_sent += bytes_sent;
++  diff_total_binlog_bytes_written += binlog_bytes_written;
++  diff_total_sent_rows += sent_row_count_2;
++  diff_total_updated_rows += updated_row_count;
++  // diff_total_read_rows is updated in handler.cc.
++
++  if (ran_command) {
++    // The replication thread has the COM_CONNECT command.
++    if ((old_command == COM_QUERY || command == COM_CONNECT) &&
++        (lex->sql_command >= 0 && lex->sql_command < SQLCOM_END)) {
++      // A SQL query.
++      if (lex->sql_command == SQLCOM_SELECT) {
++        diff_select_commands++;
++        if (!sent_row_count_2)
++          diff_empty_queries++;
++      } else if (! sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND) {
++        // 'SHOW ' commands become SQLCOM_SELECT.
++        diff_other_commands++;
++        // 'SHOW ' commands shouldn't inflate total sent row count.
++        diff_total_sent_rows -= sent_row_count_2;
++      } else if (is_update_query(lex->sql_command)) {
++        diff_update_commands++;
++      } else {
++        diff_other_commands++;
++      }
++    }
++  }
++  // diff_commit_trans is updated in handler.cc.
++  // diff_rollback_trans is updated in handler.cc.
++  // diff_denied_connections is updated in sql_parse.cc.
++  // diff_lost_connections is updated in sql_parse.cc.
++  // diff_access_denied_errors is updated in sql_parse.cc.
++
++  /* reset counters to zero to avoid double-counting since values
++     are already store in diff_total_*. */
++  }
++  busy_time = 0;
++  cpu_time = 0;
++  bytes_received = 0;
++  bytes_sent = 0;
++  binlog_bytes_written = 0;
++  updated_row_count = 0;
++  sent_row_count_2 = 0;
++}
+ /*
+   Init THD for query processing.
+@@ -1545,6 +1631,32 @@
+ }
+ #endif
++char *THD::get_client_host_port(THD *client)
++{
++  Security_context *client_sctx= client->security_ctx;
++  char *client_host= NULL;
++
++  if (client->peer_port && (client_sctx->host || client_sctx->ip) &&
++      security_ctx->host_or_ip[0])
++  {
++    if ((client_host= (char *) this->alloc(LIST_PROCESS_HOST_LEN+1)))
++      my_snprintf((char *) client_host, LIST_PROCESS_HOST_LEN,
++                  "%s:%u", client_sctx->host_or_ip, client->peer_port);
++  }
++  else
++    client_host= this->strdup(client_sctx->host_or_ip[0] ?
++                              client_sctx->host_or_ip :
++                              client_sctx->host ? client_sctx->host : "");
++
++  return client_host;
++}
++
++const char *get_client_host(THD *client)
++{
++  Security_context *sctx= client->security_ctx;
++  if (sctx->host_or_ip[0]) return sctx->host_or_ip;  // preferred: host name or IP
++  return sctx->host ? sctx->host : "";                // never NULL
++}
+ struct Item_change_record: public ilink
+ {
+@@ -1732,6 +1844,7 @@
+     buffer.set(buff, sizeof(buff), &my_charset_bin);
+   }
+   thd->sent_row_count++;
++  thd->sent_row_count_2++;
+   if (thd->is_error())
+   {
+     protocol->remove_last_row();
+@@ -1836,6 +1949,7 @@
+ select_export::~select_export()
+ {
+   thd->sent_row_count=row_count;
++  thd->sent_row_count_2=row_count;
+ }
+@@ -2868,6 +2982,7 @@
+   if (likely(thd != 0))
+   { /* current_thd==0 when close_connection() calls net_send_error() */
+     thd->status_var.bytes_sent+= length;
++    thd->bytes_sent+= length;
+   }
+ }
+@@ -2875,6 +2990,7 @@
+ void thd_increment_bytes_received(ulong length)
+ {
+   current_thd->status_var.bytes_received+= length;
++  current_thd->bytes_received+= length;
+ }
+diff -ruN a/sql/sql_class.h b/sql/sql_class.h
+--- a/sql/sql_class.h  2010-08-27 14:43:42.008006390 +0900
++++ b/sql/sql_class.h  2010-08-27 15:10:33.830058443 +0900
+@@ -1435,6 +1435,8 @@
+     first byte of the packet in do_command()
+   */
+   enum enum_server_command command;
++  // Used to save the command, before it is set to COM_SLEEP.
++  enum enum_server_command old_command;
+   uint32     server_id;
+   uint32     file_id;                 // for LOAD DATA INFILE
+   /* remote (peer) port */
+@@ -1828,6 +1830,8 @@
+   /* variables.transaction_isolation is reset to this after each commit */
+   enum_tx_isolation session_tx_isolation;
+   enum_check_fields count_cuted_fields;
++  ha_rows    updated_row_count;
++  ha_rows    sent_row_count_2; /* for userstat */
+   DYNAMIC_ARRAY user_var_events;        /* For user variables replication */
+   MEM_ROOT      *user_var_events_alloc; /* Allocate above array elements here */
+@@ -1916,6 +1920,49 @@
+   */
+   LOG_INFO*  current_linfo;
+   NET*       slave_net;                       // network connection from slave -> m.
++
++  /*
++    Used to update global user stats.  The global user stats are updated
++    occasionally with the 'diff' variables.  After the update, the 'diff'
++    variables are reset to 0.
++   */
++  // Time when the current thread connected to MySQL.
++  time_t current_connect_time;
++  // Last time when THD stats were updated in global_user_stats.
++  time_t last_global_update_time;
++  // Busy (non-idle) time for just one command.
++  double busy_time;
++  // Busy time not updated in global_user_stats yet.
++  double diff_total_busy_time;
++  // Cpu (non-idle) time for just one thread.
++  double cpu_time;
++  // Cpu time not updated in global_user_stats yet.
++  double diff_total_cpu_time;
++  /* bytes counting */
++  ulonglong bytes_received;
++  ulonglong diff_total_bytes_received;
++  ulonglong bytes_sent;
++  ulonglong diff_total_bytes_sent;
++  ulonglong binlog_bytes_written;
++  ulonglong diff_total_binlog_bytes_written;
++
++  // Number of rows not reflected in global_user_stats yet.
++  ha_rows diff_total_sent_rows, diff_total_updated_rows, diff_total_read_rows;
++  // Number of commands not reflected in global_user_stats yet.
++  ulonglong diff_select_commands, diff_update_commands, diff_other_commands;
++  // Number of transactions not reflected in global_user_stats yet.
++  ulonglong diff_commit_trans, diff_rollback_trans;
++  // Number of connection errors not reflected in global_user_stats yet.
++  ulonglong diff_denied_connections, diff_lost_connections;
++  // Number of db access denied, not reflected in global_user_stats yet.
++  ulonglong diff_access_denied_errors;
++  // Number of queries that return 0 rows
++  ulonglong diff_empty_queries;
++
++  // Per account query delay in milliseconds. When not 0, sleep this number of
++  // milliseconds before every SQL command.
++  ulonglong query_delay_millis;
++
+   /* Used by the sys_var class to store temporary values */
+   union
+   {
+@@ -1981,6 +2028,11 @@
+     alloc_root. 
+   */
+   void init_for_queries();
++  void reset_stats(void);
++  void reset_diff_stats(void);
++  // ran_command is true when this is called immediately after a
++  // command has been run.
++  void update_stats(bool ran_command);
+   void change_user(void);
+   void cleanup(void);
+   void cleanup_after_query();
+@@ -2351,9 +2403,15 @@
+     *p_db= strmake(db, db_length);
+     *p_db_length= db_length;
+     return FALSE;
++
++  // Returns string as 'IP:port' for the client-side of the connection represented
++  // by 'client' as displayed by SHOW PROCESSLIST. Allocates memory from the heap of
++  // this THD and that is not reclaimed immediately, so use sparingly. May return NULL.
+   }
+   thd_scheduler scheduler;
++  char *get_client_host_port(THD *client);
++
+ public:
+   inline Internal_error_handler *get_internal_handler()
+   { return m_internal_handler; }
+@@ -2438,6 +2496,9 @@
+   LEX_STRING invoker_host;
+ };
++// Returns string as 'IP' for the client-side of the connection represented by
++// 'client'. Does not allocate memory. May return "".
++const char *get_client_host(THD *client);
+ /** A short cut for thd->main_da.set_ok_status(). */
+diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
+--- a/sql/sql_connect.cc       2010-08-27 14:38:08.750990238 +0900
++++ b/sql/sql_connect.cc       2010-08-27 15:10:33.834058369 +0900
+@@ -42,6 +42,24 @@
+ extern void win_install_sigabrt_handler();
+ #endif
++// Increments connection count for user.
++static int increment_connection_count(THD* thd, bool use_lock);
++
++// Uses the THD to update the global stats by user name and client IP
++void update_global_user_stats(THD* thd, bool create_user, time_t now);
++
++HASH global_user_stats;
++HASH global_client_stats;
++HASH global_thread_stats;
++// Protects global_user_stats, global_client_stats and global_thread_stats
++extern pthread_mutex_t LOCK_global_user_client_stats;
++
++HASH global_table_stats;
++extern pthread_mutex_t LOCK_global_table_stats;
++
++HASH global_index_stats;
++extern pthread_mutex_t LOCK_global_index_stats;
++
+ /*
+   Get structure for logging connection data for the current user
+ */
+@@ -99,6 +117,563 @@
+ }
++extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
++                         my_bool not_used __attribute__((unused)))
++{
++  *length = strlen(user_stats->user);  // key length, without the trailing NUL
++  return (uchar*)user_stats->user;     // key: the name stored in the entry
++}
++
++extern "C" uchar *get_key_thread_stats(THREAD_STATS *thread_stats, size_t *length,
++                         my_bool not_used __attribute__((unused)))
++{
++  *length = sizeof(my_thread_id);      // fixed-size binary key
++  return (uchar*)&(thread_stats->id);  // key: the thread id stored in the entry
++}
++
++void free_user_stats(USER_STATS* user_stats)
++{
++  my_free((char*)user_stats, MYF(0));  // passed as hash_free_key for the user and client tables
++}
++
++void free_thread_stats(THREAD_STATS* thread_stats)
++{
++  my_free((char*)thread_stats, MYF(0));  // passed as hash_free_key for the thread table
++}
++
++void init_user_stats(USER_STATS *user_stats,
++                     const char *user,
++                     const char *priv_user,
++                     uint total_connections,
++                     uint concurrent_connections,
++                     time_t connected_time,
++                     double busy_time,
++                     double cpu_time,
++                     ulonglong bytes_received,
++                     ulonglong bytes_sent,
++                     ulonglong binlog_bytes_written,
++                     ha_rows rows_fetched,
++                     ha_rows rows_updated,
++                     ha_rows rows_read,
++                     ulonglong select_commands,
++                     ulonglong update_commands,
++                     ulonglong other_commands,
++                     ulonglong commit_trans,
++                     ulonglong rollback_trans,
++                     ulonglong denied_connections,
++                     ulonglong lost_connections,
++                     ulonglong access_denied_errors,
++                     ulonglong empty_queries)
++{
++  DBUG_ENTER("init_user_stats");
++  DBUG_PRINT("info",
++             ("Add user_stats entry for user %s - priv_user %s",
++              user, priv_user));
++  strncpy(user_stats->user, user, sizeof(user_stats->user));
++  strncpy(user_stats->priv_user, priv_user, sizeof(user_stats->priv_user));
++
++  user_stats->total_connections = total_connections;
++  user_stats->concurrent_connections = concurrent_connections;
++  user_stats->connected_time = connected_time;
++  user_stats->busy_time = busy_time;
++  user_stats->cpu_time = cpu_time;
++  user_stats->bytes_received = bytes_received;
++  user_stats->bytes_sent = bytes_sent;
++  user_stats->binlog_bytes_written = binlog_bytes_written;
++  user_stats->rows_fetched = rows_fetched;
++  user_stats->rows_updated = rows_updated;
++  user_stats->rows_read = rows_read;
++  user_stats->select_commands = select_commands;
++  user_stats->update_commands = update_commands;
++  user_stats->other_commands = other_commands;
++  user_stats->commit_trans = commit_trans;
++  user_stats->rollback_trans = rollback_trans;
++  user_stats->denied_connections = denied_connections;
++  user_stats->lost_connections = lost_connections;
++  user_stats->access_denied_errors = access_denied_errors;
++  user_stats->empty_queries = empty_queries;
++  DBUG_VOID_RETURN;
++}
++
++void init_thread_stats(THREAD_STATS *thread_stats,  // entry to fill; each argument is stored in the field of the same name
++                     my_thread_id id,
++                     uint total_connections,
++                     uint concurrent_connections,
++                     time_t connected_time,
++                     double busy_time,
++                     double cpu_time,
++                     ulonglong bytes_received,
++                     ulonglong bytes_sent,
++                     ulonglong binlog_bytes_written,
++                     ha_rows rows_fetched,
++                     ha_rows rows_updated,
++                     ha_rows rows_read,
++                     ulonglong select_commands,
++                     ulonglong update_commands,
++                     ulonglong other_commands,
++                     ulonglong commit_trans,
++                     ulonglong rollback_trans,
++                     ulonglong denied_connections,
++                     ulonglong lost_connections,
++                     ulonglong access_denied_errors,
++                     ulonglong empty_queries)
++{
++  DBUG_ENTER("init_thread_stats");
++  DBUG_PRINT("info",
++             ("Add thread_stats entry for thread %lu",
++              id));
++  thread_stats->id = id;  // this id is what get_key_thread_stats() returns as the hash key
++
++  thread_stats->total_connections = total_connections;
++  thread_stats->concurrent_connections = concurrent_connections;
++  thread_stats->connected_time = connected_time;
++  thread_stats->busy_time = busy_time;
++  thread_stats->cpu_time = cpu_time;
++  thread_stats->bytes_received = bytes_received;
++  thread_stats->bytes_sent = bytes_sent;
++  thread_stats->binlog_bytes_written = binlog_bytes_written;
++  thread_stats->rows_fetched = rows_fetched;
++  thread_stats->rows_updated = rows_updated;
++  thread_stats->rows_read = rows_read;
++  thread_stats->select_commands = select_commands;
++  thread_stats->update_commands = update_commands;
++  thread_stats->other_commands = other_commands;
++  thread_stats->commit_trans = commit_trans;
++  thread_stats->rollback_trans = rollback_trans;
++  thread_stats->denied_connections = denied_connections;
++  thread_stats->lost_connections = lost_connections;
++  thread_stats->access_denied_errors = access_denied_errors;
++  thread_stats->empty_queries = empty_queries;
++  DBUG_VOID_RETURN;
++}
++
++void add_user_stats(USER_STATS *user_stats,  // accumulator; each argument is added to the field of the same name
++                    uint total_connections,
++                    uint concurrent_connections,
++                    time_t connected_time,
++                    double busy_time,
++                    double cpu_time,
++                    ulonglong bytes_received,
++                    ulonglong bytes_sent,
++                    ulonglong binlog_bytes_written,
++                    ha_rows rows_fetched,
++                    ha_rows rows_updated,
++                    ha_rows rows_read,
++                    ulonglong select_commands,
++                    ulonglong update_commands,
++                    ulonglong other_commands,
++                    ulonglong commit_trans,
++                    ulonglong rollback_trans,
++                    ulonglong denied_connections,
++                    ulonglong lost_connections,
++                    ulonglong access_denied_errors,
++                    ulonglong empty_queries)
++{
++  user_stats->total_connections += total_connections;
++  user_stats->concurrent_connections += concurrent_connections;
++  user_stats->connected_time += connected_time;
++  user_stats->busy_time += busy_time;
++  user_stats->cpu_time += cpu_time;
++  user_stats->bytes_received += bytes_received;
++  user_stats->bytes_sent += bytes_sent;
++  user_stats->binlog_bytes_written += binlog_bytes_written;
++  user_stats->rows_fetched += rows_fetched;
++  user_stats->rows_updated += rows_updated;
++  user_stats->rows_read += rows_read;
++  user_stats->select_commands += select_commands;
++  user_stats->update_commands += update_commands;
++  user_stats->other_commands += other_commands;
++  user_stats->commit_trans += commit_trans;
++  user_stats->rollback_trans += rollback_trans;
++  user_stats->denied_connections += denied_connections;
++  user_stats->lost_connections += lost_connections;
++  user_stats->access_denied_errors += access_denied_errors;
++  user_stats->empty_queries += empty_queries;
++}
++
++void add_thread_stats(THREAD_STATS *thread_stats,  // accumulator; each argument is added to the field of the same name
++                    uint total_connections,
++                    uint concurrent_connections,
++                    time_t connected_time,
++                    double busy_time,
++                    double cpu_time,
++                    ulonglong bytes_received,
++                    ulonglong bytes_sent,
++                    ulonglong binlog_bytes_written,
++                    ha_rows rows_fetched,
++                    ha_rows rows_updated,
++                    ha_rows rows_read,
++                    ulonglong select_commands,
++                    ulonglong update_commands,
++                    ulonglong other_commands,
++                    ulonglong commit_trans,
++                    ulonglong rollback_trans,
++                    ulonglong denied_connections,
++                    ulonglong lost_connections,
++                    ulonglong access_denied_errors,
++                    ulonglong empty_queries)
++{
++  thread_stats->total_connections += total_connections;
++  thread_stats->concurrent_connections += concurrent_connections;
++  thread_stats->connected_time += connected_time;
++  thread_stats->busy_time += busy_time;
++  thread_stats->cpu_time += cpu_time;
++  thread_stats->bytes_received += bytes_received;
++  thread_stats->bytes_sent += bytes_sent;
++  thread_stats->binlog_bytes_written += binlog_bytes_written;
++  thread_stats->rows_fetched += rows_fetched;
++  thread_stats->rows_updated += rows_updated;
++  thread_stats->rows_read += rows_read;
++  thread_stats->select_commands += select_commands;
++  thread_stats->update_commands += update_commands;
++  thread_stats->other_commands += other_commands;
++  thread_stats->commit_trans += commit_trans;
++  thread_stats->rollback_trans += rollback_trans;
++  thread_stats->denied_connections += denied_connections;
++  thread_stats->lost_connections += lost_connections;
++  thread_stats->access_denied_errors += access_denied_errors;
++  thread_stats->empty_queries += empty_queries;
++}
++
++void init_global_user_stats(void)
++{
++  if (!hash_init(&global_user_stats, system_charset_info, max_connections,
++                 0, 0, (hash_get_key)get_key_user_stats,
++                 (hash_free_key)free_user_stats, 0))
++    return;  // hash created, nothing more to do
++  sql_print_error("Initializing global_user_stats failed.");
++  exit(1);   // fatal: terminate the process
++}
++
++void init_global_client_stats(void)
++{
++  if (!hash_init(&global_client_stats, system_charset_info, max_connections,
++                 0, 0, (hash_get_key)get_key_user_stats,
++                 (hash_free_key)free_user_stats, 0))
++    return;  // hash created; it shares the key/free callbacks of the user table
++  sql_print_error("Initializing global_client_stats failed.");
++  exit(1);   // fatal: terminate the process
++}
++
++void init_global_thread_stats(void)
++{
++  if (hash_init(&global_thread_stats, &my_charset_bin, max_connections,  // binary charset: keys are raw thread ids
++                0, 0, (hash_get_key)get_key_thread_stats,
++                (hash_free_key)free_thread_stats, 0)) {
++    sql_print_error("Initializing global_thread_stats failed.");  // name the table that actually failed
++    exit(1);
++  }
++}
++
++extern "C" uchar *get_key_table_stats(TABLE_STATS *table_stats, size_t *length,
++                                     my_bool not_used __attribute__((unused)))
++{
++  *length = strlen(table_stats->table);  // key length, without the trailing NUL
++  return (uchar*)table_stats->table;     // key: the 'table' string of the entry
++}
++
++extern "C" void free_table_stats(TABLE_STATS* table_stats)
++{
++  my_free((char*)table_stats, MYF(0));  // passed as hash_free_key for global_table_stats
++}
++
++void init_global_table_stats(void)
++{
++  if (!hash_init(&global_table_stats, system_charset_info, max_connections,
++                 0, 0, (hash_get_key)get_key_table_stats,
++                 (hash_free_key)free_table_stats, 0))
++    return;  // hash created, nothing more to do
++  sql_print_error("Initializing global_table_stats failed.");
++  exit(1);   // fatal: terminate the process
++}
++
++extern "C" uchar *get_key_index_stats(INDEX_STATS *index_stats, size_t *length,
++                                     my_bool not_used __attribute__((unused)))
++{
++  *length = strlen(index_stats->index);  // key length, without the trailing NUL
++  return (uchar*)index_stats->index;     // key: the 'index' string of the entry
++}
++
++extern "C" void free_index_stats(INDEX_STATS* index_stats)
++{
++  my_free((char*)index_stats, MYF(0));  // passed as hash_free_key for global_index_stats
++}
++
++void init_global_index_stats(void)
++{
++  if (!hash_init(&global_index_stats, system_charset_info, max_connections,
++                 0, 0, (hash_get_key)get_key_index_stats,
++                 (hash_free_key)free_index_stats, 0))
++    return;  // hash created, nothing more to do
++  sql_print_error("Initializing global_index_stats failed.");
++  exit(1);   // fatal: terminate the process
++}
++
++void free_global_user_stats(void)
++{
++  hash_free(&global_user_stats);  // counterpart of init_global_user_stats()
++}
++
++void free_global_thread_stats(void)
++{
++  hash_free(&global_thread_stats);  // counterpart of init_global_thread_stats()
++}
++
++void free_global_table_stats(void)
++{
++  hash_free(&global_table_stats);  // counterpart of init_global_table_stats()
++}
++
++void free_global_index_stats(void)
++{
++  hash_free(&global_index_stats);  // counterpart of init_global_index_stats()
++}
++
++void free_global_client_stats(void)
++{
++  hash_free(&global_client_stats);  // counterpart of init_global_client_stats()
++}
++
++// 'mysql_system_user' is used when no user is defined for a THD.
++static char mysql_system_user[] = "#mysql_system#";
++
++// Maps a NULL user name to the 'mysql_system_user' placeholder; any other 'user' is returned as is.
++static char* get_valid_user_string(char* user) {
++  return (user == NULL) ? mysql_system_user : user;
++}
++
++// Increments the global stats connection count for an entry from
++// global_client_stats or global_user_stats. Returns 0 on success
++// and 1 on error.
++static int increment_count_by_name(const char *name, const char *role_name,
++                                   HASH *users_or_clients, THD *thd)
++{
++  USER_STATS* user_stats;
++
++  if (!(user_stats = (USER_STATS*)hash_search(users_or_clients, (uchar*) name,
++                                              strlen(name))))
++  {
++    // First connection for this user or client
++    if (!(user_stats = ((USER_STATS*)
++                        my_malloc(sizeof(USER_STATS), MYF(MY_WME | MY_ZEROFILL)))))
++    {
++      return 1; // Out of memory
++    }
++
++    init_user_stats(user_stats, name, role_name,
++                    0, 0,      // connections
++                    0, 0, 0,   // time
++                    0, 0, 0,   // bytes sent, received and written
++                    0, 0, 0,   // rows fetched, updated and read
++                    0, 0, 0,   // select, update and other commands
++                    0, 0,      // commit and rollback trans
++                    thd->diff_denied_connections,
++                    0,         // lost connections
++                    0,         // access denied errors
++                    0);        // empty queries
++
++    if (my_hash_insert(users_or_clients, (uchar*)user_stats))
++    {
++      my_free((char*)user_stats, 0);
++      return 1; // Out of memory
++    }
++  }
++  user_stats->total_connections++;
++  return 0;
++}
++
++static int increment_count_by_id(my_thread_id id,
++                                 HASH *users_or_clients, THD *thd)
++{
++  THREAD_STATS* thread_stats;
++
++  if (!(thread_stats = (THREAD_STATS*)hash_search(users_or_clients, (uchar*) &id,
++                                              sizeof(my_thread_id))))
++  {
++    // First connection for this user or client
++    if (!(thread_stats = ((THREAD_STATS*)
++                        my_malloc(sizeof(THREAD_STATS), MYF(MY_WME | MY_ZEROFILL)))))
++    {
++      return 1; // Out of memory
++    }
++
++    init_thread_stats(thread_stats, id,
++                    0, 0,      // connections
++                    0, 0, 0,   // time
++                    0, 0, 0,   // bytes sent, received and written
++                    0, 0, 0,   // rows fetched, updated and read
++                    0, 0, 0,   // select, update and other commands
++                    0, 0,      // commit and rollback trans
++                    thd->diff_denied_connections,
++                    0,         // lost connections
++                    0,         // access denied errors
++                    0);        // empty queries
++
++    if (my_hash_insert(users_or_clients, (uchar*)thread_stats))
++    {
++      my_free((char*)thread_stats, 0);
++      return 1; // Out of memory
++    }
++  }
++  thread_stats->total_connections++;
++  return 0;
++}
++
++// Increments the global user and client stats connection count, and the
++// per-thread count when opt_thread_statistics is set.  If 'use_lock' is true,
++// LOCK_global_user_client_stats will be locked/unlocked.  Returns 0 on success, 1 on error.
++static int increment_connection_count(THD* thd, bool use_lock)
++{
++  char* user_string = get_valid_user_string(thd->main_security_ctx.user);
++  const char* client_string = get_client_host(thd);
++  int return_value = 0;
++
++  if (!opt_userstat_running)
++    return return_value;  // statistics disabled: nothing to record, report success
++
++  if (use_lock) pthread_mutex_lock(&LOCK_global_user_client_stats);
++
++  if (increment_count_by_name(user_string, user_string,  // user table: the name doubles as priv_user
++                              &global_user_stats, thd))
++  {
++    return_value = 1;
++    goto end;
++  }
++  if (increment_count_by_name(client_string,             // client table: keyed by host/IP string
++                              user_string,
++                              &global_client_stats, thd))
++  {
++    return_value = 1;
++    goto end;
++  }
++  if (opt_thread_statistics) {
++    if (increment_count_by_id(thd->thread_id, &global_thread_stats, thd))
++    {
++      return_value = 1;
++      goto end;
++    }
++  }
++
++end:  // single exit so the mutex is released on every path after it was taken
++  if (use_lock) pthread_mutex_unlock(&LOCK_global_user_client_stats);
++  return return_value;
++}
++
++// Used to update the global user and client stats: adds the 'diff' stats of 'thd' to 'user_stats'.
++static void update_global_user_stats_with_user(THD* thd,
++                                               USER_STATS* user_stats,
++                                               time_t now)
++{
++  user_stats->connected_time += now - thd->last_global_update_time;
++  // thd->last_global_update_time is not advanced here; update_global_user_stats() does it.
++  user_stats->busy_time += thd->diff_total_busy_time;
++  user_stats->cpu_time += thd->diff_total_cpu_time;
++  user_stats->bytes_received += thd->diff_total_bytes_received;
++  user_stats->bytes_sent += thd->diff_total_bytes_sent;
++  user_stats->binlog_bytes_written += thd->diff_total_binlog_bytes_written;
++  user_stats->rows_fetched += thd->diff_total_sent_rows;
++  user_stats->rows_updated += thd->diff_total_updated_rows;
++  user_stats->rows_read += thd->diff_total_read_rows;
++  user_stats->select_commands += thd->diff_select_commands;
++  user_stats->update_commands += thd->diff_update_commands;
++  user_stats->other_commands += thd->diff_other_commands;
++  user_stats->commit_trans += thd->diff_commit_trans;
++  user_stats->rollback_trans += thd->diff_rollback_trans;
++  user_stats->denied_connections += thd->diff_denied_connections;
++  user_stats->lost_connections += thd->diff_lost_connections;
++  user_stats->access_denied_errors += thd->diff_access_denied_errors;
++  user_stats->empty_queries += thd->diff_empty_queries;
++}
++
++static void update_global_thread_stats_with_thread(THD* thd,
++                                               THREAD_STATS* thread_stats,
++                                               time_t now)
++{
++  thread_stats->connected_time += now - thd->last_global_update_time;
++  // Adds the 'diff' stats of 'thd' to 'thread_stats'; last_global_update_time is advanced by update_global_user_stats().
++  thread_stats->busy_time += thd->diff_total_busy_time;
++  thread_stats->cpu_time += thd->diff_total_cpu_time;
++  thread_stats->bytes_received += thd->diff_total_bytes_received;
++  thread_stats->bytes_sent += thd->diff_total_bytes_sent;
++  thread_stats->binlog_bytes_written += thd->diff_total_binlog_bytes_written;
++  thread_stats->rows_fetched += thd->diff_total_sent_rows;
++  thread_stats->rows_updated += thd->diff_total_updated_rows;
++  thread_stats->rows_read += thd->diff_total_read_rows;
++  thread_stats->select_commands += thd->diff_select_commands;
++  thread_stats->update_commands += thd->diff_update_commands;
++  thread_stats->other_commands += thd->diff_other_commands;
++  thread_stats->commit_trans += thd->diff_commit_trans;
++  thread_stats->rollback_trans += thd->diff_rollback_trans;
++  thread_stats->denied_connections += thd->diff_denied_connections;
++  thread_stats->lost_connections += thd->diff_lost_connections;
++  thread_stats->access_denied_errors += thd->diff_access_denied_errors;
++  thread_stats->empty_queries += thd->diff_empty_queries;
++}
++
++// Flushes the 'diff' stats of 'thd' into its user, client and (optionally) thread entries, then resets them.
++void update_global_user_stats(THD* thd, bool create_user, time_t now)
++{
++  if (opt_userstat_running) {
++  char* user_string = get_valid_user_string(thd->main_security_ctx.user);
++  const char* client_string = get_client_host(thd);
++
++  USER_STATS* user_stats;
++  THREAD_STATS* thread_stats;
++  pthread_mutex_lock(&LOCK_global_user_client_stats);
++
++  // Update by user name
++  if ((user_stats = (USER_STATS*)hash_search(&global_user_stats,
++                                             (uchar*)user_string,
++                                             strlen(user_string)))) {
++    // Found user: add this THD's diffs to the existing entry.
++    update_global_user_stats_with_user(thd, user_stats, now);
++  } else {
++    // No entry yet: create one only when the caller asked for it.
++    if (create_user) {
++      increment_count_by_name(user_string, user_string,
++                              &global_user_stats, thd);
++    }
++  }
++
++  // Update by client IP
++  if ((user_stats = (USER_STATS*)hash_search(&global_client_stats,
++                                             (uchar*)client_string,
++                                             strlen(client_string)))) {
++    // Found by client IP: add this THD's diffs to the existing entry.
++    update_global_user_stats_with_user(thd, user_stats, now);
++  } else {
++    // No entry yet: create one only when the caller asked for it.
++    if (create_user) {
++      increment_count_by_name(client_string,
++                              user_string,
++                              &global_client_stats, thd);
++    }
++  }
++
++  if (opt_thread_statistics) {
++    // Update by thread ID
++    if ((thread_stats = (THREAD_STATS*)hash_search(&global_thread_stats,
++                                             (uchar*) &(thd->thread_id),
++                                             sizeof(my_thread_id)))) {
++      // Found by thread ID: add this THD's diffs to the existing entry.
++      update_global_thread_stats_with_thread(thd, thread_stats, now);
++    } else {
++      // No entry yet: create one only when the caller asked for it.
++      if (create_user) {
++        increment_count_by_id(thd->thread_id,
++                              &global_thread_stats, thd);
++      }
++    }
++  }
++
++  thd->last_global_update_time = now;  // advanced once, after every table used the old value
++  thd->reset_diff_stats();
++
++  pthread_mutex_unlock(&LOCK_global_user_client_stats);
++  } else {
++  thd->reset_diff_stats();  // statistics disabled: discard the accumulated diffs
++  }
++}
+ /*
+   check if user has already too many connections
+@@ -154,7 +729,10 @@
+ end:
+   if (error)
++  {
+     uc->connections--; // no need for decrease_user_connections() here
++    statistic_increment(denied_connections, &LOCK_status);
++  }
+   (void) pthread_mutex_unlock(&LOCK_user_conn);
+   DBUG_RETURN(error);
+ }
+@@ -490,6 +1068,7 @@
+     general_log_print(thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
+     DBUG_RETURN(1);
+   }
++  thd->diff_access_denied_errors++;
+   my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
+            thd->main_security_ctx.user,
+            thd->main_security_ctx.host_or_ip,
+@@ -971,11 +1550,20 @@
+       my_sleep(1000);                         /* must wait after eof() */
+ #endif
+     statistic_increment(aborted_connects,&LOCK_status);
++    thd->diff_denied_connections++;
+     DBUG_RETURN(1);
+   }
+   /* Connect completed, set read/write timeouts back to default */
+   my_net_set_read_timeout(net, thd->variables.net_read_timeout);
+   my_net_set_write_timeout(net, thd->variables.net_write_timeout);
++
++  thd->reset_stats();
++  // Updates global user connection stats.
++  if (increment_connection_count(thd, true)) {
++    net_send_error(thd, ER_OUTOFMEMORY);  // Out of memory
++    DBUG_RETURN(1);
++  }
++
+   DBUG_RETURN(0);
+ }
+@@ -997,6 +1585,7 @@
+   if (thd->killed || (net->error && net->vio != 0))
+   {
+     statistic_increment(aborted_threads,&LOCK_status);
++    thd->diff_lost_connections++;
+   }
+   if (net->error && net->vio != 0)
+@@ -1123,10 +1712,14 @@
+   for (;;)
+   {
+     NET *net= &thd->net;
++    bool create_user= TRUE;
+     lex_start(thd);
+     if (login_connection(thd))
++    {
++      create_user= FALSE;
+       goto end_thread;
++    }
+     prepare_new_connection_state(thd);
+@@ -1149,6 +1742,8 @@
+    
+ end_thread:
+     close_connection(thd, 0, 1);
++    thd->update_stats(false);
++    update_global_user_stats(thd, create_user, time(NULL));
+     if (thread_scheduler.end_thread(thd,1))
+       return 0;                                 // Probably no-threads
+diff -ruN a/sql/sql_delete.cc b/sql/sql_delete.cc
+--- a/sql/sql_delete.cc        2010-08-04 02:24:34.000000000 +0900
++++ b/sql/sql_delete.cc        2010-08-27 15:10:33.837058490 +0900
+@@ -452,6 +452,7 @@
+     my_ok(thd, (ha_rows) thd->row_count_func);
+     DBUG_PRINT("info",("%ld records deleted",(long) deleted));
+   }
++  thd->updated_row_count += deleted;
+   DBUG_RETURN(error >= 0 || thd->is_error());
+ }
+@@ -1059,6 +1060,7 @@
+     thd->row_count_func= deleted;
+     ::my_ok(thd, (ha_rows) thd->row_count_func);
+   }
++  thd->updated_row_count += deleted;
+   return 0;
+ }
+diff -ruN a/sql/sql_insert.cc b/sql/sql_insert.cc
+--- a/sql/sql_insert.cc        2010-08-04 02:24:19.000000000 +0900
++++ b/sql/sql_insert.cc        2010-08-27 15:10:33.841059138 +0900
+@@ -981,6 +981,7 @@
+     thd->row_count_func= info.copied + info.deleted + updated;
+     ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
+   }
++  thd->updated_row_count += thd->row_count_func;
+   thd->abort_on_warning= 0;
+   DBUG_RETURN(FALSE);
+@@ -3311,6 +3312,7 @@
+      thd->first_successful_insert_id_in_prev_stmt :
+      (info.copied ? autoinc_value_of_last_inserted_row : 0));
+   ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
++  thd->updated_row_count += thd->row_count_func;
+   DBUG_RETURN(0);
+ }
+diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
+--- a/sql/sql_lex.h    2010-08-27 14:29:26.030989835 +0900
++++ b/sql/sql_lex.h    2010-08-27 15:10:33.844058293 +0900
+@@ -124,6 +124,9 @@
+     When a command is added here, be sure it's also added in mysqld.cc
+     in "struct show_var_st status_vars[]= {" ...
+   */
++  // TODO(mcallaghan): update status_vars in mysqld to export these
++  SQLCOM_SHOW_USER_STATS, SQLCOM_SHOW_TABLE_STATS, SQLCOM_SHOW_INDEX_STATS,
++  SQLCOM_SHOW_CLIENT_STATS, SQLCOM_SHOW_THREAD_STATS,
+   /* This should be the last !!! */
+   SQLCOM_END
+ };
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2010-08-27 14:38:08.757059579 +0900
++++ b/sql/sql_parse.cc 2010-08-27 15:15:30.420996146 +0900
+@@ -46,6 +46,9 @@
+ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables);
+ static bool check_show_create_table_access(THD *thd, TABLE_LIST *table);
++// Uses the THD to update the global stats by user name and client IP
++void update_global_user_stats(THD* thd, bool create_user, time_t now);
++
+ const char *any_db="*any*";   // Special symbol for check_access
+ const LEX_STRING command_name[]={
+@@ -824,6 +827,12 @@
+   */
+   thd->clear_error();                         // Clear error message
+   thd->main_da.reset_diagnostics_area();
++  thd->updated_row_count=0;
++  thd->busy_time=0;
++  thd->cpu_time=0;
++  thd->bytes_received=0;
++  thd->bytes_sent=0;
++  thd->binlog_bytes_written=0;
+   net_new_transaction(net);
+@@ -993,6 +1002,9 @@
+   DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command));
+   thd->command=command;
++  // To increment the correct command counter for user stats, 'command' must
++  // be saved because it is set to COM_SLEEP at the end of this function.
++  thd->old_command = command;
+   /*
+     Commands which always take a long time are logged into
+     the slow log only if opt_log_slow_admin_statements is set.
+@@ -1864,6 +1876,13 @@
+     thd->profiling.discard_current_query();
+ #endif
+     break;
++  case SCH_USER_STATS:
++  case SCH_CLIENT_STATS:
++  case SCH_THREAD_STATS:
++    if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
++      DBUG_RETURN(1);
++  case SCH_TABLE_STATS:
++  case SCH_INDEX_STATS:
+   case SCH_OPEN_TABLES:
+   case SCH_VARIABLES:
+   case SCH_STATUS:
+@@ -2020,6 +2039,7 @@
+                        thd->security_ctx->priv_host)) &&
+         check_global_access(thd, SUPER_ACL))
+     {
++      thd->diff_access_denied_errors++;
+       my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "SUPER");
+       DBUG_RETURN(TRUE);
+     }
+@@ -5331,6 +5351,7 @@
+       if (!no_errors)
+       {
+         const char *db_name= db ? db : thd->db;
++        thd->diff_access_denied_errors++;
+         my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+                  sctx->priv_user, sctx->priv_host, db_name);
+       }
+@@ -5363,12 +5384,15 @@
+   {                                           // We can never grant this
+     DBUG_PRINT("error",("No possible access"));
+     if (!no_errors)
++    {
++      thd->diff_access_denied_errors++;
+       my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
+                sctx->priv_user,
+                sctx->priv_host,
+                (thd->password ?
+                 ER(ER_YES) :
+                 ER(ER_NO)));                    /* purecov: tested */
++    }
+     DBUG_RETURN(TRUE);                                /* purecov: tested */
+   }
+@@ -5394,11 +5418,15 @@
+   DBUG_PRINT("error",("Access denied"));
+   if (!no_errors)
++  {
++    // increment needs !no_errors condition, otherwise double counting.
++    thd->diff_access_denied_errors++;
+     my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+              sctx->priv_user, sctx->priv_host,
+              (db ? db : (thd->db ?
+                          thd->db :
+                          "unknown")));          /* purecov: tested */
++  }
+   DBUG_RETURN(TRUE);                          /* purecov: tested */
+ }
+@@ -5427,6 +5455,7 @@
+     if (!thd->col_access && check_grant_db(thd, dst_db_name))
+     {
++      thd->diff_access_denied_errors++;
+       my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+                thd->security_ctx->priv_user,
+                thd->security_ctx->priv_host,
+@@ -5508,9 +5537,12 @@
+         (want_access & ~(SELECT_ACL | EXTRA_ACL | FILE_ACL)))
+     {
+       if (!no_errors)
++      {
++        thd->diff_access_denied_errors++;
+         my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+                  sctx->priv_user, sctx->priv_host,
+                  INFORMATION_SCHEMA_NAME.str);
++      }
+       return TRUE;
+     }
+     /*
+@@ -5673,6 +5705,7 @@
+   if ((thd->security_ctx->master_access & want_access))
+     return 0;
+   get_privilege_desc(command, sizeof(command), want_access);
++  thd->diff_access_denied_errors++;
+   my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), command);
+   return 1;
+ #else
+@@ -6054,6 +6087,34 @@
+   lex_start(thd);
+   mysql_reset_thd_for_next_command(thd);
++  int start_time_error = 0;
++  int end_time_error = 0;
++  struct timeval start_time, end_time;
++  double start_usecs = 0;
++  double end_usecs = 0;
++  /* cpu time */
++  int cputime_error = 0;
++  struct timespec tp;
++  double start_cpu_nsecs = 0;
++  double end_cpu_nsecs = 0;
++
++  if (opt_userstat_running) {
++#ifdef HAVE_CLOCK_GETTIME
++    /* get start cputime */
++    if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#else
++#warning : HAVE_CLOCK_GETTIME is disabled.
++#warning : Most systems require librt library to use the function clock_gettime().
++#warning : Did you set environment when ./configure ?  (e.g. "export LIBS=-lrt" for sh)
++#endif
++
++    // Gets the start time, in order to measure how long this command takes.
++    if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++      start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++    }
++  }
++
+   if (query_cache_send_result_to_client(thd, rawbuf, length) <= 0)
+   {
+     LEX *lex= thd->lex;
+@@ -6134,6 +6195,43 @@
+     *found_semicolon= NULL;
+   }
++  if (opt_userstat_running) {
++    // Gets the end time.
++    if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++      end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++    }
++
++    // Calculates the difference between the end and start times.
++    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
++      thd->busy_time = (end_usecs - start_usecs) / 1000000;
++      // In case there are bad values, 2629743 is the #seconds in a month.
++      if (thd->busy_time > 2629743) {
++        thd->busy_time = 0;
++      }
++    } else {
++      // end time went back in time, or gettimeofday() failed.
++      thd->busy_time = 0;
++    }
++
++#ifdef HAVE_CLOCK_GETTIME
++    /* get end cputime */
++    if (!cputime_error &&
++        !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++    if (start_cpu_nsecs && !cputime_error) {
++      thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++      // In case there are bad values, 2629743 is the #seconds in a month.
++      if (thd->cpu_time > 2629743) {
++        thd->cpu_time = 0;
++      }
++    } else
++      thd->cpu_time = 0;
++  }
++  // Updates THD stats and the global user stats.
++  thd->update_stats(true);
++  update_global_user_stats(thd, true, time(NULL));
++
+   DBUG_VOID_RETURN;
+ }
+@@ -6999,6 +7097,13 @@
+     if (flush_error_log())
+       result=1;
+   }
++  if (((options & (REFRESH_SLOW_QUERY_LOG | REFRESH_LOG)) ==
++       REFRESH_SLOW_QUERY_LOG))
++  {
++    /* We are only flushing slow query log */
++    logger.flush_slow_log(thd);
++  }
++
+ #ifdef HAVE_QUERY_CACHE
+   if (options & REFRESH_QUERY_CACHE_FREE)
+   {
+@@ -7099,6 +7204,40 @@
+ #endif
+  if (options & REFRESH_USER_RESOURCES)
+    reset_mqh((LEX_USER *) NULL, 0);             /* purecov: inspected */
++  if (options & REFRESH_TABLE_STATS)
++  {
++    pthread_mutex_lock(&LOCK_global_table_stats);
++    free_global_table_stats();
++    init_global_table_stats();
++    pthread_mutex_unlock(&LOCK_global_table_stats);
++  }
++  if (options & REFRESH_INDEX_STATS)
++  {
++    pthread_mutex_lock(&LOCK_global_index_stats);
++    free_global_index_stats();
++    init_global_index_stats();
++    pthread_mutex_unlock(&LOCK_global_index_stats);
++  }
++  if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS | REFRESH_THREAD_STATS))
++  {
++    pthread_mutex_lock(&LOCK_global_user_client_stats);
++    if (options & REFRESH_USER_STATS)
++    {
++      free_global_user_stats();
++      init_global_user_stats();
++    }
++    if (options & REFRESH_CLIENT_STATS)
++    {
++      free_global_client_stats();
++      init_global_client_stats();
++    }
++    if (options & REFRESH_THREAD_STATS)
++    {
++      free_global_thread_stats();
++      init_global_thread_stats();
++    }
++    pthread_mutex_unlock(&LOCK_global_user_client_stats);
++  }
+  *write_to_binlog= tmp_write_to_binlog;
+  /*
+    If the query was killed then this function must fail.
+diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
+--- a/sql/sql_prepare.cc       2010-08-27 14:29:26.043058814 +0900
++++ b/sql/sql_prepare.cc       2010-08-27 15:10:33.858058832 +0900
+@@ -96,6 +96,9 @@
+ #include <mysql_com.h>
+ #endif
++// Uses the THD to update the global stats by user name and client IP
++void update_global_user_stats(THD* thd, bool create_user, time_t now);
++
+ /**
+   A result class used to send cursor rows using the binary protocol.
+ */
+@@ -2103,8 +2106,36 @@
+   /* First of all clear possible warnings from the previous command */
+   mysql_reset_thd_for_next_command(thd);
++  int start_time_error = 0;
++  int end_time_error = 0;
++  struct timeval start_time, end_time;
++  double start_usecs = 0;
++  double end_usecs = 0;
++  /* cpu time */
++  int cputime_error = 0;
++  struct timespec tp;
++  double start_cpu_nsecs = 0;
++  double end_cpu_nsecs = 0;
++
++  if (opt_userstat_running) {
++#ifdef HAVE_CLOCK_GETTIME
++    /* get start cputime */
++    if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#else
++#warning : HAVE_CLOCK_GETTIME is disabled.
++#warning : Most systems require librt library to use the function clock_gettime().
++#warning : Did you set environment when ./configure ?  (e.g. "export LIBS=-lrt" for sh)
++#endif
++
++    // Gets the start time, in order to measure how long this command takes.
++    if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++      start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++    }
++  }
++
+   if (! (stmt= new Prepared_statement(thd)))
+-    DBUG_VOID_RETURN; /* out of memory: error is set in Sql_alloc */
++    goto end; /* out of memory: error is set in Sql_alloc */
+   if (thd->stmt_map.insert(thd, stmt))
+   {
+@@ -2112,7 +2143,7 @@
+       The error is set in the insert. The statement itself
+       will be also deleted there (this is how the hash works).
+     */
+-    DBUG_VOID_RETURN;
++    goto end;
+   }
+   /* Reset warnings from previous command */
+@@ -2139,6 +2170,44 @@
+   thd->protocol= save_protocol;
+   /* check_prepared_statemnt sends the metadata packet in case of success */
++end:
++  if (opt_userstat_running) {
++    // Gets the end time.
++    if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++      end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++    }
++
++    // Calculates the difference between the end and start times.
++    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
++      thd->busy_time = (end_usecs - start_usecs) / 1000000;
++      // In case there are bad values, 2629743 is the #seconds in a month.
++      if (thd->busy_time > 2629743) {
++        thd->busy_time = 0;
++      }
++    } else {
++      // end time went back in time, or gettimeofday() failed.
++      thd->busy_time = 0;
++    }
++
++#ifdef HAVE_CLOCK_GETTIME
++    /* get end cputime */
++    if (!cputime_error &&
++        !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++    if (start_cpu_nsecs && !cputime_error) {
++      thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++      // In case there are bad values, 2629743 is the #seconds in a month.
++      if (thd->cpu_time > 2629743) {
++        thd->cpu_time = 0;
++      }
++    } else
++      thd->cpu_time = 0;
++  }
++  // Updates THD stats and the global user stats.
++  thd->update_stats(true);
++  update_global_user_stats(thd, true, time(NULL));
++
+   DBUG_VOID_RETURN;
+ }
+@@ -2485,12 +2554,36 @@
+   /* First of all clear possible warnings from the previous command */
+   mysql_reset_thd_for_next_command(thd);
++  int start_time_error = 0;
++  int end_time_error = 0;
++  struct timeval start_time, end_time;
++  double start_usecs = 0;
++  double end_usecs = 0;
++  /* cpu time */
++  int cputime_error = 0;
++  struct timespec tp;
++  double start_cpu_nsecs = 0;
++  double end_cpu_nsecs = 0;
++
++  if (opt_userstat_running) {
++#ifdef HAVE_CLOCK_GETTIME
++    /* get start cputime */
++    if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++
++    // Gets the start time, in order to measure how long this command takes.
++    if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++      start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++    }
++  }
++
+   if (!(stmt= find_prepared_statement(thd, stmt_id)))
+   {
+     char llbuf[22];
+     my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), sizeof(llbuf),
+              llstr(stmt_id, llbuf), "mysqld_stmt_execute");
+-    DBUG_VOID_RETURN;
++    goto end;
+   }
+ #if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER)
+@@ -2511,6 +2604,44 @@
+   /* Close connection socket; for use with client testing (Bug#43560). */
+   DBUG_EXECUTE_IF("close_conn_after_stmt_execute", vio_close(thd->net.vio););
++end:
++  if (opt_userstat_running) {
++    // Gets the end time.
++    if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++      end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++    }
++
++    // Calculates the difference between the end and start times.
++    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
++      thd->busy_time = (end_usecs - start_usecs) / 1000000;
++      // In case there are bad values, 2629743 is the #seconds in a month.
++      if (thd->busy_time > 2629743) {
++        thd->busy_time = 0;
++      }
++    } else {
++      // end time went back in time, or gettimeofday() failed.
++      thd->busy_time = 0;
++    }
++
++#ifdef HAVE_CLOCK_GETTIME
++    /* get end cputime */
++    if (!cputime_error &&
++        !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++    if (start_cpu_nsecs && !cputime_error) {
++      thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++      // In case there are bad values, 2629743 is the #seconds in a month.
++      if (thd->cpu_time > 2629743) {
++        thd->cpu_time = 0;
++      }
++    } else
++      thd->cpu_time = 0;
++  }
++  // Updates THD stats and the global user stats.
++  thd->update_stats(true);
++  update_global_user_stats(thd, true, time(NULL));
++
+   DBUG_VOID_RETURN;
+ }
+@@ -2584,20 +2715,45 @@
+   /* First of all clear possible warnings from the previous command */
+   mysql_reset_thd_for_next_command(thd);
++
++  int start_time_error = 0;
++  int end_time_error = 0;
++  struct timeval start_time, end_time;
++  double start_usecs = 0;
++  double end_usecs = 0;
++  /* cpu time */
++  int cputime_error = 0;
++  struct timespec tp;
++  double start_cpu_nsecs = 0;
++  double end_cpu_nsecs = 0;
++
++  if (opt_userstat_running) {
++#ifdef HAVE_CLOCK_GETTIME
++    /* get start cputime */
++    if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++
++    // Gets the start time, in order to measure how long this command takes.
++    if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++      start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++    }
++  }
++
+   status_var_increment(thd->status_var.com_stmt_fetch);
+   if (!(stmt= find_prepared_statement(thd, stmt_id)))
+   {
+     char llbuf[22];
+     my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), sizeof(llbuf),
+              llstr(stmt_id, llbuf), "mysqld_stmt_fetch");
+-    DBUG_VOID_RETURN;
++    goto end;
+   }
+   cursor= stmt->cursor;
+   if (!cursor)
+   {
+     my_error(ER_STMT_HAS_NO_OPEN_CURSOR, MYF(0), stmt_id);
+-    DBUG_VOID_RETURN;
++    goto end;
+   }
+   thd->stmt_arena= stmt;
+@@ -2621,6 +2777,44 @@
+   thd->restore_backup_statement(stmt, &stmt_backup);
+   thd->stmt_arena= thd;
++end:
++  if (opt_userstat_running) {
++    // Gets the end time.
++    if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++      end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++    }
++
++    // Calculates the difference between the end and start times.
++    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
++      thd->busy_time = (end_usecs - start_usecs) / 1000000;
++      // In case there are bad values, 2629743 is the #seconds in a month.
++      if (thd->busy_time > 2629743) {
++        thd->busy_time = 0;
++      }
++    } else {
++      // end time went back in time, or gettimeofday() failed.
++      thd->busy_time = 0;
++    }
++
++#ifdef HAVE_CLOCK_GETTIME
++    /* get end cputime */
++    if (!cputime_error &&
++        !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++    if (start_cpu_nsecs && !cputime_error) {
++      thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++      // In case there are bad values, 2629743 is the #seconds in a month.
++      if (thd->cpu_time > 2629743) {
++        thd->cpu_time = 0;
++      }
++    } else
++      thd->cpu_time = 0;
++  }
++  // Updates THD stats and the global user stats.
++  thd->update_stats(true);
++  update_global_user_stats(thd, true, time(NULL));
++
+   DBUG_VOID_RETURN;
+ }
+@@ -2651,13 +2845,37 @@
+   /* First of all clear possible warnings from the previous command */
+   mysql_reset_thd_for_next_command(thd);
++  int start_time_error = 0;
++  int end_time_error = 0;
++  struct timeval start_time, end_time;
++  double start_usecs = 0;
++  double end_usecs = 0;
++  /* cpu time */
++  int cputime_error = 0;
++  struct timespec tp;
++  double start_cpu_nsecs = 0;
++  double end_cpu_nsecs = 0;
++
++  if (opt_userstat_running) {
++#ifdef HAVE_CLOCK_GETTIME
++    /* get start cputime */
++    if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++
++    // Gets the start time, in order to measure how long this command takes.
++    if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++      start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++    }
++  }
++
+   status_var_increment(thd->status_var.com_stmt_reset);
+   if (!(stmt= find_prepared_statement(thd, stmt_id)))
+   {
+     char llbuf[22];
+     my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), sizeof(llbuf),
+              llstr(stmt_id, llbuf), "mysqld_stmt_reset");
+-    DBUG_VOID_RETURN;
++    goto end;
+   }
+   stmt->close_cursor();
+@@ -2674,6 +2892,44 @@
+   my_ok(thd);
++end:
++  if (opt_userstat_running) {
++    // Gets the end time.
++    if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++      end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++    }
++
++    // Calculates the difference between the end and start times.
++    if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
++      thd->busy_time = (end_usecs - start_usecs) / 1000000;
++      // In case there are bad values, 2629743 is the #seconds in a month.
++      if (thd->busy_time > 2629743) {
++        thd->busy_time = 0;
++      }
++    } else {
++      // end time went back in time, or gettimeofday() failed.
++      thd->busy_time = 0;
++    }
++
++#ifdef HAVE_CLOCK_GETTIME
++    /* get end cputime */
++    if (!cputime_error &&
++        !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++      end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++    if (start_cpu_nsecs && !cputime_error) {
++      thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++      // In case there are bad values, 2629743 is the #seconds in a month.
++      if (thd->cpu_time > 2629743) {
++        thd->cpu_time = 0;
++      }
++    } else
++      thd->cpu_time = 0;
++  }
++  // Updates THD stats and the global user stats.
++  thd->update_stats(true);
++  update_global_user_stats(thd, true, time(NULL));
++
+   DBUG_VOID_RETURN;
+ }
+diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
+--- a/sql/sql_show.cc  2010-08-27 14:48:13.050141329 +0900
++++ b/sql/sql_show.cc  2010-08-27 15:10:33.866059533 +0900
+@@ -84,6 +84,40 @@
+ static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table);
++/*
++ * Fallback strsep() for platforms that lack it (Solaris 10 does not have one).
++ * Returns the next token of *str, overwrites the delimiter ending it with
++ * '\0' and moves *str past it; *str becomes NULL after the last token.
++ * based on getToken from http://www.winehq.org/pipermail/wine-patches/2001-November/001322.html
++ */
++
++#ifndef HAVE_STRSEP
++static char* strsep(char** str, const char* delims)
++{
++  char *start = *str;
++  char *cur;
++
++  if (start == NULL) {
++    /* Input was already exhausted by an earlier call */
++    return NULL;
++  }
++
++  for (cur = start; *cur != '\0'; cur++) {
++    if (strchr(delims, *cur) != NULL) {
++      /* Cut the token here; the next call resumes after the delimiter */
++      *cur = '\0';
++      *str = cur + 1;
++      return start;
++    }
++  }
++
++  /* No delimiter left: the remainder is the final token */
++  *str = NULL;
++
++  return start;
++}
++#endif
++
+ /***************************************************************************
+ ** List all table types supported
+ ***************************************************************************/
+@@ -832,6 +866,7 @@
+               sctx->master_access);
+   if (!(db_access & DB_ACLS) && check_grant_db(thd,dbname))
+   {
++    thd->diff_access_denied_errors++;
+     my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+              sctx->priv_user, sctx->host_or_ip, dbname);
+     general_log_print(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR),
+@@ -2386,6 +2421,279 @@
+   DBUG_RETURN(res);
+ }
++/*
++   Copy every entry of a user/client statistics hash into an I_S table
++
++   SYNOPSIS
++     send_user_stats
++       thd            - current thread
++       all_user_stats - hash of USER_STATS values to return
++       table          - I_S table that receives one row per hash entry
++
++   RETURN
++     0 - OK
++     1 - error (a row could not be stored)
++ */
++int send_user_stats(THD* thd, HASH *all_user_stats, TABLE *table)
++{
++  DBUG_ENTER("send_user_stats");
++  for (uint i = 0; i < all_user_stats->records; ++i) {
++    restore_record(table, s->default_values);
++    USER_STATS *row = (USER_STATS*)hash_element(all_user_stats, i);
++    table->field[0]->store(row->user, strlen(row->user), system_charset_info);
++    table->field[1]->store((longlong)row->total_connections);
++    table->field[2]->store((longlong)row->concurrent_connections);
++    table->field[3]->store((longlong)row->connected_time);
++    table->field[4]->store((longlong)row->busy_time);
++    table->field[5]->store((longlong)row->cpu_time);
++    table->field[6]->store((longlong)row->bytes_received);
++    table->field[7]->store((longlong)row->bytes_sent);
++    table->field[8]->store((longlong)row->binlog_bytes_written);
++    table->field[9]->store((longlong)row->rows_fetched);
++    table->field[10]->store((longlong)row->rows_updated);
++    table->field[11]->store((longlong)row->rows_read);
++    table->field[12]->store((longlong)row->select_commands);
++    table->field[13]->store((longlong)row->update_commands);
++    table->field[14]->store((longlong)row->other_commands);
++    table->field[15]->store((longlong)row->commit_trans);
++    table->field[16]->store((longlong)row->rollback_trans);
++    table->field[17]->store((longlong)row->denied_connections);
++    table->field[18]->store((longlong)row->lost_connections);
++    table->field[19]->store((longlong)row->access_denied_errors);
++    table->field[20]->store((longlong)row->empty_queries);
++    if (schema_table_store_record(thd, table)) {
++      DBUG_PRINT("error", ("store record error"));
++      DBUG_RETURN(1);
++    }
++  }
++  DBUG_RETURN(0);
++}
++
++// Copies each THREAD_STATS entry of the hash into the I_S table; 0 = OK, 1 = error.
++int send_thread_stats(THD* thd, HASH *all_thread_stats, TABLE *table)
++{
++  DBUG_ENTER("send_thread_stats");
++  for (uint i = 0; i < all_thread_stats->records; ++i) {
++    restore_record(table, s->default_values);
++    THREAD_STATS *row = (THREAD_STATS*)hash_element(all_thread_stats, i);
++    table->field[0]->store((longlong)row->id);
++    table->field[1]->store((longlong)row->total_connections);
++    table->field[2]->store((longlong)row->concurrent_connections);
++    table->field[3]->store((longlong)row->connected_time);
++    table->field[4]->store((longlong)row->busy_time);
++    table->field[5]->store((longlong)row->cpu_time);
++    table->field[6]->store((longlong)row->bytes_received);
++    table->field[7]->store((longlong)row->bytes_sent);
++    table->field[8]->store((longlong)row->binlog_bytes_written);
++    table->field[9]->store((longlong)row->rows_fetched);
++    table->field[10]->store((longlong)row->rows_updated);
++    table->field[11]->store((longlong)row->rows_read);
++    table->field[12]->store((longlong)row->select_commands);
++    table->field[13]->store((longlong)row->update_commands);
++    table->field[14]->store((longlong)row->other_commands);
++    table->field[15]->store((longlong)row->commit_trans);
++    table->field[16]->store((longlong)row->rollback_trans);
++    table->field[17]->store((longlong)row->denied_connections);
++    table->field[18]->store((longlong)row->lost_connections);
++    table->field[19]->store((longlong)row->access_denied_errors);
++    table->field[20]->store((longlong)row->empty_queries);
++    if (schema_table_store_record(thd, table)) {
++      DBUG_PRINT("error", ("store record error"));
++      DBUG_RETURN(1);
++    }
++  }
++  DBUG_RETURN(0);
++}
++
++/*
++   Fill the I_S table behind SHOW USER_STATISTICS
++
++   SYNOPSIS
++     fill_schema_user_stats
++       thd    - current thread
++       tables - table list whose ->table receives the rows
++       cond   - not used by this function
++
++   RETURN
++     0 - OK
++     1 - error (check_global_access() refused, or a row store failed)
++ */
++
++
++int fill_schema_user_stats(THD* thd, TABLE_LIST* tables, COND* cond)
++{
++  TABLE *table= tables->table;
++  DBUG_ENTER("fill_schema_user_stats");
++
++  if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
++    DBUG_RETURN(1);
++
++  // Hold the stats mutex only while global_user_stats is copied into the
++  // I_S table; every entry is returned (no filtering is done here).
++
++  pthread_mutex_lock(&LOCK_global_user_client_stats);
++  int status= send_user_stats(thd, &global_user_stats, table);
++  pthread_mutex_unlock(&LOCK_global_user_client_stats);
++
++  if (status)
++  {
++    DBUG_PRINT("exit", ("fill_schema_user_stats result is 1"));
++    DBUG_RETURN(1);
++  }
++
++  DBUG_PRINT("exit", ("fill_schema_user_stats result is 0"));
++  DBUG_RETURN(0);
++}
++
++/*
++   Process SHOW CLIENT_STATISTICS by filling the CLIENT_STATISTICS schema table
++
++   SYNOPSIS
++     fill_schema_client_stats
++       thd    - current thread
++       tables - table list; rows are written to tables->table
++       cond   - not consulted by this function
++   RETURN
++     0 - OK
++     1 - error (privilege check failed or send_user_stats reported failure)
++ */
++
++
++int fill_schema_client_stats(THD* thd, TABLE_LIST* tables, COND* cond)
++{
++  TABLE *table= tables->table;
++  DBUG_ENTER("fill_schema_client_stats");
++
++  if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))  // privilege gate on SUPER_ACL | PROCESS_ACL
++          DBUG_RETURN(1);
++
++  // Emit the contents of global_client_stats while holding the stats mutex.
++  // No pattern matching happens here: neither cond nor a wildcard is passed on.
++
++  pthread_mutex_lock(&LOCK_global_user_client_stats);
++  int result= send_user_stats(thd, &global_client_stats, table);  // client rows reuse the USER_STATS sender
++  pthread_mutex_unlock(&LOCK_global_user_client_stats);
++  if (result)
++    goto err;
++
++  DBUG_PRINT("exit", ("fill_schema_client_stats result is 0"));  // trace names this function, as fill_schema_user_stats does
++  DBUG_RETURN(0);
++
++ err:
++  DBUG_PRINT("exit", ("fill_schema_client_stats result is 1"));
++  DBUG_RETURN(1);
++}
++
++int fill_schema_thread_stats(THD* thd, TABLE_LIST* tables, COND* cond)  // fills the THREAD_STATISTICS schema table; returns 0 on success, 1 on error
++{
++  TABLE *table= tables->table;
++  DBUG_ENTER("fill_schema_thread_stats");
++
++  if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))  // privilege gate on SUPER_ACL | PROCESS_ACL
++          DBUG_RETURN(1);
++
++  // Emit the contents of global_thread_stats while holding the stats mutex.
++  // No pattern matching happens here: neither cond nor a wildcard is passed on.
++
++  pthread_mutex_lock(&LOCK_global_user_client_stats);  // thread stats share the user/client stats mutex
++  int result= send_thread_stats(thd, &global_thread_stats, table);
++  pthread_mutex_unlock(&LOCK_global_user_client_stats);
++  if (result)
++    goto err;
++
++  DBUG_PRINT("exit", ("fill_schema_thread_stats result is 0"));  // trace names this function, as fill_schema_user_stats does
++  DBUG_RETURN(0);
++
++ err:
++  DBUG_PRINT("exit", ("fill_schema_thread_stats result is 1"));
++  DBUG_RETURN(1);
++}
++
++// Writes one row per global_table_stats entry into tables->table, skipping tables the user may not SELECT; returns 0 on success, 1 if a row cannot be stored.
++int fill_schema_table_stats(THD* thd, TABLE_LIST* tables, COND* cond)
++{
++  TABLE *table= tables->table;
++  DBUG_ENTER("fill_schema_table_stats");
++  char *table_full_name, *table_schema;
++
++  pthread_mutex_lock(&LOCK_global_table_stats);  // held for the whole scan; released on both exit paths below
++  for (uint i = 0; i < global_table_stats.records; ++i) {
++    restore_record(table, s->default_values);
++    TABLE_STATS *table_stats = 
++      (TABLE_STATS*)hash_element(&global_table_stats, i);
++
++    table_full_name= thd->strdup(table_stats->table);  // work on a copy: strsep() below overwrites the separator
++    table_schema= strsep(&table_full_name, ".");  // text before the first '.' is the schema; table_full_name now points at the rest. NOTE(review): it would be NULL if the key had no '.' - confirm keys are always "db.table"
++
++    TABLE_LIST tmp_table;  // scratch descriptor used only for the privilege checks
++    bzero((char*) &tmp_table,sizeof(tmp_table));
++    tmp_table.table_name= table_full_name;
++    tmp_table.db= table_schema;
++    tmp_table.grant.privilege= 0;
++    if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db,
++                      &tmp_table.grant.privilege, 0, 0,
++                      is_schema_db(table_schema)) ||
++         check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1))
++        continue;  // either check failed: leave this table out of the result
++
++    table->field[0]->store(table_schema, strlen(table_schema), system_charset_info);
++    table->field[1]->store(table_full_name, strlen(table_full_name), system_charset_info);
++    table->field[2]->store((longlong)table_stats->rows_read, TRUE);
++    table->field[3]->store((longlong)table_stats->rows_changed, TRUE);
++    table->field[4]->store((longlong)table_stats->rows_changed_x_indexes, TRUE);
++
++    if (schema_table_store_record(thd, table))
++    {
++      VOID(pthread_mutex_unlock(&LOCK_global_table_stats));  // do not leave the mutex held on the error path
++      DBUG_RETURN(1);
++    }
++  }
++  pthread_mutex_unlock(&LOCK_global_table_stats);
++  DBUG_RETURN(0);
++}
++
++// Writes one row per global_index_stats entry into tables->table, skipping indexes of tables the user may not SELECT; returns 0 on success, 1 if a row cannot be stored.
++int fill_schema_index_stats(THD* thd, TABLE_LIST* tables, COND* cond)
++{
++  TABLE *table= tables->table;
++  DBUG_ENTER("fill_schema_index_stats");
++  char *index_full_name, *table_schema, *table_name;
++
++  pthread_mutex_lock(&LOCK_global_index_stats);  // held for the whole scan; released on both exit paths below
++  for (uint i = 0; i < global_index_stats.records; ++i) {
++    restore_record(table, s->default_values);
++    INDEX_STATS *index_stats =
++      (INDEX_STATS*)hash_element(&global_index_stats, i);
++
++    index_full_name= thd->strdup(index_stats->index);  // work on a copy: strsep() below overwrites the separators
++    table_schema= strsep(&index_full_name, ".");  // first component of the key: schema
++    table_name= strsep(&index_full_name, ".");  // second component: table; index_full_name now points at the index part. NOTE(review): NULL results are possible if the key has fewer than two '.' - confirm keys are always "db.table.index"
++
++    TABLE_LIST tmp_table;  // scratch descriptor used only for the privilege checks
++    bzero((char*) &tmp_table,sizeof(tmp_table));
++    tmp_table.table_name= table_name;
++    tmp_table.db= table_schema;
++    tmp_table.grant.privilege= 0;
++    if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db,
++                      &tmp_table.grant.privilege, 0, 0,
++                      is_schema_db(table_schema)) ||
++         check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1))
++        continue;  // either check failed: leave this index out of the result
++
++    table->field[0]->store(table_schema, strlen(table_schema), system_charset_info);
++    table->field[1]->store(table_name, strlen(table_name), system_charset_info);
++    table->field[2]->store(index_full_name, strlen(index_full_name), system_charset_info);
++    table->field[3]->store((longlong)index_stats->rows_read, TRUE);
++
++    if (schema_table_store_record(thd, table))
++    { 
++      VOID(pthread_mutex_unlock(&LOCK_global_index_stats));  // do not leave the mutex held on the error path
++      DBUG_RETURN(1);
++    }
++  }
++  pthread_mutex_unlock(&LOCK_global_index_stats);
++  DBUG_RETURN(0);
++}
+ /* collect status for all running threads */
+@@ -6688,6 +6996,104 @@
+ };
++ST_FIELD_INFO user_stats_fields_info[]=  // USER_STATISTICS columns; order must match the field[] indices filled by send_user_stats
++{  // numeric columns are MYSQL_TYPE_LONGLONG so 64-bit counters are not clipped to a 32-bit column
++  {"USER", USERNAME_LENGTH, MYSQL_TYPE_STRING, 0, 0, "User", SKIP_OPEN_TABLE},
++  {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Total_connections", SKIP_OPEN_TABLE},
++  {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Concurrent_connections", SKIP_OPEN_TABLE},
++  {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Connected_time", SKIP_OPEN_TABLE},
++  {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Busy_time", SKIP_OPEN_TABLE},
++  {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Cpu_time", SKIP_OPEN_TABLE},
++  {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received", SKIP_OPEN_TABLE},
++  {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent", SKIP_OPEN_TABLE},
++  {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written", SKIP_OPEN_TABLE},
++  {"ROWS_FETCHED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_fetched", SKIP_OPEN_TABLE},
++  {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated", SKIP_OPEN_TABLE},
++  {"TABLE_ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Table_rows_read", SKIP_OPEN_TABLE},
++  {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands", SKIP_OPEN_TABLE},
++  {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands", SKIP_OPEN_TABLE},
++  {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands", SKIP_OPEN_TABLE},
++  {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions", SKIP_OPEN_TABLE},
++  {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions", SKIP_OPEN_TABLE},
++  {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections", SKIP_OPEN_TABLE},
++  {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections", SKIP_OPEN_TABLE},
++  {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied", SKIP_OPEN_TABLE},
++  {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries", SKIP_OPEN_TABLE},
++  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
++};
++
++ST_FIELD_INFO client_stats_fields_info[]=  // CLIENT_STATISTICS columns; rows are produced by send_user_stats, so order must match its field[] indices
++{  // numeric columns are MYSQL_TYPE_LONGLONG so 64-bit counters are not clipped to a 32-bit column
++  {"CLIENT", LIST_PROCESS_HOST_LEN, MYSQL_TYPE_STRING, 0, 0, "Client", SKIP_OPEN_TABLE},
++  {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Total_connections", SKIP_OPEN_TABLE},
++  {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Concurrent_connections", SKIP_OPEN_TABLE},
++  {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Connected_time", SKIP_OPEN_TABLE},
++  {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Busy_time", SKIP_OPEN_TABLE},
++  {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Cpu_time", SKIP_OPEN_TABLE},
++  {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received", SKIP_OPEN_TABLE},
++  {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent", SKIP_OPEN_TABLE},
++  {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written", SKIP_OPEN_TABLE},
++  {"ROWS_FETCHED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_fetched", SKIP_OPEN_TABLE},
++  {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated", SKIP_OPEN_TABLE},
++  {"TABLE_ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Table_rows_read", SKIP_OPEN_TABLE},
++  {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands", SKIP_OPEN_TABLE},
++  {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands", SKIP_OPEN_TABLE},
++  {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands", SKIP_OPEN_TABLE},
++  {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions", SKIP_OPEN_TABLE},
++  {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions", SKIP_OPEN_TABLE},
++  {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections", SKIP_OPEN_TABLE},
++  {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections", SKIP_OPEN_TABLE},
++  {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied", SKIP_OPEN_TABLE},
++  {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries", SKIP_OPEN_TABLE},
++  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
++};
++
++ST_FIELD_INFO thread_stats_fields_info[]=  // THREAD_STATISTICS columns (rows come from send_thread_stats)
++{  // numeric columns are MYSQL_TYPE_LONGLONG so 64-bit counters are not clipped to a 32-bit column
++  {"THREAD_ID", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Thread_id", SKIP_OPEN_TABLE},
++  {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Total_connections", SKIP_OPEN_TABLE},
++  {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Concurrent_connections", SKIP_OPEN_TABLE},
++  {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Connected_time", SKIP_OPEN_TABLE},
++  {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Busy_time", SKIP_OPEN_TABLE},
++  {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Cpu_time", SKIP_OPEN_TABLE},
++  {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received", SKIP_OPEN_TABLE},
++  {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent", SKIP_OPEN_TABLE},
++  {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written", SKIP_OPEN_TABLE},
++  {"ROWS_FETCHED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_fetched", SKIP_OPEN_TABLE},
++  {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated", SKIP_OPEN_TABLE},
++  {"TABLE_ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Table_rows_read", SKIP_OPEN_TABLE},
++  {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands", SKIP_OPEN_TABLE},
++  {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands", SKIP_OPEN_TABLE},
++  {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands", SKIP_OPEN_TABLE},
++  {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions", SKIP_OPEN_TABLE},
++  {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions", SKIP_OPEN_TABLE},
++  {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections", SKIP_OPEN_TABLE},
++  {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections", SKIP_OPEN_TABLE},
++  {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied", SKIP_OPEN_TABLE},
++  {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries", SKIP_OPEN_TABLE},
++  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
++};
++
++ST_FIELD_INFO table_stats_fields_info[]=  // TABLE_STATISTICS columns; order must match field[0..4] in fill_schema_table_stats
++{  // counters are ulonglong in TABLE_STATS, hence MYSQL_TYPE_LONGLONG rather than the 32-bit MYSQL_TYPE_LONG
++  {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema", SKIP_OPEN_TABLE},
++  {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name", SKIP_OPEN_TABLE},
++  {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read", SKIP_OPEN_TABLE},
++  {"ROWS_CHANGED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_changed", SKIP_OPEN_TABLE},
++  {"ROWS_CHANGED_X_INDEXES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_changed_x_#indexes", SKIP_OPEN_TABLE},
++  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
++};
++
++ST_FIELD_INFO index_stats_fields_info[]=  // INDEX_STATISTICS columns; order must match field[0..3] in fill_schema_index_stats
++{  // rows_read is ulonglong in INDEX_STATS, hence MYSQL_TYPE_LONGLONG rather than the 32-bit MYSQL_TYPE_LONG
++  {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema", SKIP_OPEN_TABLE},
++  {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name", SKIP_OPEN_TABLE},
++  {"INDEX_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Index_name", SKIP_OPEN_TABLE},
++  {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read", SKIP_OPEN_TABLE},
++  {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
++};
++
++
+ ST_FIELD_INFO processlist_fields_info[]=
+ {
+   {"ID", 4, MYSQL_TYPE_LONGLONG, 0, 0, "Id", SKIP_OPEN_TABLE},
+@@ -6823,6 +7229,8 @@
+ {
+   {"CHARACTER_SETS", charsets_fields_info, create_schema_table, 
+    fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0, 0},
++  {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table, 
++    fill_schema_client_stats, make_old_format, 0, -1, -1, 0, 0},
+   {"COLLATIONS", collation_fields_info, create_schema_table, 
+    fill_schema_collation, make_old_format, 0, -1, -1, 0, 0},
+   {"COLLATION_CHARACTER_SET_APPLICABILITY", coll_charset_app_fields_info,
+@@ -6832,6 +7240,8 @@
+    OPTIMIZE_I_S_TABLE|OPEN_VIEW_FULL},
+   {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table,
+    fill_schema_column_privileges, 0, 0, -1, -1, 0, 0},
++  {"INDEX_STATISTICS", index_stats_fields_info, create_schema_table,
++   fill_schema_index_stats, make_old_format, 0, -1, -1, 0, 0},
+   {"ENGINES", engines_fields_info, create_schema_table,
+    fill_schema_engines, make_old_format, 0, -1, -1, 0, 0},
+ #ifdef HAVE_EVENT_SCHEDULER
+@@ -6888,11 +7298,17 @@
+    get_all_tables, make_table_names_old_format, 0, 1, 2, 1, 0},
+   {"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table,
+    fill_schema_table_privileges, 0, 0, -1, -1, 0, 0},
++  {"TABLE_STATISTICS", table_stats_fields_info, create_schema_table,
++    fill_schema_table_stats, make_old_format, 0, -1, -1, 0, 0},
++  {"THREAD_STATISTICS", thread_stats_fields_info, create_schema_table,
++    fill_schema_thread_stats, make_old_format, 0, -1, -1, 0, 0},
+   {"TRIGGERS", triggers_fields_info, create_schema_table,
+    get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0,
+    OPEN_TABLE_ONLY},
+   {"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table, 
+    fill_schema_user_privileges, 0, 0, -1, -1, 0, 0},
++  {"USER_STATISTICS", user_stats_fields_info, create_schema_table, 
++    fill_schema_user_stats, make_old_format, 0, -1, -1, 0, 0},
+   {"VARIABLES", variables_fields_info, create_schema_table, fill_variables,
+    make_old_format, 0, 0, -1, 1, 0},
+   {"VIEWS", view_fields_info, create_schema_table, 
+diff -ruN a/sql/sql_update.cc b/sql/sql_update.cc
+--- a/sql/sql_update.cc        2010-08-04 02:24:35.000000000 +0900
++++ b/sql/sql_update.cc        2010-08-27 15:10:33.880988383 +0900
+@@ -843,6 +843,7 @@
+     thd->row_count_func=
+       (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
+     my_ok(thd, (ulong) thd->row_count_func, id, buff);
++    thd->updated_row_count += thd->row_count_func;
+     DBUG_PRINT("info",("%ld records updated", (long) updated));
+   }
+   thd->count_cuted_fields= CHECK_FIELD_IGNORE;                /* calc cuted fields */
+@@ -2145,5 +2146,6 @@
+   thd->row_count_func=
+     (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
+   ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
++  thd->updated_row_count += thd->row_count_func;
+   DBUG_RETURN(FALSE);
+ }
+diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy  2010-08-27 14:29:26.060990130 +0900
++++ b/sql/sql_yacc.yy  2010-08-27 15:10:33.890987529 +0900
+@@ -757,6 +757,7 @@
+ %token  CHECK_SYM                     /* SQL-2003-R */
+ %token  CIPHER_SYM
+ %token  CLIENT_SYM
++%token  CLIENT_STATS_SYM
+ %token  CLOSE_SYM                     /* SQL-2003-R */
+ %token  COALESCE                      /* SQL-2003-N */
+ %token  CODE_SYM
+@@ -903,6 +904,7 @@
+ %token  IMPORT
+ %token  INDEXES
+ %token  INDEX_SYM
++%token  INDEX_STATS_SYM
+ %token  INFILE
+ %token  INITIAL_SIZE_SYM
+ %token  INNER_SYM                     /* SQL-2003-R */
+@@ -1144,6 +1146,7 @@
+ %token  SIGNED_SYM
+ %token  SIMPLE_SYM                    /* SQL-2003-N */
+ %token  SLAVE
++%token  SLOW_SYM
+ %token  SMALLINT                      /* SQL-2003-R */
+ %token  SNAPSHOT_SYM
+ %token  SOCKET_SYM
+@@ -1189,6 +1192,7 @@
+ %token  TABLESPACE
+ %token  TABLE_REF_PRIORITY
+ %token  TABLE_SYM                     /* SQL-2003-R */
++%token  TABLE_STATS_SYM
+ %token  TABLE_CHECKSUM_SYM
+ %token  TEMPORARY                     /* SQL-2003-N */
+ %token  TEMPTABLE_SYM
+@@ -1197,6 +1201,7 @@
+ %token  TEXT_SYM
+ %token  THAN_SYM
+ %token  THEN_SYM                      /* SQL-2003-R */
++%token  THREAD_STATS_SYM
+ %token  TIMESTAMP                     /* SQL-2003-R */
+ %token  TIMESTAMP_ADD
+ %token  TIMESTAMP_DIFF
+@@ -1234,6 +1239,7 @@
+ %token  UPGRADE_SYM
+ %token  USAGE                         /* SQL-2003-N */
+ %token  USER                          /* SQL-2003-R */
++%token  USER_STATS_SYM
+ %token  USE_FRM
+ %token  USE_SYM
+ %token  USING                         /* SQL-2003-R */
+@@ -10346,6 +10352,41 @@
+           {
+             Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
+           }
++        | CLIENT_STATS_SYM wild_and_where 
++          {
++           LEX *lex= Lex;
++           Lex->sql_command = SQLCOM_SELECT;
++           if (prepare_schema_table(YYTHD, lex, 0, SCH_CLIENT_STATS))
++             MYSQL_YYABORT;
++          }
++        | USER_STATS_SYM wild_and_where 
++          {
++           LEX *lex= Lex;
++           lex->sql_command = SQLCOM_SELECT;
++           if (prepare_schema_table(YYTHD, lex, 0, SCH_USER_STATS))
++             MYSQL_YYABORT;
++          }
++        | THREAD_STATS_SYM wild_and_where
++          {
++           LEX *lex= Lex;
++           Lex->sql_command = SQLCOM_SELECT;
++           if (prepare_schema_table(YYTHD, lex, 0, SCH_THREAD_STATS))
++             MYSQL_YYABORT;
++          }
++        | TABLE_STATS_SYM wild_and_where
++          {
++           LEX *lex= Lex;
++           lex->sql_command= SQLCOM_SELECT;
++           if (prepare_schema_table(YYTHD, lex, 0, SCH_TABLE_STATS))
++             MYSQL_YYABORT;
++          }
++        | INDEX_STATS_SYM wild_and_where
++          {
++           LEX *lex= Lex;
++           lex->sql_command= SQLCOM_SELECT;
++           if (prepare_schema_table(YYTHD, lex, 0, SCH_INDEX_STATS))
++             MYSQL_YYABORT;
++          }
+         | CREATE PROCEDURE sp_name
+           {
+             LEX *lex= Lex;
+@@ -10554,6 +10595,18 @@
+           { Lex->type|= REFRESH_STATUS; }
+         | SLAVE
+           { Lex->type|= REFRESH_SLAVE; }
++        | SLOW_SYM QUERY_SYM LOGS_SYM
++          { Lex->type |= REFRESH_SLOW_QUERY_LOG; }
++        | CLIENT_STATS_SYM
++          { Lex->type|= REFRESH_CLIENT_STATS; }
++        | USER_STATS_SYM
++          { Lex->type|= REFRESH_USER_STATS; }
++        | THREAD_STATS_SYM
++          { Lex->type|= REFRESH_THREAD_STATS; }
++        | TABLE_STATS_SYM
++          { Lex->type|= REFRESH_TABLE_STATS; }
++        | INDEX_STATS_SYM
++          { Lex->type|= REFRESH_INDEX_STATS; }
+         | MASTER_SYM
+           { Lex->type|= REFRESH_MASTER; }
+         | DES_KEY_FILE
+@@ -11671,6 +11724,7 @@
+         | CHAIN_SYM                {}
+         | CHANGED                  {}
+         | CIPHER_SYM               {}
++        | CLIENT_STATS_SYM         {}
+         | CLIENT_SYM               {}
+         | COALESCE                 {}
+         | CODE_SYM                 {}
+@@ -11732,6 +11786,7 @@
+         | HOSTS_SYM                {}
+         | HOUR_SYM                 {}
+         | IDENTIFIED_SYM           {}
++        | INDEX_STATS_SYM          {}
+         | INVOKER_SYM              {}
+         | IMPORT                   {}
+         | INDEXES                  {}
+@@ -11856,6 +11911,7 @@
+         | SIMPLE_SYM               {}
+         | SHARE_SYM                {}
+         | SHUTDOWN                 {}
++        | SLOW_SYM                 {}
+         | SNAPSHOT_SYM             {}
+         | SOUNDS_SYM               {}
+         | SOURCE_SYM               {}
+@@ -11875,6 +11931,7 @@
+         | SUSPEND_SYM              {}
+         | SWAPS_SYM                {}
+         | SWITCHES_SYM             {}
++        | TABLE_STATS_SYM          {}
+         | TABLES                   {}
+         | TABLE_CHECKSUM_SYM       {}
+         | TABLESPACE               {}
+@@ -11882,6 +11939,7 @@
+         | TEMPTABLE_SYM            {}
+         | TEXT_SYM                 {}
+         | THAN_SYM                 {}
++        | THREAD_STATS_SYM         {}
+         | TRANSACTION_SYM          {}
+         | TRIGGERS_SYM             {}
+         | TIMESTAMP                {}
+@@ -11899,6 +11957,7 @@
+         | UNKNOWN_SYM              {}
+         | UNTIL_SYM                {}
+         | USER                     {}
++        | USER_STATS_SYM           {}
+         | USE_FRM                  {}
+         | VARIABLES                {}
+         | VIEW_SYM                 {}
+diff -ruN a/sql/structs.h b/sql/structs.h
+--- a/sql/structs.h    2010-08-04 02:24:35.000000000 +0900
++++ b/sql/structs.h    2010-08-27 15:10:33.904059058 +0900
+@@ -237,6 +237,171 @@
+   USER_RESOURCES user_resources;
+ } USER_CONN;
++typedef struct st_user_stats {  // one accumulated-statistics record; used for both the user and the client statistics hashes
++  char user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];  // sized for either a user name or a client host
++  // Account name the user is mapped to when this is a user from mapped_user.
++  // Otherwise, the same value as user.
++  char priv_user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
++  uint total_connections;
++  uint concurrent_connections;
++  time_t connected_time;  // in seconds
++  double busy_time;       // in seconds
++  double cpu_time;        // in seconds
++  ulonglong bytes_received;
++  ulonglong bytes_sent;
++  ulonglong binlog_bytes_written;
++  ha_rows rows_fetched, rows_updated, rows_read;  // rows_read is shown as column TABLE_ROWS_READ
++  ulonglong select_commands, update_commands, other_commands;
++  ulonglong commit_trans, rollback_trans;  // shown as COMMIT_TRANSACTIONS / ROLLBACK_TRANSACTIONS
++  ulonglong denied_connections, lost_connections;
++  ulonglong access_denied_errors;  // shown as column ACCESS_DENIED
++  ulonglong empty_queries;
++} USER_STATS;
++
++/* Lookup function for hash tables with USER_STATS entries */
++extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
++                                my_bool not_used __attribute__((unused)));
++
++/* Free all memory for a hash table with USER_STATS entries */
++extern void free_user_stats(USER_STATS* user_stats);
++
++/* Initialize an instance of USER_STATS */
++extern void
++init_user_stats(USER_STATS *user_stats,
++                const char *user,
++                const char *priv_user,
++                uint total_connections,
++                uint concurrent_connections,
++                time_t connected_time,
++                double busy_time,
++                double cpu_time,
++                ulonglong bytes_received,
++                ulonglong bytes_sent,
++                ulonglong binlog_bytes_written,
++                ha_rows rows_fetched,
++                ha_rows rows_updated,
++                ha_rows rows_read,
++                ulonglong select_commands,
++                ulonglong update_commands,
++                ulonglong other_commands,
++                ulonglong commit_trans,
++                ulonglong rollback_trans,
++                ulonglong denied_connections,
++                ulonglong lost_connections,
++                ulonglong access_denied_errors,
++                ulonglong empty_queries);
++
++/* Increment values of an instance of USER_STATS */
++extern void
++add_user_stats(USER_STATS *user_stats,
++               uint total_connections,
++               uint concurrent_connections,
++               time_t connected_time,
++               double busy_time,
++               double cpu_time,
++               ulonglong bytes_received,
++               ulonglong bytes_sent,
++               ulonglong binlog_bytes_written,
++               ha_rows rows_fetched,
++               ha_rows rows_updated,
++               ha_rows rows_read,
++               ulonglong select_commands,
++               ulonglong update_commands,
++               ulonglong other_commands,
++               ulonglong commit_trans,
++               ulonglong rollback_trans,
++               ulonglong denied_connections,
++               ulonglong lost_connections,
++               ulonglong access_denied_errors,
++               ulonglong empty_queries);
++
++typedef struct st_thread_stats {  // per-thread counterpart of USER_STATS: same counters, keyed by thread id instead of a name
++  my_thread_id id;  // presumably the value shown as THREAD_ID - confirm against send_thread_stats
++  uint total_connections;
++  uint concurrent_connections;
++  time_t connected_time;  // in seconds
++  double busy_time;       // in seconds
++  double cpu_time;        // in seconds
++  ulonglong bytes_received;
++  ulonglong bytes_sent;
++  ulonglong binlog_bytes_written;
++  ha_rows rows_fetched, rows_updated, rows_read;
++  ulonglong select_commands, update_commands, other_commands;
++  ulonglong commit_trans, rollback_trans;
++  ulonglong denied_connections, lost_connections;
++  ulonglong access_denied_errors;
++  ulonglong empty_queries;
++} THREAD_STATS;
++
++/* Lookup function for hash tables with THREAD_STATS entries */
++extern "C" uchar *get_key_thread_stats(THREAD_STATS *thread_stats, size_t *length,
++                                my_bool not_used __attribute__((unused)));
++
++/* Free all memory for a hash table with THREAD_STATS entries */
++extern void free_thread_stats(THREAD_STATS* thread_stats);
++
++/* Initialize an instance of THREAD_STATS */
++extern void
++init_thread_stats(THREAD_STATS *thread_stats,
++                my_thread_id id,
++                uint total_connections,
++                uint concurrent_connections,
++                time_t connected_time,
++                double busy_time,
++                double cpu_time,
++                ulonglong bytes_received,
++                ulonglong bytes_sent,
++                ulonglong binlog_bytes_written,
++                ha_rows rows_fetched,
++                ha_rows rows_updated,
++                ha_rows rows_read,
++                ulonglong select_commands,
++                ulonglong update_commands,
++                ulonglong other_commands,
++                ulonglong commit_trans,
++                ulonglong rollback_trans,
++                ulonglong denied_connections,
++                ulonglong lost_connections,
++                ulonglong access_denied_errors,
++                ulonglong empty_queries);
++
++/* Increment values of an instance of THREAD_STATS */
++extern void
++add_thread_stats(THREAD_STATS *thread_stats,
++               uint total_connections,
++               uint concurrent_connections,
++               time_t connected_time,
++               double busy_time,
++               double cpu_time,
++               ulonglong bytes_received,
++               ulonglong bytes_sent,
++               ulonglong binlog_bytes_written,
++               ha_rows rows_fetched,
++               ha_rows rows_updated,
++               ha_rows rows_read,
++               ulonglong select_commands,
++               ulonglong update_commands,
++               ulonglong other_commands,
++               ulonglong commit_trans,
++               ulonglong rollback_trans,
++               ulonglong denied_connections,
++               ulonglong lost_connections,
++               ulonglong access_denied_errors,
++               ulonglong empty_queries);
++
++typedef struct st_table_stats {  // one entry of global_table_stats; read by fill_schema_table_stats
++  char table[NAME_LEN * 2 + 2];  // [db] + '.' + [table] + '\0'
++  ulonglong rows_read, rows_changed;
++  ulonglong rows_changed_x_indexes;  // shown as column ROWS_CHANGED_X_INDEXES
++  /* Stores enum db_type, but forward declarations cannot be done */
++  int engine_type;
++} TABLE_STATS;
++
++typedef struct st_index_stats {  // one entry of global_index_stats; read by fill_schema_index_stats
++  char index[NAME_LEN * 3 + 3];  // [db] + '.' + [table] + '.' + [index] + '\0'
++  ulonglong rows_read;  // shown as column ROWS_READ of INDEX_STATISTICS
++} INDEX_STATS;
++
+       /* Bits in form->update */
+ #define REG_MAKE_DUPP         1       /* Make a copy of record when read */
+ #define REG_NEW_RECORD                2       /* Write a new record if not found */
+diff -ruN a/sql/table.h b/sql/table.h
+--- a/sql/table.h      2010-08-04 02:24:19.000000000 +0900
++++ b/sql/table.h      2010-08-27 15:10:33.906987259 +0900
+@@ -943,10 +943,12 @@
+ enum enum_schema_tables
+ {
+   SCH_CHARSETS= 0,
++  SCH_CLIENT_STATS,
+   SCH_COLLATIONS,
+   SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
+   SCH_COLUMNS,
+   SCH_COLUMN_PRIVILEGES,
++  SCH_INDEX_STATS,
+   SCH_ENGINES,
+   SCH_EVENTS,
+   SCH_FILES,
+@@ -970,8 +972,11 @@
+   SCH_TABLE_CONSTRAINTS,
+   SCH_TABLE_NAMES,
+   SCH_TABLE_PRIVILEGES,
++  SCH_TABLE_STATS,
++  SCH_THREAD_STATS,
+   SCH_TRIGGERS,
+   SCH_USER_PRIVILEGES,
++  SCH_USER_STATS,
+   SCH_VARIABLES,
+   SCH_VIEWS
+ };
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc    2010-08-04 02:24:20.000000000 +0900
++++ b/storage/innobase/handler/ha_innodb.cc    2010-08-27 15:10:33.913058592 +0900
+@@ -4055,6 +4055,8 @@
+       error = row_insert_for_mysql((byte*) record, prebuilt);
++      if (error == DB_SUCCESS) rows_changed++;
++
+       /* Handle duplicate key errors */
+       if (auto_inc_used) {
+               ulint           err;
+@@ -4392,6 +4394,8 @@
+               }
+       }
++      if (error == DB_SUCCESS) rows_changed++;
++
+       innodb_srv_conc_exit_innodb(trx);
+       error = convert_error_code_to_mysql(error, user_thd);
+@@ -4444,6 +4448,8 @@
+       error = row_update_for_mysql((byte*) record, prebuilt);
++      if (error == DB_SUCCESS) rows_changed++;
++
+       innodb_srv_conc_exit_innodb(trx);
+       error = convert_error_code_to_mysql(error, user_thd);
+@@ -4923,6 +4929,9 @@
+       if (ret == DB_SUCCESS) {
+               error = 0;
+               table->status = 0;
++              rows_read++;
++              if (active_index >= 0 && active_index < MAX_KEY)
++                      index_rows_read[active_index]++;
+       } else if (ret == DB_RECORD_NOT_FOUND) {
+               error = HA_ERR_END_OF_FILE;
+diff -ruN a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
+--- a/storage/myisam/ha_myisam.cc      2010-08-04 02:24:27.000000000 +0900
++++ b/storage/myisam/ha_myisam.cc      2010-08-27 15:10:33.921058182 +0900
+@@ -761,6 +761,7 @@
+ int ha_myisam::write_row(uchar *buf)
+ {
++  int error;
+   ha_statistic_increment(&SSV::ha_write_count);
+   /* If we have a timestamp column, update it to the current time */
+@@ -773,11 +774,12 @@
+   */
+   if (table->next_number_field && buf == table->record[0])
+   {
+-    int error;
+     if ((error= update_auto_increment()))
+       return error;
+   }
+-  return mi_write(file,buf);
++  error=mi_write(file,buf);
++  if (!error) rows_changed++;
++  return error;
+ }
+ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
+@@ -1638,16 +1640,22 @@
+ int ha_myisam::update_row(const uchar *old_data, uchar *new_data)
+ {
++  int error;
+   ha_statistic_increment(&SSV::ha_update_count);
+   if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+     table->timestamp_field->set_time();
+-  return mi_update(file,old_data,new_data);
++  error=mi_update(file,old_data,new_data);
++  if (!error) rows_changed++;
++  return error;
+ }
+ int ha_myisam::delete_row(const uchar *buf)
+ {
++  int error;
+   ha_statistic_increment(&SSV::ha_delete_count);
+-  return mi_delete(file,buf);
++  error=mi_delete(file,buf);
++  if (!error) rows_changed++;
++  return error;
+ }
+ int ha_myisam::index_read_map(uchar *buf, const uchar *key,
+@@ -1658,6 +1666,13 @@
+   ha_statistic_increment(&SSV::ha_read_key_count);
+   int error=mi_rkey(file, buf, active_index, key, keypart_map, find_flag);
+   table->status=error ? STATUS_NOT_FOUND: 0;
++  if (!error) {
++    rows_read++;
++
++    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++    if (inx >= 0 && inx < MAX_KEY)
++      index_rows_read[inx]++;
++  }
+   return error;
+ }
+@@ -1668,6 +1683,13 @@
+   ha_statistic_increment(&SSV::ha_read_key_count);
+   int error=mi_rkey(file, buf, index, key, keypart_map, find_flag);
+   table->status=error ? STATUS_NOT_FOUND: 0;
++  if (!error) {
++    rows_read++;
++
++    int inx = index;
++    if (inx >= 0 && inx < MAX_KEY)
++      index_rows_read[inx]++;
++  }
+   return error;
+ }
+@@ -1680,6 +1702,13 @@
+   int error=mi_rkey(file, buf, active_index, key, keypart_map,
+                     HA_READ_PREFIX_LAST);
+   table->status=error ? STATUS_NOT_FOUND: 0;
++  if (!error) {
++    rows_read++;
++
++    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++    if (inx >= 0 && inx < MAX_KEY)
++      index_rows_read[inx]++;
++  }
+   DBUG_RETURN(error);
+ }
+@@ -1689,6 +1718,13 @@
+   ha_statistic_increment(&SSV::ha_read_next_count);
+   int error=mi_rnext(file,buf,active_index);
+   table->status=error ? STATUS_NOT_FOUND: 0;
++  if (!error) {
++    rows_read++;
++
++    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++    if (inx >= 0 && inx < MAX_KEY)
++      index_rows_read[inx]++;
++  }
+   return error;
+ }
+@@ -1698,6 +1734,13 @@
+   ha_statistic_increment(&SSV::ha_read_prev_count);
+   int error=mi_rprev(file,buf, active_index);
+   table->status=error ? STATUS_NOT_FOUND: 0;
++  if (!error) {
++    rows_read++;
++
++    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++    if (inx >= 0 && inx < MAX_KEY)
++      index_rows_read[inx]++;
++  }
+   return error;
+ }
+@@ -1707,6 +1750,13 @@
+   ha_statistic_increment(&SSV::ha_read_first_count);
+   int error=mi_rfirst(file, buf, active_index);
+   table->status=error ? STATUS_NOT_FOUND: 0;
++  if (!error) {
++    rows_read++;
++
++    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++    if (inx >= 0 && inx < MAX_KEY)
++      index_rows_read[inx]++;
++  }
+   return error;
+ }
+@@ -1716,6 +1766,13 @@
+   ha_statistic_increment(&SSV::ha_read_last_count);
+   int error=mi_rlast(file, buf, active_index);
+   table->status=error ? STATUS_NOT_FOUND: 0;
++  if (!error) {
++    rows_read++;
++
++    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++    if (inx >= 0 && inx < MAX_KEY)
++      index_rows_read[inx]++;
++  }
+   return error;
+ }
+@@ -1731,6 +1788,13 @@
+     error= mi_rnext_same(file,buf);
+   } while (error == HA_ERR_RECORD_DELETED);
+   table->status=error ? STATUS_NOT_FOUND: 0;
++  if (!error) {
++    rows_read++;
++
++    int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++    if (inx >= 0 && inx < MAX_KEY)
++      index_rows_read[inx]++;
++  }
+   return error;
+ }
+@@ -1747,6 +1811,7 @@
+   ha_statistic_increment(&SSV::ha_read_rnd_next_count);
+   int error=mi_scan(file, buf);
+   table->status=error ? STATUS_NOT_FOUND: 0;
++  if (!error) rows_read++;
+   return error;
+ }
+@@ -1760,6 +1825,7 @@
+   ha_statistic_increment(&SSV::ha_read_rnd_count);
+   int error=mi_rrnd(file, buf, my_get_ptr(pos,ref_length));
+   table->status=error ? STATUS_NOT_FOUND: 0;
++  if (!error) rows_read++;
+   return error;
+ }
index ed314fb8df11df0c207d400917cf9cd8936a6761..e3e53be1070ff3058d8c2cfa91108692725d4a83 100644 (file)
@@ -70,12 +70,11 @@ Patch14:    %{name}-bug-43594.patch
 Patch15:       plugin-avoid-version.patch
 Patch16:       %{name}-fix-dummy-thread-race-condition.patch
 Patch18:       %{name}-sphinx.patch
-# <percona patches, http://www.percona.com/percona-lab.html>
-Patch100:      %{name}-userstats.patch
-Patch101:      %{name}-microslow.patch
-Patch102:      %{name}-acc-pslist.patch
-Patch103:      %{name}-split_buf_pool_mutex_fixed_optimistic_safe.patch
-Patch104:      %{name}-innodb_rw_lock.patch
+# <percona patches, http://bazaar.launchpad.net/~percona-dev/percona-server/5.5.7/files>
+Patch100:      %{name}-userstat.patch
+Patch101:      %{name}-innodb_extend_slow.patch
+Patch102:      %{name}-microsec_process.patch
+Patch103:      %{name}-innodb_split_buf_pool_mutex.patch
 # </percona>
 URL:           http://www.mysql.com/products/database/mysql/community_edition.html
 BuildRequires: bison
@@ -525,8 +524,6 @@ mv sphinx-*/mysqlse storage/sphinx
 #%patch102 -p1
 # CHECK ME
 #%patch103 -p1
-# CHECK ME
-#%patch104 -p1
 # </percona>
 
 %build
This page took 0.320967 seconds and 4 git commands to generate.