--- /dev/null
+# name : innodb_extend_slow.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:49:59.175955882 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-03 17:42:42.074307123 +0900
+@@ -51,6 +51,40 @@
+ #include "dict0dict.h"
+ #include "log0recv.h"
+ #include "page0zip.h"
++#include "trx0trx.h"
++
++/* prototypes for new functions added to ha_innodb.cc */
++trx_t* innobase_get_trx();
++
++/* Record one page access in trx's distinct-page bitmap (slow-log statistics). */
++static inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
++{
++	ulint	block_hash;		/* hashed bit position of the page */
++	ulint	block_hash_byte;	/* bitmap byte that holds that bit */
++	byte	block_hash_mask;	/* mask selecting the bit in that byte */
++
++	ut_ad(block);
++
++	/* Nothing to record unless statistics are collected for this trx. */
++	if (!innobase_get_slow_log() || !trx || !trx->take_stats)
++		return;
++
++	/* The bitmap is allocated lazily, on the first page access counted. */
++	if (!trx->distinct_page_access_hash) {
++		trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
++		memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
++	}
++
++	block_hash = ut_hash_ulint((block->page.space << 20) + block->page.space +
++					block->page.offset, DPAH_SIZE << 3);
++	block_hash_byte = block_hash >> 3;
++	block_hash_mask = (byte) (0x01 << (block_hash & 0x07));
++	ut_ad(block_hash_byte < DPAH_SIZE);
++	/* A clear bit means no page hashing to this bit has been counted yet. */
++	if (!(trx->distinct_page_access_hash[block_hash_byte] & block_hash_mask))
++		trx->distinct_page_access++;
++	trx->distinct_page_access_hash[block_hash_byte] |= block_hash_mask;
++}
+
+ /*
+ IMPLEMENTATION OF THE BUFFER POOL
+@@ -2399,11 +2433,19 @@
+ mutex_t* block_mutex;
+ ibool must_read;
+ unsigned access_time;
++ trx_t* trx = NULL;
++ ulint sec;
++ ulint ms;
++ ib_uint64_t start_time;
++ ib_uint64_t finish_time;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+ #ifndef UNIV_LOG_DEBUG
+ ut_ad(!ibuf_inside());
+ #endif
++ if (innobase_get_slow_log()) {
++ trx = innobase_get_trx();
++ }
+ buf_pool->stat.n_page_gets++;
+
+ for (;;) {
+@@ -2421,7 +2463,7 @@
+ //buf_pool_mutex_exit(buf_pool);
+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+- buf_read_page(space, zip_size, offset);
++ buf_read_page(space, zip_size, offset, trx);
+
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(++buf_dbg_counter % 37 || buf_validate());
+@@ -2499,6 +2541,13 @@
+ /* Let us wait until the read operation
+ completes */
+
++ if (innobase_get_slow_log() && trx && trx->take_stats)
++ {
++ ut_usectime(&sec, &ms);
++ start_time = (ib_uint64_t)sec * 1000000 + ms;
++ } else {
++ start_time = 0;
++ }
+ for (;;) {
+ enum buf_io_fix io_fix;
+
+@@ -2513,6 +2562,12 @@
+ break;
+ }
+ }
++ if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
++ {
++ ut_usectime(&sec, &ms);
++ finish_time = (ib_uint64_t)sec * 1000000 + ms;
++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++ }
+ }
+
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+@@ -2825,6 +2880,11 @@
+ ibool must_read;
+ ulint retries = 0;
+ mutex_t* block_mutex = NULL;
++ trx_t* trx = NULL;
++ ulint sec;
++ ulint ms;
++ ib_uint64_t start_time;
++ ib_uint64_t finish_time;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+ ut_ad(mtr);
+@@ -2842,6 +2902,9 @@
+ #ifndef UNIV_LOG_DEBUG
+ ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
+ #endif
++ if (innobase_get_slow_log()) {
++ trx = innobase_get_trx();
++ }
+ buf_pool->stat.n_page_gets++;
+ fold = buf_page_address_fold(space, offset);
+ loop:
+@@ -2915,7 +2978,7 @@
+ return(NULL);
+ }
+
+- if (buf_read_page(space, zip_size, offset)) {
++ if (buf_read_page(space, zip_size, offset, trx)) {
+ retries = 0;
+ } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
+ ++retries;
+@@ -3178,6 +3241,13 @@
+ /* Let us wait until the read operation
+ completes */
+
++ if (innobase_get_slow_log() && trx && trx->take_stats)
++ {
++ ut_usectime(&sec, &ms);
++ start_time = (ib_uint64_t)sec * 1000000 + ms;
++ } else {
++ start_time = 0;
++ }
+ for (;;) {
+ enum buf_io_fix io_fix;
+
+@@ -3192,6 +3262,12 @@
+ break;
+ }
+ }
++ if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
++ {
++ ut_usectime(&sec, &ms);
++ finish_time = (ib_uint64_t)sec * 1000000 + ms;
++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++ }
+ }
+
+ fix_type = MTR_MEMO_BUF_FIX;
+@@ -3217,13 +3293,17 @@
+ /* In the case of a first access, try to apply linear
+ read-ahead */
+
+- buf_read_ahead_linear(space, zip_size, offset);
++ buf_read_ahead_linear(space, zip_size, offset, trx);
+ }
+
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(buf_block_get_space(block),
+ buf_block_get_page_no(block)) == 0);
+ #endif
++ if (innobase_get_slow_log()) {
++ _increment_page_get_statistics(block, trx);
++ }
++
+ return(block);
+ }
+
+@@ -3247,6 +3327,7 @@
+ unsigned access_time;
+ ibool success;
+ ulint fix_type;
++ trx_t* trx = NULL;
+
+ ut_ad(block);
+ ut_ad(mtr);
+@@ -3324,13 +3405,17 @@
+ #ifdef UNIV_DEBUG_FILE_ACCESSES
+ ut_a(block->page.file_page_was_freed == FALSE);
+ #endif
++ if (innobase_get_slow_log()) {
++ trx = innobase_get_trx();
++ }
++
+ if (UNIV_UNLIKELY(!access_time)) {
+ /* In the case of a first access, try to apply linear
+ read-ahead */
+
+ buf_read_ahead_linear(buf_block_get_space(block),
+ buf_block_get_zip_size(block),
+- buf_block_get_page_no(block));
++ buf_block_get_page_no(block), trx);
+ }
+
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+@@ -3340,6 +3425,9 @@
+ buf_pool = buf_pool_from_block(block);
+ buf_pool->stat.n_page_gets++;
+
++ if (innobase_get_slow_log()) {
++ _increment_page_get_statistics(block, trx);
++ }
+ return(TRUE);
+ }
+
+@@ -3362,6 +3450,7 @@
+ buf_pool_t* buf_pool;
+ ibool success;
+ ulint fix_type;
++ trx_t* trx = NULL;
+
+ ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
+@@ -3448,6 +3537,11 @@
+ #endif
+ buf_pool->stat.n_page_gets++;
+
++ if (innobase_get_slow_log()) {
++ trx = innobase_get_trx();
++ _increment_page_get_statistics(block, trx);
++ }
++
+ return(TRUE);
+ }
+
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c 2010-12-03 17:32:15.617037263 +0900
++++ b/storage/innobase/buf/buf0rea.c 2010-12-03 17:42:42.075297193 +0900
+@@ -77,7 +77,8 @@
+ treat the tablespace as dropped; this is a timestamp we
+ use to stop dangling page reads from a tablespace
+ which we have DISCARDed + IMPORTed back */
+- ulint offset) /*!< in: page number */
++ ulint offset, /*!< in: page number */
++ trx_t* trx)
+ {
+ buf_page_t* bpage;
+ ulint wake_later;
+@@ -179,15 +180,15 @@
+
+ thd_wait_begin(NULL, THD_WAIT_DISKIO);
+ if (zip_size) {
+- *err = fil_io(OS_FILE_READ | wake_later,
++ *err = _fil_io(OS_FILE_READ | wake_later,
+ sync, space, zip_size, offset, 0, zip_size,
+- bpage->zip.data, bpage);
++ bpage->zip.data, bpage, trx);
+ } else {
+ ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+
+- *err = fil_io(OS_FILE_READ | wake_later,
++ *err = _fil_io(OS_FILE_READ | wake_later,
+ sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
+- ((buf_block_t*) bpage)->frame, bpage);
++ ((buf_block_t*) bpage)->frame, bpage, trx);
+ }
+ thd_wait_end(NULL);
+ ut_a(*err == DB_SUCCESS);
+@@ -213,7 +214,8 @@
+ /*==========*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+- ulint offset) /*!< in: page number */
++ ulint offset, /*!< in: page number */
++ trx_t* trx)
+ {
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ ib_int64_t tablespace_version;
+@@ -227,7 +229,7 @@
+
+ count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+ zip_size, FALSE,
+- tablespace_version, offset);
++ tablespace_version, offset, trx);
+ srv_buf_pool_reads += count;
+ if (err == DB_TABLESPACE_DELETED) {
+ ut_print_timestamp(stderr);
+@@ -278,8 +280,9 @@
+ /*==================*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+- ulint offset) /*!< in: page number of a page; NOTE: the current thread
++ ulint offset, /*!< in: page number of a page; NOTE: the current thread
+ must want access to this page (see NOTE 3 above) */
++ trx_t* trx)
+ {
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ ib_int64_t tablespace_version;
+@@ -500,7 +503,7 @@
+ count += buf_read_page_low(
+ &err, FALSE,
+ ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
+- space, zip_size, FALSE, tablespace_version, i);
++ space, zip_size, FALSE, tablespace_version, i, trx);
+ if (err == DB_TABLESPACE_DELETED) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+@@ -594,7 +597,7 @@
+ buf_read_page_low(&err, sync && (i + 1 == n_stored),
+ BUF_READ_ANY_PAGE, space_ids[i],
+ zip_size, TRUE, space_versions[i],
+- page_nos[i]);
++ page_nos[i], NULL);
+
+ if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
+ tablespace_deleted:
+@@ -736,12 +739,12 @@
+ if ((i + 1 == n_stored) && sync) {
+ buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+ zip_size, TRUE, tablespace_version,
+- page_nos[i]);
++ page_nos[i], NULL);
+ } else {
+ buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
+ | OS_AIO_SIMULATED_WAKE_LATER,
+ space, zip_size, TRUE,
+- tablespace_version, page_nos[i]);
++ tablespace_version, page_nos[i], NULL);
+ }
+ }
+
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c 2010-12-03 15:53:54.610037199 +0900
++++ b/storage/innobase/fil/fil0fil.c 2010-12-03 17:42:42.079064198 +0900
+@@ -4349,7 +4349,7 @@
+ node->name, node->handle, buf,
+ offset_low, offset_high,
+ page_size * n_pages,
+- NULL, NULL);
++ NULL, NULL, NULL);
+ #endif
+ if (success) {
+ node->size += n_pages;
+@@ -4676,7 +4676,7 @@
+ i/o on a tablespace which does not exist */
+ UNIV_INTERN
+ ulint
+-fil_io(
++_fil_io(
+ /*===*/
+ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
+ ORed to OS_FILE_LOG, if a log i/o
+@@ -4701,8 +4701,9 @@
+ void* buf, /*!< in/out: buffer where to store read data
+ or from where to write; in aio this must be
+ appropriately aligned */
+- void* message) /*!< in: message for aio handler if non-sync
++ void* message, /*!< in: message for aio handler if non-sync
+ aio used, else ignored */
++ trx_t* trx)
+ {
+ ulint mode;
+ fil_space_t* space;
+@@ -4872,7 +4873,7 @@
+ #else
+ /* Queue the aio request */
+ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
+- offset_low, offset_high, len, node, message);
++ offset_low, offset_high, len, node, message, trx);
+ #endif
+ ut_a(ret);
+
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:36:44.293955189 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:42:42.090024586 +0900
+@@ -1528,6 +1528,16 @@
+ trx->check_unique_secondary = !thd_test_options(
+ thd, OPTION_RELAXED_UNIQUE_CHECKS);
+
++#ifdef EXTENDED_SLOWLOG
++ if (thd_log_slow_verbosity(thd) & SLOG_V_INNODB) {
++ trx->take_stats = TRUE;
++ } else {
++ trx->take_stats = FALSE;
++ }
++#else
++ trx->take_stats = FALSE;
++#endif
++
+ DBUG_VOID_RETURN;
+ }
+
+@@ -1583,6 +1593,32 @@
+ }
+
+
++/*********************************************************************//**
++Looks up the transaction object attached to current_thd.
++@return the trx_t* that thd_to_trx() yields, or NULL if current_thd is 0 */
++extern "C"
++trx_t*
++innobase_get_trx()
++{
++	THD*	cur_thd = current_thd;
++
++	if (likely(cur_thd != 0)) {
++		return(thd_to_trx(cur_thd));
++	}
++	return(NULL);
++}
++
++/** @return thd_opt_slow_log() as ibool; FALSE without EXTENDED_SLOWLOG */
++extern "C" ibool
++innobase_get_slow_log()
++{
++#ifndef EXTENDED_SLOWLOG
++	return(FALSE);
++#else
++	return((ibool) thd_opt_slow_log());
++#endif
++}
++
+ /*********************************************************************//**
+ Construct ha_innobase handler. */
+ UNIV_INTERN
+@@ -9179,6 +9215,25 @@
+ statement has ended */
+
+ if (trx->n_mysql_tables_in_use == 0) {
++#ifdef EXTENDED_SLOWLOG
++ increment_thd_innodb_stats(thd,
++ (unsigned long long) trx->id,
++ trx->io_reads,
++ trx->io_read,
++ trx->io_reads_wait_timer,
++ trx->lock_que_wait_timer,
++ trx->innodb_que_wait_timer,
++ trx->distinct_page_access);
++
++ trx->io_reads = 0;
++ trx->io_read = 0;
++ trx->io_reads_wait_timer = 0;
++ trx->lock_que_wait_timer = 0;
++ trx->innodb_que_wait_timer = 0;
++ trx->distinct_page_access = 0;
++ if (trx->distinct_page_access_hash)
++ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
++#endif
+
+ trx->mysql_n_tables_locked = 0;
+ prebuilt->used_in_HANDLER = FALSE;
+diff -ruN a/storage/innobase/handler/innodb_patch_info.h b/storage/innobase/handler/innodb_patch_info.h
+--- a/storage/innobase/handler/innodb_patch_info.h 2010-12-03 17:36:44.293955189 +0900
++++ b/storage/innobase/handler/innodb_patch_info.h 2010-12-03 17:42:42.094955866 +0900
+@@ -38,5 +38,6 @@
+ {"innodb_recovery_patches","Bugfixes and adjustments about recovery process","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_admin_command_base","XtraDB specific command interface through i_s","","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {"innodb_show_lock_name","Show mutex/lock name instead of crated file/line","","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_extend_slow","Extended statistics in slow.log","It is InnoDB-part only. It needs to patch also to mysqld.","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
+--- a/storage/innobase/include/buf0rea.h 2010-12-03 15:18:48.891024406 +0900
++++ b/storage/innobase/include/buf0rea.h 2010-12-03 17:42:42.096026873 +0900
+@@ -27,6 +27,7 @@
+ #define buf0rea_h
+
+ #include "univ.i"
++#include "trx0types.h"
+ #include "buf0types.h"
+
+ /********************************************************************//**
+@@ -41,7 +42,8 @@
+ /*==========*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+- ulint offset);/*!< in: page number */
++ ulint offset, /*!< in: page number */
++ trx_t* trx);
+ /********************************************************************//**
+ Applies linear read-ahead if in the buf_pool the page is a border page of
+ a linear read-ahead area and all the pages in the area have been accessed.
+@@ -72,8 +74,9 @@
+ /*==================*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+- ulint offset);/*!< in: page number of a page; NOTE: the current thread
++ ulint offset, /*!< in: page number of a page; NOTE: the current thread
+ must want access to this page (see NOTE 3 above) */
++ trx_t* trx);
+ /********************************************************************//**
+ Issues read requests for pages which the ibuf module wants to read in, in
+ order to contract the insert buffer tree. Technically, this function is like
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h 2010-12-03 15:09:51.290958543 +0900
++++ b/storage/innobase/include/fil0fil.h 2010-12-03 17:42:42.097027548 +0900
+@@ -611,9 +611,12 @@
+ Reads or writes data. This operation is asynchronous (aio).
+ @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+ i/o on a tablespace which does not exist */
++#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \
++ _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL)
++
+ UNIV_INTERN
+ ulint
+-fil_io(
++_fil_io(
+ /*===*/
+ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
+ ORed to OS_FILE_LOG, if a log i/o
+@@ -638,8 +641,9 @@
+ void* buf, /*!< in/out: buffer where to store read data
+ or from where to write; in aio this must be
+ appropriately aligned */
+- void* message); /*!< in: message for aio handler if non-sync
++ void* message, /*!< in: message for aio handler if non-sync
+ aio used, else ignored */
++ trx_t* trx);
+ /**********************************************************************//**
+ Waits for an aio operation to complete. This function is used to write the
+ handler for completed requests. The aio array of pending requests is divided
+diff -ruN a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
+--- a/storage/innobase/include/os0file.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/os0file.h 2010-12-03 17:42:42.100023783 +0900
+@@ -36,6 +36,7 @@
+ #define os0file_h
+
+ #include "univ.i"
++#include "trx0types.h"
+
+ #ifndef __WIN__
+ #include <dirent.h>
+@@ -277,13 +278,17 @@
+ pfs_os_file_close_func(file, __FILE__, __LINE__)
+
+ # define os_aio(type, mode, name, file, buf, offset, offset_high, \
+- n, message1, message2) \
++ n, message1, message2, trx) \
+ pfs_os_aio_func(type, mode, name, file, buf, offset, \
+- offset_high, n, message1, message2, \
++ offset_high, n, message1, message2, trx, \
+ __FILE__, __LINE__)
+
+ # define os_file_read(file, buf, offset, offset_high, n) \
+- pfs_os_file_read_func(file, buf, offset, offset_high, n, \
++ pfs_os_file_read_func(file, buf, offset, offset_high, n, NULL, \
++ __FILE__, __LINE__)
++
++# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \
++ pfs_os_file_read_func(file, buf, offset, offset_high, n, trx, \
+ __FILE__, __LINE__)
+
+ # define os_file_read_no_error_handling(file, buf, offset, \
+@@ -319,12 +324,15 @@
+ # define os_file_close(file) os_file_close_func(file)
+
+ # define os_aio(type, mode, name, file, buf, offset, offset_high, \
+- n, message1, message2) \
++ n, message1, message2, trx) \
+ os_aio_func(type, mode, name, file, buf, offset, offset_high, n,\
+- message1, message2)
++ message1, message2, trx)
+
+ # define os_file_read(file, buf, offset, offset_high, n) \
+- os_file_read_func(file, buf, offset, offset_high, n)
++ os_file_read_func(file, buf, offset, offset_high, n, NULL)
++
++# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \
++ os_file_read_func(file, buf, offset, offset_high, n, trx)
+
+ # define os_file_read_no_error_handling(file, buf, offset, \
+ offset_high, n) \
+@@ -690,6 +698,7 @@
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to read */
++ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+
+@@ -744,6 +753,7 @@
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
++ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+ /*******************************************************************//**
+@@ -885,7 +895,8 @@
+ offset where to read */
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+- ulint n); /*!< in: number of bytes to read */
++ ulint n, /*!< in: number of bytes to read */
++ trx_t* trx);
+ /*******************************************************************//**
+ Rewind file to its start, read at most size - 1 bytes from it to str, and
+ NUL-terminate str. All errors are silently ignored. This function is
+@@ -1044,10 +1055,11 @@
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
+- void* message2);/*!< in: message for the aio handler
++ void* message2,/*!< in: message for the aio handler
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
++ trx_t* trx);
+ /************************************************************************//**
+ Wakes up all async i/o threads so that they know to exit themselves in
+ shutdown. */
+diff -ruN a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
+--- a/storage/innobase/include/os0file.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/os0file.ic 2010-12-03 17:42:42.102024458 +0900
+@@ -229,6 +229,7 @@
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
++ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+ {
+@@ -244,7 +245,7 @@
+ src_file, src_line);
+
+ result = os_aio_func(type, mode, name, file, buf, offset, offset_high,
+- n, message1, message2);
++ n, message1, message2, trx);
+
+ register_pfs_file_io_end(locker, n);
+
+@@ -268,6 +269,7 @@
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to read */
++ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+ {
+@@ -278,7 +280,7 @@
+ register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
+ src_file, src_line);
+
+- result = os_file_read_func(file, buf, offset, offset_high, n);
++ result = os_file_read_func(file, buf, offset, offset_high, n, trx);
+
+ register_pfs_file_io_end(locker, n);
+
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-03 17:32:15.634987408 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-03 17:42:42.104028644 +0900
+@@ -62,6 +62,9 @@
+ #define SRV_AUTO_EXTEND_INCREMENT \
+ (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
+
++/* prototypes for new functions added to ha_innodb.cc */
++ibool innobase_get_slow_log();
++
+ /* This is set to TRUE if the MySQL user has set it in MySQL */
+ extern ibool srv_lower_case_table_names;
+
+diff -ruN a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
+--- a/storage/innobase/include/trx0trx.h 2010-12-03 15:41:52.049372966 +0900
++++ b/storage/innobase/include/trx0trx.h 2010-12-03 17:42:42.107024532 +0900
+@@ -728,6 +728,17 @@
+ /*------------------------------*/
+ char detailed_error[256]; /*!< detailed error message for last
+ error, or empty. */
++ /*------------------------------*/
++ ulint io_reads;
++ ib_uint64_t io_read;
++ ulint io_reads_wait_timer;
++ ib_uint64_t lock_que_wait_ustarted;
++ ulint lock_que_wait_timer;
++ ulint innodb_que_wait_timer;
++ ulint distinct_page_access;
++#define DPAH_SIZE 8192
++ byte* distinct_page_access_hash;
++ ibool take_stats;
+ };
+
+ #define TRX_MAX_N_THREADS 32 /* maximum number of
+diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
+--- a/storage/innobase/lock/lock0lock.c 2010-12-03 15:09:51.297986437 +0900
++++ b/storage/innobase/lock/lock0lock.c 2010-12-03 17:42:42.111024587 +0900
+@@ -1755,6 +1755,8 @@
+ {
+ lock_t* lock;
+ trx_t* trx;
++ ulint sec;
++ ulint ms;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+@@ -1813,6 +1815,10 @@
+ trx->que_state = TRX_QUE_LOCK_WAIT;
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ trx->wait_started = time(NULL);
++ if (innobase_get_slow_log() && trx->take_stats) {
++ ut_usectime(&sec, &ms);
++ trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
++ }
+
+ ut_a(que_thr_stop(thr));
+
+@@ -3692,6 +3698,8 @@
+ {
+ lock_t* lock;
+ trx_t* trx;
++ ulint sec;
++ ulint ms;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+@@ -3747,6 +3755,10 @@
+ return(DB_SUCCESS);
+ }
+
++ if (innobase_get_slow_log() && trx->take_stats) {
++ ut_usectime(&sec, &ms);
++ trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
++ }
+ trx->que_state = TRX_QUE_LOCK_WAIT;
+ trx->was_chosen_as_deadlock_victim = FALSE;
+ trx->wait_started = time(NULL);
+diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
+--- a/storage/innobase/os/os0file.c 2010-12-03 17:32:15.644024974 +0900
++++ b/storage/innobase/os/os0file.c 2010-12-03 17:42:42.117023467 +0900
+@@ -43,6 +43,8 @@
+ #include "srv0start.h"
+ #include "fil0fil.h"
+ #include "buf0buf.h"
++#include "trx0sys.h"
++#include "trx0trx.h"
+ #include "log0recv.h"
+ #ifndef UNIV_HOTBACKUP
+ # include "os0sync.h"
+@@ -2175,13 +2177,18 @@
+ ulint n, /*!< in: number of bytes to read */
+ ulint offset, /*!< in: least significant 32 bits of file
+ offset from where to read */
+- ulint offset_high) /*!< in: most significant 32 bits of
++ ulint offset_high, /*!< in: most significant 32 bits of
+ offset */
++ trx_t* trx)
+ {
+ off_t offs;
+ #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
+ ssize_t n_bytes;
+ #endif /* HAVE_PREAD && !HAVE_BROKEN_PREAD */
++ ulint sec;
++ ulint ms;
++ ib_uint64_t start_time;
++ ib_uint64_t finish_time;
+
+ ut_a((offset & 0xFFFFFFFFUL) == offset);
+
+@@ -2202,6 +2209,15 @@
+
+ os_n_file_reads++;
+
++ if (innobase_get_slow_log() && trx && trx->take_stats)
++ {
++ trx->io_reads++;
++ trx->io_read += n;
++ ut_usectime(&sec, &ms);
++ start_time = (ib_uint64_t)sec * 1000000 + ms;
++ } else {
++ start_time = 0;
++ }
+ #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
+ os_mutex_enter(os_file_count_mutex);
+ os_file_n_pending_preads++;
+@@ -2215,6 +2231,13 @@
+ os_n_pending_reads--;
+ os_mutex_exit(os_file_count_mutex);
+
++ if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
++ {
++ ut_usectime(&sec, &ms);
++ finish_time = (ib_uint64_t)sec * 1000000 + ms;
++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++ }
++
+ return(n_bytes);
+ #else
+ {
+@@ -2251,6 +2274,13 @@
+ os_n_pending_reads--;
+ os_mutex_exit(os_file_count_mutex);
+
++ if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
++ {
++ ut_usectime(&sec, &ms);
++ finish_time = (ib_uint64_t)sec * 1000000 + ms;
++ trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
++ }
++
+ return(ret);
+ }
+ #endif
+@@ -2391,7 +2421,8 @@
+ offset where to read */
+ ulint offset_high, /*!< in: most significant 32 bits of
+ offset */
+- ulint n) /*!< in: number of bytes to read */
++ ulint n, /*!< in: number of bytes to read */
++ trx_t* trx)
+ {
+ #ifdef __WIN__
+ BOOL ret;
+@@ -2463,7 +2494,7 @@
+ os_bytes_read_since_printout += n;
+
+ try_again:
+- ret = os_file_pread(file, buf, n, offset, offset_high);
++ ret = os_file_pread(file, buf, n, offset, offset_high, trx);
+
+ if ((ulint)ret == n) {
+
+@@ -2589,7 +2620,7 @@
+ os_bytes_read_since_printout += n;
+
+ try_again:
+- ret = os_file_pread(file, buf, n, offset, offset_high);
++ ret = os_file_pread(file, buf, n, offset, offset_high, NULL);
+
+ if ((ulint)ret == n) {
+
+@@ -3608,7 +3639,8 @@
+ offset */
+ ulint offset_high, /*!< in: most significant 32 bits of
+ offset */
+- ulint len) /*!< in: length of the block to read or write */
++ ulint len, /*!< in: length of the block to read or write */
++ trx_t* trx)
+ {
+ os_aio_slot_t* slot = NULL;
+ #ifdef WIN_ASYNC_IO
+@@ -3976,10 +4008,11 @@
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
+- void* message2)/*!< in: message for the aio handler
++ void* message2,/*!< in: message for the aio handler
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
++ trx_t* trx)
+ {
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+@@ -4017,8 +4050,8 @@
+ wait in the Windows case. */
+
+ if (type == OS_FILE_READ) {
+- return(os_file_read(file, buf, offset,
+- offset_high, n));
++ return(os_file_read_trx(file, buf, offset,
++ offset_high, n, trx));
+ }
+
+ ut_a(type == OS_FILE_WRITE);
+@@ -4056,8 +4089,13 @@
+ ut_error;
+ }
+
++ if (trx && type == OS_FILE_READ)
++ {
++ trx->io_reads++;
++ trx->io_read += n;
++ }
+ slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
+- name, buf, offset, offset_high, n);
++ name, buf, offset, offset_high, n, trx);
+ if (type == OS_FILE_READ) {
+ if (srv_use_native_aio) {
+ os_n_file_reads++;
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 17:32:15.648024399 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 17:45:05.067023254 +0900
+@@ -87,6 +87,9 @@
+ #include "mysql/plugin.h"
+ #include "mysql/service_thd_wait.h"
+
++/* prototypes for new functions added to ha_innodb.cc */
++ibool innobase_get_slow_log();
++
+ /* This is set to TRUE if the MySQL user has set it in MySQL; currently
+ affects only FOREIGN KEY definition parsing */
+ UNIV_INTERN ibool srv_lower_case_table_names = FALSE;
+@@ -1204,6 +1207,10 @@
+ ibool has_slept = FALSE;
+ srv_conc_slot_t* slot = NULL;
+ ulint i;
++ ib_uint64_t start_time = 0L;
++ ib_uint64_t finish_time = 0L;
++ ulint sec;
++ ulint ms;
+
+ if (trx->mysql_thd != NULL
+ && thd_is_replication_slave_thread(trx->mysql_thd)) {
+@@ -1280,6 +1287,7 @@
+ switches. */
+ if (SRV_THREAD_SLEEP_DELAY > 0) {
+ os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
++ trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY;
+ }
+
+ trx->op_info = "";
+@@ -1335,6 +1343,13 @@
+ /* Go to wait for the event; when a thread leaves InnoDB it will
+ release this thread */
+
++ if (innobase_get_slow_log() && trx->take_stats) {
++ ut_usectime(&sec, &ms);
++ start_time = (ib_uint64_t)sec * 1000000 + ms;
++ } else {
++ start_time = 0;
++ }
++
+ trx->op_info = "waiting in InnoDB queue";
+
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_TABLE_LOCK);
+@@ -1343,6 +1358,12 @@
+
+ trx->op_info = "";
+
++ if (innobase_get_slow_log() && trx->take_stats && start_time) {
++ ut_usectime(&sec, &ms);
++ finish_time = (ib_uint64_t)sec * 1000000 + ms;
++ trx->innodb_que_wait_timer += (ulint)(finish_time - start_time);
++ }
++
+ os_fast_mutex_lock(&srv_conc_mutex);
+
+ srv_conc_n_waiting_threads--;
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c 2010-12-03 15:41:52.053955669 +0900
++++ b/storage/innobase/trx/trx0trx.c 2010-12-03 17:42:42.127023410 +0900
+@@ -184,6 +184,15 @@
+ trx->global_read_view = NULL;
+ trx->read_view = NULL;
+
++ trx->io_reads = 0;
++ trx->io_read = 0;
++ trx->io_reads_wait_timer = 0;
++ trx->lock_que_wait_timer = 0;
++ trx->innodb_que_wait_timer = 0;
++ trx->distinct_page_access = 0;
++ trx->distinct_page_access_hash = NULL;
++ trx->take_stats = FALSE;
++
+ /* Set X/Open XA transaction identification to NULL */
+ memset(&trx->xid, 0, sizeof(trx->xid));
+ trx->xid.formatID = -1;
+@@ -221,6 +230,11 @@
+
+ trx->mysql_process_no = os_proc_get_number();
+
++ if (innobase_get_slow_log() && trx->take_stats) {
++ trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
++ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
++ }
++
+ return(trx);
+ }
+
+@@ -352,6 +366,12 @@
+ /*===============*/
+ trx_t* trx) /*!< in, own: trx object */
+ {
++ if (trx->distinct_page_access_hash)
++ {
++ mem_free(trx->distinct_page_access_hash);
++ trx->distinct_page_access_hash= NULL;
++ }
++
+ mutex_enter(&kernel_mutex);
+
+ UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
+@@ -373,6 +393,12 @@
+ /*====================*/
+ trx_t* trx) /*!< in, own: trx object */
+ {
++ if (trx->distinct_page_access_hash)
++ {
++ mem_free(trx->distinct_page_access_hash);
++ trx->distinct_page_access_hash= NULL;
++ }
++
+ mutex_enter(&kernel_mutex);
+
+ trx_free(trx);
+@@ -1094,6 +1120,9 @@
+ trx_t* trx) /*!< in: transaction */
+ {
+ que_thr_t* thr;
++ ulint sec;
++ ulint ms;
++ ib_uint64_t now;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
+@@ -1108,6 +1137,11 @@
+ thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+ }
+
++ if (innobase_get_slow_log() && trx->take_stats) {
++ ut_usectime(&sec, &ms);
++ now = (ib_uint64_t)sec * 1000000 + ms;
++ trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
++ }
+ trx->que_state = TRX_QUE_RUNNING;
+ }
+
+@@ -1121,6 +1155,9 @@
+ trx_t* trx) /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */
+ {
+ que_thr_t* thr;
++ ulint sec;
++ ulint ms;
++ ib_uint64_t now;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
+@@ -1135,6 +1172,11 @@
+ thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+ }
+
++ if (innobase_get_slow_log() && trx->take_stats) {
++ ut_usectime(&sec, &ms);
++ now = (ib_uint64_t)sec * 1000000 + ms;
++ trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
++ }
+ trx->que_state = TRX_QUE_RUNNING;
+ }
+
--- /dev/null
+# name : innodb_split_buf_pool_mutex.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
+--- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900
+@@ -4042,7 +4042,8 @@
+
+ mtr_commit(mtr);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ mutex_enter(&block->mutex);
+
+ /* Only free the block if it is still allocated to
+@@ -4053,17 +4054,22 @@
+ && buf_block_get_space(block) == space
+ && buf_block_get_page_no(block) == page_no) {
+
+- if (buf_LRU_free_block(&block->page, all, NULL)
++ if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
+ != BUF_LRU_FREED
+- && all && block->page.zip.data) {
++ && all && block->page.zip.data
++ /* Now, buf_LRU_free_block() may release mutex temporarily */
++ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
++ && buf_block_get_space(block) == space
++ && buf_block_get_page_no(block) == page_no) {
+ /* Attempt to deallocate the uncompressed page
+ if the whole block cannot be deallocted. */
+
+- buf_LRU_free_block(&block->page, FALSE, NULL);
++ buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ mutex_exit(&block->mutex);
+ }
+
+diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
+--- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900
++++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900
+@@ -1211,7 +1211,7 @@
+ ulint* offsets;
+
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter_all();
++ //buf_pool_mutex_enter_all();
+
+ table = btr_search_sys->hash_index;
+
+@@ -1220,6 +1220,8 @@
+
+ buf_pool = buf_pool_from_array(j);
+
++ mutex_enter(&buf_pool->LRU_list_mutex);
++
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+
+ while (bpage != NULL) {
+@@ -1301,9 +1303,11 @@
+
+ bpage = UT_LIST_GET_PREV(LRU, bpage);
+ }
++
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+
+- buf_pool_mutex_exit_all();
++ //buf_pool_mutex_exit_all();
+ rw_lock_x_unlock(&btr_search_latch);
+
+ if (UNIV_LIKELY_NULL(heap)) {
+@@ -1896,7 +1900,7 @@
+ rec_offs_init(offsets_);
+
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter_all();
++ buf_pool_page_hash_x_lock_all();
+
+ cell_count = hash_get_n_cells(btr_search_sys->hash_index);
+
+@@ -1904,11 +1908,11 @@
+ /* We release btr_search_latch every once in a while to
+ give other queries a chance to run. */
+ if ((i != 0) && ((i % chunk_size) == 0)) {
+- buf_pool_mutex_exit_all();
++ buf_pool_page_hash_x_unlock_all();
+ rw_lock_x_unlock(&btr_search_latch);
+ os_thread_yield();
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter_all();
++ buf_pool_page_hash_x_lock_all();
+ }
+
+ node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
+@@ -2019,11 +2023,11 @@
+ /* We release btr_search_latch every once in a while to
+ give other queries a chance to run. */
+ if (i != 0) {
+- buf_pool_mutex_exit_all();
++ buf_pool_page_hash_x_unlock_all();
+ rw_lock_x_unlock(&btr_search_latch);
+ os_thread_yield();
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter_all();
++ buf_pool_page_hash_x_lock_all();
+ }
+
+ if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
+@@ -2031,7 +2035,7 @@
+ }
+ }
+
+- buf_pool_mutex_exit_all();
++ buf_pool_page_hash_x_unlock_all();
+ rw_lock_x_unlock(&btr_search_latch);
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
+--- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
++++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
+@@ -73,10 +73,11 @@
+ if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
+ #endif /* UNIV_DEBUG_VALGRIND */
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+ ut_ad(buf_pool->zip_free[i].start != bpage);
+- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
++ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
+
+ #ifdef UNIV_DEBUG_VALGRIND
+ if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
+@@ -96,8 +97,8 @@
+ buf_pool->zip_free[] */
+ {
+ #ifdef UNIV_DEBUG_VALGRIND
+- buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
+- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
++ buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
++ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
+
+ if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
+ if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
+@@ -106,9 +107,10 @@
+ ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
+ #endif /* UNIV_DEBUG_VALGRIND */
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+- UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
++ UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
+
+ #ifdef UNIV_DEBUG_VALGRIND
+ if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
+@@ -128,12 +130,13 @@
+ {
+ buf_page_t* bpage;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ ut_a(i < BUF_BUDDY_SIZES);
+
+ #ifndef UNIV_DEBUG_VALGRIND
+ /* Valgrind would complain about accessing free memory. */
+- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+ ut_ad(buf_page_get_state(ut_list_node_313)
+ == BUF_BLOCK_ZIP_FREE)));
+ #endif /* !UNIV_DEBUG_VALGRIND */
+@@ -177,16 +180,19 @@
+ buf_buddy_block_free(
+ /*=================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+- void* buf) /*!< in: buffer frame to deallocate */
++ void* buf, /*!< in: buffer frame to deallocate */
++ ibool have_page_hash_mutex)
+ {
+ const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
+ buf_page_t* bpage;
+ buf_block_t* block;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
+
++ mutex_enter(&buf_pool->zip_hash_mutex);
++
+ HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
+ && bpage->in_zip_hash && !bpage->in_page_hash),
+@@ -198,12 +204,14 @@
+ ut_d(bpage->in_zip_hash = FALSE);
+ HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
+
++ mutex_exit(&buf_pool->zip_hash_mutex);
++
+ ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
+ UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
+
+ block = (buf_block_t*) bpage;
+ mutex_enter(&block->mutex);
+- buf_LRU_block_free_non_file_page(block);
++ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+ mutex_exit(&block->mutex);
+
+ ut_ad(buf_pool->buddy_n_frames > 0);
+@@ -220,7 +228,7 @@
+ {
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
+ const ulint fold = BUF_POOL_ZIP_FOLD(block);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
+
+@@ -232,7 +240,10 @@
+ ut_ad(!block->page.in_page_hash);
+ ut_ad(!block->page.in_zip_hash);
+ ut_d(block->page.in_zip_hash = TRUE);
++
++ mutex_enter(&buf_pool->zip_hash_mutex);
+ HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
++ mutex_exit(&buf_pool->zip_hash_mutex);
+
+ ut_d(buf_pool->buddy_n_frames++);
+ }
+@@ -268,7 +279,7 @@
+ bpage->state = BUF_BLOCK_ZIP_FREE;
+ #ifndef UNIV_DEBUG_VALGRIND
+ /* Valgrind would complain about accessing free memory. */
+- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+ ut_ad(buf_page_get_state(
+ ut_list_node_313)
+ == BUF_BLOCK_ZIP_FREE)));
+@@ -291,25 +302,29 @@
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
+- ibool* lru) /*!< in: pointer to a variable that
++ ibool* lru, /*!< in: pointer to a variable that
+ will be assigned TRUE if storage was
+ allocated from the LRU list and
+ buf_pool->mutex was temporarily
+ released, or NULL if the LRU list
+ should not be used */
++ ibool have_page_hash_mutex)
+ {
+ buf_block_t* block;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+
+ if (i < BUF_BUDDY_SIZES) {
+ /* Try to allocate from the buddy system. */
++ mutex_enter(&buf_pool->zip_free_mutex);
+ block = buf_buddy_alloc_zip(buf_pool, i);
+
+ if (block) {
+ goto func_exit;
+ }
++ mutex_exit(&buf_pool->zip_free_mutex);
+ }
+
+ /* Try allocating from the buf_pool->free list. */
+@@ -326,19 +341,30 @@
+ }
+
+ /* Try replacing an uncompressed page in the buffer pool. */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ if (have_page_hash_mutex) {
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ block = buf_LRU_get_free_block(buf_pool, 0);
+ *lru = TRUE;
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ if (have_page_hash_mutex) {
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ }
+
+ alloc_big:
+ buf_buddy_block_register(block);
+
++ mutex_enter(&buf_pool->zip_free_mutex);
+ block = buf_buddy_alloc_from(
+ buf_pool, block->frame, i, BUF_BUDDY_SIZES);
+
+ func_exit:
+ buf_pool->buddy_stat[i].used++;
++ mutex_exit(&buf_pool->zip_free_mutex);
++
+ return(block);
+ }
+
+@@ -355,7 +381,10 @@
+ buf_page_t* b;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+@@ -364,7 +393,7 @@
+ case BUF_BLOCK_FILE_PAGE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+- ut_error;
++ /* ut_error; */ /* optimistic: fall through to return(FALSE) */
+ case BUF_BLOCK_ZIP_DIRTY:
+ /* Cannot relocate dirty pages. */
+ return(FALSE);
+@@ -374,9 +403,18 @@
+ }
+
+ mutex_enter(&buf_pool->zip_mutex);
++ mutex_enter(&buf_pool->zip_free_mutex);
+
+ if (!buf_page_can_relocate(bpage)) {
+ mutex_exit(&buf_pool->zip_mutex);
++ mutex_exit(&buf_pool->zip_free_mutex);
++ return(FALSE);
++ }
++
++ if (bpage != buf_page_hash_get(buf_pool,
++ bpage->space, bpage->offset)) {
++ mutex_exit(&buf_pool->zip_mutex);
++ mutex_exit(&buf_pool->zip_free_mutex);
+ return(FALSE);
+ }
+
+@@ -384,18 +422,19 @@
+ ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
+
+ /* relocate buf_pool->zip_clean */
+- b = UT_LIST_GET_PREV(list, dpage);
+- UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
++ b = UT_LIST_GET_PREV(zip_list, dpage);
++ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
+
+ if (b) {
+- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
++ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
+ } else {
+- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
++ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
+ }
+
+ UNIV_MEM_INVALID(bpage, sizeof *bpage);
+
+ mutex_exit(&buf_pool->zip_mutex);
++ mutex_exit(&buf_pool->zip_free_mutex);
+ return(TRUE);
+ }
+
+@@ -409,14 +448,16 @@
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* src, /*!< in: block to relocate */
+ void* dst, /*!< in: free block to relocate to */
+- ulint i) /*!< in: index of
++ ulint i, /*!< in: index of
+ buf_pool->zip_free[] */
++ ibool have_page_hash_mutex)
+ {
+ buf_page_t* bpage;
+ const ulint size = BUF_BUDDY_LOW << i;
+ ullint usec = ut_time_us(NULL);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(!ut_align_offset(src, size));
+ ut_ad(!ut_align_offset(dst, size));
+@@ -438,6 +479,12 @@
+ /* This is a compressed page. */
+ mutex_t* mutex;
+
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->zip_free_mutex);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ }
++
+ /* The src block may be split into smaller blocks,
+ some of which may be free. Thus, the
+ mach_read_from_4() calls below may attempt to read
+@@ -462,6 +509,11 @@
+ added to buf_pool->page_hash yet. Obviously,
+ it cannot be relocated. */
+
++ if (!have_page_hash_mutex) {
++ mutex_enter(&buf_pool->zip_free_mutex);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ return(FALSE);
+ }
+
+@@ -473,18 +525,27 @@
+ For the sake of simplicity, give up. */
+ ut_ad(page_zip_get_size(&bpage->zip) < size);
+
++ if (!have_page_hash_mutex) {
++ mutex_enter(&buf_pool->zip_free_mutex);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ return(FALSE);
+ }
+
++ /* To keep latch order */
++ if (have_page_hash_mutex)
++ mutex_exit(&buf_pool->zip_free_mutex);
++
+ /* The block must have been allocated, but it may
+ contain uninitialized data. */
+ UNIV_MEM_ASSERT_W(src, size);
+
+- mutex = buf_page_get_mutex(bpage);
++ mutex = buf_page_get_mutex_enter(bpage);
+
+- mutex_enter(mutex);
++ mutex_enter(&buf_pool->zip_free_mutex);
+
+- if (buf_page_can_relocate(bpage)) {
++ if (mutex && buf_page_can_relocate(bpage)) {
+ /* Relocate the compressed page. */
+ ut_a(bpage->zip.data == src);
+ memcpy(dst, src, size);
+@@ -499,10 +560,22 @@
+ buddy_stat->relocated_usec
+ += ut_time_us(NULL) - usec;
+ }
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ return(TRUE);
+ }
+
+- mutex_exit(mutex);
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
++
++ if (mutex) {
++ mutex_exit(mutex);
++ }
+ } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
+ /* This must be a buf_page_t object. */
+ #if UNIV_WORD_SIZE == 4
+@@ -511,10 +584,31 @@
+ about uninitialized pad bytes. */
+ UNIV_MEM_ASSERT_RW(src, size);
+ #endif
++
++ mutex_exit(&buf_pool->zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ }
++
+ if (buf_buddy_relocate_block(src, dst)) {
++ mutex_enter(&buf_pool->zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+
+ goto success;
+ }
++
++ mutex_enter(&buf_pool->zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ }
+
+ return(FALSE);
+@@ -529,13 +623,15 @@
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint i) /*!< in: index of buf_pool->zip_free[],
++ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
++ ibool have_page_hash_mutex)
+ {
+ buf_page_t* bpage;
+ buf_page_t* buddy;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(i <= BUF_BUDDY_SIZES);
+ ut_ad(buf_pool->buddy_stat[i].used > 0);
+@@ -546,7 +642,9 @@
+ ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
+
+ if (i == BUF_BUDDY_SIZES) {
+- buf_buddy_block_free(buf_pool, buf);
++ mutex_exit(&buf_pool->zip_free_mutex);
++ buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
++ mutex_enter(&buf_pool->zip_free_mutex);
+ return;
+ }
+
+@@ -591,7 +689,7 @@
+ ut_a(bpage != buf);
+
+ {
+- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
++ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
+ UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
+ bpage = next;
+ }
+@@ -600,13 +698,13 @@
+ #ifndef UNIV_DEBUG_VALGRIND
+ buddy_nonfree:
+ /* Valgrind would complain about accessing free memory. */
+- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+ ut_ad(buf_page_get_state(ut_list_node_313)
+ == BUF_BLOCK_ZIP_FREE)));
+ #endif /* UNIV_DEBUG_VALGRIND */
+
+ /* The buddy is not free. Is there a free block of this size? */
+- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
++ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
+
+ if (bpage) {
+ /* Remove the block from the free list, because a successful
+@@ -616,7 +714,7 @@
+ buf_buddy_remove_from_free(buf_pool, bpage, i);
+
+ /* Try to relocate the buddy of buf to the free block. */
+- if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
++ if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
+
+ ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
+ goto buddy_free2;
+@@ -636,14 +734,14 @@
+
+ (Parts of the buddy can be free in
+ buf_pool->zip_free[j] with j < i.) */
+- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+ ut_ad(buf_page_get_state(
+ ut_list_node_313)
+ == BUF_BLOCK_ZIP_FREE
+ && ut_list_node_313 != buddy)));
+ #endif /* !UNIV_DEBUG_VALGRIND */
+
+- if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
++ if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
+
+ buf = bpage;
+ UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900
+@@ -263,6 +263,7 @@
+ #ifdef UNIV_PFS_RWLOCK
+ /* Keys to register buffer block related rwlocks and mutexes with
+ performance schema */
++UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
+ UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
+ # ifdef UNIV_SYNC_DEBUG
+ UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
+@@ -273,6 +274,10 @@
+ UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
++UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
++UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
++UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
++UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
+ #endif /* UNIV_PFS_MUTEX */
+
+@@ -881,9 +886,9 @@
+ block->page.in_zip_hash = FALSE;
+ block->page.in_flush_list = FALSE;
+ block->page.in_free_list = FALSE;
+- block->in_unzip_LRU_list = FALSE;
+ #endif /* UNIV_DEBUG */
+ block->page.in_LRU_list = FALSE;
++ block->in_unzip_LRU_list = FALSE;
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ block->n_pointers = 0;
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+@@ -981,9 +986,11 @@
+ memset(block->frame, '\0', UNIV_PAGE_SIZE);
+ #endif
+ /* Add the block to the free list */
+- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
++ mutex_enter(&buf_pool->free_list_mutex);
++ UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
+
+ ut_d(block->page.in_free_list = TRUE);
++ mutex_exit(&buf_pool->free_list_mutex);
+ ut_ad(buf_pool_from_block(block) == buf_pool);
+
+ block++;
+@@ -1038,7 +1045,8 @@
+ buf_chunk_t* chunk = buf_pool->chunks;
+
+ ut_ad(buf_pool);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ for (n = buf_pool->n_chunks; n--; chunk++) {
+
+ buf_block_t* block = buf_chunk_contains_zip(chunk, data);
+@@ -1138,7 +1146,7 @@
+ buf_block_t* block;
+ const buf_block_t* block_end;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutexes here */
+
+ block_end = chunk->blocks + chunk->size;
+
+@@ -1150,8 +1158,10 @@
+ ut_ad(!block->in_unzip_LRU_list);
+ ut_ad(!block->page.in_flush_list);
+ /* Remove the block from the free list. */
++ mutex_enter(&buf_pool->free_list_mutex);
+ ut_ad(block->page.in_free_list);
+- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
++ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
++ mutex_exit(&buf_pool->free_list_mutex);
+
+ /* Free the latches. */
+ mutex_free(&block->mutex);
+@@ -1208,9 +1218,21 @@
+ ------------------------------- */
+ mutex_create(buf_pool_mutex_key,
+ &buf_pool->mutex, SYNC_BUF_POOL);
++ mutex_create(buf_pool_LRU_list_mutex_key,
++ &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
++ rw_lock_create(buf_pool_page_hash_key,
++ &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
++ mutex_create(buf_pool_free_list_mutex_key,
++ &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
++ mutex_create(buf_pool_zip_free_mutex_key,
++ &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
++ mutex_create(buf_pool_zip_hash_mutex_key,
++ &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
+ mutex_create(buf_pool_zip_mutex_key,
+ &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
+
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_enter(buf_pool);
+
+ if (buf_pool_size > 0) {
+@@ -1223,6 +1245,8 @@
+ mem_free(chunk);
+ mem_free(buf_pool);
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_exit(buf_pool);
+
+ return(DB_ERROR);
+@@ -1253,6 +1277,8 @@
+
+ /* All fields are initialized by mem_zalloc(). */
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_exit(buf_pool);
+
+ return(DB_SUCCESS);
+@@ -1469,7 +1495,11 @@
+ ulint fold;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+ ut_a(bpage->buf_fix_count == 0);
+@@ -1556,7 +1586,8 @@
+
+ try_again:
+ btr_search_disable(); /* Empty the adaptive hash index again */
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ shrink_again:
+ if (buf_pool->n_chunks <= 1) {
+@@ -1627,7 +1658,7 @@
+
+ buf_LRU_make_block_old(&block->page);
+ dirty++;
+- } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
++ } else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
+ != BUF_LRU_FREED) {
+ nonfree++;
+ }
+@@ -1635,7 +1666,8 @@
+ mutex_exit(&block->mutex);
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ /* Request for a flush of the chunk if it helps.
+ Do not flush if there are non-free blocks, since
+@@ -1685,7 +1717,8 @@
+ func_done:
+ buf_pool->old_pool_size = buf_pool->curr_pool_size;
+ func_exit:
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ btr_search_enable();
+ }
+
+@@ -1726,7 +1759,9 @@
+ hash_table_t* zip_hash;
+ hash_table_t* page_hash;
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+
+ /* Free, create, and populate the hash table. */
+ hash_table_free(buf_pool->page_hash);
+@@ -1767,8 +1802,9 @@
+ All such blocks are either in buf_pool->zip_clean or
+ in buf_pool->flush_list. */
+
++ mutex_enter(&buf_pool->zip_mutex);
+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(zip_list, b)) {
+ ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+ ut_ad(!b->in_flush_list);
+ ut_ad(b->in_LRU_list);
+@@ -1778,10 +1814,11 @@
+ HASH_INSERT(buf_page_t, hash, page_hash,
+ buf_page_address_fold(b->space, b->offset), b);
+ }
++ mutex_exit(&buf_pool->zip_mutex);
+
+ buf_flush_list_mutex_enter(buf_pool);
+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(flush_list, b)) {
+ ut_ad(b->in_flush_list);
+ ut_ad(b->in_LRU_list);
+ ut_ad(b->in_page_hash);
+@@ -1808,7 +1845,9 @@
+ }
+
+ buf_flush_list_mutex_exit(buf_pool);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+
+ /********************************************************************
+@@ -1855,21 +1894,32 @@
+ buf_page_t* bpage;
+ ulint i;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
++ mutex_t* block_mutex;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+ bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
++ if (bpage) {
++ block_mutex = buf_page_get_mutex_enter(bpage);
++ ut_a(block_mutex);
++ }
+
+ if (UNIV_LIKELY_NULL(bpage)) {
+ if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
+ /* The page was loaded meanwhile. */
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ return(bpage);
+ }
+ /* Add to an existing watch. */
+ bpage->buf_fix_count++;
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ mutex_exit(block_mutex);
+ return(NULL);
+ }
+
++ /* buf_pool->watch is protected by zip_mutex for now */
++ mutex_enter(&buf_pool->zip_mutex);
+ for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
+ bpage = &buf_pool->watch[i];
+
+@@ -1897,6 +1947,8 @@
+ ut_d(bpage->in_page_hash = TRUE);
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+ fold, bpage);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ mutex_exit(&buf_pool->zip_mutex);
+ return(NULL);
+ case BUF_BLOCK_ZIP_PAGE:
+ ut_ad(bpage->in_page_hash);
+@@ -1914,6 +1966,8 @@
+ ut_error;
+
+ /* Fix compiler warning */
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ mutex_exit(&buf_pool->zip_mutex);
+ return(NULL);
+ }
+
+@@ -1943,6 +1997,8 @@
+ buf_chunk_t* chunks;
+ buf_chunk_t* chunk;
+
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_enter(buf_pool);
+ chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
+
+@@ -1961,6 +2017,8 @@
+ buf_pool->n_chunks++;
+ }
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_exit(buf_pool);
+ }
+
+@@ -2048,7 +2106,11 @@
+ space, offset) */
+ buf_page_t* watch) /*!< in/out: sentinel for watch */
+ {
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
++ ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
+
+ HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
+ ut_d(watch->in_page_hash = FALSE);
+@@ -2070,28 +2132,31 @@
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ ulint fold = buf_page_address_fold(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+ bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ /* The page must exist because buf_pool_watch_set()
+ increments buf_fix_count. */
+ ut_a(bpage);
+
+ if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
+- mutex_t* mutex = buf_page_get_mutex(bpage);
++ mutex_t* mutex = buf_page_get_mutex_enter(bpage);
+
+- mutex_enter(mutex);
+ ut_a(bpage->buf_fix_count > 0);
+ bpage->buf_fix_count--;
+ mutex_exit(mutex);
+ } else {
++ mutex_enter(&buf_pool->zip_mutex);
+ ut_a(bpage->buf_fix_count > 0);
+
+ if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
+ buf_pool_watch_remove(buf_pool, fold, bpage);
+ }
++ mutex_exit(&buf_pool->zip_mutex);
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+
+ /****************************************************************//**
+@@ -2111,14 +2176,16 @@
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ ulint fold = buf_page_address_fold(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ /* The page must exist because buf_pool_watch_set()
+ increments buf_fix_count. */
+ ut_a(bpage);
+ ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(ret);
+ }
+@@ -2135,13 +2202,15 @@
+ {
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ ut_a(buf_page_in_file(bpage));
+
+ buf_LRU_make_block_young(bpage);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+
+ /********************************************************************//**
+@@ -2165,14 +2234,20 @@
+ ut_a(buf_page_in_file(bpage));
+
+ if (buf_page_peek_if_too_old(bpage)) {
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ buf_LRU_make_block_young(bpage);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ } else if (!access_time) {
+ ulint time_ms = ut_time_ms();
+- buf_pool_mutex_enter(buf_pool);
++ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
++ //buf_pool_mutex_enter(buf_pool);
++ if (block_mutex) {
+ buf_page_set_accessed(bpage, time_ms);
+- buf_pool_mutex_exit(buf_pool);
++ mutex_exit(block_mutex);
++ }
++ //buf_pool_mutex_exit(buf_pool);
+ }
+ }
+
+@@ -2189,7 +2264,8 @@
+ buf_block_t* block;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
+
+@@ -2198,7 +2274,8 @@
+ block->check_index_page_at_flush = FALSE;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ }
+
+ /********************************************************************//**
+@@ -2217,7 +2294,8 @@
+ ibool is_hashed;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
+
+@@ -2228,7 +2306,8 @@
+ is_hashed = block->is_hashed;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(is_hashed);
+ }
+@@ -2250,7 +2329,8 @@
+ buf_page_t* bpage;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+
+@@ -2259,7 +2339,8 @@
+ bpage->file_page_was_freed = TRUE;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(bpage);
+ }
+@@ -2280,7 +2361,8 @@
+ buf_page_t* bpage;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+
+@@ -2289,7 +2371,8 @@
+ bpage->file_page_was_freed = FALSE;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(bpage);
+ }
+@@ -2324,8 +2407,9 @@
+ buf_pool->stat.n_page_gets++;
+
+ for (;;) {
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+ lookup:
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+ if (bpage) {
+ ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
+@@ -2334,7 +2418,8 @@
+
+ /* Page not in buf_pool: needs to be read from file */
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ buf_read_page(space, zip_size, offset);
+
+@@ -2346,10 +2431,15 @@
+ if (UNIV_UNLIKELY(!bpage->zip.data)) {
+ /* There is no compressed page. */
+ err_exit:
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ return(NULL);
+ }
+
++ block_mutex = buf_page_get_mutex_enter(bpage);
++
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
++
+ ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
+
+ switch (buf_page_get_state(bpage)) {
+@@ -2358,19 +2448,19 @@
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ case BUF_BLOCK_ZIP_FREE:
++ if (block_mutex)
++ mutex_exit(block_mutex);
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+- block_mutex = &buf_pool->zip_mutex;
+- mutex_enter(block_mutex);
++ ut_a(block_mutex == &buf_pool->zip_mutex);
+ bpage->buf_fix_count++;
+ goto got_block;
+ case BUF_BLOCK_FILE_PAGE:
+- block_mutex = &((buf_block_t*) bpage)->mutex;
+- mutex_enter(block_mutex);
++ ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
+
+ /* Discard the uncompressed page frame if possible. */
+- if (buf_LRU_free_block(bpage, FALSE, NULL)
++ if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
+ == BUF_LRU_FREED) {
+
+ mutex_exit(block_mutex);
+@@ -2389,7 +2479,7 @@
+ must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
+ access_time = buf_page_is_accessed(bpage);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ mutex_exit(block_mutex);
+
+@@ -2698,7 +2788,7 @@
+ const buf_block_t* block) /*!< in: pointer to block,
+ not dereferenced */
+ {
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+ if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
+ /* The pointer should be aligned. */
+@@ -2734,6 +2824,7 @@
+ ulint fix_type;
+ ibool must_read;
+ ulint retries = 0;
++ mutex_t* block_mutex = NULL;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+ ut_ad(mtr);
+@@ -2755,9 +2846,11 @@
+ fold = buf_page_address_fold(space, offset);
+ loop:
+ block = guess;
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+
+ if (block) {
++ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++
+ /* If the guess is a compressed page descriptor that
+ has been allocated by buf_buddy_alloc(), it may have
+ been invalidated by buf_buddy_relocate(). In that
+@@ -2766,11 +2859,15 @@
+ the guess may be pointing to a buffer pool chunk that
+ has been released when resizing the buffer pool. */
+
+- if (!buf_block_is_uncompressed(buf_pool, block)
++ if (!block_mutex) {
++ block = guess = NULL;
++ } else if (!buf_block_is_uncompressed(buf_pool, block)
+ || offset != block->page.offset
+ || space != block->page.space
+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+
++ mutex_exit(block_mutex);
++
+ block = guess = NULL;
+ } else {
+ ut_ad(!block->page.in_zip_hash);
+@@ -2779,12 +2876,19 @@
+ }
+
+ if (block == NULL) {
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+ block = (buf_block_t*) buf_page_hash_get_low(
+ buf_pool, space, offset, fold);
++ if (block) {
++ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++ ut_a(block_mutex);
++ }
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ }
+
+ loop2:
+ if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
++ mutex_exit(block_mutex);
+ block = NULL;
+ }
+
+@@ -2796,12 +2900,14 @@
+ space, offset, fold);
+
+ if (UNIV_LIKELY_NULL(block)) {
+-
++ block_mutex = buf_page_get_mutex((buf_page_t*)block);
++ ut_a(block_mutex);
++ ut_ad(mutex_own(block_mutex));
+ goto got_block;
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ if (mode == BUF_GET_IF_IN_POOL
+ || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
+@@ -2849,7 +2955,8 @@
+ /* The page is being read to buffer pool,
+ but we cannot wait around for the read to
+ complete. */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(block_mutex);
+
+ return(NULL);
+ }
+@@ -2859,38 +2966,49 @@
+ ibool success;
+
+ case BUF_BLOCK_FILE_PAGE:
++ if (block_mutex == &buf_pool->zip_mutex) {
++ /* the page is a file page but we hold zip_mutex: wrong mutex, release it and retry */
++ mutex_exit(block_mutex);
++ goto loop;
++ }
+ break;
+
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
++ ut_ad(block_mutex == &buf_pool->zip_mutex);
+ bpage = &block->page;
+ /* Protect bpage->buf_fix_count. */
+- mutex_enter(&buf_pool->zip_mutex);
++ //mutex_enter(&buf_pool->zip_mutex);
+
+ if (bpage->buf_fix_count
+ || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ /* This condition often occurs when the buffer
+ is not buffer-fixed, but I/O-fixed by
+ buf_page_init_for_read(). */
+- mutex_exit(&buf_pool->zip_mutex);
++ //mutex_exit(&buf_pool->zip_mutex);
+ wait_until_unfixed:
+ /* The block is buffer-fixed or I/O-fixed.
+ Try again later. */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(block_mutex);
+ os_thread_sleep(WAIT_FOR_READ);
+
+ goto loop;
+ }
+
+ /* Allocate an uncompressed page. */
+- buf_pool_mutex_exit(buf_pool);
+- mutex_exit(&buf_pool->zip_mutex);
++ //buf_pool_mutex_exit(buf_pool);
++ //mutex_exit(&buf_pool->zip_mutex);
++ mutex_exit(block_mutex);
+
+ block = buf_LRU_get_free_block(buf_pool, 0);
+ ut_a(block);
++ block_mutex = &block->mutex;
+
+- buf_pool_mutex_enter(buf_pool);
+- mutex_enter(&block->mutex);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ mutex_enter(block_mutex);
+
+ {
+ buf_page_t* hash_bpage;
+@@ -2903,35 +3021,47 @@
+ while buf_pool->mutex was released.
+ Free the block that was allocated. */
+
+- buf_LRU_block_free_non_file_page(block);
+- mutex_exit(&block->mutex);
++ buf_LRU_block_free_non_file_page(block, TRUE);
++ mutex_exit(block_mutex);
+
+ block = (buf_block_t*) hash_bpage;
++ if (block) {
++ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++ ut_a(block_mutex);
++ }
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ goto loop2;
+ }
+ }
+
++ mutex_enter(&buf_pool->zip_mutex);
++
+ if (UNIV_UNLIKELY
+ (bpage->buf_fix_count
+ || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
+
++ mutex_exit(&buf_pool->zip_mutex);
+ /* The block was buffer-fixed or I/O-fixed
+ while buf_pool->mutex was not held by this thread.
+ Free the block that was allocated and try again.
+ This should be extremely unlikely. */
+
+- buf_LRU_block_free_non_file_page(block);
+- mutex_exit(&block->mutex);
++ buf_LRU_block_free_non_file_page(block, TRUE);
++ //mutex_exit(&block->mutex);
+
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ goto wait_until_unfixed;
+ }
+
+ /* Move the compressed page from bpage to block,
+ and uncompress it. */
+
+- mutex_enter(&buf_pool->zip_mutex);
+-
+ buf_relocate(bpage, &block->page);
++
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+ buf_block_init_low(block);
+ block->lock_hash_val = lock_rec_hash(space, offset);
+
+@@ -2940,7 +3070,7 @@
+
+ if (buf_page_get_state(&block->page)
+ == BUF_BLOCK_ZIP_PAGE) {
+- UT_LIST_REMOVE(list, buf_pool->zip_clean,
++ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
+ &block->page);
+ ut_ad(!block->page.in_flush_list);
+ } else {
+@@ -2957,19 +3087,24 @@
+ /* Insert at the front of unzip_LRU list */
+ buf_unzip_LRU_add_block(block, FALSE);
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
++
+ block->page.buf_fix_count = 1;
+ buf_block_set_io_fix(block, BUF_IO_READ);
+ rw_lock_x_lock_func(&block->lock, 0, file, line);
+
+ UNIV_MEM_INVALID(bpage, sizeof *bpage);
+
+- mutex_exit(&block->mutex);
++ mutex_exit(block_mutex);
+ mutex_exit(&buf_pool->zip_mutex);
++
++ buf_pool_mutex_enter(buf_pool);
+ buf_pool->n_pend_unzip++;
++ buf_pool_mutex_exit(buf_pool);
+
+- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
++ buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ /* Decompress the page and apply buffered operations
+ while not holding buf_pool->mutex or block->mutex. */
+@@ -2982,12 +3117,15 @@
+ }
+
+ /* Unfix and unlatch the block. */
+- buf_pool_mutex_enter(buf_pool);
+- mutex_enter(&block->mutex);
++ //buf_pool_mutex_enter(buf_pool);
++ block_mutex = &block->mutex;
++ mutex_enter(block_mutex);
+ block->page.buf_fix_count--;
+ buf_block_set_io_fix(block, BUF_IO_NONE);
+- mutex_exit(&block->mutex);
++
++ buf_pool_mutex_enter(buf_pool);
+ buf_pool->n_pend_unzip--;
++ buf_pool_mutex_exit(buf_pool);
+ rw_lock_x_unlock(&block->lock);
+
+ break;
+@@ -3003,7 +3141,7 @@
+
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+- mutex_enter(&block->mutex);
++ //mutex_enter(&block->mutex);
+ #if UNIV_WORD_SIZE == 4
+ /* On 32-bit systems, there is no padding in buf_page_t. On
+ other systems, Valgrind could complain about uninitialized pad
+@@ -3013,13 +3151,14 @@
+
+ buf_block_buf_fix_inc(block, file, line);
+
+- mutex_exit(&block->mutex);
++ //mutex_exit(&block->mutex);
+
+ /* Check if this is the first access to the page */
+
+ access_time = buf_page_is_accessed(&block->page);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(block_mutex);
+
+ buf_page_set_accessed_make_young(&block->page, access_time);
+
+@@ -3252,9 +3391,11 @@
+ buf_pool = buf_pool_from_block(block);
+
+ if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ buf_LRU_make_block_young(&block->page);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ } else if (!buf_page_is_accessed(&block->page)) {
+ /* Above, we do a dirty read on purpose, to avoid
+ mutex contention. The field buf_page_t::access_time
+@@ -3262,9 +3403,11 @@
+ field must be protected by mutex, however. */
+ ulint time_ms = ut_time_ms();
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&block->mutex);
+ buf_page_set_accessed(&block->page, time_ms);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&block->mutex);
+ }
+
+ ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
+@@ -3331,18 +3474,21 @@
+ ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+ block = buf_block_hash_get(buf_pool, space_id, page_no);
+
+ if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ return(NULL);
+ }
+
+ ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
+
+ mutex_enter(&block->mutex);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+@@ -3431,7 +3577,10 @@
+ buf_page_t* hash_page;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+ ut_ad(mutex_own(&(block->mutex)));
+ ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+
+@@ -3460,11 +3609,14 @@
+ if (UNIV_LIKELY(!hash_page)) {
+ } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
+ /* Preserve the reference count. */
+- ulint buf_fix_count = hash_page->buf_fix_count;
++ ulint buf_fix_count;
+
++ mutex_enter(&buf_pool->zip_mutex);
++ buf_fix_count = hash_page->buf_fix_count;
+ ut_a(buf_fix_count > 0);
+ block->page.buf_fix_count += buf_fix_count;
+ buf_pool_watch_remove(buf_pool, fold, hash_page);
++ mutex_exit(&buf_pool->zip_mutex);
+ } else {
+ fprintf(stderr,
+ "InnoDB: Error: page %lu %lu already found"
+@@ -3474,7 +3626,8 @@
+ (const void*) hash_page, (const void*) block);
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ mutex_exit(&block->mutex);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_print();
+ buf_LRU_print();
+ buf_validate();
+@@ -3558,7 +3711,9 @@
+
+ fold = buf_page_address_fold(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+
+ watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
+@@ -3567,9 +3722,15 @@
+ err_exit:
+ if (block) {
+ mutex_enter(&block->mutex);
+- buf_LRU_block_free_non_file_page(block);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ buf_LRU_block_free_non_file_page(block, FALSE);
+ mutex_exit(&block->mutex);
+ }
++ else {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+
+ bpage = NULL;
+ goto func_exit;
+@@ -3592,6 +3753,8 @@
+
+ buf_page_init(space, offset, fold, block);
+
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+ /* The block must be put to the LRU list, to the old blocks */
+ buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+
+@@ -3619,7 +3782,7 @@
+ been added to buf_pool->LRU and
+ buf_pool->page_hash. */
+ mutex_exit(&block->mutex);
+- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
++ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
+ mutex_enter(&block->mutex);
+ block->page.zip.data = data;
+
+@@ -3632,6 +3795,7 @@
+ buf_unzip_LRU_add_block(block, TRUE);
+ }
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ mutex_exit(&block->mutex);
+ } else {
+ /* Defer buf_buddy_alloc() until after the block has
+@@ -3643,8 +3807,8 @@
+ control block (bpage), in order to avoid the
+ invocation of buf_buddy_relocate_block() on
+ uninitialized data. */
+- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
+- bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
++ data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
++ bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
+
+ /* Initialize the buf_pool pointer. */
+ bpage->buf_pool = buf_pool;
+@@ -3663,8 +3827,11 @@
+
+ /* The block was added by some other thread. */
+ watch_page = NULL;
+- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
+- buf_buddy_free(buf_pool, data, zip_size);
++ buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
++ buf_buddy_free(buf_pool, data, zip_size, TRUE);
++
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ bpage = NULL;
+ goto func_exit;
+@@ -3708,18 +3875,24 @@
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
+ bpage);
+
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+ /* The block must be put to the LRU list, to the old blocks */
+ buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+ buf_LRU_insert_zip_clean(bpage);
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
++
+ buf_page_set_io_fix(bpage, BUF_IO_READ);
+
+ mutex_exit(&buf_pool->zip_mutex);
+ }
+
++ buf_pool_mutex_enter(buf_pool);
+ buf_pool->n_pend_reads++;
+-func_exit:
+ buf_pool_mutex_exit(buf_pool);
++func_exit:
++ //buf_pool_mutex_exit(buf_pool);
+
+ if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+
+@@ -3761,7 +3934,9 @@
+
+ fold = buf_page_address_fold(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+
+ block = (buf_block_t*) buf_page_hash_get_low(
+ buf_pool, space, offset, fold);
+@@ -3777,7 +3952,9 @@
+ #endif /* UNIV_DEBUG_FILE_ACCESSES */
+
+ /* Page can be found in buf_pool */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ buf_block_free(free_block);
+
+@@ -3799,6 +3976,7 @@
+ mutex_enter(&block->mutex);
+
+ buf_page_init(space, offset, fold, block);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ /* The block must be put to the LRU list */
+ buf_LRU_add_block(&block->page, FALSE);
+@@ -3825,7 +4003,7 @@
+ the reacquisition of buf_pool->mutex. We also must
+ defer this operation until after the block descriptor
+ has been added to buf_pool->LRU and buf_pool->page_hash. */
+- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
++ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
+ mutex_enter(&block->mutex);
+ block->page.zip.data = data;
+
+@@ -3843,7 +4021,8 @@
+
+ buf_page_set_accessed(&block->page, time_ms);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+
+@@ -3894,6 +4073,8 @@
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ const ibool uncompressed = (buf_page_get_state(bpage)
+ == BUF_BLOCK_FILE_PAGE);
++ ibool have_LRU_mutex = FALSE;
++ mutex_t* block_mutex;
+
+ ut_a(buf_page_in_file(bpage));
+
+@@ -4027,8 +4208,26 @@
+ }
+ }
+
++ if (io_type == BUF_IO_WRITE
++ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
++ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
++ /* to keep consistency at buf_LRU_insert_zip_clean() */
++ have_LRU_mutex = TRUE; /* optimistic */
++ }
++retry_mutex:
++ if (have_LRU_mutex)
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ block_mutex = buf_page_get_mutex_enter(bpage);
++ ut_a(block_mutex);
++ if (io_type == BUF_IO_WRITE
++ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
++ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
++ && !have_LRU_mutex) {
++ mutex_exit(block_mutex);
++ have_LRU_mutex = TRUE;
++ goto retry_mutex;
++ }
+ buf_pool_mutex_enter(buf_pool);
+- mutex_enter(buf_page_get_mutex(bpage));
+
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+ if (io_type == BUF_IO_WRITE || uncompressed) {
+@@ -4051,6 +4250,7 @@
+ the x-latch to this OS thread: do not let this confuse you in
+ debugging! */
+
++ ut_a(!have_LRU_mutex);
+ ut_ad(buf_pool->n_pend_reads > 0);
+ buf_pool->n_pend_reads--;
+ buf_pool->stat.n_pages_read++;
+@@ -4068,6 +4268,9 @@
+
+ buf_flush_write_complete(bpage);
+
++ if (have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
++
+ if (uncompressed) {
+ rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
+ BUF_IO_WRITE);
+@@ -4090,8 +4293,8 @@
+ }
+ #endif /* UNIV_DEBUG */
+
+- mutex_exit(buf_page_get_mutex(bpage));
+ buf_pool_mutex_exit(buf_pool);
++ mutex_exit(block_mutex);
+ }
+
+ /*********************************************************************//**
+@@ -4108,7 +4311,9 @@
+
+ ut_ad(buf_pool);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+
+ chunk = buf_pool->chunks;
+
+@@ -4125,7 +4330,9 @@
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ return(TRUE);
+ }
+@@ -4173,7 +4380,8 @@
+ freed = buf_LRU_search_and_free_block(buf_pool, 100);
+ }
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+ ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
+@@ -4186,7 +4394,8 @@
+ memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
+ buf_refresh_io_stats(buf_pool);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+
+ /*********************************************************************//**
+@@ -4228,7 +4437,10 @@
+
+ ut_ad(buf_pool);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ /* to keep the new latch order, this function cannot validate everything correctly... */
+
+ chunk = buf_pool->chunks;
+
+@@ -4323,7 +4535,7 @@
+ /* Check clean compressed-only blocks. */
+
+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(zip_list, b)) {
+ ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+ switch (buf_page_get_io_fix(b)) {
+ case BUF_IO_NONE:
+@@ -4354,7 +4566,7 @@
+
+ buf_flush_list_mutex_enter(buf_pool);
+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(flush_list, b)) {
+ ut_ad(b->in_flush_list);
+ ut_a(b->oldest_modification);
+ n_flush++;
+@@ -4413,6 +4625,8 @@
+ }
+
+ ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
++ /* because of the latching order with block->mutex, the mutexes needed by the checks below cannot be acquired here, so they are disabled */
++/*
+ if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
+ fprintf(stderr, "Free list len %lu, free blocks %lu\n",
+ (ulong) UT_LIST_GET_LEN(buf_pool->free),
+@@ -4423,8 +4637,11 @@
+ ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
+ ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
+ ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
++*/
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ ut_a(buf_LRU_validate());
+ ut_a(buf_flush_validate(buf_pool));
+@@ -4480,7 +4697,9 @@
+ index_ids = mem_alloc(size * sizeof *index_ids);
+ counts = mem_alloc(sizeof(ulint) * size);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ mutex_enter(&buf_pool->free_list_mutex);
+ buf_flush_list_mutex_enter(buf_pool);
+
+ fprintf(stderr,
+@@ -4549,7 +4768,9 @@
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ mutex_exit(&buf_pool->free_list_mutex);
+
+ for (i = 0; i < n_found; i++) {
+ index = dict_index_get_if_in_cache(index_ids[i]);
+@@ -4606,7 +4827,7 @@
+ buf_chunk_t* chunk;
+ ulint fixed_pages_number = 0;
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+
+ chunk = buf_pool->chunks;
+
+@@ -4640,7 +4861,7 @@
+ /* Traverse the lists of clean and dirty compressed-only blocks. */
+
+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(zip_list, b)) {
+ ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+ ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
+
+@@ -4652,7 +4873,7 @@
+
+ buf_flush_list_mutex_enter(buf_pool);
+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(flush_list, b)) {
+ ut_ad(b->in_flush_list);
+
+ switch (buf_page_get_state(b)) {
+@@ -4678,7 +4899,7 @@
+
+ buf_flush_list_mutex_exit(buf_pool);
+ mutex_exit(&buf_pool->zip_mutex);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ return(fixed_pages_number);
+ }
+@@ -4772,6 +4993,8 @@
+
+ ut_ad(buf_pool);
+
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ mutex_enter(&buf_pool->free_list_mutex);
+ buf_pool_mutex_enter(buf_pool);
+ buf_flush_list_mutex_enter(buf_pool);
+
+@@ -4875,6 +5098,8 @@
+ buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
+
+ buf_refresh_io_stats(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ mutex_exit(&buf_pool->free_list_mutex);
+ buf_pool_mutex_exit(buf_pool);
+ }
+
+@@ -4994,11 +5219,13 @@
+ {
+ ulint len;
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->free_list_mutex);
+
+ len = UT_LIST_GET_LEN(buf_pool->free);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->free_list_mutex);
+
+ return(len);
+ }
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 +0900
++++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900
+@@ -279,7 +279,7 @@
+
+ ut_d(block->page.in_flush_list = TRUE);
+ block->page.oldest_modification = lsn;
+- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
++ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
+
+ #ifdef UNIV_DEBUG_VALGRIND
+ {
+@@ -373,14 +373,14 @@
+ > block->page.oldest_modification) {
+ ut_ad(b->in_flush_list);
+ prev_b = b;
+- b = UT_LIST_GET_NEXT(list, b);
++ b = UT_LIST_GET_NEXT(flush_list, b);
+ }
+ }
+
+ if (prev_b == NULL) {
+- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
++ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
+ } else {
+- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
++ UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
+ prev_b, &block->page);
+ }
+
+@@ -406,7 +406,7 @@
+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+- //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
++ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ //ut_ad(bpage->in_LRU_list);
+
+ if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
+@@ -442,14 +442,14 @@
+ enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+- ut_a(buf_page_in_file(bpage));
++ //ut_a(buf_page_in_file(bpage));
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
+
+- if (bpage->oldest_modification != 0
++ if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
+ && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
+ ut_ad(bpage->in_flush_list);
+
+@@ -480,7 +480,7 @@
+ {
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(bpage->in_flush_list);
+
+@@ -498,11 +498,11 @@
+ return;
+ case BUF_BLOCK_ZIP_DIRTY:
+ buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
+- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+ buf_LRU_insert_zip_clean(bpage);
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+ break;
+ }
+
+@@ -546,7 +546,7 @@
+ buf_page_t* prev_b = NULL;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ /* Must reside in the same buffer pool. */
+ ut_ad(buf_pool == buf_pool_from_bpage(dpage));
+
+@@ -575,18 +575,18 @@
+ because we assert on in_flush_list in comparison function. */
+ ut_d(bpage->in_flush_list = FALSE);
+
+- prev = UT_LIST_GET_PREV(list, bpage);
+- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++ prev = UT_LIST_GET_PREV(flush_list, bpage);
++ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+
+ if (prev) {
+ ut_ad(prev->in_flush_list);
+ UT_LIST_INSERT_AFTER(
+- list,
++ flush_list,
+ buf_pool->flush_list,
+ prev, dpage);
+ } else {
+ UT_LIST_ADD_FIRST(
+- list,
++ flush_list,
+ buf_pool->flush_list,
+ dpage);
+ }
+@@ -1055,7 +1055,7 @@
+
+ #ifdef UNIV_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(!buf_pool_mutex_own(buf_pool));
++ //ut_ad(!buf_pool_mutex_own(buf_pool));
+ #endif
+
+ #ifdef UNIV_LOG_DEBUG
+@@ -1069,7 +1069,8 @@
+ io_fixed and oldest_modification != 0. Thus, it cannot be
+ relocated in the buffer pool or removed from flush_list or
+ LRU_list. */
+- ut_ad(!buf_pool_mutex_own(buf_pool));
++ //ut_ad(!buf_pool_mutex_own(buf_pool));
++ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
+ ut_ad(!buf_flush_list_mutex_own(buf_pool));
+ ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+@@ -1155,12 +1156,18 @@
+ ibool is_uncompressed;
+
+ ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
++#endif
+ ut_ad(buf_page_in_file(bpage));
+
+ block_mutex = buf_page_get_mutex(bpage);
+ ut_ad(mutex_own(block_mutex));
+
++ buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
++
+ ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
+
+ buf_page_set_io_fix(bpage, BUF_IO_WRITE);
+@@ -1322,14 +1329,16 @@
+
+ buf_pool = buf_pool_get(space, i);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ /* We only want to flush pages from this buffer pool. */
+ bpage = buf_page_hash_get(buf_pool, space, i);
+
+ if (!bpage) {
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ continue;
+ }
+
+@@ -1341,11 +1350,9 @@
+ if (flush_type != BUF_FLUSH_LRU
+ || i == offset
+ || buf_page_is_old(bpage)) {
+- mutex_t* block_mutex = buf_page_get_mutex(bpage);
++ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
+
+- mutex_enter(block_mutex);
+-
+- if (buf_flush_ready_for_flush(bpage, flush_type)
++ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
+ && (i == offset || !bpage->buf_fix_count)) {
+ /* We only try to flush those
+ neighbors != offset where the buf fix
+@@ -1361,11 +1368,12 @@
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ count++;
+ continue;
+- } else {
++ } else if (block_mutex) {
+ mutex_exit(block_mutex);
+ }
+ }
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ }
+
+ return(count);
+@@ -1398,21 +1406,25 @@
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ #endif /* UNIV_DEBUG */
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(flush_type != BUF_FLUSH_LRU
++ || mutex_own(&buf_pool->LRU_list_mutex));
+
+- block_mutex = buf_page_get_mutex(bpage);
+- mutex_enter(block_mutex);
++ block_mutex = buf_page_get_mutex_enter(bpage);
+
+- ut_a(buf_page_in_file(bpage));
++ //ut_a(buf_page_in_file(bpage));
+
+- if (buf_flush_ready_for_flush(bpage, flush_type)) {
++ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
+ ulint space;
+ ulint offset;
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_bpage(bpage);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ }
+
+ /* These fields are protected by both the
+ buffer pool mutex and block mutex. */
+@@ -1428,13 +1440,18 @@
+ *count,
+ n_to_flush);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ }
+ flushed = TRUE;
+- } else {
++ } else if (block_mutex) {
+ mutex_exit(block_mutex);
+ }
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(flush_type != BUF_FLUSH_LRU
++ || mutex_own(&buf_pool->LRU_list_mutex));
+
+ return(flushed);
+ }
+@@ -1455,7 +1472,8 @@
+ buf_page_t* bpage;
+ ulint count = 0;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ do {
+ /* Start from the end of the list looking for a
+@@ -1477,7 +1495,8 @@
+ should be flushed, we factor in this value. */
+ buf_lru_flush_page_count += count;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ return(count);
+ }
+@@ -1505,9 +1524,10 @@
+ {
+ ulint len;
+ buf_page_t* bpage;
++ buf_page_t* prev_bpage = NULL;
+ ulint count = 0;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+ /* If we have flushed enough, leave the loop */
+ do {
+@@ -1526,6 +1546,7 @@
+
+ if (bpage) {
+ ut_a(bpage->oldest_modification > 0);
++ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
+ }
+
+ if (!bpage || bpage->oldest_modification >= lsn_limit) {
+@@ -1567,9 +1588,17 @@
+ break;
+ }
+
+- bpage = UT_LIST_GET_PREV(list, bpage);
++ bpage = UT_LIST_GET_PREV(flush_list, bpage);
+
+- ut_ad(!bpage || bpage->in_flush_list);
++ //ut_ad(!bpage || bpage->in_flush_list);
++ if (bpage != prev_bpage) {
++ /* the flush list changed while we scanned (the search may have warped); retry */
++ buf_flush_list_mutex_exit(buf_pool);
++ break;
++ }
++ if (bpage) {
++ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
++ }
+
+ buf_flush_list_mutex_exit(buf_pool);
+
+@@ -1578,7 +1607,7 @@
+
+ } while (count < min_n && bpage != NULL && len > 0);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+ return(count);
+ }
+@@ -1617,13 +1646,15 @@
+ || sync_thread_levels_empty_gen(TRUE));
+ #endif /* UNIV_SYNC_DEBUG */
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+
+ /* Note: The buffer pool mutex is released and reacquired within
+ the flush functions. */
+ switch(flush_type) {
+ case BUF_FLUSH_LRU:
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ count = buf_flush_LRU_list_batch(buf_pool, min_n);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ break;
+ case BUF_FLUSH_LIST:
+ count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
+@@ -1632,7 +1663,7 @@
+ ut_error;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ buf_flush_buffered_writes();
+
+@@ -1888,7 +1919,7 @@
+ retry:
+ //buf_pool_mutex_enter(buf_pool);
+ if (have_LRU_mutex)
+- buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+
+@@ -1905,15 +1936,15 @@
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ continue;
+ }
+- block_mutex = buf_page_get_mutex(bpage);
+-
+- mutex_enter(block_mutex);
++ block_mutex = buf_page_get_mutex_enter(bpage);
+
+- if (buf_flush_ready_for_replace(bpage)) {
++ if (block_mutex && buf_flush_ready_for_replace(bpage)) {
+ n_replaceable++;
+ }
+
+- mutex_exit(block_mutex);
++ if (block_mutex) {
++ mutex_exit(block_mutex);
++ }
+
+ distance++;
+
+@@ -1922,7 +1953,7 @@
+
+ //buf_pool_mutex_exit(buf_pool);
+ if (have_LRU_mutex)
+- buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
+
+@@ -2121,7 +2152,7 @@
+
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
+
+- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
++ UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
+ ut_ad(ut_list_node_313->in_flush_list));
+
+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+@@ -2161,7 +2192,7 @@
+ rnode = rbt_next(buf_pool->flush_rbt, rnode);
+ }
+
+- bpage = UT_LIST_GET_NEXT(list, bpage);
++ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+
+ ut_a(!bpage || om >= bpage->oldest_modification);
+ }
+diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
+--- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900
++++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900
+@@ -143,8 +143,9 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+- buf_block_t* block); /*!< in: block, must contain a file page and
++ buf_block_t* block, /*!< in: block, must contain a file page and
+ be in a state where it can be freed */
++ ibool have_page_hash_mutex);
+
+ /******************************************************************//**
+ Determines if the unzip_LRU list should be used for evicting a victim
+@@ -154,15 +155,20 @@
+ ibool
+ buf_LRU_evict_from_unzip_LRU(
+ /*=========================*/
+- buf_pool_t* buf_pool)
++ buf_pool_t* buf_pool,
++ ibool have_LRU_mutex)
+ {
+ ulint io_avg;
+ ulint unzip_avg;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
++ if (!have_LRU_mutex)
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ /* If the unzip_LRU list is empty, we can only use the LRU. */
+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ return(FALSE);
+ }
+
+@@ -171,14 +177,20 @@
+ decompressed pages in the buffer pool. */
+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
+ <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ return(FALSE);
+ }
+
+ /* If eviction hasn't started yet, we assume by default
+ that a workload is disk bound. */
+ if (buf_pool->freed_page_clock == 0) {
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ return(TRUE);
+ }
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ /* Calculate the average over past intervals, and add the values
+ of the current interval. */
+@@ -246,19 +258,23 @@
+ page_arr = ut_malloc(
+ sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ scan_again:
+ num_entries = 0;
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+
+ while (bpage != NULL) {
+- mutex_t* block_mutex = buf_page_get_mutex(bpage);
++ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
+ buf_page_t* prev_bpage;
+
+- mutex_enter(block_mutex);
+ prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
+
++ if (!block_mutex) {
++ goto next_page;
++ }
++
+ ut_a(buf_page_in_file(bpage));
+
+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
+@@ -287,14 +303,16 @@
+
+ /* Array full. We release the buf_pool->mutex to
+ obey the latching order. */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ buf_LRU_drop_page_hash_batch(
+ id, zip_size, page_arr, num_entries);
+
+ num_entries = 0;
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ } else {
+ mutex_exit(block_mutex);
+ }
+@@ -319,7 +337,8 @@
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ /* Drop any remaining batch of search hashed pages. */
+ buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
+@@ -341,7 +360,9 @@
+ ibool all_freed;
+
+ scan_again:
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+
+ all_freed = TRUE;
+
+@@ -369,8 +390,16 @@
+
+ all_freed = FALSE;
+ } else {
+- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+- mutex_enter(block_mutex);
++ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
++
++ if (!block_mutex) {
++ /* This should be an impossible case.
++ Something is wrong, so retry via scan_again */
++
++ all_freed = FALSE;
++
++ goto next_page_no_mutex;
++ }
+
+ if (bpage->buf_fix_count > 0) {
+
+@@ -429,7 +458,9 @@
+ ulint page_no;
+ ulint zip_size;
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ zip_size = buf_page_get_zip_size(bpage);
+ page_no = buf_page_get_page_no(bpage);
+@@ -454,7 +485,7 @@
+ if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+ != BUF_BLOCK_ZIP_FREE) {
+ buf_LRU_block_free_hashed_page((buf_block_t*)
+- bpage);
++ bpage, TRUE);
+ } else {
+ /* The block_mutex should have been
+ released by buf_LRU_block_remove_hashed_page()
+@@ -486,7 +517,9 @@
+ bpage = prev_bpage;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ if (!all_freed) {
+ os_thread_sleep(20000);
+@@ -532,7 +565,9 @@
+ buf_page_t* b;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++ ut_ad(mutex_own(&buf_pool->flush_list_mutex));
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
+
+ /* Find the first successor of bpage in the LRU list
+@@ -540,17 +575,17 @@
+ b = bpage;
+ do {
+ b = UT_LIST_GET_NEXT(LRU, b);
+- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
++ } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
+
+ /* Insert bpage before b, i.e., after the predecessor of b. */
+ if (b) {
+- b = UT_LIST_GET_PREV(list, b);
++ b = UT_LIST_GET_PREV(zip_list, b);
+ }
+
+ if (b) {
+- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
++ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
+ } else {
+- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
++ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
+ }
+ }
+
+@@ -563,18 +598,19 @@
+ buf_LRU_free_from_unzip_LRU_list(
+ /*=============================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+- ulint n_iterations) /*!< in: how many times this has
++ ulint n_iterations, /*!< in: how many times this has
+ been called repeatedly without
+ result: a high value means that
+ we should search farther; we will
+ search n_iterations / 5 of the
+ unzip_LRU list, or nothing if
+ n_iterations >= 5 */
++ ibool have_LRU_mutex)
+ {
+ buf_block_t* block;
+ ulint distance;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+ /* Theoratically it should be much easier to find a victim
+ from unzip_LRU as we can choose even a dirty block (as we'll
+@@ -584,7 +620,7 @@
+ if we have done five iterations so far. */
+
+ if (UNIV_UNLIKELY(n_iterations >= 5)
+- || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
++ || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
+
+ return(FALSE);
+ }
+@@ -592,18 +628,25 @@
+ distance = 100 + (n_iterations
+ * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
+
++restart:
+ for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+ UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
+ block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
+
+ enum buf_lru_free_block_status freed;
+
++ mutex_enter(&block->mutex);
++ if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
++ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
++ mutex_exit(&block->mutex);
++ goto restart;
++ }
++
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->in_unzip_LRU_list);
+ ut_ad(block->page.in_LRU_list);
+
+- mutex_enter(&block->mutex);
+- freed = buf_LRU_free_block(&block->page, FALSE, NULL);
++ freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
+ mutex_exit(&block->mutex);
+
+ switch (freed) {
+@@ -637,21 +680,23 @@
+ buf_LRU_free_from_common_LRU_list(
+ /*==============================*/
+ buf_pool_t* buf_pool,
+- ulint n_iterations)
++ ulint n_iterations,
+ /*!< in: how many times this has been called
+ repeatedly without result: a high value means
+ that we should search farther; if
+ n_iterations < 10, then we search
+ n_iterations / 10 * buf_pool->curr_size
+ pages from the end of the LRU list */
++ ibool have_LRU_mutex)
+ {
+ buf_page_t* bpage;
+ ulint distance;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+ distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
+
++restart:
+ for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
+ bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
+@@ -659,14 +704,23 @@
+ enum buf_lru_free_block_status freed;
+ unsigned accessed;
+ mutex_t* block_mutex
+- = buf_page_get_mutex(bpage);
++ = buf_page_get_mutex_enter(bpage);
++
++ if (!block_mutex) {
++ goto restart;
++ }
++
++ if (!bpage->in_LRU_list
++ || !buf_page_in_file(bpage)) {
++ mutex_exit(block_mutex);
++ goto restart;
++ }
+
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(bpage->in_LRU_list);
+
+- mutex_enter(block_mutex);
+ accessed = buf_page_is_accessed(bpage);
+- freed = buf_LRU_free_block(bpage, TRUE, NULL);
++ freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
+ mutex_exit(block_mutex);
+
+ switch (freed) {
+@@ -718,16 +772,23 @@
+ n_iterations / 5 of the unzip_LRU list. */
+ {
+ ibool freed = FALSE;
++ ibool have_LRU_mutex = FALSE;
+
+- buf_pool_mutex_enter(buf_pool);
++ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
++ have_LRU_mutex = TRUE;
++
++ //buf_pool_mutex_enter(buf_pool);
++ if (have_LRU_mutex)
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+- freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
++ freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
+
+ if (!freed) {
+ freed = buf_LRU_free_from_common_LRU_list(
+- buf_pool, n_iterations);
++ buf_pool, n_iterations, have_LRU_mutex);
+ }
+
++ buf_pool_mutex_enter(buf_pool);
+ if (!freed) {
+ buf_pool->LRU_flush_ended = 0;
+ } else if (buf_pool->LRU_flush_ended > 0) {
+@@ -735,6 +796,8 @@
+ }
+
+ buf_pool_mutex_exit(buf_pool);
++ if (have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ return(freed);
+ }
+@@ -795,7 +858,9 @@
+
+ buf_pool = buf_pool_from_array(i);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ mutex_enter(&buf_pool->free_list_mutex);
+
+ if (!recv_recovery_on
+ && UT_LIST_GET_LEN(buf_pool->free)
+@@ -805,7 +870,9 @@
+ ret = TRUE;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ mutex_exit(&buf_pool->free_list_mutex);
+ }
+
+ return(ret);
+@@ -823,9 +890,10 @@
+ {
+ buf_block_t* block;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
++ mutex_enter(&buf_pool->free_list_mutex);
++ block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
+
+ if (block) {
+
+@@ -834,7 +902,9 @@
+ ut_ad(!block->page.in_flush_list);
+ ut_ad(!block->page.in_LRU_list);
+ ut_a(!buf_page_in_file(&block->page));
+- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
++ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
++
++ mutex_exit(&buf_pool->free_list_mutex);
+
+ mutex_enter(&block->mutex);
+
+@@ -844,6 +914,8 @@
+ ut_ad(buf_pool_from_block(block) == buf_pool);
+
+ mutex_exit(&block->mutex);
++ } else {
++ mutex_exit(&buf_pool->free_list_mutex);
+ }
+
+ return(block);
+@@ -868,7 +940,7 @@
+ ibool mon_value_was = FALSE;
+ ibool started_monitor = FALSE;
+ loop:
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+
+ if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+ + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
+@@ -951,8 +1023,10 @@
+ ibool lru;
+ page_zip_set_size(&block->page.zip, zip_size);
+
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ block->page.zip.data = buf_buddy_alloc(
+- buf_pool, zip_size, &lru);
++ buf_pool, zip_size, &lru, FALSE);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
+ } else {
+@@ -960,7 +1034,7 @@
+ block->page.zip.data = NULL;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ if (started_monitor) {
+ srv_print_innodb_monitor = mon_value_was;
+@@ -972,7 +1046,7 @@
+ /* If no block was in the free list, search from the end of the LRU
+ list and try to free a block there */
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
+
+@@ -1058,7 +1132,8 @@
+ ulint new_len;
+
+ ut_a(buf_pool->LRU_old);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+ ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
+ ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
+ #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
+@@ -1124,7 +1199,8 @@
+ {
+ buf_page_t* bpage;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+ ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+
+ /* We first initialize all blocks in the LRU list as old and then use
+@@ -1159,13 +1235,14 @@
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+ ut_ad(buf_page_in_file(bpage));
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ if (buf_page_belongs_to_unzip_LRU(bpage)) {
+ buf_block_t* block = (buf_block_t*) bpage;
+
+ ut_ad(block->in_unzip_LRU_list);
+- ut_d(block->in_unzip_LRU_list = FALSE);
++ block->in_unzip_LRU_list = FALSE;
+
+ UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
+ }
+@@ -1183,7 +1260,8 @@
+
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ ut_a(buf_page_in_file(bpage));
+
+@@ -1260,12 +1338,13 @@
+
+ ut_ad(buf_pool);
+ ut_ad(block);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+
+ ut_ad(!block->in_unzip_LRU_list);
+- ut_d(block->in_unzip_LRU_list = TRUE);
++ block->in_unzip_LRU_list = TRUE;
+
+ if (old) {
+ UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
+@@ -1286,7 +1365,8 @@
+
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ ut_a(buf_page_in_file(bpage));
+
+@@ -1337,7 +1417,8 @@
+
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ ut_a(buf_page_in_file(bpage));
+ ut_ad(!bpage->in_LRU_list);
+@@ -1416,7 +1497,8 @@
+ {
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ if (bpage->old) {
+ buf_pool->stat.n_pages_made_young++;
+@@ -1458,19 +1540,20 @@
+ buf_page_t* bpage, /*!< in: block to be freed */
+ ibool zip, /*!< in: TRUE if should remove also the
+ compressed page of an uncompressed page */
+- ibool* buf_pool_mutex_released)
++ ibool* buf_pool_mutex_released,
+ /*!< in: pointer to a variable that will
+ be assigned TRUE if buf_pool_mutex
+ was temporarily released, or NULL */
++ ibool have_LRU_mutex)
+ {
+ buf_page_t* b = NULL;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(block_mutex));
+ ut_ad(buf_page_in_file(bpage));
+- ut_ad(bpage->in_LRU_list);
++ //ut_ad(bpage->in_LRU_list);
+ ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+ #if UNIV_WORD_SIZE == 4
+ /* On 32-bit systems, there is no padding in buf_page_t. On
+@@ -1479,7 +1562,7 @@
+ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+ #endif
+
+- if (!buf_page_can_relocate(bpage)) {
++ if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
+
+ /* Do not free buffer-fixed or I/O-fixed blocks. */
+ return(BUF_LRU_NOT_FREED);
+@@ -1511,15 +1594,15 @@
+ If it cannot be allocated (without freeing a block
+ from the LRU list), refuse to free bpage. */
+ alloc:
+- buf_pool_mutex_exit_forbid(buf_pool);
+- b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
+- buf_pool_mutex_exit_allow(buf_pool);
++ //buf_pool_mutex_exit_forbid(buf_pool);
++ b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
++ //buf_pool_mutex_exit_allow(buf_pool);
+
+ if (UNIV_UNLIKELY(!b)) {
+ return(BUF_LRU_CANNOT_RELOCATE);
+ }
+
+- memcpy(b, bpage, sizeof *b);
++ //memcpy(b, bpage, sizeof *b);
+ }
+
+ #ifdef UNIV_DEBUG
+@@ -1530,6 +1613,39 @@
+ }
+ #endif /* UNIV_DEBUG */
+
++ /* to avoid breaking the latch order, block_mutex must be released and re-entered */
++ mutex_exit(block_mutex);
++
++ if (!have_LRU_mutex)
++ mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ mutex_enter(block_mutex);
++
++ /* recheck states of block */
++ if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
++ || !buf_page_can_relocate(bpage)) {
++not_freed:
++ if (b) {
++ buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
++ }
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ return(BUF_LRU_NOT_FREED);
++ } else if (zip || !bpage->zip.data) {
++ if (bpage->oldest_modification)
++ goto not_freed;
++ } else if (bpage->oldest_modification) {
++ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
++ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
++ goto not_freed;
++ }
++ }
++
++ if (b) {
++ memcpy(b, bpage, sizeof *b);
++ }
++
+ if (buf_LRU_block_remove_hashed_page(bpage, zip)
+ != BUF_BLOCK_ZIP_FREE) {
+ ut_a(bpage->buf_fix_count == 0);
+@@ -1546,6 +1662,10 @@
+
+ ut_a(!hash_b);
+
++ while (prev_b && !prev_b->in_LRU_list) {
++ prev_b = UT_LIST_GET_PREV(LRU, prev_b);
++ }
++
+ b->state = b->oldest_modification
+ ? BUF_BLOCK_ZIP_DIRTY
+ : BUF_BLOCK_ZIP_PAGE;
+@@ -1642,7 +1762,9 @@
+ *buf_pool_mutex_released = TRUE;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ mutex_exit(block_mutex);
+
+ /* Remove possible adaptive hash index on the page.
+@@ -1674,7 +1796,9 @@
+ : BUF_NO_CHECKSUM_MAGIC);
+ }
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ if (have_LRU_mutex)
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ mutex_enter(block_mutex);
+
+ if (b) {
+@@ -1684,13 +1808,17 @@
+ mutex_exit(&buf_pool->zip_mutex);
+ }
+
+- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
++ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
+ } else {
+ /* The block_mutex should have been released by
+ buf_LRU_block_remove_hashed_page() when it returns
+ BUF_BLOCK_ZIP_FREE. */
+ ut_ad(block_mutex == &buf_pool->zip_mutex);
+ mutex_enter(block_mutex);
++
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+
+ return(BUF_LRU_FREED);
+@@ -1702,13 +1830,14 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+- buf_block_t* block) /*!< in: block, must not contain a file page */
++ buf_block_t* block, /*!< in: block, must not contain a file page */
++ ibool have_page_hash_mutex)
+ {
+ void* data;
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
+
+ ut_ad(block);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(&block->mutex));
+
+ switch (buf_block_get_state(block)) {
+@@ -1742,18 +1871,21 @@
+ if (data) {
+ block->page.zip.data = NULL;
+ mutex_exit(&block->mutex);
+- buf_pool_mutex_exit_forbid(buf_pool);
++ //buf_pool_mutex_exit_forbid(buf_pool);
+
+ buf_buddy_free(
+- buf_pool, data, page_zip_get_size(&block->page.zip));
++ buf_pool, data, page_zip_get_size(&block->page.zip),
++ have_page_hash_mutex);
+
+- buf_pool_mutex_exit_allow(buf_pool);
++ //buf_pool_mutex_exit_allow(buf_pool);
+ mutex_enter(&block->mutex);
+ page_zip_set_size(&block->page.zip, 0);
+ }
+
+- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
++ mutex_enter(&buf_pool->free_list_mutex);
++ UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
+ ut_d(block->page.in_free_list = TRUE);
++ mutex_exit(&buf_pool->free_list_mutex);
+
+ UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
+ }
+@@ -1783,7 +1915,11 @@
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+ ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+@@ -1891,7 +2027,9 @@
+
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ mutex_exit(buf_page_get_mutex(bpage));
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_print();
+ buf_LRU_print();
+ buf_validate();
+@@ -1912,17 +2050,17 @@
+ ut_a(bpage->zip.data);
+ ut_a(buf_page_get_zip_size(bpage));
+
+- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
++ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
+
+ mutex_exit(&buf_pool->zip_mutex);
+- buf_pool_mutex_exit_forbid(buf_pool);
++ //buf_pool_mutex_exit_forbid(buf_pool);
+
+ buf_buddy_free(
+ buf_pool, bpage->zip.data,
+- page_zip_get_size(&bpage->zip));
++ page_zip_get_size(&bpage->zip), TRUE);
+
+- buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
+- buf_pool_mutex_exit_allow(buf_pool);
++ buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
++ //buf_pool_mutex_exit_allow(buf_pool);
+
+ UNIV_MEM_UNDESC(bpage);
+ return(BUF_BLOCK_ZIP_FREE);
+@@ -1945,13 +2083,13 @@
+ ut_ad(!bpage->in_flush_list);
+ ut_ad(!bpage->in_LRU_list);
+ mutex_exit(&((buf_block_t*) bpage)->mutex);
+- buf_pool_mutex_exit_forbid(buf_pool);
++ //buf_pool_mutex_exit_forbid(buf_pool);
+
+ buf_buddy_free(
+ buf_pool, data,
+- page_zip_get_size(&bpage->zip));
++ page_zip_get_size(&bpage->zip), TRUE);
+
+- buf_pool_mutex_exit_allow(buf_pool);
++ //buf_pool_mutex_exit_allow(buf_pool);
+ mutex_enter(&((buf_block_t*) bpage)->mutex);
+ page_zip_set_size(&bpage->zip, 0);
+ }
+@@ -1977,18 +2115,19 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+- buf_block_t* block) /*!< in: block, must contain a file page and
++ buf_block_t* block, /*!< in: block, must contain a file page and
+ be in a state where it can be freed */
++ ibool have_page_hash_mutex)
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_block(block);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_block(block);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+ ut_ad(mutex_own(&block->mutex));
+
+ buf_block_set_state(block, BUF_BLOCK_MEMORY);
+
+- buf_LRU_block_free_non_file_page(block);
++ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+ }
+
+ /**********************************************************************//**
+@@ -2015,7 +2154,8 @@
+ }
+
+ if (adjust) {
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ if (ratio != buf_pool->LRU_old_ratio) {
+ buf_pool->LRU_old_ratio = ratio;
+@@ -2027,7 +2167,8 @@
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ } else {
+ buf_pool->LRU_old_ratio = ratio;
+ }
+@@ -2124,7 +2265,8 @@
+ ulint new_len;
+
+ ut_ad(buf_pool);
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+
+@@ -2185,16 +2327,22 @@
+
+ ut_a(buf_pool->LRU_old_len == old_len);
+
+- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ mutex_enter(&buf_pool->free_list_mutex);
++
++ UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
+ ut_ad(ut_list_node_313->in_free_list));
+
+ for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
+ bpage != NULL;
+- bpage = UT_LIST_GET_NEXT(list, bpage)) {
++ bpage = UT_LIST_GET_NEXT(free, bpage)) {
+
+ ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
+ }
+
++ mutex_exit(&buf_pool->free_list_mutex);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++
+ UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
+ ut_ad(ut_list_node_313->in_unzip_LRU_list
+ && ut_list_node_313->page.in_LRU_list));
+@@ -2208,7 +2356,8 @@
+ ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+
+ /**********************************************************************//**
+@@ -2244,7 +2393,8 @@
+ const buf_page_t* bpage;
+
+ ut_ad(buf_pool);
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+@@ -2301,7 +2451,8 @@
+ bpage = UT_LIST_GET_NEXT(LRU, bpage);
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+
+ /**********************************************************************//**
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900
++++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900
+@@ -311,6 +311,7 @@
+
+ return(0);
+ }
++ buf_pool_mutex_exit(buf_pool);
+
+ /* Check that almost all pages in the area have been accessed; if
+ offset == low, the accesses must be in a descending order, otherwise,
+@@ -329,6 +330,7 @@
+
+ fail_count = 0;
+
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+ for (i = low; i < high; i++) {
+ bpage = buf_page_hash_get(buf_pool, space, i);
+
+@@ -356,7 +358,8 @@
+
+ if (fail_count > threshold) {
+ /* Too many failures: return */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ return(0);
+ }
+
+@@ -371,7 +374,8 @@
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+
+ if (bpage == NULL) {
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(0);
+ }
+@@ -397,7 +401,8 @@
+ pred_offset = fil_page_get_prev(frame);
+ succ_offset = fil_page_get_next(frame);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ if ((offset == low) && (succ_offset == offset + 1)) {
+
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900
+@@ -250,6 +250,10 @@
+ # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
+ {&buf_pool_mutex_key, "buf_pool_mutex", 0},
+ {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
++ {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
++ {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
++ {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
++ {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
+ {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
+ {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
+ {&dict_sys_mutex_key, "dict_sys_mutex", 0},
+@@ -301,6 +305,7 @@
+ {&archive_lock_key, "archive_lock", 0},
+ # endif /* UNIV_LOG_ARCHIVE */
+ {&btr_search_latch_key, "btr_search_latch", 0},
++ {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
+ # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
+ {&buf_block_lock_key, "buf_block_lock", 0},
+ # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900
++++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900
+@@ -1725,7 +1725,8 @@
+
+ buf_pool = buf_pool_from_array(i);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->zip_free_mutex);
+
+ for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
+ buf_buddy_stat_t* buddy_stat;
+@@ -1755,7 +1756,8 @@
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->zip_free_mutex);
+
+ if (status) {
+ break;
+diff -ruN a/storage/innobase/handler/innodb_patch_info.h b/storage/innobase/handler/innodb_patch_info.h
+--- a/storage/innobase/handler/innodb_patch_info.h 2010-12-03 15:48:03.064995674 +0900
++++ b/storage/innobase/handler/innodb_patch_info.h 2010-12-03 15:48:29.331955850 +0900
+@@ -31,5 +31,6 @@
+ {"innodb_overwrite_relay_log_info","overwrite relay-log.info when slave recovery","Building as plugin, it is not used.","http://www.percona.com/docs/wiki/percona-xtradb:innodb_overwrite_relay_log_info"},
+ {"innodb_thread_concurrency_timer_based","use InnoDB timer based concurrency throttling (backport from MySQL 5.4.0)","",""},
+ {"innodb_dict_size_limit","Limit dictionary cache size","Variable innodb_dict_size_limit in bytes","http://www.percona.com/docs/wiki/percona-xtradb"},
++{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
+ {NULL, NULL, NULL, NULL}
+ };
+diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
+--- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
++++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
+@@ -3700,9 +3700,11 @@
+ ulint fold = buf_page_address_fold(space, page_no);
+ buf_pool_t* buf_pool = buf_pool_get(space, page_no);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+ bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ if (UNIV_LIKELY_NULL(bpage)) {
+ /* A buffer pool watch has been set or the
+diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
+--- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900
+@@ -51,10 +51,11 @@
+ buf_pool_t* buf_pool,
+ /*!< buffer pool in which the block resides */
+ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
+- ibool* lru) /*!< in: pointer to a variable that will be assigned
++ ibool* lru, /*!< in: pointer to a variable that will be assigned
+ TRUE if storage was allocated from the LRU list
+ and buf_pool->mutex was temporarily released,
+ or NULL if the LRU list should not be used */
++ ibool have_page_hash_mutex)
+ __attribute__((malloc));
+
+ /**********************************************************************//**
+@@ -67,7 +68,8 @@
+ /*!< buffer pool in which the block resides */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
++ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
++ ibool have_page_hash_mutex)
+ __attribute__((nonnull));
+
+ #ifndef UNIV_NONINL
+diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
+--- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900
+@@ -46,10 +46,11 @@
+ /*!< in: buffer pool in which the page resides */
+ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
+- ibool* lru) /*!< in: pointer to a variable that will be assigned
++ ibool* lru, /*!< in: pointer to a variable that will be assigned
+ TRUE if storage was allocated from the LRU list
+ and buf_pool->mutex was temporarily released,
+ or NULL if the LRU list should not be used */
++ ibool have_page_hash_mutex)
+ __attribute__((malloc));
+
+ /**********************************************************************//**
+@@ -61,8 +62,9 @@
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint i) /*!< in: index of buf_pool->zip_free[],
++ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
++ ibool have_page_hash_mutex)
+ __attribute__((nonnull));
+
+ /**********************************************************************//**
+@@ -102,16 +104,17 @@
+ the page resides */
+ ulint size, /*!< in: block size, up to
+ UNIV_PAGE_SIZE */
+- ibool* lru) /*!< in: pointer to a variable
++ ibool* lru, /*!< in: pointer to a variable
+ that will be assigned TRUE if
+ storage was allocated from the
+ LRU list and buf_pool->mutex was
+ temporarily released, or NULL if
+ the LRU list should not be used */
++ ibool have_page_hash_mutex)
+ {
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+- return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
++ return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
+ }
+
+ /**********************************************************************//**
+@@ -123,12 +126,25 @@
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint size) /*!< in: block size, up to
++ ulint size, /*!< in: block size, up to
+ UNIV_PAGE_SIZE */
++ ibool have_page_hash_mutex)
+ {
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++
++ if (!have_page_hash_mutex) {
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ }
+
+- buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
++ mutex_enter(&buf_pool->zip_free_mutex);
++ buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
++ mutex_exit(&buf_pool->zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ }
+
+ #ifdef UNIV_MATERIALIZE
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900
++++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900
+@@ -132,6 +132,20 @@
+ /*==========================*/
+
+ /********************************************************************//**
++Acquires the page_hash_latch of every buffer pool instance in X mode. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_lock_all(void);
++/*================================*/
++
++/********************************************************************//**
++Releases the X-mode page_hash_latch of every buffer pool instance. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_unlock_all(void);
++/*==================================*/
++
++/********************************************************************//**
+ Creates the buffer pool.
+ @return own: buf_pool object, NULL if not enough memory or error */
+ UNIV_INTERN
+@@ -761,6 +775,15 @@
+ const buf_page_t* bpage) /*!< in: pointer to control block */
+ __attribute__((pure));
+
++/*************************************************************************
++Enters the mutex protecting a block, retrying if it changes meanwhile.
++NOT "pure": it locks. Returns the held mutex, or NULL if there is none. */
++UNIV_INLINE
++mutex_t*
++buf_page_get_mutex_enter(
++/*=========================*/
++	const buf_page_t*	bpage);	/*!< in: pointer to control block */
++
+ /*********************************************************************//**
+ Get the flush type of a page.
+ @return flush type */
+@@ -1227,7 +1250,7 @@
+ All these are protected by buf_pool_mutex. */
+ /* @{ */
+
+- UT_LIST_NODE_T(buf_page_t) list;
++ /* UT_LIST_NODE_T(buf_page_t) list; */
+ /*!< based on state, this is a
+ list node, protected either by
+ buf_pool_mutex or by
+@@ -1254,6 +1277,10 @@
+ BUF_BLOCK_REMOVE_HASH or
+ BUF_BLOCK_READY_IN_USE. */
+
++ /* resplit for optimistic use */
++ UT_LIST_NODE_T(buf_page_t) free;
++ UT_LIST_NODE_T(buf_page_t) flush_list;
++ UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
+ #ifdef UNIV_DEBUG
+ ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
+ when flush_list_mutex is free, the
+@@ -1347,11 +1374,11 @@
+ a block is in the unzip_LRU list
+ if page.state == BUF_BLOCK_FILE_PAGE
+ and page.zip.data != NULL */
+-#ifdef UNIV_DEBUG
++//#ifdef UNIV_DEBUG
+ ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
+ decompressed LRU list;
+ used in debugging */
+-#endif /* UNIV_DEBUG */
++//#endif /* UNIV_DEBUG */
+ mutex_t mutex; /*!< mutex protecting this block:
+ state (also protected by the buffer
+ pool mutex), io_fix, buf_fix_count,
+@@ -1517,6 +1544,11 @@
+ pool instance, protects compressed
+ only pages (of type buf_page_t, not
+ buf_block_t */
++ mutex_t LRU_list_mutex;
++ rw_lock_t page_hash_latch;
++ mutex_t free_list_mutex;
++ mutex_t zip_free_mutex;
++ mutex_t zip_hash_mutex;
+ ulint instance_no; /*!< Array index of this buffer
+ pool instance */
+ ulint old_pool_size; /*!< Old pool size in bytes */
+diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
+--- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900
+@@ -232,7 +232,7 @@
+ case BUF_BLOCK_ZIP_FREE:
+ /* This is a free page in buf_pool->zip_free[].
+ Such pages should only be accessed by the buddy allocator. */
+- ut_error;
++ /* ut_error; */ /* optimistic */
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+@@ -275,9 +275,14 @@
+ {
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
++ if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
++ /* TODO: this is an interim workaround; it should be confirmed later. */
++ return(&buf_pool->zip_mutex);
++ }
++
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+- ut_error;
++ /* ut_error; */ /* optimistic */
+ return(NULL);
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+@@ -287,6 +292,28 @@
+ }
+ }
+
++/*************************************************************************
++Enters the mutex that protects a block, re-checking after the acquisition
++that it is still the block's mutex. Returns it held, or NULL if none. */
++UNIV_INLINE
++mutex_t*
++buf_page_get_mutex_enter(
++/*=========================*/
++	const buf_page_t*	bpage)	/*!< in: pointer to control block */
++{
++	mutex_t*	mutex;
++	for (;;) {
++		mutex = buf_page_get_mutex(bpage);
++		if (mutex == NULL)
++			break;	/* nothing to lock */
++		mutex_enter(mutex);
++		if (buf_page_get_mutex(bpage) == mutex)
++			break;	/* still the right mutex: keep it held */
++		mutex_exit(mutex);	/* changed under us: retry */
++	}
++	return(mutex);
++}
++
+ /*********************************************************************//**
+ Get the flush type of a page.
+ @return flush type */
+@@ -383,8 +410,8 @@
+ enum buf_io_fix io_fix) /*!< in: io_fix state */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+@@ -414,14 +441,14 @@
+ const buf_page_t* bpage) /*!< control block being relocated */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(buf_page_in_file(bpage));
+- ut_ad(bpage->in_LRU_list);
++ //ut_ad(bpage->in_LRU_list);
+
+- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
++ return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
+ && bpage->buf_fix_count == 0);
+ }
+
+@@ -435,8 +462,8 @@
+ const buf_page_t* bpage) /*!< in: control block */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+ ut_ad(buf_page_in_file(bpage));
+
+@@ -456,7 +483,8 @@
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ #endif /* UNIV_DEBUG */
+ ut_a(buf_page_in_file(bpage));
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+ ut_ad(bpage->in_LRU_list);
+
+ #ifdef UNIV_LRU_DEBUG
+@@ -503,9 +531,10 @@
+ ulint time_ms) /*!< in: ut_time_ms() */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
++ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_a(buf_page_in_file(bpage));
+
+ if (!bpage->access_time) {
+@@ -719,19 +748,19 @@
+ /*===========*/
+ buf_block_t* block) /*!< in, own: block to be freed */
+ {
+- buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
++ //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+
+ mutex_enter(&block->mutex);
+
+ ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+
+- buf_LRU_block_free_non_file_page(block);
++ buf_LRU_block_free_non_file_page(block, FALSE);
+
+ mutex_exit(&block->mutex);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+ }
+ #endif /* !UNIV_HOTBACKUP */
+
+@@ -779,17 +808,17 @@
+ page frame */
+ {
+ ib_uint64_t lsn;
+- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+-
+- mutex_enter(block_mutex);
++ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
+
+- if (buf_page_in_file(bpage)) {
++ if (block_mutex && buf_page_in_file(bpage)) {
+ lsn = bpage->newest_modification;
+ } else {
+ lsn = 0;
+ }
+
+- mutex_exit(block_mutex);
++ if (block_mutex) {
++ mutex_exit(block_mutex);
++ }
+
+ return(lsn);
+ }
+@@ -807,7 +836,7 @@
+ #ifdef UNIV_SYNC_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+
+- ut_ad((buf_pool_mutex_own(buf_pool)
++ ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
+ && (block->page.buf_fix_count == 0))
+ || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -962,7 +991,11 @@
+ buf_page_t* bpage;
+
+ ut_ad(buf_pool);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
++ || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
++#endif
+ ut_ad(fold == buf_page_address_fold(space, offset));
+
+ /* Look for the page in the hash table */
+@@ -1047,11 +1080,13 @@
+ const buf_page_t* bpage;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(bpage != NULL);
+ }
+@@ -1179,4 +1214,38 @@
+ buf_pool_mutex_exit(buf_pool);
+ }
+ }
++
++/********************************************************************//**
++Acquires the page_hash_latch of every buffer pool instance in X mode. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_lock_all(void)
++/*===============================*/
++{
++	ulint	n = 0;
++
++	while (n < srv_buf_pool_instances) {
++		buf_pool_t*	pool = buf_pool_from_array(n);
++
++		rw_lock_x_lock(&pool->page_hash_latch);
++		n++;
++	}
++}
++
++/********************************************************************//**
++Releases the X-mode page_hash_latch of every buffer pool instance. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_unlock_all(void)
++/*=================================*/
++{
++	ulint	n = 0;
++
++	while (n < srv_buf_pool_instances) {
++		buf_pool_t*	pool = buf_pool_from_array(n);
++
++		rw_lock_x_unlock(&pool->page_hash_latch);
++		n++;
++	}
++}
+ #endif /* !UNIV_HOTBACKUP */
+diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
+--- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900
+@@ -113,10 +113,11 @@
+ buf_page_t* bpage, /*!< in: block to be freed */
+ ibool zip, /*!< in: TRUE if should remove also the
+ compressed page of an uncompressed page */
+- ibool* buf_pool_mutex_released);
++ ibool* buf_pool_mutex_released,
+ /*!< in: pointer to a variable that will
+ be assigned TRUE if buf_pool->mutex
+ was temporarily released, or NULL */
++ ibool have_LRU_mutex);
+ /******************************************************************//**
+ Try to free a replaceable block.
+ @return TRUE if found and freed */
+@@ -163,7 +164,8 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+- buf_block_t* block); /*!< in: block, must not contain a file page */
++ buf_block_t* block, /*!< in: block, must not contain a file page */
++ ibool have_page_hash_mutex);
+ /******************************************************************//**
+ Adds a block to the LRU list. */
+ UNIV_INTERN
+diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
+--- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900
+@@ -112,6 +112,7 @@
+ extern mysql_pfs_key_t archive_lock_key;
+ # endif /* UNIV_LOG_ARCHIVE */
+ extern mysql_pfs_key_t btr_search_latch_key;
++extern mysql_pfs_key_t buf_pool_page_hash_key;
+ extern mysql_pfs_key_t buf_block_lock_key;
+ # ifdef UNIV_SYNC_DEBUG
+ extern mysql_pfs_key_t buf_block_debug_latch_key;
+diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
+--- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900
+@@ -75,6 +75,10 @@
+ extern mysql_pfs_key_t buffer_block_mutex_key;
+ extern mysql_pfs_key_t buf_pool_mutex_key;
+ extern mysql_pfs_key_t buf_pool_zip_mutex_key;
++extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
++extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
++extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
++extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
+ extern mysql_pfs_key_t cache_last_read_mutex_key;
+ extern mysql_pfs_key_t dict_foreign_err_mutex_key;
+ extern mysql_pfs_key_t dict_sys_mutex_key;
+@@ -661,7 +665,7 @@
+ #define SYNC_TRX_LOCK_HEAP 298
+ #define SYNC_TRX_SYS_HEADER 290
+ #define SYNC_LOG 170
+-#define SYNC_LOG_FLUSH_ORDER 147
++#define SYNC_LOG_FLUSH_ORDER 156
+ #define SYNC_RECV 168
+ #define SYNC_WORK_QUEUE 162
+ #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
+@@ -671,8 +675,13 @@
+ SYNC_SEARCH_SYS, as memory allocation
+ can call routines there! Otherwise
+ the level is SYNC_MEM_HASH. */
++#define SYNC_BUF_LRU_LIST 158
++#define SYNC_BUF_PAGE_HASH 157
++#define SYNC_BUF_BLOCK 155 /* Block mutex */
++#define SYNC_BUF_FREE_LIST 153
++#define SYNC_BUF_ZIP_FREE 152
++#define SYNC_BUF_ZIP_HASH 151
+ #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
+-#define SYNC_BUF_BLOCK 146 /* Block mutex */
+ #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
+ #define SYNC_DOUBLEWRITE 140
+ #define SYNC_ANY_LATCH 135
+@@ -704,7 +713,7 @@
+ os_fast_mutex; /*!< We use this OS mutex in place of lock_word
+ when atomic operations are not enabled */
+ #endif
+- ulint waiters; /*!< This ulint is set to 1 if there are (or
++ volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
+ may be) threads waiting in the global wait
+ array for this mutex to be released.
+ Otherwise, this is 0. */
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900
+@@ -3065,7 +3065,7 @@
+ level += log_sys->max_checkpoint_age
+ - (lsn - oldest_modification);
+ }
+- bpage = UT_LIST_GET_NEXT(list, bpage);
++ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+ n_blocks++;
+ }
+
+@@ -3150,7 +3150,7 @@
+ found = TRUE;
+ break;
+ }
+- bpage = UT_LIST_GET_NEXT(list, bpage);
++ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+ new_blocks_num++;
+ }
+ if (!found) {
+diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
+--- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
+@@ -265,7 +265,7 @@
+ mutex->lock_word = 0;
+ #endif
+ mutex->event = os_event_create(NULL);
+- mutex_set_waiters(mutex, 0);
++ mutex->waiters = 0;
+ #ifdef UNIV_DEBUG
+ mutex->magic_n = MUTEX_MAGIC_N;
+ #endif /* UNIV_DEBUG */
+@@ -444,6 +444,15 @@
+ mutex_t* mutex, /*!< in: mutex */
+ ulint n) /*!< in: value to set */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ ut_ad(mutex);
++
++ if (n) {
++ os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
++ } else {
++ os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
++ }
++#else
+ volatile ulint* ptr; /* declared volatile to ensure that
+ the value is stored to memory */
+ ut_ad(mutex);
+@@ -452,6 +461,7 @@
+
+ *ptr = n; /* Here we assume that the write of a single
+ word in memory is atomic */
++#endif
+ }
+
+ /******************************************************************//**
+@@ -1193,7 +1203,12 @@
+ ut_error;
+ }
+ break;
++ case SYNC_BUF_LRU_LIST:
+ case SYNC_BUF_FLUSH_LIST:
++ case SYNC_BUF_PAGE_HASH:
++ case SYNC_BUF_FREE_LIST:
++ case SYNC_BUF_ZIP_FREE:
++ case SYNC_BUF_ZIP_HASH:
+ case SYNC_BUF_POOL:
+ /* We can have multiple mutexes of this type therefore we
+ can only check whether the greater than condition holds. */
+@@ -1211,7 +1226,8 @@
+ buffer block (block->mutex or buf_pool_zip_mutex). */
+ if (!sync_thread_levels_g(array, level, FALSE)) {
+ ut_a(sync_thread_levels_g(array, level - 1, TRUE));
+- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
++ /* The exact latching-order rule is not settled yet, so the check below is disabled for now. */
++ //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
+ }
+ break;
+ case SYNC_REC_LOCK:
--- /dev/null
+# name : microsec_process.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/microsec_process.info b/patch_info/microsec_process.info
+--- /dev/null 1970-01-01 09:00:00.000000000 +0900
++++ b/patch_info/microsec_process.info 2010-12-02 20:41:41.616069579 +0900
+@@ -0,0 +1,8 @@
++File=microsec_process.patch
++Name=Adds INFORMATION_SCHEMA.PROCESSLIST with TIME_MS column
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
++2010-01
++Ported to 5.1.42
+diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
+--- a/sql/sql_show.cc 2010-12-02 19:22:40.054024541 +0900
++++ b/sql/sql_show.cc 2010-12-02 20:41:41.622941425 +0900
+@@ -1875,7 +1875,8 @@
+ TABLE *table= tables->table;
+ CHARSET_INFO *cs= system_charset_info;
+ char *user;
+- time_t now= my_time(0);
++ time_t now;
++ ulonglong now_utime= my_micro_time_and_time(&now);
+ DBUG_ENTER("fill_process_list");
+
+ user= thd->security_ctx->master_access & PROCESS_ACL ?
+@@ -1959,6 +1960,10 @@
+ }
+ mysql_mutex_unlock(&tmp->LOCK_thd_data);
+
++ /* TIME_MS */
++ table->field[8]->store(((tmp->start_utime ?
++ now_utime - tmp->start_utime : 0)/ 1000));
++
+ if (schema_table_store_record(thd, table))
+ {
+ mysql_mutex_unlock(&LOCK_thread_count);
+@@ -7202,6 +7207,8 @@
+ {"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State", SKIP_OPEN_TABLE},
+ {"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info",
+ SKIP_OPEN_TABLE},
++ {"TIME_MS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG,
++ 0, 0, "Time_ms", SKIP_OPEN_TABLE},
+ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
+ };
+
--- /dev/null
+# name : userstat.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/configure b/configure
+--- a/configure 2010-08-27 14:28:05.621275596 +0900
++++ b/configure 2010-08-27 15:10:33.736074033 +0900
+@@ -38009,7 +38009,7 @@
+ realpath rename rint rwlock_init setupterm \
+ shmget shmat shmdt shmctl sigaction sigemptyset sigaddset \
+ sighold sigset sigthreadmask port_create sleep \
+- snprintf socket stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
++ snprintf socket strsep stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
+ strtol strtoll strtoul strtoull tell tempnam thr_setconcurrency vidattr \
+ posix_fallocate backtrace backtrace_symbols backtrace_symbols_fd printstack
+ do
+diff -ruN a/configure.in b/configure.in
+--- a/configure.in 2010-08-04 02:24:24.000000000 +0900
++++ b/configure.in 2010-08-27 15:10:33.737073307 +0900
+@@ -2086,7 +2086,7 @@
+ realpath rename rint rwlock_init setupterm \
+ shmget shmat shmdt shmctl sigaction sigemptyset sigaddset \
+ sighold sigset sigthreadmask port_create sleep \
+- snprintf socket stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
++ snprintf socket strsep stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
+ strtol strtoll strtoul strtoull tell tempnam thr_setconcurrency vidattr \
+ posix_fallocate backtrace backtrace_symbols backtrace_symbols_fd printstack)
+
+diff -ruN a/include/config.h.in b/include/config.h.in
+--- a/include/config.h.in 2010-08-04 02:28:40.000000000 +0900
++++ b/include/config.h.in 2010-08-27 15:10:33.740077919 +0900
+@@ -802,6 +802,9 @@
+ /* Define to 1 if you have the <stdlib.h> header file. */
+ #undef HAVE_STDLIB_H
+
++/* Define to 1 if you have the `strsep' function. */
++#undef HAVE_STRSEP
++
+ /* Define to 1 if you have the `stpcpy' function. */
+ #undef HAVE_STPCPY
+
+diff -ruN a/include/mysql/plugin.h b/include/mysql/plugin.h
+--- a/include/mysql/plugin.h 2010-08-27 14:38:08.682439958 +0900
++++ b/include/mysql/plugin.h 2010-08-27 15:10:33.742003842 +0900
+@@ -705,6 +705,9 @@
+ unsigned long thd_log_slow_verbosity(const MYSQL_THD thd);
+ int thd_opt_slow_log();
+ #define EXTENDED_SLOWLOG
++
++#define EXTENDED_FOR_USERSTAT
++
+ /**
+ Create a temporary file.
+
+diff -ruN a/include/mysql_com.h b/include/mysql_com.h
+--- a/include/mysql_com.h 2010-08-04 02:24:30.000000000 +0900
++++ b/include/mysql_com.h 2010-08-27 15:10:33.743072186 +0900
+@@ -29,6 +29,7 @@
+
+ #define SERVER_VERSION_LENGTH 60
+ #define SQLSTATE_LENGTH 5
++#define LIST_PROCESS_HOST_LEN 64
+
+ /*
+ USER_HOST_BUFF_SIZE -- length of string buffer, that is enough to contain
+@@ -115,6 +116,12 @@
+ thread */
+ #define REFRESH_MASTER 128 /* Remove all bin logs in the index
+ and truncate the index */
++#define REFRESH_TABLE_STATS 256 /* Refresh table stats hash table */
++#define REFRESH_INDEX_STATS 512 /* Refresh index stats hash table */
++#define REFRESH_USER_STATS 1024 /* Refresh user stats hash table */
++#define REFRESH_SLOW_QUERY_LOG 2048 /* Flush slow query log and rotate*/
++#define REFRESH_CLIENT_STATS 4096 /* Refresh client stats hash table */
++#define REFRESH_THREAD_STATS 8192 /* Refresh thread stats hash table */
+
+ /* The following can't be set with mysql_refresh() */
+ #define REFRESH_READ_LOCK 16384 /* Lock tables for read */
+diff -ruN a/patch_info/userstats.info b/patch_info/userstats.info
+--- /dev/null 1970-01-01 09:00:00.000000000 +0900
++++ b/patch_info/userstats.info 2010-08-27 15:10:33.744161257 +0900
+@@ -0,0 +1,11 @@
++File=userstat.patch
++Name=SHOW USER/TABLE/INDEX statistics
++Version=V2
++Author=Google
++License=GPL
++Comment=Added INFORMATION_SCHEMA.*_STATISTICS
++2008-12-01
++YK: fix behavior for prepared statements
++
++2008-11-26
++YK: add switch variable "userstat_running" to control INFORMATION_SCHEMA.*_STATISTICS (default:OFF)
+diff -ruN a/sql/handler.cc b/sql/handler.cc
+--- a/sql/handler.cc 2010-08-04 02:24:27.000000000 +0900
++++ b/sql/handler.cc 2010-08-27 15:10:33.749058856 +0900
+@@ -1194,6 +1194,8 @@
+ if (cookie)
+ tc_log->unlog(cookie, xid);
+ DBUG_EXECUTE_IF("crash_commit_after", abort(););
++ if (is_real_trans)
++ thd->diff_commit_trans++;
+ end:
+ if (rw_trans)
+ start_waiting_global_read_lock(thd);
+@@ -1324,6 +1326,8 @@
+ /* Always cleanup. Even if there nht==0. There may be savepoints. */
+ if (is_real_trans)
+ thd->transaction.cleanup();
++
++ thd->diff_rollback_trans++;
+ #endif /* USING_TRANSACTIONS */
+ if (all)
+ thd->transaction_rollback_request= FALSE;
+@@ -1762,6 +1766,7 @@
+ ha_info->reset(); /* keep it conveniently zero-filled */
+ }
+ trans->ha_list= sv->ha_list;
++ thd->diff_rollback_trans++;
+ DBUG_RETURN(error);
+ }
+
+@@ -2122,6 +2127,8 @@
+ dup_ref=ref+ALIGN_SIZE(ref_length);
+ cached_table_flags= table_flags();
+ }
++ rows_read = rows_changed = 0;
++ memset(index_rows_read, 0, sizeof(index_rows_read));
+ DBUG_RETURN(error);
+ }
+
+@@ -3571,6 +3578,111 @@
+ return;
+ }
+
++// Merges this handler's rows_read/rows_changed into global_table_stats and zeroes them.
++void handler::update_global_table_stats() {
++ if (!opt_userstat_running) {
++ rows_read = rows_changed = 0; // Stats collection is off: discard the counters.
++ return;
++ }
++
++ if (!rows_read && !rows_changed) return; // Nothing to update.
++ // table_cache_key is db_name + '\0' + table_name + '\0', so "%s" below prints db_name only.
++ if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str) return;
++
++ TABLE_STATS* table_stats;
++ char key[NAME_LEN * 2 + 2];
++ // Hash key: [db] + '.' + [table]
++ sprintf(key, "%s.%s", table->s->table_cache_key.str, table->s->table_name.str);
++
++ pthread_mutex_lock(&LOCK_global_table_stats);
++ // Gets the global table stats, creating one if necessary.
++ if (!(table_stats = (TABLE_STATS*)hash_search(&global_table_stats,
++ (uchar*)key,
++ strlen(key)))) {
++ if (!(table_stats = ((TABLE_STATS*)
++ my_malloc(sizeof(TABLE_STATS), MYF(MY_WME | MY_ZEROFILL))))) {
++ // Out of memory.
++ sql_print_error("Allocating table stats failed.");
++ goto end; // rows_read/rows_changed are kept: the reset below is skipped.
++ }
++ strncpy(table_stats->table, key, sizeof(table_stats->table)); // NOTE(review): no NUL is written if key fills the buffer - confirm sizes.
++ table_stats->rows_read = 0; // presumably already zero via MY_ZEROFILL - TODO confirm
++ table_stats->rows_changed = 0;
++ table_stats->rows_changed_x_indexes = 0;
++ table_stats->engine_type = (int) ht->db_type;
++
++ if (my_hash_insert(&global_table_stats, (uchar*)table_stats)) {
++ // Insertion failed (reported as out of memory): free the new entry.
++ sql_print_error("Inserting table stats failed.");
++ my_free((char*)table_stats, 0);
++ goto end; // rows_read/rows_changed are kept: the reset below is skipped.
++ }
++ }
++ // Accumulates this handler's counters while the mutex is still held.
++ table_stats->rows_read += rows_read;
++ table_stats->rows_changed += rows_changed;
++ table_stats->rows_changed_x_indexes +=
++ rows_changed * (table->s->keys ? table->s->keys : 1); // A table without keys counts as one index.
++ current_thd->diff_total_read_rows += rows_read; // Also adds the reads to current_thd's counter.
++ rows_read = rows_changed = 0;
++end: // Reached on success and on both failure paths.
++ pthread_mutex_unlock(&LOCK_global_table_stats);
++}
++
++// Adds each index's pending index_rows_read count to global_index_stats and zeroes it.
++void handler::update_global_index_stats() {
++ // table_cache_key is db_name + '\0' + table_name + '\0', so "%s" below prints db_name only.
++ if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str) return;
++
++ if (!opt_userstat_running) {
++ for (uint x = 0; x < table->s->keys; x++) {
++ index_rows_read[x] = 0; // Stats collection is off: discard the counter.
++ }
++ return;
++ }
++
++ for (uint x = 0; x < table->s->keys; x++) {
++ if (index_rows_read[x]) {
++ // Rows were read using this index.
++ KEY* key_info = &table->key_info[x];
++
++ if (!key_info->name) continue; // Unnamed key: skipped, its counter is left as is.
++
++ INDEX_STATS* index_stats;
++ char key[NAME_LEN * 3 + 3];
++ // Hash key: [db] + '.' + [table] + '.' + [index]
++ sprintf(key, "%s.%s.%s", table->s->table_cache_key.str,
++ table->s->table_name.str, key_info->name);
++
++ pthread_mutex_lock(&LOCK_global_index_stats); // Taken and released once per index.
++ // Gets the global index stats, creating one if necessary.
++ if (!(index_stats = (INDEX_STATS*)hash_search(&global_index_stats,
++ (uchar*)key,
++ strlen(key)))) {
++ if (!(index_stats = ((INDEX_STATS*)
++ my_malloc(sizeof(INDEX_STATS), MYF(MY_WME | MY_ZEROFILL))))) {
++ // Out of memory.
++ sql_print_error("Allocating index stats failed.");
++ goto end; // index_rows_read[x] is kept: the reset below is skipped.
++ }
++ strncpy(index_stats->index, key, sizeof(index_stats->index)); // NOTE(review): no NUL is written if key fills the buffer - confirm sizes.
++ index_stats->rows_read = 0; // presumably already zero via MY_ZEROFILL - TODO confirm
++
++ if (my_hash_insert(&global_index_stats, (uchar*)index_stats)) {
++ // Insertion failed (reported as out of memory): free the new entry.
++ sql_print_error("Inserting index stats failed.");
++ my_free((char*)index_stats, 0);
++ goto end; // index_rows_read[x] is kept: the reset below is skipped.
++ }
++ }
++ // Accumulates this index's counter while the mutex is still held.
++ index_stats->rows_read += index_rows_read[x];
++ index_rows_read[x] = 0;
++end: // Reached on success and on both failure paths; the loop then continues.
++ pthread_mutex_unlock(&LOCK_global_index_stats);
++ }
++ }
++}
+
+ /****************************************************************************
+ ** Some general functions that isn't in the handler class
+diff -ruN a/sql/handler.h b/sql/handler.h
+--- a/sql/handler.h 2010-08-04 02:24:27.000000000 +0900
++++ b/sql/handler.h 2010-08-27 15:10:33.753058869 +0900
+@@ -30,6 +30,10 @@
+
+ #define USING_TRANSACTIONS
+
++#if MAX_KEY > 128
++#error MAX_KEY is too large. Values up to 128 are supported.
++#endif
++
+ // the following is for checking tables
+
+ #define HA_ADMIN_ALREADY_DONE 1
+@@ -1121,6 +1125,9 @@
+ bool locked;
+ bool implicit_emptied; /* Can be !=0 only if HEAP */
+ const COND *pushed_cond;
++ ulonglong rows_read;
++ ulonglong rows_changed;
++ ulonglong index_rows_read[MAX_KEY];
+ /**
+ next_insert_id is the next value which should be inserted into the
+ auto_increment column: in a inserting-multi-row statement (like INSERT
+@@ -1158,9 +1165,11 @@
+ ref_length(sizeof(my_off_t)),
+ ft_handler(0), inited(NONE),
+ locked(FALSE), implicit_emptied(0),
+- pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
++ pushed_cond(0), rows_read(0), rows_changed(0), next_insert_id(0), insert_id_for_cur_row(0),
+ auto_inc_intervals_count(0)
+- {}
++ {
++ memset(index_rows_read, 0, sizeof(index_rows_read));
++ }
+ virtual ~handler(void)
+ {
+ DBUG_ASSERT(locked == FALSE);
+@@ -1284,6 +1293,8 @@
+ {
+ table= table_arg;
+ table_share= share;
++ rows_read = rows_changed = 0;
++ memset(index_rows_read, 0, sizeof(index_rows_read));
+ }
+ virtual double scan_time()
+ { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
+@@ -1628,6 +1639,8 @@
+ virtual bool is_crashed() const { return 0; }
+ virtual bool auto_repair() const { return 0; }
+
++ void update_global_table_stats();
++ void update_global_index_stats();
+
+ #define CHF_CREATE_FLAG 0
+ #define CHF_DELETE_FLAG 1
+diff -ruN a/sql/lex.h b/sql/lex.h
+--- a/sql/lex.h 2010-08-27 14:29:26.009071592 +0900
++++ b/sql/lex.h 2010-08-27 15:10:33.755063742 +0900
+@@ -106,6 +106,7 @@
+ { "CHECKSUM", SYM(CHECKSUM_SYM)},
+ { "CIPHER", SYM(CIPHER_SYM)},
+ { "CLIENT", SYM(CLIENT_SYM)},
++ { "CLIENT_STATISTICS", SYM(CLIENT_STATS_SYM)},
+ { "CLOSE", SYM(CLOSE_SYM)},
+ { "COALESCE", SYM(COALESCE)},
+ { "CODE", SYM(CODE_SYM)},
+@@ -245,6 +246,7 @@
+ { "IN", SYM(IN_SYM)},
+ { "INDEX", SYM(INDEX_SYM)},
+ { "INDEXES", SYM(INDEXES)},
++ { "INDEX_STATISTICS", SYM(INDEX_STATS_SYM)},
+ { "INFILE", SYM(INFILE)},
+ { "INITIAL_SIZE", SYM(INITIAL_SIZE_SYM)},
+ { "INNER", SYM(INNER_SYM)},
+@@ -478,6 +480,7 @@
+ { "SIGNED", SYM(SIGNED_SYM)},
+ { "SIMPLE", SYM(SIMPLE_SYM)},
+ { "SLAVE", SYM(SLAVE)},
++ { "SLOW", SYM(SLOW_SYM)},
+ { "SNAPSHOT", SYM(SNAPSHOT_SYM)},
+ { "SMALLINT", SYM(SMALLINT)},
+ { "SOCKET", SYM(SOCKET_SYM)},
+@@ -527,12 +530,14 @@
+ { "TABLES", SYM(TABLES)},
+ { "TABLESPACE", SYM(TABLESPACE)},
+ { "TABLE_CHECKSUM", SYM(TABLE_CHECKSUM_SYM)},
++ { "TABLE_STATISTICS", SYM(TABLE_STATS_SYM)},
+ { "TEMPORARY", SYM(TEMPORARY)},
+ { "TEMPTABLE", SYM(TEMPTABLE_SYM)},
+ { "TERMINATED", SYM(TERMINATED)},
+ { "TEXT", SYM(TEXT_SYM)},
+ { "THAN", SYM(THAN_SYM)},
+ { "THEN", SYM(THEN_SYM)},
++ { "THREAD_STATISTICS", SYM(THREAD_STATS_SYM)},
+ { "TIME", SYM(TIME_SYM)},
+ { "TIMESTAMP", SYM(TIMESTAMP)},
+ { "TIMESTAMPADD", SYM(TIMESTAMP_ADD)},
+@@ -568,6 +573,7 @@
+ { "USE", SYM(USE_SYM)},
+ { "USER", SYM(USER)},
+ { "USER_RESOURCES", SYM(RESOURCES)},
++ { "USER_STATISTICS", SYM(USER_STATS_SYM)},
+ { "USE_FRM", SYM(USE_FRM)},
+ { "USING", SYM(USING)},
+ { "UTC_DATE", SYM(UTC_DATE_SYM)},
+diff -ruN a/sql/log.cc b/sql/log.cc
+--- a/sql/log.cc 2010-08-27 14:43:41.986138797 +0900
++++ b/sql/log.cc 2010-08-27 15:10:33.761058932 +0900
+@@ -826,6 +826,13 @@
+ mysql_slow_log.reopen_file();
+ }
+
++void Log_to_file_event_handler::flush_slow_log()
++{
++  /* Reopen the slow log file, but only while slow logging is enabled. */
++  if (!opt_slow_log) return;
++  mysql_slow_log.reopen_file();
++}
++
+ /*
+ Log error with all enabled log event handlers
+
+@@ -937,6 +944,21 @@
+ return rc;
+ }
+
++bool LOGGER::flush_slow_log(THD *thd)
++{
++ /*
++ Take the logger lock exclusively so that no other thread can use the
++ logging routines while the slow log file is reopened. 'thd' is not used.
++ */
++ logger.lock_exclusive();
++
++ /* ask the file log handler to reopen the slow query log file */
++ file_log_handler->flush_slow_log();
++
++ /* end of slow log flush; this function always returns 0 */
++ logger.unlock();
++ return 0;
++}
+
+ /*
+ Log slow query with all enabled log event handlers
+@@ -4491,6 +4513,8 @@
+ thd->first_successful_insert_id_in_prev_stmt_for_binlog);
+ if (e.write(file))
+ goto err;
++ if (file == &log_file)
++ thd->binlog_bytes_written += e.data_written;
+ }
+ if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
+ {
+@@ -4502,12 +4526,16 @@
+ minimum());
+ if (e.write(file))
+ goto err;
++ if (file == &log_file)
++ thd->binlog_bytes_written += e.data_written;
+ }
+ if (thd->rand_used)
+ {
+ Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
+ if (e.write(file))
+ goto err;
++ if (file == &log_file)
++ thd->binlog_bytes_written += e.data_written;
+ }
+ if (thd->user_var_events.elements)
+ {
+@@ -4523,6 +4551,8 @@
+ user_var_event->charset_number);
+ if (e.write(file))
+ goto err;
++ if (file == &log_file)
++ thd->binlog_bytes_written += e.data_written;
+ }
+ }
+ }
+@@ -4535,6 +4565,8 @@
+ if (event_info->write(file) ||
+ DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
+ goto err;
++ if (file == &log_file)
++ thd->binlog_bytes_written += event_info->data_written;
+
+ if (file == &log_file) // we are writing to the real log (disk)
+ {
+@@ -4680,7 +4712,7 @@
+ be reset as a READ_CACHE to be able to read the contents from it.
+ */
+
+-int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
++int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log, bool sync_log)
+ {
+ Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
+
+@@ -4728,6 +4760,7 @@
+ /* write the first half of the split header */
+ if (my_b_write(&log_file, header, carry))
+ return ER_ERROR_ON_WRITE;
++ thd->binlog_bytes_written += carry;
+
+ /*
+ copy fixed second half of header to cache so the correct
+@@ -4796,6 +4829,7 @@
+ /* Write data to the binary log file */
+ if (my_b_write(&log_file, cache->read_pos, length))
+ return ER_ERROR_ON_WRITE;
++ thd->binlog_bytes_written += length;
+ cache->read_pos=cache->read_end; // Mark buffer used up
+ } while ((length= my_b_fill(cache)));
+
+@@ -4918,21 +4952,24 @@
+ */
+ if (qinfo.write(&log_file))
+ goto err;
++ thd->binlog_bytes_written += qinfo.data_written;
+
+ DBUG_EXECUTE_IF("crash_before_writing_xid",
+ {
+- if ((write_error= write_cache(cache, false, true)))
++ if ((write_error= write_cache(thd, cache, false, true)))
+ DBUG_PRINT("info", ("error writing binlog cache: %d",
+ write_error));
+ DBUG_PRINT("info", ("crashing before writing xid"));
+ abort();
+ });
+
+- if ((write_error= write_cache(cache, false, false)))
++ if ((write_error= write_cache(thd, cache, false, false)))
+ goto err;
+
+ if (commit_event && commit_event->write(&log_file))
+ goto err;
++ if (commit_event)
++ thd->binlog_bytes_written += commit_event->data_written;
+
+ if (incident && write_incident(thd, FALSE))
+ goto err;
+diff -ruN a/sql/log.h b/sql/log.h
+--- a/sql/log.h 2010-08-27 14:38:08.690071101 +0900
++++ b/sql/log.h 2010-08-27 15:13:33.762976324 +0900
+@@ -361,7 +361,7 @@
+ bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
+
+ bool write_incident(THD *thd, bool lock);
+- int write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
++ int write_cache(THD *thd, IO_CACHE *cache, bool lock_log, bool flush_and_sync);
+ void set_write_error(THD *thd);
+ bool check_write_error(THD *thd);
+
+@@ -499,6 +499,7 @@
+ const char *sql_text, uint sql_text_len,
+ CHARSET_INFO *client_cs);
+ void flush();
++ void flush_slow_log();
+ void init_pthread_objects();
+ MYSQL_QUERY_LOG *get_mysql_slow_log() { return &mysql_slow_log; }
+ MYSQL_QUERY_LOG *get_mysql_log() { return &mysql_log; }
+@@ -543,6 +544,7 @@
+ void init_base();
+ void init_log_tables();
+ bool flush_logs(THD *thd);
++ bool flush_slow_log(THD *thd);
+ /* Perform basic logger cleanup. this will leave e.g. error log open. */
+ void cleanup_base();
+ /* Free memory. Nothing could be logged after this function is called */
+diff -ruN a/sql/mysql_priv.h b/sql/mysql_priv.h
+--- a/sql/mysql_priv.h 2010-08-27 14:38:08.699057407 +0900
++++ b/sql/mysql_priv.h 2010-08-27 15:10:33.805058568 +0900
+@@ -1139,7 +1139,17 @@
+ bool multi_delete_set_locks_and_link_aux_tables(LEX *lex);
+ void init_max_user_conn(void);
+ void init_update_queries(void);
++void init_global_user_stats(void);
++void init_global_table_stats(void);
++void init_global_index_stats(void);
++void init_global_client_stats(void);
++void init_global_thread_stats(void);
+ void free_max_user_conn(void);
++void free_global_user_stats(void);
++void free_global_table_stats(void);
++void free_global_index_stats(void);
++void free_global_client_stats(void);
++void free_global_thread_stats(void);
+ pthread_handler_t handle_bootstrap(void *arg);
+ int mysql_execute_command(THD *thd);
+ bool do_command(THD *thd);
+@@ -2014,6 +2024,7 @@
+ extern ulong max_connect_errors, connect_timeout;
+ extern ulong slave_net_timeout, slave_trans_retries;
+ extern uint max_user_connections;
++extern ulonglong denied_connections;
+ extern ulong what_to_log,flush_time;
+ extern ulong query_buff_size;
+ extern ulong max_prepared_stmt_count, prepared_stmt_count;
+@@ -2067,6 +2078,7 @@
+ extern my_bool opt_slave_compressed_protocol, use_temp_pool;
+ extern ulong slave_exec_mode_options;
+ extern my_bool opt_readonly, lower_case_file_system;
++extern my_bool opt_userstat_running, opt_thread_statistics;
+ extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
+ extern my_bool opt_secure_auth;
+ extern char* opt_secure_file_priv;
+@@ -2131,6 +2143,15 @@
+ extern struct system_variables max_system_variables;
+ extern struct system_status_var global_status_var;
+ extern struct rand_struct sql_rand;
++extern HASH global_user_stats;
++extern HASH global_client_stats;
++extern HASH global_thread_stats;
++extern pthread_mutex_t LOCK_global_user_client_stats;
++extern HASH global_table_stats;
++extern pthread_mutex_t LOCK_global_table_stats;
++extern HASH global_index_stats;
++extern pthread_mutex_t LOCK_global_index_stats;
++extern pthread_mutex_t LOCK_stats;
+
+ extern const char *opt_date_time_formats[];
+ extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[];
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc 2010-08-27 14:43:41.996021369 +0900
++++ b/sql/mysqld.cc 2010-08-27 15:10:33.772058694 +0900
+@@ -533,6 +533,7 @@
+ uint opt_debug_sync_timeout= 0;
+ #endif /* defined(ENABLED_DEBUG_SYNC) */
+ my_bool opt_old_style_user_limits= 0, trust_function_creators= 0;
++my_bool opt_userstat_running= 0, opt_thread_statistics= 0;
+ /*
+ True if there is at least one per-hour limit for some user, so we should
+ check them before each query (and possibly reset counters when hour is
+@@ -581,6 +582,7 @@
+ ulong binlog_cache_use= 0, binlog_cache_disk_use= 0;
+ ulong max_connections, max_connect_errors;
+ uint max_user_connections= 0;
++ulonglong denied_connections = 0;
+ /**
+ Limit of the total number of prepared statements in the server.
+ Is necessary to protect the server against out-of-memory attacks.
+@@ -682,6 +684,10 @@
+ LOCK_global_system_variables,
+ LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
+ LOCK_connection_count;
++pthread_mutex_t LOCK_stats;
++pthread_mutex_t LOCK_global_user_client_stats;
++pthread_mutex_t LOCK_global_table_stats;
++pthread_mutex_t LOCK_global_index_stats;
+ /**
+ The below lock protects access to two global server variables:
+ max_prepared_stmt_count and prepared_stmt_count. These variables
+@@ -1367,6 +1373,11 @@
+ x_free(opt_secure_file_priv);
+ bitmap_free(&temp_pool);
+ free_max_user_conn();
++ free_global_user_stats();
++ free_global_client_stats();
++ free_global_thread_stats();
++ free_global_table_stats();
++ free_global_index_stats();
+ #ifdef HAVE_REPLICATION
+ end_slave_list();
+ #endif
+@@ -1483,6 +1494,10 @@
+ (void) pthread_cond_destroy(&COND_thread_cache);
+ (void) pthread_cond_destroy(&COND_flush_thread_cache);
+ (void) pthread_cond_destroy(&COND_manager);
++ (void) pthread_mutex_destroy(&LOCK_stats);
++ (void) pthread_mutex_destroy(&LOCK_global_user_client_stats);
++ (void) pthread_mutex_destroy(&LOCK_global_table_stats);
++ (void) pthread_mutex_destroy(&LOCK_global_index_stats);
+ }
+
+ #endif /*EMBEDDED_LIBRARY*/
+@@ -3172,6 +3187,7 @@
+ {"show_binlog_events", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOG_EVENTS]), SHOW_LONG_STATUS},
+ {"show_binlogs", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOGS]), SHOW_LONG_STATUS},
+ {"show_charsets", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CHARSETS]), SHOW_LONG_STATUS},
++ {"show_client_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CLIENT_STATS]), SHOW_LONG_STATUS},
+ {"show_collations", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLLATIONS]), SHOW_LONG_STATUS},
+ {"show_column_types", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLUMN_TYPES]), SHOW_LONG_STATUS},
+ {"show_contributors", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CONTRIBUTORS]), SHOW_LONG_STATUS},
+@@ -3193,6 +3209,7 @@
+ #endif
+ {"show_function_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS_FUNC]), SHOW_LONG_STATUS},
+ {"show_grants", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GRANTS]), SHOW_LONG_STATUS},
++ {"show_index_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_INDEX_STATS]), SHOW_LONG_STATUS},
+ {"show_keys", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_KEYS]), SHOW_LONG_STATUS},
+ {"show_master_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_MASTER_STAT]), SHOW_LONG_STATUS},
+ {"show_new_master", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NEW_MASTER]), SHOW_LONG_STATUS},
+@@ -3211,9 +3228,12 @@
+ {"show_slave_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_STAT]), SHOW_LONG_STATUS},
+ {"show_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS]), SHOW_LONG_STATUS},
+ {"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS},
++ {"show_table_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATS]), SHOW_LONG_STATUS},
+ {"show_table_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS},
+ {"show_tables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLES]), SHOW_LONG_STATUS},
++ {"show_thread_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_THREAD_STATS]), SHOW_LONG_STATUS},
+ {"show_triggers", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TRIGGERS]), SHOW_LONG_STATUS},
++ {"show_user_statistics", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_USER_STATS]), SHOW_LONG_STATUS},
+ {"show_variables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_VARIABLES]), SHOW_LONG_STATUS},
+ {"show_warnings", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_WARNS]), SHOW_LONG_STATUS},
+ {"slave_start", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SLAVE_START]), SHOW_LONG_STATUS},
+@@ -3652,6 +3672,10 @@
+ #endif
+ (void) pthread_mutex_init(&LOCK_server_started, MY_MUTEX_INIT_FAST);
+ (void) pthread_cond_init(&COND_server_started,NULL);
++ (void) pthread_mutex_init(&LOCK_stats, MY_MUTEX_INIT_FAST);
++ (void) pthread_mutex_init(&LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST);
++ (void) pthread_mutex_init(&LOCK_global_table_stats, MY_MUTEX_INIT_FAST);
++ (void) pthread_mutex_init(&LOCK_global_index_stats, MY_MUTEX_INIT_FAST);
+ sp_cache_init();
+ #ifdef HAVE_EVENT_SCHEDULER
+ Events::init_mutexes();
+@@ -4053,6 +4077,9 @@
+ if (!errmesg[0][0])
+ unireg_abort(1);
+
++ init_global_table_stats();
++ init_global_index_stats();
++
+ /* We have to initialize the storage engines before CSV logging */
+ if (ha_init())
+ {
+@@ -4199,6 +4226,9 @@
+
+ init_max_user_conn();
+ init_update_queries();
++ init_global_user_stats();
++ init_global_client_stats();
++ init_global_thread_stats();
+ DBUG_RETURN(0);
+ }
+
+@@ -5016,6 +5046,7 @@
+
+ DBUG_PRINT("error",("Too many connections"));
+ close_connection(thd, ER_CON_COUNT_ERROR, 1);
++ statistic_increment(denied_connections, &LOCK_status);
+ delete thd;
+ DBUG_VOID_RETURN;
+ }
+@@ -5800,6 +5831,8 @@
+ OPT_SLAVE_EXEC_MODE,
+ OPT_GENERAL_LOG_FILE,
+ OPT_SLOW_QUERY_LOG_FILE,
++ OPT_USERSTAT_RUNNING,
++ OPT_THREAD_STATISTICS,
+ OPT_USE_GLOBAL_LONG_QUERY_TIME,
+ OPT_USE_GLOBAL_LOG_SLOW_CONTROL,
+ OPT_SLOW_QUERY_LOG_MICROSECONDS_TIMESTAMP,
+@@ -7292,6 +7325,14 @@
+ &max_system_variables.net_wait_timeout, 0, GET_ULONG,
+ REQUIRED_ARG, NET_WAIT_TIMEOUT, 1, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT),
+ 0, 1, 0},
++ {"userstat_running", OPT_USERSTAT_RUNNING,
++ "Control USER_STATISTICS, CLIENT_STATISTICS, THREAD_STATISTICS, INDEX_STATISTICS and TABLE_STATISTICS running",
++ (uchar**) &opt_userstat_running, (uchar**) &opt_userstat_running,
++ 0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0},
++ {"thread_statistics", OPT_THREAD_STATISTICS,
++ "Control THREAD_STATISTICS running, when userstat_running is enabled",
++ (uchar**) &opt_thread_statistics, (uchar**) &opt_thread_statistics,
++ 0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0}, /* boolean switch stored in opt_thread_statistics */
+ {"binlog-direct-non-transactional-updates", OPT_BINLOG_DIRECT_NON_TRANS_UPDATE,
+ "Causes updates to non-transactional engines using statement format to be "
+ "written directly to binary log. Before using this option, make sure that "
+diff -ruN a/sql/set_var.cc b/sql/set_var.cc
+--- a/sql/set_var.cc 2010-08-27 14:43:42.004008722 +0900
++++ b/sql/set_var.cc 2010-08-27 15:10:33.809988740 +0900
+@@ -554,6 +554,10 @@
+ static sys_var_thd_ulong sys_read_buff_size(&vars, "read_buffer_size",
+ &SV::read_buff_size);
+ static sys_var_opt_readonly sys_readonly(&vars, "read_only", &opt_readonly);
++static sys_var_bool_ptr sys_userstat_running(&vars, "userstat_running",
++ &opt_userstat_running);
++static sys_var_bool_ptr sys_thread_statistics(&vars, "thread_statistics",
++ &opt_thread_statistics);
+ static sys_var_thd_ulong sys_read_rnd_buff_size(&vars, "read_rnd_buffer_size",
+ &SV::read_rnd_buff_size);
+ static sys_var_thd_ulong sys_div_precincrement(&vars, "div_precision_increment",
+diff -ruN a/sql/sql_base.cc b/sql/sql_base.cc
+--- a/sql/sql_base.cc 2010-08-04 02:24:34.000000000 +0900
++++ b/sql/sql_base.cc 2010-08-27 15:10:33.818058934 +0900
+@@ -1382,6 +1382,12 @@
+ DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", table->s->db.str,
+ table->s->table_name.str, (long) table));
+
++ if(table->file)
++ {
++ table->file->update_global_table_stats();
++ table->file->update_global_index_stats();
++ }
++
+ *table_ptr=table->next;
+ /*
+ When closing a MERGE parent or child table, detach the children first.
+@@ -1922,6 +1928,8 @@
+ DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'",
+ table->s->db.str, table->s->table_name.str));
+
++ table->file->update_global_table_stats();
++ table->file->update_global_index_stats();
+ free_io_cache(table);
+ closefrm(table, 0);
+ if (delete_table)
+diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
+--- a/sql/sql_class.cc 2010-08-27 14:38:08.741990000 +0900
++++ b/sql/sql_class.cc 2010-08-27 15:10:33.825058007 +0900
+@@ -704,6 +704,13 @@
+ mysys_var=0;
+ binlog_evt_union.do_union= FALSE;
+ enable_slow_log= 0;
++ busy_time = 0;
++ cpu_time = 0;
++ bytes_received = 0;
++ bytes_sent = 0;
++ binlog_bytes_written = 0;
++ updated_row_count = 0;
++ sent_row_count_2 = 0;
+ #ifndef DBUG_OFF
+ dbug_sentry=THD_SENTRY_MAGIC;
+ #endif
+@@ -907,6 +914,7 @@
+ reset_current_stmt_binlog_row_based();
+ bzero((char *) &status_var, sizeof(status_var));
+ sql_log_bin_toplevel= options & OPTION_BIN_LOG;
++ reset_stats();
+
+ #if defined(ENABLED_DEBUG_SYNC)
+ /* Initialize the Debug Sync Facility. See debug_sync.cc. */
+@@ -914,6 +922,84 @@
+ #endif /* defined(ENABLED_DEBUG_SYNC) */
+ }
+
++// Restarts this THD's statistics: stamps both timestamps with "now" and clears the diffs.
++void THD::reset_stats(void) {
++  const time_t now = time(NULL);
++  current_connect_time = last_global_update_time = now;
++  reset_diff_stats();
++}
++
++// Zeroes every 'diff' accumulator; these feed the updates of the global stats.
++void THD::reset_diff_stats(void) {
++  // Time accumulators.
++  diff_total_busy_time = diff_total_cpu_time = 0;
++  // Byte counters.
++  diff_total_bytes_received = diff_total_bytes_sent = 0;
++  diff_total_binlog_bytes_written = 0;
++  // Row counters.
++  diff_total_sent_rows = diff_total_updated_rows = 0;
++  diff_total_read_rows = 0;
++  // Command counters.
++  diff_select_commands = diff_update_commands = 0;
++  diff_other_commands = 0;
++  diff_empty_queries = 0;
++  // Transaction counters.
++  diff_commit_trans = diff_rollback_trans = 0;
++  // Connection and access error counters.
++  diff_denied_connections = diff_lost_connections = 0;
++  diff_access_denied_errors = 0;
++}
++
++// Folds this THD's per-command counters into its 'diff' stats (if userstat is on), then clears them.
++void THD::update_stats(bool ran_command) {
++  if (opt_userstat_running) {
++    diff_total_busy_time += busy_time;
++    diff_total_cpu_time += cpu_time;
++    diff_total_bytes_received += bytes_received;
++    diff_total_bytes_sent += bytes_sent;
++    diff_total_binlog_bytes_written += binlog_bytes_written;
++    diff_total_sent_rows += sent_row_count_2;
++    diff_total_updated_rows += updated_row_count;
++    // diff_total_read_rows is updated in handler.cc.
++
++    if (ran_command) {
++      // The replication thread has the COM_CONNECT command.
++      if ((old_command == COM_QUERY || command == COM_CONNECT) &&
++          (lex->sql_command >= 0 && lex->sql_command < SQLCOM_END)) {
++        // A SQL query.
++        if (lex->sql_command == SQLCOM_SELECT) {
++          diff_select_commands++;
++          if (!sent_row_count_2)
++            diff_empty_queries++;
++        } else if (sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND) {
++          // Status ('SHOW ') commands count as "other"; the old '!flags & mask' applied '!' before the mask.
++          diff_other_commands++;
++          // 'SHOW ' commands shouldn't inflate total sent row count.
++          diff_total_sent_rows -= sent_row_count_2;
++        } else if (is_update_query(lex->sql_command)) {
++          diff_update_commands++;
++        } else {
++          diff_other_commands++;
++        }
++      }
++    }
++    // diff_commit_trans is updated in handler.cc.
++    // diff_rollback_trans is updated in handler.cc.
++    // diff_denied_connections is updated in sql_parse.cc.
++    // diff_lost_connections is updated in sql_parse.cc.
++    // diff_access_denied_errors is updated in sql_parse.cc.
++  }
++
++  /* Reset the per-command counters (also when userstat is off) to avoid
++     double-counting, since their values are already stored in diff_total_*. */
++  busy_time = 0;
++  cpu_time = 0;
++  bytes_received = 0;
++  bytes_sent = 0;
++  binlog_bytes_written = 0;
++  updated_row_count = 0;
++  sent_row_count_2 = 0;
++}
+
+ /*
+ Init THD for query processing.
+@@ -1545,6 +1631,32 @@
+ }
+ #endif
+
++char *THD::get_client_host_port(THD *client)
++{
++  /* Builds "host:port" for 'client' using this THD's alloc()/strdup(); may return NULL. */
++  Security_context *sctx= client->security_ctx;
++  const bool with_port= client->peer_port && (sctx->host || sctx->ip) &&
++                        security_ctx->host_or_ip[0];
++  if (!with_port)
++  {
++    /* No port to show: fall back to host_or_ip, then host, then "". */
++    const char *name= sctx->host_or_ip[0] ? sctx->host_or_ip :
++                      (sctx->host ? sctx->host : "");
++    return this->strdup(name);
++  }
++  char *buf= (char *) this->alloc(LIST_PROCESS_HOST_LEN+1);
++  if (buf)
++    my_snprintf(buf, LIST_PROCESS_HOST_LEN,
++                "%s:%u", sctx->host_or_ip, client->peer_port);
++  return buf;
++}
++
++const char *get_client_host(THD *client)
++{
++  Security_context *sctx= client->security_ctx;
++  if (sctx->host_or_ip[0]) return sctx->host_or_ip;  /* preferred when non-empty */
++  return sctx->host ? sctx->host : "";               /* otherwise host, or "" */
++}
+
+ struct Item_change_record: public ilink
+ {
+@@ -1732,6 +1844,7 @@
+ buffer.set(buff, sizeof(buff), &my_charset_bin);
+ }
+ thd->sent_row_count++;
++ thd->sent_row_count_2++;
+ if (thd->is_error())
+ {
+ protocol->remove_last_row();
+@@ -1836,6 +1949,7 @@
+ select_export::~select_export()
+ {
+ thd->sent_row_count=row_count;
++ thd->sent_row_count_2=row_count;
+ }
+
+
+@@ -2868,6 +2982,7 @@
+ if (likely(thd != 0))
+ { /* current_thd==0 when close_connection() calls net_send_error() */
+ thd->status_var.bytes_sent+= length;
++ thd->bytes_sent+= length;
+ }
+ }
+
+@@ -2875,6 +2990,7 @@
+ void thd_increment_bytes_received(ulong length)
+ {
+ current_thd->status_var.bytes_received+= length;
++ current_thd->bytes_received+= length;
+ }
+
+
+diff -ruN a/sql/sql_class.h b/sql/sql_class.h
+--- a/sql/sql_class.h 2010-08-27 14:43:42.008006390 +0900
++++ b/sql/sql_class.h 2010-08-27 15:10:33.830058443 +0900
+@@ -1435,6 +1435,8 @@
+ first byte of the packet in do_command()
+ */
+ enum enum_server_command command;
++ // Used to save the command, before it is set to COM_SLEEP.
++ enum enum_server_command old_command;
+ uint32 server_id;
+ uint32 file_id; // for LOAD DATA INFILE
+ /* remote (peer) port */
+@@ -1828,6 +1830,8 @@
+ /* variables.transaction_isolation is reset to this after each commit */
+ enum_tx_isolation session_tx_isolation;
+ enum_check_fields count_cuted_fields;
++ ha_rows updated_row_count;
++ ha_rows sent_row_count_2; /* for userstat */
+
+ DYNAMIC_ARRAY user_var_events; /* For user variables replication */
+ MEM_ROOT *user_var_events_alloc; /* Allocate above array elements here */
+@@ -1916,6 +1920,49 @@
+ */
+ LOG_INFO* current_linfo;
+ NET* slave_net; // network connection from slave -> m.
++
++ /*
++ Used to update global user stats. The global user stats are updated
++ occasionally with the 'diff' variables. After the update, the 'diff'
++ variables are reset to 0.
++ */
++ // Time when the current thread connected to MySQL.
++ time_t current_connect_time;
++ // Last time when THD stats were updated in global_user_stats.
++ time_t last_global_update_time;
++ // Busy (non-idle) time for just one command.
++ double busy_time;
++ // Busy time not updated in global_user_stats yet.
++ double diff_total_busy_time;
++ // Cpu (non-idle) time for just one thread.
++ double cpu_time;
++ // Cpu time not updated in global_user_stats yet.
++ double diff_total_cpu_time;
++ /* bytes counting */
++ ulonglong bytes_received;
++ ulonglong diff_total_bytes_received;
++ ulonglong bytes_sent;
++ ulonglong diff_total_bytes_sent;
++ ulonglong binlog_bytes_written;
++ ulonglong diff_total_binlog_bytes_written;
++
++ // Number of rows not reflected in global_user_stats yet.
++ ha_rows diff_total_sent_rows, diff_total_updated_rows, diff_total_read_rows;
++ // Number of commands not reflected in global_user_stats yet.
++ ulonglong diff_select_commands, diff_update_commands, diff_other_commands;
++ // Number of transactions not reflected in global_user_stats yet.
++ ulonglong diff_commit_trans, diff_rollback_trans;
++ // Number of connection errors not reflected in global_user_stats yet.
++ ulonglong diff_denied_connections, diff_lost_connections;
++ // Number of db access denied, not reflected in global_user_stats yet.
++ ulonglong diff_access_denied_errors;
++ // Number of queries that return 0 rows
++ ulonglong diff_empty_queries;
++
++ // Per account query delay in milliseconds. When not 0, sleep this number of
++ // milliseconds before every SQL command.
++ ulonglong query_delay_millis;
++
+ /* Used by the sys_var class to store temporary values */
+ union
+ {
+@@ -1981,6 +2028,11 @@
+ alloc_root.
+ */
+ void init_for_queries();
++ void reset_stats(void);
++ void reset_diff_stats(void);
++ // ran_command is true when this is called immediately after a
++ // command has been run.
++ void update_stats(bool ran_command);
+ void change_user(void);
+ void cleanup(void);
+ void cleanup_after_query();
+@@ -2351,9 +2403,15 @@
+ *p_db= strmake(db, db_length);
+ *p_db_length= db_length;
+ return FALSE;
++
++ // Returns string as 'IP:port' for the client-side of the connection represented
++ // by 'client' as displayed by SHOW PROCESSLIST. Allocates memory from the heap of
++ // this THD and that is not reclaimed immediately, so use sparingly. May return NULL.
+ }
+ thd_scheduler scheduler;
+
++ char *get_client_host_port(THD *client);
++
+ public:
+ inline Internal_error_handler *get_internal_handler()
+ { return m_internal_handler; }
+@@ -2438,6 +2496,9 @@
+ LEX_STRING invoker_host;
+ };
+
++// Returns string as 'IP' for the client-side of the connection represented by
++// 'client'. Does not allocate memory. May return "".
++const char *get_client_host(THD *client);
+
+ /** A short cut for thd->main_da.set_ok_status(). */
+
+diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
+--- a/sql/sql_connect.cc 2010-08-27 14:38:08.750990238 +0900
++++ b/sql/sql_connect.cc 2010-08-27 15:10:33.834058369 +0900
+@@ -42,6 +42,24 @@
+ extern void win_install_sigabrt_handler();
+ #endif
+
++// Increments connection count for user.
++static int increment_connection_count(THD* thd, bool use_lock);
++
++// Uses the THD to update the global stats by user name and client IP
++void update_global_user_stats(THD* thd, bool create_user, time_t now);
++
++HASH global_user_stats;
++HASH global_client_stats;
++HASH global_thread_stats;
++// Protects global_user_stats and global_client_stats
++extern pthread_mutex_t LOCK_global_user_client_stats;
++
++HASH global_table_stats;
++extern pthread_mutex_t LOCK_global_table_stats;
++
++HASH global_index_stats;
++extern pthread_mutex_t LOCK_global_index_stats;
++
+ /*
+ Get structure for logging connection data for the current user
+ */
+@@ -99,6 +117,563 @@
+
+ }
+
++extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
++ my_bool not_used __attribute__((unused)))
++{
++ *length = strlen(user_stats->user); // key length: the user name up to its NUL
++ return (uchar*)user_stats->user; // key bytes: the user name buffer itself (presumably a HASH get-key callback)
++}
++
++extern "C" uchar *get_key_thread_stats(THREAD_STATS *thread_stats, size_t *length,
++ my_bool not_used __attribute__((unused)))
++{
++ *length = sizeof(my_thread_id); // key length: fixed size of a thread id
++ return (uchar*)&(thread_stats->id); // key bytes: the raw bytes of the id field
++}
++
++void free_user_stats(USER_STATS* user_stats)
++{
++ my_free((char*)user_stats, MYF(0)); // releases the USER_STATS record itself
++}
++
++void free_thread_stats(THREAD_STATS* thread_stats)
++{
++ my_free((char*)thread_stats, MYF(0)); // releases the THREAD_STATS record itself
++}
++
++void init_user_stats(USER_STATS *user_stats,
++                     const char *user,
++                     const char *priv_user,
++                     uint total_connections,
++                     uint concurrent_connections,
++                     time_t connected_time,
++                     double busy_time,
++                     double cpu_time,
++                     ulonglong bytes_received,
++                     ulonglong bytes_sent,
++                     ulonglong binlog_bytes_written,
++                     ha_rows rows_fetched,
++                     ha_rows rows_updated,
++                     ha_rows rows_read,
++                     ulonglong select_commands,
++                     ulonglong update_commands,
++                     ulonglong other_commands,
++                     ulonglong commit_trans,
++                     ulonglong rollback_trans,
++                     ulonglong denied_connections,
++                     ulonglong lost_connections,
++                     ulonglong access_denied_errors,
++                     ulonglong empty_queries)
++{
++  DBUG_ENTER("init_user_stats");
++  DBUG_PRINT("info", ("Add user_stats entry for user %s - priv_user %s", user, priv_user));
++  /* Populates *user_stats from the arguments; names are truncated to fit and always NUL-terminated. */
++  strncpy(user_stats->user, user, sizeof(user_stats->user) - 1);
++  user_stats->user[sizeof(user_stats->user) - 1] = '\0';
++  strncpy(user_stats->priv_user, priv_user, sizeof(user_stats->priv_user) - 1);
++  user_stats->priv_user[sizeof(user_stats->priv_user) - 1] = '\0';
++  user_stats->total_connections = total_connections;
++  user_stats->concurrent_connections = concurrent_connections;
++  user_stats->connected_time = connected_time;
++  user_stats->busy_time = busy_time;
++  user_stats->cpu_time = cpu_time;
++  user_stats->bytes_received = bytes_received;
++  user_stats->bytes_sent = bytes_sent;
++  user_stats->binlog_bytes_written = binlog_bytes_written;
++  user_stats->rows_fetched = rows_fetched;
++  user_stats->rows_updated = rows_updated;
++  user_stats->rows_read = rows_read;
++  user_stats->select_commands = select_commands;
++  user_stats->update_commands = update_commands;
++  user_stats->other_commands = other_commands;
++  user_stats->commit_trans = commit_trans;
++  user_stats->rollback_trans = rollback_trans;
++  user_stats->denied_connections = denied_connections;
++  user_stats->lost_connections = lost_connections;
++  user_stats->access_denied_errors = access_denied_errors;
++  user_stats->empty_queries = empty_queries;
++  DBUG_VOID_RETURN;
++}
++
++void init_thread_stats(THREAD_STATS *thread_stats,
++ my_thread_id id,
++ uint total_connections,
++ uint concurrent_connections,
++ time_t connected_time,
++ double busy_time,
++ double cpu_time,
++ ulonglong bytes_received,
++ ulonglong bytes_sent,
++ ulonglong binlog_bytes_written,
++ ha_rows rows_fetched,
++ ha_rows rows_updated,
++ ha_rows rows_read,
++ ulonglong select_commands,
++ ulonglong update_commands,
++ ulonglong other_commands,
++ ulonglong commit_trans,
++ ulonglong rollback_trans,
++ ulonglong denied_connections,
++ ulonglong lost_connections,
++ ulonglong access_denied_errors,
++ ulonglong empty_queries)
++{
++ DBUG_ENTER("init_thread_stats");
++ DBUG_PRINT("info",
++ ("Add thread_stats entry for thread %lu",
++ id));
++ thread_stats->id = id;
++
++ thread_stats->total_connections = total_connections;
++ thread_stats->concurrent_connections = concurrent_connections;
++ thread_stats->connected_time = connected_time;
++ thread_stats->busy_time = busy_time;
++ thread_stats->cpu_time = cpu_time;
++ thread_stats->bytes_received = bytes_received;
++ thread_stats->bytes_sent = bytes_sent;
++ thread_stats->binlog_bytes_written = binlog_bytes_written;
++ thread_stats->rows_fetched = rows_fetched;
++ thread_stats->rows_updated = rows_updated;
++ thread_stats->rows_read = rows_read;
++ thread_stats->select_commands = select_commands;
++ thread_stats->update_commands = update_commands;
++ thread_stats->other_commands = other_commands;
++ thread_stats->commit_trans = commit_trans;
++ thread_stats->rollback_trans = rollback_trans;
++ thread_stats->denied_connections = denied_connections;
++ thread_stats->lost_connections = lost_connections;
++ thread_stats->access_denied_errors = access_denied_errors;
++ thread_stats->empty_queries = empty_queries;
++ DBUG_VOID_RETURN;
++}
++
++void add_user_stats(USER_STATS *user_stats,
++ uint total_connections,
++ uint concurrent_connections,
++ time_t connected_time,
++ double busy_time,
++ double cpu_time,
++ ulonglong bytes_received,
++ ulonglong bytes_sent,
++ ulonglong binlog_bytes_written,
++ ha_rows rows_fetched,
++ ha_rows rows_updated,
++ ha_rows rows_read,
++ ulonglong select_commands,
++ ulonglong update_commands,
++ ulonglong other_commands,
++ ulonglong commit_trans,
++ ulonglong rollback_trans,
++ ulonglong denied_connections,
++ ulonglong lost_connections,
++ ulonglong access_denied_errors,
++ ulonglong empty_queries)
++{
++ user_stats->total_connections += total_connections;
++ user_stats->concurrent_connections += concurrent_connections;
++ user_stats->connected_time += connected_time;
++ user_stats->busy_time += busy_time;
++ user_stats->cpu_time += cpu_time;
++ user_stats->bytes_received += bytes_received;
++ user_stats->bytes_sent += bytes_sent;
++ user_stats->binlog_bytes_written += binlog_bytes_written;
++ user_stats->rows_fetched += rows_fetched;
++ user_stats->rows_updated += rows_updated;
++ user_stats->rows_read += rows_read;
++ user_stats->select_commands += select_commands;
++ user_stats->update_commands += update_commands;
++ user_stats->other_commands += other_commands;
++ user_stats->commit_trans += commit_trans;
++ user_stats->rollback_trans += rollback_trans;
++ user_stats->denied_connections += denied_connections;
++ user_stats->lost_connections += lost_connections;
++ user_stats->access_denied_errors += access_denied_errors;
++ user_stats->empty_queries += empty_queries;
++}
++
++void add_thread_stats(THREAD_STATS *thread_stats,
++ uint total_connections,
++ uint concurrent_connections,
++ time_t connected_time,
++ double busy_time,
++ double cpu_time,
++ ulonglong bytes_received,
++ ulonglong bytes_sent,
++ ulonglong binlog_bytes_written,
++ ha_rows rows_fetched,
++ ha_rows rows_updated,
++ ha_rows rows_read,
++ ulonglong select_commands,
++ ulonglong update_commands,
++ ulonglong other_commands,
++ ulonglong commit_trans,
++ ulonglong rollback_trans,
++ ulonglong denied_connections,
++ ulonglong lost_connections,
++ ulonglong access_denied_errors,
++ ulonglong empty_queries)
++{
++ thread_stats->total_connections += total_connections;
++ thread_stats->concurrent_connections += concurrent_connections;
++ thread_stats->connected_time += connected_time;
++ thread_stats->busy_time += busy_time;
++ thread_stats->cpu_time += cpu_time;
++ thread_stats->bytes_received += bytes_received;
++ thread_stats->bytes_sent += bytes_sent;
++ thread_stats->binlog_bytes_written += binlog_bytes_written;
++ thread_stats->rows_fetched += rows_fetched;
++ thread_stats->rows_updated += rows_updated;
++ thread_stats->rows_read += rows_read;
++ thread_stats->select_commands += select_commands;
++ thread_stats->update_commands += update_commands;
++ thread_stats->other_commands += other_commands;
++ thread_stats->commit_trans += commit_trans;
++ thread_stats->rollback_trans += rollback_trans;
++ thread_stats->denied_connections += denied_connections;
++ thread_stats->lost_connections += lost_connections;
++ thread_stats->access_denied_errors += access_denied_errors;
++ thread_stats->empty_queries += empty_queries;
++}
++
++void init_global_user_stats(void)
++{
++ if (hash_init(&global_user_stats, system_charset_info, max_connections,
++ 0, 0, (hash_get_key)get_key_user_stats,
++ (hash_free_key)free_user_stats, 0)) {
++ sql_print_error("Initializing global_user_stats failed.");
++ exit(1);
++ }
++}
++
++void init_global_client_stats(void)
++{
++ if (hash_init(&global_client_stats, system_charset_info, max_connections,
++ 0, 0, (hash_get_key)get_key_user_stats,
++ (hash_free_key)free_user_stats, 0)) {
++ sql_print_error("Initializing global_client_stats failed.");
++ exit(1);
++ }
++}
++
++// Creates the global per-thread statistics hash; exits the process on failure.
++void init_global_thread_stats(void)
++{
++ if (hash_init(&global_thread_stats, &my_charset_bin, max_connections, 0, 0,
++ (hash_get_key)get_key_thread_stats, (hash_free_key)free_thread_stats, 0)) {
++ sql_print_error("Initializing global_thread_stats failed.");
++ exit(1);
++ }
++}
++
++extern "C" uchar *get_key_table_stats(TABLE_STATS *table_stats, size_t *length,
++ my_bool not_used __attribute__((unused)))
++{
++ *length = strlen(table_stats->table);
++ return (uchar*)table_stats->table;
++}
++
++extern "C" void free_table_stats(TABLE_STATS* table_stats)
++{
++ my_free((char*)table_stats, MYF(0));
++}
++
++void init_global_table_stats(void)
++{
++ if (hash_init(&global_table_stats, system_charset_info, max_connections,
++ 0, 0, (hash_get_key)get_key_table_stats,
++ (hash_free_key)free_table_stats, 0)) {
++ sql_print_error("Initializing global_table_stats failed.");
++ exit(1);
++ }
++}
++
++extern "C" uchar *get_key_index_stats(INDEX_STATS *index_stats, size_t *length,
++ my_bool not_used __attribute__((unused)))
++{
++ *length = strlen(index_stats->index);
++ return (uchar*)index_stats->index;
++}
++
++extern "C" void free_index_stats(INDEX_STATS* index_stats)
++{
++ my_free((char*)index_stats, MYF(0));
++}
++
++void init_global_index_stats(void)
++{
++ if (hash_init(&global_index_stats, system_charset_info, max_connections,
++ 0, 0, (hash_get_key)get_key_index_stats,
++ (hash_free_key)free_index_stats, 0)) {
++ sql_print_error("Initializing global_index_stats failed.");
++ exit(1);
++ }
++}
++
++void free_global_user_stats(void)
++{
++ hash_free(&global_user_stats);
++}
++
++void free_global_thread_stats(void)
++{
++ hash_free(&global_thread_stats);
++}
++
++void free_global_table_stats(void)
++{
++ hash_free(&global_table_stats);
++}
++
++void free_global_index_stats(void)
++{
++ hash_free(&global_index_stats);
++}
++
++void free_global_client_stats(void)
++{
++ hash_free(&global_client_stats);
++}
++
++// 'mysql_system_user' is used for when the user is not defined for a THD.
++static char mysql_system_user[] = "#mysql_system#";
++
++// Returns 'user' if it's not NULL. Returns 'mysql_system_user' otherwise.
++static char* get_valid_user_string(char* user) {
++ return user ? user : mysql_system_user;
++}
++
++// Finds the entry keyed by 'name' in 'users_or_clients', creating it (with
++// priv_user set to 'role_name') if absent, then bumps its total_connections.
++// Returns 0 on success and 1 on error (allocation or hash insert failed).
++static int increment_count_by_name(const char *name, const char *role_name,
++ HASH *users_or_clients, THD *thd)
++{
++ USER_STATS* user_stats;
++
++ if (!(user_stats = (USER_STATS*)hash_search(users_or_clients, (uchar*) name,
++ strlen(name))))
++ {
++ // First connection for this user or client
++ if (!(user_stats = ((USER_STATS*)
++ my_malloc(sizeof(USER_STATS), MYF(MY_WME | MY_ZEROFILL)))))
++ {
++ return 1; // Out of memory
++ }
++
++ init_user_stats(user_stats, name, role_name,
++ 0, 0, // connections
++ 0, 0, 0, // time
++ 0, 0, 0, // bytes received, sent and written to binlog
++ 0, 0, 0, // rows fetched, updated and read
++ 0, 0, 0, // select, update and other commands
++ 0, 0, // commit and rollback trans
++ thd->diff_denied_connections, // denied connections
++ 0, // lost connections
++ 0, // access denied errors
++ 0); // empty queries
++
++ if (my_hash_insert(users_or_clients, (uchar*)user_stats))
++ {
++ my_free((char*)user_stats, 0);
++ return 1; // Out of memory
++ }
++ }
++ user_stats->total_connections++;
++ return 0;
++}
++
++static int increment_count_by_id(my_thread_id id,
++ HASH *users_or_clients, THD *thd)
++{
++ THREAD_STATS* thread_stats;
++
++ if (!(thread_stats = (THREAD_STATS*)hash_search(users_or_clients, (uchar*) &id,
++ sizeof(my_thread_id))))
++ {
++ // First entry for this thread id ('users_or_clients' is the thread-stats hash here)
++ if (!(thread_stats = ((THREAD_STATS*)
++ my_malloc(sizeof(THREAD_STATS), MYF(MY_WME | MY_ZEROFILL)))))
++ {
++ return 1; // Out of memory
++ }
++
++ init_thread_stats(thread_stats, id,
++ 0, 0, // connections
++ 0, 0, 0, // time
++ 0, 0, 0, // bytes received, sent and written to binlog
++ 0, 0, 0, // rows fetched, updated and read
++ 0, 0, 0, // select, update and other commands
++ 0, 0, // commit and rollback trans
++ thd->diff_denied_connections, // denied connections
++ 0, // lost connections
++ 0, // access denied errors
++ 0); // empty queries
++
++ if (my_hash_insert(users_or_clients, (uchar*)thread_stats))
++ {
++ my_free((char*)thread_stats, 0);
++ return 1; // Out of memory
++ }
++ }
++ thread_stats->total_connections++;
++ return 0;
++}
++
++// Increments the global user and client (and, with opt_thread_statistics,
++// thread) connection counts; does nothing when opt_userstat_running is off.
++// 'use_lock' takes LOCK_global_user_client_stats. Returns 0 on success, 1 on error.
++static int increment_connection_count(THD* thd, bool use_lock)
++{
++ char* user_string = get_valid_user_string(thd->main_security_ctx.user);
++ const char* client_string = get_client_host(thd);
++ int return_value = 0;
++
++ if (!opt_userstat_running)
++ return return_value;
++
++ if (use_lock) pthread_mutex_lock(&LOCK_global_user_client_stats);
++
++ if (increment_count_by_name(user_string, user_string,
++ &global_user_stats, thd))
++ {
++ return_value = 1;
++ goto end;
++ }
++ if (increment_count_by_name(client_string,
++ user_string,
++ &global_client_stats, thd))
++ {
++ return_value = 1;
++ goto end;
++ }
++ if (opt_thread_statistics) {
++ if (increment_count_by_id(thd->thread_id, &global_thread_stats, thd))
++ {
++ return_value = 1;
++ goto end;
++ }
++ }
++
++end:
++ if (use_lock) pthread_mutex_unlock(&LOCK_global_user_client_stats);
++ return return_value;
++}
++
++// Adds thd's elapsed connected time and pending diff_* counters to 'user_stats'.
++static void update_global_user_stats_with_user(THD* thd,
++ USER_STATS* user_stats,
++ time_t now)
++{
++ user_stats->connected_time += now - thd->last_global_update_time;
++ // last_global_update_time is advanced by update_global_user_stats(), not here.
++ user_stats->busy_time += thd->diff_total_busy_time;
++ user_stats->cpu_time += thd->diff_total_cpu_time;
++ user_stats->bytes_received += thd->diff_total_bytes_received;
++ user_stats->bytes_sent += thd->diff_total_bytes_sent;
++ user_stats->binlog_bytes_written += thd->diff_total_binlog_bytes_written;
++ user_stats->rows_fetched += thd->diff_total_sent_rows;
++ user_stats->rows_updated += thd->diff_total_updated_rows;
++ user_stats->rows_read += thd->diff_total_read_rows;
++ user_stats->select_commands += thd->diff_select_commands;
++ user_stats->update_commands += thd->diff_update_commands;
++ user_stats->other_commands += thd->diff_other_commands;
++ user_stats->commit_trans += thd->diff_commit_trans;
++ user_stats->rollback_trans += thd->diff_rollback_trans;
++ user_stats->denied_connections += thd->diff_denied_connections;
++ user_stats->lost_connections += thd->diff_lost_connections;
++ user_stats->access_denied_errors += thd->diff_access_denied_errors;
++ user_stats->empty_queries += thd->diff_empty_queries;
++}
++
++static void update_global_thread_stats_with_thread(THD* thd,
++ THREAD_STATS* thread_stats,
++ time_t now)
++{
++ thread_stats->connected_time += now - thd->last_global_update_time;
++ // last_global_update_time is advanced by update_global_user_stats(), not here.
++ thread_stats->busy_time += thd->diff_total_busy_time;
++ thread_stats->cpu_time += thd->diff_total_cpu_time;
++ thread_stats->bytes_received += thd->diff_total_bytes_received;
++ thread_stats->bytes_sent += thd->diff_total_bytes_sent;
++ thread_stats->binlog_bytes_written += thd->diff_total_binlog_bytes_written;
++ thread_stats->rows_fetched += thd->diff_total_sent_rows;
++ thread_stats->rows_updated += thd->diff_total_updated_rows;
++ thread_stats->rows_read += thd->diff_total_read_rows;
++ thread_stats->select_commands += thd->diff_select_commands;
++ thread_stats->update_commands += thd->diff_update_commands;
++ thread_stats->other_commands += thd->diff_other_commands;
++ thread_stats->commit_trans += thd->diff_commit_trans;
++ thread_stats->rollback_trans += thd->diff_rollback_trans;
++ thread_stats->denied_connections += thd->diff_denied_connections;
++ thread_stats->lost_connections += thd->diff_lost_connections;
++ thread_stats->access_denied_errors += thd->diff_access_denied_errors;
++ thread_stats->empty_queries += thd->diff_empty_queries;
++}
++
++// Adds thd's pending diff stats to its global user, client and (if enabled) thread entries, then resets them.
++void update_global_user_stats(THD* thd, bool create_user, time_t now)
++{
++ if (opt_userstat_running) {
++ char* user_string = get_valid_user_string(thd->main_security_ctx.user);
++ const char* client_string = get_client_host(thd);
++
++ USER_STATS* user_stats;
++ THREAD_STATS* thread_stats;
++ pthread_mutex_lock(&LOCK_global_user_client_stats);
++
++ // Update by user name
++ if ((user_stats = (USER_STATS*)hash_search(&global_user_stats,
++ (uchar*)user_string,
++ strlen(user_string)))) {
++ // Found user.
++ update_global_user_stats_with_user(thd, user_stats, now);
++ } else {
++ // Create the entry (only when the caller asked for it via 'create_user')
++ if (create_user) {
++ increment_count_by_name(user_string, user_string,
++ &global_user_stats, thd);
++ }
++ }
++
++ // Update by client IP
++ if ((user_stats = (USER_STATS*)hash_search(&global_client_stats,
++ (uchar*)client_string,
++ strlen(client_string)))) {
++ // Found by client IP
++ update_global_user_stats_with_user(thd, user_stats, now);
++ } else {
++ // Create the entry (only when the caller asked for it via 'create_user')
++ if (create_user) {
++ increment_count_by_name(client_string,
++ user_string,
++ &global_client_stats, thd);
++ }
++ }
++
++ if (opt_thread_statistics) {
++ // Update by thread ID
++ if ((thread_stats = (THREAD_STATS*)hash_search(&global_thread_stats,
++ (uchar*) &(thd->thread_id),
++ sizeof(my_thread_id)))) {
++ // Found by thread ID
++ update_global_thread_stats_with_thread(thd, thread_stats, now);
++ } else {
++ // Create the entry (only when the caller asked for it via 'create_user')
++ if (create_user) {
++ increment_count_by_id(thd->thread_id,
++ &global_thread_stats, thd);
++ }
++ }
++ }
++
++ thd->last_global_update_time = now;
++ thd->reset_diff_stats();
++
++ pthread_mutex_unlock(&LOCK_global_user_client_stats);
++ } else {
++ thd->reset_diff_stats();
++ }
++}
+
+ /*
+ check if user has already too many connections
+@@ -154,7 +729,10 @@
+
+ end:
+ if (error)
++ {
+ uc->connections--; // no need for decrease_user_connections() here
++ statistic_increment(denied_connections, &LOCK_status);
++ }
+ (void) pthread_mutex_unlock(&LOCK_user_conn);
+ DBUG_RETURN(error);
+ }
+@@ -490,6 +1068,7 @@
+ general_log_print(thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
+ DBUG_RETURN(1);
+ }
++ thd->diff_access_denied_errors++;
+ my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
+ thd->main_security_ctx.user,
+ thd->main_security_ctx.host_or_ip,
+@@ -971,11 +1550,20 @@
+ my_sleep(1000); /* must wait after eof() */
+ #endif
+ statistic_increment(aborted_connects,&LOCK_status);
++ thd->diff_denied_connections++;
+ DBUG_RETURN(1);
+ }
+ /* Connect completed, set read/write timeouts back to default */
+ my_net_set_read_timeout(net, thd->variables.net_read_timeout);
+ my_net_set_write_timeout(net, thd->variables.net_write_timeout);
++
++ thd->reset_stats();
++ // Updates global user connection stats.
++ if (increment_connection_count(thd, true)) {
++ net_send_error(thd, ER_OUTOFMEMORY); // Out of memory
++ DBUG_RETURN(1);
++ }
++
+ DBUG_RETURN(0);
+ }
+
+@@ -997,6 +1585,7 @@
+ if (thd->killed || (net->error && net->vio != 0))
+ {
+ statistic_increment(aborted_threads,&LOCK_status);
++ thd->diff_lost_connections++;
+ }
+
+ if (net->error && net->vio != 0)
+@@ -1123,10 +1712,14 @@
+ for (;;)
+ {
+ NET *net= &thd->net;
++ bool create_user= TRUE;
+
+ lex_start(thd);
+ if (login_connection(thd))
++ {
++ create_user= FALSE;
+ goto end_thread;
++ }
+
+ prepare_new_connection_state(thd);
+
+@@ -1149,6 +1742,8 @@
+
+ end_thread:
+ close_connection(thd, 0, 1);
++ thd->update_stats(false);
++ update_global_user_stats(thd, create_user, time(NULL));
+ if (thread_scheduler.end_thread(thd,1))
+ return 0; // Probably no-threads
+
+diff -ruN a/sql/sql_delete.cc b/sql/sql_delete.cc
+--- a/sql/sql_delete.cc 2010-08-04 02:24:34.000000000 +0900
++++ b/sql/sql_delete.cc 2010-08-27 15:10:33.837058490 +0900
+@@ -452,6 +452,7 @@
+ my_ok(thd, (ha_rows) thd->row_count_func);
+ DBUG_PRINT("info",("%ld records deleted",(long) deleted));
+ }
++ thd->updated_row_count += deleted;
+ DBUG_RETURN(error >= 0 || thd->is_error());
+ }
+
+@@ -1059,6 +1060,7 @@
+ thd->row_count_func= deleted;
+ ::my_ok(thd, (ha_rows) thd->row_count_func);
+ }
++ thd->updated_row_count += deleted;
+ return 0;
+ }
+
+diff -ruN a/sql/sql_insert.cc b/sql/sql_insert.cc
+--- a/sql/sql_insert.cc 2010-08-04 02:24:19.000000000 +0900
++++ b/sql/sql_insert.cc 2010-08-27 15:10:33.841059138 +0900
+@@ -981,6 +981,7 @@
+ thd->row_count_func= info.copied + info.deleted + updated;
+ ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
+ }
++ thd->updated_row_count += thd->row_count_func;
+ thd->abort_on_warning= 0;
+ DBUG_RETURN(FALSE);
+
+@@ -3311,6 +3312,7 @@
+ thd->first_successful_insert_id_in_prev_stmt :
+ (info.copied ? autoinc_value_of_last_inserted_row : 0));
+ ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
++ thd->updated_row_count += thd->row_count_func;
+ DBUG_RETURN(0);
+ }
+
+diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
+--- a/sql/sql_lex.h 2010-08-27 14:29:26.030989835 +0900
++++ b/sql/sql_lex.h 2010-08-27 15:10:33.844058293 +0900
+@@ -124,6 +124,9 @@
+ When a command is added here, be sure it's also added in mysqld.cc
+ in "struct show_var_st status_vars[]= {" ...
+ */
++ // TODO(mcallaghan): update status_vars in mysqld to export these
++ SQLCOM_SHOW_USER_STATS, SQLCOM_SHOW_TABLE_STATS, SQLCOM_SHOW_INDEX_STATS,
++ SQLCOM_SHOW_CLIENT_STATS, SQLCOM_SHOW_THREAD_STATS,
+ /* This should be the last !!! */
+ SQLCOM_END
+ };
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2010-08-27 14:38:08.757059579 +0900
++++ b/sql/sql_parse.cc 2010-08-27 15:15:30.420996146 +0900
+@@ -46,6 +46,9 @@
+ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables);
+ static bool check_show_create_table_access(THD *thd, TABLE_LIST *table);
+
++// Uses the THD to update the global stats by user name and client IP
++void update_global_user_stats(THD* thd, bool create_user, time_t now);
++
+ const char *any_db="*any*"; // Special symbol for check_access
+
+ const LEX_STRING command_name[]={
+@@ -824,6 +827,12 @@
+ */
+ thd->clear_error(); // Clear error message
+ thd->main_da.reset_diagnostics_area();
++ thd->updated_row_count=0;
++ thd->busy_time=0;
++ thd->cpu_time=0;
++ thd->bytes_received=0;
++ thd->bytes_sent=0;
++ thd->binlog_bytes_written=0;
+
+ net_new_transaction(net);
+
+@@ -993,6 +1002,9 @@
+ DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command));
+
+ thd->command=command;
++ // To increment the correct command counter for user stats, 'command' must
++ // be saved because it is set to COM_SLEEP at the end of this function.
++ thd->old_command = command;
+ /*
+ Commands which always take a long time are logged into
+ the slow log only if opt_log_slow_admin_statements is set.
+@@ -1864,6 +1876,13 @@
+ thd->profiling.discard_current_query();
+ #endif
+ break;
++ case SCH_USER_STATS:
++ case SCH_CLIENT_STATS:
++ case SCH_THREAD_STATS:
++ if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
++ DBUG_RETURN(1);
++ case SCH_TABLE_STATS:
++ case SCH_INDEX_STATS:
+ case SCH_OPEN_TABLES:
+ case SCH_VARIABLES:
+ case SCH_STATUS:
+@@ -2020,6 +2039,7 @@
+ thd->security_ctx->priv_host)) &&
+ check_global_access(thd, SUPER_ACL))
+ {
++ thd->diff_access_denied_errors++;
+ my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "SUPER");
+ DBUG_RETURN(TRUE);
+ }
+@@ -5331,6 +5351,7 @@
+ if (!no_errors)
+ {
+ const char *db_name= db ? db : thd->db;
++ thd->diff_access_denied_errors++;
+ my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+ sctx->priv_user, sctx->priv_host, db_name);
+ }
+@@ -5363,12 +5384,15 @@
+ { // We can never grant this
+ DBUG_PRINT("error",("No possible access"));
+ if (!no_errors)
++ {
++ thd->diff_access_denied_errors++;
+ my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
+ sctx->priv_user,
+ sctx->priv_host,
+ (thd->password ?
+ ER(ER_YES) :
+ ER(ER_NO))); /* purecov: tested */
++ }
+ DBUG_RETURN(TRUE); /* purecov: tested */
+ }
+
+@@ -5394,11 +5418,15 @@
+
+ DBUG_PRINT("error",("Access denied"));
+ if (!no_errors)
++ {
++ // increment needs !no_errors condition, otherwise double counting.
++ thd->diff_access_denied_errors++;
+ my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+ sctx->priv_user, sctx->priv_host,
+ (db ? db : (thd->db ?
+ thd->db :
+ "unknown"))); /* purecov: tested */
++ }
+ DBUG_RETURN(TRUE); /* purecov: tested */
+ }
+
+@@ -5427,6 +5455,7 @@
+
+ if (!thd->col_access && check_grant_db(thd, dst_db_name))
+ {
++ thd->diff_access_denied_errors++;
+ my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+ thd->security_ctx->priv_user,
+ thd->security_ctx->priv_host,
+@@ -5508,9 +5537,12 @@
+ (want_access & ~(SELECT_ACL | EXTRA_ACL | FILE_ACL)))
+ {
+ if (!no_errors)
++ {
++ thd->diff_access_denied_errors++;
+ my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+ sctx->priv_user, sctx->priv_host,
+ INFORMATION_SCHEMA_NAME.str);
++ }
+ return TRUE;
+ }
+ /*
+@@ -5673,6 +5705,7 @@
+ if ((thd->security_ctx->master_access & want_access))
+ return 0;
+ get_privilege_desc(command, sizeof(command), want_access);
++ thd->diff_access_denied_errors++;
+ my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), command);
+ return 1;
+ #else
+@@ -6054,6 +6087,34 @@
+ lex_start(thd);
+ mysql_reset_thd_for_next_command(thd);
+
++ int start_time_error = 0;
++ int end_time_error = 0;
++ struct timeval start_time, end_time;
++ double start_usecs = 0;
++ double end_usecs = 0;
++ /* cpu time */
++ int cputime_error = 0;
++ struct timespec tp;
++ double start_cpu_nsecs = 0;
++ double end_cpu_nsecs = 0;
++
++ if (opt_userstat_running) {
++#ifdef HAVE_CLOCK_GETTIME
++ /* get start cputime */
++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#else
++#warning : HAVE_CLOCK_GETTIME is disabled.
++#warning : Most systems require librt library to use the function clock_gettime().
++#warning : Did you set environment when ./configure ? (e.g. "export LIBS=-lrt" for sh)
++#endif
++
++ // Gets the start time, in order to measure how long this command takes.
++ if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++ }
++ }
++
+ if (query_cache_send_result_to_client(thd, rawbuf, length) <= 0)
+ {
+ LEX *lex= thd->lex;
+@@ -6134,6 +6195,43 @@
+ *found_semicolon= NULL;
+ }
+
++ if (opt_userstat_running) {
++ // Gets the end time.
++ if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++ }
++
++ // Calculates the difference between the end and start times.
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
++ thd->busy_time = (end_usecs - start_usecs) / 1000000;
++ // In case there are bad values, 2629743 is the #seconds in a month.
++ if (thd->busy_time > 2629743) {
++ thd->busy_time = 0;
++ }
++ } else {
++ // end time went back in time, or gettimeofday() failed.
++ thd->busy_time = 0;
++ }
++
++#ifdef HAVE_CLOCK_GETTIME
++ /* get end cputime */
++ if (!cputime_error &&
++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++ if (start_cpu_nsecs && !cputime_error) {
++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++ // In case there are bad values, 2629743 is the #seconds in a month.
++ if (thd->cpu_time > 2629743) {
++ thd->cpu_time = 0;
++ }
++ } else
++ thd->cpu_time = 0;
++ }
++ // Updates THD stats and the global user stats.
++ thd->update_stats(true);
++ update_global_user_stats(thd, true, time(NULL));
++
+ DBUG_VOID_RETURN;
+ }
+
+@@ -6999,6 +7097,13 @@
+ if (flush_error_log())
+ result=1;
+ }
++ if (((options & (REFRESH_SLOW_QUERY_LOG | REFRESH_LOG)) ==
++ REFRESH_SLOW_QUERY_LOG))
++ {
++ /* We are only flushing slow query log */
++ logger.flush_slow_log(thd);
++ }
++
+ #ifdef HAVE_QUERY_CACHE
+ if (options & REFRESH_QUERY_CACHE_FREE)
+ {
+@@ -7099,6 +7204,40 @@
+ #endif
+ if (options & REFRESH_USER_RESOURCES)
+ reset_mqh((LEX_USER *) NULL, 0); /* purecov: inspected */
++ if (options & REFRESH_TABLE_STATS)
++ {
++ pthread_mutex_lock(&LOCK_global_table_stats);
++ free_global_table_stats();
++ init_global_table_stats();
++ pthread_mutex_unlock(&LOCK_global_table_stats);
++ }
++ if (options & REFRESH_INDEX_STATS)
++ {
++ pthread_mutex_lock(&LOCK_global_index_stats);
++ free_global_index_stats();
++ init_global_index_stats();
++ pthread_mutex_unlock(&LOCK_global_index_stats);
++ }
++ if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS | REFRESH_THREAD_STATS))
++ {
++ pthread_mutex_lock(&LOCK_global_user_client_stats);
++ if (options & REFRESH_USER_STATS)
++ {
++ free_global_user_stats();
++ init_global_user_stats();
++ }
++ if (options & REFRESH_CLIENT_STATS)
++ {
++ free_global_client_stats();
++ init_global_client_stats();
++ }
++ if (options & REFRESH_THREAD_STATS)
++ {
++ free_global_thread_stats();
++ init_global_thread_stats();
++ }
++ pthread_mutex_unlock(&LOCK_global_user_client_stats);
++ }
+ *write_to_binlog= tmp_write_to_binlog;
+ /*
+ If the query was killed then this function must fail.
+diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
+--- a/sql/sql_prepare.cc 2010-08-27 14:29:26.043058814 +0900
++++ b/sql/sql_prepare.cc 2010-08-27 15:10:33.858058832 +0900
+@@ -96,6 +96,9 @@
+ #include <mysql_com.h>
+ #endif
+
++// Uses the THD to update the global stats by user name and client IP
++void update_global_user_stats(THD* thd, bool create_user, time_t now);
++
+ /**
+ A result class used to send cursor rows using the binary protocol.
+ */
+@@ -2103,8 +2106,36 @@
+ /* First of all clear possible warnings from the previous command */
+ mysql_reset_thd_for_next_command(thd);
+
++ int start_time_error = 0;
++ int end_time_error = 0;
++ struct timeval start_time, end_time;
++ double start_usecs = 0;
++ double end_usecs = 0;
++ /* cpu time */
++ int cputime_error = 0;
++ struct timespec tp;
++ double start_cpu_nsecs = 0;
++ double end_cpu_nsecs = 0;
++
++ if (opt_userstat_running) {
++#ifdef HAVE_CLOCK_GETTIME
++ /* get start cputime */
++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#else
++#warning : HAVE_CLOCK_GETTIME is disabled.
++#warning : Most systems require librt library to use the function clock_gettime().
++#warning : Did you set environment when ./configure ? (e.g. "export LIBS=-lrt" for sh)
++#endif
++
++ // Gets the start time, in order to measure how long this command takes.
++ if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++ }
++ }
++
+ if (! (stmt= new Prepared_statement(thd)))
+- DBUG_VOID_RETURN; /* out of memory: error is set in Sql_alloc */
++ goto end; /* out of memory: error is set in Sql_alloc */
+
+ if (thd->stmt_map.insert(thd, stmt))
+ {
+@@ -2112,7 +2143,7 @@
+ The error is set in the insert. The statement itself
+ will be also deleted there (this is how the hash works).
+ */
+- DBUG_VOID_RETURN;
++ goto end;
+ }
+
+ /* Reset warnings from previous command */
+@@ -2139,6 +2170,44 @@
+ thd->protocol= save_protocol;
+
+ /* check_prepared_statemnt sends the metadata packet in case of success */
++end:
++ if (opt_userstat_running) {
++ // Gets the end time.
++ if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++ }
++
++ // Calculates the difference between the end and start times.
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
++ thd->busy_time = (end_usecs - start_usecs) / 1000000;
++ // In case there are bad values, 2629743 is the #seconds in a month.
++ if (thd->busy_time > 2629743) {
++ thd->busy_time = 0;
++ }
++ } else {
++ // end time went back in time, or gettimeofday() failed.
++ thd->busy_time = 0;
++ }
++
++#ifdef HAVE_CLOCK_GETTIME
++ /* get end cputime */
++ if (!cputime_error &&
++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++ if (start_cpu_nsecs && !cputime_error) {
++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++ // In case there are bad values, 2629743 is the #seconds in a month.
++ if (thd->cpu_time > 2629743) {
++ thd->cpu_time = 0;
++ }
++ } else
++ thd->cpu_time = 0;
++ }
++ // Updates THD stats and the global user stats.
++ thd->update_stats(true);
++ update_global_user_stats(thd, true, time(NULL));
++
+ DBUG_VOID_RETURN;
+ }
+
+@@ -2485,12 +2554,36 @@
+ /* First of all clear possible warnings from the previous command */
+ mysql_reset_thd_for_next_command(thd);
+
++ int start_time_error = 0;
++ int end_time_error = 0;
++ struct timeval start_time, end_time;
++ double start_usecs = 0;
++ double end_usecs = 0;
++ /* cpu time */
++ int cputime_error = 0;
++ struct timespec tp;
++ double start_cpu_nsecs = 0;
++ double end_cpu_nsecs = 0;
++
++ if (opt_userstat_running) {
++#ifdef HAVE_CLOCK_GETTIME
++ /* get start cputime */
++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++
++ // Gets the start time, in order to measure how long this command takes.
++ if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++ }
++ }
++
+ if (!(stmt= find_prepared_statement(thd, stmt_id)))
+ {
+ char llbuf[22];
+ my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), sizeof(llbuf),
+ llstr(stmt_id, llbuf), "mysqld_stmt_execute");
+- DBUG_VOID_RETURN;
++ goto end;
+ }
+
+ #if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER)
+@@ -2511,6 +2604,44 @@
+ /* Close connection socket; for use with client testing (Bug#43560). */
+ DBUG_EXECUTE_IF("close_conn_after_stmt_execute", vio_close(thd->net.vio););
+
++end:
++ if (opt_userstat_running) {
++ // Gets the end time.
++ if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++ }
++
++ // Calculates the difference between the end and start times.
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
++ thd->busy_time = (end_usecs - start_usecs) / 1000000;
++ // In case there are bad values, 2629743 is the #seconds in a month.
++ if (thd->busy_time > 2629743) {
++ thd->busy_time = 0;
++ }
++ } else {
++ // end time went back in time, or gettimeofday() failed.
++ thd->busy_time = 0;
++ }
++
++#ifdef HAVE_CLOCK_GETTIME
++ /* get end cputime */
++ if (!cputime_error &&
++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++ if (start_cpu_nsecs && !cputime_error) {
++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++ // In case there are bad values, 2629743 is the #seconds in a month.
++ if (thd->cpu_time > 2629743) {
++ thd->cpu_time = 0;
++ }
++ } else
++ thd->cpu_time = 0;
++ }
++ // Updates THD stats and the global user stats.
++ thd->update_stats(true);
++ update_global_user_stats(thd, true, time(NULL));
++
+ DBUG_VOID_RETURN;
+
+ }
+@@ -2584,20 +2715,45 @@
+
+ /* First of all clear possible warnings from the previous command */
+ mysql_reset_thd_for_next_command(thd);
++
++ int start_time_error = 0;
++ int end_time_error = 0;
++ struct timeval start_time, end_time;
++ double start_usecs = 0;
++ double end_usecs = 0;
++ /* cpu time */
++ int cputime_error = 0;
++ struct timespec tp;
++ double start_cpu_nsecs = 0;
++ double end_cpu_nsecs = 0;
++
++ if (opt_userstat_running) {
++#ifdef HAVE_CLOCK_GETTIME
++ /* get start cputime */
++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++
++ // Gets the start time, in order to measure how long this command takes.
++ if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++ }
++ }
++
+ status_var_increment(thd->status_var.com_stmt_fetch);
+ if (!(stmt= find_prepared_statement(thd, stmt_id)))
+ {
+ char llbuf[22];
+ my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), sizeof(llbuf),
+ llstr(stmt_id, llbuf), "mysqld_stmt_fetch");
+- DBUG_VOID_RETURN;
++ goto end;
+ }
+
+ cursor= stmt->cursor;
+ if (!cursor)
+ {
+ my_error(ER_STMT_HAS_NO_OPEN_CURSOR, MYF(0), stmt_id);
+- DBUG_VOID_RETURN;
++ goto end;
+ }
+
+ thd->stmt_arena= stmt;
+@@ -2621,6 +2777,44 @@
+ thd->restore_backup_statement(stmt, &stmt_backup);
+ thd->stmt_arena= thd;
+
++end:
++ if (opt_userstat_running) {
++ // Gets the end time.
++ if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++ }
++
++ // Calculates the difference between the end and start times.
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
++ thd->busy_time = (end_usecs - start_usecs) / 1000000;
++ // In case there are bad values, 2629743 is the #seconds in a month.
++ if (thd->busy_time > 2629743) {
++ thd->busy_time = 0;
++ }
++ } else {
++ // end time went back in time, or gettimeofday() failed.
++ thd->busy_time = 0;
++ }
++
++#ifdef HAVE_CLOCK_GETTIME
++ /* get end cputime */
++ if (!cputime_error &&
++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++ if (start_cpu_nsecs && !cputime_error) {
++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++ // In case there are bad values, 2629743 is the #seconds in a month.
++ if (thd->cpu_time > 2629743) {
++ thd->cpu_time = 0;
++ }
++ } else
++ thd->cpu_time = 0;
++ }
++ // Updates THD stats and the global user stats.
++ thd->update_stats(true);
++ update_global_user_stats(thd, true, time(NULL));
++
+ DBUG_VOID_RETURN;
+ }
+
+@@ -2651,13 +2845,37 @@
+ /* First of all clear possible warnings from the previous command */
+ mysql_reset_thd_for_next_command(thd);
+
++ int start_time_error = 0;
++ int end_time_error = 0;
++ struct timeval start_time, end_time;
++ double start_usecs = 0;
++ double end_usecs = 0;
++ /* cpu time */
++ int cputime_error = 0;
++ struct timespec tp;
++ double start_cpu_nsecs = 0;
++ double end_cpu_nsecs = 0;
++
++ if (opt_userstat_running) {
++#ifdef HAVE_CLOCK_GETTIME
++ /* get start cputime */
++ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++
++ // Gets the start time, in order to measure how long this command takes.
++ if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++ }
++ }
++
+ status_var_increment(thd->status_var.com_stmt_reset);
+ if (!(stmt= find_prepared_statement(thd, stmt_id)))
+ {
+ char llbuf[22];
+ my_error(ER_UNKNOWN_STMT_HANDLER, MYF(0), sizeof(llbuf),
+ llstr(stmt_id, llbuf), "mysqld_stmt_reset");
+- DBUG_VOID_RETURN;
++ goto end;
+ }
+
+ stmt->close_cursor();
+@@ -2674,6 +2892,44 @@
+
+ my_ok(thd);
+
++end:
++ if (opt_userstat_running) {
++ // Gets the end time.
++ if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++ }
++
++ // Calculates the difference between the end and start times.
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
++ thd->busy_time = (end_usecs - start_usecs) / 1000000;
++ // In case there are bad values, 2629743 is the #seconds in a month.
++ if (thd->busy_time > 2629743) {
++ thd->busy_time = 0;
++ }
++ } else {
++ // end time went back in time, or gettimeofday() failed.
++ thd->busy_time = 0;
++ }
++
++#ifdef HAVE_CLOCK_GETTIME
++ /* get end cputime */
++ if (!cputime_error &&
++ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++#endif
++ if (start_cpu_nsecs && !cputime_error) {
++ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++ // In case there are bad values, 2629743 is the #seconds in a month.
++ if (thd->cpu_time > 2629743) {
++ thd->cpu_time = 0;
++ }
++ } else
++ thd->cpu_time = 0;
++ }
++ // Updates THD stats and the global user stats.
++ thd->update_stats(true);
++ update_global_user_stats(thd, true, time(NULL));
++
+ DBUG_VOID_RETURN;
+ }
+
+diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
+--- a/sql/sql_show.cc 2010-08-27 14:48:13.050141329 +0900
++++ b/sql/sql_show.cc 2010-08-27 15:10:33.866059533 +0900
+@@ -84,6 +84,40 @@
+
+ static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table);
+
++/*
++ * Fallback strsep() for platforms that lack it (Solaris 10 does not have strsep()).
++ * Returns the token at *str, NUL-terminates it at the first delimiter and advances *str past it.
++ * based on getToken from http://www.winehq.org/pipermail/wine-patches/2001-November/001322.html
++ * After the last token has been returned *str is NULL and further calls return NULL.
++ */
++
++#ifndef HAVE_STRSEP
++static char* strsep(char** str, const char* delims)
++{
++ char *token;
++
++ if (*str == NULL) {
++ /* Input was exhausted by an earlier call: no more tokens */
++ return NULL;
++ }
++
++ token = *str;
++ while (**str != '\0') {
++ if (strchr(delims, **str) != NULL) {
++ **str = '\0';
++ (*str)++;
++ return token;
++ }
++ (*str)++;
++ }
++
++ /* Reached the end without a delimiter: this is the last token */
++ *str = NULL;
++
++ return token;
++}
++#endif
++
+ /***************************************************************************
+ ** List all table types supported
+ ***************************************************************************/
+@@ -832,6 +866,7 @@
+ sctx->master_access);
+ if (!(db_access & DB_ACLS) && check_grant_db(thd,dbname))
+ {
++ thd->diff_access_denied_errors++;
+ my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+ sctx->priv_user, sctx->host_or_ip, dbname);
+ general_log_print(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR),
+@@ -2386,6 +2421,279 @@
+ DBUG_RETURN(res);
+ }
+
++/*
++ Store one INFORMATION_SCHEMA row per USER_STATS entry of a hash
++
++ SYNOPSIS
++ send_user_stats
++ thd - current thread, passed to schema_table_store_record()
++ all_user_stats - hash of USER_STATS values to return
++ table - I_S table; its fields 0..20 are filled for every entry
++ RETURN
++ 0 - OK
++ 1 - error (schema_table_store_record() failed)
++ */
++int send_user_stats(THD* thd, HASH *all_user_stats, TABLE *table)
++{
++ DBUG_ENTER("send_user_stats");
++ for (uint i = 0; i < all_user_stats->records; ++i) {
++ restore_record(table, s->default_values);
++ USER_STATS *user_stats = (USER_STATS*)hash_element(all_user_stats, i);
++ table->field[0]->store(user_stats->user, strlen(user_stats->user), system_charset_info);
++ table->field[1]->store((longlong)user_stats->total_connections);
++ table->field[2]->store((longlong)user_stats->concurrent_connections);
++ table->field[3]->store((longlong)user_stats->connected_time);
++ table->field[4]->store((longlong)user_stats->busy_time);
++ table->field[5]->store((longlong)user_stats->cpu_time);
++ table->field[6]->store((longlong)user_stats->bytes_received);
++ table->field[7]->store((longlong)user_stats->bytes_sent);
++ table->field[8]->store((longlong)user_stats->binlog_bytes_written);
++ table->field[9]->store((longlong)user_stats->rows_fetched);
++ table->field[10]->store((longlong)user_stats->rows_updated);
++ table->field[11]->store((longlong)user_stats->rows_read);
++ table->field[12]->store((longlong)user_stats->select_commands);
++ table->field[13]->store((longlong)user_stats->update_commands);
++ table->field[14]->store((longlong)user_stats->other_commands);
++ table->field[15]->store((longlong)user_stats->commit_trans);
++ table->field[16]->store((longlong)user_stats->rollback_trans);
++ table->field[17]->store((longlong)user_stats->denied_connections);
++ table->field[18]->store((longlong)user_stats->lost_connections);
++ table->field[19]->store((longlong)user_stats->access_denied_errors);
++ table->field[20]->store((longlong)user_stats->empty_queries);
++ if (schema_table_store_record(thd, table))
++ {
++ DBUG_PRINT("error", ("store record error"));
++ DBUG_RETURN(1);
++ }
++ }
++ DBUG_RETURN(0);
++}
++// Stores one I_S row (fields 0..20) per THREAD_STATS entry of all_thread_stats; returns 0 - OK, 1 - store error.
++int send_thread_stats(THD* thd, HASH *all_thread_stats, TABLE *table)
++{
++ DBUG_ENTER("send_thread_stats");
++ for (uint i = 0; i < all_thread_stats->records; ++i) {
++ restore_record(table, s->default_values);
++ THREAD_STATS *user_stats = (THREAD_STATS*)hash_element(all_thread_stats, i);
++ table->field[0]->store((longlong)user_stats->id);
++ table->field[1]->store((longlong)user_stats->total_connections);
++ table->field[2]->store((longlong)user_stats->concurrent_connections);
++ table->field[3]->store((longlong)user_stats->connected_time);
++ table->field[4]->store((longlong)user_stats->busy_time);
++ table->field[5]->store((longlong)user_stats->cpu_time);
++ table->field[6]->store((longlong)user_stats->bytes_received);
++ table->field[7]->store((longlong)user_stats->bytes_sent);
++ table->field[8]->store((longlong)user_stats->binlog_bytes_written);
++ table->field[9]->store((longlong)user_stats->rows_fetched);
++ table->field[10]->store((longlong)user_stats->rows_updated);
++ table->field[11]->store((longlong)user_stats->rows_read);
++ table->field[12]->store((longlong)user_stats->select_commands);
++ table->field[13]->store((longlong)user_stats->update_commands);
++ table->field[14]->store((longlong)user_stats->other_commands);
++ table->field[15]->store((longlong)user_stats->commit_trans);
++ table->field[16]->store((longlong)user_stats->rollback_trans);
++ table->field[17]->store((longlong)user_stats->denied_connections);
++ table->field[18]->store((longlong)user_stats->lost_connections);
++ table->field[19]->store((longlong)user_stats->access_denied_errors);
++ table->field[20]->store((longlong)user_stats->empty_queries);
++ if (schema_table_store_record(thd, table))
++ {
++ DBUG_PRINT("error", ("store record error"));
++ DBUG_RETURN(1);
++ }
++ }
++ DBUG_RETURN(0);
++}
++
++/*
++ Fill the USER_STATISTICS information schema table (SHOW USER_STATISTICS)
++
++ SYNOPSIS
++ fill_schema_user_stats
++ thd - current thread
++ tables - I_S table list; rows are stored into tables->table
++ cond - condition supplied by the caller; not used by this function
++
++ RETURN
++ 0 - OK
++ 1 - error (global access check failed, or a row could not be stored)
++ */
++
++
++int fill_schema_user_stats(THD* thd, TABLE_LIST* tables, COND* cond)
++{
++ TABLE *table= tables->table;
++ DBUG_ENTER("fill_schema_user_stats");
++
++ if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
++ DBUG_RETURN(1);
++
++ // Copies every entry of global_user_stats into the I_S table while holding
++ // LOCK_global_user_client_stats; cond is not applied here (no filtering).
++
++ pthread_mutex_lock(&LOCK_global_user_client_stats);
++ int result= send_user_stats(thd, &global_user_stats, table);
++ pthread_mutex_unlock(&LOCK_global_user_client_stats);
++ if (result)
++ goto err;
++
++ DBUG_PRINT("exit", ("fill_schema_user_stats result is 0"));
++ DBUG_RETURN(0);
++
++ err:
++ DBUG_PRINT("exit", ("fill_schema_user_stats result is 1"));
++ DBUG_RETURN(1);
++}
++
++/*
++ Fill the CLIENT_STATISTICS information schema table (SHOW CLIENT_STATISTICS)
++
++ SYNOPSIS
++ fill_schema_client_stats
++ thd - current thread
++ tables - I_S table list; rows are stored into tables->table
++ cond - condition supplied by the caller; not used by this function
++ RETURN
++ 0 - OK
++ 1 - error (global access check failed, or a row could not be stored)
++ */
++
++
++int fill_schema_client_stats(THD* thd, TABLE_LIST* tables, COND* cond)
++{
++ TABLE *table= tables->table;
++ DBUG_ENTER("fill_schema_client_stats");
++
++ if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
++ DBUG_RETURN(1);
++
++ // Copies every entry of global_client_stats into the I_S table while holding
++ // LOCK_global_user_client_stats; cond is not applied here (no filtering).
++
++ pthread_mutex_lock(&LOCK_global_user_client_stats);
++ int result= send_user_stats(thd, &global_client_stats, table);
++ pthread_mutex_unlock(&LOCK_global_user_client_stats);
++ if (result)
++ goto err;
++
++ DBUG_PRINT("exit", ("mysqld_show_client_stats result is 0"));
++ DBUG_RETURN(0);
++
++ err:
++ DBUG_PRINT("exit", ("mysqld_show_client_stats result is 1"));
++ DBUG_RETURN(1);
++}
++// Fills the THREAD_STATISTICS I_S table from global_thread_stats; returns 0 - OK, 1 - access check or row store failed.
++int fill_schema_thread_stats(THD* thd, TABLE_LIST* tables, COND* cond)
++{
++ TABLE *table= tables->table;
++ DBUG_ENTER("fill_schema_thread_stats");
++
++ if (check_global_access(thd, SUPER_ACL | PROCESS_ACL))
++ DBUG_RETURN(1);
++
++ // Copies every entry of global_thread_stats into the I_S table while holding
++ // LOCK_global_user_client_stats; cond is not applied here (no filtering).
++
++ pthread_mutex_lock(&LOCK_global_user_client_stats);
++ int result= send_thread_stats(thd, &global_thread_stats, table);
++ pthread_mutex_unlock(&LOCK_global_user_client_stats);
++ if (result)
++ goto err;
++
++ DBUG_PRINT("exit", ("mysqld_show_thread_stats result is 0"));
++ DBUG_RETURN(0);
++
++ err:
++ DBUG_PRINT("exit", ("mysqld_show_thread_stats result is 1"));
++ DBUG_RETURN(1);
++}
++
++// Sends the global table stats back to the client.
++int fill_schema_table_stats(THD* thd, TABLE_LIST* tables, COND* cond)
++{
++ TABLE *table= tables->table;
++ DBUG_ENTER("fill_schema_table_stats");
++ char *table_full_name, *table_schema;
++
++ pthread_mutex_lock(&LOCK_global_table_stats);
++ for (uint i = 0; i < global_table_stats.records; ++i) {
++ restore_record(table, s->default_values);
++ TABLE_STATS *table_stats =
++ (TABLE_STATS*)hash_element(&global_table_stats, i);
++
++ table_full_name= thd->strdup(table_stats->table);
++ table_schema= strsep(&table_full_name, ".");
++
++ TABLE_LIST tmp_table;
++ bzero((char*) &tmp_table,sizeof(tmp_table));
++ tmp_table.table_name= table_full_name;
++ tmp_table.db= table_schema;
++ tmp_table.grant.privilege= 0;
++ if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db,
++ &tmp_table.grant.privilege, 0, 0,
++ is_schema_db(table_schema)) ||
++ check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1))
++ continue;
++
++ table->field[0]->store(table_schema, strlen(table_schema), system_charset_info);
++ table->field[1]->store(table_full_name, strlen(table_full_name), system_charset_info);
++ table->field[2]->store((longlong)table_stats->rows_read, TRUE);
++ table->field[3]->store((longlong)table_stats->rows_changed, TRUE);
++ table->field[4]->store((longlong)table_stats->rows_changed_x_indexes, TRUE);
++
++ if (schema_table_store_record(thd, table))
++ {
++ VOID(pthread_mutex_unlock(&LOCK_global_table_stats));
++ DBUG_RETURN(1);
++ }
++ }
++ pthread_mutex_unlock(&LOCK_global_table_stats);
++ DBUG_RETURN(0);
++}
++
++// Sends the global index stats back to the client.
++int fill_schema_index_stats(THD* thd, TABLE_LIST* tables, COND* cond)
++{
++ TABLE *table= tables->table;
++ DBUG_ENTER("fill_schema_index_stats");
++ char *index_full_name, *table_schema, *table_name;
++
++ pthread_mutex_lock(&LOCK_global_index_stats);
++ for (uint i = 0; i < global_index_stats.records; ++i) {
++ restore_record(table, s->default_values);
++ INDEX_STATS *index_stats =
++ (INDEX_STATS*)hash_element(&global_index_stats, i);
++
++ index_full_name= thd->strdup(index_stats->index);
++ table_schema= strsep(&index_full_name, ".");
++ table_name= strsep(&index_full_name, ".");
++
++ TABLE_LIST tmp_table;
++ bzero((char*) &tmp_table,sizeof(tmp_table));
++ tmp_table.table_name= table_name;
++ tmp_table.db= table_schema;
++ tmp_table.grant.privilege= 0;
++ if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db,
++ &tmp_table.grant.privilege, 0, 0,
++ is_schema_db(table_schema)) ||
++ check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1))
++ continue;
++
++ table->field[0]->store(table_schema, strlen(table_schema), system_charset_info);
++ table->field[1]->store(table_name, strlen(table_name), system_charset_info);
++ table->field[2]->store(index_full_name, strlen(index_full_name), system_charset_info);
++ table->field[3]->store((longlong)index_stats->rows_read, TRUE);
++
++ if (schema_table_store_record(thd, table))
++ {
++ VOID(pthread_mutex_unlock(&LOCK_global_index_stats));
++ DBUG_RETURN(1);
++ }
++ }
++ pthread_mutex_unlock(&LOCK_global_index_stats);
++ DBUG_RETURN(0);
++}
+
+ /* collect status for all running threads */
+
+@@ -6688,6 +6996,104 @@
+ };
+
+
++ST_FIELD_INFO user_stats_fields_info[]= // USER_STATISTICS columns, in the field[] order used by send_user_stats(); counters are 64-bit so store((longlong)...) is not clipped
++{
++ {"USER", USERNAME_LENGTH, MYSQL_TYPE_STRING, 0, 0, "User", SKIP_OPEN_TABLE},
++ {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Total_connections", SKIP_OPEN_TABLE},
++ {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Concurrent_connections", SKIP_OPEN_TABLE},
++ {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Connected_time", SKIP_OPEN_TABLE},
++ {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Busy_time", SKIP_OPEN_TABLE},
++ {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Cpu_time", SKIP_OPEN_TABLE},
++ {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received", SKIP_OPEN_TABLE},
++ {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent", SKIP_OPEN_TABLE},
++ {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written", SKIP_OPEN_TABLE},
++ {"ROWS_FETCHED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_fetched", SKIP_OPEN_TABLE},
++ {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated", SKIP_OPEN_TABLE},
++ {"TABLE_ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Table_rows_read", SKIP_OPEN_TABLE},
++ {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands", SKIP_OPEN_TABLE},
++ {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands", SKIP_OPEN_TABLE},
++ {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands", SKIP_OPEN_TABLE},
++ {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions", SKIP_OPEN_TABLE},
++ {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions", SKIP_OPEN_TABLE},
++ {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections", SKIP_OPEN_TABLE},
++ {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections", SKIP_OPEN_TABLE},
++ {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied", SKIP_OPEN_TABLE},
++ {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries", SKIP_OPEN_TABLE},
++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
++};
++
++ST_FIELD_INFO client_stats_fields_info[]= // CLIENT_STATISTICS columns, in the field[] order used by send_user_stats(); counters are 64-bit so store((longlong)...) is not clipped
++{
++ {"CLIENT", LIST_PROCESS_HOST_LEN, MYSQL_TYPE_STRING, 0, 0, "Client", SKIP_OPEN_TABLE},
++ {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Total_connections", SKIP_OPEN_TABLE},
++ {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Concurrent_connections", SKIP_OPEN_TABLE},
++ {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Connected_time", SKIP_OPEN_TABLE},
++ {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Busy_time", SKIP_OPEN_TABLE},
++ {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Cpu_time", SKIP_OPEN_TABLE},
++ {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received", SKIP_OPEN_TABLE},
++ {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent", SKIP_OPEN_TABLE},
++ {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written", SKIP_OPEN_TABLE},
++ {"ROWS_FETCHED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_fetched", SKIP_OPEN_TABLE},
++ {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated", SKIP_OPEN_TABLE},
++ {"TABLE_ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Table_rows_read", SKIP_OPEN_TABLE},
++ {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands", SKIP_OPEN_TABLE},
++ {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands", SKIP_OPEN_TABLE},
++ {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands", SKIP_OPEN_TABLE},
++ {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions", SKIP_OPEN_TABLE},
++ {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions", SKIP_OPEN_TABLE},
++ {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections", SKIP_OPEN_TABLE},
++ {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections", SKIP_OPEN_TABLE},
++ {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied", SKIP_OPEN_TABLE},
++ {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries", SKIP_OPEN_TABLE},
++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
++};
++
++ST_FIELD_INFO thread_stats_fields_info[]= // THREAD_STATISTICS columns, in the field[] order used by send_thread_stats(); counters are 64-bit so store((longlong)...) is not clipped
++{
++ {"THREAD_ID", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Thread_id", SKIP_OPEN_TABLE},
++ {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Total_connections", SKIP_OPEN_TABLE},
++ {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Concurrent_connections", SKIP_OPEN_TABLE},
++ {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Connected_time", SKIP_OPEN_TABLE},
++ {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Busy_time", SKIP_OPEN_TABLE},
++ {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Cpu_time", SKIP_OPEN_TABLE},
++ {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received", SKIP_OPEN_TABLE},
++ {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent", SKIP_OPEN_TABLE},
++ {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written", SKIP_OPEN_TABLE},
++ {"ROWS_FETCHED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_fetched", SKIP_OPEN_TABLE},
++ {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated", SKIP_OPEN_TABLE},
++ {"TABLE_ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Table_rows_read", SKIP_OPEN_TABLE},
++ {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands", SKIP_OPEN_TABLE},
++ {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands", SKIP_OPEN_TABLE},
++ {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands", SKIP_OPEN_TABLE},
++ {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions", SKIP_OPEN_TABLE},
++ {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions", SKIP_OPEN_TABLE},
++ {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections", SKIP_OPEN_TABLE},
++ {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections", SKIP_OPEN_TABLE},
++ {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied", SKIP_OPEN_TABLE},
++ {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries", SKIP_OPEN_TABLE},
++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
++};
++
++ST_FIELD_INFO table_stats_fields_info[]= // TABLE_STATISTICS columns, in the field[] order used by fill_schema_table_stats(); counters are 64-bit so store((longlong)...) is not clipped
++{
++ {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema", SKIP_OPEN_TABLE},
++ {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name", SKIP_OPEN_TABLE},
++ {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read", SKIP_OPEN_TABLE},
++ {"ROWS_CHANGED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_changed", SKIP_OPEN_TABLE},
++ {"ROWS_CHANGED_X_INDEXES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_changed_x_#indexes", SKIP_OPEN_TABLE},
++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
++};
++
++ST_FIELD_INFO index_stats_fields_info[]= // INDEX_STATISTICS columns, in the field[] order used by fill_schema_index_stats(); the counter is 64-bit so store((longlong)...) is not clipped
++{
++ {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema", SKIP_OPEN_TABLE},
++ {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name", SKIP_OPEN_TABLE},
++ {"INDEX_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Index_name", SKIP_OPEN_TABLE},
++ {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read", SKIP_OPEN_TABLE},
++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0}
++};
++
++
+ ST_FIELD_INFO processlist_fields_info[]=
+ {
+ {"ID", 4, MYSQL_TYPE_LONGLONG, 0, 0, "Id", SKIP_OPEN_TABLE},
+@@ -6823,6 +7229,8 @@
+ {
+ {"CHARACTER_SETS", charsets_fields_info, create_schema_table,
+ fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0, 0},
++ {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table,
++ fill_schema_client_stats, make_old_format, 0, -1, -1, 0, 0},
+ {"COLLATIONS", collation_fields_info, create_schema_table,
+ fill_schema_collation, make_old_format, 0, -1, -1, 0, 0},
+ {"COLLATION_CHARACTER_SET_APPLICABILITY", coll_charset_app_fields_info,
+@@ -6832,6 +7240,8 @@
+ OPTIMIZE_I_S_TABLE|OPEN_VIEW_FULL},
+ {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table,
+ fill_schema_column_privileges, 0, 0, -1, -1, 0, 0},
++ {"INDEX_STATISTICS", index_stats_fields_info, create_schema_table,
++ fill_schema_index_stats, make_old_format, 0, -1, -1, 0, 0},
+ {"ENGINES", engines_fields_info, create_schema_table,
+ fill_schema_engines, make_old_format, 0, -1, -1, 0, 0},
+ #ifdef HAVE_EVENT_SCHEDULER
+@@ -6888,11 +7298,17 @@
+ get_all_tables, make_table_names_old_format, 0, 1, 2, 1, 0},
+ {"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table,
+ fill_schema_table_privileges, 0, 0, -1, -1, 0, 0},
++ {"TABLE_STATISTICS", table_stats_fields_info, create_schema_table,
++ fill_schema_table_stats, make_old_format, 0, -1, -1, 0, 0},
++ {"THREAD_STATISTICS", thread_stats_fields_info, create_schema_table,
++ fill_schema_thread_stats, make_old_format, 0, -1, -1, 0, 0},
+ {"TRIGGERS", triggers_fields_info, create_schema_table,
+ get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0,
+ OPEN_TABLE_ONLY},
+ {"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table,
+ fill_schema_user_privileges, 0, 0, -1, -1, 0, 0},
++ {"USER_STATISTICS", user_stats_fields_info, create_schema_table,
++ fill_schema_user_stats, make_old_format, 0, -1, -1, 0, 0},
+ {"VARIABLES", variables_fields_info, create_schema_table, fill_variables,
+ make_old_format, 0, 0, -1, 1, 0},
+ {"VIEWS", view_fields_info, create_schema_table,
+diff -ruN a/sql/sql_update.cc b/sql/sql_update.cc
+--- a/sql/sql_update.cc 2010-08-04 02:24:35.000000000 +0900
++++ b/sql/sql_update.cc 2010-08-27 15:10:33.880988383 +0900
+@@ -843,6 +843,7 @@
+ thd->row_count_func=
+ (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
+ my_ok(thd, (ulong) thd->row_count_func, id, buff);
++ thd->updated_row_count += thd->row_count_func;
+ DBUG_PRINT("info",("%ld records updated", (long) updated));
+ }
+ thd->count_cuted_fields= CHECK_FIELD_IGNORE; /* calc cuted fields */
+@@ -2145,5 +2146,6 @@
+ thd->row_count_func=
+ (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
+ ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
++ thd->updated_row_count += thd->row_count_func;
+ DBUG_RETURN(FALSE);
+ }
+diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy 2010-08-27 14:29:26.060990130 +0900
++++ b/sql/sql_yacc.yy 2010-08-27 15:10:33.890987529 +0900
+@@ -757,6 +757,7 @@
+ %token CHECK_SYM /* SQL-2003-R */
+ %token CIPHER_SYM
+ %token CLIENT_SYM
++%token CLIENT_STATS_SYM
+ %token CLOSE_SYM /* SQL-2003-R */
+ %token COALESCE /* SQL-2003-N */
+ %token CODE_SYM
+@@ -903,6 +904,7 @@
+ %token IMPORT
+ %token INDEXES
+ %token INDEX_SYM
++%token INDEX_STATS_SYM
+ %token INFILE
+ %token INITIAL_SIZE_SYM
+ %token INNER_SYM /* SQL-2003-R */
+@@ -1144,6 +1146,7 @@
+ %token SIGNED_SYM
+ %token SIMPLE_SYM /* SQL-2003-N */
+ %token SLAVE
++%token SLOW_SYM
+ %token SMALLINT /* SQL-2003-R */
+ %token SNAPSHOT_SYM
+ %token SOCKET_SYM
+@@ -1189,6 +1192,7 @@
+ %token TABLESPACE
+ %token TABLE_REF_PRIORITY
+ %token TABLE_SYM /* SQL-2003-R */
++%token TABLE_STATS_SYM
+ %token TABLE_CHECKSUM_SYM
+ %token TEMPORARY /* SQL-2003-N */
+ %token TEMPTABLE_SYM
+@@ -1197,6 +1201,7 @@
+ %token TEXT_SYM
+ %token THAN_SYM
+ %token THEN_SYM /* SQL-2003-R */
++%token THREAD_STATS_SYM
+ %token TIMESTAMP /* SQL-2003-R */
+ %token TIMESTAMP_ADD
+ %token TIMESTAMP_DIFF
+@@ -1234,6 +1239,7 @@
+ %token UPGRADE_SYM
+ %token USAGE /* SQL-2003-N */
+ %token USER /* SQL-2003-R */
++%token USER_STATS_SYM
+ %token USE_FRM
+ %token USE_SYM
+ %token USING /* SQL-2003-R */
+@@ -10346,6 +10352,41 @@
+ {
+ Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
+ }
++ | CLIENT_STATS_SYM wild_and_where
++ {
++ LEX *lex= Lex;
++ Lex->sql_command = SQLCOM_SELECT;
++ if (prepare_schema_table(YYTHD, lex, 0, SCH_CLIENT_STATS))
++ MYSQL_YYABORT;
++ }
++ | USER_STATS_SYM wild_and_where
++ {
++ LEX *lex= Lex;
++ lex->sql_command = SQLCOM_SELECT;
++ if (prepare_schema_table(YYTHD, lex, 0, SCH_USER_STATS))
++ MYSQL_YYABORT;
++ }
++ | THREAD_STATS_SYM wild_and_where
++ {
++ LEX *lex= Lex;
++ Lex->sql_command = SQLCOM_SELECT;
++ if (prepare_schema_table(YYTHD, lex, 0, SCH_THREAD_STATS))
++ MYSQL_YYABORT;
++ }
++ | TABLE_STATS_SYM wild_and_where
++ {
++ LEX *lex= Lex;
++ lex->sql_command= SQLCOM_SELECT;
++ if (prepare_schema_table(YYTHD, lex, 0, SCH_TABLE_STATS))
++ MYSQL_YYABORT;
++ }
++ | INDEX_STATS_SYM wild_and_where
++ {
++ LEX *lex= Lex;
++ lex->sql_command= SQLCOM_SELECT;
++ if (prepare_schema_table(YYTHD, lex, 0, SCH_INDEX_STATS))
++ MYSQL_YYABORT;
++ }
+ | CREATE PROCEDURE sp_name
+ {
+ LEX *lex= Lex;
+@@ -10554,6 +10595,18 @@
+ { Lex->type|= REFRESH_STATUS; }
+ | SLAVE
+ { Lex->type|= REFRESH_SLAVE; }
++ | SLOW_SYM QUERY_SYM LOGS_SYM
++ { Lex->type |= REFRESH_SLOW_QUERY_LOG; }
++ | CLIENT_STATS_SYM
++ { Lex->type|= REFRESH_CLIENT_STATS; }
++ | USER_STATS_SYM
++ { Lex->type|= REFRESH_USER_STATS; }
++ | THREAD_STATS_SYM
++ { Lex->type|= REFRESH_THREAD_STATS; }
++ | TABLE_STATS_SYM
++ { Lex->type|= REFRESH_TABLE_STATS; }
++ | INDEX_STATS_SYM
++ { Lex->type|= REFRESH_INDEX_STATS; }
+ | MASTER_SYM
+ { Lex->type|= REFRESH_MASTER; }
+ | DES_KEY_FILE
+@@ -11671,6 +11724,7 @@
+ | CHAIN_SYM {}
+ | CHANGED {}
+ | CIPHER_SYM {}
++ | CLIENT_STATS_SYM {}
+ | CLIENT_SYM {}
+ | COALESCE {}
+ | CODE_SYM {}
+@@ -11732,6 +11786,7 @@
+ | HOSTS_SYM {}
+ | HOUR_SYM {}
+ | IDENTIFIED_SYM {}
++ | INDEX_STATS_SYM {}
+ | INVOKER_SYM {}
+ | IMPORT {}
+ | INDEXES {}
+@@ -11856,6 +11911,7 @@
+ | SIMPLE_SYM {}
+ | SHARE_SYM {}
+ | SHUTDOWN {}
++ | SLOW_SYM {}
+ | SNAPSHOT_SYM {}
+ | SOUNDS_SYM {}
+ | SOURCE_SYM {}
+@@ -11875,6 +11931,7 @@
+ | SUSPEND_SYM {}
+ | SWAPS_SYM {}
+ | SWITCHES_SYM {}
++ | TABLE_STATS_SYM {}
+ | TABLES {}
+ | TABLE_CHECKSUM_SYM {}
+ | TABLESPACE {}
+@@ -11882,6 +11939,7 @@
+ | TEMPTABLE_SYM {}
+ | TEXT_SYM {}
+ | THAN_SYM {}
++ | THREAD_STATS_SYM {}
+ | TRANSACTION_SYM {}
+ | TRIGGERS_SYM {}
+ | TIMESTAMP {}
+@@ -11899,6 +11957,7 @@
+ | UNKNOWN_SYM {}
+ | UNTIL_SYM {}
+ | USER {}
++ | USER_STATS_SYM {}
+ | USE_FRM {}
+ | VARIABLES {}
+ | VIEW_SYM {}
+diff -ruN a/sql/structs.h b/sql/structs.h
+--- a/sql/structs.h 2010-08-04 02:24:35.000000000 +0900
++++ b/sql/structs.h 2010-08-27 15:10:33.904059058 +0900
+@@ -237,6 +237,171 @@
+ USER_RESOURCES user_resources;
+ } USER_CONN;
+
++typedef struct st_user_stats { // per-account connection, traffic, row and command counters
++ char user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1]; // capacity: larger of the two limits + '\0'; NOTE(review): presumably so a host string fits as well as a user name - confirm
++ // Account name the user is mapped to when this is a user from mapped_user.
++ // Otherwise, the same value as user.
++ char priv_user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1]; // same capacity as user
++ uint total_connections;
++ uint concurrent_connections;
++ time_t connected_time; // in seconds
++ double busy_time; // in seconds
++ double cpu_time; // in seconds
++ ulonglong bytes_received;
++ ulonglong bytes_sent;
++ ulonglong binlog_bytes_written;
++ ha_rows rows_fetched, rows_updated, rows_read;
++ ulonglong select_commands, update_commands, other_commands;
++ ulonglong commit_trans, rollback_trans;
++ ulonglong denied_connections, lost_connections;
++ ulonglong access_denied_errors;
++ ulonglong empty_queries;
++} USER_STATS;
++
++/* Lookup function for hash tables with USER_STATS entries */
++extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
++ my_bool not_used __attribute__((unused)));
++
++/* Free all memory for a hash table with USER_STATS entries */
++extern void free_user_stats(USER_STATS* user_stats);
++
++/* Initialize an instance of USER_STATS */
++extern void
++init_user_stats(USER_STATS *user_stats,
++ const char *user,
++ const char *priv_user,
++ uint total_connections,
++ uint concurrent_connections,
++ time_t connected_time,
++ double busy_time,
++ double cpu_time,
++ ulonglong bytes_received,
++ ulonglong bytes_sent,
++ ulonglong binlog_bytes_written,
++ ha_rows rows_fetched,
++ ha_rows rows_updated,
++ ha_rows rows_read,
++ ulonglong select_commands,
++ ulonglong update_commands,
++ ulonglong other_commands,
++ ulonglong commit_trans,
++ ulonglong rollback_trans,
++ ulonglong denied_connections,
++ ulonglong lost_connections,
++ ulonglong access_denied_errors,
++ ulonglong empty_queries);
++
++/* Increment values of an instance of USER_STATS */
++extern void
++add_user_stats(USER_STATS *user_stats,
++ uint total_connections,
++ uint concurrent_connections,
++ time_t connected_time,
++ double busy_time,
++ double cpu_time,
++ ulonglong bytes_received,
++ ulonglong bytes_sent,
++ ulonglong binlog_bytes_written,
++ ha_rows rows_fetched,
++ ha_rows rows_updated,
++ ha_rows rows_read,
++ ulonglong select_commands,
++ ulonglong update_commands,
++ ulonglong other_commands,
++ ulonglong commit_trans,
++ ulonglong rollback_trans,
++ ulonglong denied_connections,
++ ulonglong lost_connections,
++ ulonglong access_denied_errors,
++ ulonglong empty_queries);
++
++typedef struct st_thread_stats { // same counters as USER_STATS, with a thread id in place of user/priv_user
++ my_thread_id id; // NOTE(review): presumably the lookup key exposed by get_key_thread_stats - confirm
++ uint total_connections;
++ uint concurrent_connections;
++ time_t connected_time; // in seconds
++ double busy_time; // in seconds
++ double cpu_time; // in seconds
++ ulonglong bytes_received;
++ ulonglong bytes_sent;
++ ulonglong binlog_bytes_written;
++ ha_rows rows_fetched, rows_updated, rows_read;
++ ulonglong select_commands, update_commands, other_commands;
++ ulonglong commit_trans, rollback_trans;
++ ulonglong denied_connections, lost_connections;
++ ulonglong access_denied_errors;
++ ulonglong empty_queries;
++} THREAD_STATS;
++
++/* Lookup function for hash tables with THREAD_STATS entries */
++extern "C" uchar *get_key_thread_stats(THREAD_STATS *thread_stats, size_t *length,
++ my_bool not_used __attribute__((unused)));
++
++/* Free all memory for a hash table with THREAD_STATS entries */
++extern void free_thread_stats(THREAD_STATS* thread_stats);
++
++/* Initialize an instance of THREAD_STATS */
++extern void
++init_thread_stats(THREAD_STATS *thread_stats,
++ my_thread_id id,
++ uint total_connections,
++ uint concurrent_connections,
++ time_t connected_time,
++ double busy_time,
++ double cpu_time,
++ ulonglong bytes_received,
++ ulonglong bytes_sent,
++ ulonglong binlog_bytes_written,
++ ha_rows rows_fetched,
++ ha_rows rows_updated,
++ ha_rows rows_read,
++ ulonglong select_commands,
++ ulonglong update_commands,
++ ulonglong other_commands,
++ ulonglong commit_trans,
++ ulonglong rollback_trans,
++ ulonglong denied_connections,
++ ulonglong lost_connections,
++ ulonglong access_denied_errors,
++ ulonglong empty_queries);
++
++/* Increment values of an instance of THREAD_STATS */
++extern void
++add_thread_stats(THREAD_STATS *thread_stats,
++ uint total_connections,
++ uint concurrent_connections,
++ time_t connected_time,
++ double busy_time,
++ double cpu_time,
++ ulonglong bytes_received,
++ ulonglong bytes_sent,
++ ulonglong binlog_bytes_written,
++ ha_rows rows_fetched,
++ ha_rows rows_updated,
++ ha_rows rows_read,
++ ulonglong select_commands,
++ ulonglong update_commands,
++ ulonglong other_commands,
++ ulonglong commit_trans,
++ ulonglong rollback_trans,
++ ulonglong denied_connections,
++ ulonglong lost_connections,
++ ulonglong access_denied_errors,
++ ulonglong empty_queries);
++
++typedef struct st_table_stats { // per-table row counters; table holds the qualified "db.table" name
++ char table[NAME_LEN * 2 + 2]; // [db] + '.' + [table] + '\0'
++ ulonglong rows_read, rows_changed;
++ ulonglong rows_changed_x_indexes; // NOTE(review): name suggests rows_changed scaled by the table's index count - confirm
++ /* Stores enum db_type, but forward declarations cannot be done */
++ int engine_type;
++} TABLE_STATS;
++
++typedef struct st_index_stats { // per-index read counter; index holds the qualified "db.table.index" name
++ char index[NAME_LEN * 3 + 3]; // [db] + '.' + [table] + '.' + [index] + '\0'
++ ulonglong rows_read;
++} INDEX_STATS;
++
+ /* Bits in form->update */
+ #define REG_MAKE_DUPP 1 /* Make a copy of record when read */
+ #define REG_NEW_RECORD 2 /* Write a new record if not found */
+diff -ruN a/sql/table.h b/sql/table.h
+--- a/sql/table.h 2010-08-04 02:24:19.000000000 +0900
++++ b/sql/table.h 2010-08-27 15:10:33.906987259 +0900
+@@ -943,10 +943,12 @@
+ enum enum_schema_tables
+ {
+ SCH_CHARSETS= 0,
++ SCH_CLIENT_STATS,
+ SCH_COLLATIONS,
+ SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
+ SCH_COLUMNS,
+ SCH_COLUMN_PRIVILEGES,
++ SCH_INDEX_STATS,
+ SCH_ENGINES,
+ SCH_EVENTS,
+ SCH_FILES,
+@@ -970,8 +972,11 @@
+ SCH_TABLE_CONSTRAINTS,
+ SCH_TABLE_NAMES,
+ SCH_TABLE_PRIVILEGES,
++ SCH_TABLE_STATS,
++ SCH_THREAD_STATS,
+ SCH_TRIGGERS,
+ SCH_USER_PRIVILEGES,
++ SCH_USER_STATS,
+ SCH_VARIABLES,
+ SCH_VIEWS
+ };
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-08-04 02:24:20.000000000 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-08-27 15:10:33.913058592 +0900
+@@ -4055,6 +4055,8 @@
+
+ error = row_insert_for_mysql((byte*) record, prebuilt);
+
++ if (error == DB_SUCCESS) rows_changed++;
++
+ /* Handle duplicate key errors */
+ if (auto_inc_used) {
+ ulint err;
+@@ -4392,6 +4394,8 @@
+ }
+ }
+
++ if (error == DB_SUCCESS) rows_changed++;
++
+ innodb_srv_conc_exit_innodb(trx);
+
+ error = convert_error_code_to_mysql(error, user_thd);
+@@ -4444,6 +4448,8 @@
+
+ error = row_update_for_mysql((byte*) record, prebuilt);
+
++ if (error == DB_SUCCESS) rows_changed++;
++
+ innodb_srv_conc_exit_innodb(trx);
+
+ error = convert_error_code_to_mysql(error, user_thd);
+@@ -4923,6 +4929,9 @@
+ if (ret == DB_SUCCESS) {
+ error = 0;
+ table->status = 0;
++ rows_read++;
++ if (active_index >= 0 && active_index < MAX_KEY)
++ index_rows_read[active_index]++;
+
+ } else if (ret == DB_RECORD_NOT_FOUND) {
+ error = HA_ERR_END_OF_FILE;
+diff -ruN a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
+--- a/storage/myisam/ha_myisam.cc 2010-08-04 02:24:27.000000000 +0900
++++ b/storage/myisam/ha_myisam.cc 2010-08-27 15:10:33.921058182 +0900
+@@ -761,6 +761,7 @@
+
+ int ha_myisam::write_row(uchar *buf)
+ {
++ int error;
+ ha_statistic_increment(&SSV::ha_write_count);
+
+ /* If we have a timestamp column, update it to the current time */
+@@ -773,11 +774,12 @@
+ */
+ if (table->next_number_field && buf == table->record[0])
+ {
+- int error;
+ if ((error= update_auto_increment()))
+ return error;
+ }
+- return mi_write(file,buf);
++ error=mi_write(file,buf);
++ if (!error) rows_changed++;
++ return error;
+ }
+
+ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
+@@ -1638,16 +1640,22 @@
+
+ int ha_myisam::update_row(const uchar *old_data, uchar *new_data)
+ {
++ int error;
+ ha_statistic_increment(&SSV::ha_update_count);
+ if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+ table->timestamp_field->set_time();
+- return mi_update(file,old_data,new_data);
++ error=mi_update(file,old_data,new_data);
++ if (!error) rows_changed++;
++ return error;
+ }
+
+ int ha_myisam::delete_row(const uchar *buf)
+ {
++ int error;
+ ha_statistic_increment(&SSV::ha_delete_count);
+- return mi_delete(file,buf);
++ error=mi_delete(file,buf);
++ if (!error) rows_changed++;
++ return error;
+ }
+
+ int ha_myisam::index_read_map(uchar *buf, const uchar *key,
+@@ -1658,6 +1666,13 @@
+ ha_statistic_increment(&SSV::ha_read_key_count);
+ int error=mi_rkey(file, buf, active_index, key, keypart_map, find_flag);
+ table->status=error ? STATUS_NOT_FOUND: 0;
++ if (!error) {
++ rows_read++;
++
++ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++ if (inx >= 0 && inx < MAX_KEY)
++ index_rows_read[inx]++;
++ }
+ return error;
+ }
+
+@@ -1668,6 +1683,13 @@
+ ha_statistic_increment(&SSV::ha_read_key_count);
+ int error=mi_rkey(file, buf, index, key, keypart_map, find_flag);
+ table->status=error ? STATUS_NOT_FOUND: 0;
++ if (!error) {
++ rows_read++;
++
++ int inx = index;
++ if (inx >= 0 && inx < MAX_KEY)
++ index_rows_read[inx]++;
++ }
+ return error;
+ }
+
+@@ -1680,6 +1702,13 @@
+ int error=mi_rkey(file, buf, active_index, key, keypart_map,
+ HA_READ_PREFIX_LAST);
+ table->status=error ? STATUS_NOT_FOUND: 0;
++ if (!error) {
++ rows_read++;
++
++ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++ if (inx >= 0 && inx < MAX_KEY)
++ index_rows_read[inx]++;
++ }
+ DBUG_RETURN(error);
+ }
+
+@@ -1689,6 +1718,13 @@
+ ha_statistic_increment(&SSV::ha_read_next_count);
+ int error=mi_rnext(file,buf,active_index);
+ table->status=error ? STATUS_NOT_FOUND: 0;
++ if (!error) {
++ rows_read++;
++
++ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++ if (inx >= 0 && inx < MAX_KEY)
++ index_rows_read[inx]++;
++ }
+ return error;
+ }
+
+@@ -1698,6 +1734,13 @@
+ ha_statistic_increment(&SSV::ha_read_prev_count);
+ int error=mi_rprev(file,buf, active_index);
+ table->status=error ? STATUS_NOT_FOUND: 0;
++ if (!error) {
++ rows_read++;
++
++ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++ if (inx >= 0 && inx < MAX_KEY)
++ index_rows_read[inx]++;
++ }
+ return error;
+ }
+
+@@ -1707,6 +1750,13 @@
+ ha_statistic_increment(&SSV::ha_read_first_count);
+ int error=mi_rfirst(file, buf, active_index);
+ table->status=error ? STATUS_NOT_FOUND: 0;
++ if (!error) {
++ rows_read++;
++
++ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++ if (inx >= 0 && inx < MAX_KEY)
++ index_rows_read[inx]++;
++ }
+ return error;
+ }
+
+@@ -1716,6 +1766,13 @@
+ ha_statistic_increment(&SSV::ha_read_last_count);
+ int error=mi_rlast(file, buf, active_index);
+ table->status=error ? STATUS_NOT_FOUND: 0;
++ if (!error) {
++ rows_read++;
++
++ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++ if (inx >= 0 && inx < MAX_KEY)
++ index_rows_read[inx]++;
++ }
+ return error;
+ }
+
+@@ -1731,6 +1788,13 @@
+ error= mi_rnext_same(file,buf);
+ } while (error == HA_ERR_RECORD_DELETED);
+ table->status=error ? STATUS_NOT_FOUND: 0;
++ if (!error) {
++ rows_read++;
++
++ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
++ if (inx >= 0 && inx < MAX_KEY)
++ index_rows_read[inx]++;
++ }
+ return error;
+ }
+
+@@ -1747,6 +1811,7 @@
+ ha_statistic_increment(&SSV::ha_read_rnd_next_count);
+ int error=mi_scan(file, buf);
+ table->status=error ? STATUS_NOT_FOUND: 0;
++ if (!error) rows_read++;
+ return error;
+ }
+
+@@ -1760,6 +1825,7 @@
+ ha_statistic_increment(&SSV::ha_read_rnd_count);
+ int error=mi_rrnd(file, buf, my_get_ptr(pos,ref_length));
+ table->status=error ? STATUS_NOT_FOUND: 0;
++ if (!error) rows_read++;
+ return error;
+ }
+
Patch15: plugin-avoid-version.patch
Patch16: %{name}-fix-dummy-thread-race-condition.patch
Patch18: %{name}-sphinx.patch
-# <percona patches, http://www.percona.com/percona-lab.html>
-Patch100: %{name}-userstats.patch
-Patch101: %{name}-microslow.patch
-Patch102: %{name}-acc-pslist.patch
-Patch103: %{name}-split_buf_pool_mutex_fixed_optimistic_safe.patch
-Patch104: %{name}-innodb_rw_lock.patch
+# <percona patches, http://bazaar.launchpad.net/~percona-dev/percona-server/5.5.7/files>
+Patch100: %{name}-userstat.patch
+Patch101: %{name}-innodb_extend_slow.patch
+Patch102: %{name}-microsec_process.patch
+Patch103: %{name}-innodb_split_buf_pool_mutex.patch
# </percona>
URL: http://www.mysql.com/products/database/mysql/community_edition.html
BuildRequires: bison
#%patch102 -p1
# CHECK ME
#%patch103 -p1
-# CHECK ME
-#%patch104 -p1
# </percona>
%build