# name : innodb_extend_slow.patch # introduced : 11 or before # maintainer : Yasufumi # #!!! notice !!! # Any small change to this file in the main branch # should be done or reviewed by the maintainer! diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c --- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:49:59.175955882 +0900 +++ b/storage/innobase/buf/buf0buf.c 2010-12-03 17:42:42.074307123 +0900 @@ -51,6 +51,40 @@ #include "dict0dict.h" #include "log0recv.h" #include "page0zip.h" +#include "trx0trx.h" + +/* prototypes for new functions added to ha_innodb.cc */ +trx_t* innobase_get_trx(); + +inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) +{ + ulint block_hash; + ulint block_hash_byte; + byte block_hash_offset; + + ut_ad(block); + + if (!innobase_get_slow_log() || !trx || !trx->take_stats) + return; + + if (!trx->distinct_page_access_hash) { + trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); + } + + block_hash = ut_hash_ulint((block->page.space << 20) + block->page.space + + block->page.offset, DPAH_SIZE << 3); + block_hash_byte = block_hash >> 3; + block_hash_offset = (byte) block_hash & 0x07; + if (block_hash_byte >= DPAH_SIZE) + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); + if (block_hash_offset > 7) + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); + if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) + trx->distinct_page_access++; + trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset; + return; +} /* IMPLEMENTATION OF THE BUFFER POOL @@ -2403,8 +2437,16 @@ mutex_t* block_mutex; ibool must_read; unsigned access_time; + trx_t* trx = NULL; + ulint sec; + ulint ms; + ib_uint64_t start_time; + ib_uint64_t finish_time; buf_pool_t* buf_pool = buf_pool_get(space, offset); + if (innobase_get_slow_log()) { + trx = innobase_get_trx(); + } buf_pool->stat.n_page_gets++; for (;;) { @@ -2422,7 +2464,7 @@ //buf_pool_mutex_exit(buf_pool); rw_lock_s_unlock(&buf_pool->page_hash_latch); - buf_read_page(space, zip_size, offset); + buf_read_page(space, zip_size, offset, trx); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 37 || buf_validate()); @@ -2518,6 +2560,13 @@ /* Let us wait until the read operation completes */ + if (innobase_get_slow_log() && trx && trx->take_stats) + { + ut_usectime(&sec, &ms); + start_time = (ib_uint64_t)sec * 1000000 + ms; + } else { + start_time = 0; + } for (;;) { enum buf_io_fix io_fix; @@ -2532,6 +2581,12 @@ break; } } + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) + { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } } #ifdef UNIV_IBUF_COUNT_DEBUG @@ -2847,6 +2902,11 @@ ibool must_read; ulint retries = 0; mutex_t* block_mutex = NULL; + trx_t* trx = NULL; + ulint sec; + ulint ms; + ib_uint64_t start_time; + ib_uint64_t finish_time; buf_pool_t* buf_pool = buf_pool_get(space, offset); ut_ad(mtr); @@ -2875,6 +2935,9 @@ || ibuf_page_low(space, zip_size, offset, FALSE, file, line, NULL)); #endif + if (innobase_get_slow_log()) { + trx = innobase_get_trx(); + } buf_pool->stat.n_page_gets++; fold = buf_page_address_fold(space, offset); loop: @@ -2949,7 +3012,7 @@ return(NULL); } - if (buf_read_page(space, zip_size, offset)) { + if (buf_read_page(space, zip_size, offset, trx)) { retries = 0; } else if (retries < BUF_PAGE_READ_MAX_RETRIES) { ++retries; @@ -3258,6 +3321,13 @@ /* Let us wait until the read operation completes */ + if (innobase_get_slow_log() && trx && trx->take_stats) + { + ut_usectime(&sec, &ms); + start_time = (ib_uint64_t)sec * 1000000 + ms; + } else { + start_time = 0; + } for (;;) { enum buf_io_fix io_fix; @@ -3272,6 +3342,12 @@ break; } } + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) + { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } } fix_type = MTR_MEMO_BUF_FIX; @@ -3298,13 +3374,17 @@ read-ahead */ buf_read_ahead_linear(space, zip_size, offset, - ibuf_inside(mtr)); + ibuf_inside(mtr), trx); } #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(buf_block_get_space(block), buf_block_get_page_no(block)) == 0); #endif + if (innobase_get_slow_log()) { + _increment_page_get_statistics(block, trx); + } + return(block); } @@ -3328,6 +3408,7 @@ unsigned access_time; ibool success; ulint fix_type; + trx_t* trx = NULL; ut_ad(block); ut_ad(mtr); @@ -3405,6 +3486,10 @@ #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(block->page.file_page_was_freed == FALSE); #endif + if (innobase_get_slow_log()) { + trx = innobase_get_trx(); + } + if (UNIV_UNLIKELY(!access_time)) { /* In the case of a first access, try to apply linear read-ahead */ @@ -3412,7 +3497,7 @@ buf_read_ahead_linear(buf_block_get_space(block), buf_block_get_zip_size(block), buf_block_get_page_no(block), - ibuf_inside(mtr)); + ibuf_inside(mtr), trx); } #ifdef UNIV_IBUF_COUNT_DEBUG @@ -3422,6 +3507,9 @@ buf_pool = buf_pool_from_block(block); buf_pool->stat.n_page_gets++; + if (innobase_get_slow_log()) { + _increment_page_get_statistics(block, trx); + } return(TRUE); } @@ -3444,6 +3532,7 @@ buf_pool_t* buf_pool; ibool success; ulint fix_type; + trx_t* trx = NULL; ut_ad(mtr); ut_ad(mtr->state == MTR_ACTIVE); @@ -3530,6 +3619,11 @@ #endif buf_pool->stat.n_page_gets++; + if (innobase_get_slow_log()) { + trx = innobase_get_trx(); + _increment_page_get_statistics(block, trx); + } + return(TRUE); } diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c --- a/storage/innobase/buf/buf0rea.c 2010-12-03 17:32:15.617037263 +0900 +++ b/storage/innobase/buf/buf0rea.c 2010-12-03 17:42:42.075297193 +0900 @@ -77,7 +77,8 @@ treat the tablespace as dropped; this is a timestamp we use to stop dangling page reads from a tablespace which we have DISCARDed + IMPORTed back */ - ulint offset) /*!< in: page number */ + ulint offset, /*!< in: page number */ + trx_t* trx) { buf_page_t* bpage; ulint wake_later; @@ -179,15 +180,15 @@ thd_wait_begin(NULL, THD_WAIT_DISKIO); if (zip_size) { - *err = fil_io(OS_FILE_READ | wake_later, + *err = _fil_io(OS_FILE_READ | wake_later, sync, space, zip_size, offset, 0, zip_size, - bpage->zip.data, bpage); + bpage->zip.data, bpage, trx); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - *err = fil_io(OS_FILE_READ | wake_later, + *err = _fil_io(OS_FILE_READ | wake_later, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage); + ((buf_block_t*) bpage)->frame, bpage, trx); } thd_wait_end(NULL); ut_a(*err == DB_SUCCESS); @@ -213,7 +214,8 @@ /*==========*/ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset) /*!< in: page number */ + ulint offset, /*!< in: page number */ + trx_t* trx) { buf_pool_t* buf_pool = buf_pool_get(space, offset); ib_int64_t tablespace_version; @@ -227,7 +229,7 @@ count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, zip_size, FALSE, - tablespace_version, offset); + tablespace_version, offset, trx); srv_buf_pool_reads += count; if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); @@ -279,7 +281,8 @@ ulint space, /*!< in: space id */ ulint zip_size, /*!< in: compressed page size in bytes, or 0 */ ulint offset, /*!< in: page number; see NOTE 3 above */ - ibool inside_ibuf) /*!< in: TRUE if we are inside ibuf routine */ + ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf routine */ + trx_t* trx) { buf_pool_t* buf_pool = buf_pool_get(space, offset); ib_int64_t tablespace_version; @@ -498,7 +501,7 @@ count += buf_read_page_low( &err, FALSE, ibuf_mode, - space, zip_size, FALSE, tablespace_version, i); + space, zip_size, FALSE, tablespace_version, i, trx); if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); fprintf(stderr, @@ -591,7 +594,7 @@ buf_read_page_low(&err, sync && (i + 1 == n_stored), BUF_READ_ANY_PAGE, space_ids[i], zip_size, TRUE, space_versions[i], - page_nos[i]); + page_nos[i], NULL); if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) { tablespace_deleted: @@ -733,12 +736,12 @@ if ((i + 1 == n_stored) && sync) { buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, zip_size, TRUE, tablespace_version, - page_nos[i]); + page_nos[i], NULL); } else { buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE | OS_AIO_SIMULATED_WAKE_LATER, space, zip_size, TRUE, - tablespace_version, page_nos[i]); + tablespace_version, page_nos[i], NULL); } } diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c --- a/storage/innobase/fil/fil0fil.c 2010-12-03 15:53:54.610037199 +0900 +++ b/storage/innobase/fil/fil0fil.c 2010-12-03 17:42:42.079064198 +0900 @@ -4747,7 +4747,7 @@ node->name, node->handle, buf, offset_low, offset_high, page_size * n_pages, - NULL, NULL); + NULL, NULL, NULL); #endif if (success) { node->size += n_pages; @@ -5074,7 +5074,7 @@ i/o on a tablespace which does not exist */ UNIV_INTERN ulint -fil_io( +_fil_io( /*===*/ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, ORed to OS_FILE_LOG, if a log i/o @@ -5099,8 +5099,9 @@ void* buf, /*!< in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + trx_t* trx) { ulint mode; fil_space_t* space; @@ -5268,7 +5269,7 @@ #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset_low, offset_high, len, node, message); + offset_low, offset_high, len, node, message, trx); #endif ut_a(ret); diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:36:44.293955189 +0900 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:42:42.090024586 +0900 @@ -1573,6 +1573,16 @@ trx->check_unique_secondary = !thd_test_options( thd, OPTION_RELAXED_UNIQUE_CHECKS); +#ifdef EXTENDED_SLOWLOG + if (thd_log_slow_verbosity(thd) & SLOG_V_INNODB) { + trx->take_stats = TRUE; + } else { + trx->take_stats = FALSE; + } +#else + trx->take_stats = FALSE; +#endif + DBUG_VOID_RETURN; } @@ -1627,6 +1637,32 @@ return(trx); } +/************************************************************************* +Gets current trx. */ +extern "C" +trx_t* +innobase_get_trx() +{ + THD *thd=current_thd; + if (likely(thd != 0)) { + trx_t*& trx = thd_to_trx(thd); + return(trx); + } else { + return(NULL); + } +} + +extern "C" +ibool +innobase_get_slow_log() +{ +#ifdef EXTENDED_SLOWLOG + return((ibool) thd_opt_slow_log()); +#else + return(FALSE); +#endif +} + /*********************************************************************//** Note that a transaction has been registered with MySQL. @return true if transaction is registered with MySQL 2PC coordinator */ @@ -9301,6 +9337,25 @@ statement has ended */ if (trx->n_mysql_tables_in_use == 0) { +#ifdef EXTENDED_SLOWLOG + increment_thd_innodb_stats(thd, + (unsigned long long) trx->id, + trx->io_reads, + trx->io_read, + trx->io_reads_wait_timer, + trx->lock_que_wait_timer, + trx->innodb_que_wait_timer, + trx->distinct_page_access); + + trx->io_reads = 0; + trx->io_read = 0; + trx->io_reads_wait_timer = 0; + trx->lock_que_wait_timer = 0; + trx->innodb_que_wait_timer = 0; + trx->distinct_page_access = 0; + if (trx->distinct_page_access_hash) + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); +#endif trx->mysql_n_tables_locked = 0; prebuilt->used_in_HANDLER = FALSE; diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h --- a/storage/innobase/include/buf0rea.h 2010-12-03 15:18:48.891024406 +0900 +++ b/storage/innobase/include/buf0rea.h 2010-12-03 17:42:42.096026873 +0900 @@ -27,6 +27,7 @@ #define buf0rea_h #include "univ.i" +#include "trx0types.h" #include "buf0types.h" /********************************************************************//** @@ -41,7 +42,8 @@ /*==========*/ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset);/*!< in: page number */ + ulint offset, /*!< in: page number */ + trx_t* trx); /********************************************************************//** Applies linear read-ahead if in the buf_pool the page is a border page of a linear read-ahead area and all the pages in the area have been accessed. @@ -73,7 +75,8 @@ ulint space, /*!< in: space id */ ulint zip_size, /*!< in: compressed page size in bytes, or 0 */ ulint offset, /*!< in: page number; see NOTE 3 above */ - ibool inside_ibuf); /*!< in: TRUE if we are inside ibuf routine */ + ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf routine */ + trx_t* trx); /********************************************************************//** Issues read requests for pages which the ibuf module wants to read in, in order to contract the insert buffer tree. Technically, this function is like diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h --- a/storage/innobase/include/fil0fil.h 2010-12-03 15:09:51.290958543 +0900 +++ b/storage/innobase/include/fil0fil.h 2010-12-03 17:42:42.097027548 +0900 @@ -611,9 +611,12 @@ Reads or writes data. This operation is asynchronous (aio). @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ +#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \ + _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL) + UNIV_INTERN ulint -fil_io( +_fil_io( /*===*/ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, ORed to OS_FILE_LOG, if a log i/o @@ -638,8 +641,9 @@ void* buf, /*!< in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message); /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + trx_t* trx); /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the handler for completed requests. The aio array of pending requests is divided diff -ruN a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h --- a/storage/innobase/include/os0file.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/os0file.h 2010-12-03 17:42:42.100023783 +0900 @@ -36,6 +36,7 @@ #define os0file_h #include "univ.i" +#include "trx0types.h" #ifndef __WIN__ #include @@ -277,13 +278,17 @@ pfs_os_file_close_func(file, __FILE__, __LINE__) # define os_aio(type, mode, name, file, buf, offset, offset_high, \ - n, message1, message2) \ + n, message1, message2, trx) \ pfs_os_aio_func(type, mode, name, file, buf, offset, \ - offset_high, n, message1, message2, \ + offset_high, n, message1, message2, trx, \ __FILE__, __LINE__) # define os_file_read(file, buf, offset, offset_high, n) \ - pfs_os_file_read_func(file, buf, offset, offset_high, n, \ + pfs_os_file_read_func(file, buf, offset, offset_high, n, NULL, \ + __FILE__, __LINE__) + +# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \ + pfs_os_file_read_func(file, buf, offset, offset_high, n, trx, \ __FILE__, __LINE__) # define os_file_read_no_error_handling(file, buf, offset, \ @@ -319,12 +324,15 @@ # define os_file_close(file) os_file_close_func(file) # define os_aio(type, mode, name, file, buf, offset, offset_high, \ - n, message1, message2) \ + n, message1, message2, trx) \ os_aio_func(type, mode, name, file, buf, offset, offset_high, n,\ - message1, message2) + message1, message2, trx) # define os_file_read(file, buf, offset, offset_high, n) \ - os_file_read_func(file, buf, offset, offset_high, n) + os_file_read_func(file, buf, offset, offset_high, n, NULL) + +# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \ + os_file_read_func(file, buf, offset, offset_high, n, trx) # define os_file_read_no_error_handling(file, buf, offset, \ offset_high, n) \ @@ -692,6 +700,7 @@ ulint offset_high,/*!< in: most significant 32 bits of offset */ ulint n, /*!< in: number of bytes to read */ + trx_t* trx, const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -746,6 +755,7 @@ (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + trx_t* trx, const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ /*******************************************************************//** @@ -889,7 +899,8 @@ offset where to read */ ulint offset_high,/*!< in: most significant 32 bits of offset */ - ulint n); /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + trx_t* trx); /*******************************************************************//** Rewind file to its start, read at most size - 1 bytes from it to str, and NUL-terminate str. All errors are silently ignored. This function is @@ -1048,10 +1059,11 @@ (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ - void* message2);/*!< in: message for the aio handler + void* message2,/*!< in: message for the aio handler (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + trx_t* trx); /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in shutdown. */ diff -ruN a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic --- a/storage/innobase/include/os0file.ic 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/os0file.ic 2010-12-03 17:42:42.102024458 +0900 @@ -229,6 +229,7 @@ (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + trx_t* trx, const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -244,7 +245,7 @@ src_file, src_line); result = os_aio_func(type, mode, name, file, buf, offset, offset_high, - n, message1, message2); + n, message1, message2, trx); register_pfs_file_io_end(locker, n); @@ -268,6 +269,7 @@ ulint offset_high,/*!< in: most significant 32 bits of offset */ ulint n, /*!< in: number of bytes to read */ + trx_t* trx, const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -278,7 +280,7 @@ register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, src_file, src_line); - result = os_file_read_func(file, buf, offset, offset_high, n); + result = os_file_read_func(file, buf, offset, offset_high, n, trx); register_pfs_file_io_end(locker, n); diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h --- a/storage/innobase/include/srv0srv.h 2010-12-03 17:32:15.634987408 +0900 +++ b/storage/innobase/include/srv0srv.h 2010-12-03 17:42:42.104028644 +0900 @@ -71,6 +71,9 @@ #define SRV_AUTO_EXTEND_INCREMENT \ (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE)) +/* prototypes for new functions added to ha_innodb.cc */ +ibool innobase_get_slow_log(); + /* Mutex for locking srv_monitor_file */ extern mutex_t srv_monitor_file_mutex; /* Temporary file for innodb monitor output */ diff -ruN a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h --- a/storage/innobase/include/trx0trx.h 2010-12-03 15:41:52.049372966 +0900 +++ b/storage/innobase/include/trx0trx.h 2010-12-03 17:42:42.107024532 +0900 @@ -743,6 +743,17 @@ /*------------------------------*/ char detailed_error[256]; /*!< detailed error message for last error, or empty. */ + /*------------------------------*/ + ulint io_reads; + ib_uint64_t io_read; + ulint io_reads_wait_timer; + ib_uint64_t lock_que_wait_ustarted; + ulint lock_que_wait_timer; + ulint innodb_que_wait_timer; + ulint distinct_page_access; +#define DPAH_SIZE 8192 + byte* distinct_page_access_hash; + ibool take_stats; }; #define TRX_MAX_N_THREADS 32 /* maximum number of diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c --- a/storage/innobase/lock/lock0lock.c 2010-12-03 15:09:51.297986437 +0900 +++ b/storage/innobase/lock/lock0lock.c 2010-12-03 17:42:42.111024587 +0900 @@ -1755,6 +1755,8 @@ { lock_t* lock; trx_t* trx; + ulint sec; + ulint ms; ut_ad(mutex_own(&kernel_mutex)); @@ -1813,6 +1815,10 @@ trx->que_state = TRX_QUE_LOCK_WAIT; trx->was_chosen_as_deadlock_victim = FALSE; trx->wait_started = time(NULL); + if (innobase_get_slow_log() && trx->take_stats) { + ut_usectime(&sec, &ms); + trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms; + } ut_a(que_thr_stop(thr)); @@ -3764,6 +3770,8 @@ { lock_t* lock; trx_t* trx; + ulint sec; + ulint ms; ut_ad(mutex_own(&kernel_mutex)); @@ -3819,6 +3827,10 @@ return(DB_SUCCESS); } + if (innobase_get_slow_log() && trx->take_stats) { + ut_usectime(&sec, &ms); + trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms; + } trx->que_state = TRX_QUE_LOCK_WAIT; trx->was_chosen_as_deadlock_victim = FALSE; trx->wait_started = time(NULL); diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c --- a/storage/innobase/os/os0file.c 2010-12-03 17:32:15.644024974 +0900 +++ b/storage/innobase/os/os0file.c 2010-12-03 17:42:42.117023467 +0900 @@ -43,6 +43,8 @@ #include "srv0start.h" #include "fil0fil.h" #include "buf0buf.h" +#include "trx0sys.h" +#include "trx0trx.h" #include "log0recv.h" #ifndef UNIV_HOTBACKUP # include "os0sync.h" @@ -2212,13 +2214,18 @@ ulint n, /*!< in: number of bytes to read */ ulint offset, /*!< in: least significant 32 bits of file offset from where to read */ - ulint offset_high) /*!< in: most significant 32 bits of + ulint offset_high, /*!< in: most significant 32 bits of offset */ + trx_t* trx) { off_t offs; #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD) ssize_t n_bytes; #endif /* HAVE_PREAD && !HAVE_BROKEN_PREAD */ + ulint sec; + ulint ms; + ib_uint64_t start_time; + ib_uint64_t finish_time; ut_a((offset & 0xFFFFFFFFUL) == offset); @@ -2239,6 +2246,15 @@ os_n_file_reads++; + if (innobase_get_slow_log() && trx && trx->take_stats) + { + trx->io_reads++; + trx->io_read += n; + ut_usectime(&sec, &ms); + start_time = (ib_uint64_t)sec * 1000000 + ms; + } else { + start_time = 0; + } #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD) os_mutex_enter(os_file_count_mutex); os_file_n_pending_preads++; @@ -2252,6 +2268,13 @@ os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) + { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } + return(n_bytes); #else { @@ -2288,6 +2311,13 @@ os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) + { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } + return(ret); } #endif @@ -2428,7 +2458,8 @@ offset where to read */ ulint offset_high, /*!< in: most significant 32 bits of offset */ - ulint n) /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + trx_t* trx) { #ifdef __WIN__ BOOL ret; @@ -2503,7 +2534,7 @@ os_bytes_read_since_printout += n; try_again: - ret = os_file_pread(file, buf, n, offset, offset_high); + ret = os_file_pread(file, buf, n, offset, offset_high, trx); if ((ulint)ret == n) { @@ -2632,7 +2663,7 @@ os_bytes_read_since_printout += n; try_again: - ret = os_file_pread(file, buf, n, offset, offset_high); + ret = os_file_pread(file, buf, n, offset, offset_high, NULL); if ((ulint)ret == n) { @@ -4026,10 +4057,11 @@ (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ - void* message2)/*!< in: message for the aio handler + void* message2,/*!< in: message for the aio handler (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + trx_t* trx) { os_aio_array_t* array; os_aio_slot_t* slot; @@ -4070,8 +4102,8 @@ wait in the Windows case. */ if (type == OS_FILE_READ) { - return(os_file_read(file, buf, offset, - offset_high, n)); + return(os_file_read_trx(file, buf, offset, + offset_high, n, trx)); } ut_a(type == OS_FILE_WRITE); @@ -4111,6 +4143,11 @@ array = NULL; /* Eliminate compiler warning */ } + if (trx && type == OS_FILE_READ) + { + trx->io_reads++; + trx->io_read += n; + } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, name, buf, offset, offset_high, n); if (type == OS_FILE_READ) { diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c --- a/storage/innobase/srv/srv0srv.c 2010-12-03 17:32:15.648024399 +0900 +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 17:45:05.067023254 +0900 @@ -87,6 +87,9 @@ #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +/* prototypes for new functions added to ha_innodb.cc */ +ibool innobase_get_slow_log(); + /* The following counter is incremented whenever there is some user activity in the server */ UNIV_INTERN ulint srv_activity_count = 0; @@ -1232,6 +1235,10 @@ ibool has_slept = FALSE; srv_conc_slot_t* slot = NULL; ulint i; + ib_uint64_t start_time = 0L; + ib_uint64_t finish_time = 0L; + ulint sec; + ulint ms; #ifdef UNIV_SYNC_DEBUG ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); @@ -1312,6 +1319,7 @@ switches. */ if (SRV_THREAD_SLEEP_DELAY > 0) { os_thread_sleep(SRV_THREAD_SLEEP_DELAY); + trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY; } trx->op_info = ""; @@ -1371,6 +1379,14 @@ #ifdef UNIV_SYNC_DEBUG ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); #endif /* UNIV_SYNC_DEBUG */ + + if (innobase_get_slow_log() && trx->take_stats) { + ut_usectime(&sec, &ms); + start_time = (ib_uint64_t)sec * 1000000 + ms; + } else { + start_time = 0; + } + trx->op_info = "waiting in InnoDB queue"; thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_TABLE_LOCK); @@ -1379,6 +1395,12 @@ trx->op_info = ""; + if (innobase_get_slow_log() && trx->take_stats && start_time) { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->innodb_que_wait_timer += (ulint)(finish_time - start_time); + } + os_fast_mutex_lock(&srv_conc_mutex); srv_conc_n_waiting_threads--; diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c --- a/storage/innobase/trx/trx0trx.c 2010-12-03 15:41:52.053955669 +0900 +++ b/storage/innobase/trx/trx0trx.c 2010-12-03 17:42:42.127023410 +0900 @@ -188,6 +188,15 @@ trx->global_read_view = NULL; trx->read_view = NULL; + trx->io_reads = 0; + trx->io_read = 0; + trx->io_reads_wait_timer = 0; + trx->lock_que_wait_timer = 0; + trx->innodb_que_wait_timer = 0; + trx->distinct_page_access = 0; + trx->distinct_page_access_hash = NULL; + trx->take_stats = FALSE; + /* Set X/Open XA transaction identification to NULL */ memset(&trx->xid, 0, sizeof(trx->xid)); trx->xid.formatID = -1; @@ -221,6 +230,11 @@ mutex_exit(&kernel_mutex); + if (innobase_get_slow_log() && trx->take_stats) { + trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); + } + return(trx); } @@ -406,6 +420,12 @@ /*===============*/ trx_t* trx) /*!< in, own: trx object */ { + if (trx->distinct_page_access_hash) + { + mem_free(trx->distinct_page_access_hash); + trx->distinct_page_access_hash= NULL; + } + mutex_enter(&kernel_mutex); UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx); @@ -427,6 +447,12 @@ /*====================*/ trx_t* trx) /*!< in, own: trx object */ { + if (trx->distinct_page_access_hash) + { + mem_free(trx->distinct_page_access_hash); + trx->distinct_page_access_hash= NULL; + } + mutex_enter(&kernel_mutex); trx_free(trx); @@ -1212,6 +1238,9 @@ trx_t* trx) /*!< in: transaction */ { que_thr_t* thr; + ulint sec; + ulint ms; + ib_uint64_t now; ut_ad(mutex_own(&kernel_mutex)); ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); @@ -1226,6 +1255,11 @@ thr = UT_LIST_GET_FIRST(trx->wait_thrs); } + if (innobase_get_slow_log() && trx->take_stats) { + ut_usectime(&sec, &ms); + now = (ib_uint64_t)sec * 1000000 + ms; + trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); + } trx->que_state = TRX_QUE_RUNNING; } @@ -1239,6 +1273,9 @@ trx_t* trx) /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */ { que_thr_t* thr; + ulint sec; + ulint ms; + ib_uint64_t now; ut_ad(mutex_own(&kernel_mutex)); ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); @@ -1253,6 +1290,11 @@ thr = UT_LIST_GET_FIRST(trx->wait_thrs); } + if (innobase_get_slow_log() && trx->take_stats) { + ut_usectime(&sec, &ms); + now = (ib_uint64_t)sec * 1000000 + ms; + trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); + } trx->que_state = TRX_QUE_RUNNING; }