From eccb488f1f659d1774a04d96e1cd228b6aa6f200 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Elan=20Ruusam=C3=A4e?= Date: Fri, 9 Jan 2009 11:04:53 +0000 Subject: [PATCH] - for 5.0.75 Changed files: mysql-innodb_check_fragmentation.patch -> 1.1.2.1 mysql-innodb_fsync_source.patch -> 1.1.2.1 mysql-innodb_io_patches.patch -> 1.1.2.1 mysql-innodb_io_pattern.patch -> 1.1.2.1 mysql-innodb_locks_held.patch -> 1.1.2.1 mysql-innodb_rw_lock.patch -> 1.1.2.1 mysql-innodb_show_bp.patch -> 1.1.2.1 mysql-innodb_show_hashed_memory.patch -> 1.1.2.1 mysql-microsec_process.patch -> 1.1.2.1 --- mysql-innodb_check_fragmentation.patch | 275 +++++ mysql-innodb_fsync_source.patch | 594 ++++++++++ mysql-innodb_io_patches.patch | 487 ++++++++ mysql-innodb_io_pattern.patch | 688 +++++++++++ mysql-innodb_locks_held.patch | 168 +++ mysql-innodb_rw_lock.patch | 1459 ++++++++++++++++++++++++ mysql-innodb_show_bp.patch | 447 ++++++++ mysql-innodb_show_hashed_memory.patch | 275 +++++ mysql-microsec_process.patch | 281 +++++ 9 files changed, 4674 insertions(+) create mode 100644 mysql-innodb_check_fragmentation.patch create mode 100644 mysql-innodb_fsync_source.patch create mode 100644 mysql-innodb_io_patches.patch create mode 100644 mysql-innodb_io_pattern.patch create mode 100644 mysql-innodb_locks_held.patch create mode 100644 mysql-innodb_rw_lock.patch create mode 100644 mysql-innodb_show_bp.patch create mode 100644 mysql-innodb_show_hashed_memory.patch create mode 100644 mysql-microsec_process.patch diff --git a/mysql-innodb_check_fragmentation.patch b/mysql-innodb_check_fragmentation.patch new file mode 100644 index 0000000..4b16731 --- /dev/null +++ b/mysql-innodb_check_fragmentation.patch @@ -0,0 +1,275 @@ +diff -r 936d427a9a15 innobase/btr/btr0cur.c +--- a/innobase/btr/btr0cur.c Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/btr/btr0cur.c Mon Dec 22 00:33:11 2008 -0800 +@@ -516,6 +516,14 @@ + == index->table->comp); + } + ++ if (level == 0) { ++ /* Initializes status counters */ ++ 
innobase_mysql_thd_init_innodb_scan_cont(); ++ innobase_mysql_thd_init_innodb_scan_jump(); ++ innobase_mysql_thd_init_innodb_scan_data(); ++ innobase_mysql_thd_init_innodb_scan_garbage(); ++ } ++ + break; + } + +@@ -663,6 +671,12 @@ + btr_cur_add_path_info(cursor, height, + root_height); + } ++ ++ /* Initializes status counters */ ++ innobase_mysql_thd_init_innodb_scan_cont(); ++ innobase_mysql_thd_init_innodb_scan_jump(); ++ innobase_mysql_thd_init_innodb_scan_data(); ++ innobase_mysql_thd_init_innodb_scan_garbage(); + + break; + } +diff -r 936d427a9a15 innobase/btr/btr0pcur.c +--- a/innobase/btr/btr0pcur.c Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/btr/btr0pcur.c Mon Dec 22 00:33:11 2008 -0800 +@@ -381,6 +381,7 @@ + last record of the current page */ + mtr_t* mtr) /* in: mtr */ + { ++ ulint page_no; + ulint next_page_no; + ulint space; + page_t* page; +@@ -393,11 +394,22 @@ + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + + page = btr_pcur_get_page(cursor); ++ page_no = buf_frame_get_page_no(page); + + next_page_no = btr_page_get_next(page, mtr); + space = buf_frame_get_space_id(page); + + ut_ad(next_page_no != FIL_NULL); ++ ++ if (next_page_no - page_no == 1) { ++ innobase_mysql_thd_increment_innodb_scan_cont(1); ++ } else { ++ innobase_mysql_thd_increment_innodb_scan_jump(1); ++ } ++ innobase_mysql_thd_increment_innodb_scan_data( ++ page_get_data_size(page)); ++ innobase_mysql_thd_increment_innodb_scan_garbage( ++ page_header_get_field(page, PAGE_GARBAGE)); + + next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); +@@ -427,6 +439,7 @@ + record of the current page */ + mtr_t* mtr) /* in: mtr */ + { ++ ulint page_no; + ulint prev_page_no; + ulint space; + page_t* page; +@@ -462,9 +475,20 @@ + btr_pcur_restore_position(latch_mode2, cursor, mtr); + + page = btr_pcur_get_page(cursor); ++ page_no = buf_frame_get_page_no(page); + + prev_page_no = btr_page_get_prev(page, mtr); + space = 
buf_frame_get_space_id(page); ++ ++ if (page_no - prev_page_no == 1) { ++ innobase_mysql_thd_increment_innodb_scan_cont(1); ++ } else { ++ innobase_mysql_thd_increment_innodb_scan_jump(1); ++ } ++ innobase_mysql_thd_increment_innodb_scan_data( ++ page_get_data_size(page)); ++ innobase_mysql_thd_increment_innodb_scan_garbage( ++ page_header_get_field(page, PAGE_GARBAGE)); + + if (btr_pcur_is_before_first_on_page(cursor, mtr) + && (prev_page_no != FIL_NULL)) { +diff -r 936d427a9a15 innobase/btr/btr0sea.c +--- a/innobase/btr/btr0sea.c Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/btr/btr0sea.c Mon Dec 22 00:33:11 2008 -0800 +@@ -861,6 +861,12 @@ + + buf_pool->n_page_gets++; + ++ /* Initializes status counters */ ++ innobase_mysql_thd_init_innodb_scan_cont(); ++ innobase_mysql_thd_init_innodb_scan_jump(); ++ innobase_mysql_thd_init_innodb_scan_data(); ++ innobase_mysql_thd_init_innodb_scan_garbage(); ++ + return(TRUE); + + /*-------------------------------------------*/ +diff -r 936d427a9a15 innobase/include/btr0cur.h +--- a/innobase/include/btr0cur.h Mon Dec 22 00:33:03 2008 -0800 ++++ b/innobase/include/btr0cur.h Mon Dec 22 00:33:11 2008 -0800 +@@ -697,6 +697,17 @@ + extern ulint btr_cur_n_non_sea_old; + extern ulint btr_cur_n_sea_old; + ++/*--------------------------------------*/ ++/* Prototypes of the scan status counter helpers defined in sql/ha_innodb.cc: *_init_* resets a counter of the current session to 0, *_increment_* adds length to it. The init functions take no arguments, hence (void). */ ++void innobase_mysql_thd_init_innodb_scan_cont(void); ++void innobase_mysql_thd_increment_innodb_scan_cont(ulong length); ++void innobase_mysql_thd_init_innodb_scan_jump(void); ++void innobase_mysql_thd_increment_innodb_scan_jump(ulong length); ++void innobase_mysql_thd_init_innodb_scan_data(void); ++void innobase_mysql_thd_increment_innodb_scan_data(ulong length); ++void innobase_mysql_thd_init_innodb_scan_garbage(void); ++void innobase_mysql_thd_increment_innodb_scan_garbage(ulong length); ++ + #ifndef UNIV_NONINL + #include "btr0cur.ic" + #endif +diff -r 936d427a9a15 patch_info/innodb_check_fragmentation.info +--- /dev/null Thu Jan 01 
00:00:00 1970 +0000 ++++ b/patch_info/innodb_check_fragmentation.info Mon Dec 22 00:33:11 2008 -0800 +@@ -0,0 +1,6 @@ ++File=innodb_check_fragmentation.patch ++Name=Session status to check fragmentation of the last InnoDB scan ++Version=1.0 ++Author=Percona ++License=GPL ++Comment=The names are Innodb_scan_* +diff -r 936d427a9a15 sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Dec 22 00:33:03 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Dec 22 00:33:11 2008 -0800 +@@ -760,6 +760,102 @@ + } + + /************************************************************************* ++Resets Innodb_scan_pages_contiguous of the current THD to 0; does nothing when there is no current THD. */ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_cont() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_cont = 0; ++ } ++} ++ ++/************************************************************************* ++Adds length to Innodb_scan_pages_contiguous of the current THD; does nothing when there is no current THD. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_cont(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_cont+= length; ++ } ++} ++ ++/************************************************************************* ++Resets Innodb_scan_pages_jumpy of the current THD to 0; does nothing when there is no current THD. */ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_jump() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_jump = 0; ++ } ++} ++ ++/************************************************************************* ++Adds length to Innodb_scan_pages_jumpy of the current THD; does nothing when there is no current THD. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_jump(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_jump+= length; ++ } ++} ++ ++/************************************************************************* ++Resets Innodb_scan_data_in_pages of the current THD to 0; does nothing when there is no current THD. 
*/ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_data() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_data = 0; ++ } ++} ++ ++/************************************************************************* ++Adds length to Innodb_scan_data_in_pages of the current THD; does nothing when there is no current THD. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_data(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_data+= length; ++ } ++} ++ ++/************************************************************************* ++Resets Innodb_scan_garbages_in_pages of the current THD to 0; does nothing when there is no current THD. */ ++extern "C" ++void ++innobase_mysql_thd_init_innodb_scan_garbage() ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_garbage = 0; ++ } ++} ++ ++/************************************************************************* ++Adds length to Innodb_scan_garbages_in_pages of the current THD; does nothing when there is no current THD. */ ++extern "C" ++void ++innobase_mysql_thd_increment_innodb_scan_garbage(ulong length) ++{ ++ THD *thd=current_thd; ++ if (likely(thd != 0)) { ++ thd->status_var.innodb_scan_garbage+= length; ++ } ++} ++ ++/************************************************************************* + Gets the InnoDB transaction handle for a MySQL handler object, creates + an InnoDB transaction struct if the corresponding MySQL thread struct still + lacks one. 
*/ +diff -r 936d427a9a15 sql/mysqld.cc +--- a/sql/mysqld.cc Mon Dec 22 00:33:03 2008 -0800 ++++ b/sql/mysqld.cc Mon Dec 22 00:33:11 2008 -0800 +@@ -6673,6 +6673,10 @@ + {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS}, + #ifdef HAVE_INNOBASE_DB + {"Innodb_", (char*) &innodb_status_variables, SHOW_VARS}, ++ {"Innodb_scan_pages_contiguous",(char*) offsetof(STATUS_VAR, innodb_scan_cont), SHOW_LONGLONG_STATUS}, ++ {"Innodb_scan_pages_jumpy", (char*) offsetof(STATUS_VAR, innodb_scan_jump), SHOW_LONGLONG_STATUS}, ++ {"Innodb_scan_data_in_pages",(char*) offsetof(STATUS_VAR, innodb_scan_data), SHOW_LONGLONG_STATUS}, ++ {"Innodb_scan_garbages_in_pages",(char*) offsetof(STATUS_VAR, innodb_scan_garbage), SHOW_LONGLONG_STATUS}, + #endif /*HAVE_INNOBASE_DB*/ + {"Key_blocks_not_flushed", (char*) &dflt_key_cache_var.global_blocks_changed, SHOW_KEY_CACHE_LONG}, + {"Key_blocks_unused", (char*) &dflt_key_cache_var.blocks_unused, SHOW_KEY_CACHE_CONST_LONG}, +diff -r 936d427a9a15 sql/sql_class.h +--- a/sql/sql_class.h Mon Dec 22 00:33:03 2008 -0800 ++++ b/sql/sql_class.h Mon Dec 22 00:33:11 2008 -0800 +@@ -729,6 +729,10 @@ + sense to add to the /global/ status variable counter. 
+ */ + double last_query_cost; ++ ulonglong innodb_scan_cont; ++ ulonglong innodb_scan_jump; ++ ulonglong innodb_scan_data; ++ ulonglong innodb_scan_garbage; + } STATUS_VAR; + + /* diff --git a/mysql-innodb_fsync_source.patch b/mysql-innodb_fsync_source.patch new file mode 100644 index 0000000..637a7d6 --- /dev/null +++ b/mysql-innodb_fsync_source.patch @@ -0,0 +1,594 @@ +diff -r 61031ebb48ce innobase/buf/buf0flu.c +--- a/innobase/buf/buf0flu.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/buf/buf0flu.c Mon Nov 03 05:07:56 2008 -0800 +@@ -341,7 +341,7 @@ + + /* Now flush the doublewrite buffer data to disk */ + +- fil_flush(TRX_SYS_SPACE); ++ fil_flush(TRX_SYS_SPACE, FLUSH_FROM_DIRTY_BUFFER); + + /* We know that the writes have been flushed to disk now + and in recovery we will find them in the doublewrite buffer +@@ -381,7 +381,7 @@ + + /* Now we flush the data to disk (for example, with fsync) */ + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_DIRTY_BUFFER); + + /* We can now reuse the doublewrite memory buffer: */ + +@@ -501,7 +501,8 @@ + } + #else + /* Force the log to the disk before writing the modified block */ +- log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_DIRTY_BUFFER); + #endif + buf_flush_init_for_writing(block->frame, block->newest_modification, + block->space, block->offset); +diff -r 61031ebb48ce innobase/fil/fil0fil.c +--- a/innobase/fil/fil0fil.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/fil/fil0fil.c Mon Nov 03 05:07:56 2008 -0800 +@@ -245,6 +245,7 @@ + request */ + UT_LIST_BASE_NODE_T(fil_space_t) space_list; + /* list of all file spaces */ ++ ulint flush_types[FLUSH_FROM_NUMBER];/* calls to fil_flush by caller */ + }; + + /* The tablespace memory cache. 
This variable is NULL before the module is +@@ -849,7 +850,7 @@ + /* Flush tablespaces so that we can close modified files in the LRU + list */ + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); + + count++; + +@@ -1309,7 +1310,10 @@ + + UT_LIST_INIT(system->unflushed_spaces); + UT_LIST_INIT(system->space_list); +- ++ { ++ int x; ++ for (x = 0; x < FLUSH_FROM_NUMBER; ++x) system->flush_types[x] = 0; ++ } + return(system); + } + +@@ -1437,6 +1441,23 @@ + } + + mutex_exit(&(system->mutex)); ++} ++ ++/******************************************************************** ++Prints to file the number of fil_flush() calls made so far, one counter per flush_from_type caller. The counters are read without taking fil_system->mutex; they are cast to ulong to match the %lu conversions. */ ++ ++void ++fil_print(FILE *file) ++{ ++ fprintf(file, ++ "fsync callers: %lu buffer pool, %lu other, %lu checkpoint, " ++ "%lu log aio, %lu log sync, %lu archive\n", ++ (ulong) fil_system->flush_types[FLUSH_FROM_DIRTY_BUFFER], ++ (ulong) fil_system->flush_types[FLUSH_FROM_OTHER], ++ (ulong) fil_system->flush_types[FLUSH_FROM_CHECKPOINT], ++ (ulong) fil_system->flush_types[FLUSH_FROM_LOG_IO_COMPLETE], ++ (ulong) fil_system->flush_types[FLUSH_FROM_LOG_WRITE_UP_TO], ++ (ulong) fil_system->flush_types[FLUSH_FROM_ARCHIVE]); + } + + /******************************************************************** +@@ -2256,7 +2277,7 @@ + + os_thread_sleep(20000); + +- fil_flush(id); ++ fil_flush(id, FLUSH_FROM_OTHER); + + goto retry; + +@@ -3574,7 +3595,7 @@ + size_after_extend, *actual_size); */ + mutex_exit(&(system->mutex)); + +- fil_flush(space_id); ++ fil_flush(space_id, FLUSH_FROM_OTHER); + + return(success); + } +@@ -4166,8 +4187,9 @@ + void + fil_flush( + /*======*/ +- ulint space_id) /* in: file space id (this can be a group of ++ ulint space_id, /* in: file space id (this can be a group of + log files or a tablespace of the database) */ ++ flush_from_type flush_type)/* in: identifies the caller */ + { + fil_system_t* system = fil_system; + fil_space_t* space; +@@ -4176,7 +4198,7 @@ + ib_longlong old_mod_counter; + + mutex_enter(&(system->mutex)); +- ++ 
system->flush_types[flush_type]++; + HASH_SEARCH(hash, system->spaces, space_id, space, + space->id == space_id); + if (!space || space->is_being_deleted) { +@@ -4281,7 +4303,8 @@ + void + fil_flush_file_spaces( + /*==================*/ +- ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */ ++ ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */ ++ flush_from_type flush_type)/* in: identifies the caller */ + { + fil_system_t* system = fil_system; + fil_space_t* space; +@@ -4322,7 +4345,7 @@ + a non-existing space id. */ + for (i = 0; i < n_space_ids; i++) { + +- fil_flush(space_ids[i]); ++ fil_flush(space_ids[i], flush_type); + } + + mem_free(space_ids); +diff -r 61031ebb48ce innobase/include/fil0fil.h +--- a/innobase/include/fil0fil.h Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/include/fil0fil.h Mon Nov 03 05:07:56 2008 -0800 +@@ -197,6 +197,13 @@ + fil_init( + /*=====*/ + ulint max_n_open); /* in: max number of open files */ ++/******************************************************************** ++ * Prints internal counters. */ ++ ++void ++fil_print( ++ /*=====*/ ++ FILE* file); /* in: output stream */ + /*********************************************************************** + Opens all log files and system tablespace data files. They stay open until the + database server shutdown. This should be called at a server startup after the +@@ -621,14 +628,26 @@ + ulint segment); /* in: the number of the segment in the aio + array to wait for */ + /************************************************************************** ++Identifies the caller of fil_flush. */ ++typedef enum { ++ FLUSH_FROM_DIRTY_BUFFER, ++ FLUSH_FROM_OTHER, ++ FLUSH_FROM_CHECKPOINT, ++ FLUSH_FROM_LOG_IO_COMPLETE, ++ FLUSH_FROM_LOG_WRITE_UP_TO, ++ FLUSH_FROM_ARCHIVE, ++ FLUSH_FROM_NUMBER ++} flush_from_type; ++/************************************************************************** + Flushes to disk possible writes cached by the OS. 
If the space does not exist + or is being dropped, does not do anything. */ + + void + fil_flush( + /*======*/ +- ulint space_id); /* in: file space id (this can be a group of ++ ulint space_id, /* in: file space id (this can be a group of + log files or a tablespace of the database) */ ++ flush_from_type flush_type);/* in: identifies the caller */ + /************************************************************************** + Flushes to disk writes in file spaces of the given type possibly cached by + the OS. */ +@@ -636,7 +655,8 @@ + void + fil_flush_file_spaces( + /*==================*/ +- ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */ ++ ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */ ++ flush_from_type flush_type);/* in: identifies the caller */ + /********************************************************************** + Checks the consistency of the tablespace cache. */ + +diff -r 61031ebb48ce innobase/include/log0log.h +--- a/innobase/include/log0log.h Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/include/log0log.h Mon Nov 03 05:07:56 2008 -0800 +@@ -146,6 +146,22 @@ + log_io_complete( + /*============*/ + log_group_t* group); /* in: log group */ ++ ++/********************************************************** ++Describes the caller of log_write_up_to. */ ++ ++typedef enum { ++ LOG_WRITE_FROM_DIRTY_BUFFER, ++ LOG_WRITE_FROM_BACKGROUND_SYNC, ++ LOG_WRITE_FROM_BACKGROUND_ASYNC, ++ LOG_WRITE_FROM_INTERNAL, ++ LOG_WRITE_FROM_CHECKPOINT_SYNC, ++ LOG_WRITE_FROM_CHECKPOINT_ASYNC, ++ LOG_WRITE_FROM_LOG_ARCHIVE, ++ LOG_WRITE_FROM_COMMIT_SYNC, ++ LOG_WRITE_FROM_COMMIT_ASYNC, ++ LOG_WRITE_FROM_NUMBER ++} log_sync_type; + /********************************************************** + This function is called, e.g., when a transaction wants to commit. 
It checks + that the log has been written to the log file up to the last log entry written +@@ -159,14 +175,21 @@ + be written, ut_dulint_max if not specified */ + ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, + or LOG_WAIT_ALL_GROUPS */ +- ibool flush_to_disk); +- /* in: TRUE if we want the written log also to be +- flushed to disk */ ++ ibool flush_to_disk, ++ /* in: TRUE if we want the written log also to be flushed to disk */ ++ log_sync_type caller);/* in: identifies the caller */ + /******************************************************************** + Does a syncronous flush of the log buffer to disk. */ + + void + log_buffer_flush_to_disk(void); ++/*==========================*/ ++/******************************************************************** ++Flushes the log buffer. Forces it to disk depending on the value of ++the configuration parameter innodb_flush_log_at_trx_commit. */ ++ ++void ++log_buffer_flush_maybe_sync(void); + /*==========================*/ + /******************************************************************** + Advances the smallest lsn for which there are unflushed dirty blocks in the +@@ -744,6 +767,12 @@ + AND flushed to disk */ + ulint n_pending_writes;/* number of currently pending flushes + or writes */ ++ ulint log_sync_callers[LOG_WRITE_FROM_NUMBER]; ++ /* counts calls to log_write_up_to */ ++ ulint log_sync_syncers[LOG_WRITE_FROM_NUMBER]; ++ /* counts calls to log_write_up_to when log file is sync'd */ ++ ulint n_syncs; /* number of fsyncs done for log file */ ++ ulint n_checkpoints; /* number of calls to log_checkpoint */ + /* NOTE on the 'flush' in names of the fields below: starting from + 4.0.14, we separate the write of the log file and the actual fsync() + or other method to flush it to disk. 
The names below shhould really +diff -r 61031ebb48ce innobase/log/log0log.c +--- a/innobase/log/log0log.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/log/log0log.c Mon Nov 03 05:07:56 2008 -0800 +@@ -782,6 +782,15 @@ + log_sys->written_to_all_lsn = log_sys->lsn; + + log_sys->n_pending_writes = 0; ++ { ++ int x; ++ for (x = 0; x < LOG_WRITE_FROM_NUMBER; ++x) { ++ log_sys->log_sync_callers[x] = 0; ++ log_sys->log_sync_syncers[x] = 0; ++ } ++ } ++ log_sys->n_syncs = 0; ++ log_sys->n_checkpoints = 0; + + log_sys->no_flush_event = os_event_create(NULL); + +@@ -1066,7 +1075,7 @@ + if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { + +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE); + } + + #ifdef UNIV_DEBUG +@@ -1088,7 +1097,7 @@ + && srv_unix_file_flush_method != SRV_UNIX_NOSYNC + && srv_flush_log_at_trx_commit != 2) { + +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE); + } + + mutex_enter(&(log_sys->mutex)); +@@ -1303,9 +1312,10 @@ + be written, ut_dulint_max if not specified */ + ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, + or LOG_WAIT_ALL_GROUPS */ +- ibool flush_to_disk) ++ ibool flush_to_disk, + /* in: TRUE if we want the written log also to be + flushed to disk */ ++ log_sync_type caller) /* in: identifies caller */ + { + log_group_t* group; + ulint start_offset; +@@ -1315,6 +1325,7 @@ + ulint loop_count; + ulint unlock; + ++ log_sys->log_sync_callers[caller]++; + if (recv_no_ibuf_operations) { + /* Recovery is running and no operations on the log files are + allowed yet (the variable name .._no_ibuf_.. 
is misleading) */ +@@ -1465,13 +1476,17 @@ + so we have also flushed to disk what we have written */ + + log_sys->flushed_to_disk_lsn = log_sys->write_lsn; ++ log_sys->n_syncs++; ++ log_sys->log_sync_syncers[caller]++; + + } else if (flush_to_disk) { + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FLUSH_FROM_LOG_WRITE_UP_TO); + log_sys->flushed_to_disk_lsn = log_sys->write_lsn; ++ log_sys->n_syncs++; ++ log_sys->log_sync_syncers[caller]++; + } + + mutex_enter(&(log_sys->mutex)); +@@ -1520,7 +1535,8 @@ + + mutex_exit(&(log_sys->mutex)); + +- log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_BACKGROUND_SYNC); + } + + /******************************************************************** +@@ -1551,7 +1567,7 @@ + mutex_exit(&(log->mutex)); + + if (do_flush) { +- log_write_up_to(lsn, LOG_NO_WAIT, FALSE); ++ log_write_up_to(lsn, LOG_NO_WAIT, FALSE, LOG_WRITE_FROM_INTERNAL); + } + } + +@@ -1921,11 +1937,11 @@ + } + + if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_CHECKPOINT); + } + + mutex_enter(&(log_sys->mutex)); +- ++ log_sys->n_checkpoints++; + oldest_lsn = log_buf_pool_get_oldest_modification(); + + mutex_exit(&(log_sys->mutex)); +@@ -1938,7 +1954,8 @@ + write-ahead-logging algorithm ensures that the log has been flushed + up to oldest_lsn. 
*/ + +- log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_CHECKPOINT_SYNC); + + mutex_enter(&(log_sys->mutex)); + +@@ -2566,7 +2583,7 @@ + + mutex_exit(&(log_sys->mutex)); + +- fil_flush(group->archive_space_id); ++ fil_flush(group->archive_space_id, FLUSH_FROM_ARCHIVE); + + mutex_enter(&(log_sys->mutex)); + +@@ -2647,7 +2664,8 @@ + + mutex_exit(&(log_sys->mutex)); + +- log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE); ++ log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE, ++ LOG_WRITE_FROM_LOG_ARCHIVE); + + calc_new_limit = FALSE; + +@@ -3184,8 +3202,8 @@ + } + mutex_exit(&kernel_mutex); + +- fil_flush_file_spaces(FIL_TABLESPACE); +- fil_flush_file_spaces(FIL_LOG); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); ++ fil_flush_file_spaces(FIL_LOG, FLUSH_FROM_OTHER); + + /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer + pool: therefore it is essential that the buffer pool has been +@@ -3218,7 +3236,7 @@ + + fil_write_flushed_lsn_to_data_files(lsn, arch_log_no); + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); + + fil_close_all_files(); + +@@ -3331,15 +3349,45 @@ + time_elapsed = 0.001 + difftime(current_time, + log_sys->last_printout_time); + fprintf(file, +- "%lu pending log writes, %lu pending chkp writes\n" +- "%lu log i/o's done, %.2f log i/o's/second\n", +- (ulong) log_sys->n_pending_writes, +- (ulong) log_sys->n_pending_checkpoint_writes, +- (ulong) log_sys->n_log_ios, +- ((log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed)); ++ "%lu pending log writes, %lu pending chkp writes\n" ++ "%lu log i/o's done, %.2f log i/o's/second, %lu syncs, %lu checkpoints\n", ++ (ulong) log_sys->n_pending_writes, ++ (ulong) log_sys->n_pending_checkpoint_writes, ++ (ulong) log_sys->n_log_ios, ++ (log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed, ++ (ulong) log_sys->n_syncs, /* ulint counters are cast to match %lu */ ++ 
(ulong) log_sys->n_checkpoints); + + log_sys->n_log_ios_old = log_sys->n_log_ios; + log_sys->last_printout_time = current_time; ++ ++ fprintf(file, /* calls to log_write_up_to(), one counter per log_sync_type caller */ ++ "log sync callers: %lu buffer pool, background %lu sync and %lu async, " ++ "%lu internal, checkpoint %lu sync and %lu async, %lu archive, " ++ "commit %lu sync and %lu async\n", ++ (ulong) log_sys->log_sync_callers[LOG_WRITE_FROM_DIRTY_BUFFER], ++ (ulong) log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_SYNC], ++ (ulong) log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_ASYNC], ++ (ulong) log_sys->log_sync_callers[LOG_WRITE_FROM_INTERNAL], ++ (ulong) log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_SYNC], ++ (ulong) log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_ASYNC], ++ (ulong) log_sys->log_sync_callers[LOG_WRITE_FROM_LOG_ARCHIVE], ++ (ulong) log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_SYNC], ++ (ulong) log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_ASYNC]); ++ ++ fprintf(file, /* calls to log_write_up_to() that also synced the log file, per caller */ ++ "log sync syncers: %lu buffer pool, background %lu sync and %lu async, " ++ "%lu internal, checkpoint %lu sync and %lu async, %lu archive, " ++ "commit %lu sync and %lu async\n", ++ (ulong) log_sys->log_sync_syncers[LOG_WRITE_FROM_DIRTY_BUFFER], ++ (ulong) log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_SYNC], ++ (ulong) log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_ASYNC], ++ (ulong) log_sys->log_sync_syncers[LOG_WRITE_FROM_INTERNAL], ++ (ulong) log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_SYNC], ++ (ulong) log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_ASYNC], ++ (ulong) log_sys->log_sync_syncers[LOG_WRITE_FROM_LOG_ARCHIVE], ++ (ulong) log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_SYNC], ++ (ulong) log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_ASYNC]); + + mutex_exit(&(log_sys->mutex)); + } +diff -r 61031ebb48ce innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/srv/srv0srv.c Mon Nov 03 05:07:56 2008 -0800 +@@ -1638,6 +1638,12 @@ + (ulong)time_elapsed); + + fputs("----------\n" ++ "BACKGROUND THREAD\n" ++ "----------\n", file); ++ fil_print(file); ++ ++ ++ fputs("----------\n" + 
"SEMAPHORES\n" + "----------\n", file); + sync_print(file); +diff -r 61031ebb48ce innobase/trx/trx0sys.c +--- a/innobase/trx/trx0sys.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/trx/trx0sys.c Mon Nov 03 05:07:56 2008 -0800 +@@ -511,7 +511,7 @@ + page += UNIV_PAGE_SIZE; + } + +- fil_flush_file_spaces(FIL_TABLESPACE); ++ fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); + + leave_func: + ut_free(unaligned_read_buf); +diff -r 61031ebb48ce innobase/trx/trx0trx.c +--- a/innobase/trx/trx0trx.c Mon Nov 03 05:07:46 2008 -0800 ++++ b/innobase/trx/trx0trx.c Mon Nov 03 05:07:56 2008 -0800 +@@ -916,19 +916,21 @@ + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, +- FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + /* Write the log to the log files AND flush + them to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, ++ LOG_WRITE_FROM_COMMIT_SYNC); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + ut_error; + } +@@ -1659,18 +1661,21 @@ + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + /* Write the log to the log files AND flush them to + disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, ++ LOG_WRITE_FROM_COMMIT_SYNC); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ 
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + ut_error; + } +@@ -1906,19 +1911,21 @@ + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, +- FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + /* Write the log to the log files AND flush + them to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, ++ LOG_WRITE_FROM_COMMIT_SYNC); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + +- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); ++ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, ++ LOG_WRITE_FROM_COMMIT_ASYNC); + } else { + ut_error; + } +diff -r 61031ebb48ce patch_info/innodb_fsync_source.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_fsync_source.info Mon Nov 03 05:07:56 2008 -0800 +@@ -0,0 +1,9 @@ ++File=innodb_fsync_source.patch ++Name=Information of fsync callers in InnoDB ++Version=1.0 ++Author=Google ++License=GPL ++Comment= ++ChangeLog= ++2008-11-01 ++VT: Initial porting diff --git a/mysql-innodb_io_patches.patch b/mysql-innodb_io_patches.patch new file mode 100644 index 0000000..90af625 --- /dev/null +++ b/mysql-innodb_io_patches.patch @@ -0,0 +1,487 @@ +diff -r 45683461331d innobase/buf/buf0rea.c +--- a/innobase/buf/buf0rea.c Mon Dec 22 00:31:16 2008 -0800 ++++ b/innobase/buf/buf0rea.c Mon Dec 22 00:32:02 2008 -0800 +@@ -188,6 +188,10 @@ + ulint low, high; + ulint err; + ulint i; ++ ++ if (!(srv_read_ahead & 1)) { ++ return(0); ++ } + + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread deadlocks */ +@@ -396,6 +400,10 @@ + ulint err; + ulint i; + ++ if (!(srv_read_ahead & 2)) { ++ return(0); ++ } ++ + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread 
deadlocks */ + return(0); +diff -r 45683461331d innobase/include/os0file.h +--- a/innobase/include/os0file.h Mon Dec 22 00:31:16 2008 -0800 ++++ b/innobase/include/os0file.h Mon Dec 22 00:32:02 2008 -0800 +@@ -551,8 +551,10 @@ + /*========*/ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ +- ulint n_segments, /* in: combined number of segments in the four +- first aio arrays; must be >= 4 */ ++// ulint n_segments, /* in: combined number of segments in the four ++// first aio arrays; must be >= 4 */ ++ ulint n_read_threads, /* in: number of read segments; n_segments == 2 + n_read_threads + n_write_threads */ ++ ulint n_write_threads, /* in: number of write segments */ + ulint n_slots_sync); /* in: number of slots in the sync aio array */ + /*********************************************************************** + Requests an asynchronous i/o operation. */ +diff -r 45683461331d innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h Mon Dec 22 00:31:16 2008 -0800 ++++ b/innobase/include/srv0srv.h Mon Dec 22 00:32:02 2008 -0800 +@@ -89,6 +89,8 @@ + extern ulint srv_lock_table_size; + + extern ulint srv_n_file_io_threads; ++extern ulint srv_n_read_io_threads; ++extern ulint srv_n_write_io_threads; + + #ifdef UNIV_LOG_ARCHIVE + extern ibool srv_log_archive_on; +@@ -133,6 +135,10 @@ + extern ulong srv_max_purge_lag; + extern ibool srv_use_awe; + extern ibool srv_use_adaptive_hash_indexes; ++ ++extern ulint srv_io_capacity; ++extern ulint srv_read_ahead; /* bit flags tested in buf0rea.c: one read-ahead routine returns 0 unless (srv_read_ahead & 1), the other unless (srv_read_ahead & 2) */ ++extern ulint srv_adaptive_checkpoint; + /*-------------------------------------------*/ + + extern ulint srv_n_rows_inserted; +diff -r 45683461331d innobase/log/log0log.c +--- a/innobase/log/log0log.c Mon Dec 22 00:31:16 2008 -0800 ++++ b/innobase/log/log0log.c Mon Dec 22 00:32:02 2008 -0800 +@@ -3326,6 +3326,15 @@ + (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn), + (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn)); + ++ fprintf(file, ++ "Max checkpoint age %lu\n" ++ "Modified age %lu\n" ++ 
"Checkpoint age %lu\n", ++ (ulong) log_sys->max_checkpoint_age, ++ (ulong) ut_dulint_minus(log_sys->lsn, ++ log_buf_pool_get_oldest_modification()), ++ (ulong) ut_dulint_minus(log_sys->lsn, log_sys->last_checkpoint_lsn)); ++ + current_time = time(NULL); + + time_elapsed = 0.001 + difftime(current_time, +diff -r 45683461331d innobase/os/os0file.c +--- a/innobase/os/os0file.c Mon Dec 22 00:31:16 2008 -0800 ++++ b/innobase/os/os0file.c Mon Dec 22 00:32:02 2008 -0800 +@@ -2877,8 +2877,10 @@ + /*========*/ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ +- ulint n_segments, /* in: combined number of segments in the four +- first aio arrays; must be >= 4 */ ++// ulint n_segments, /* in: combined number of segments in the four ++// first aio arrays; must be >= 4 */ ++ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads*/ ++ ulint n_write_threads, /**/ + ulint n_slots_sync) /* in: number of slots in the sync aio array */ + { + ulint n_read_segs; +@@ -2888,6 +2890,8 @@ + #ifdef POSIX_ASYNC_IO + sigset_t sigset; + #endif ++ ulint n_segments = 2 + n_read_threads + n_write_threads; ++ + ut_ad(n % n_segments == 0); + ut_ad(n_segments >= 4); + +@@ -2898,8 +2902,8 @@ + } + + n_per_seg = n / n_segments; +- n_write_segs = (n_segments - 2) / 2; +- n_read_segs = n_segments - 2 - n_write_segs; ++ n_write_segs = n_write_threads; ++ n_read_segs = n_read_threads; + + /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */ + +@@ -3180,6 +3184,13 @@ + struct aiocb* control; + #endif + ulint i; ++ ulint prim_segment; ++ ulint n; ++ ++ n = array->n_slots / array->n_segments; ++ /* 64 blocks' striping ( aligning max(BUF_READ_AHEAD_AREA) ) */ ++ prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments); ++ + loop: + os_mutex_enter(array->mutex); + +@@ -3198,12 +3209,23 @@ + goto loop; + } + ++ for (i = prim_segment * n; i < array->n_slots; i++) { ++ slot = os_aio_array_get_nth_slot(array, 
i); ++ ++ if (slot->reserved == FALSE) { ++ break; ++ } ++ } ++ ++ if (slot->reserved == TRUE){ ++ /* Not found after the intended segment. So we should search before. */ + for (i = 0;; i++) { + slot = os_aio_array_get_nth_slot(array, i); + + if (slot->reserved == FALSE) { + break; + } ++ } + } + + array->n_reserved++; +diff -r 45683461331d innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Mon Dec 22 00:31:16 2008 -0800 ++++ b/innobase/srv/srv0srv.c Mon Dec 22 00:32:02 2008 -0800 +@@ -167,6 +167,8 @@ + ulint srv_lock_table_size = ULINT_MAX; + + ulint srv_n_file_io_threads = ULINT_MAX; ++ulint srv_n_read_io_threads = 1; ++ulint srv_n_write_io_threads = 1; + + #ifdef UNIV_LOG_ARCHIVE + ibool srv_log_archive_on = FALSE; +@@ -324,6 +326,15 @@ + ibool srv_use_awe = FALSE; + ibool srv_use_adaptive_hash_indexes = TRUE; + ++ulint srv_io_capacity = 100; ++ ++/* Returns the number of IO operations that is X percent of the capacity. ++PCT_IO(5) -> returns the number of IO operations that is 5% of the max ++where max is srv_io_capacity. 
*/ ++#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0))) ++ ++ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ ++ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */ + /*-------------------------------------------*/ + ulong srv_n_spin_wait_rounds = 20; + ulong srv_n_free_tickets_to_enter = 500; +@@ -2214,6 +2225,8 @@ + ibool skip_sleep = FALSE; + ulint i; + ++ dulint oldest_lsn; ++ + #ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Master thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +@@ -2302,9 +2315,9 @@ + + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; +- if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) { ++ if (n_pend_ios < 3 && (n_ios - n_ios_old < PCT_IO(5))) { + srv_main_thread_op_info = "doing insert buffer merge"; +- ibuf_contract_for_n_pages(TRUE, 5); ++ ibuf_contract_for_n_pages(TRUE, PCT_IO(5)); + + srv_main_thread_op_info = "flushing log"; + +@@ -2317,7 +2330,7 @@ + /* Try to keep the number of modified pages in the + buffer pool under the limit wished by the user */ + +- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), + ut_dulint_max); + + /* If we had to do the flush, it may have taken +@@ -2326,6 +2339,44 @@ + iteration of this loop. 
*/ + + skip_sleep = TRUE; ++ } else if (srv_adaptive_checkpoint) { ++ ++ /* Try to keep modified age not to exceed ++ max_checkpoint_age * 7/8 line */ ++ ++ mutex_enter(&(log_sys->mutex)); ++ ++ oldest_lsn = buf_pool_get_oldest_modification(); ++ if (ut_dulint_is_zero(oldest_lsn)) { ++ ++ mutex_exit(&(log_sys->mutex)); ++ ++ } else { ++ if (ut_dulint_minus(log_sys->lsn, oldest_lsn) ++ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) { ++ ++ /* 2nd defence line (max_checkpoint_age * 3/4) */ ++ ++ mutex_exit(&(log_sys->mutex)); ++ ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ++ ut_dulint_max); ++ skip_sleep = TRUE; ++ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn) ++ > (log_sys->max_checkpoint_age)/2 ) { ++ ++ /* 1st defence line (max_checkpoint_age * 1/2) */ ++ ++ mutex_exit(&(log_sys->mutex)); ++ ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10), ++ ut_dulint_max); ++ skip_sleep = TRUE; ++ } else { ++ mutex_exit(&(log_sys->mutex)); ++ } ++ } ++ + } + + if (srv_activity_count == old_activity_count) { +@@ -2352,10 +2403,10 @@ + n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; +- if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { ++ if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) { + + srv_main_thread_op_info = "flushing buffer pool pages"; +- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); ++ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); + + srv_main_thread_op_info = "flushing log"; + log_buffer_flush_to_disk(); +@@ -2365,7 +2416,7 @@ + even if the server were active */ + + srv_main_thread_op_info = "doing insert buffer merge"; +- ibuf_contract_for_n_pages(TRUE, 5); ++ ibuf_contract_for_n_pages(TRUE, PCT_IO(5)); + + srv_main_thread_op_info = "flushing log"; + log_buffer_flush_to_disk(); +@@ -2407,14 +2458,14 @@ + (> 70 %), we assume we can afford reserving the 
disk(s) for + the time it requires to flush 100 pages */ + +- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), + ut_dulint_max); + } else { + /* Otherwise, we only flush a small number of pages so that + we do not unnecessarily use much disk i/o capacity from + other work */ + +- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, ++ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10), + ut_dulint_max); + } + +@@ -2503,7 +2554,7 @@ + if (srv_fast_shutdown && srv_shutdown_state > 0) { + n_bytes_merged = 0; + } else { +- n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20); ++ n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100)); + } + + srv_main_thread_op_info = "reserving kernel mutex"; +@@ -2520,7 +2571,7 @@ + + if (srv_fast_shutdown < 2) { + n_pages_flushed = +- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); ++ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); + } else { + /* In the fastest shutdown we do not flush the buffer pool + to data files: we set n_pages_flushed to 0 artificially. 
*/ +diff -r 45683461331d innobase/srv/srv0start.c +--- a/innobase/srv/srv0start.c Mon Dec 22 00:31:16 2008 -0800 ++++ b/innobase/srv/srv0start.c Mon Dec 22 00:32:02 2008 -0800 +@@ -1205,24 +1205,28 @@ + return(DB_ERROR); + } + ++ /* over write innodb_file_io_threads */ ++ srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads; ++ + /* Restrict the maximum number of file i/o threads */ + if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) { + + srv_n_file_io_threads = SRV_MAX_N_IO_THREADS; ++ srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2; + } + + if (!os_aio_use_native_aio) { + /* In simulated aio we currently have use only for 4 threads */ +- srv_n_file_io_threads = 4; ++ /*srv_n_file_io_threads = 4;*/ + + os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD + * srv_n_file_io_threads, +- srv_n_file_io_threads, +- SRV_MAX_N_PENDING_SYNC_IOS); ++ srv_n_read_io_threads, srv_n_write_io_threads, ++ SRV_MAX_N_PENDING_SYNC_IOS * 8); + } else { + os_aio_init(SRV_N_PENDING_IOS_PER_THREAD + * srv_n_file_io_threads, +- srv_n_file_io_threads, ++ srv_n_read_io_threads, srv_n_write_io_threads, + SRV_MAX_N_PENDING_SYNC_IOS); + } + +diff -r 45683461331d patch_info/innodb_io_patches.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_io_patches.info Mon Dec 22 00:32:02 2008 -0800 +@@ -0,0 +1,9 @@ ++File=innodb_io_patches.patch ++Name=Cluster of past InnoDB IO patches ++Version=1.0 ++Author=Percona ++License=GPL ++Comment=This patch contains fixed (control_flush_and_merge_and_read, control_io-threads, adaptive_flush) ++ChangeLog= ++2008-11-06 ++YK: Initial release +diff -r 45683461331d sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Dec 22 00:31:16 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Dec 22 00:32:02 2008 -0800 +@@ -149,6 +149,7 @@ + innobase_lock_wait_timeout, innobase_force_recovery, + innobase_open_files; + ++long innobase_read_io_threads, innobase_write_io_threads; + longlong innobase_buffer_pool_size, 
innobase_log_file_size; + + /* The default values for the following char* start-up parameters +@@ -1403,6 +1404,8 @@ + srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; + + srv_n_file_io_threads = (ulint) innobase_file_io_threads; ++ srv_n_read_io_threads = (ulint) innobase_read_io_threads; ++ srv_n_write_io_threads = (ulint) innobase_write_io_threads; + + srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout; + srv_force_recovery = (ulint) innobase_force_recovery; +diff -r 45683461331d sql/ha_innodb.h +--- a/sql/ha_innodb.h Mon Dec 22 00:31:16 2008 -0800 ++++ b/sql/ha_innodb.h Mon Dec 22 00:32:02 2008 -0800 +@@ -204,6 +204,7 @@ + extern long innobase_additional_mem_pool_size; + extern long innobase_buffer_pool_awe_mem_mb; + extern long innobase_file_io_threads, innobase_lock_wait_timeout; ++extern long innobase_read_io_threads, innobase_write_io_threads; + extern long innobase_force_recovery; + extern long innobase_open_files; + extern char *innobase_data_home_dir, *innobase_data_file_path; +@@ -234,6 +235,9 @@ + extern ulong srv_thread_concurrency; + extern ulong srv_commit_concurrency; + extern ulong srv_flush_log_at_trx_commit; ++extern ulong srv_io_capacity; ++extern ulong srv_read_ahead; ++extern ulong srv_adaptive_checkpoint; + } + + bool innobase_init(void); +diff -r 45683461331d sql/mysqld.cc +--- a/sql/mysqld.cc Mon Dec 22 00:31:16 2008 -0800 ++++ b/sql/mysqld.cc Mon Dec 22 00:32:02 2008 -0800 +@@ -5036,6 +5036,11 @@ + OPT_INNODB_ROLLBACK_ON_TIMEOUT, + OPT_SECURE_FILE_PRIV, + OPT_KEEP_FILES_ON_CREATE, ++ OPT_INNODB_IO_CAPACITY, ++ OPT_INNODB_READ_AHEAD, ++ OPT_INNODB_ADAPTIVE_CHECKPOINT, ++ OPT_INNODB_READ_IO_THREADS, ++ OPT_INNODB_WRITE_IO_THREADS, + OPT_INNODB_ADAPTIVE_HASH_INDEX, + OPT_FEDERATED + }; +@@ -5344,6 +5349,26 @@ + (gptr*) &global_system_variables.innodb_table_locks, + (gptr*) &global_system_variables.innodb_table_locks, + 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, ++ {"innodb_io_capacity", OPT_INNODB_IO_CAPACITY, ++ "Number 
of IO operations per second the server can do. Tunes background IO rate.", ++ (gptr*) &srv_io_capacity, (gptr*) &srv_io_capacity, ++ 0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0}, ++ {"innodb_read_ahead", OPT_INNODB_READ_AHEAD, ++ "Enable/Diasable read aheads bit0:random bit1:linear", ++ (gptr*) &srv_read_ahead, (gptr*) &srv_read_ahead, ++ 0, GET_ULONG, REQUIRED_ARG, 3, 0, 3, 0, 0, 0}, ++ {"innodb_adaptive_checkpoint", OPT_INNODB_ADAPTIVE_CHECKPOINT, ++ "Enable/Diasable flushing along modified age 0:disable 1:enable", ++ (gptr*) &srv_adaptive_checkpoint, (gptr*) &srv_adaptive_checkpoint, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_read_io_threads", OPT_INNODB_READ_IO_THREADS, ++ "Number of background read I/O threads in InnoDB.", ++ (gptr*) &innobase_read_io_threads, (gptr*) &innobase_read_io_threads, ++ 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, ++ {"innodb_write_io_threads", OPT_INNODB_WRITE_IO_THREADS, ++ "Number of background write I/O threads in InnoDB.", ++ (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads, ++ 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, + #endif /* End HAVE_INNOBASE_DB */ + {"isam", OPT_ISAM, "Obsolete. 
ISAM storage engine is no longer supported.", + (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0, +diff -r 45683461331d sql/set_var.cc +--- a/sql/set_var.cc Mon Dec 22 00:31:16 2008 -0800 ++++ b/sql/set_var.cc Mon Dec 22 00:32:02 2008 -0800 +@@ -484,6 +484,12 @@ + sys_var_long_ptr sys_innodb_flush_log_at_trx_commit( + "innodb_flush_log_at_trx_commit", + &srv_flush_log_at_trx_commit); ++sys_var_long_ptr sys_innodb_io_capacity("innodb_io_capacity", ++ &srv_io_capacity); ++sys_var_long_ptr sys_innodb_read_ahead("innodb_read_ahead", ++ &srv_read_ahead); ++sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint", ++ &srv_adaptive_checkpoint); + sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", + &innobase_data_file_path); + sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", +@@ -847,6 +853,9 @@ + &sys_innodb_thread_concurrency, + &sys_innodb_commit_concurrency, + &sys_innodb_flush_log_at_trx_commit, ++ &sys_innodb_io_capacity, ++ &sys_innodb_read_ahead, ++ &sys_innodb_adaptive_checkpoint, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -982,6 +991,11 @@ + {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS}, + {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS}, + {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS}, ++ {sys_innodb_io_capacity.name, (char*) &sys_innodb_io_capacity, SHOW_SYS}, ++ {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS}, ++ {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS}, ++ {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG}, ++ {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG}, + #endif + {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS}, + {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS}, diff 
--git a/mysql-innodb_io_pattern.patch b/mysql-innodb_io_pattern.patch new file mode 100644 index 0000000..604404f --- /dev/null +++ b/mysql-innodb_io_pattern.patch @@ -0,0 +1,688 @@ +diff -r 2bbfde0e0e70 include/mysql_com.h +--- a/include/mysql_com.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/include/mysql_com.h Mon Dec 22 00:33:48 2008 -0800 +@@ -121,6 +121,9 @@ + #define REFRESH_QUERY_CACHE_FREE 0x20000L /* pack query cache */ + #define REFRESH_DES_KEY_FILE 0x40000L + #define REFRESH_USER_RESOURCES 0x80000L ++ ++/* TRUNCATE INFORMATION_SCHEMA.INNODB_IO_PATTERN */ ++#define REFRESH_INNODB_IO_PATTERN 0x1000000L + + #define CLIENT_LONG_PASSWORD 1 /* new more secure passwords */ + #define CLIENT_FOUND_ROWS 2 /* Found instead of affected rows */ +diff -r 2bbfde0e0e70 innobase/buf/buf0buf.c +--- a/innobase/buf/buf0buf.c Mon Dec 22 00:33:11 2008 -0800 ++++ b/innobase/buf/buf0buf.c Mon Dec 22 00:33:48 2008 -0800 +@@ -653,6 +653,9 @@ + } + + buf_pool->page_hash = hash_create(2 * max_size); ++ buf_pool->io_counter_hash = NULL; ++ buf_pool->io_counter_heap = NULL; ++ buf_pool->io_counters = 0; + + buf_pool->n_pend_reads = 0; + +@@ -1966,6 +1969,9 @@ + ulint io_type; + ulint read_page_no; + ++ buf_io_counter_t* io_counter; ++ ulint fold; ++ + ut_ad(block); + + ut_a(block->state == BUF_BLOCK_FILE_PAGE); +@@ -2067,6 +2073,26 @@ + buf_pool->n_pages_read++; + + rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ); ++ /* io_counter here */ ++ if (srv_io_pattern && srv_io_pattern_trace_running) { ++ fold = buf_page_address_fold(block->space, block->offset); ++ HASH_SEARCH(hash, buf_pool->io_counter_hash, fold, io_counter, ++ (io_counter->space == block->space) && (io_counter->offset == block->offset)); ++ if (io_counter == NULL && buf_pool->io_counters < srv_io_pattern_size_limit) { ++ io_counter = mem_heap_alloc(buf_pool->io_counter_heap,(sizeof(buf_io_counter_t))); ++ io_counter->space = block->space; ++ io_counter->offset = block->offset; ++ io_counter->n_read = 0; ++ 
io_counter->n_write = 0; ++ HASH_INSERT(buf_io_counter_t, hash, buf_pool->io_counter_hash, ++ buf_page_address_fold(block->space, block->offset), io_counter); ++ buf_pool->io_counters++; ++ } ++ if (io_counter != NULL) { ++ io_counter->index_id = ut_dulint_get_low(btr_page_get_index_id(buf_block_get_frame(block))); ++ io_counter->n_read++; ++ } ++ } + + #ifdef UNIV_DEBUG + if (buf_debug_prints) { +@@ -2082,6 +2108,26 @@ + buf_flush_write_complete(block); + + rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE); ++ /* io_counter here */ ++ if (srv_io_pattern && srv_io_pattern_trace_running) { ++ fold = buf_page_address_fold(block->space, block->offset); ++ HASH_SEARCH(hash, buf_pool->io_counter_hash, fold, io_counter, ++ (io_counter->space == block->space) && (io_counter->offset == block->offset)); ++ if (io_counter == NULL && buf_pool->io_counters < srv_io_pattern_size_limit) { ++ io_counter = mem_heap_alloc(buf_pool->io_counter_heap,(sizeof(buf_io_counter_t))); ++ io_counter->space = block->space; ++ io_counter->offset = block->offset; ++ io_counter->n_read = 0; ++ io_counter->n_write = 0; ++ HASH_INSERT(buf_io_counter_t, hash, buf_pool->io_counter_hash, ++ buf_page_address_fold(block->space, block->offset), io_counter); ++ buf_pool->io_counters++; ++ } ++ if (io_counter != NULL) { ++ io_counter->index_id = ut_dulint_get_low(btr_page_get_index_id(buf_block_get_frame(block))); ++ io_counter->n_write++; ++ } ++ } + + buf_pool->n_pages_written++; + +@@ -2656,3 +2702,58 @@ + return buf_pool_get_nth_block(buf_pool, i); + + } ++ ++/************************************************************************* ++Controls the internal hash table for IO pattern tracing ++along innodb_io_pattern_trace value.*/ ++ ++void ++buf_io_counter_control(void) ++/*========================*/ ++{ ++ ulint n; ++ ++ mutex_enter(&(buf_pool->mutex)); ++ if (srv_io_pattern_trace) { ++ if (buf_pool->io_counter_hash == NULL) { ++ /* estimating (buf_pool * 10) */ ++ buf_pool->io_counter_hash = 
hash_create(20 * buf_pool->max_size); ++ buf_pool->io_counter_heap = mem_heap_create(4096 * 1024); ++ buf_pool->io_counters = 0; ++ ++ srv_io_pattern = TRUE; ++ } ++ } else { ++ if (buf_pool->io_counter_hash != NULL) { ++ srv_io_pattern = FALSE; ++ ++ for (n = 0; n < buf_pool->io_counter_hash->n_cells; n++) { ++ (buf_pool->io_counter_hash->array + n)->node = NULL; ++ } ++ mem_heap_free(buf_pool->io_counter_heap); ++ buf_pool->io_counter_heap = NULL; ++ buf_pool->io_counters = 0; ++ ++ hash_table_free(buf_pool->io_counter_hash); ++ buf_pool->io_counter_hash = NULL; ++ } ++ } ++ mutex_exit(&(buf_pool->mutex)); ++} ++ ++void ++buf_io_counter_clear(void) ++/*======================*/ ++{ ++ ulint n; ++ ++ mutex_enter(&(buf_pool->mutex)); ++ if (buf_pool->io_counter_hash != NULL) { ++ for (n = 0; n < buf_pool->io_counter_hash->n_cells; n++) { ++ (buf_pool->io_counter_hash->array + n)->node = NULL; ++ } ++ mem_heap_empty(buf_pool->io_counter_heap); ++ buf_pool->io_counters = 0; ++ } ++ mutex_exit(&(buf_pool->mutex)); ++} +diff -r 2bbfde0e0e70 innobase/include/buf0buf.h +--- a/innobase/include/buf0buf.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/innobase/include/buf0buf.h Mon Dec 22 00:33:48 2008 -0800 +@@ -709,6 +709,18 @@ + void buf_pool_dump(void); + buf_block_t* buf_pool_get_nth_block_no_inline(buf_pool_t* pool, ulint i); + ++ ++/************************************************************************* ++Controls the internal hash table for IO pattern tracing ++along innodb_io_pattern_trace value.*/ ++ ++void ++buf_io_counter_control(void); ++/*=========================*/ ++ ++void ++buf_io_counter_clear(void); ++/*=======================*/ + + /* The buffer control block structure */ + +@@ -930,6 +942,9 @@ + ulint curr_size; /* current pool size in pages; + currently always the same as + max_size */ ++ hash_table_t* io_counter_hash; ++ mem_heap_t* io_counter_heap; ++ ulint io_counters; + hash_table_t* page_hash; /* hash table of the file pages */ + + ulint n_pend_reads; /* 
number of pending read operations */ +@@ -1015,6 +1030,15 @@ + locki table, are not in this list */ + }; + ++struct buf_io_counter_struct{ ++ ulint space; ++ ulint offset; ++ buf_io_counter_t* hash; ++ ulint index_id; ++ ulint n_read; ++ ulint n_write; ++}; ++ + /* States of a control block */ + #define BUF_BLOCK_NOT_USED 211 /* is in the free list */ + #define BUF_BLOCK_READY_FOR_USE 212 /* when buf_get_free_block returns +diff -r 2bbfde0e0e70 innobase/include/buf0types.h +--- a/innobase/include/buf0types.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/innobase/include/buf0types.h Mon Dec 22 00:33:48 2008 -0800 +@@ -12,6 +12,8 @@ + typedef struct buf_block_struct buf_block_t; + typedef struct buf_pool_struct buf_pool_t; + ++typedef struct buf_io_counter_struct buf_io_counter_t; ++ + /* The 'type' used of a buffer frame */ + typedef byte buf_frame_t; + +diff -r 2bbfde0e0e70 innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/innobase/include/srv0srv.h Mon Dec 22 00:33:48 2008 -0800 +@@ -141,6 +141,11 @@ + extern ulint srv_io_capacity; + extern ulint srv_read_ahead; + extern ulint srv_adaptive_checkpoint; ++ ++extern volatile ibool srv_io_pattern; ++extern ulong srv_io_pattern_trace; ++extern ulong srv_io_pattern_trace_running; ++extern ulong srv_io_pattern_size_limit; + /*-------------------------------------------*/ + + extern ulint srv_n_rows_inserted; +diff -r 2bbfde0e0e70 innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Mon Dec 22 00:33:11 2008 -0800 ++++ b/innobase/srv/srv0srv.c Mon Dec 22 00:33:48 2008 -0800 +@@ -337,6 +337,11 @@ + + ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ + ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */ ++ ++volatile ibool srv_io_pattern = FALSE; ++ulint srv_io_pattern_trace = 0; ++ulint srv_io_pattern_trace_running = 0; ++ulint srv_io_pattern_size_limit = ULINT_MAX - (1024 * 1024); + /*-------------------------------------------*/ + ulong srv_n_spin_wait_rounds = 
20; + ulong srv_n_free_tickets_to_enter = 500; +diff -r 2bbfde0e0e70 mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Mon Dec 22 00:33:11 2008 -0800 ++++ b/mysql-test/r/information_schema.result Mon Dec 22 00:33:48 2008 -0800 +@@ -59,6 +59,7 @@ + USER_PRIVILEGES + USER_STATISTICS + VIEWS ++INNODB_IO_PATTERN + columns_priv + db + func +@@ -742,7 +743,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-108 ++109 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -812,12 +813,13 @@ + TABLE_PRIVILEGES TABLE_NAME select + TABLE_STATISTICS TABLE_NAME select + VIEWS TABLE_NAME select ++INNODB_IO_PATTERN TABLE_NAME select + delete from mysql.user where user='mysqltest_4'; + delete from mysql.db where user='mysqltest_4'; + flush privileges; + SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 23 ++information_schema 24 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1225,6 +1227,7 @@ + USER_PRIVILEGES GRANTEE + USER_STATISTICS USER + VIEWS TABLE_SCHEMA ++INNODB_IO_PATTERN SPACE + SELECT t.table_name, c1.column_name + FROM information_schema.tables t + INNER JOIN +@@ -1263,6 +1266,7 @@ + USER_PRIVILEGES GRANTEE + USER_STATISTICS USER + VIEWS TABLE_SCHEMA ++INNODB_IO_PATTERN SPACE + SELECT MAX(table_name) FROM information_schema.tables; + MAX(table_name) + VIEWS +@@ -1337,6 +1341,7 @@ + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 + INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 + INNODB_BUFFER_POOL_CONTENT information_schema.INNODB_BUFFER_POOL_CONTENT 1 ++INNODB_IO_PATTERN information_schema.INNODB_IO_PATTERN 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 + PROCESSLIST information_schema.PROCESSLIST 1 + PROFILING information_schema.PROFILING 1 
+diff -r 2bbfde0e0e70 mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Mon Dec 22 00:33:11 2008 -0800 ++++ b/mysql-test/r/information_schema_db.result Mon Dec 22 00:33:48 2008 -0800 +@@ -28,6 +28,7 @@ + USER_PRIVILEGES + USER_STATISTICS + VIEWS ++INNODB_IO_PATTERN + show tables from INFORMATION_SCHEMA like 'T%'; + Tables_in_information_schema (T%) + TABLES +diff -r 2bbfde0e0e70 mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Mon Dec 22 00:33:11 2008 -0800 ++++ b/mysql-test/r/mysqlshow.result Mon Dec 22 00:33:48 2008 -0800 +@@ -102,6 +102,7 @@ + | USER_PRIVILEGES | + | USER_STATISTICS | + | VIEWS | ++| INNODB_IO_PATTERN | + +---------------------------------------+ + Database: INFORMATION_SCHEMA + +---------------------------------------+ +@@ -130,6 +131,7 @@ + | USER_PRIVILEGES | + | USER_STATISTICS | + | VIEWS | ++| INNODB_IO_PATTERN | + +---------------------------------------+ + Wildcard: inf_rmation_schema + +--------------------+ +diff -r 2bbfde0e0e70 patch_info/innodb_io_pattern.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_io_pattern.info Mon Dec 22 00:33:48 2008 -0800 +@@ -0,0 +1,8 @@ ++File=innodb_io_pattern.patch ++Name=Information schema table of InnoDB IO counts for each datafile pages ++Version=1.0 ++Author=Percona ++License=GPL ++Comment=INFORMATION_SCHEMA.INNODB_IO_PATTERN ++2008-12-01 ++YK: fix for mysql-test +diff -r 2bbfde0e0e70 sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Dec 22 00:33:48 2008 -0800 +@@ -1569,6 +1569,8 @@ + pthread_cond_init(&commit_cond, NULL); + innodb_inited= 1; + ++ buf_io_counter_control(); ++ + /* If this is a replication slave and we needed to do a crash recovery, + set the master binlog position to what InnoDB internally knew about + how far we got transactions durable inside InnoDB. 
There is a +@@ -6527,6 +6529,28 @@ + } + + /**************************************************************************** ++Controls the internal hash table for IO pattern tracing ++along innodb_io_pattern_trace value.*/ ++ ++void ++innodb_io_pattern_control(void) ++/*===========================*/ ++{ ++ if (innodb_inited) { ++ buf_io_counter_control(); ++ } ++} ++ ++void ++innodb_io_pattern_clear(void) ++/*=========================*/ ++{ ++ if (innodb_inited) { ++ buf_io_counter_clear(); ++ } ++} ++ ++/**************************************************************************** + Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB + Monitor to the client. */ + +diff -r 2bbfde0e0e70 sql/ha_innodb.h +--- a/sql/ha_innodb.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/ha_innodb.h Mon Dec 22 00:33:48 2008 -0800 +@@ -240,6 +240,9 @@ + extern ulong srv_adaptive_checkpoint; + extern ulong srv_show_locks_held; + extern ulong srv_show_verbose_locks; ++extern ulong srv_io_pattern_trace; ++extern ulong srv_io_pattern_trace_running; ++extern ulong srv_io_pattern_size_limit; + } + + bool innobase_init(void); +@@ -266,6 +269,9 @@ + bool innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables); + bool innodb_mutex_show_status(THD* thd); + void innodb_export_status(void); ++ ++void innodb_io_pattern_control(void); ++void innodb_io_pattern_clear(void); + + void innobase_release_temporary_latches(THD *thd); + +diff -r 2bbfde0e0e70 sql/lex.h +--- a/sql/lex.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/lex.h Mon Dec 22 00:33:48 2008 -0800 +@@ -244,6 +244,7 @@ + { "INNER", SYM(INNER_SYM)}, + { "INNOBASE", SYM(INNOBASE_SYM)}, + { "INNODB", SYM(INNOBASE_SYM)}, ++ { "INNODB_IO_PATTERN", SYM(INNODB_IO_PATTERN)}, + { "INOUT", SYM(INOUT_SYM)}, + { "INSENSITIVE", SYM(INSENSITIVE_SYM)}, + { "INSERT", SYM(INSERT)}, +diff -r 2bbfde0e0e70 sql/mysqld.cc +--- a/sql/mysqld.cc Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/mysqld.cc Mon Dec 22 00:33:48 2008 -0800 +@@ -4983,6 +4983,9 @@ 
+ OPT_INNODB_SYNC_SPIN_LOOPS, + OPT_INNODB_CONCURRENCY_TICKETS, + OPT_INNODB_THREAD_SLEEP_DELAY, ++ OPT_INNODB_IO_PATTERN_TRACE, ++ OPT_INNODB_IO_PATTERN_TRACE_RUNNING, ++ OPT_INNODB_IO_PATTERN_SIZE_LIMIT, + OPT_BDB_CACHE_SIZE, + OPT_BDB_LOG_BUFFER_SIZE, + OPT_BDB_MAX_LOCK, +@@ -5382,6 +5385,18 @@ + "Number of background write I/O threads in InnoDB.", + (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads, + 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, ++ {"innodb_io_pattern_trace", OPT_INNODB_IO_PATTERN_TRACE, ++ "Create/Drop the internal hash table for IO pattern tracing.", ++ (gptr*) &srv_io_pattern_trace, (gptr*) &srv_io_pattern_trace, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_io_pattern_trace_running", OPT_INNODB_IO_PATTERN_TRACE_RUNNING, ++ "Control IO pattern trace running or not.", ++ (gptr*) &srv_io_pattern_trace_running, (gptr*) &srv_io_pattern_trace_running, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, ++ {"innodb_io_pattern_size_limit", OPT_INNODB_IO_PATTERN_SIZE_LIMIT, ++ "Set max number of counters per data pages. (0 = disable counting).", ++ (gptr*) &srv_io_pattern_size_limit, (gptr*) &srv_io_pattern_size_limit, ++ 0, GET_ULONG, REQUIRED_ARG, 0, 0, ULONG_MAX - (1024 * 1024), 0, 0, 0}, + #endif /* End HAVE_INNOBASE_DB */ + {"isam", OPT_ISAM, "Obsolete. 
ISAM storage engine is no longer supported.", + (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0, +diff -r 2bbfde0e0e70 sql/set_var.cc +--- a/sql/set_var.cc Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/set_var.cc Mon Dec 22 00:33:48 2008 -0800 +@@ -501,6 +501,12 @@ + sys_var_long_ptr sys_innodb_show_verbose_locks( + "innodb_show_verbose_locks", + &srv_show_verbose_locks); ++sys_var_innodb_io_pattern_trace sys_innodb_io_pattern_trace("innodb_io_pattern_trace", ++ &srv_io_pattern_trace); ++sys_var_long_ptr sys_innodb_io_pattern_trace_running("innodb_io_pattern_trace_running", ++ &srv_io_pattern_trace_running); ++sys_var_long_ptr sys_innodb_io_pattern_size_limit("innodb_io_pattern_size_limit", ++ &srv_io_pattern_size_limit); + sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", + &innobase_data_file_path); + sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", +@@ -870,6 +876,9 @@ + &sys_innodb_adaptive_checkpoint, + &sys_innodb_show_locks_held, + &sys_innodb_show_verbose_locks, ++ &sys_innodb_io_pattern_trace, ++ &sys_innodb_io_pattern_trace_running, ++ &sys_innodb_io_pattern_size_limit, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -1012,6 +1021,9 @@ + {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS}, + {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG}, + {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG}, ++ {sys_innodb_io_pattern_trace.name, (char*) &sys_innodb_io_pattern_trace, SHOW_SYS}, ++ {sys_innodb_io_pattern_trace_running.name, (char*) &sys_innodb_io_pattern_trace_running, SHOW_SYS}, ++ {sys_innodb_io_pattern_size_limit.name, (char*) &sys_innodb_io_pattern_size_limit, SHOW_SYS}, + #endif + {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS}, + {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS}, +@@ -3117,6 +3129,19 @@ + 
thd->variables.lc_time_names= global_system_variables.lc_time_names; + } + ++#ifdef HAVE_INNOBASE_DB ++bool sys_var_innodb_io_pattern_trace::update(THD *thd, set_var *var) ++{ ++ bool ret; ++ ++ ret = sys_var_long_ptr_global::update(thd, var); ++ ++ innodb_io_pattern_control(); ++ ++ return ret; ++} ++#endif /* HAVE_INNOBASE_DB */ ++ + /* + Functions to update thd->options bits + */ +diff -r 2bbfde0e0e70 sql/set_var.h +--- a/sql/set_var.h Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/set_var.h Mon Dec 22 00:33:48 2008 -0800 +@@ -985,6 +985,17 @@ + virtual void set_default(THD *thd, enum_var_type type); + }; + ++#ifdef HAVE_INNOBASE_DB ++/* sys_var_innodb_io_pattern_trace */ ++class sys_var_innodb_io_pattern_trace :public sys_var_long_ptr ++{ ++public: ++ sys_var_innodb_io_pattern_trace(const char *name_arg, ulong *value_ptr_arg) ++ :sys_var_long_ptr(name_arg,value_ptr_arg) {} ++ bool update(THD *thd, set_var *var); ++}; ++#endif /* HAVE_INNOBASE_DB */ ++ + /**************************************************************************** + Classes for parsing of the SET command + ****************************************************************************/ +diff -r 2bbfde0e0e70 sql/sql_parse.cc +--- a/sql/sql_parse.cc Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/sql_parse.cc Mon Dec 22 00:33:48 2008 -0800 +@@ -7998,6 +7998,13 @@ + } + pthread_mutex_unlock(&LOCK_global_user_client_stats); + } ++#ifdef HAVE_INNOBASE_DB ++ if (options & REFRESH_INNODB_IO_PATTERN) ++ { ++ tmp_write_to_binlog= 0; ++ innodb_io_pattern_clear(); ++ } ++#endif /* HAVE_INNOBASE_DB */ + *write_to_binlog= tmp_write_to_binlog; + return result; + } +diff -r 2bbfde0e0e70 sql/sql_show.cc +--- a/sql/sql_show.cc Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/sql_show.cc Mon Dec 22 00:33:48 2008 -0800 +@@ -32,6 +32,17 @@ + #ifdef HAVE_INNOBASE_DB + #include "ha_innodb.h" + #endif ++ ++#ifdef HAVE_INNOBASE_DB ++#define INSIDE_HA_INNOBASE_CC ++extern "C" { ++#include "srv0srv.h" ++#include "buf0buf.h" ++#include 
"dict0dict.h" ++} ++/* We need to undef it in InnoDB */ ++#undef byte ++#endif /* HAVE_INNOBASE_DB */ + + #ifndef NO_EMBEDDED_ACCESS_CHECKS + static const char *grant_names[]={ +@@ -4074,6 +4085,67 @@ + DBUG_RETURN(res); + } + ++int innodb_io_pattern_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) ++{ ++ TABLE *table= (TABLE *) tables->table; ++ ++ buf_io_counter_t* io_counter; ++ dict_index_t* index; ++ ++ DBUG_ENTER("innodb_io_pattern_fill_table"); ++ int returnable= 0; ++ ++ /* We cannot use inline functions of InnoDB here */ ++ ++ /* !!!!!ATTENTION!!!!!: This function is not protected by mutex for performance. */ ++ /* Don't use "DROP TABLE innodb_io_pattern" and INFORMATION_SCHEMA.INNODB_IO_PATTERN */ ++ /* at the same time as possible. */ ++ ++ if (srv_io_pattern) { ++ for (ulint n=0; n < buf_pool->io_counter_hash->n_cells; n++) { ++ if (!srv_io_pattern) ++ goto end_func; ++ ++ io_counter = (buf_io_counter_t*)(buf_pool->io_counter_hash->array + n)->node; ++ while (io_counter) { ++ if (!srv_io_pattern) ++ goto end_func; ++ ++ if (dict_sys != NULL) { ++ dulint id; ++ id.high = 0; ++ id.low = io_counter->index_id; ++ index = dict_index_find_on_id_low(id); ++ } else { ++ index = NULL; ++ } ++ ++ table->field[0]->store(io_counter->space); ++ table->field[1]->store(io_counter->offset); ++ table->field[2]->store(io_counter->index_id); ++ if (index != NULL) { ++ table->field[3]->store(index->table_name,strlen(index->table_name),system_charset_info); ++ table->field[4]->store(index->name,strlen(index->name),system_charset_info); ++ } else { ++ table->field[3]->store("",0,system_charset_info); ++ table->field[4]->store("",0,system_charset_info); ++ } ++ table->field[5]->store(io_counter->n_read); ++ table->field[6]->store(io_counter->n_write); ++ if (schema_table_store_record(thd, table)) ++ { ++ returnable= 1; ++ goto end_func; ++ } ++ io_counter = io_counter->hash; ++ } ++ } ++ } ++ ++ end_func: ++ DBUG_RETURN(returnable); ++} ++ + /* + Find schema_tables elment 
by name + +@@ -4880,6 +4952,19 @@ + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} + }; + ++#ifdef HAVE_INNOBASE_DB ++ST_FIELD_INFO innodb_io_pattern_field_info[]= ++{ ++ {"SPACE", 11, MYSQL_TYPE_LONG, 0, 0, "space_id"}, ++ {"OFFSET", 11, MYSQL_TYPE_LONG, 0, 0, "offset"}, ++ {"INDEX_ID", 11, MYSQL_TYPE_LONG, 0, 0, "index id"}, ++ {"TABLE_NAME", 32, MYSQL_TYPE_STRING, 0, 0, "table name"}, ++ {"INDEX_NAME", 32, MYSQL_TYPE_STRING, 0, 0, "index name"}, ++ {"N_READ", 11, MYSQL_TYPE_LONG, 0, 0, "read ios"}, ++ {"N_WRITE", 11, MYSQL_TYPE_LONG, 0, 0, "write ios"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++#endif + + ST_FIELD_INFO variables_fields_info[]= + { +@@ -5055,6 +5140,10 @@ + make_old_format, 0, -1, -1, 1}, + {"VIEWS", view_fields_info, create_schema_table, + get_all_tables, 0, get_schema_views_record, 1, 2, 0}, ++#ifdef HAVE_INNOBASE_DB ++ {"INNODB_IO_PATTERN", innodb_io_pattern_field_info, create_schema_table, ++ innodb_io_pattern_fill_table, 0, 0, -1, -1, 0}, ++#endif + {0, 0, 0, 0, 0, 0, 0, 0, 0} + }; + +diff -r 2bbfde0e0e70 sql/sql_yacc.yy +--- a/sql/sql_yacc.yy Mon Dec 22 00:33:11 2008 -0800 ++++ b/sql/sql_yacc.yy Mon Dec 22 00:33:48 2008 -0800 +@@ -685,6 +685,7 @@ + %token INFILE + %token INNER_SYM + %token INNOBASE_SYM ++%token INNODB_IO_PATTERN + %token INOUT_SYM + %token INSENSITIVE_SYM + %token INSERT +@@ -8541,6 +8542,7 @@ + | MASTER_SYM { Lex->type|= REFRESH_MASTER; } + | DES_KEY_FILE { Lex->type|= REFRESH_DES_KEY_FILE; } + | RESOURCES { Lex->type|= REFRESH_USER_RESOURCES; } ++ | INNODB_IO_PATTERN { Lex->type|= REFRESH_INNODB_IO_PATTERN; } + | CLIENT_STATS_SYM { Lex->type|= REFRESH_CLIENT_STATS; } + | USER_STATS_SYM { Lex->type|= REFRESH_USER_STATS; } + | TABLE_STATS_SYM { Lex->type|= REFRESH_TABLE_STATS; } +@@ -9594,6 +9596,7 @@ + | ISOLATION {} + | ISSUER_SYM {} + | INNOBASE_SYM {} ++ | INNODB_IO_PATTERN {} + | INSERT_METHOD {} + | IO_SYM {} + | IPC_SYM {} diff --git a/mysql-innodb_locks_held.patch b/mysql-innodb_locks_held.patch new file mode 100644 index 
0000000..416d50e --- /dev/null +++ b/mysql-innodb_locks_held.patch @@ -0,0 +1,168 @@ +diff -r ae6708ab17e5 innobase/include/srv0srv.h +--- a/innobase/include/srv0srv.h Mon Dec 22 00:32:07 2008 -0800 ++++ b/innobase/include/srv0srv.h Mon Dec 22 00:32:58 2008 -0800 +@@ -80,6 +80,8 @@ + extern ulint srv_log_file_size; + extern ulint srv_log_buffer_size; + extern ulong srv_flush_log_at_trx_commit; ++extern ulong srv_show_locks_held; ++extern ulong srv_show_verbose_locks; + + extern byte srv_latin1_ordering[256];/* The sort order table of the latin1 + character set */ +diff -r ae6708ab17e5 innobase/lock/lock0lock.c +--- a/innobase/lock/lock0lock.c Mon Dec 22 00:32:07 2008 -0800 ++++ b/innobase/lock/lock0lock.c Mon Dec 22 00:32:58 2008 -0800 +@@ -4181,6 +4181,7 @@ + #endif /* UNIV_SYNC_DEBUG */ + } + ++ if ( srv_show_verbose_locks ) { + for (i = 0; i < lock_rec_get_n_bits(lock); i++) { + + if (lock_rec_get_nth_bit(lock, i)) { +@@ -4198,6 +4199,7 @@ + putc('\n', file); + } + } ++ } /* srv_show_verbose_locks */ + + mtr_commit(&mtr); + if (UNIV_LIKELY_NULL(heap)) { +@@ -4369,7 +4371,7 @@ + } + } + +- if (!srv_print_innodb_lock_monitor) { ++ if (!srv_print_innodb_lock_monitor && !srv_show_locks_held) { + nth_trx++; + goto loop; + } +@@ -4426,9 +4428,9 @@ + + nth_lock++; + +- if (nth_lock >= 10) { ++ if (nth_lock >= srv_show_locks_held) { + fputs( +- "10 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n", ++ "TOO MANY LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n", + file); + + nth_trx++; +diff -r ae6708ab17e5 innobase/srv/srv0srv.c +--- a/innobase/srv/srv0srv.c Mon Dec 22 00:32:07 2008 -0800 ++++ b/innobase/srv/srv0srv.c Mon Dec 22 00:32:58 2008 -0800 +@@ -116,6 +116,8 @@ + ulint srv_log_file_size = ULINT_MAX; /* size in database pages */ + ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ + ulong srv_flush_log_at_trx_commit = 1; ++ulong srv_show_locks_held = 10; ++ulong srv_show_verbose_locks = 0; + + byte srv_latin1_ordering[256] /* The
sort order table of the latin1 + character set. The following table is +diff -r ae6708ab17e5 libmysqld/set_var.cc +--- a/libmysqld/set_var.cc Mon Dec 22 00:32:07 2008 -0800 ++++ b/libmysqld/set_var.cc Mon Dec 22 00:32:58 2008 -0800 +@@ -821,6 +821,8 @@ + &sys_innodb_thread_concurrency, + &sys_innodb_commit_concurrency, + &sys_innodb_flush_log_at_trx_commit, ++ &sys_innodb_show_locks_held, ++ &sys_innodb_show_verbose_locks, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -936,6 +938,8 @@ + {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG }, + {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL}, + {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS}, ++ {sys_innodb_show_locks_held.name, (char*) &sys_innodb_show_locks_held, SHOW_SYS }, ++ {sys_innodb_show_verbose_locks.name, (char*) &sys_innodb_show_verbose_locks, SHOW_SYS }, + {"innodb_flush_method", (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR}, + {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG }, + {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG }, +diff -r ae6708ab17e5 patch_info/innodb_locks_held.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_locks_held.info Mon Dec 22 00:32:58 2008 -0800 +@@ -0,0 +1,6 @@ ++File=innodb_locks_held.patch ++Name=Add locks held, remove locked records in SHOW INNODB STATUS ++Version=1.0 ++Author=Baron Schwartz ++License=GPL ++Comment=Bug #29126 fix +diff -r ae6708ab17e5 sql/ha_innodb.h +--- a/sql/ha_innodb.h Mon Dec 22 00:32:07 2008 -0800 ++++ b/sql/ha_innodb.h Mon Dec 22 00:32:58 2008 -0800 +@@ -238,6 +238,8 @@ + extern ulong srv_io_capacity; + extern ulong srv_read_ahead; + extern ulong srv_adaptive_checkpoint; ++extern ulong srv_show_locks_held; ++extern ulong srv_show_verbose_locks; + } + + bool innobase_init(void); +diff -r ae6708ab17e5 sql/mysqld.cc +--- a/sql/mysqld.cc Mon 
Dec 22 00:32:07 2008 -0800 ++++ b/sql/mysqld.cc Mon Dec 22 00:32:58 2008 -0800 +@@ -4969,6 +4969,8 @@ + OPT_INNODB_MAX_PURGE_LAG, + OPT_INNODB_FILE_IO_THREADS, + OPT_INNODB_LOCK_WAIT_TIMEOUT, ++ OPT_INNODB_SHOW_LOCKS_HELD, ++ OPT_INNODB_SHOW_VERBOSE_LOCKS, + OPT_INNODB_THREAD_CONCURRENCY, + OPT_INNODB_COMMIT_CONCURRENCY, + OPT_INNODB_FORCE_RECOVERY, +@@ -5308,6 +5310,14 @@ + (gptr*) &srv_flush_log_at_trx_commit, + (gptr*) &srv_flush_log_at_trx_commit, + 0, GET_ULONG, OPT_ARG, 1, 0, 2, 0, 0, 0}, ++ {"innodb_show_locks_held", OPT_INNODB_SHOW_LOCKS_HELD, ++ "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.", ++ (gptr*) &srv_show_locks_held, (gptr*) &srv_show_locks_held, ++ 0, GET_ULONG, OPT_ARG, 10, 0, 1000, 0, 1, 0}, ++ {"innodb_show_verbose_locks", OPT_INNODB_SHOW_VERBOSE_LOCKS, ++ "Whether to show records locked in SHOW INNODB STATUS.", ++ (gptr*) &srv_show_verbose_locks, (gptr*) &srv_show_verbose_locks, ++ 0, GET_ULONG, OPT_ARG, 0, 0, 1, 0, 1, 0}, + {"innodb_flush_method", OPT_INNODB_FLUSH_METHOD, + "With which method to flush data.", (gptr*) &innobase_unix_file_flush_method, + (gptr*) &innobase_unix_file_flush_method, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, +diff -r ae6708ab17e5 sql/set_var.cc +--- a/sql/set_var.cc Mon Dec 22 00:32:07 2008 -0800 ++++ b/sql/set_var.cc Mon Dec 22 00:32:58 2008 -0800 +@@ -495,6 +495,12 @@ + &srv_read_ahead); + sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint", + &srv_adaptive_checkpoint); ++sys_var_long_ptr sys_innodb_show_locks_held( ++ "innodb_show_locks_held", ++ &srv_show_locks_held); ++sys_var_long_ptr sys_innodb_show_verbose_locks( ++ "innodb_show_verbose_locks", ++ &srv_show_verbose_locks); + sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", + &innobase_data_file_path); + sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", +@@ -862,6 +868,8 @@ + &sys_innodb_io_capacity, + &sys_innodb_read_ahead, +
&sys_innodb_adaptive_checkpoint, ++ &sys_innodb_show_locks_held, ++ &sys_innodb_show_verbose_locks, + #endif + &sys_trust_routine_creators, + &sys_trust_function_creators, +@@ -977,6 +985,8 @@ + {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG }, + {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL}, + {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS}, ++ {sys_innodb_show_locks_held.name, (char*) &sys_innodb_show_locks_held, SHOW_SYS }, ++ {sys_innodb_show_verbose_locks.name, (char*) &sys_innodb_show_verbose_locks, SHOW_SYS }, + {"innodb_flush_method", (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR}, + {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG }, + {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG }, diff --git a/mysql-innodb_rw_lock.patch b/mysql-innodb_rw_lock.patch new file mode 100644 index 0000000..3070bb0 --- /dev/null +++ b/mysql-innodb_rw_lock.patch @@ -0,0 +1,1459 @@ +diff -r 962aec0d731c innobase/configure +--- a/innobase/configure Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/configure Thu Oct 09 08:30:28 2008 -0700 +@@ -20519,6 +20519,88 @@ + + fi + done ++ ++ ++# as http://lists.mysql.com/commits/40686 does ++{ echo "$as_me:$LINENO: checking whether the compiler provides atomic builtins" >&5 ++echo $ECHO_N "checking whether the compiler provides atomic builtins... $ECHO_C" >&6; } ++if test "${mysql_cv_atomic_builtins+set}" = set; then ++ echo $ECHO_N "(cached) $ECHO_C" >&6 ++else ++ if test "$cross_compiling" = yes; then ++ { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling ++See \`config.log' for more details." >&5 ++echo "$as_me: error: cannot run test program while cross compiling ++See \`config.log' for more details." >&2;} ++ { (exit 1); exit 1; }; } ++else ++ cat >conftest.$ac_ext <<_ACEOF ++/* confdefs.h. 
*/ ++_ACEOF ++cat confdefs.h >>conftest.$ac_ext ++cat >>conftest.$ac_ext <<_ACEOF ++/* end confdefs.h. */ ++ ++ int main() ++ { ++ int foo= -10; int bar= 10; ++ __sync_fetch_and_add(&foo, bar); ++ if (foo) ++ return -1; ++ bar= __sync_lock_test_and_set(&foo, bar); ++ if (bar || foo != 10) ++ return -1; ++ bar= __sync_val_compare_and_swap(&bar, foo, 15); ++ if (bar) ++ return -1; ++ return 0; ++ } ++ ++_ACEOF ++rm -f conftest$ac_exeext ++if { (ac_try="$ac_link" ++case "(($ac_try" in ++ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; ++ *) ac_try_echo=$ac_try;; ++esac ++eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 ++ (eval "$ac_link") 2>&5 ++ ac_status=$? ++ echo "$as_me:$LINENO: \$? = $ac_status" >&5 ++ (exit $ac_status); } && { ac_try='./conftest$ac_exeext' ++ { (case "(($ac_try" in ++ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; ++ *) ac_try_echo=$ac_try;; ++esac ++eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 ++ (eval "$ac_try") 2>&5 ++ ac_status=$? ++ echo "$as_me:$LINENO: \$? = $ac_status" >&5 ++ (exit $ac_status); }; }; then ++ mysql_cv_atomic_builtins=yes ++else ++ echo "$as_me: program exited with status $ac_status" >&5 ++echo "$as_me: failed program was:" >&5 ++sed 's/^/| /' conftest.$ac_ext >&5 ++ ++( exit $ac_status ) ++mysql_cv_atomic_builtins=no ++fi ++rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext ++fi ++ ++ ++fi ++{ echo "$as_me:$LINENO: result: $mysql_cv_atomic_builtins" >&5 ++echo "${ECHO_T}$mysql_cv_atomic_builtins" >&6; } ++ ++if test "x$mysql_cv_atomic_builtins" = xyes; then ++ ++cat >>confdefs.h <<\_ACEOF ++#define HAVE_ATOMIC_BUILTINS 1 ++_ACEOF ++ ++fi + + #AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args. + # Some versions of Unix only take 2 arguments. 
+diff -r 962aec0d731c innobase/configure.in +--- a/innobase/configure.in Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/configure.in Thu Oct 09 08:30:28 2008 -0700 +@@ -42,6 +42,31 @@ + AC_CHECK_FUNCS(sched_yield) + AC_CHECK_FUNCS(fdatasync) + AC_CHECK_FUNCS(localtime_r) ++ ++# as http://lists.mysql.com/commits/40686 does ++AC_CACHE_CHECK([whether the compiler provides atomic builtins], ++ [mysql_cv_atomic_builtins], [AC_TRY_RUN([ ++ int main() ++ { ++ int foo= -10; int bar= 10; ++ __sync_fetch_and_add(&foo, bar); ++ if (foo) ++ return -1; ++ bar= __sync_lock_test_and_set(&foo, bar); ++ if (bar || foo != 10) ++ return -1; ++ bar= __sync_val_compare_and_swap(&bar, foo, 15); ++ if (bar) ++ return -1; ++ return 0; ++ } ++], [mysql_cv_atomic_builtins=yes], [mysql_cv_atomic_builtins=no])]) ++ ++if test "x$mysql_cv_atomic_builtins" = xyes; then ++ AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1, ++ [Define to 1 if compiler provides atomic builtins.]) ++fi ++ + #AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args. + # Some versions of Unix only take 2 arguments. + #AC_C_INLINE Already checked in MySQL +diff -r 962aec0d731c innobase/ib_config.h +--- a/innobase/ib_config.h Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/ib_config.h Thu Oct 09 08:30:28 2008 -0700 +@@ -3,6 +3,9 @@ + + /* Define to 1 if you have the header file. */ + #define HAVE_AIO_H 1 ++ ++/* Define to 1 if compiler provides atomic builtins. */ ++#define HAVE_ATOMIC_BUILTINS 1 + + /* Define to 1 if you have the header file. */ + #define HAVE_DLFCN_H 1 +diff -r 962aec0d731c innobase/ib_config.h.in +--- a/innobase/ib_config.h.in Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/ib_config.h.in Thu Oct 09 08:30:28 2008 -0700 +@@ -2,6 +2,9 @@ + + /* Define to 1 if you have the header file. */ + #undef HAVE_AIO_H ++ ++/* Define to 1 if compiler provides atomic builtins. */ ++#undef HAVE_ATOMIC_BUILTINS + + /* Define to 1 if you have the header file. 
*/ + #undef HAVE_DLFCN_H +diff -r 962aec0d731c innobase/include/sync0rw.h +--- a/innobase/include/sync0rw.h Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/include/sync0rw.h Thu Oct 09 08:30:28 2008 -0700 +@@ -325,7 +325,17 @@ + Accessor functions for rw lock. */ + UNIV_INLINE + ulint +-rw_lock_get_waiters( ++rw_lock_get_s_waiters( ++/*==================*/ ++ rw_lock_t* lock); ++UNIV_INLINE ++ulint ++rw_lock_get_x_waiters( ++/*==================*/ ++ rw_lock_t* lock); ++UNIV_INLINE ++ulint ++rw_lock_get_wx_waiters( + /*================*/ + rw_lock_t* lock); + UNIV_INLINE +@@ -408,6 +418,11 @@ + rw_lock_debug_t* info); /* in: debug struct */ + #endif /* UNIV_SYNC_DEBUG */ + ++#ifdef HAVE_ATOMIC_BUILTINS ++/* This value means NOT_LOCKED */ ++#define RW_LOCK_BIAS 0x00100000 ++#endif ++ + /* NOTE! The structure appears here only for the compiler to know its size. + Do not use its fields directly! The structure used in the spin lock + implementation of a read-write lock. Several threads may have a shared lock +@@ -417,9 +432,9 @@ + field. Then no new readers are allowed in. */ + + struct rw_lock_struct { +- os_event_t event; /* Used by sync0arr.c for thread queueing */ +- +-#ifdef __WIN__ ++ /* Used by sync0arr.c for thread queueing */ ++ os_event_t s_event; /* Used for s_lock */ ++ os_event_t x_event; /* Used for x_lock */ + os_event_t wait_ex_event; /* This windows specific event is + used by the thread which has set the + lock state to RW_LOCK_WAIT_EX. The +@@ -427,31 +442,35 @@ + thread will be the next one to proceed + once the current the event gets + signalled. 
See LEMMA 2 in sync0sync.c */ ++ ++#ifdef HAVE_ATOMIC_BUILTINS ++ volatile lint lock_word; /* Used by using atomic builtin */ + #endif + +- ulint reader_count; /* Number of readers who have locked this ++ volatile ulint reader_count; /* Number of readers who have locked this + lock in the shared mode */ +- ulint writer; /* This field is set to RW_LOCK_EX if there ++ volatile ulint writer; /* This field is set to RW_LOCK_EX if there + is a writer owning the lock (in exclusive + mode), RW_LOCK_WAIT_EX if a writer is + queueing for the lock, and + RW_LOCK_NOT_LOCKED, otherwise. */ +- os_thread_id_t writer_thread; ++ volatile os_thread_id_t writer_thread; + /* Thread id of a possible writer thread */ +- ulint writer_count; /* Number of times the same thread has ++ volatile ulint writer_count; /* Number of times the same thread has + recursively locked the lock in the exclusive + mode */ ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_t mutex; /* The mutex protecting rw_lock_struct */ ++#endif + ulint pass; /* Default value 0. This is set to some + value != 0 given by the caller of an x-lock + operation, if the x-lock is to be passed to + another thread to unlock (which happens in + asynchronous i/o). */ +- ulint waiters; /* This ulint is set to 1 if there are +- waiters (readers or writers) in the global +- wait array, waiting for this rw_lock. +- Otherwise, == 0. 
*/ +- ibool writer_is_wait_ex; ++ volatile ulint s_waiters; /* 1: there are waiters (s_lock) */ ++ volatile ulint x_waiters; /* 1: there are waiters (x_lock) */ ++ volatile ulint wait_ex_waiters; /* 1: there are waiters (wait_ex) */ ++ volatile ibool writer_is_wait_ex; + /* This is TRUE if the writer field is + RW_LOCK_WAIT_EX; this field is located far + from the memory update hotspot fields which +diff -r 962aec0d731c innobase/include/sync0rw.ic +--- a/innobase/include/sync0rw.ic Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/include/sync0rw.ic Thu Oct 09 08:30:28 2008 -0700 +@@ -47,20 +47,52 @@ + Accessor functions for rw lock. */ + UNIV_INLINE + ulint +-rw_lock_get_waiters( ++rw_lock_get_s_waiters( + /*================*/ + rw_lock_t* lock) + { +- return(lock->waiters); ++ return(lock->s_waiters); ++} ++UNIV_INLINE ++ulint ++rw_lock_get_x_waiters( ++/*================*/ ++ rw_lock_t* lock) ++{ ++ return(lock->x_waiters); ++} ++UNIV_INLINE ++ulint ++rw_lock_get_wx_waiters( ++/*================*/ ++ rw_lock_t* lock) ++{ ++ return(lock->wait_ex_waiters); + } + UNIV_INLINE + void +-rw_lock_set_waiters( +-/*================*/ ++rw_lock_set_s_waiters( + rw_lock_t* lock, + ulint flag) + { +- lock->waiters = flag; ++ lock->s_waiters = flag; ++} ++UNIV_INLINE ++void ++rw_lock_set_x_waiters( ++ rw_lock_t* lock, ++ ulint flag) ++{ ++ lock->x_waiters = flag; ++} ++UNIV_INLINE ++void ++rw_lock_set_wx_waiters( ++/*================*/ ++ rw_lock_t* lock, ++ ulint flag) ++{ ++ lock->wait_ex_waiters = flag; + } + UNIV_INLINE + ulint +@@ -68,7 +100,19 @@ + /*===============*/ + rw_lock_t* lock) + { ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (lock->writer == RW_LOCK_NOT_LOCKED) { ++ return(RW_LOCK_NOT_LOCKED); ++ } ++ ++ if (lock->writer_is_wait_ex) { ++ return(RW_LOCK_WAIT_EX); ++ } else { ++ return(RW_LOCK_EX); ++ } ++#else + return(lock->writer); ++#endif + } + UNIV_INLINE + void +@@ -96,6 +140,7 @@ + { + lock->reader_count = count; + } ++#ifndef HAVE_ATOMIC_BUILTINS + UNIV_INLINE + 
mutex_t* + rw_lock_get_mutex( +@@ -104,6 +149,7 @@ + { + return(&(lock->mutex)); + } ++#endif + + /********************************************************************** + Returns the value of writer_count for the lock. Does not reserve the lock +@@ -133,14 +179,26 @@ + const char* file_name, /* in: file name where lock requested */ + ulint line) /* in: line where requested */ + { +-#ifdef UNIV_SYNC_DEBUG ++#if defined(UNIV_SYNC_DEBUG) && !defined(HAVE_ATOMIC_BUILTINS) + ut_ad(mutex_own(rw_lock_get_mutex(lock))); + #endif /* UNIV_SYNC_DEBUG */ + /* Check if the writer field is free */ + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (UNIV_LIKELY(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)) { ++ /* try s-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) { ++ /* fail */ ++ __sync_fetch_and_add(&(lock->lock_word),1); ++ return(FALSE); /* locking did not succeed */ ++ } ++ /* success */ ++ __sync_fetch_and_add(&(lock->reader_count),1); ++#else + if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) { + /* Set the shared lock by incrementing the reader count */ + lock->reader_count++; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, +@@ -167,11 +225,15 @@ + const char* file_name, /* in: file name where requested */ + ulint line) /* in: line where lock requested */ + { +- ut_ad(lock->writer == RW_LOCK_NOT_LOCKED); ++ ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); + ut_ad(rw_lock_get_reader_count(lock) == 0); + + /* Set the shared lock by incrementing the reader count */ ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_fetch_and_add(&(lock->reader_count),1); ++#else + lock->reader_count++; ++#endif + + lock->last_s_file_name = file_name; + lock->last_s_line = line; +@@ -199,7 +261,11 @@ + + rw_lock_set_writer(lock, RW_LOCK_EX); + lock->writer_thread = os_thread_get_curr_id(); ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_fetch_and_add(&(lock->writer_count),1); ++#else + lock->writer_count++; ++#endif + lock->pass = 0; + + 
lock->last_x_file_name = file_name; +@@ -241,15 +307,21 @@ + ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */ + #endif /* UNIV_SYNC_DEBUG */ + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return; /* Success */ + } else { + /* Did not succeed, try spin wait */ ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + rw_lock_s_lock_spin(lock, pass, file_name, line); + +@@ -272,11 +344,23 @@ + { + ibool success = FALSE; + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { ++ /* try s-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) { ++ /* fail */ ++ __sync_fetch_and_add(&(lock->lock_word),1); ++ return(FALSE); /* locking did not succeed */ ++ } ++ /* success */ ++ __sync_fetch_and_add(&(lock->reader_count),1); ++#else + mutex_enter(rw_lock_get_mutex(lock)); + + if (lock->writer == RW_LOCK_NOT_LOCKED) { + /* Set the shared lock by incrementing the reader count */ + lock->reader_count++; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, +@@ -289,7 +373,9 @@ + success = TRUE; + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return(success); + } +@@ -309,6 +395,55 @@ + { + ibool success = FALSE; + os_thread_id_t curr_thread = os_thread_get_curr_id(); ++#ifdef HAVE_ATOMIC_BUILTINS ++ if ((lock->lock_word == RW_LOCK_BIAS) ++ && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { ++ /* try x-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word), ++ RW_LOCK_BIAS) == 0) { ++ /* success */ ++ /* try to lock writer */ ++ if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX) ++ == RW_LOCK_NOT_LOCKED) { ++ /* success */ ++ lock->writer_thread = curr_thread; ++ lock->pass = 0; ++ lock->writer_is_wait_ex = FALSE; ++ /* 
next function may work as memory barrier */ ++ relock: ++ __sync_fetch_and_add(&(lock->writer_count),1); ++ ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); ++#endif ++ ++ lock->last_x_file_name = file_name; ++ lock->last_x_line = line; ++ ++ ut_ad(rw_lock_validate(lock)); ++ ++ return(TRUE); ++ } else { ++ /* x-unlock */ ++ __sync_fetch_and_add(&(lock->lock_word), ++ RW_LOCK_BIAS); ++ } ++ } else { ++ /* fail (x-lock) */ ++ __sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS); ++ } ++ } ++ ++ if (lock->pass == 0 ++ && os_thread_eq(lock->writer_thread, curr_thread) ++ && rw_lock_get_writer(lock) == RW_LOCK_EX) { ++ goto relock; ++ } ++ ++ ut_ad(rw_lock_validate(lock)); ++ ++ return(FALSE); ++#else + mutex_enter(rw_lock_get_mutex(lock)); + + if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) { +@@ -339,6 +474,7 @@ + ut_ad(rw_lock_validate(lock)); + + return(success); ++#endif + } + + /********************************************************************** +@@ -354,16 +490,33 @@ + #endif + ) + { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_t* mutex = &(lock->mutex); +- ibool sg = FALSE; ++#endif ++ ibool x_sg = FALSE; ++ ibool wx_sg = FALSE; ++#ifdef HAVE_ATOMIC_BUILTINS ++ ibool last = FALSE; ++#endif + ++#ifndef HAVE_ATOMIC_BUILTINS + /* Acquire the mutex protecting the rw-lock fields */ + mutex_enter(mutex); ++#endif + + /* Reset the shared lock by decrementing the reader count */ + + ut_a(lock->reader_count > 0); ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* unlock lock_word */ ++ __sync_fetch_and_add(&(lock->lock_word),1); ++ ++ if(__sync_sub_and_fetch(&(lock->reader_count),1) == 0) { ++ last = TRUE; ++ } ++#else + lock->reader_count--; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED); +@@ -372,20 +525,36 @@ + /* If there may be waiters and this was the last s-lock, + signal the object */ + +- if (UNIV_UNLIKELY(lock->waiters) ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (UNIV_UNLIKELY(last && 
lock->wait_ex_waiters)) { ++#else ++ if (UNIV_UNLIKELY(lock->wait_ex_waiters) + && lock->reader_count == 0) { +- sg = TRUE; ++#endif ++ wx_sg = TRUE; + +- rw_lock_set_waiters(lock, 0); ++ rw_lock_set_wx_waiters(lock, 0); ++ } ++#ifdef HAVE_ATOMIC_BUILTINS ++ else if (UNIV_UNLIKELY(last && lock->x_waiters)) { ++#else ++ else if (UNIV_UNLIKELY(lock->x_waiters) ++ && lock->reader_count == 0) { ++#endif ++ x_sg = TRUE; ++ ++ rw_lock_set_x_waiters(lock, 0); + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(mutex); ++#endif + +- if (UNIV_UNLIKELY(sg)) { +-#ifdef __WIN__ ++ if (UNIV_UNLIKELY(wx_sg)) { + os_event_set(lock->wait_ex_event); +-#endif +- os_event_set(lock->event); ++ sync_array_object_signalled(sync_primary_wait_array); ++ } else if (UNIV_UNLIKELY(x_sg)) { ++ os_event_set(lock->x_event); + sync_array_object_signalled(sync_primary_wait_array); + } + +@@ -409,13 +578,22 @@ + + ut_ad(lock->reader_count > 0); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_sub_and_fetch(&(lock->reader_count),1); ++#else + lock->reader_count--; ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED); + #endif + ++#ifdef HAVE_ATOMIC_BUILTINS ++ ut_ad(!lock->s_waiters); ++ ut_ad(!lock->x_waiters); ++#else + ut_ad(!lock->waiters); ++#endif + ut_ad(rw_lock_validate(lock)); + #ifdef UNIV_SYNC_PERF_STAT + rw_s_exit_count++; +@@ -435,41 +613,81 @@ + #endif + ) + { +- ibool sg = FALSE; ++#ifdef HAVE_ATOMIC_BUILTINS ++ ibool last = FALSE; ++#endif ++ ibool s_sg = FALSE; ++ ibool x_sg = FALSE; + ++#ifndef HAVE_ATOMIC_BUILTINS + /* Acquire the mutex protecting the rw-lock fields */ + mutex_enter(&(lock->mutex)); ++#endif + + /* Reset the exclusive lock if this thread no longer has an x-mode + lock */ + + ut_ad(lock->writer_count > 0); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) { ++ last = TRUE; ++ } ++ ++ if (last) { ++ /* unlock lock_word */ ++ __sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS); ++ ++ /* FIXME: It 
is a value of bad manners for pthread. ++ But we shouldn't keep an ID of not-owner. */ ++ lock->writer_thread = -1; ++ ++ /* atomic operation may be safer about memory order. */ ++ rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); ++ __sync_synchronize(); ++ } ++#else + lock->writer_count--; + + if (lock->writer_count == 0) { + rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); + } ++#endif + + #ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX); + #endif + + /* If there may be waiters, signal the lock */ +- if (UNIV_UNLIKELY(lock->waiters) +- && lock->writer_count == 0) { +- +- sg = TRUE; +- rw_lock_set_waiters(lock, 0); ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (last) { ++#else ++ if (lock->writer_count == 0) { ++#endif ++ if(lock->s_waiters){ ++ s_sg = TRUE; ++ rw_lock_set_s_waiters(lock, 0); ++ } ++ if(lock->x_waiters){ ++ x_sg = TRUE; ++ rw_lock_set_x_waiters(lock, 0); ++ } + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + +- if (UNIV_UNLIKELY(sg)) { ++ if (UNIV_UNLIKELY(s_sg)) { ++ os_event_set(lock->s_event); ++ sync_array_object_signalled(sync_primary_wait_array); ++ } ++ if (UNIV_UNLIKELY(x_sg)) { + #ifdef __WIN__ ++ /* I doubt the necessity of it. 
*/ + os_event_set(lock->wait_ex_event); + #endif +- os_event_set(lock->event); ++ os_event_set(lock->x_event); + sync_array_object_signalled(sync_primary_wait_array); + } + +@@ -494,9 +712,13 @@ + + ut_ad(lock->writer_count > 0); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) { ++#else + lock->writer_count--; + + if (lock->writer_count == 0) { ++#endif + rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); + } + +@@ -504,7 +726,12 @@ + rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX); + #endif + ++#ifdef HAVE_ATOMIC_BUILTINS ++ ut_ad(!lock->s_waiters); ++ ut_ad(!lock->x_waiters); ++#else + ut_ad(!lock->waiters); ++#endif + ut_ad(rw_lock_validate(lock)); + + #ifdef UNIV_SYNC_PERF_STAT +diff -r 962aec0d731c innobase/sync/sync0arr.c +--- a/innobase/sync/sync0arr.c Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/sync/sync0arr.c Thu Oct 09 08:30:28 2008 -0700 +@@ -309,13 +309,13 @@ + { + if (type == SYNC_MUTEX) { + return(os_event_reset(((mutex_t *) object)->event)); +-#ifdef __WIN__ + } else if (type == RW_LOCK_WAIT_EX) { + return(os_event_reset( + ((rw_lock_t *) object)->wait_ex_event)); +-#endif +- } else { +- return(os_event_reset(((rw_lock_t *) object)->event)); ++ } else if (type == RW_LOCK_SHARED) { ++ return(os_event_reset(((rw_lock_t *) object)->s_event)); ++ } else { /* RW_LOCK_EX */ ++ return(os_event_reset(((rw_lock_t *) object)->x_event)); + } + } + +@@ -415,15 +415,12 @@ + + if (cell->request_type == SYNC_MUTEX) { + event = ((mutex_t*) cell->wait_object)->event; +-#ifdef __WIN__ +- /* On windows if the thread about to wait is the one which +- has set the state of the rw_lock to RW_LOCK_WAIT_EX, then +- it waits on a special event i.e.: wait_ex_event. 
*/ + } else if (cell->request_type == RW_LOCK_WAIT_EX) { + event = ((rw_lock_t*) cell->wait_object)->wait_ex_event; +-#endif +- } else { +- event = ((rw_lock_t*) cell->wait_object)->event; ++ } else if (cell->request_type == RW_LOCK_SHARED) { ++ event = ((rw_lock_t*) cell->wait_object)->s_event; ++ } else { ++ event = ((rw_lock_t*) cell->wait_object)->x_event; + } + + cell->waiting = TRUE; +@@ -464,6 +461,7 @@ + mutex_t* mutex; + rw_lock_t* rwlock; + ulint type; ++ ulint writer; + + type = cell->request_type; + +@@ -492,12 +490,10 @@ + (ulong) mutex->waiters); + + } else if (type == RW_LOCK_EX +-#ifdef __WIN__ + || type == RW_LOCK_WAIT_EX +-#endif + || type == RW_LOCK_SHARED) { + +- fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file); ++ fputs(type == RW_LOCK_SHARED ? "S-lock on" : "X-lock on", file); + + rwlock = cell->old_wait_rw_lock; + +@@ -505,21 +501,23 @@ + " RW-latch at %p created in file %s line %lu\n", + rwlock, rwlock->cfile_name, + (ulong) rwlock->cline); +- if (rwlock->writer != RW_LOCK_NOT_LOCKED) { ++ writer = rw_lock_get_writer(rwlock); ++ if (writer != RW_LOCK_NOT_LOCKED) { + fprintf(file, + "a writer (thread id %lu) has reserved it in mode %s", + (ulong) os_thread_pf(rwlock->writer_thread), +- rwlock->writer == RW_LOCK_EX ++ writer == RW_LOCK_EX + ? 
" exclusive\n" + : " wait exclusive\n"); + } + + fprintf(file, +- "number of readers %lu, waiters flag %lu\n" ++ "number of readers %lu, s_waiters flag %lu, x_waiters flag %lu\n" + "Last time read locked in file %s line %lu\n" + "Last time write locked in file %s line %lu\n", + (ulong) rwlock->reader_count, +- (ulong) rwlock->waiters, ++ (ulong) rwlock->s_waiters, ++ (ulong) (rwlock->x_waiters || rwlock->wait_ex_waiters), + rwlock->last_s_file_name, + (ulong) rwlock->last_s_line, + rwlock->last_x_file_name, +@@ -839,11 +837,15 @@ + /*========================*/ + sync_array_t* arr) /* in: wait array */ + { ++#ifdef HAVE_ATOMIC_BUILTINS ++ __sync_fetch_and_add(&(arr->sg_count),1); ++#else + sync_array_enter(arr); + + arr->sg_count++; + + sync_array_exit(arr); ++#endif + } + + /************************************************************************** +@@ -880,19 +882,23 @@ + + mutex = cell->wait_object; + os_event_set(mutex->event); +-#ifdef __WIN__ + } else if (cell->request_type + == RW_LOCK_WAIT_EX) { + rw_lock_t* lock; + + lock = cell->wait_object; + os_event_set(lock->wait_ex_event); +-#endif +- } else { ++ } else if (cell->request_type ++ == RW_LOCK_SHARED) { + rw_lock_t* lock; + + lock = cell->wait_object; +- os_event_set(lock->event); ++ os_event_set(lock->s_event); ++ } else { ++ rw_lock_t* lock; ++ ++ lock = cell->wait_object; ++ os_event_set(lock->x_event); + } + } + } +diff -r 962aec0d731c innobase/sync/sync0rw.c +--- a/innobase/sync/sync0rw.c Thu Oct 09 08:28:53 2008 -0700 ++++ b/innobase/sync/sync0rw.c Thu Oct 09 08:30:28 2008 -0700 +@@ -99,6 +99,7 @@ + object is created, then the following call initializes + the sync system. 
*/ + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_create(rw_lock_get_mutex(lock)); + mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK); + +@@ -108,8 +109,14 @@ + lock->mutex.cmutex_name = cmutex_name; + lock->mutex.mutex_type = 1; + #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ ++#endif /* !HAVE_ATOMIC_BUILTINS */ + +- rw_lock_set_waiters(lock, 0); ++#ifdef HAVE_ATOMIC_BUILTINS ++ lock->lock_word = RW_LOCK_BIAS; ++#endif ++ rw_lock_set_s_waiters(lock, 0); ++ rw_lock_set_x_waiters(lock, 0); ++ rw_lock_set_wx_waiters(lock, 0); + rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED); + lock->writer_count = 0; + rw_lock_set_reader_count(lock, 0); +@@ -130,11 +137,9 @@ + lock->last_x_file_name = "not yet reserved"; + lock->last_s_line = 0; + lock->last_x_line = 0; +- lock->event = os_event_create(NULL); +- +-#ifdef __WIN__ ++ lock->s_event = os_event_create(NULL); ++ lock->x_event = os_event_create(NULL); + lock->wait_ex_event = os_event_create(NULL); +-#endif + + mutex_enter(&rw_lock_list_mutex); + +@@ -162,19 +167,21 @@ + ut_a(rw_lock_validate(lock)); + #endif /* UNIV_DEBUG */ + ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); +- ut_a(rw_lock_get_waiters(lock) == 0); ++ ut_a(rw_lock_get_s_waiters(lock) == 0); ++ ut_a(rw_lock_get_x_waiters(lock) == 0); ++ ut_a(rw_lock_get_wx_waiters(lock) == 0); + ut_a(rw_lock_get_reader_count(lock) == 0); + + lock->magic_n = 0; + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_free(rw_lock_get_mutex(lock)); ++#endif + + mutex_enter(&rw_lock_list_mutex); +- os_event_free(lock->event); +- +-#ifdef __WIN__ ++ os_event_free(lock->s_event); ++ os_event_free(lock->x_event); + os_event_free(lock->wait_ex_event); +-#endif + + if (UT_LIST_GET_PREV(list, lock)) { + ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); +@@ -192,6 +199,8 @@ + Checks that the rw-lock has been initialized and that there are no + simultaneous shared and exclusive locks. */ + ++/* MEMO: If HAVE_ATOMIC_BUILTINS, we should use this function statically. 
*/ ++ + ibool + rw_lock_validate( + /*=============*/ +@@ -199,7 +208,9 @@ + { + ut_a(lock); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + ut_a(lock->magic_n == RW_LOCK_MAGIC_N); + ut_a((rw_lock_get_reader_count(lock) == 0) +@@ -207,11 +218,17 @@ + ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX) + || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX) + || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)); +- ut_a((rw_lock_get_waiters(lock) == 0) +- || (rw_lock_get_waiters(lock) == 1)); ++ ut_a((rw_lock_get_s_waiters(lock) == 0) ++ || (rw_lock_get_s_waiters(lock) == 1)); ++ ut_a((rw_lock_get_x_waiters(lock) == 0) ++ || (rw_lock_get_x_waiters(lock) == 1)); ++ ut_a((rw_lock_get_wx_waiters(lock) == 0) ++ || (rw_lock_get_wx_waiters(lock) == 1)); + ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return(TRUE); + } +@@ -237,13 +254,14 @@ + ut_ad(rw_lock_validate(lock)); + + lock_loop: ++ i = 0; ++spin_loop: + rw_s_spin_wait_count++; + + /* Spin waiting for the writer field to become free */ +- i = 0; + +- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED +- && i < SYNC_SPIN_ROUNDS) { ++ while (i < SYNC_SPIN_ROUNDS ++ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) { + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); + } +@@ -262,15 +280,27 @@ + lock->cfile_name, (ulong) lock->cline, (ulong) i); + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + /* We try once again to obtain the lock */ + + if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return; /* Success */ + } else { ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ i++; ++ ++ if (i < SYNC_SPIN_ROUNDS) { ++ goto spin_loop; ++ } ++#endif + /* If we get here, locking did not succeed, we may + suspend the 
thread to wait in the wait array */ + +@@ -281,9 +311,19 @@ + file_name, line, + &index); + +- rw_lock_set_waiters(lock, 1); ++ rw_lock_set_s_waiters(lock, 1); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ for (i = 0; i < 4; i++) { ++ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { ++ sync_array_free_cell(sync_primary_wait_array, index); ++ return; /* Success */ ++ } ++ } ++#else + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + if (srv_print_latch_waits) { + fprintf(stderr, +@@ -318,13 +358,19 @@ + { + ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + lock->writer_thread = os_thread_get_curr_id(); + + lock->pass = 0; + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#else ++ __sync_synchronize(); ++#endif + } + + /********************************************************************** +@@ -342,6 +388,89 @@ + const char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ + { ++#ifdef HAVE_ATOMIC_BUILTINS ++ os_thread_id_t curr_thread = os_thread_get_curr_id(); ++ ++ /* try to lock writer */ ++ if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX) ++ == RW_LOCK_NOT_LOCKED) { ++ /* success */ ++ /* obtain RW_LOCK_WAIT_EX right */ ++ lock->writer_thread = curr_thread; ++ lock->pass = pass; ++ lock->writer_is_wait_ex = TRUE; ++ /* atomic operation may be safer about memory order. 
*/ ++ __sync_synchronize(); ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX, ++ file_name, line); ++#endif ++ } ++ ++ if (!os_thread_eq(lock->writer_thread, curr_thread)) { ++ return(RW_LOCK_NOT_LOCKED); ++ } ++ ++ switch(rw_lock_get_writer(lock)) { ++ case RW_LOCK_WAIT_EX: ++ /* have right to try x-lock */ ++ if (lock->lock_word == RW_LOCK_BIAS) { ++ /* try x-lock */ ++ if(__sync_sub_and_fetch(&(lock->lock_word), ++ RW_LOCK_BIAS) == 0) { ++ /* success */ ++ lock->pass = pass; ++ lock->writer_is_wait_ex = FALSE; ++ __sync_fetch_and_add(&(lock->writer_count),1); ++ ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX); ++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, ++ file_name, line); ++#endif ++ ++ lock->last_x_file_name = file_name; ++ lock->last_x_line = line; ++ ++ /* Locking succeeded, we may return */ ++ return(RW_LOCK_EX); ++ } else { ++ /* fail */ ++ __sync_fetch_and_add(&(lock->lock_word), ++ RW_LOCK_BIAS); ++ } ++ } ++ /* There are readers, we have to wait */ ++ return(RW_LOCK_WAIT_EX); ++ ++ break; ++ ++ case RW_LOCK_EX: ++ /* already have x-lock */ ++ if ((lock->pass == 0)&&(pass == 0)) { ++ __sync_fetch_and_add(&(lock->writer_count),1); ++ ++#ifdef UNIV_SYNC_DEBUG ++ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, ++ line); ++#endif ++ ++ lock->last_x_file_name = file_name; ++ lock->last_x_line = line; ++ ++ /* Locking succeeded, we may return */ ++ return(RW_LOCK_EX); ++ } ++ ++ return(RW_LOCK_NOT_LOCKED); ++ ++ break; ++ ++ default: /* ??? 
*/ ++ return(RW_LOCK_NOT_LOCKED); ++ } ++#else /* HAVE_ATOMIC_BUILTINS */ ++ + #ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(rw_lock_get_mutex(lock))); + #endif /* UNIV_SYNC_DEBUG */ +@@ -423,6 +552,7 @@ + /* Locking succeeded, we may return */ + return(RW_LOCK_EX); + } ++#endif /* HAVE_ATOMIC_BUILTINS */ + + /* Locking did not succeed */ + return(RW_LOCK_NOT_LOCKED); +@@ -448,19 +578,33 @@ + ulint line) /* in: line where requested */ + { + ulint index; /* index of the reserved wait cell */ +- ulint state; /* lock state acquired */ ++ ulint state = RW_LOCK_NOT_LOCKED; /* lock state acquired */ ++#ifdef HAVE_ATOMIC_BUILTINS ++ ulint prev_state = RW_LOCK_NOT_LOCKED; ++#endif + ulint i; /* spin round count */ + + ut_ad(rw_lock_validate(lock)); + + lock_loop: ++ i = 0; ++ ++#ifdef HAVE_ATOMIC_BUILTINS ++ prev_state = state; ++#else + /* Acquire the mutex protecting the rw-lock fields */ + mutex_enter_fast(&(lock->mutex)); ++#endif + + state = rw_lock_x_lock_low(lock, pass, file_name, line); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (state != prev_state) i=0; /* if progress, reset counter. 
*/ ++#else + mutex_exit(&(lock->mutex)); ++#endif + ++spin_loop: + if (state == RW_LOCK_EX) { + + return; /* Locking succeeded */ +@@ -468,10 +612,9 @@ + } else if (state == RW_LOCK_NOT_LOCKED) { + + /* Spin waiting for the writer field to become free */ +- i = 0; + +- while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED +- && i < SYNC_SPIN_ROUNDS) { ++ while (i < SYNC_SPIN_ROUNDS ++ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) { + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval(0, + srv_spin_wait_delay)); +@@ -485,9 +628,12 @@ + } else if (state == RW_LOCK_WAIT_EX) { + + /* Spin waiting for the reader count field to become zero */ +- i = 0; + ++#ifdef HAVE_ATOMIC_BUILTINS ++ while (lock->lock_word != RW_LOCK_BIAS ++#else + while (rw_lock_get_reader_count(lock) != 0 ++#endif + && i < SYNC_SPIN_ROUNDS) { + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval(0, +@@ -500,7 +646,6 @@ + os_thread_yield(); + } + } else { +- i = 0; /* Eliminate a compiler warning */ + ut_error; + } + +@@ -516,34 +661,69 @@ + /* We try once again to obtain the lock. Acquire the mutex protecting + the rw-lock fields */ + ++#ifdef HAVE_ATOMIC_BUILTINS ++ prev_state = state; ++#else + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + state = rw_lock_x_lock_low(lock, pass, file_name, line); + ++#ifdef HAVE_ATOMIC_BUILTINS ++ if (state != prev_state) i=0; /* if progress, reset counter. */ ++#endif ++ + if (state == RW_LOCK_EX) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + return; /* Locking succeeded */ + } ++ ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ i++; ++ ++ if (i < SYNC_SPIN_ROUNDS) { ++ goto spin_loop; ++ } ++#endif + + rw_x_system_call_count++; + + sync_array_reserve_cell(sync_primary_wait_array, + lock, +-#ifdef __WIN__ +- /* On windows RW_LOCK_WAIT_EX signifies +- that this thread should wait on the +- special wait_ex_event. */ + (state == RW_LOCK_WAIT_EX) + ? 
RW_LOCK_WAIT_EX : +-#endif + RW_LOCK_EX, + file_name, line, + &index); + +- rw_lock_set_waiters(lock, 1); ++ if (state == RW_LOCK_WAIT_EX) { ++ rw_lock_set_wx_waiters(lock, 1); ++ } else { ++ rw_lock_set_x_waiters(lock, 1); ++ } + ++#ifdef HAVE_ATOMIC_BUILTINS ++ /* like sync0sync.c doing */ ++ for (i = 0; i < 4; i++) { ++ prev_state = state; ++ state = rw_lock_x_lock_low(lock, pass, file_name, line); ++ if (state == RW_LOCK_EX) { ++ sync_array_free_cell(sync_primary_wait_array, index); ++ return; /* Locking succeeded */ ++ } ++ if (state != prev_state) { ++ /* retry! */ ++ sync_array_free_cell(sync_primary_wait_array, index); ++ goto lock_loop; ++ } ++ } ++#else + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + + if (srv_print_latch_waits) { + fprintf(stderr, +@@ -718,7 +898,9 @@ + ut_ad(lock); + ut_ad(rw_lock_validate(lock)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + info = UT_LIST_GET_FIRST(lock->debug_list); + +@@ -728,7 +910,9 @@ + && (info->pass == 0) + && (info->lock_type == lock_type)) { + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + /* Found! 
*/ + + return(TRUE); +@@ -736,7 +920,9 @@ + + info = UT_LIST_GET_NEXT(list, info); + } ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + + return(FALSE); + } +@@ -758,21 +944,25 @@ + ut_ad(lock); + ut_ad(rw_lock_validate(lock)); + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + if (lock_type == RW_LOCK_SHARED) { + if (lock->reader_count > 0) { + ret = TRUE; + } + } else if (lock_type == RW_LOCK_EX) { +- if (lock->writer == RW_LOCK_EX) { ++ if (rw_lock_get_writer(lock) == RW_LOCK_EX) { + ret = TRUE; + } + } else { + ut_error; + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + + return(ret); + } +@@ -801,16 +991,26 @@ + + count++; + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(&(lock->mutex)); ++#endif + + if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) + || (rw_lock_get_reader_count(lock) != 0) +- || (rw_lock_get_waiters(lock) != 0)) { ++ || (rw_lock_get_s_waiters(lock) != 0) ++ || (rw_lock_get_x_waiters(lock) != 0) ++ || (rw_lock_get_wx_waiters(lock) != 0)) { + + fprintf(stderr, "RW-LOCK: %p ", lock); + +- if (rw_lock_get_waiters(lock)) { +- fputs(" Waiters for the lock exist\n", stderr); ++ if (rw_lock_get_s_waiters(lock)) { ++ fputs(" s_waiters for the lock exist,", stderr); ++ } ++ if (rw_lock_get_x_waiters(lock)) { ++ fputs(" x_waiters for the lock exist\n", stderr); ++ } ++ if (rw_lock_get_wx_waiters(lock)) { ++ fputs(" wait_ex_waiters for the lock exist\n", stderr); + } else { + putc('\n', stderr); + } +@@ -822,7 +1022,9 @@ + } + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(&(lock->mutex)); ++#endif + lock = UT_LIST_GET_NEXT(list, lock); + } + +@@ -847,10 +1049,18 @@ + + if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) + || (rw_lock_get_reader_count(lock) != 0) +- || (rw_lock_get_waiters(lock) != 0)) { ++ || (rw_lock_get_s_waiters(lock) != 0) ++ || (rw_lock_get_x_waiters(lock) != 0) ++ || (rw_lock_get_wx_waiters(lock) != 0)) { + +- if (rw_lock_get_waiters(lock)) { +- 
fputs(" Waiters for the lock exist\n", stderr); ++ if (rw_lock_get_s_waiters(lock)) { ++ fputs(" s_waiters for the lock exist,", stderr); ++ } ++ if (rw_lock_get_x_waiters(lock)) { ++ fputs(" x_waiters for the lock exist\n", stderr); ++ } ++ if (rw_lock_get_wx_waiters(lock)) { ++ fputs(" wait_ex_waiters for the lock exist\n", stderr); + } else { + putc('\n', stderr); + } +@@ -909,14 +1119,18 @@ + lock = UT_LIST_GET_FIRST(rw_lock_list); + + while (lock != NULL) { ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_enter(rw_lock_get_mutex(lock)); ++#endif + + if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) + || (rw_lock_get_reader_count(lock) != 0)) { + count++; + } + ++#ifndef HAVE_ATOMIC_BUILTINS + mutex_exit(rw_lock_get_mutex(lock)); ++#endif + lock = UT_LIST_GET_NEXT(list, lock); + } + +diff -r 962aec0d731c patch_info/innodb_rw_lock.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_rw_lock.info Thu Oct 09 08:30:28 2008 -0700 +@@ -0,0 +1,6 @@ ++File=innodb_rw_lock.patch ++Name=Fix of InnoDB rw_locks ++Version=1.0 ++Author=Yasufumi Kinoshita ++License=BSD ++Comment= diff --git a/mysql-innodb_show_bp.patch b/mysql-innodb_show_bp.patch new file mode 100644 index 0000000..a56ae9a --- /dev/null +++ b/mysql-innodb_show_bp.patch @@ -0,0 +1,447 @@ +diff -r fe944d2c6e1f innobase/btr/btr0btr.c +--- a/innobase/btr/btr0btr.c Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/btr/btr0btr.c Mon Nov 10 19:48:24 2008 -0800 +@@ -2989,3 +2989,11 @@ + + return(TRUE); + } ++ ++dulint ++btr_page_get_index_id_noninline( ++/*============*/ ++ page_t* page) /* in: index page */ ++{ ++ return btr_page_get_index_id(page); ++} +diff -r fe944d2c6e1f innobase/buf/buf0buf.c +--- a/innobase/buf/buf0buf.c Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/buf/buf0buf.c Mon Nov 10 19:48:24 2008 -0800 +@@ -2629,3 +2629,13 @@ + buf_block_print(block); + } + ++buf_block_t* ++buf_pool_get_nth_block_no_inline( ++/*===================*/ ++ /* out: pointer to block */ ++ buf_pool_t* 
buf_pool,/* in: buf_pool */ ++ ulint i) /* in: index of the block */{ ++ ++return buf_pool_get_nth_block(buf_pool, i); ++ ++} +diff -r fe944d2c6e1f innobase/include/btr0btr.h +--- a/innobase/include/btr0btr.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/include/btr0btr.h Mon Nov 10 19:48:24 2008 -0800 +@@ -69,6 +69,12 @@ + UNIV_INLINE + dulint + btr_page_get_index_id( ++/*==================*/ ++ /* out: index id */ ++ page_t* page); /* in: index page */ ++ ++dulint ++btr_page_get_index_id_noninline( + /*==================*/ + /* out: index id */ + page_t* page); /* in: index page */ +diff -r fe944d2c6e1f innobase/include/buf0buf.h +--- a/innobase/include/buf0buf.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/include/buf0buf.h Mon Nov 10 19:48:24 2008 -0800 +@@ -703,6 +703,8 @@ + buf_get_free_list_len(void); + /*=======================*/ + ++void buf_pool_dump(void); ++buf_block_t* buf_pool_get_nth_block_no_inline(buf_pool_t* pool, ulint i); + + + /* The buffer control block structure */ +diff -r fe944d2c6e1f innobase/include/page0page.h +--- a/innobase/include/page0page.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/include/page0page.h Mon Nov 10 19:48:24 2008 -0800 +@@ -260,6 +260,12 @@ + /*============*/ + /* out: number of user records */ + page_t* page); /* in: index page */ ++ ++ulint ++page_get_n_recs_noninline( ++/*============*/ ++ /* out: number of user records */ ++ page_t* page); /* in: index page */ + /******************************************************************* + Returns the number of records before the given record in chain. + The number includes infimum and supremum records. 
*/ +@@ -519,6 +525,12 @@ + UNIV_INLINE + ulint + page_get_data_size( ++/*===============*/ ++ /* out: data in bytes */ ++ page_t* page); /* in: index page */ ++ ++ulint ++page_get_data_size_noninline( + /*===============*/ + /* out: data in bytes */ + page_t* page); /* in: index page */ +diff -r fe944d2c6e1f innobase/page/page0page.c +--- a/innobase/page/page0page.c Mon Nov 10 19:47:27 2008 -0800 ++++ b/innobase/page/page0page.c Mon Nov 10 19:48:24 2008 -0800 +@@ -1994,3 +1994,25 @@ + page_cur_move_to_next(&cur); + } + } ++ ++ulint ++page_get_n_recs_noninline( ++/*============*/ ++ /* out: number of user records */ ++ page_t* page) /* in: index page */ ++{ ++ return page_get_n_recs(page); ++} ++ ++ ++ulint ++page_get_data_size_noninline( ++/*============*/ ++ /* out: number of user records */ ++ page_t* page) /* in: index page */ ++{ ++ return page_get_data_size(page); ++} ++ ++ ++ +diff -r fe944d2c6e1f mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Mon Nov 10 19:47:27 2008 -0800 ++++ b/mysql-test/r/information_schema.result Mon Nov 10 19:48:25 2008 -0800 +@@ -42,6 +42,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY + COLUMNS + COLUMN_PRIVILEGES ++INNODB_BUFFER_POOL_CONTENT + INDEX_STATISTICS + KEY_COLUMN_USAGE + PROCESSLIST +@@ -741,7 +742,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-107 ++108 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -802,6 +803,7 @@ + TABLE_NAME COLUMN_NAME PRIVILEGES + COLUMNS TABLE_NAME select + COLUMN_PRIVILEGES TABLE_NAME select ++INNODB_BUFFER_POOL_CONTENT TABLE_NAME select + INDEX_STATISTICS TABLE_NAME select + KEY_COLUMN_USAGE TABLE_NAME select + STATISTICS TABLE_NAME select +@@ -815,7 +817,7 @@ + flush privileges; + SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 22 
++information_schema 23 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1206,6 +1208,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME + COLUMNS TABLE_SCHEMA + COLUMN_PRIVILEGES TABLE_SCHEMA ++INNODB_BUFFER_POOL_CONTENT TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA + PROCESSLIST ID +@@ -1243,6 +1246,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY COLLATION_NAME + COLUMNS TABLE_SCHEMA + COLUMN_PRIVILEGES TABLE_SCHEMA ++INNODB_BUFFER_POOL_CONTENT TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA + PROCESSLIST ID +@@ -1332,6 +1336,7 @@ + COLUMNS information_schema.COLUMNS 1 + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 + INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 ++INNODB_BUFFER_POOL_CONTENT information_schema.INNODB_BUFFER_POOL_CONTENT 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 + PROCESSLIST information_schema.PROCESSLIST 1 + PROFILING information_schema.PROFILING 1 +diff -r fe944d2c6e1f mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Mon Nov 10 19:47:27 2008 -0800 ++++ b/mysql-test/r/information_schema_db.result Mon Nov 10 19:48:25 2008 -0800 +@@ -11,6 +11,7 @@ + COLLATION_CHARACTER_SET_APPLICABILITY + COLUMNS + COLUMN_PRIVILEGES ++INNODB_BUFFER_POOL_CONTENT + INDEX_STATISTICS + KEY_COLUMN_USAGE + PROCESSLIST +diff -r fe944d2c6e1f mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Mon Nov 10 19:47:27 2008 -0800 ++++ b/mysql-test/r/mysqlshow.result Mon Nov 10 19:48:25 2008 -0800 +@@ -85,6 +85,7 @@ + | COLLATION_CHARACTER_SET_APPLICABILITY | + | COLUMNS | + | COLUMN_PRIVILEGES | ++| INNODB_BUFFER_POOL_CONTENT | + | INDEX_STATISTICS | + | KEY_COLUMN_USAGE | + | PROCESSLIST | +@@ -112,6 +113,7 @@ + | COLLATION_CHARACTER_SET_APPLICABILITY | + | COLUMNS | + | COLUMN_PRIVILEGES | ++| INNODB_BUFFER_POOL_CONTENT | + | INDEX_STATISTICS | + | 
KEY_COLUMN_USAGE | + | PROCESSLIST | +diff -r fe944d2c6e1f patch_info/innodb_show_bp.info +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/patch_info/innodb_show_bp.info Mon Nov 10 19:48:25 2008 -0800 +@@ -0,0 +1,6 @@ ++File=innodb_show_bp.patch ++Name=show innodb buffer pool content ++Version=1.0 ++Author=Percona ++License=GPL ++Comment= +diff -r fe944d2c6e1f sql/ha_innodb.cc +--- a/sql/ha_innodb.cc Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/ha_innodb.cc Mon Nov 10 19:48:25 2008 -0800 +@@ -128,10 +128,12 @@ + #include "../innobase/include/lock0lock.h" + #include "../innobase/include/dict0crea.h" + #include "../innobase/include/btr0cur.h" ++#include "../innobase/include/buf0buf.h" + #include "../innobase/include/btr0btr.h" + #include "../innobase/include/fsp0fsp.h" + #include "../innobase/include/sync0sync.h" + #include "../innobase/include/fil0fil.h" ++#include "../innobase/include/page0page.h" + #include "../innobase/include/trx0xa.h" + } + +@@ -6483,6 +6485,116 @@ + DBUG_RETURN(FALSE); + } + ++bool ++innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables) ++{ ++ ulint size; ++ ulint i; ++ dulint id; ++ ulint n_found; ++ buf_frame_t* frame; ++ dict_index_t* index; ++ buf_block_t* block; ++ ++ char *p; ++ char db_name_raw[NAME_LEN*5+1]; ++ char table_name_raw[NAME_LEN*5+1]; ++ ++ DBUG_ENTER("innodb_I_S_buffer_pool_content"); ++ ++ ++ size = buf_pool->curr_size; ++ ++ n_found = 0; ++ ++ TABLE *table= tables->table; ++ ++ ++ //buf_pool_dump(); ++ ++ ++ for (i = 0; i < size; i++) { ++ block = buf_pool_get_nth_block_no_inline(buf_pool, i); ++ frame = block->frame; ++ if (fil_page_get_type(frame)==0) continue; ++ ++ char page_type[64]; ++ ++ switch(fil_page_get_type(frame)) ++ { ++ case FIL_PAGE_INDEX: ++ strcpy(page_type, "index"); ++ break; ++ case FIL_PAGE_UNDO_LOG: ++ strcpy(page_type, "undo_log"); ++ break; ++ case FIL_PAGE_INODE: ++ strcpy(page_type, "inode"); ++ break; ++ case FIL_PAGE_IBUF_FREE_LIST: ++ strcpy(page_type, "ibuf_free_list"); ++ break; ++ 
default: ++ sprintf(page_type, "unknown", fil_page_get_type(frame)); ++ } ++ ++ table->field[0]->store((longlong)i, TRUE); ++ table->field[1]->store((longlong)block->space, TRUE); ++ table->field[2]->store((longlong)block->offset, TRUE); ++ table->field[3]->store((longlong)page_get_n_recs_noninline(block->frame), TRUE); ++ table->field[4]->store( ( fil_page_get_type(frame) == FIL_PAGE_INDEX ) ? (longlong)page_get_data_size_noninline(block->frame):0, TRUE); ++ table->field[5]->store((longlong)block->flush_type, TRUE); ++ table->field[6]->store((longlong)block->buf_fix_count, TRUE); ++ table->field[7]->store((longlong)block->LRU_position, TRUE); ++ table->field[8]->store((longlong)fil_page_get_type(frame), TRUE); ++ ++ table->field[9]->store(page_type, strlen(page_type), system_charset_info); ++ ++ //fprintf(stderr, "block N %d, space %d, offset %d, records %d, datasize %d, page_type %s, flush_type %d, buf_fix_count %d, LRU_position %d", i, block->space, block->offset, page_get_n_recs_noninline(block->frame), page_get_data_size_noninline(block->frame), page_type,block->flush_type, block->buf_fix_count, block->LRU_position); ++ ++ // flush_type, buf_fix_count, LRU_position ++ ++ if (fil_page_get_type(frame) == FIL_PAGE_INDEX) { ++ ++ id = btr_page_get_index_id_noninline(frame); ++ index = dict_index_get_if_in_cache(id); ++ if (index) { ++ table->field[10]->store(index->name, strlen(index->name), system_charset_info); ++ // fprintf(stderr, " index %s, table %s", index->name, index->table_name); ++ ++ if((p = strchr(index->table_name, '/'))) ++ { ++ strncpy(db_name_raw, index->table_name, p-index->table_name); ++ db_name_raw[p-index->table_name] = 0; ++ table->field[11]->store(db_name_raw, strlen(db_name_raw), system_charset_info); ++ p++; ++ } else { ++ table->field[11]->store(NULL, 0, system_charset_info); ++ p = (char *)index->table_name; ++ } ++ strcpy(table_name_raw, p); ++ ++ table->field[12]->store(table_name_raw, strlen(table_name_raw), system_charset_info); ++ 
} else { ++ table->field[10]->store(NULL, 0, system_charset_info); ++ table->field[11]->store(NULL, 0, system_charset_info); ++ table->field[12]->store(NULL, 0, system_charset_info); ++ } ++ }else{ ++ table->field[10]->store(NULL, 0, system_charset_info); ++ table->field[11]->store(NULL, 0, system_charset_info); ++ table->field[12]->store(NULL, 0, system_charset_info); ++ } ++ //fprintf(stderr, "\n"); ++ if (schema_table_store_record(thd, table)) ++ { ++ DBUG_RETURN(1); ++ } ++ } ++ ++ DBUG_RETURN(0); ++} ++ + /**************************************************************************** + Implements the SHOW MUTEX STATUS command. . */ + +diff -r fe944d2c6e1f sql/ha_innodb.h +--- a/sql/ha_innodb.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/ha_innodb.h Mon Nov 10 19:48:25 2008 -0800 +@@ -263,6 +263,7 @@ + + int innobase_drop_database(char *path); + bool innodb_show_status(THD* thd); ++bool innodb_I_S_buffer_pool_content(THD* thd, TABLE_LIST *tables); + bool innodb_mutex_show_status(THD* thd); + void innodb_export_status(void); + +diff -r fe944d2c6e1f sql/sql_parse.cc +--- a/sql/sql_parse.cc Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/sql_parse.cc Mon Nov 10 19:48:25 2008 -0800 +@@ -2926,6 +2926,7 @@ + case SCH_COLUMN_PRIVILEGES: + case SCH_TABLE_CONSTRAINTS: + case SCH_KEY_COLUMN_USAGE: ++ case SCH_INNODB_I_S_BUFFER_POOL_CONTENT: + default: + break; + } +diff -r fe944d2c6e1f sql/sql_show.cc +--- a/sql/sql_show.cc Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/sql_show.cc Mon Nov 10 19:48:25 2008 -0800 +@@ -27,6 +27,10 @@ + + #ifdef HAVE_BERKELEY_DB + #include "ha_berkeley.h" // For berkeley_show_logs ++#endif ++ ++#ifdef HAVE_INNOBASE_DB ++#include "ha_innodb.h" + #endif + + #ifndef NO_EMBEDDED_ACCESS_CHECKS +@@ -4042,6 +4046,13 @@ + DBUG_RETURN(res); + } + ++int fill_innodb_bp_content(THD *thd, TABLE_LIST *tables, COND *cond) ++{ ++ DBUG_ENTER("fill_innodb_bp_content"); ++ int res= 0; ++ innodb_I_S_buffer_pool_content(thd, tables); ++ DBUG_RETURN(res); ++} + + /* + Find 
schema_tables elment by name +@@ -4951,6 +4962,24 @@ + }; + + ++ST_FIELD_INFO innodb_bp_content_fields_info[]= ++{ ++ {"BLOCK_NUM", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Block_num"}, ++ {"SPACE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Space"}, ++ {"OFFSET", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Offset"}, ++ {"RECORDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Records"}, ++ {"DATASIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Datasize"}, ++ {"FLUSH_TYPE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Flush_type"}, ++ {"FIX_COUNT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Fix_count"}, ++ {"LRU_POSITION", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "LRU_position"}, ++ {"PAGE_TYPE_ID", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Page_type_id"}, ++ {"PAGE_TYPE", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Page_type"}, ++ {"INDEX_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Index_name"}, ++ {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schem"}, ++ {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ + /* + Description of ST_FIELD_INFO in table.h + */ +@@ -4969,6 +4998,8 @@ + get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0}, + {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table, + fill_schema_column_privileges, 0, 0, -1, -1, 0}, ++ {"INNODB_BUFFER_POOL_CONTENT", innodb_bp_content_fields_info, create_schema_table, ++ fill_innodb_bp_content, 0, 0, -1, -1, 0}, + {"INDEX_STATISTICS", index_stats_fields_info, create_schema_table, + fill_schema_index_stats, make_old_format, 0, -1, -1, 0}, + {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table, +diff -r fe944d2c6e1f sql/table.h +--- a/sql/table.h Mon Nov 10 19:47:27 2008 -0800 ++++ b/sql/table.h Mon Nov 10 19:48:25 2008 -0800 +@@ -375,6 +375,7 @@ + SCH_COLLATION_CHARACTER_SET_APPLICABILITY, + 
SCH_COLUMNS, + SCH_COLUMN_PRIVILEGES, ++ SCH_INNODB_I_S_BUFFER_POOL_CONTENT, + SCH_INDEX_STATS, + SCH_KEY_COLUMN_USAGE, + SCH_OPEN_TABLES, diff --git a/mysql-innodb_show_hashed_memory.patch b/mysql-innodb_show_hashed_memory.patch new file mode 100644 index 0000000..191193e --- /dev/null +++ b/mysql-innodb_show_hashed_memory.patch @@ -0,0 +1,275 @@ +diff -ruN mysql-5.0.67_highperf/innobase/buf/buf0buf.c mysql-5.0.67_highperf_tmp/innobase/buf/buf0buf.c +--- mysql-5.0.67_highperf/innobase/buf/buf0buf.c 2008-11-12 09:25:58.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/buf/buf0buf.c 2008-11-12 09:27:52.000000000 +0900 +@@ -2454,13 +2454,15 @@ + (ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped)); + } + fprintf(file, +- "Buffer pool size %lu\n" +- "Free buffers %lu\n" +- "Database pages %lu\n" +- "Modified db pages %lu\n" ++ "Buffer pool size %lu\n" ++ "Buffer pool size, bytes %lu\n" ++ "Free buffers %lu\n" ++ "Database pages %lu\n" ++ "Modified db pages %lu\n" + "Pending reads %lu\n" + "Pending writes: LRU %lu, flush list %lu, single page %lu\n", + (ulong) size, ++ (ulong) size * UNIV_PAGE_SIZE, + (ulong) UT_LIST_GET_LEN(buf_pool->free), + (ulong) UT_LIST_GET_LEN(buf_pool->LRU), + (ulong) UT_LIST_GET_LEN(buf_pool->flush_list), +diff -ruN mysql-5.0.67_highperf/innobase/fil/fil0fil.c mysql-5.0.67_highperf_tmp/innobase/fil/fil0fil.c +--- mysql-5.0.67_highperf/innobase/fil/fil0fil.c 2008-11-12 09:26:07.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/fil/fil0fil.c 2008-11-12 09:27:52.000000000 +0900 +@@ -4472,3 +4472,30 @@ + + return(mach_read_from_2(page + FIL_PAGE_TYPE)); + } ++ ++/************************************************************************* ++Return local hash table informations. 
*/ ++ ++ulint ++fil_system_hash_cells(void) ++/*=======================*/ ++{ ++ if (fil_system) { ++ return (fil_system->spaces->n_cells ++ + fil_system->name_hash->n_cells); ++ } else { ++ return 0; ++ } ++} ++ ++ulint ++fil_system_hash_nodes(void) ++/*=======================*/ ++{ ++ if (fil_system) { ++ return (UT_LIST_GET_LEN(fil_system->space_list) ++ * (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE)); ++ } else { ++ return 0; ++ } ++} +diff -ruN mysql-5.0.67_highperf/innobase/include/fil0fil.h mysql-5.0.67_highperf_tmp/innobase/include/fil0fil.h +--- mysql-5.0.67_highperf/innobase/include/fil0fil.h 2008-11-12 09:26:07.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/include/fil0fil.h 2008-11-12 09:27:52.000000000 +0900 +@@ -701,6 +701,16 @@ + written to page, the return value not defined */ + byte* page); /* in: file page */ + ++/************************************************************************* ++Return local hash table informations. */ ++ ++ulint ++fil_system_hash_cells(void); ++/*========================*/ ++ ++ulint ++fil_system_hash_nodes(void); ++/*========================*/ + + typedef struct fil_space_struct fil_space_t; + +diff -ruN mysql-5.0.67_highperf/innobase/include/thr0loc.h mysql-5.0.67_highperf_tmp/innobase/include/thr0loc.h +--- mysql-5.0.67_highperf/innobase/include/thr0loc.h 2008-11-12 09:24:58.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/include/thr0loc.h 2008-11-12 09:27:52.000000000 +0900 +@@ -77,6 +77,17 @@ + /*=============================*/ + /* out: pointer to the in_ibuf field */ + ++/************************************************************************* ++Return local hash table informations. 
*/ ++ ++ulint ++thr_local_hash_cells(void); ++/*=======================*/ ++ ++ulint ++thr_local_hash_nodes(void); ++/*=======================*/ ++ + #ifndef UNIV_NONINL + #include "thr0loc.ic" + #endif +diff -ruN mysql-5.0.67_highperf/innobase/srv/srv0srv.c mysql-5.0.67_highperf_tmp/innobase/srv/srv0srv.c +--- mysql-5.0.67_highperf/innobase/srv/srv0srv.c 2008-11-12 09:26:07.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/srv/srv0srv.c 2008-11-12 09:54:19.000000000 +0900 +@@ -1645,6 +1645,14 @@ + time_t current_time; + ulint n_reserved; + ++ ulint btr_search_sys_subtotal; ++ ulint lock_sys_subtotal; ++ ulint recv_sys_subtotal; ++ ulint io_counter_subtotal; ++ ++ ulint i; ++ trx_t* trx; ++ + mutex_enter(&srv_innodb_monitor_mutex); + + current_time = time(NULL); +@@ -1747,6 +1755,91 @@ + ut_total_allocated_memory, + mem_pool_get_reserved(mem_comm_pool)); + ++ /* Calculate reserved memory; btr_search_sys is NULL-checked on both paths */ ++ if (btr_search_sys && btr_search_sys->hash_index->heap) { ++ btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index->heap); ++ } else { ++ btr_search_sys_subtotal = 0; ++ for (i=0; btr_search_sys && i < btr_search_sys->hash_index->n_mutexes; i++) { ++ btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index->heaps[i]); ++ } ++ } ++ ++ lock_sys_subtotal = 0; ++ if (trx_sys) { ++ mutex_enter(&kernel_mutex); ++ trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); ++ while (trx) { ++ lock_sys_subtotal += ((trx->lock_heap) ? mem_heap_get_size(trx->lock_heap) : 0); ++ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx); ++ } ++ mutex_exit(&kernel_mutex); ++ } ++ ++ recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash) ++ ? mem_heap_get_size(recv_sys->heap) : 0); ++ ++ io_counter_subtotal = ((buf_pool->io_counter_heap) ++ ?
mem_heap_get_size(buf_pool->io_counter_heap) : 0); ++ ++ fprintf(file, ++ "Internal hash tables (constant factor + variable factor)\n" ++ " Adaptive hash index %lu \t(%lu + %lu)\n" ++ " Page hash %lu\n" ++ " Dictionary cache %lu \t(%lu + %lu)\n" ++ " File system %lu \t(%lu + %lu)\n" ++ " Lock system %lu \t(%lu + %lu)\n" ++ " Recovery system %lu \t(%lu + %lu)\n" ++ " Threads %lu \t(%lu + %lu)\n" ++ " innodb_io_pattern %lu \t(%lu + %lu)\n", ++ ++ (ulong) (btr_search_sys ++ ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0) ++ + btr_search_sys_subtotal, ++ (ulong) (btr_search_sys ++ ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) btr_search_sys_subtotal, ++ ++ (ulong) (buf_pool->page_hash->n_cells * sizeof(hash_cell_t)), ++ ++ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells ++ + dict_sys->table_id_hash->n_cells ++ + dict_sys->col_hash->n_cells) * sizeof(hash_cell_t) ++ + dict_sys->size) : 0), ++ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells ++ + dict_sys->table_id_hash->n_cells ++ + dict_sys->col_hash->n_cells) * sizeof(hash_cell_t)) : 0), ++ (ulong) (dict_sys ? (dict_sys->size) : 0), ++ ++ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t) ++ + fil_system_hash_nodes()), ++ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)), ++ (ulong) fil_system_hash_nodes(), ++ ++ (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0) ++ + lock_sys_subtotal), ++ (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) lock_sys_subtotal, ++ ++ (ulong) (((recv_sys && recv_sys->addr_hash) ++ ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0) ++ + recv_sys_subtotal), ++ (ulong) ((recv_sys && recv_sys->addr_hash) ++ ? 
(recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) recv_sys_subtotal, ++ ++ (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t) ++ + thr_local_hash_nodes()), ++ (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)), ++ (ulong) thr_local_hash_nodes(), ++ ++ (ulong) (((buf_pool->io_counter_hash) /* needs &(buf_pool->mutex) ? */ ++ ? (buf_pool->io_counter_hash->n_cells * sizeof(hash_cell_t)) : 0) ++ + io_counter_subtotal), ++ (ulong) ((buf_pool->io_counter_hash) /* needs &(buf_pool->mutex) ? */ ++ ? (buf_pool->io_counter_hash->n_cells * sizeof(hash_cell_t)) : 0), ++ (ulong) io_counter_subtotal); ++ + if (srv_use_awe) { + fprintf(file, + "In addition to that %lu MB of AWE memory allocated\n", +diff -ruN mysql-5.0.67_highperf/innobase/thr/thr0loc.c mysql-5.0.67_highperf_tmp/innobase/thr/thr0loc.c +--- mysql-5.0.67_highperf/innobase/thr/thr0loc.c 2008-11-12 09:24:58.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/innobase/thr/thr0loc.c 2008-11-12 09:27:52.000000000 +0900 +@@ -32,6 +32,7 @@ + + /* The hash table. The module is not yet initialized when it is NULL. */ + hash_table_t* thr_local_hash = NULL; ++ulint thr_local_hash_n_nodes = 0; + + /* The private data for each thread should be put to + the structure below and the accessor functions written +@@ -223,6 +224,7 @@ + HASH_INSERT(thr_local_t, hash, thr_local_hash, + os_thread_pf(os_thread_get_curr_id()), + local); ++ thr_local_hash_n_nodes++; + + mutex_exit(&thr_local_mutex); + } +@@ -251,6 +253,7 @@ + + HASH_DELETE(thr_local_t, hash, thr_local_hash, + os_thread_pf(id), local); ++ thr_local_hash_n_nodes--; + + mutex_exit(&thr_local_mutex); + +@@ -274,3 +277,29 @@ + mutex_create(&thr_local_mutex); + mutex_set_level(&thr_local_mutex, SYNC_THR_LOCAL); + } ++ ++/************************************************************************* ++Return local hash table informations. 
*/ ++ ++ulint ++thr_local_hash_cells(void) ++/*======================*/ ++{ ++ if (thr_local_hash) { ++ return (thr_local_hash->n_cells); ++ } else { ++ return 0; ++ } ++} ++ ++ulint ++thr_local_hash_nodes(void) ++/*======================*/ ++{ ++ if (thr_local_hash) { ++ return (thr_local_hash_n_nodes ++ * (sizeof(thr_local_t) + MEM_BLOCK_HEADER_SIZE)); ++ } else { ++ return 0; ++ } ++} +diff -ruN mysql-5.0.67_highperf/patch_info/innodb_show_hashed_memory.info mysql-5.0.67_highperf_tmp/patch_info/innodb_show_hashed_memory.info +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/patch_info/innodb_show_hashed_memory.info 2008-11-12 09:27:52.000000000 +0900 +@@ -0,0 +1,6 @@ ++File=innodb_show_hashed_memory.patch ++Name=Adds additional information of InnoDB internal hash table memories in SHOW INNODB STATUS ++Version=1.0 ++Author=Percona ++License=GPL ++Comment= diff --git a/mysql-microsec_process.patch b/mysql-microsec_process.patch new file mode 100644 index 0000000..440f509 --- /dev/null +++ b/mysql-microsec_process.patch @@ -0,0 +1,281 @@ +diff -r 327ce7a34c91 mysql-test/r/information_schema.result +--- a/mysql-test/r/information_schema.result Fri Nov 07 15:44:23 2008 -0800 ++++ b/mysql-test/r/information_schema.result Fri Nov 07 15:52:53 2008 -0800 +@@ -44,6 +44,7 @@ + COLUMN_PRIVILEGES + INDEX_STATISTICS + KEY_COLUMN_USAGE ++PROCESSLIST + PROFILING + ROUTINES + SCHEMATA +@@ -740,7 +741,7 @@ + CREATE VIEW a1 (t_CRASHME) AS SELECT f1 FROM t_crashme GROUP BY f1; + CREATE VIEW a2 AS SELECT t_CRASHME FROM a1; + count(*) +-106 ++107 + drop view a2, a1; + drop table t_crashme; + select table_schema,table_name, column_name from +@@ -749,6 +750,7 @@ + table_schema table_name column_name + information_schema COLUMNS COLUMN_DEFAULT + information_schema COLUMNS COLUMN_TYPE ++information_schema PROCESSLIST INFO + information_schema ROUTINES ROUTINE_DEFINITION + information_schema ROUTINES SQL_MODE + information_schema TRIGGERS ACTION_CONDITION 
+@@ -813,7 +815,7 @@ + flush privileges; + SELECT table_schema, count(*) FROM information_schema.TABLES GROUP BY TABLE_SCHEMA; + table_schema count(*) +-information_schema 21 ++information_schema 22 + mysql 17 + create table t1 (i int, j int); + create trigger trg1 before insert on t1 for each row +@@ -1206,6 +1208,7 @@ + COLUMN_PRIVILEGES TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA ++PROCESSLIST ID + PROFILING QUERY_ID + ROUTINES ROUTINE_SCHEMA + SCHEMATA SCHEMA_NAME +@@ -1242,6 +1245,7 @@ + COLUMN_PRIVILEGES TABLE_SCHEMA + INDEX_STATISTICS TABLE_SCHEMA + KEY_COLUMN_USAGE CONSTRAINT_SCHEMA ++PROCESSLIST ID + PROFILING QUERY_ID + ROUTINES ROUTINE_SCHEMA + SCHEMATA SCHEMA_NAME +@@ -1329,6 +1333,7 @@ + COLUMN_PRIVILEGES information_schema.COLUMN_PRIVILEGES 1 + INDEX_STATISTICS information_schema.INDEX_STATISTICS 1 + KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 ++PROCESSLIST information_schema.PROCESSLIST 1 + PROFILING information_schema.PROFILING 1 + ROUTINES information_schema.ROUTINES 1 + SCHEMATA information_schema.SCHEMATA 1 +diff -r 327ce7a34c91 mysql-test/r/information_schema_db.result +--- a/mysql-test/r/information_schema_db.result Fri Nov 07 15:44:23 2008 -0800 ++++ b/mysql-test/r/information_schema_db.result Fri Nov 07 15:52:53 2008 -0800 +@@ -13,6 +13,7 @@ + COLUMN_PRIVILEGES + INDEX_STATISTICS + KEY_COLUMN_USAGE ++PROCESSLIST + PROFILING + ROUTINES + SCHEMATA +diff -r 327ce7a34c91 mysql-test/r/mysqlshow.result +--- a/mysql-test/r/mysqlshow.result Fri Nov 07 15:44:23 2008 -0800 ++++ b/mysql-test/r/mysqlshow.result Fri Nov 07 15:52:53 2008 -0800 +@@ -87,6 +87,7 @@ + | COLUMN_PRIVILEGES | + | INDEX_STATISTICS | + | KEY_COLUMN_USAGE | ++| PROCESSLIST | + | PROFILING | + | ROUTINES | + | SCHEMATA | +@@ -113,6 +114,7 @@ + | COLUMN_PRIVILEGES | + | INDEX_STATISTICS | + | KEY_COLUMN_USAGE | ++| PROCESSLIST | + | PROFILING | + | ROUTINES | + | SCHEMATA | +diff -r 327ce7a34c91 sql/mysql_priv.h +--- a/sql/mysql_priv.h 
Fri Nov 07 15:44:23 2008 -0800 ++++ b/sql/mysql_priv.h Fri Nov 07 15:52:53 2008 -0800 +@@ -244,6 +244,8 @@ + + /* Characters shown for the command in 'show processlist' */ + #define PROCESS_LIST_WIDTH 100 ++/* Characters shown for the command in 'information_schema.processlist' */ ++#define PROCESS_LIST_INFO_WIDTH 65535 + + #define PRECISION_FOR_DOUBLE 53 + #define PRECISION_FOR_FLOAT 24 +diff -r 327ce7a34c91 sql/sql_show.cc +--- a/sql/sql_show.cc Fri Nov 07 15:44:23 2008 -0800 ++++ b/sql/sql_show.cc Fri Nov 07 15:52:53 2008 -0800 +@@ -1466,6 +1466,120 @@ + } + send_eof(thd); + DBUG_VOID_RETURN; ++} ++ ++int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond) ++{ ++ TABLE *table= tables->table; ++ CHARSET_INFO *cs= system_charset_info; ++ char *user; ++ ulonglong current_timer= my_timer(&current_timer, frequency); ++ DBUG_ENTER("fill_process_list"); ++ ++ user= thd->security_ctx->master_access & PROCESS_ACL ? ++ NullS : thd->security_ctx->priv_user; ++ ++ VOID(pthread_mutex_lock(&LOCK_thread_count)); ++ ++ if (!thd->killed) ++ { ++ I_List_iterator<THD> it(threads); ++ THD* tmp; ++ ++ while ((tmp= it++)) ++ { ++ Security_context *tmp_sctx= tmp->security_ctx; ++ struct st_my_thread_var *mysys_var; ++ const char *val; ++ ++ if ((!tmp->vio_ok() && !tmp->system_thread) || ++ (user && (!tmp_sctx->user || strcmp(tmp_sctx->user, user)))) ++ continue; ++ ++ restore_record(table, s->default_values); ++ /* ID */ ++ table->field[0]->store((longlong) tmp->thread_id, TRUE); ++ /* USER */ ++ val= tmp_sctx->user ? tmp_sctx->user : ++ (tmp->system_thread ?
"system user" : "unauthenticated user"); ++ table->field[1]->store(val, strlen(val), cs); ++ /* HOST */ ++ if (tmp->peer_port && (tmp_sctx->host || tmp_sctx->ip) && ++ thd->security_ctx->host_or_ip[0]) ++ { ++ char host[LIST_PROCESS_HOST_LEN + 1]; ++ my_snprintf(host, LIST_PROCESS_HOST_LEN, "%s:%u", ++ tmp_sctx->host_or_ip, tmp->peer_port); ++ table->field[2]->store(host, strlen(host), cs); ++ } ++ else ++ table->field[2]->store(tmp_sctx->host_or_ip, ++ strlen(tmp_sctx->host_or_ip), cs); ++ /* DB */ ++ if (tmp->db) ++ { ++ table->field[3]->store(tmp->db, strlen(tmp->db), cs); ++ table->field[3]->set_notnull(); ++ } ++ ++ if ((mysys_var= tmp->mysys_var)) ++ pthread_mutex_lock(&mysys_var->mutex); ++ /* COMMAND */ ++ if ((val= (char *) (tmp->killed == THD::KILL_CONNECTION? "Killed" : 0))) ++ table->field[4]->store(val, strlen(val), cs); ++ else ++ table->field[4]->store(command_name[tmp->command], ++ strlen(command_name[tmp->command]), cs); ++ /* MYSQL_TIME */ ++ const ulonglong utime= tmp->start_timer ? current_timer - tmp->start_timer : 0; ++ table->field[5]->store(utime / 1000000, TRUE); ++ /* STATE */ ++#ifndef EMBEDDED_LIBRARY ++ val= (char*) (tmp->locked ? "Locked" : ++ tmp->net.reading_or_writing ? ++ (tmp->net.reading_or_writing == 2 ? ++ "Writing to net" : ++ tmp->command == COM_SLEEP ? "" : ++ "Reading from net") : ++ tmp->proc_info ? tmp->proc_info : ++ tmp->mysys_var && ++ tmp->mysys_var->current_cond ? 
++ "Waiting on cond" : NullS); ++#else ++ val= (char *) "Writing to net"; ++#endif ++ if (val) ++ { ++ table->field[6]->store(val, strlen(val), cs); ++ table->field[6]->set_notnull(); ++ } ++ ++ if (mysys_var) ++ pthread_mutex_unlock(&mysys_var->mutex); ++ ++ /* INFO */ ++ if (tmp->query) ++ { ++ table->field[7]->store(tmp->query, ++ min(PROCESS_LIST_INFO_WIDTH, ++ tmp->query_length), cs); ++ table->field[7]->set_notnull(); ++ } ++ ++ /* TIME_MS */ ++ table->field[8]->store((double)(utime / 1000.0)); ++ ++ if (schema_table_store_record(thd, table)) ++ { ++ VOID(pthread_mutex_unlock(&LOCK_thread_count)); ++ DBUG_RETURN(1); ++ } ++ ++ } ++ } ++ ++ VOID(pthread_mutex_unlock(&LOCK_thread_count)); ++ DBUG_RETURN(0); + } + + /***************************************************************************** +@@ -4821,6 +4941,22 @@ + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} + }; + ++ST_FIELD_INFO processlist_fields_info[]= ++{ ++ {"ID", 4, MYSQL_TYPE_LONG, 0, 0, "Id"}, ++ {"USER", 16, MYSQL_TYPE_STRING, 0, 0, "User"}, ++ {"HOST", LIST_PROCESS_HOST_LEN, MYSQL_TYPE_STRING, 0, 0, "Host"}, ++ {"DB", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, "Db"}, ++ {"COMMAND", 16, MYSQL_TYPE_STRING, 0, 0, "Command"}, ++ {"TIME", 7, MYSQL_TYPE_LONG, 0, 0, "Time"}, ++ {"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State"}, ++ {"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info"}, ++ {"TIME_MS", 100 * (MY_INT64_NUM_DECIMAL_DIGITS + 1) + 3, MYSQL_TYPE_DECIMAL, ++ 0, 0, "Time_ms"}, ++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} ++}; ++ ++ + /* + Description of ST_FIELD_INFO in table.h + */ +@@ -4845,6 +4981,8 @@ + get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0}, + {"OPEN_TABLES", open_tables_fields_info, create_schema_table, + fill_open_tables, make_old_format, 0, -1, -1, 1}, ++ {"PROCESSLIST", processlist_fields_info, create_schema_table, ++ fill_schema_processlist, make_old_format, 0, -1, -1, 0}, + {"PROFILING", query_profile_statistics_info, create_schema_table, + 
fill_query_profile_statistics_info, make_profile_table_for_show, + NULL, -1, -1, false}, +diff -r 327ce7a34c91 sql/table.h +--- a/sql/table.h Fri Nov 07 15:44:23 2008 -0800 ++++ b/sql/table.h Fri Nov 07 15:52:53 2008 -0800 +@@ -378,6 +378,7 @@ + SCH_INDEX_STATS, + SCH_KEY_COLUMN_USAGE, + SCH_OPEN_TABLES, ++ SCH_PROCESSLIST, + SCH_PROFILES, + SCH_PROCEDURES, + SCH_SCHEMATA, +diff -ruN mysql-5.0.67_highperf/patch_info/microsec_process.info mysql-5.0.67_highperf_tmp/patch_info/microsec_process.info +--- /dev/null 1970-01-01 09:00:00.000000000 +0900 ++++ mysql-5.0.67_highperf_tmp/patch_info/microsec_process.info 2008-11-12 09:27:52.000000000 +0900 +@@ -0,0 +1,7 @@ ++File=microsec_process.patch ++Name=Adds INFORMATION_SCHEMA.PROCESSLIST with TIME_MS column ++Version=1.0 ++Author=Percona ++License=GPL ++Comment= ++ChangeLog= -- 2.44.0