diff -r 45683461331d innobase/buf/buf0rea.c --- a/innobase/buf/buf0rea.c Mon Dec 22 00:31:16 2008 -0800 +++ b/innobase/buf/buf0rea.c Mon Dec 22 00:32:02 2008 -0800 @@ -188,6 +188,10 @@ ulint low, high; ulint err; ulint i; + + if (!(srv_read_ahead & 1)) { + return(0); + } if (srv_startup_is_before_trx_rollback_phase) { /* No read-ahead to avoid thread deadlocks */ @@ -396,6 +400,10 @@ ulint err; ulint i; + if (!(srv_read_ahead & 2)) { + return(0); + } + if (srv_startup_is_before_trx_rollback_phase) { /* No read-ahead to avoid thread deadlocks */ return(0); diff -r 45683461331d innobase/include/os0file.h --- a/innobase/include/os0file.h Mon Dec 22 00:31:16 2008 -0800 +++ b/innobase/include/os0file.h Mon Dec 22 00:32:02 2008 -0800 @@ -551,8 +551,10 @@ /*========*/ ulint n, /* in: maximum number of pending aio operations allowed; n must be divisible by n_segments */ - ulint n_segments, /* in: combined number of segments in the four - first aio arrays; must be >= 4 */ +// ulint n_segments, /* in: combined number of segments in the four +// first aio arrays; must be >= 4 */ + ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads */ + ulint n_write_threads, /**/ ulint n_slots_sync); /* in: number of slots in the sync aio array */ /*********************************************************************** Requests an asynchronous i/o operation. 
*/ diff -r 45683461331d innobase/include/srv0srv.h --- a/innobase/include/srv0srv.h Mon Dec 22 00:31:16 2008 -0800 +++ b/innobase/include/srv0srv.h Mon Dec 22 00:32:02 2008 -0800 @@ -89,6 +89,8 @@ extern ulint srv_lock_table_size; extern ulint srv_n_file_io_threads; +extern ulint srv_n_read_io_threads; +extern ulint srv_n_write_io_threads; #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; @@ -133,6 +135,10 @@ extern ulong srv_max_purge_lag; extern ibool srv_use_awe; extern ibool srv_use_adaptive_hash_indexes; + +extern ulint srv_io_capacity; +extern ulint srv_read_ahead; +extern ulint srv_adaptive_checkpoint; /*-------------------------------------------*/ extern ulint srv_n_rows_inserted; diff -r 45683461331d innobase/log/log0log.c --- a/innobase/log/log0log.c Mon Dec 22 00:31:16 2008 -0800 +++ b/innobase/log/log0log.c Mon Dec 22 00:32:02 2008 -0800 @@ -3326,6 +3326,15 @@ (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn), (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn)); + fprintf(file, + "Max checkpoint age %lu\n" + "Modified age %lu\n" + "Checkpoint age %lu\n", + (ulong) log_sys->max_checkpoint_age, + (ulong) ut_dulint_minus(log_sys->lsn, + log_buf_pool_get_oldest_modification()), + (ulong) ut_dulint_minus(log_sys->lsn, log_sys->last_checkpoint_lsn)); + current_time = time(NULL); time_elapsed = 0.001 + difftime(current_time, diff -r 45683461331d innobase/os/os0file.c --- a/innobase/os/os0file.c Mon Dec 22 00:31:16 2008 -0800 +++ b/innobase/os/os0file.c Mon Dec 22 00:32:02 2008 -0800 @@ -2877,8 +2877,10 @@ /*========*/ ulint n, /* in: maximum number of pending aio operations allowed; n must be divisible by n_segments */ - ulint n_segments, /* in: combined number of segments in the four - first aio arrays; must be >= 4 */ +// ulint n_segments, /* in: combined number of segments in the four +// first aio arrays; must be >= 4 */ + ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads*/ + ulint n_write_threads, /**/ ulint 
n_slots_sync) /* in: number of slots in the sync aio array */ { ulint n_read_segs; @@ -2888,6 +2890,8 @@ #ifdef POSIX_ASYNC_IO sigset_t sigset; #endif + ulint n_segments = 2 + n_read_threads + n_write_threads; + ut_ad(n % n_segments == 0); ut_ad(n_segments >= 4); @@ -2898,8 +2902,8 @@ } n_per_seg = n / n_segments; - n_write_segs = (n_segments - 2) / 2; - n_read_segs = n_segments - 2 - n_write_segs; + n_write_segs = n_write_threads; + n_read_segs = n_read_threads; /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */ @@ -3180,6 +3184,13 @@ struct aiocb* control; #endif ulint i; + ulint prim_segment; + ulint n; + + n = array->n_slots / array->n_segments; + /* 64 blocks' striping ( aligning max(BUF_READ_AHEAD_AREA) ) */ + prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments); + loop: os_mutex_enter(array->mutex); @@ -3198,12 +3209,23 @@ goto loop; } + for (i = prim_segment * n; i < array->n_slots; i++) { + slot = os_aio_array_get_nth_slot(array, i); + + if (slot->reserved == FALSE) { + break; + } + } + + if (slot->reserved == TRUE){ + /* Not found after the intended segment. So we should search before. */ for (i = 0;; i++) { slot = os_aio_array_get_nth_slot(array, i); if (slot->reserved == FALSE) { break; } + } } array->n_reserved++; diff -r 45683461331d innobase/srv/srv0srv.c --- a/innobase/srv/srv0srv.c Mon Dec 22 00:31:16 2008 -0800 +++ b/innobase/srv/srv0srv.c Mon Dec 22 00:32:02 2008 -0800 @@ -167,6 +167,8 @@ ulint srv_lock_table_size = ULINT_MAX; ulint srv_n_file_io_threads = ULINT_MAX; +ulint srv_n_read_io_threads = 1; +ulint srv_n_write_io_threads = 1; #ifdef UNIV_LOG_ARCHIVE ibool srv_log_archive_on = FALSE; @@ -324,6 +326,15 @@ ibool srv_use_awe = FALSE; ibool srv_use_adaptive_hash_indexes = TRUE; +ulint srv_io_capacity = 100; + +/* Returns the number of IO operations that is X percent of the capacity. +PCT_IO(5) -> returns the number of IO operations that is 5% of the max +where max is srv_io_capacity. 
*/ +#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) (pct) / 100.0))) + +ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ +ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */ /*-------------------------------------------*/ ulong srv_n_spin_wait_rounds = 20; ulong srv_n_free_tickets_to_enter = 500; @@ -2214,6 +2225,8 @@ ibool skip_sleep = FALSE; ulint i; + dulint oldest_lsn; + #ifdef UNIV_DEBUG_THREAD_CREATION fprintf(stderr, "Master thread starts, id %lu\n", os_thread_pf(os_thread_get_curr_id())); @@ -2302,9 +2315,9 @@ + log_sys->n_pending_writes; n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + buf_pool->n_pages_written; - if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) { + if (n_pend_ios < 3 && (n_ios - n_ios_old < PCT_IO(5))) { srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_contract_for_n_pages(TRUE, 5); + ibuf_contract_for_n_pages(TRUE, PCT_IO(5)); srv_main_thread_op_info = "flushing log"; @@ -2317,7 +2330,7 @@ /* Try to keep the number of modified pages in the buffer pool under the limit wished by the user */ - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); /* If we had to do the flush, it may have taken @@ -2326,6 +2339,44 @@ iteration of this loop. 
*/ skip_sleep = TRUE; + } else if (srv_adaptive_checkpoint) { + + /* Try to keep modified age not to exceed + max_checkpoint_age * 7/8 line */ + + mutex_enter(&(log_sys->mutex)); + + oldest_lsn = buf_pool_get_oldest_modification(); + if (ut_dulint_is_zero(oldest_lsn)) { + + mutex_exit(&(log_sys->mutex)); + + } else { + if (ut_dulint_minus(log_sys->lsn, oldest_lsn) + > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) { + + /* 2nd defence line (max_checkpoint_age * 3/4) */ + + mutex_exit(&(log_sys->mutex)); + + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), + ut_dulint_max); + skip_sleep = TRUE; + } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn) + > (log_sys->max_checkpoint_age)/2 ) { + + /* 1st defence line (max_checkpoint_age * 1/2) */ + + mutex_exit(&(log_sys->mutex)); + + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10), + ut_dulint_max); + skip_sleep = TRUE; + } else { + mutex_exit(&(log_sys->mutex)); + } + } + } if (srv_activity_count == old_activity_count) { @@ -2352,10 +2403,10 @@ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + buf_pool->n_pages_written; - if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { + if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) { srv_main_thread_op_info = "flushing buffer pool pages"; - buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); srv_main_thread_op_info = "flushing log"; log_buffer_flush_to_disk(); @@ -2365,7 +2416,7 @@ even if the server were active */ srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_contract_for_n_pages(TRUE, 5); + ibuf_contract_for_n_pages(TRUE, PCT_IO(5)); srv_main_thread_op_info = "flushing log"; log_buffer_flush_to_disk(); @@ -2407,14 +2458,14 @@ (> 70 %), we assume we can afford reserving the disk(s) for the time it requires to flush 100 pages */ - n_pages_flushed = 
buf_flush_batch(BUF_FLUSH_LIST, 100, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); } else { /* Otherwise, we only flush a small number of pages so that we do not unnecessarily use much disk i/o capacity from other work */ - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10), ut_dulint_max); } @@ -2503,7 +2554,7 @@ if (srv_fast_shutdown && srv_shutdown_state > 0) { n_bytes_merged = 0; } else { - n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20); + n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100)); } srv_main_thread_op_info = "reserving kernel mutex"; @@ -2520,7 +2571,7 @@ if (srv_fast_shutdown < 2) { n_pages_flushed = - buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); } else { /* In the fastest shutdown we do not flush the buffer pool to data files: we set n_pages_flushed to 0 artificially. */ diff -r 45683461331d innobase/srv/srv0start.c --- a/innobase/srv/srv0start.c Mon Dec 22 00:31:16 2008 -0800 +++ b/innobase/srv/srv0start.c Mon Dec 22 00:32:02 2008 -0800 @@ -1205,24 +1205,28 @@ return(DB_ERROR); } + /* over write innodb_file_io_threads */ + srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads; + /* Restrict the maximum number of file i/o threads */ if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) { srv_n_file_io_threads = SRV_MAX_N_IO_THREADS; + srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2; } if (!os_aio_use_native_aio) { /* In simulated aio we currently have use only for 4 threads */ - srv_n_file_io_threads = 4; + /*srv_n_file_io_threads = 4;*/ os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD * srv_n_file_io_threads, - srv_n_file_io_threads, - SRV_MAX_N_PENDING_SYNC_IOS); + srv_n_read_io_threads, srv_n_write_io_threads, + SRV_MAX_N_PENDING_SYNC_IOS * 8); } else { os_aio_init(SRV_N_PENDING_IOS_PER_THREAD * 
srv_n_file_io_threads, - srv_n_file_io_threads, + srv_n_read_io_threads, srv_n_write_io_threads, SRV_MAX_N_PENDING_SYNC_IOS); } diff -r 45683461331d patch_info/innodb_io_patches.info --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patch_info/innodb_io_patches.info Mon Dec 22 00:32:02 2008 -0800 @@ -0,0 +1,9 @@ +File=innodb_io_patches.patch +Name=Cluster of past InnoDB IO patches +Version=1.0 +Author=Percona +License=GPL +Comment=This patch contains fixed (control_flush_and_merge_and_read, control_io-threads, adaptive_flush) +ChangeLog= +2008-11-06 +YK: Initial release diff -r 45683461331d sql/ha_innodb.cc --- a/sql/ha_innodb.cc Mon Dec 22 00:31:16 2008 -0800 +++ b/sql/ha_innodb.cc Mon Dec 22 00:32:02 2008 -0800 @@ -149,6 +149,7 @@ innobase_lock_wait_timeout, innobase_force_recovery, innobase_open_files; +long innobase_read_io_threads, innobase_write_io_threads; longlong innobase_buffer_pool_size, innobase_log_file_size; /* The default values for the following char* start-up parameters @@ -1403,6 +1404,8 @@ srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; srv_n_file_io_threads = (ulint) innobase_file_io_threads; + srv_n_read_io_threads = (ulint) innobase_read_io_threads; + srv_n_write_io_threads = (ulint) innobase_write_io_threads; srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout; srv_force_recovery = (ulint) innobase_force_recovery; diff -r 45683461331d sql/ha_innodb.h --- a/sql/ha_innodb.h Mon Dec 22 00:31:16 2008 -0800 +++ b/sql/ha_innodb.h Mon Dec 22 00:32:02 2008 -0800 @@ -204,6 +204,7 @@ extern long innobase_additional_mem_pool_size; extern long innobase_buffer_pool_awe_mem_mb; extern long innobase_file_io_threads, innobase_lock_wait_timeout; +extern long innobase_read_io_threads, innobase_write_io_threads; extern long innobase_force_recovery; extern long innobase_open_files; extern char *innobase_data_home_dir, *innobase_data_file_path; @@ -234,6 +235,9 @@ extern ulong srv_thread_concurrency; extern ulong srv_commit_concurrency; 
extern ulong srv_flush_log_at_trx_commit; +extern ulong srv_io_capacity; +extern ulong srv_read_ahead; +extern ulong srv_adaptive_checkpoint; } bool innobase_init(void); diff -r 45683461331d sql/mysqld.cc --- a/sql/mysqld.cc Mon Dec 22 00:31:16 2008 -0800 +++ b/sql/mysqld.cc Mon Dec 22 00:32:02 2008 -0800 @@ -5036,6 +5036,11 @@ OPT_INNODB_ROLLBACK_ON_TIMEOUT, OPT_SECURE_FILE_PRIV, OPT_KEEP_FILES_ON_CREATE, + OPT_INNODB_IO_CAPACITY, + OPT_INNODB_READ_AHEAD, + OPT_INNODB_ADAPTIVE_CHECKPOINT, + OPT_INNODB_READ_IO_THREADS, + OPT_INNODB_WRITE_IO_THREADS, OPT_INNODB_ADAPTIVE_HASH_INDEX, OPT_FEDERATED }; @@ -5344,6 +5349,26 @@ (gptr*) &global_system_variables.innodb_table_locks, (gptr*) &global_system_variables.innodb_table_locks, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, + {"innodb_io_capacity", OPT_INNODB_IO_CAPACITY, + "Number of IO operations per second the server can do. Tunes background IO rate.", + (gptr*) &srv_io_capacity, (gptr*) &srv_io_capacity, + 0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0}, + {"innodb_read_ahead", OPT_INNODB_READ_AHEAD, + "Enable/Disable read aheads bit0:random bit1:linear", + (gptr*) &srv_read_ahead, (gptr*) &srv_read_ahead, + 0, GET_ULONG, REQUIRED_ARG, 3, 0, 3, 0, 0, 0}, + {"innodb_adaptive_checkpoint", OPT_INNODB_ADAPTIVE_CHECKPOINT, + "Enable/Disable flushing along modified age 0:disable 1:enable", + (gptr*) &srv_adaptive_checkpoint, (gptr*) &srv_adaptive_checkpoint, + 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0}, + {"innodb_read_io_threads", OPT_INNODB_READ_IO_THREADS, + "Number of background read I/O threads in InnoDB.", + (gptr*) &innobase_read_io_threads, (gptr*) &innobase_read_io_threads, + 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, + {"innodb_write_io_threads", OPT_INNODB_WRITE_IO_THREADS, + "Number of background write I/O threads in InnoDB.", + (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads, + 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0}, #endif /* End HAVE_INNOBASE_DB */ {"isam", 
OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.", (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0, diff -r 45683461331d sql/set_var.cc --- a/sql/set_var.cc Mon Dec 22 00:31:16 2008 -0800 +++ b/sql/set_var.cc Mon Dec 22 00:32:02 2008 -0800 @@ -484,6 +484,12 @@ sys_var_long_ptr sys_innodb_flush_log_at_trx_commit( "innodb_flush_log_at_trx_commit", &srv_flush_log_at_trx_commit); +sys_var_long_ptr sys_innodb_io_capacity("innodb_io_capacity", + &srv_io_capacity); +sys_var_long_ptr sys_innodb_read_ahead("innodb_read_ahead", + &srv_read_ahead); +sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint", + &srv_adaptive_checkpoint); sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path", &innobase_data_file_path); sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir", @@ -847,6 +853,9 @@ &sys_innodb_thread_concurrency, &sys_innodb_commit_concurrency, &sys_innodb_flush_log_at_trx_commit, + &sys_innodb_io_capacity, + &sys_innodb_read_ahead, + &sys_innodb_adaptive_checkpoint, #endif &sys_trust_routine_creators, &sys_trust_function_creators, @@ -982,6 +991,11 @@ {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS}, {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS}, {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS}, + {sys_innodb_io_capacity.name, (char*) &sys_innodb_io_capacity, SHOW_SYS}, + {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS}, + {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS}, + {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG}, + {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG}, #endif {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS}, {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS},