1 diff -r 45683461331d innobase/buf/buf0rea.c
2 --- a/innobase/buf/buf0rea.c Mon Dec 22 00:31:16 2008 -0800
3 +++ b/innobase/buf/buf0rea.c Mon Dec 22 00:32:02 2008 -0800
9 + if (!(srv_read_ahead & 1)) {
13 if (srv_startup_is_before_trx_rollback_phase) {
14 /* No read-ahead to avoid thread deadlocks */
19 + if (!(srv_read_ahead & 2)) {
23 if (srv_startup_is_before_trx_rollback_phase) {
24 /* No read-ahead to avoid thread deadlocks */
26 diff -r 45683461331d innobase/include/os0file.h
27 --- a/innobase/include/os0file.h Mon Dec 22 00:31:16 2008 -0800
28 +++ b/innobase/include/os0file.h Mon Dec 22 00:32:02 2008 -0800
31 ulint n, /* in: maximum number of pending aio operations
32 allowed; n must be divisible by n_segments */
33 - ulint n_segments, /* in: combined number of segments in the four
34 - first aio arrays; must be >= 4 */
35 +/* The segment count, formerly the n_segments parameter, is now derived */
36 +/* as 2 + n_read_threads + n_write_threads; it must be >= 4 */
37 + ulint n_read_threads, /* in: number of read i/o threads */
38 + ulint n_write_threads, /* in: number of write i/o threads */
39 ulint n_slots_sync); /* in: number of slots in the sync aio array */
40 /***********************************************************************
41 Requests an asynchronous i/o operation. */
42 diff -r 45683461331d innobase/include/srv0srv.h
43 --- a/innobase/include/srv0srv.h Mon Dec 22 00:31:16 2008 -0800
44 +++ b/innobase/include/srv0srv.h Mon Dec 22 00:32:02 2008 -0800
46 extern ulint srv_lock_table_size;
48 extern ulint srv_n_file_io_threads;
49 +extern ulint srv_n_read_io_threads;
50 +extern ulint srv_n_write_io_threads;
52 #ifdef UNIV_LOG_ARCHIVE
53 extern ibool srv_log_archive_on;
55 extern ulong srv_max_purge_lag;
56 extern ibool srv_use_awe;
57 extern ibool srv_use_adaptive_hash_indexes;
59 +extern ulint srv_io_capacity;
60 +extern ulint srv_read_ahead;
61 +extern ulint srv_adaptive_checkpoint;
62 /*-------------------------------------------*/
64 extern ulint srv_n_rows_inserted;
65 diff -r 45683461331d innobase/log/log0log.c
66 --- a/innobase/log/log0log.c Mon Dec 22 00:31:16 2008 -0800
67 +++ b/innobase/log/log0log.c Mon Dec 22 00:32:02 2008 -0800
68 @@ -3326,6 +3326,15 @@
69 (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
70 (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));
73 + "Max checkpoint age %lu\n"
74 + "Modified age %lu\n"
75 + "Checkpoint age %lu\n",
76 + (ulong) log_sys->max_checkpoint_age,
77 + (ulong) ut_dulint_minus(log_sys->lsn,
78 + log_buf_pool_get_oldest_modification()),
79 + (ulong) ut_dulint_minus(log_sys->lsn, log_sys->last_checkpoint_lsn));
81 current_time = time(NULL);
83 time_elapsed = 0.001 + difftime(current_time,
84 diff -r 45683461331d innobase/os/os0file.c
85 --- a/innobase/os/os0file.c Mon Dec 22 00:31:16 2008 -0800
86 +++ b/innobase/os/os0file.c Mon Dec 22 00:32:02 2008 -0800
87 @@ -2877,8 +2877,10 @@
89 ulint n, /* in: maximum number of pending aio operations
90 allowed; n must be divisible by n_segments */
91 - ulint n_segments, /* in: combined number of segments in the four
92 - first aio arrays; must be >= 4 */
93 +/* The segment count, formerly the n_segments parameter, is now computed */
94 +/* in the function body as 2 + n_read_threads + n_write_threads (>= 4) */
95 + ulint n_read_threads, /* in: number of read segments */
96 + ulint n_write_threads, /* in: number of write segments */
97 ulint n_slots_sync) /* in: number of slots in the sync aio array */
100 @@ -2888,6 +2890,8 @@
101 #ifdef POSIX_ASYNC_IO
104 + ulint n_segments = 2 + n_read_threads + n_write_threads;
106 ut_ad(n % n_segments == 0);
107 ut_ad(n_segments >= 4);
109 @@ -2898,8 +2902,8 @@
112 n_per_seg = n / n_segments;
113 - n_write_segs = (n_segments - 2) / 2;
114 - n_read_segs = n_segments - 2 - n_write_segs;
115 + n_write_segs = n_write_threads;
116 + n_read_segs = n_read_threads;
118 /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
120 @@ -3180,6 +3184,13 @@
121 struct aiocb* control;
124 + ulint prim_segment;
127 + n = array->n_slots / array->n_segments;
128 + /* 64 blocks' striping ( aligning max(BUF_READ_AHEAD_AREA) ) */
129 + prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments);
132 os_mutex_enter(array->mutex);
134 @@ -3198,12 +3209,23 @@
138 + for (i = prim_segment * n; i < array->n_slots; i++) {
139 + slot = os_aio_array_get_nth_slot(array, i);
141 + if (slot->reserved == FALSE) {
146 + if (slot->reserved == TRUE){
147 + /* Not found after the intended segment. So we should search before. */
149 slot = os_aio_array_get_nth_slot(array, i);
151 if (slot->reserved == FALSE) {
158 diff -r 45683461331d innobase/srv/srv0srv.c
159 --- a/innobase/srv/srv0srv.c Mon Dec 22 00:31:16 2008 -0800
160 +++ b/innobase/srv/srv0srv.c Mon Dec 22 00:32:02 2008 -0800
162 ulint srv_lock_table_size = ULINT_MAX;
164 ulint srv_n_file_io_threads = ULINT_MAX;
165 +ulint srv_n_read_io_threads = 1;
166 +ulint srv_n_write_io_threads = 1;
168 #ifdef UNIV_LOG_ARCHIVE
169 ibool srv_log_archive_on = FALSE;
171 ibool srv_use_awe = FALSE;
172 ibool srv_use_adaptive_hash_indexes = TRUE;
174 +ulint srv_io_capacity = 100;
176 +/* Converts a percentage of the configured I/O capacity into a number of
177 +IO operations: PCT_IO(5) is 5% of srv_io_capacity, truncated toward zero
178 +by the cast to ulint. The argument may be any arithmetic expression. */
179 +#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) (pct) / 100.0)))
181 +ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
182 +ulint srv_adaptive_checkpoint = 0; /* 0:disable 1:enable */
183 /*-------------------------------------------*/
184 ulong srv_n_spin_wait_rounds = 20;
185 ulong srv_n_free_tickets_to_enter = 500;
186 @@ -2214,6 +2225,8 @@
187 ibool skip_sleep = FALSE;
192 #ifdef UNIV_DEBUG_THREAD_CREATION
193 fprintf(stderr, "Master thread starts, id %lu\n",
194 os_thread_pf(os_thread_get_curr_id()));
195 @@ -2302,9 +2315,9 @@
196 + log_sys->n_pending_writes;
197 n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
198 + buf_pool->n_pages_written;
199 - if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
200 + if (n_pend_ios < 3 && (n_ios - n_ios_old < PCT_IO(5))) {
201 srv_main_thread_op_info = "doing insert buffer merge";
202 - ibuf_contract_for_n_pages(TRUE, 5);
203 + ibuf_contract_for_n_pages(TRUE, PCT_IO(5));
205 srv_main_thread_op_info = "flushing log";
207 @@ -2317,7 +2330,7 @@
208 /* Try to keep the number of modified pages in the
209 buffer pool under the limit wished by the user */
211 - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
212 + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
215 /* If we had to do the flush, it may have taken
216 @@ -2326,6 +2339,44 @@
217 iteration of this loop. */
220 + } else if (srv_adaptive_checkpoint) {
222 + /* Try to keep modified age not to exceed
223 + max_checkpoint_age * 7/8 line */
225 + mutex_enter(&(log_sys->mutex));
227 + oldest_lsn = buf_pool_get_oldest_modification();
228 + if (ut_dulint_is_zero(oldest_lsn)) {
230 + mutex_exit(&(log_sys->mutex));
233 + if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
234 + > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) {
236 + /* 2nd defence line (max_checkpoint_age * 3/4) */
238 + mutex_exit(&(log_sys->mutex));
240 + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
243 + } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
244 + > (log_sys->max_checkpoint_age)/2 ) {
246 + /* 1st defence line (max_checkpoint_age * 1/2) */
248 + mutex_exit(&(log_sys->mutex));
250 + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
254 + mutex_exit(&(log_sys->mutex));
260 if (srv_activity_count == old_activity_count) {
261 @@ -2352,10 +2403,10 @@
262 n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
263 n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
264 + buf_pool->n_pages_written;
265 - if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
266 + if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
268 srv_main_thread_op_info = "flushing buffer pool pages";
269 - buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
270 + buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
272 srv_main_thread_op_info = "flushing log";
273 log_buffer_flush_to_disk();
274 @@ -2365,7 +2416,7 @@
275 even if the server were active */
277 srv_main_thread_op_info = "doing insert buffer merge";
278 - ibuf_contract_for_n_pages(TRUE, 5);
279 + ibuf_contract_for_n_pages(TRUE, PCT_IO(5));
281 srv_main_thread_op_info = "flushing log";
282 log_buffer_flush_to_disk();
283 @@ -2407,14 +2458,14 @@
284 (> 70 %), we assume we can afford reserving the disk(s) for
285 the time it requires to flush 100 pages */
287 - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
288 + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
291 /* Otherwise, we only flush a small number of pages so that
292 we do not unnecessarily use much disk i/o capacity from
295 - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
296 + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
300 @@ -2503,7 +2554,7 @@
301 if (srv_fast_shutdown && srv_shutdown_state > 0) {
304 - n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
305 + n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100));
308 srv_main_thread_op_info = "reserving kernel mutex";
309 @@ -2520,7 +2571,7 @@
311 if (srv_fast_shutdown < 2) {
313 - buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
314 + buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
316 /* In the fastest shutdown we do not flush the buffer pool
317 to data files: we set n_pages_flushed to 0 artificially. */
318 diff -r 45683461331d innobase/srv/srv0start.c
319 --- a/innobase/srv/srv0start.c Mon Dec 22 00:31:16 2008 -0800
320 +++ b/innobase/srv/srv0start.c Mon Dec 22 00:32:02 2008 -0800
321 @@ -1205,24 +1205,28 @@
325 + /* Overwrite innodb_file_io_threads with the total derived from the read and write thread counts */
326 + srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads;
328 /* Restrict the maximum number of file i/o threads */
329 if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
331 srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
332 + srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2;
335 if (!os_aio_use_native_aio) {
336 /* In simulated aio we currently have use only for 4 threads */
337 - srv_n_file_io_threads = 4;
338 + /*srv_n_file_io_threads = 4;*/
340 os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
341 * srv_n_file_io_threads,
342 - srv_n_file_io_threads,
343 - SRV_MAX_N_PENDING_SYNC_IOS);
344 + srv_n_read_io_threads, srv_n_write_io_threads,
345 + SRV_MAX_N_PENDING_SYNC_IOS * 8);
347 os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
348 * srv_n_file_io_threads,
349 - srv_n_file_io_threads,
350 + srv_n_read_io_threads, srv_n_write_io_threads,
351 SRV_MAX_N_PENDING_SYNC_IOS);
354 diff -r 45683461331d patch_info/innodb_io_patches.info
355 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
356 +++ b/patch_info/innodb_io_patches.info Mon Dec 22 00:32:02 2008 -0800
358 +File=innodb_io_patches.patch
359 +Name=Cluster of past InnoDB IO patches
363 +Comment=This patch contains fixed (control_flush_and_merge_and_read, control_io-threads, adaptive_flush)
367 diff -r 45683461331d sql/ha_innodb.cc
368 --- a/sql/ha_innodb.cc Mon Dec 22 00:31:16 2008 -0800
369 +++ b/sql/ha_innodb.cc Mon Dec 22 00:32:02 2008 -0800
371 innobase_lock_wait_timeout, innobase_force_recovery,
374 +long innobase_read_io_threads, innobase_write_io_threads;
375 longlong innobase_buffer_pool_size, innobase_log_file_size;
377 /* The default values for the following char* start-up parameters
378 @@ -1403,6 +1404,8 @@
379 srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
381 srv_n_file_io_threads = (ulint) innobase_file_io_threads;
382 + srv_n_read_io_threads = (ulint) innobase_read_io_threads;
383 + srv_n_write_io_threads = (ulint) innobase_write_io_threads;
385 srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
386 srv_force_recovery = (ulint) innobase_force_recovery;
387 diff -r 45683461331d sql/ha_innodb.h
388 --- a/sql/ha_innodb.h Mon Dec 22 00:31:16 2008 -0800
389 +++ b/sql/ha_innodb.h Mon Dec 22 00:32:02 2008 -0800
391 extern long innobase_additional_mem_pool_size;
392 extern long innobase_buffer_pool_awe_mem_mb;
393 extern long innobase_file_io_threads, innobase_lock_wait_timeout;
394 +extern long innobase_read_io_threads, innobase_write_io_threads;
395 extern long innobase_force_recovery;
396 extern long innobase_open_files;
397 extern char *innobase_data_home_dir, *innobase_data_file_path;
399 extern ulong srv_thread_concurrency;
400 extern ulong srv_commit_concurrency;
401 extern ulong srv_flush_log_at_trx_commit;
402 +extern ulong srv_io_capacity; /* NOTE(review): these three are ulint in srv0srv.h/.c; confirm ulong == ulint on all targets */
403 +extern ulong srv_read_ahead;
404 +extern ulong srv_adaptive_checkpoint;
407 bool innobase_init(void);
408 diff -r 45683461331d sql/mysqld.cc
409 --- a/sql/mysqld.cc Mon Dec 22 00:31:16 2008 -0800
410 +++ b/sql/mysqld.cc Mon Dec 22 00:32:02 2008 -0800
411 @@ -5036,6 +5036,11 @@
412 OPT_INNODB_ROLLBACK_ON_TIMEOUT,
413 OPT_SECURE_FILE_PRIV,
414 OPT_KEEP_FILES_ON_CREATE,
415 + OPT_INNODB_IO_CAPACITY,
416 + OPT_INNODB_READ_AHEAD,
417 + OPT_INNODB_ADAPTIVE_CHECKPOINT,
418 + OPT_INNODB_READ_IO_THREADS,
419 + OPT_INNODB_WRITE_IO_THREADS,
420 OPT_INNODB_ADAPTIVE_HASH_INDEX,
423 @@ -5344,6 +5349,26 @@
424 (gptr*) &global_system_variables.innodb_table_locks,
425 (gptr*) &global_system_variables.innodb_table_locks,
426 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
427 + {"innodb_io_capacity", OPT_INNODB_IO_CAPACITY,
428 + "Number of IO operations per second the server can do. Tunes background IO rate.",
429 + (gptr*) &srv_io_capacity, (gptr*) &srv_io_capacity,
430 + 0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0},
431 + {"innodb_read_ahead", OPT_INNODB_READ_AHEAD, /* bitmask: 1 random, 2 linear, 3 both */
432 + "Enable/Disable read aheads bit0:random bit1:linear",
433 + (gptr*) &srv_read_ahead, (gptr*) &srv_read_ahead,
434 + 0, GET_ULONG, REQUIRED_ARG, 3, 0, 3, 0, 0, 0},
435 + {"innodb_adaptive_checkpoint", OPT_INNODB_ADAPTIVE_CHECKPOINT, /* on/off switch: 0 or 1 */
436 + "Enable/Disable flushing along modified age 0:disable 1:enable",
437 + (gptr*) &srv_adaptive_checkpoint, (gptr*) &srv_adaptive_checkpoint,
438 + 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
439 + {"innodb_read_io_threads", OPT_INNODB_READ_IO_THREADS,
440 + "Number of background read I/O threads in InnoDB.",
441 + (gptr*) &innobase_read_io_threads, (gptr*) &innobase_read_io_threads,
442 + 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0},
443 + {"innodb_write_io_threads", OPT_INNODB_WRITE_IO_THREADS,
444 + "Number of background write I/O threads in InnoDB.",
445 + (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads,
446 + 0, GET_LONG, REQUIRED_ARG, 1, 1, 64, 0, 0, 0},
447 #endif /* End HAVE_INNOBASE_DB */
448 {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.",
449 (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0,
450 diff -r 45683461331d sql/set_var.cc
451 --- a/sql/set_var.cc Mon Dec 22 00:31:16 2008 -0800
452 +++ b/sql/set_var.cc Mon Dec 22 00:32:02 2008 -0800
454 sys_var_long_ptr sys_innodb_flush_log_at_trx_commit(
455 "innodb_flush_log_at_trx_commit",
456 &srv_flush_log_at_trx_commit);
457 +sys_var_long_ptr sys_innodb_io_capacity("innodb_io_capacity",
459 +sys_var_long_ptr sys_innodb_read_ahead("innodb_read_ahead",
461 +sys_var_long_ptr sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint",
462 + &srv_adaptive_checkpoint);
463 sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path",
464 &innobase_data_file_path);
465 sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir",
467 &sys_innodb_thread_concurrency,
468 &sys_innodb_commit_concurrency,
469 &sys_innodb_flush_log_at_trx_commit,
470 + &sys_innodb_io_capacity,
471 + &sys_innodb_read_ahead,
472 + &sys_innodb_adaptive_checkpoint,
474 &sys_trust_routine_creators,
475 &sys_trust_function_creators,
477 {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS},
478 {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS},
479 {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS},
480 + {sys_innodb_io_capacity.name, (char*) &sys_innodb_io_capacity, SHOW_SYS},
481 + {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS},
482 + {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS},
483 + {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG},
484 + {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG},
486 {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS},
487 {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS},