]> git.pld-linux.org Git - packages/mysql.git/blame - mysql-innodb_io_patches.patch
- expire-logs-days sample
[packages/mysql.git] / mysql-innodb_io_patches.patch
CommitLineData
89b96684
ER
1diff -ruN a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c
2--- a/innobase/buf/buf0flu.c 2009-05-08 06:12:03.000000000 +0900
3+++ b/innobase/buf/buf0flu.c 2009-07-02 16:44:49.000000000 +0900
dcc72bc6
ER
4@@ -898,10 +898,17 @@
5
6 old_page_count = page_count;
7
8+ if (srv_flush_neighbor_pages) {
9 /* Try to flush also all the neighbors */
10 page_count +=
11 buf_flush_try_neighbors(space, offset,
12 flush_type);
13+ } else {
14+ /* Try to flush the page only */
15+ page_count +=
16+ buf_flush_try_page(space, offset,
17+ flush_type);
18+ }
19 /* fprintf(stderr,
20 "Flush type %lu, page no %lu, neighb %lu\n",
21 flush_type, offset,
89b96684
ER
22diff -ruN a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c
23--- a/innobase/buf/buf0rea.c 2009-07-02 16:43:23.000000000 +0900
24+++ b/innobase/buf/buf0rea.c 2009-07-02 16:44:49.000000000 +0900
45532174
ER
25@@ -20,6 +20,7 @@
26 #include "os0file.h"
27 #include "srv0start.h"
28
29+extern uint srv_read_ahead;
30 extern ulint srv_read_ahead_rnd;
31 extern ulint srv_read_ahead_seq;
32 extern ulint srv_buf_pool_reads;
33@@ -189,6 +190,10 @@
eccb488f
ER
34 ulint err;
35 ulint i;
dcc72bc6 36
eccb488f
ER
37+ if (!(srv_read_ahead & 1)) {
38+ return(0);
39+ }
dcc72bc6 40+
eccb488f
ER
41 if (srv_startup_is_before_trx_rollback_phase) {
42 /* No read-ahead to avoid thread deadlocks */
dcc72bc6 43 return(0);
45532174 44@@ -396,6 +401,10 @@
eccb488f
ER
45 ulint err;
46 ulint i;
47
48+ if (!(srv_read_ahead & 2)) {
49+ return(0);
50+ }
51+
52 if (srv_startup_is_before_trx_rollback_phase) {
53 /* No read-ahead to avoid thread deadlocks */
54 return(0);
89b96684
ER
55diff -ruN a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c
56--- a/innobase/ibuf/ibuf0ibuf.c 2009-05-08 06:12:04.000000000 +0900
57+++ b/innobase/ibuf/ibuf0ibuf.c 2009-07-02 16:44:49.000000000 +0900
dcc72bc6
ER
58@@ -370,8 +370,9 @@
59 grow in size, as the references on the upper levels of the tree can
60 change */
61
62- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
63- / IBUF_POOL_SIZE_PER_MAX_SIZE;
64+ ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
65+ / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
66+ srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
67 ibuf->meter = IBUF_THRESHOLD + 1;
68
69 UT_LIST_INIT(ibuf->data_list);
70@@ -2258,11 +2259,13 @@
71
72 mutex_enter(&ibuf_mutex);
73
74+ if (!srv_ibuf_active_contract) {
75 if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
76 mutex_exit(&ibuf_mutex);
77
78 return;
79 }
80+ }
81
82 sync = FALSE;
83
89b96684
ER
84diff -ruN a/innobase/include/log0log.h b/innobase/include/log0log.h
85--- a/innobase/include/log0log.h 2009-05-08 06:12:06.000000000 +0900
86+++ b/innobase/include/log0log.h 2009-07-02 16:44:49.000000000 +0900
87@@ -169,6 +169,13 @@
88 log_buffer_flush_to_disk(void);
89 /*==========================*/
90 /********************************************************************
91+Flushes the log buffer. Forces it to disk depending on the value of
92+the configuration parameter innodb_flush_log_at_trx_commit. */
93+
94+void
95+log_buffer_flush_maybe_sync(void);
96+/*=============================*/
97+/********************************************************************
98 Advances the smallest lsn for which there are unflushed dirty blocks in the
99 buffer pool and also may make a new checkpoint. NOTE: this function may only
100 be called if the calling thread owns no synchronization objects! */
101diff -ruN a/innobase/include/os0file.h b/innobase/include/os0file.h
102--- a/innobase/include/os0file.h 2009-07-02 16:43:23.000000000 +0900
103+++ b/innobase/include/os0file.h 2009-07-02 16:44:49.000000000 +0900
eccb488f
ER
104@@ -551,8 +551,10 @@
105 /*========*/
106 ulint n, /* in: maximum number of pending aio operations
107 allowed; n must be divisible by n_segments */
108- ulint n_segments, /* in: combined number of segments in the four
109- first aio arrays; must be >= 4 */
110+// ulint n_segments, /* in: combined number of segments in the four
111+// first aio arrays; must be >= 4 */
112+ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads */
113+ ulint n_write_threads, /* in: number of write segments (one per write i/o thread) */
114 ulint n_slots_sync); /* in: number of slots in the sync aio array */
115 /***********************************************************************
116 Requests an asynchronous i/o operation. */
89b96684
ER
117diff -ruN a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
118--- a/innobase/include/srv0srv.h 2009-07-02 16:43:23.000000000 +0900
119+++ b/innobase/include/srv0srv.h 2009-07-02 18:02:38.000000000 +0900
eccb488f
ER
120@@ -89,6 +89,8 @@
121 extern ulint srv_lock_table_size;
122
123 extern ulint srv_n_file_io_threads;
124+extern ulint srv_n_read_io_threads;
125+extern ulint srv_n_write_io_threads;
126
127 #ifdef UNIV_LOG_ARCHIVE
128 extern ibool srv_log_archive_on;
45532174 129@@ -133,6 +135,15 @@
eccb488f
ER
130 extern ulong srv_max_purge_lag;
131 extern ibool srv_use_awe;
132 extern ibool srv_use_adaptive_hash_indexes;
133+
134+extern ulint srv_io_capacity;
dcc72bc6
ER
135+extern long long srv_ibuf_max_size;
136+extern ulint srv_ibuf_active_contract;
137+extern ulint srv_ibuf_accel_rate;
138+extern ulint srv_flush_neighbor_pages;
45532174 139+extern ulint srv_enable_unsafe_group_commit;
dcc72bc6 140+extern uint srv_read_ahead;
89b96684 141+extern uint srv_adaptive_checkpoint;
eccb488f
ER
142 /*-------------------------------------------*/
143
144 extern ulint srv_n_rows_inserted;
89b96684
ER
145diff -ruN a/innobase/log/log0log.c b/innobase/log/log0log.c
146--- a/innobase/log/log0log.c 2009-05-08 06:12:10.000000000 +0900
147+++ b/innobase/log/log0log.c 2009-07-02 16:44:49.000000000 +0900
148@@ -1524,6 +1524,29 @@
149 }
150
151 /********************************************************************
152+Flushes the log buffer up to the current log_sys->lsn (read under log_sys->mutex).
153+The log is forced to disk only when innodb_flush_log_at_trx_commit is 1. */
154+
155+void
156+log_buffer_flush_maybe_sync(void)
157+/*=============================*/
158+{
159+ dulint lsn;
160+
161+ mutex_enter(&(log_sys->mutex));
162+
163+ lsn = log_sys->lsn;
164+
165+ mutex_exit(&(log_sys->mutex));
166+
167+ /* Force log buffer to disk only when innodb_flush_log_at_trx_commit = 1; otherwise request a background async write. */
168+ log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS,
169+ srv_flush_log_at_trx_commit == 1 ? TRUE : FALSE,
170+ srv_flush_log_at_trx_commit == 1 ?
171+ LOG_WRITE_FROM_BACKGROUND_SYNC :
172+ LOG_WRITE_FROM_BACKGROUND_ASYNC);
173+}
174+/********************************************************************
175 Tries to establish a big enough margin of free space in the log buffer, such
176 that a new log entry can be catenated without an immediate need for a flush. */
177 static
178@@ -3326,6 +3349,15 @@
eccb488f
ER
179 (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
180 (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));
181
182+ fprintf(file,
183+ "Max checkpoint age %lu\n"
184+ "Modified age %lu\n"
185+ "Checkpoint age %lu\n",
186+ (ulong) log_sys->max_checkpoint_age,
187+ (ulong) ut_dulint_minus(log_sys->lsn,
188+ log_buf_pool_get_oldest_modification()),
189+ (ulong) ut_dulint_minus(log_sys->lsn, log_sys->last_checkpoint_lsn));
190+
191 current_time = time(NULL);
192
193 time_elapsed = 0.001 + difftime(current_time,
89b96684
ER
194diff -ruN a/innobase/os/os0file.c b/innobase/os/os0file.c
195--- a/innobase/os/os0file.c 2009-07-02 16:43:23.000000000 +0900
196+++ b/innobase/os/os0file.c 2009-07-02 16:44:49.000000000 +0900
197@@ -66,6 +66,28 @@
198
199 ibool os_aio_print_debug = FALSE;
200
201+/* Status of an IO request in simulated AIO.
202+ Protocol for simulated aio:
203+ client requests IO: find slot with reserved = FALSE. Add entry with
204+ status = OS_AIO_NOT_ISSUED.
205+ IO thread wakes: find adjacent slots with reserved = TRUE and status =
206+ OS_AIO_NOT_ISSUED. Change status for slots to
207+ OS_AIO_ISSUED.
208+ IO operation completes: set status for slots to OS_AIO_DONE. Set status
209+ for the first slot to OS_AIO_CLAIMED and return
210+ result for that slot.
211+ When there are multiple read and write threads, they all compete to execute
212+ the requests in the array (os_aio_array_t). This avoids the need to load
213+ balance requests at the time the request is made, at the cost of waking all
214+ threads when a request is available.
215+*/
216+typedef enum {
217+ OS_AIO_NOT_ISSUED, /* Available to be processed by an IO thread. */
218+ OS_AIO_ISSUED, /* Being processed by an IO thread. */
219+ OS_AIO_DONE, /* IO performed; result not yet returned to the client. */
220+ OS_AIO_CLAIMED /* Result being returned to client. */
221+} os_aio_status;
222+
223 /* The aio array slot structure */
224 typedef struct os_aio_slot_struct os_aio_slot_t;
225
226@@ -74,6 +96,8 @@
227 ulint pos; /* index of the slot in the aio
228 array */
229 ibool reserved; /* TRUE if this slot is reserved */
230+ os_aio_status status; /* Status for current request. Valid when reserved
231+ is TRUE. Used only in simulated aio. */
232 time_t reservation_time;/* time when reserved */
233 ulint len; /* length of the block to read or
234 write */
235@@ -84,11 +108,11 @@
236 ulint offset_high; /* 32 high bits of file offset */
237 os_file_t file; /* file where to read or write */
238 const char* name; /* file name or path */
239- ibool io_already_done;/* used only in simulated aio:
240- TRUE if the physical i/o already
241- made and only the slot message
242- needs to be passed to the caller
243- of os_aio_simulated_handle */
244+// ibool io_already_done;/* used only in simulated aio:
245+// TRUE if the physical i/o already
246+// made and only the slot message
247+// needs to be passed to the caller
248+// of os_aio_simulated_handle */
249 fil_node_t* message1; /* message which is given by the */
250 void* message2; /* the requester of an aio operation
251 and which can be used to identify
252@@ -137,6 +161,13 @@
253 /* Array of events used in simulated aio */
254 os_event_t* os_aio_segment_wait_events = NULL;
255
256+/* Number for the first global segment for reading. */
257+const ulint os_aio_first_read_segment = 2;
258+
259+/* Number of the first global segment for writing. Set to
260+os_aio_first_read_segment + n_read_threads (i.e. 2 + n_read_threads). */
261+ulint os_aio_first_write_segment = 0;
262+
263 /* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
264 are NULL when the module has not yet been initialized. */
265 static os_aio_array_t* os_aio_read_array = NULL;
266@@ -145,11 +176,17 @@
267 static os_aio_array_t* os_aio_log_array = NULL;
268 static os_aio_array_t* os_aio_sync_array = NULL;
269
270+/* Per thread buffer used for merged IO requests. Used by
271+os_aio_simulated_handle so that a buffer doesn't have to be allocated
272+for each request. */
273+static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS];
274+static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS];
275+
276 static ulint os_aio_n_segments = ULINT_UNDEFINED;
277
278 /* If the following is TRUE, read i/o handler threads try to
279 wait until a batch of new read requests have been posted */
280-static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
281+static volatile ibool os_aio_recommend_sleep_for_read_threads = FALSE;
282
283 ulint os_n_file_reads = 0;
284 ulint os_bytes_read_since_printout = 0;
285@@ -2878,8 +2915,10 @@
eccb488f
ER
286 /*========*/
287 ulint n, /* in: maximum number of pending aio operations
288 allowed; n must be divisible by n_segments */
289- ulint n_segments, /* in: combined number of segments in the four
290- first aio arrays; must be >= 4 */
291+// ulint n_segments, /* in: combined number of segments in the four
292+// first aio arrays; must be >= 4 */
293+ ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads*/
294+ ulint n_write_threads, /* in: number of write segments (one per write i/o thread) */
295 ulint n_slots_sync) /* in: number of slots in the sync aio array */
296 {
297 ulint n_read_segs;
89b96684 298@@ -2889,6 +2928,8 @@
eccb488f
ER
299 #ifdef POSIX_ASYNC_IO
300 sigset_t sigset;
301 #endif
302+ ulint n_segments = 2 + n_read_threads + n_write_threads;
303+
304 ut_ad(n % n_segments == 0);
305 ut_ad(n_segments >= 4);
306
89b96684
ER
307@@ -2896,14 +2937,17 @@
308
309 for (i = 0; i < n_segments; i++) {
310 srv_set_io_thread_op_info(i, "not started yet");
311+ os_aio_thread_buffer[i] = 0;
312+ os_aio_thread_buffer_size[i] = 0;
eccb488f
ER
313 }
314
315 n_per_seg = n / n_segments;
316- n_write_segs = (n_segments - 2) / 2;
317- n_read_segs = n_segments - 2 - n_write_segs;
318+ n_write_segs = n_write_threads;
319+ n_read_segs = n_read_threads;
320
321 /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
322
89b96684
ER
323+ os_aio_first_write_segment = os_aio_first_read_segment + n_read_threads;
324 os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
325
326 srv_io_thread_function[0] = "insert buffer thread";
327@@ -2912,14 +2956,14 @@
328
329 srv_io_thread_function[1] = "log thread";
330
331- os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
332+ os_aio_read_array = os_aio_array_create(n_per_seg,
333 n_read_segs);
334 for (i = 2; i < 2 + n_read_segs; i++) {
335 ut_a(i < SRV_MAX_N_IO_THREADS);
336 srv_io_thread_function[i] = "read thread";
337 }
338
339- os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
340+ os_aio_write_array = os_aio_array_create(n_per_seg,
341 n_write_segs);
342 for (i = 2 + n_read_segs; i < n_segments; i++) {
343 ut_a(i < SRV_MAX_N_IO_THREADS);
344@@ -3181,6 +3225,13 @@
eccb488f
ER
345 struct aiocb* control;
346 #endif
347 ulint i;
348+ ulint prim_segment;
349+ ulint n;
350+
351+ n = array->n_slots / array->n_segments;
352+ /* Stripe requests over the segments in runs of 64 pages ( aligned to max(BUF_READ_AHEAD_AREA) ) */
353+ prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments);
354+
355 loop:
356 os_mutex_enter(array->mutex);
357
89b96684 358@@ -3199,6 +3250,16 @@
eccb488f
ER
359 goto loop;
360 }
361
362+ for (i = prim_segment * n; i < array->n_slots; i++) {
363+ slot = os_aio_array_get_nth_slot(array, i);
364+
365+ if (slot->reserved == FALSE) {
366+ break;
367+ }
368+ }
369+
370+ if (slot->reserved == TRUE){
371+ /* No free slot found from the intended segment onwards, so search again from the start of the array. */
372 for (i = 0;; i++) {
373 slot = os_aio_array_get_nth_slot(array, i);
374
89b96684 375@@ -3206,6 +3267,7 @@
eccb488f
ER
376 break;
377 }
eccb488f 378 }
dcc72bc6 379+ }
eccb488f
ER
380
381 array->n_reserved++;
dcc72bc6 382
89b96684
ER
383@@ -3228,7 +3290,8 @@
384 slot->buf = buf;
385 slot->offset = offset;
386 slot->offset_high = offset_high;
387- slot->io_already_done = FALSE;
388+// slot->io_already_done = FALSE;
389+ slot->status = OS_AIO_NOT_ISSUED;
390
391 #ifdef WIN_ASYNC_IO
392 control = &(slot->control);
393@@ -3281,6 +3344,7 @@
394 ut_ad(slot->reserved);
395
396 slot->reserved = FALSE;
397+ slot->status = OS_AIO_NOT_ISSUED;
398
399 array->n_reserved--;
400
401@@ -3317,16 +3381,18 @@
402
403 segment = os_aio_get_array_and_local_segment(&array, global_segment);
404
405- n = array->n_slots / array->n_segments;
406+ n = array->n_slots;
407
408 /* Look through n slots after the segment * n'th slot */
409
410 os_mutex_enter(array->mutex);
411
412 for (i = 0; i < n; i++) {
413- slot = os_aio_array_get_nth_slot(array, i + segment * n);
414+ slot = os_aio_array_get_nth_slot(array, i);
415
416- if (slot->reserved) {
417+ if (slot->reserved &&
418+ (slot->status == OS_AIO_NOT_ISSUED ||
419+ slot->status == OS_AIO_DONE)) {
420 /* Found an i/o request */
421
422 break;
423@@ -3336,7 +3402,25 @@
424 os_mutex_exit(array->mutex);
425
426 if (i < n) {
427- os_event_set(os_aio_segment_wait_events[global_segment]);
428+ if (array == os_aio_ibuf_array) {
429+ os_event_set(os_aio_segment_wait_events[0]);
430+
431+ } else if (array == os_aio_log_array) {
432+ os_event_set(os_aio_segment_wait_events[1]);
433+
434+ } else if (array == os_aio_read_array) {
435+ ulint x;
436+ for (x = os_aio_first_read_segment; x < os_aio_first_write_segment; x++)
437+ os_event_set(os_aio_segment_wait_events[x]);
438+
439+ } else if (array == os_aio_write_array) {
440+ ulint x;
441+ for (x = os_aio_first_write_segment; x < os_aio_n_segments; x++)
442+ os_event_set(os_aio_segment_wait_events[x]);
443+
444+ } else {
445+ ut_a(0);
446+ }
447 }
448 }
449
450@@ -3347,8 +3431,6 @@
451 os_aio_simulated_wake_handler_threads(void)
452 /*=======================================*/
453 {
454- ulint i;
455-
456 if (os_aio_use_native_aio) {
457 /* We do not use simulated aio: do nothing */
458
459@@ -3357,9 +3439,10 @@
460
461 os_aio_recommend_sleep_for_read_threads = FALSE;
462
463- for (i = 0; i < os_aio_n_segments; i++) {
464- os_aio_simulated_wake_handler_thread(i);
465- }
466+ os_aio_simulated_wake_handler_thread(0);
467+ os_aio_simulated_wake_handler_thread(1);
468+ os_aio_simulated_wake_handler_thread(os_aio_first_read_segment);
469+ os_aio_simulated_wake_handler_thread(os_aio_first_write_segment);
470 }
471
472 /**************************************************************************
473@@ -3640,7 +3723,7 @@
474 ut_ad(os_aio_validate());
475 ut_ad(segment < array->n_segments);
476
477- n = array->n_slots / array->n_segments;
478+ n = array->n_slots;
479
480 if (array == os_aio_sync_array) {
481 os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
482@@ -3648,12 +3731,12 @@
483 } else {
484 srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
485 i = os_event_wait_multiple(n,
486- (array->native_events) + segment * n);
487+ (array->native_events));
488 }
489
490 os_mutex_enter(array->mutex);
491
492- slot = os_aio_array_get_nth_slot(array, i + segment * n);
493+ slot = os_aio_array_get_nth_slot(array, i);
494
495 ut_a(slot->reserved);
496
497@@ -3830,10 +3913,13 @@
498 os_aio_slot_t* slot;
499 os_aio_slot_t* slot2;
500 os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
501+ os_aio_slot_t* lowest_request;
502+ os_aio_slot_t* oldest_request;
503 ulint n_consecutive;
504 ulint total_len;
505 ulint offs;
506 ulint lowest_offset;
507+ ulint oldest_offset;
508 ulint biggest_age;
509 ulint age;
510 byte* combined_buf;
511@@ -3841,6 +3927,7 @@
512 ibool ret;
513 ulint n;
514 ulint i;
515+ time_t now;
516
517 segment = os_aio_get_array_and_local_segment(&array, global_segment);
518
519@@ -3853,7 +3940,7 @@
520 ut_ad(os_aio_validate());
521 ut_ad(segment < array->n_segments);
522
523- n = array->n_slots / array->n_segments;
524+ n = array->n_slots;
525
526 /* Look through n slots after the segment * n'th slot */
527
528@@ -3875,9 +3962,9 @@
529 done */
530
531 for (i = 0; i < n; i++) {
532- slot = os_aio_array_get_nth_slot(array, i + segment * n);
533+ slot = os_aio_array_get_nth_slot(array, i);
534
535- if (slot->reserved && slot->io_already_done) {
536+ if (slot->reserved && slot->status == OS_AIO_DONE) {
537
538 if (os_aio_print_debug) {
539 fprintf(stderr,
540@@ -3897,67 +3984,57 @@
541 then pick the one at the lowest offset. */
542
543 biggest_age = 0;
544- lowest_offset = ULINT_MAX;
545+ now = time(NULL);
546+ oldest_request = lowest_request = NULL;
547+ oldest_offset = lowest_offset = ULINT_MAX;
548
549+ /* Find the oldest request and the request with the smallest offset */
550 for (i = 0; i < n; i++) {
551- slot = os_aio_array_get_nth_slot(array, i + segment * n);
552+ slot = os_aio_array_get_nth_slot(array, i);
553
554- if (slot->reserved) {
555- age = (ulint)difftime(time(NULL),
556- slot->reservation_time);
557+ if (slot->reserved && slot->status == OS_AIO_NOT_ISSUED) {
558+ age = (ulint)difftime(now, slot->reservation_time);
559
560 if ((age >= 2 && age > biggest_age)
561 || (age >= 2 && age == biggest_age
562- && slot->offset < lowest_offset)) {
563+ && slot->offset < oldest_offset)) {
564
565 /* Found an i/o request */
566- consecutive_ios[0] = slot;
567-
568- n_consecutive = 1;
569-
570 biggest_age = age;
571- lowest_offset = slot->offset;
572+ oldest_request = slot;
573+ oldest_offset = slot->offset;
574 }
575- }
576- }
577-
578- if (n_consecutive == 0) {
579- /* There were no old requests. Look for an i/o request at the
580- lowest offset in the array (we ignore the high 32 bits of the
581- offset in these heuristics) */
582-
583- lowest_offset = ULINT_MAX;
584-
585- for (i = 0; i < n; i++) {
586- slot = os_aio_array_get_nth_slot(array,
587- i + segment * n);
588-
589- if (slot->reserved && slot->offset < lowest_offset) {
590
591+ /* Look for an i/o request at the lowest offset in the array
592+ * (we ignore the high 32 bits of the offset) */
593+ if (slot->offset < lowest_offset) {
594 /* Found an i/o request */
595- consecutive_ios[0] = slot;
596-
597- n_consecutive = 1;
598-
599+ lowest_request = slot;
600 lowest_offset = slot->offset;
601 }
602 }
603 }
604
605- if (n_consecutive == 0) {
606+ if (!lowest_request && !oldest_request) {
607
608 /* No i/o requested at the moment */
609
610 goto wait_for_io;
611 }
612
613- slot = consecutive_ios[0];
614+ if (oldest_request) {
615+ slot = oldest_request;
616+ } else {
617+ slot = lowest_request;
618+ }
619+ consecutive_ios[0] = slot;
620+ n_consecutive = 1;
621
622 /* Check if there are several consecutive blocks to read or write */
623
624 consecutive_loop:
625 for (i = 0; i < n; i++) {
626- slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
627+ slot2 = os_aio_array_get_nth_slot(array, i);
628
629 if (slot2->reserved && slot2 != slot
630 && slot2->offset == slot->offset + slot->len
631@@ -3965,7 +4042,8 @@
632 sum does not wrap over */
633 && slot2->offset_high == slot->offset_high
634 && slot2->type == slot->type
635- && slot2->file == slot->file) {
636+ && slot2->file == slot->file
637+ && slot2->status == OS_AIO_NOT_ISSUED) {
638
639 /* Found a consecutive i/o request */
640
641@@ -3994,6 +4072,8 @@
642
643 for (i = 0; i < n_consecutive; i++) {
644 total_len += consecutive_ios[i]->len;
645+ ut_a(consecutive_ios[i]->status == OS_AIO_NOT_ISSUED);
646+ consecutive_ios[i]->status = OS_AIO_ISSUED;
647 }
648
649 if (n_consecutive == 1) {
650@@ -4001,7 +4081,14 @@
651 combined_buf = slot->buf;
652 combined_buf2 = NULL;
653 } else {
654- combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);
655+ if ((total_len + UNIV_PAGE_SIZE) > os_aio_thread_buffer_size[global_segment]) {
656+ if (os_aio_thread_buffer[global_segment])
657+ ut_free(os_aio_thread_buffer[global_segment]);
658+
659+ os_aio_thread_buffer[global_segment] = ut_malloc(total_len + UNIV_PAGE_SIZE);
660+ os_aio_thread_buffer_size[global_segment] = total_len + UNIV_PAGE_SIZE;
661+ }
662+ combined_buf2 = os_aio_thread_buffer[global_segment];
663
664 ut_a(combined_buf2);
665
666@@ -4012,6 +4099,9 @@
667 this assumes that there is just one i/o-handler thread serving
668 a single segment of slots! */
669
670+ ut_a(slot->reserved);
671+ ut_a(slot->status == OS_AIO_ISSUED);
672+
673 os_mutex_exit(array->mutex);
674
675 if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
676@@ -4081,16 +4171,13 @@
677 }
678 }
679
680- if (combined_buf2) {
681- ut_free(combined_buf2);
682- }
683-
684 os_mutex_enter(array->mutex);
685
686 /* Mark the i/os done in slots */
687
688 for (i = 0; i < n_consecutive; i++) {
689- consecutive_ios[i]->io_already_done = TRUE;
690+ ut_a(consecutive_ios[i]->status == OS_AIO_ISSUED);
691+ consecutive_ios[i]->status = OS_AIO_DONE;
692 }
693
694 /* We return the messages for the first slot now, and if there were
695@@ -4100,6 +4187,8 @@
696 slot_io_done:
697
698 ut_a(slot->reserved);
699+ ut_a(slot->status == OS_AIO_DONE);
700+ slot->status = OS_AIO_CLAIMED;
701
702 *message1 = slot->message1;
703 *message2 = slot->message2;
704diff -ruN a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
705--- a/innobase/srv/srv0srv.c 2009-07-02 16:43:23.000000000 +0900
706+++ b/innobase/srv/srv0srv.c 2009-07-02 18:36:54.000000000 +0900
eccb488f
ER
707@@ -167,6 +167,8 @@
708 ulint srv_lock_table_size = ULINT_MAX;
709
710 ulint srv_n_file_io_threads = ULINT_MAX;
711+ulint srv_n_read_io_threads = 1;
712+ulint srv_n_write_io_threads = 1;
713
714 #ifdef UNIV_LOG_ARCHIVE
715 ibool srv_log_archive_on = FALSE;
45532174 716@@ -330,6 +332,24 @@
eccb488f
ER
717 ibool srv_use_awe = FALSE;
718 ibool srv_use_adaptive_hash_indexes = TRUE;
719
720+ulint srv_io_capacity = 100;
721+
722+/* Returns the number of IO operations that is X percent of the capacity.
723+PCT_IO(5) -> returns the number of IO operations that is 5% of the max
724+where max is srv_io_capacity. */
725+#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0)))
726+
dcc72bc6
ER
727+long long srv_ibuf_max_size = 0;
728+ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
729+ulint srv_ibuf_accel_rate = 100;
730+#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0)))
731+
732+ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
733+
45532174
ER
734+ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
735+
dcc72bc6 736+uint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
89b96684 737+uint srv_adaptive_checkpoint = 0; /* 0: none 1: reflex 2: estimate */
eccb488f
ER
738 /*-------------------------------------------*/
739 ulong srv_n_spin_wait_rounds = 20;
740 ulong srv_n_free_tickets_to_enter = 500;
89b96684
ER
741@@ -2228,6 +2248,10 @@
742 ulint n_pend_ios;
eccb488f
ER
743 ibool skip_sleep = FALSE;
744 ulint i;
89b96684
ER
745+
746+ dulint lsn_old;
eccb488f 747+
89b96684
ER
748+ dulint oldest_lsn;
749
eccb488f
ER
750 #ifdef UNIV_DEBUG_THREAD_CREATION
751 fprintf(stderr, "Master thread starts, id %lu\n",
89b96684
ER
752@@ -2244,6 +2268,9 @@
753
754 mutex_exit(&kernel_mutex);
755
756+ mutex_enter(&(log_sys->mutex));
757+ lsn_old = log_sys->lsn;
758+ mutex_exit(&(log_sys->mutex));
759 os_event_set(srv_sys->operational);
760 loop:
761 /*****************************************************************/
762@@ -2279,6 +2306,18 @@
763 if (!skip_sleep) {
764
765 os_thread_sleep(1000000);
766+ /*
767+ mutex_enter(&(log_sys->mutex));
768+ oldest_lsn = buf_pool_get_oldest_modification();
769+ dulint lsn = log_sys->lsn;
770+ mutex_exit(&(log_sys->mutex));
771+
772+ if (!ut_dulint_is_zero(oldest_lsn))
773+ fprintf(stderr,
774+ "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
775+ ut_dulint_minus(lsn, oldest_lsn) * 100 / log_sys->max_checkpoint_age,
776+ ut_dulint_minus(lsn, lsn_old));
777+ */
778 }
779
780 skip_sleep = FALSE;
781@@ -2317,13 +2356,14 @@
eccb488f
ER
782 + log_sys->n_pending_writes;
783 n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
784 + buf_pool->n_pages_written;
785- if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
89b96684 786+ if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) {
eccb488f
ER
787 srv_main_thread_op_info = "doing insert buffer merge";
788- ibuf_contract_for_n_pages(TRUE, 5);
dcc72bc6 789+ ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(5));
eccb488f
ER
790
791 srv_main_thread_op_info = "flushing log";
792
89b96684
ER
793- log_buffer_flush_to_disk();
794+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
795+ log_buffer_flush_maybe_sync();
796 }
797
798 if (buf_get_modified_ratio_pct() >
799@@ -2332,7 +2372,7 @@
eccb488f
ER
800 /* Try to keep the number of modified pages in the
801 buffer pool under the limit wished by the user */
802
803- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
804+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
805 ut_dulint_max);
806
807 /* If we had to do the flush, it may have taken
89b96684 808@@ -2341,6 +2381,140 @@
eccb488f
ER
809 iteration of this loop. */
810
811 skip_sleep = TRUE;
89b96684
ER
812+ mutex_enter(&(log_sys->mutex));
813+ lsn_old = log_sys->lsn;
814+ mutex_exit(&(log_sys->mutex));
815+ } else if (srv_adaptive_checkpoint == 1) {
eccb488f
ER
816+
817+ /* Try to keep the modified age from exceeding
818+ the max_checkpoint_age * 7/8 line */
819+
820+ mutex_enter(&(log_sys->mutex));
89b96684 821+ lsn_old = log_sys->lsn;
eccb488f
ER
822+ oldest_lsn = buf_pool_get_oldest_modification();
823+ if (ut_dulint_is_zero(oldest_lsn)) {
824+
825+ mutex_exit(&(log_sys->mutex));
826+
827+ } else {
828+ if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
dcc72bc6
ER
829+ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
830+ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
831+ /* We should not flush from here. */
832+ mutex_exit(&(log_sys->mutex));
833+ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
eccb488f
ER
834+ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) {
835+
836+ /* 2nd defence line (max_checkpoint_age * 3/4) */
837+
838+ mutex_exit(&(log_sys->mutex));
839+
840+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
841+ ut_dulint_max);
842+ skip_sleep = TRUE;
843+ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
844+ > (log_sys->max_checkpoint_age)/2 ) {
845+
846+ /* 1st defence line (max_checkpoint_age * 1/2) */
847+
848+ mutex_exit(&(log_sys->mutex));
849+
850+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
851+ ut_dulint_max);
852+ skip_sleep = TRUE;
853+ } else {
854+ mutex_exit(&(log_sys->mutex));
855+ }
856+ }
89b96684
ER
857+ } else if (srv_adaptive_checkpoint == 2) {
858+
859+ /* Try to keep the modified age from exceeding
860+ the max_checkpoint_age * 7/8 line */
861+
862+ mutex_enter(&(log_sys->mutex));
863+
864+ oldest_lsn = buf_pool_get_oldest_modification();
865+ if (ut_dulint_is_zero(oldest_lsn)) {
866+ lsn_old = log_sys->lsn;
867+ mutex_exit(&(log_sys->mutex));
868+
869+ } else {
870+ if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
871+ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
872+ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
873+ /* We should not flush from here. */
874+ lsn_old = log_sys->lsn;
875+ mutex_exit(&(log_sys->mutex));
876+ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
877+ > (log_sys->max_checkpoint_age)/2 ) {
878+
879+ /* defence line (max_checkpoint_age * 1/2) */
880+ dulint lsn = log_sys->lsn;
eccb488f 881+
89b96684
ER
882+ mutex_exit(&(log_sys->mutex));
883+
884+ ib_longlong level, bpl;
885+ buf_block_t* bpage;
886+
887+ mutex_enter(&buf_pool->mutex);
888+
889+ level = 0;
890+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
891+
892+ while (bpage != NULL) {
893+ dulint oldest_modification = bpage->oldest_modification;
894+ if (!ut_dulint_is_zero(oldest_modification)) {
895+ level += log_sys->max_checkpoint_age
896+ - ut_dulint_minus(lsn, oldest_modification);
897+ }
898+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
899+ }
900+
901+ if (level) {
902+ bpl = ((ib_longlong) UT_LIST_GET_LEN(buf_pool->flush_list)
903+ * UT_LIST_GET_LEN(buf_pool->flush_list)
904+ * ut_dulint_minus(lsn, lsn_old)) / level;
905+ } else {
906+ bpl = 0;
907+ }
908+
909+ mutex_exit(&buf_pool->mutex);
910+
911+ if (!srv_use_doublewrite_buf) {
912+ /* flushing is faster when the doublewrite buffer is not used */
913+ bpl = (bpl * 3) / 4;
914+ }
915+
916+ if(bpl) {
917+retry_flush_batch:
918+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
919+ bpl,
920+ ut_dulint_add(oldest_lsn,
921+ ut_dulint_minus(lsn,
922+ lsn_old)));
923+ if (n_pages_flushed == ULINT_UNDEFINED) {
924+ os_thread_sleep(5000);
925+ goto retry_flush_batch;
926+ }
927+ }
928+
929+ lsn_old = lsn;
930+ /*
931+ fprintf(stderr,
932+ "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
933+ ut_dulint_minus(lsn, oldest_lsn) * 100 / log_sys->max_checkpoint_age,
934+ ut_dulint_minus(lsn, lsn_old), bpl);
935+ */
936+ } else {
937+ lsn_old = log_sys->lsn;
938+ mutex_exit(&(log_sys->mutex));
939+ }
940+ }
941+
942+ } else {
943+ mutex_enter(&(log_sys->mutex));
944+ lsn_old = log_sys->lsn;
945+ mutex_exit(&(log_sys->mutex));
eccb488f
ER
946 }
947
948 if (srv_activity_count == old_activity_count) {
89b96684 949@@ -2367,23 +2541,25 @@
eccb488f
ER
950 n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
951 n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
952 + buf_pool->n_pages_written;
953- if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
954+ if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
955
956 srv_main_thread_op_info = "flushing buffer pool pages";
957- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
958+ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
959
960 srv_main_thread_op_info = "flushing log";
89b96684
ER
961- log_buffer_flush_to_disk();
962+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
963+ log_buffer_flush_maybe_sync();
964 }
965
966 /* We run a batch of insert buffer merge every 10 seconds,
eccb488f
ER
967 even if the server were active */
968
969 srv_main_thread_op_info = "doing insert buffer merge";
970- ibuf_contract_for_n_pages(TRUE, 5);
dcc72bc6 971+ ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(5));
eccb488f
ER
972
973 srv_main_thread_op_info = "flushing log";
89b96684
ER
974- log_buffer_flush_to_disk();
975+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
976+ log_buffer_flush_maybe_sync();
977
978 /* We run a full purge every 10 seconds, even if the server
979 were active */
980@@ -2422,14 +2598,14 @@
eccb488f
ER
981 (> 70 %), we assume we can afford reserving the disk(s) for
982 the time it requires to flush 100 pages */
983
984- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
985+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
986 ut_dulint_max);
987 } else {
988 /* Otherwise, we only flush a small number of pages so that
989 we do not unnecessarily use much disk i/o capacity from
990 other work */
991
992- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
993+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
994 ut_dulint_max);
995 }
996
89b96684 997@@ -2518,7 +2694,7 @@
eccb488f
ER
998 if (srv_fast_shutdown && srv_shutdown_state > 0) {
999 n_bytes_merged = 0;
1000 } else {
1001- n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
dcc72bc6 1002+ n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(100));
eccb488f
ER
1003 }
1004
1005 srv_main_thread_op_info = "reserving kernel mutex";
89b96684 1006@@ -2535,7 +2711,7 @@
eccb488f
ER
1007
1008 if (srv_fast_shutdown < 2) {
1009 n_pages_flushed =
1010- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
1011+ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
1012 } else {
1013 /* In the fastest shutdown we do not flush the buffer pool
1014 to data files: we set n_pages_flushed to 0 artificially. */
89b96684
ER
1015@@ -2557,7 +2733,14 @@
1016
1017 srv_main_thread_op_info = "flushing log";
1018
1019- log_buffer_flush_to_disk();
1020+ current_time = time(NULL);
1021+ if (difftime(current_time, last_flush_time) > 1) {
1022+ log_buffer_flush_to_disk();
1023+ last_flush_time = current_time;
1024+ } else {
1025+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
1026+ log_buffer_flush_maybe_sync();
1027+ }
1028
1029 srv_main_thread_op_info = "making checkpoint";
1030
1031diff -ruN a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c
1032--- a/innobase/srv/srv0start.c 2009-05-08 06:12:12.000000000 +0900
1033+++ b/innobase/srv/srv0start.c 2009-07-02 16:44:49.000000000 +0900
eccb488f
ER
1034@@ -1205,24 +1205,28 @@
1035 return(DB_ERROR);
1036 }
1037
1038+ /* Override innodb_file_io_threads: the total is derived from the read and write I/O thread counts */
1039+ srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads;
1040+
1041 /* Restrict the maximum number of file i/o threads */
1042 if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
1043
1044 srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
1045+ srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2;
1046 }
1047
1048 if (!os_aio_use_native_aio) {
1049 /* In simulated aio we currently have use only for 4 threads */
1050- srv_n_file_io_threads = 4;
1051+ /*srv_n_file_io_threads = 4;*/
1052
1053 os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
1054 * srv_n_file_io_threads,
1055- srv_n_file_io_threads,
eccb488f 1056+ srv_n_read_io_threads, srv_n_write_io_threads,
89b96684 1057 SRV_MAX_N_PENDING_SYNC_IOS);
eccb488f
ER
1058 } else {
1059 os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
1060 * srv_n_file_io_threads,
1061- srv_n_file_io_threads,
1062+ srv_n_read_io_threads, srv_n_write_io_threads,
1063 SRV_MAX_N_PENDING_SYNC_IOS);
1064 }
1065
89b96684
ER
1066diff -ruN a/patch_info/innodb_io_patches.info b/patch_info/innodb_io_patches.info
1067--- /dev/null 1970-01-01 09:00:00.000000000 +0900
1068+++ b/patch_info/innodb_io_patches.info 2009-07-02 16:44:49.000000000 +0900
dcc72bc6 1069@@ -0,0 +1,11 @@
eccb488f
ER
1070+File=innodb_io_patches.patch
1071+Name=Cluster of past InnoDB IO patches
dcc72bc6 1072+Version=1.1
eccb488f
ER
1073+Author=Percona
1074+License=GPL
1075+Comment=This patch contains fixed (control_flush_and_merge_and_read, control_io-threads, adaptive_flush)
1076+ChangeLog=
1077+2008-11-06
1078+YK: Initial release
dcc72bc6
ER
1079+2009-01-09
1080+YK: Some parameters are added
89b96684
ER
1081diff -ruN a/sql/ha_innodb.cc b/sql/ha_innodb.cc
1082--- a/sql/ha_innodb.cc 2009-07-02 16:43:23.000000000 +0900
1083+++ b/sql/ha_innodb.cc 2009-07-02 16:44:49.000000000 +0900
eccb488f
ER
1084@@ -149,6 +149,7 @@
1085 innobase_lock_wait_timeout, innobase_force_recovery,
1086 innobase_open_files;
1087
1088+long innobase_read_io_threads, innobase_write_io_threads;
1089 longlong innobase_buffer_pool_size, innobase_log_file_size;
1090
1091 /* The default values for the following char* start-up parameters
45532174 1092@@ -1417,6 +1418,8 @@
eccb488f
ER
1093 srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
1094
1095 srv_n_file_io_threads = (ulint) innobase_file_io_threads;
1096+ srv_n_read_io_threads = (ulint) innobase_read_io_threads;
1097+ srv_n_write_io_threads = (ulint) innobase_write_io_threads;
1098
1099 srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
1100 srv_force_recovery = (ulint) innobase_force_recovery;
45532174
ER
1101@@ -7330,6 +7333,10 @@
1102 trx_t* trx = check_trx_exists(thd);
1103
1104 if (thd->lex->sql_command != SQLCOM_XA_PREPARE) {
1105+ if (srv_enable_unsafe_group_commit && !thd->variables.innodb_support_xa) {
1106+ /* choose group commit rather than binlog order */
1107+ return(0);
1108+ }
1109
1110 /* For ibbackup to work the order of transactions in binlog
1111 and InnoDB must be the same. Consider the situation
89b96684
ER
1112diff -ruN a/sql/ha_innodb.h b/sql/ha_innodb.h
1113--- a/sql/ha_innodb.h 2009-07-02 16:43:23.000000000 +0900
1114+++ b/sql/ha_innodb.h 2009-07-02 18:10:51.000000000 +0900
eccb488f
ER
1115@@ -204,6 +204,7 @@
1116 extern long innobase_additional_mem_pool_size;
1117 extern long innobase_buffer_pool_awe_mem_mb;
1118 extern long innobase_file_io_threads, innobase_lock_wait_timeout;
1119+extern long innobase_read_io_threads, innobase_write_io_threads;
1120 extern long innobase_force_recovery;
1121 extern long innobase_open_files;
1122 extern char *innobase_data_home_dir, *innobase_data_file_path;
45532174
ER
1123@@ -234,6 +235,15 @@
1124 extern ulong srv_thread_concurrency;
1125 extern ulong srv_commit_concurrency;
1126 extern ulong srv_flush_log_at_trx_commit;
eccb488f 1127+extern ulong srv_io_capacity;
dcc72bc6
ER
1128+extern long long srv_ibuf_max_size;
1129+extern ulong srv_ibuf_active_contract;
1130+extern ulong srv_ibuf_accel_rate;
1131+extern ulong srv_flush_neighbor_pages;
45532174 1132+extern ulong srv_enable_unsafe_group_commit;
dcc72bc6 1133+extern uint srv_read_ahead;
89b96684 1134+extern uint srv_adaptive_checkpoint;
45532174
ER
1135+
1136 /* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does
1137 NOT update cardinality for indexes of InnoDB table". By default we are
1138 running with the fix disabled because MySQL 5.1 is frozen for such
89b96684
ER
1139diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
1140--- a/sql/mysqld.cc 2009-07-02 16:43:23.000000000 +0900
1141+++ b/sql/mysqld.cc 2009-07-02 18:00:04.000000000 +0900
45532174 1142@@ -5086,6 +5086,16 @@
eccb488f
ER
1143 OPT_INNODB_ROLLBACK_ON_TIMEOUT,
1144 OPT_SECURE_FILE_PRIV,
1145 OPT_KEEP_FILES_ON_CREATE,
1146+ OPT_INNODB_IO_CAPACITY,
dcc72bc6
ER
1147+ OPT_INNODB_IBUF_MAX_SIZE,
1148+ OPT_INNODB_IBUF_ACTIVE_CONTRACT,
1149+ OPT_INNODB_IBUF_ACCEL_RATE,
1150+ OPT_INNODB_FLUSH_NEIGHBOR_PAGES,
45532174 1151+ OPT_INNODB_ENABLE_UNSAFE_GROUP_COMMIT,
eccb488f
ER
1152+ OPT_INNODB_READ_AHEAD,
1153+ OPT_INNODB_ADAPTIVE_CHECKPOINT,
1154+ OPT_INNODB_READ_IO_THREADS,
1155+ OPT_INNODB_WRITE_IO_THREADS,
1156 OPT_INNODB_ADAPTIVE_HASH_INDEX,
45532174
ER
1157 OPT_FEDERATED,
1158 OPT_INNODB_USE_LEGACY_CARDINALITY_ALGORITHM
89b96684 1159@@ -5403,6 +5413,44 @@
45532174
ER
1160 (gptr*) &srv_use_legacy_cardinality_algorithm,
1161 (gptr*) &srv_use_legacy_cardinality_algorithm,
eccb488f
ER
1162 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
1163+ {"innodb_io_capacity", OPT_INNODB_IO_CAPACITY,
1164+ "Number of IO operations per second the server can do. Tunes background IO rate.",
1165+ (gptr*) &srv_io_capacity, (gptr*) &srv_io_capacity,
89b96684 1166+ 0, GET_ULONG, REQUIRED_ARG, 200, 100, 999999999, 0, 0, 0},
dcc72bc6
ER
1167+ {"innodb_ibuf_max_size", OPT_INNODB_IBUF_MAX_SIZE,
1168+ "The maximum size of the insert buffer. (in bytes)",
1169+ (gptr*) &srv_ibuf_max_size, (gptr*) &srv_ibuf_max_size, 0,
1170+ GET_LL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0},
1171+ {"innodb_ibuf_active_contract", OPT_INNODB_IBUF_ACTIVE_CONTRACT,
1172+ "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
1173+ (gptr*) &srv_ibuf_active_contract, (gptr*) &srv_ibuf_active_contract,
1174+ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
1175+ {"innodb_ibuf_accel_rate", OPT_INNODB_IBUF_ACCEL_RATE,
1176+ "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
1177+ (gptr*) &srv_ibuf_accel_rate, (gptr*) &srv_ibuf_accel_rate,
1178+ 0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0},
1179+ {"innodb_flush_neighbor_pages", OPT_INNODB_FLUSH_NEIGHBOR_PAGES,
1180+ "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
1181+ (gptr*) &srv_flush_neighbor_pages, (gptr*) &srv_flush_neighbor_pages,
1182+ 0, GET_ULONG, REQUIRED_ARG, 1, 0, 1, 0, 0, 0},
eccb488f 1183+ {"innodb_read_ahead", OPT_INNODB_READ_AHEAD,
dcc72bc6
ER
1184+ "Control read ahead activity. (none, random, linear, [both])",
1185+ 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
eccb488f 1186+ {"innodb_adaptive_checkpoint", OPT_INNODB_ADAPTIVE_CHECKPOINT,
89b96684
ER
1187+ "Enable/Diasable flushing along modified age. ([none], reflex, estimate)",
1188+ 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
45532174
ER
1189+ {"innodb_enable_unsafe_group_commit", OPT_INNODB_ENABLE_UNSAFE_GROUP_COMMIT,
1190+ "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
1191+ (gptr*) &srv_enable_unsafe_group_commit, (gptr*) &srv_enable_unsafe_group_commit,
1192+ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
eccb488f
ER
1193+ {"innodb_read_io_threads", OPT_INNODB_READ_IO_THREADS,
1194+ "Number of background read I/O threads in InnoDB.",
1195+ (gptr*) &innobase_read_io_threads, (gptr*) &innobase_read_io_threads,
89b96684 1196+ 0, GET_LONG, REQUIRED_ARG, 8, 1, 64, 0, 0, 0},
eccb488f
ER
1197+ {"innodb_write_io_threads", OPT_INNODB_WRITE_IO_THREADS,
1198+ "Number of background write I/O threads in InnoDB.",
1199+ (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads,
89b96684 1200+ 0, GET_LONG, REQUIRED_ARG, 8, 1, 64, 0, 0, 0},
eccb488f
ER
1201 #endif /* End HAVE_INNOBASE_DB */
1202 {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.",
1203 (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0,
89b96684 1204@@ -7644,6 +7692,38 @@
dcc72bc6
ER
1205 case OPT_INNODB_LOG_ARCHIVE:
1206 innobase_log_archive= argument ? test(atoi(argument)) : 1;
1207 break;
1208+ case OPT_INNODB_READ_AHEAD:
1209+ if (argument == disabled_my_option)
1210+ srv_read_ahead = 0;
1211+ else if (! argument)
1212+ srv_read_ahead = 3;
1213+ else
1214+ {
1215+ int type;
1216+ if ((type=find_type(argument, &innodb_read_ahead_typelib, 2)) <= 0)
1217+ {
1218+ fprintf(stderr,"Unknown innodb_read_ahead type: %s\n",argument);
1219+ exit(1);
1220+ }
1221+ srv_read_ahead = (uint) ((type - 1) & 3);
1222+ }
89b96684
ER
1223+ break;
1224+ case OPT_INNODB_ADAPTIVE_CHECKPOINT:
1225+ if (argument == disabled_my_option)
1226+ srv_adaptive_checkpoint = 0;
1227+ else if (! argument)
1228+ srv_adaptive_checkpoint = 0;
1229+ else
1230+ {
1231+ int type;
1232+ if ((type=find_type(argument, &innodb_adaptive_checkpoint_typelib, 2)) <= 0)
1233+ {
1234+ fprintf(stderr,"Unknown innodb_adaptive_checkpoint type: %s\n",argument);
1235+ exit(1);
1236+ }
1237+ srv_adaptive_checkpoint = (uint) ((type - 1) % 3);
1238+ }
dcc72bc6
ER
1239+ break;
1240 #endif /* HAVE_INNOBASE_DB */
1241 case OPT_MYISAM_RECOVER:
1242 {
89b96684
ER
1243diff -ruN a/sql/set_var.cc b/sql/set_var.cc
1244--- a/sql/set_var.cc 2009-07-02 16:43:23.000000000 +0900
1245+++ b/sql/set_var.cc 2009-07-02 17:45:29.000000000 +0900
1246@@ -489,6 +489,57 @@
eccb488f
ER
1247 sys_var_long_ptr sys_innodb_flush_log_at_trx_commit(
1248 "innodb_flush_log_at_trx_commit",
1249 &srv_flush_log_at_trx_commit);
1250+sys_var_long_ptr sys_innodb_io_capacity("innodb_io_capacity",
1251+ &srv_io_capacity);
dcc72bc6
ER
1252+sys_var_long_ptr sys_innodb_ibuf_active_contract("innodb_ibuf_active_contract",
1253+ &srv_ibuf_active_contract);
1254+sys_var_long_ptr sys_innodb_ibuf_accel_rate("innodb_ibuf_accel_rate",
1255+ &srv_ibuf_accel_rate);
1256+sys_var_long_ptr sys_innodb_flush_neighbor_pages("innodb_flush_neighbor_pages",
1257+ &srv_flush_neighbor_pages);
1258+
1259+const char *innodb_read_ahead_names[]= /* accepted innodb_read_ahead values; list position is the typelib index */
1260+{
1261+ "none", /* 0 */
1262+ "random", /* 1 */
1263+ "linear", /* 2 */
1264+ "both", /* 3 */
1265+ /* Numeric aliases, kept for compatibility with the older patch */
1266+ "0", /* 4 ("none" + 4) */
1267+ "1", /* 5 ("random" + 4) */
1268+ "2", /* 6 ("linear" + 4) */
1269+ "3", /* 7 ("both" + 4) */
1270+ NullS /* list terminator, not a selectable value */
1271+};
1272+TYPELIB innodb_read_ahead_typelib= /* typelib wrapping innodb_read_ahead_names */
1273+{
1274+ array_elements(innodb_read_ahead_names) - 1, "innodb_read_ahead_typelib", /* "- 1" keeps the trailing NullS out of the count */
1275+ innodb_read_ahead_names, NULL
1276+};
1277+sys_var_enum sys_innodb_read_ahead("innodb_read_ahead", &srv_read_ahead,
1278+ &innodb_read_ahead_typelib, fix_innodb_read_ahead);
45532174
ER
1279+sys_var_long_ptr sys_innodb_enable_unsafe_group_commit("innodb_enable_unsafe_group_commit",
1280+ &srv_enable_unsafe_group_commit);
89b96684
ER
1281+
1282+const char *innodb_adaptive_checkpoint_names[]= /* accepted innodb_adaptive_checkpoint values; list position is the typelib index */
1283+{
1284+ "none", /* 0 */
1285+ "reflex", /* 1 */
1286+ "estimate", /* 2 */
1287+ /* Numeric aliases, kept for compatibility with the older patch */
1288+ "0", /* 3 ("none" + 3) */
1289+ "1", /* 4 ("reflex" + 3) */
1290+ "2", /* 5 ("estimate" + 3) */
1291+ NullS /* list terminator, not a selectable value */
1292+};
1293+TYPELIB innodb_adaptive_checkpoint_typelib= /* typelib wrapping innodb_adaptive_checkpoint_names */
1294+{
1295+ array_elements(innodb_adaptive_checkpoint_names) - 1, "innodb_adaptive_checkpoint_typelib", /* "- 1" keeps the trailing NullS out of the count */
1296+ innodb_adaptive_checkpoint_names, NULL
1297+};
1298+sys_var_enum sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint",
1299+ &srv_adaptive_checkpoint,
1300+ &innodb_adaptive_checkpoint_typelib, fix_innodb_adaptive_checkpoint);
eccb488f
ER
1301 sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path",
1302 &innobase_data_file_path);
1303 sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir",
89b96684 1304@@ -860,6 +911,13 @@
eccb488f
ER
1305 &sys_innodb_thread_concurrency,
1306 &sys_innodb_commit_concurrency,
1307 &sys_innodb_flush_log_at_trx_commit,
1308+ &sys_innodb_io_capacity,
dcc72bc6
ER
1309+ &sys_innodb_ibuf_active_contract,
1310+ &sys_innodb_ibuf_accel_rate,
1311+ &sys_innodb_flush_neighbor_pages,
eccb488f 1312+ &sys_innodb_read_ahead,
45532174 1313+ &sys_innodb_enable_unsafe_group_commit,
eccb488f
ER
1314+ &sys_innodb_adaptive_checkpoint,
1315 #endif
1316 &sys_trust_routine_creators,
1317 &sys_trust_function_creators,
89b96684 1318@@ -997,6 +1055,16 @@
45532174
ER
1319 {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS},
1320 {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS},
eccb488f
ER
1321 {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS},
1322+ {sys_innodb_io_capacity.name, (char*) &sys_innodb_io_capacity, SHOW_SYS},
dcc72bc6
ER
1323+ {"innodb_ibuf_max_size", (char*) &srv_ibuf_max_size, SHOW_LONGLONG},
1324+ {sys_innodb_ibuf_active_contract.name, (char*) &sys_innodb_ibuf_active_contract, SHOW_SYS},
1325+ {sys_innodb_ibuf_accel_rate.name, (char*) &sys_innodb_ibuf_accel_rate, SHOW_SYS},
1326+ {sys_innodb_flush_neighbor_pages.name, (char*) &sys_innodb_flush_neighbor_pages, SHOW_SYS},
eccb488f 1327+ {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS},
45532174 1328+ {sys_innodb_enable_unsafe_group_commit.name, (char*) &sys_innodb_enable_unsafe_group_commit, SHOW_SYS},
eccb488f
ER
1329+ {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS},
1330+ {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG},
1331+ {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG},
45532174
ER
1332 {sys_innodb_use_legacy_cardinality_algorithm.name,
1333 (char*) &sys_innodb_use_legacy_cardinality_algorithm, SHOW_SYS},
eccb488f 1334 #endif
89b96684 1335@@ -1459,6 +1527,18 @@
dcc72bc6
ER
1336 }
1337 }
45532174 1338
dcc72bc6
ER
1339+#ifdef HAVE_INNOBASE_DB
1340+extern void fix_innodb_read_ahead(THD *thd, enum_var_type type) /* fix-up for innodb_read_ahead; thd and type are not used */
1341+{
1342+ srv_read_ahead &= 3; /* fold typelib indexes 4..7 (numeric aliases "0".."3") onto 0..3 (none..both) */
1343+}
89b96684
ER
1344+
1345+extern void fix_innodb_adaptive_checkpoint(THD *thd, enum_var_type type) /* fix-up for innodb_adaptive_checkpoint; thd and type are not used */
1346+{
1347+ srv_adaptive_checkpoint %= 3; /* fold typelib indexes 3..5 (numeric aliases "0".."2") onto 0..2 (none, reflex, estimate) */
1348+}
dcc72bc6
ER
1349+#endif /* HAVE_INNOBASE_DB */
1350+
1351 static void fix_max_binlog_size(THD *thd, enum_var_type type)
1352 {
1353 DBUG_ENTER("fix_max_binlog_size");
89b96684
ER
1354diff -ruN a/sql/set_var.h b/sql/set_var.h
1355--- a/sql/set_var.h 2009-07-02 16:43:23.000000000 +0900
1356+++ b/sql/set_var.h 2009-07-02 17:35:17.000000000 +0900
1357@@ -31,6 +31,11 @@
45532174 1358
dcc72bc6 1359 extern TYPELIB bool_typelib, delay_key_write_typelib, sql_mode_typelib;
45532174 1360
dcc72bc6
ER
1361+#ifdef HAVE_INNOBASE_DB
1362+extern TYPELIB innodb_read_ahead_typelib;
89b96684 1363+extern TYPELIB innodb_adaptive_checkpoint_typelib;
dcc72bc6
ER
1364+#endif /* HAVE_INNOBASE_DB */
1365+
1366 typedef int (*sys_check_func)(THD *, set_var *);
1367 typedef bool (*sys_update_func)(THD *, set_var *);
1368 typedef void (*sys_after_update_func)(THD *,enum_var_type);
89b96684 1369@@ -1148,6 +1153,10 @@
dcc72bc6
ER
1370 int sql_set_variables(THD *thd, List<set_var_base> *var_list);
1371 bool not_all_support_one_shot(List<set_var_base> *var_list);
1372 void fix_delay_key_write(THD *thd, enum_var_type type);
1373+#ifdef HAVE_INNOBASE_DB
1374+void fix_innodb_read_ahead(THD *thd, enum_var_type type);
89b96684 1375+void fix_innodb_adaptive_checkpoint(THD *thd, enum_var_type type);
dcc72bc6
ER
1376+#endif /* HAVE_INNOBASE_DB */
1377 ulong fix_sql_mode(ulong sql_mode);
1378 extern sys_var_const_str sys_charset_system;
1379 extern sys_var_str sys_init_connect;
This page took 0.212954 seconds and 4 git commands to generate.