]> git.pld-linux.org Git - packages/mysql.git/blame_incremental - mysql-innodb_io_patches.patch
- expire-logs-days sample
[packages/mysql.git] / mysql-innodb_io_patches.patch
... / ...
CommitLineData
1diff -ruN a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c
2--- a/innobase/buf/buf0flu.c 2009-05-08 06:12:03.000000000 +0900
3+++ b/innobase/buf/buf0flu.c 2009-07-02 16:44:49.000000000 +0900
4@@ -898,10 +898,17 @@
5
6 old_page_count = page_count;
7
8+ if (srv_flush_neighbor_pages) {
9 /* Try to flush also all the neighbors */
10 page_count +=
11 buf_flush_try_neighbors(space, offset,
12 flush_type);
13+ } else {
14+ /* Try to flush the page only */
15+ page_count +=
16+ buf_flush_try_page(space, offset,
17+ flush_type);
18+ }
19 /* fprintf(stderr,
20 "Flush type %lu, page no %lu, neighb %lu\n",
21 flush_type, offset,
22diff -ruN a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c
23--- a/innobase/buf/buf0rea.c 2009-07-02 16:43:23.000000000 +0900
24+++ b/innobase/buf/buf0rea.c 2009-07-02 16:44:49.000000000 +0900
25@@ -20,6 +20,7 @@
26 #include "os0file.h"
27 #include "srv0start.h"
28
29+extern uint srv_read_ahead;
30 extern ulint srv_read_ahead_rnd;
31 extern ulint srv_read_ahead_seq;
32 extern ulint srv_buf_pool_reads;
33@@ -189,6 +190,10 @@
34 ulint err;
35 ulint i;
36
37+ if (!(srv_read_ahead & 1)) {
38+ return(0);
39+ }
40+
41 if (srv_startup_is_before_trx_rollback_phase) {
42 /* No read-ahead to avoid thread deadlocks */
43 return(0);
44@@ -396,6 +401,10 @@
45 ulint err;
46 ulint i;
47
48+ if (!(srv_read_ahead & 2)) {
49+ return(0);
50+ }
51+
52 if (srv_startup_is_before_trx_rollback_phase) {
53 /* No read-ahead to avoid thread deadlocks */
54 return(0);
55diff -ruN a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c
56--- a/innobase/ibuf/ibuf0ibuf.c 2009-05-08 06:12:04.000000000 +0900
57+++ b/innobase/ibuf/ibuf0ibuf.c 2009-07-02 16:44:49.000000000 +0900
58@@ -370,8 +370,9 @@
59 grow in size, as the references on the upper levels of the tree can
60 change */
61
62- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
63- / IBUF_POOL_SIZE_PER_MAX_SIZE;
64+ ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
65+ / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
66+ srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
67 ibuf->meter = IBUF_THRESHOLD + 1;
68
69 UT_LIST_INIT(ibuf->data_list);
70@@ -2258,11 +2259,13 @@
71
72 mutex_enter(&ibuf_mutex);
73
74+ if (!srv_ibuf_active_contract) {
75 if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
76 mutex_exit(&ibuf_mutex);
77
78 return;
79 }
80+ }
81
82 sync = FALSE;
83
84diff -ruN a/innobase/include/log0log.h b/innobase/include/log0log.h
85--- a/innobase/include/log0log.h 2009-05-08 06:12:06.000000000 +0900
86+++ b/innobase/include/log0log.h 2009-07-02 16:44:49.000000000 +0900
87@@ -169,6 +169,13 @@
88 log_buffer_flush_to_disk(void);
89 /*==========================*/
90 /********************************************************************
91+Flushes the log buffer. Forces it to disk depending on the value of
92+the configuration parameter innodb_flush_log_at_trx_commit. */
93+
94+void
95+log_buffer_flush_maybe_sync(void);
96+/*=============================*/
97+/********************************************************************
98 Advances the smallest lsn for which there are unflushed dirty blocks in the
99 buffer pool and also may make a new checkpoint. NOTE: this function may only
100 be called if the calling thread owns no synchronization objects! */
101diff -ruN a/innobase/include/os0file.h b/innobase/include/os0file.h
102--- a/innobase/include/os0file.h 2009-07-02 16:43:23.000000000 +0900
103+++ b/innobase/include/os0file.h 2009-07-02 16:44:49.000000000 +0900
104@@ -551,8 +551,10 @@
105 /*========*/
106 ulint n, /* in: maximum number of pending aio operations
107 allowed; n must be divisible by n_segments */
108- ulint n_segments, /* in: combined number of segments in the four
109- first aio arrays; must be >= 4 */
110+// ulint n_segments, /* in: combined number of segments in the four
111+// first aio arrays; must be >= 4 */
112+ ulint n_read_threads, /* in: number of read i/o threads; n_segments == 2 + n_read_threads + n_write_threads */
113+ ulint n_write_threads, /* in: number of write i/o threads */
114 ulint n_slots_sync); /* in: number of slots in the sync aio array */
115 /***********************************************************************
116 Requests an asynchronous i/o operation. */
117diff -ruN a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
118--- a/innobase/include/srv0srv.h 2009-07-02 16:43:23.000000000 +0900
119+++ b/innobase/include/srv0srv.h 2009-07-02 18:02:38.000000000 +0900
120@@ -89,6 +89,8 @@
121 extern ulint srv_lock_table_size;
122
123 extern ulint srv_n_file_io_threads;
124+extern ulint srv_n_read_io_threads;
125+extern ulint srv_n_write_io_threads;
126
127 #ifdef UNIV_LOG_ARCHIVE
128 extern ibool srv_log_archive_on;
129@@ -133,6 +135,15 @@
130 extern ulong srv_max_purge_lag;
131 extern ibool srv_use_awe;
132 extern ibool srv_use_adaptive_hash_indexes;
133+
134+extern ulint srv_io_capacity;
135+extern long long srv_ibuf_max_size;
136+extern ulint srv_ibuf_active_contract;
137+extern ulint srv_ibuf_accel_rate;
138+extern ulint srv_flush_neighbor_pages;
139+extern ulint srv_enable_unsafe_group_commit;
140+extern uint srv_read_ahead;
141+extern uint srv_adaptive_checkpoint;
142 /*-------------------------------------------*/
143
144 extern ulint srv_n_rows_inserted;
145diff -ruN a/innobase/log/log0log.c b/innobase/log/log0log.c
146--- a/innobase/log/log0log.c 2009-05-08 06:12:10.000000000 +0900
147+++ b/innobase/log/log0log.c 2009-07-02 16:44:49.000000000 +0900
148@@ -1524,6 +1524,29 @@
149 }
150
151 /********************************************************************
152+Flush the log buffer. Force it to disk depending on the value of
153+innodb_flush_log_at_trx_commit. */
154+
155+void
156+log_buffer_flush_maybe_sync(void)
157+/*=============================*/
158+{
159+ dulint lsn;
160+
161+ mutex_enter(&(log_sys->mutex));
162+
163+ lsn = log_sys->lsn;
164+
165+ mutex_exit(&(log_sys->mutex));
166+
167+ /* Force log buffer to disk when innodb_flush_log_at_trx_commit = 1. */
168+ log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS,
169+ srv_flush_log_at_trx_commit == 1 ? TRUE : FALSE,
170+ srv_flush_log_at_trx_commit == 1 ?
171+ LOG_WRITE_FROM_BACKGROUND_SYNC :
172+ LOG_WRITE_FROM_BACKGROUND_ASYNC);
173+}
174+/********************************************************************
175 Tries to establish a big enough margin of free space in the log buffer, such
176 that a new log entry can be catenated without an immediate need for a flush. */
177 static
178@@ -3326,6 +3349,15 @@
179 (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
180 (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));
181
182+ fprintf(file,
183+ "Max checkpoint age %lu\n"
184+ "Modified age %lu\n"
185+ "Checkpoint age %lu\n",
186+ (ulong) log_sys->max_checkpoint_age,
187+ (ulong) ut_dulint_minus(log_sys->lsn,
188+ log_buf_pool_get_oldest_modification()),
189+ (ulong) ut_dulint_minus(log_sys->lsn, log_sys->last_checkpoint_lsn));
190+
191 current_time = time(NULL);
192
193 time_elapsed = 0.001 + difftime(current_time,
194diff -ruN a/innobase/os/os0file.c b/innobase/os/os0file.c
195--- a/innobase/os/os0file.c 2009-07-02 16:43:23.000000000 +0900
196+++ b/innobase/os/os0file.c 2009-07-02 16:44:49.000000000 +0900
197@@ -66,6 +66,28 @@
198
199 ibool os_aio_print_debug = FALSE;
200
201+/* Status of an IO request in simulated AIO.
202+ Protocol for simulated aio:
203+ client requests IO: find slot with reserved = FALSE. Add entry with
204+ status = OS_AIO_NOT_ISSUED.
205+ IO thread wakes: find adjacent slots with reserved = TRUE and status =
206+ OS_AIO_NOT_ISSUED. Change status for slots to
207+ OS_AIO_ISSUED.
208+ IO operation completes: set status for slots to OS_AIO_DONE. set status
209+ for the first slot to OS_AIO_CLAIMED and return
210+ result for that slot.
211+ When there are multiple read and write threads, they all compete to execute
212+ the requests in the array (os_aio_array_t). This avoids the need to load
213+ balance requests at the time the request is made at the cost of waking all
214+ threads when a request is available.
215+*/
216+typedef enum {
217+ OS_AIO_NOT_ISSUED, /* Available to be processed by an IO thread. */
218+ OS_AIO_ISSUED, /* Being processed by an IO thread. */
219+ OS_AIO_DONE, /* Request processed. */
220+ OS_AIO_CLAIMED /* Result being returned to client. */
221+} os_aio_status;
222+
223 /* The aio array slot structure */
224 typedef struct os_aio_slot_struct os_aio_slot_t;
225
226@@ -74,6 +96,8 @@
227 ulint pos; /* index of the slot in the aio
228 array */
229 ibool reserved; /* TRUE if this slot is reserved */
230+ os_aio_status status; /* Status for current request. Valid when reserved
231+ is TRUE. Used only in simulated aio. */
232 time_t reservation_time;/* time when reserved */
233 ulint len; /* length of the block to read or
234 write */
235@@ -84,11 +108,11 @@
236 ulint offset_high; /* 32 high bits of file offset */
237 os_file_t file; /* file where to read or write */
238 const char* name; /* file name or path */
239- ibool io_already_done;/* used only in simulated aio:
240- TRUE if the physical i/o already
241- made and only the slot message
242- needs to be passed to the caller
243- of os_aio_simulated_handle */
244+// ibool io_already_done;/* used only in simulated aio:
245+// TRUE if the physical i/o already
246+// made and only the slot message
247+// needs to be passed to the caller
248+// of os_aio_simulated_handle */
249 fil_node_t* message1; /* message which is given by the */
250 void* message2; /* the requester of an aio operation
251 and which can be used to identify
252@@ -137,6 +161,13 @@
253 /* Array of events used in simulated aio */
254 os_event_t* os_aio_segment_wait_events = NULL;
255
256+/* Number for the first global segment for reading. */
257+const ulint os_aio_first_read_segment = 2;
258+
259+/* Number of the first global segment for writing. Set to
260+os_aio_first_read_segment + n_read_threads when the aio arrays are created. */
261+ulint os_aio_first_write_segment = 0;
262+
263 /* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
264 are NULL when the module has not yet been initialized. */
265 static os_aio_array_t* os_aio_read_array = NULL;
266@@ -145,11 +176,17 @@
267 static os_aio_array_t* os_aio_log_array = NULL;
268 static os_aio_array_t* os_aio_sync_array = NULL;
269
270+/* Per thread buffer used for merged IO requests. Used by
271+os_aio_simulated_handle so that a buffer doesn't have to be allocated
272+for each request. */
273+static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS];
274+static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS];
275+
276 static ulint os_aio_n_segments = ULINT_UNDEFINED;
277
278 /* If the following is TRUE, read i/o handler threads try to
279 wait until a batch of new read requests have been posted */
280-static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
281+static volatile ibool os_aio_recommend_sleep_for_read_threads = FALSE;
282
283 ulint os_n_file_reads = 0;
284 ulint os_bytes_read_since_printout = 0;
285@@ -2878,8 +2915,10 @@
286 /*========*/
287 ulint n, /* in: maximum number of pending aio operations
288 allowed; n must be divisible by n_segments */
289- ulint n_segments, /* in: combined number of segments in the four
290- first aio arrays; must be >= 4 */
291+// ulint n_segments, /* in: combined number of segments in the four
292+// first aio arrays; must be >= 4 */
293+ ulint n_read_threads, /* in: number of read i/o threads; n_segments == 2 + n_read_threads + n_write_threads */
294+ ulint n_write_threads, /* in: number of write i/o threads */
295 ulint n_slots_sync) /* in: number of slots in the sync aio array */
296 {
297 ulint n_read_segs;
298@@ -2889,6 +2928,8 @@
299 #ifdef POSIX_ASYNC_IO
300 sigset_t sigset;
301 #endif
302+ ulint n_segments = 2 + n_read_threads + n_write_threads;
303+
304 ut_ad(n % n_segments == 0);
305 ut_ad(n_segments >= 4);
306
307@@ -2896,14 +2937,17 @@
308
309 for (i = 0; i < n_segments; i++) {
310 srv_set_io_thread_op_info(i, "not started yet");
311+ os_aio_thread_buffer[i] = 0;
312+ os_aio_thread_buffer_size[i] = 0;
313 }
314
315 n_per_seg = n / n_segments;
316- n_write_segs = (n_segments - 2) / 2;
317- n_read_segs = n_segments - 2 - n_write_segs;
318+ n_write_segs = n_write_threads;
319+ n_read_segs = n_read_threads;
320
321 /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
322
323+ os_aio_first_write_segment = os_aio_first_read_segment + n_read_threads;
324 os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
325
326 srv_io_thread_function[0] = "insert buffer thread";
327@@ -2912,14 +2956,14 @@
328
329 srv_io_thread_function[1] = "log thread";
330
331- os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
332+ os_aio_read_array = os_aio_array_create(n_per_seg,
333 n_read_segs);
334 for (i = 2; i < 2 + n_read_segs; i++) {
335 ut_a(i < SRV_MAX_N_IO_THREADS);
336 srv_io_thread_function[i] = "read thread";
337 }
338
339- os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
340+ os_aio_write_array = os_aio_array_create(n_per_seg,
341 n_write_segs);
342 for (i = 2 + n_read_segs; i < n_segments; i++) {
343 ut_a(i < SRV_MAX_N_IO_THREADS);
344@@ -3181,6 +3225,13 @@
345 struct aiocb* control;
346 #endif
347 ulint i;
348+ ulint prim_segment;
349+ ulint n;
350+
351+ n = array->n_slots / array->n_segments;
352+ /* Stripe requests over the segments in units of 64 pages (aligned to max(BUF_READ_AHEAD_AREA)) */
353+ prim_segment = ( offset >> (UNIV_PAGE_SIZE_SHIFT + 6) ) % (array->n_segments);
354+
355 loop:
356 os_mutex_enter(array->mutex);
357
358@@ -3199,6 +3250,16 @@
359 goto loop;
360 }
361
362+ for (i = prim_segment * n; i < array->n_slots; i++) {
363+ slot = os_aio_array_get_nth_slot(array, i);
364+
365+ if (slot->reserved == FALSE) {
366+ break;
367+ }
368+ }
369+
370+ if (slot->reserved == TRUE){
371+ /* Not found after the intended segment. So we should search before. */
372 for (i = 0;; i++) {
373 slot = os_aio_array_get_nth_slot(array, i);
374
375@@ -3206,6 +3267,7 @@
376 break;
377 }
378 }
379+ }
380
381 array->n_reserved++;
382
383@@ -3228,7 +3290,8 @@
384 slot->buf = buf;
385 slot->offset = offset;
386 slot->offset_high = offset_high;
387- slot->io_already_done = FALSE;
388+// slot->io_already_done = FALSE;
389+ slot->status = OS_AIO_NOT_ISSUED;
390
391 #ifdef WIN_ASYNC_IO
392 control = &(slot->control);
393@@ -3281,6 +3344,7 @@
394 ut_ad(slot->reserved);
395
396 slot->reserved = FALSE;
397+ slot->status = OS_AIO_NOT_ISSUED;
398
399 array->n_reserved--;
400
401@@ -3317,16 +3381,18 @@
402
403 segment = os_aio_get_array_and_local_segment(&array, global_segment);
404
405- n = array->n_slots / array->n_segments;
406+ n = array->n_slots;
407
408 /* Look through n slots after the segment * n'th slot */
409
410 os_mutex_enter(array->mutex);
411
412 for (i = 0; i < n; i++) {
413- slot = os_aio_array_get_nth_slot(array, i + segment * n);
414+ slot = os_aio_array_get_nth_slot(array, i);
415
416- if (slot->reserved) {
417+ if (slot->reserved &&
418+ (slot->status == OS_AIO_NOT_ISSUED ||
419+ slot->status == OS_AIO_DONE)) {
420 /* Found an i/o request */
421
422 break;
423@@ -3336,7 +3402,25 @@
424 os_mutex_exit(array->mutex);
425
426 if (i < n) {
427- os_event_set(os_aio_segment_wait_events[global_segment]);
428+ if (array == os_aio_ibuf_array) {
429+ os_event_set(os_aio_segment_wait_events[0]);
430+
431+ } else if (array == os_aio_log_array) {
432+ os_event_set(os_aio_segment_wait_events[1]);
433+
434+ } else if (array == os_aio_read_array) {
435+ ulint x;
436+ for (x = os_aio_first_read_segment; x < os_aio_first_write_segment; x++)
437+ os_event_set(os_aio_segment_wait_events[x]);
438+
439+ } else if (array == os_aio_write_array) {
440+ ulint x;
441+ for (x = os_aio_first_write_segment; x < os_aio_n_segments; x++)
442+ os_event_set(os_aio_segment_wait_events[x]);
443+
444+ } else {
445+ ut_a(0);
446+ }
447 }
448 }
449
450@@ -3347,8 +3431,6 @@
451 os_aio_simulated_wake_handler_threads(void)
452 /*=======================================*/
453 {
454- ulint i;
455-
456 if (os_aio_use_native_aio) {
457 /* We do not use simulated aio: do nothing */
458
459@@ -3357,9 +3439,10 @@
460
461 os_aio_recommend_sleep_for_read_threads = FALSE;
462
463- for (i = 0; i < os_aio_n_segments; i++) {
464- os_aio_simulated_wake_handler_thread(i);
465- }
466+ os_aio_simulated_wake_handler_thread(0);
467+ os_aio_simulated_wake_handler_thread(1);
468+ os_aio_simulated_wake_handler_thread(os_aio_first_read_segment);
469+ os_aio_simulated_wake_handler_thread(os_aio_first_write_segment);
470 }
471
472 /**************************************************************************
473@@ -3640,7 +3723,7 @@
474 ut_ad(os_aio_validate());
475 ut_ad(segment < array->n_segments);
476
477- n = array->n_slots / array->n_segments;
478+ n = array->n_slots;
479
480 if (array == os_aio_sync_array) {
481 os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
482@@ -3648,12 +3731,12 @@
483 } else {
484 srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
485 i = os_event_wait_multiple(n,
486- (array->native_events) + segment * n);
487+ (array->native_events));
488 }
489
490 os_mutex_enter(array->mutex);
491
492- slot = os_aio_array_get_nth_slot(array, i + segment * n);
493+ slot = os_aio_array_get_nth_slot(array, i);
494
495 ut_a(slot->reserved);
496
497@@ -3830,10 +3913,13 @@
498 os_aio_slot_t* slot;
499 os_aio_slot_t* slot2;
500 os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
501+ os_aio_slot_t* lowest_request;
502+ os_aio_slot_t* oldest_request;
503 ulint n_consecutive;
504 ulint total_len;
505 ulint offs;
506 ulint lowest_offset;
507+ ulint oldest_offset;
508 ulint biggest_age;
509 ulint age;
510 byte* combined_buf;
511@@ -3841,6 +3927,7 @@
512 ibool ret;
513 ulint n;
514 ulint i;
515+ time_t now;
516
517 segment = os_aio_get_array_and_local_segment(&array, global_segment);
518
519@@ -3853,7 +3940,7 @@
520 ut_ad(os_aio_validate());
521 ut_ad(segment < array->n_segments);
522
523- n = array->n_slots / array->n_segments;
524+ n = array->n_slots;
525
526 /* Look through n slots after the segment * n'th slot */
527
528@@ -3875,9 +3962,9 @@
529 done */
530
531 for (i = 0; i < n; i++) {
532- slot = os_aio_array_get_nth_slot(array, i + segment * n);
533+ slot = os_aio_array_get_nth_slot(array, i);
534
535- if (slot->reserved && slot->io_already_done) {
536+ if (slot->reserved && slot->status == OS_AIO_DONE) {
537
538 if (os_aio_print_debug) {
539 fprintf(stderr,
540@@ -3897,67 +3984,57 @@
541 then pick the one at the lowest offset. */
542
543 biggest_age = 0;
544- lowest_offset = ULINT_MAX;
545+ now = time(NULL);
546+ oldest_request = lowest_request = NULL;
547+ oldest_offset = lowest_offset = ULINT_MAX;
548
549+ /* Find the oldest request and the request with the smallest offset */
550 for (i = 0; i < n; i++) {
551- slot = os_aio_array_get_nth_slot(array, i + segment * n);
552+ slot = os_aio_array_get_nth_slot(array, i);
553
554- if (slot->reserved) {
555- age = (ulint)difftime(time(NULL),
556- slot->reservation_time);
557+ if (slot->reserved && slot->status == OS_AIO_NOT_ISSUED) {
558+ age = (ulint)difftime(now, slot->reservation_time);
559
560 if ((age >= 2 && age > biggest_age)
561 || (age >= 2 && age == biggest_age
562- && slot->offset < lowest_offset)) {
563+ && slot->offset < oldest_offset)) {
564
565 /* Found an i/o request */
566- consecutive_ios[0] = slot;
567-
568- n_consecutive = 1;
569-
570 biggest_age = age;
571- lowest_offset = slot->offset;
572+ oldest_request = slot;
573+ oldest_offset = slot->offset;
574 }
575- }
576- }
577-
578- if (n_consecutive == 0) {
579- /* There were no old requests. Look for an i/o request at the
580- lowest offset in the array (we ignore the high 32 bits of the
581- offset in these heuristics) */
582-
583- lowest_offset = ULINT_MAX;
584-
585- for (i = 0; i < n; i++) {
586- slot = os_aio_array_get_nth_slot(array,
587- i + segment * n);
588-
589- if (slot->reserved && slot->offset < lowest_offset) {
590
591+ /* Look for an i/o request at the lowest offset in the array
592+ * (we ignore the high 32 bits of the offset) */
593+ if (slot->offset < lowest_offset) {
594 /* Found an i/o request */
595- consecutive_ios[0] = slot;
596-
597- n_consecutive = 1;
598-
599+ lowest_request = slot;
600 lowest_offset = slot->offset;
601 }
602 }
603 }
604
605- if (n_consecutive == 0) {
606+ if (!lowest_request && !oldest_request) {
607
608 /* No i/o requested at the moment */
609
610 goto wait_for_io;
611 }
612
613- slot = consecutive_ios[0];
614+ if (oldest_request) {
615+ slot = oldest_request;
616+ } else {
617+ slot = lowest_request;
618+ }
619+ consecutive_ios[0] = slot;
620+ n_consecutive = 1;
621
622 /* Check if there are several consecutive blocks to read or write */
623
624 consecutive_loop:
625 for (i = 0; i < n; i++) {
626- slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
627+ slot2 = os_aio_array_get_nth_slot(array, i);
628
629 if (slot2->reserved && slot2 != slot
630 && slot2->offset == slot->offset + slot->len
631@@ -3965,7 +4042,8 @@
632 sum does not wrap over */
633 && slot2->offset_high == slot->offset_high
634 && slot2->type == slot->type
635- && slot2->file == slot->file) {
636+ && slot2->file == slot->file
637+ && slot2->status == OS_AIO_NOT_ISSUED) {
638
639 /* Found a consecutive i/o request */
640
641@@ -3994,6 +4072,8 @@
642
643 for (i = 0; i < n_consecutive; i++) {
644 total_len += consecutive_ios[i]->len;
645+ ut_a(consecutive_ios[i]->status == OS_AIO_NOT_ISSUED);
646+ consecutive_ios[i]->status = OS_AIO_ISSUED;
647 }
648
649 if (n_consecutive == 1) {
650@@ -4001,7 +4081,14 @@
651 combined_buf = slot->buf;
652 combined_buf2 = NULL;
653 } else {
654- combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);
655+ if ((total_len + UNIV_PAGE_SIZE) > os_aio_thread_buffer_size[global_segment]) {
656+ if (os_aio_thread_buffer[global_segment])
657+ ut_free(os_aio_thread_buffer[global_segment]);
658+
659+ os_aio_thread_buffer[global_segment] = ut_malloc(total_len + UNIV_PAGE_SIZE);
660+ os_aio_thread_buffer_size[global_segment] = total_len + UNIV_PAGE_SIZE;
661+ }
662+ combined_buf2 = os_aio_thread_buffer[global_segment];
663
664 ut_a(combined_buf2);
665
666@@ -4012,6 +4099,9 @@
667 this assumes that there is just one i/o-handler thread serving
668 a single segment of slots! */
669
670+ ut_a(slot->reserved);
671+ ut_a(slot->status == OS_AIO_ISSUED);
672+
673 os_mutex_exit(array->mutex);
674
675 if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
676@@ -4081,16 +4171,13 @@
677 }
678 }
679
680- if (combined_buf2) {
681- ut_free(combined_buf2);
682- }
683-
684 os_mutex_enter(array->mutex);
685
686 /* Mark the i/os done in slots */
687
688 for (i = 0; i < n_consecutive; i++) {
689- consecutive_ios[i]->io_already_done = TRUE;
690+ ut_a(consecutive_ios[i]->status == OS_AIO_ISSUED);
691+ consecutive_ios[i]->status = OS_AIO_DONE;
692 }
693
694 /* We return the messages for the first slot now, and if there were
695@@ -4100,6 +4187,8 @@
696 slot_io_done:
697
698 ut_a(slot->reserved);
699+ ut_a(slot->status == OS_AIO_DONE);
700+ slot->status = OS_AIO_CLAIMED;
701
702 *message1 = slot->message1;
703 *message2 = slot->message2;
704diff -ruN a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
705--- a/innobase/srv/srv0srv.c 2009-07-02 16:43:23.000000000 +0900
706+++ b/innobase/srv/srv0srv.c 2009-07-02 18:36:54.000000000 +0900
707@@ -167,6 +167,8 @@
708 ulint srv_lock_table_size = ULINT_MAX;
709
710 ulint srv_n_file_io_threads = ULINT_MAX;
711+ulint srv_n_read_io_threads = 1;
712+ulint srv_n_write_io_threads = 1;
713
714 #ifdef UNIV_LOG_ARCHIVE
715 ibool srv_log_archive_on = FALSE;
716@@ -330,6 +332,24 @@
717 ibool srv_use_awe = FALSE;
718 ibool srv_use_adaptive_hash_indexes = TRUE;
719
720+ulint srv_io_capacity = 100;
721+
722+/* Returns the number of IO operations that is X percent of the capacity.
723+PCT_IO(5) -> returns the number of IO operations that is 5% of the max
724+where max is srv_io_capacity. */
725+#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0)))
726+
727+long long srv_ibuf_max_size = 0;
728+ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
729+ulint srv_ibuf_accel_rate = 100;
730+#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0)))
731+
732+ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
733+
734+ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
735+
736+uint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
737+uint srv_adaptive_checkpoint = 0; /* 0: none 1: reflex 2: estimate */
738 /*-------------------------------------------*/
739 ulong srv_n_spin_wait_rounds = 20;
740 ulong srv_n_free_tickets_to_enter = 500;
741@@ -2228,6 +2248,10 @@
742 ulint n_pend_ios;
743 ibool skip_sleep = FALSE;
744 ulint i;
745+
746+ dulint lsn_old;
747+
748+ dulint oldest_lsn;
749
750 #ifdef UNIV_DEBUG_THREAD_CREATION
751 fprintf(stderr, "Master thread starts, id %lu\n",
752@@ -2244,6 +2268,9 @@
753
754 mutex_exit(&kernel_mutex);
755
756+ mutex_enter(&(log_sys->mutex));
757+ lsn_old = log_sys->lsn;
758+ mutex_exit(&(log_sys->mutex));
759 os_event_set(srv_sys->operational);
760 loop:
761 /*****************************************************************/
762@@ -2279,6 +2306,18 @@
763 if (!skip_sleep) {
764
765 os_thread_sleep(1000000);
766+ /*
767+ mutex_enter(&(log_sys->mutex));
768+ oldest_lsn = buf_pool_get_oldest_modification();
769+ dulint lsn = log_sys->lsn;
770+ mutex_exit(&(log_sys->mutex));
771+
772+ if (!ut_dulint_is_zero(oldest_lsn))
773+ fprintf(stderr,
774+ "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
775+ ut_dulint_minus(lsn, oldest_lsn) * 100 / log_sys->max_checkpoint_age,
776+ ut_dulint_minus(lsn, lsn_old));
777+ */
778 }
779
780 skip_sleep = FALSE;
781@@ -2317,13 +2356,14 @@
782 + log_sys->n_pending_writes;
783 n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
784 + buf_pool->n_pages_written;
785- if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
786+ if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) {
787 srv_main_thread_op_info = "doing insert buffer merge";
788- ibuf_contract_for_n_pages(TRUE, 5);
789+ ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(5));
790
791 srv_main_thread_op_info = "flushing log";
792
793- log_buffer_flush_to_disk();
794+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
795+ log_buffer_flush_maybe_sync();
796 }
797
798 if (buf_get_modified_ratio_pct() >
799@@ -2332,7 +2372,7 @@
800 /* Try to keep the number of modified pages in the
801 buffer pool under the limit wished by the user */
802
803- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
804+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
805 ut_dulint_max);
806
807 /* If we had to do the flush, it may have taken
808@@ -2341,6 +2381,140 @@
809 iteration of this loop. */
810
811 skip_sleep = TRUE;
812+ mutex_enter(&(log_sys->mutex));
813+ lsn_old = log_sys->lsn;
814+ mutex_exit(&(log_sys->mutex));
815+ } else if (srv_adaptive_checkpoint == 1) {
816+
817+ /* Try to keep the modified age from exceeding the
818+ max_checkpoint_age * 7/8 line */
819+
820+ mutex_enter(&(log_sys->mutex));
821+ lsn_old = log_sys->lsn;
822+ oldest_lsn = buf_pool_get_oldest_modification();
823+ if (ut_dulint_is_zero(oldest_lsn)) {
824+
825+ mutex_exit(&(log_sys->mutex));
826+
827+ } else {
828+ if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
829+ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
830+ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
831+ /* We should not flush from here. */
832+ mutex_exit(&(log_sys->mutex));
833+ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
834+ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 4)) {
835+
836+ /* 2nd defence line (max_checkpoint_age * 3/4) */
837+
838+ mutex_exit(&(log_sys->mutex));
839+
840+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
841+ ut_dulint_max);
842+ skip_sleep = TRUE;
843+ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
844+ > (log_sys->max_checkpoint_age)/2 ) {
845+
846+ /* 1st defence line (max_checkpoint_age * 1/2) */
847+
848+ mutex_exit(&(log_sys->mutex));
849+
850+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
851+ ut_dulint_max);
852+ skip_sleep = TRUE;
853+ } else {
854+ mutex_exit(&(log_sys->mutex));
855+ }
856+ }
857+ } else if (srv_adaptive_checkpoint == 2) {
858+
859+ /* Try to keep the modified age from exceeding the
860+ max_checkpoint_age * 7/8 line */
861+
862+ mutex_enter(&(log_sys->mutex));
863+
864+ oldest_lsn = buf_pool_get_oldest_modification();
865+ if (ut_dulint_is_zero(oldest_lsn)) {
866+ lsn_old = log_sys->lsn;
867+ mutex_exit(&(log_sys->mutex));
868+
869+ } else {
870+ if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
871+ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
872+ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
873+ /* We should not flush from here. */
874+ lsn_old = log_sys->lsn;
875+ mutex_exit(&(log_sys->mutex));
876+ } else if (ut_dulint_minus(log_sys->lsn, oldest_lsn)
877+ > (log_sys->max_checkpoint_age)/2 ) {
878+
879+ /* defence line (max_checkpoint_age * 1/2) */
880+ dulint lsn = log_sys->lsn;
881+
882+ mutex_exit(&(log_sys->mutex));
883+
884+ ib_longlong level, bpl;
885+ buf_block_t* bpage;
886+
887+ mutex_enter(&buf_pool->mutex);
888+
889+ level = 0;
890+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
891+
892+ while (bpage != NULL) {
893+ dulint oldest_modification = bpage->oldest_modification;
894+ if (!ut_dulint_is_zero(oldest_modification)) {
895+ level += log_sys->max_checkpoint_age
896+ - ut_dulint_minus(lsn, oldest_modification);
897+ }
898+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
899+ }
900+
901+ if (level) {
902+ bpl = ((ib_longlong) UT_LIST_GET_LEN(buf_pool->flush_list)
903+ * UT_LIST_GET_LEN(buf_pool->flush_list)
904+ * ut_dulint_minus(lsn, lsn_old)) / level;
905+ } else {
906+ bpl = 0;
907+ }
908+
909+ mutex_exit(&buf_pool->mutex);
910+
911+ if (!srv_use_doublewrite_buf) {
912+ /* flush is faster than when doublewrite */
913+ bpl = (bpl * 3) / 4;
914+ }
915+
916+ if(bpl) {
917+retry_flush_batch:
918+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
919+ bpl,
920+ ut_dulint_add(oldest_lsn,
921+ ut_dulint_minus(lsn,
922+ lsn_old)));
923+ if (n_pages_flushed == ULINT_UNDEFINED) {
924+ os_thread_sleep(5000);
925+ goto retry_flush_batch;
926+ }
927+ }
928+
929+ lsn_old = lsn;
930+ /*
931+ fprintf(stderr,
932+ "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
933+ ut_dulint_minus(lsn, oldest_lsn) * 100 / log_sys->max_checkpoint_age,
934+ ut_dulint_minus(lsn, lsn_old), bpl);
935+ */
936+ } else {
937+ lsn_old = log_sys->lsn;
938+ mutex_exit(&(log_sys->mutex));
939+ }
940+ }
941+
942+ } else {
943+ mutex_enter(&(log_sys->mutex));
944+ lsn_old = log_sys->lsn;
945+ mutex_exit(&(log_sys->mutex));
946 }
947
948 if (srv_activity_count == old_activity_count) {
949@@ -2367,23 +2541,25 @@
950 n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
951 n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
952 + buf_pool->n_pages_written;
953- if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
954+ if (n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
955
956 srv_main_thread_op_info = "flushing buffer pool pages";
957- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
958+ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
959
960 srv_main_thread_op_info = "flushing log";
961- log_buffer_flush_to_disk();
962+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
963+ log_buffer_flush_maybe_sync();
964 }
965
966 /* We run a batch of insert buffer merge every 10 seconds,
967 even if the server were active */
968
969 srv_main_thread_op_info = "doing insert buffer merge";
970- ibuf_contract_for_n_pages(TRUE, 5);
971+ ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(5));
972
973 srv_main_thread_op_info = "flushing log";
974- log_buffer_flush_to_disk();
975+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
976+ log_buffer_flush_maybe_sync();
977
978 /* We run a full purge every 10 seconds, even if the server
979 were active */
980@@ -2422,14 +2598,14 @@
981 (> 70 %), we assume we can afford reserving the disk(s) for
982 the time it requires to flush 100 pages */
983
984- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
985+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
986 ut_dulint_max);
987 } else {
988 /* Otherwise, we only flush a small number of pages so that
989 we do not unnecessarily use much disk i/o capacity from
990 other work */
991
992- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
993+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(10),
994 ut_dulint_max);
995 }
996
997@@ -2518,7 +2694,7 @@
998 if (srv_fast_shutdown && srv_shutdown_state > 0) {
999 n_bytes_merged = 0;
1000 } else {
1001- n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
1002+ n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IBUF_IO(100));
1003 }
1004
1005 srv_main_thread_op_info = "reserving kernel mutex";
1006@@ -2535,7 +2711,7 @@
1007
1008 if (srv_fast_shutdown < 2) {
1009 n_pages_flushed =
1010- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
1011+ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
1012 } else {
1013 /* In the fastest shutdown we do not flush the buffer pool
1014 to data files: we set n_pages_flushed to 0 artificially. */
1015@@ -2557,7 +2733,14 @@
1016
1017 srv_main_thread_op_info = "flushing log";
1018
1019- log_buffer_flush_to_disk();
1020+ current_time = time(NULL);
1021+ if (difftime(current_time, last_flush_time) > 1) { /* over 1 s since last_flush_time was recorded here */
1022+ log_buffer_flush_to_disk();
1023+ last_flush_time = current_time; /* remember when log_buffer_flush_to_disk() last ran on this path */
1024+ } else {
1025+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
1026+ log_buffer_flush_maybe_sync();
1027+ }
1028
1029 srv_main_thread_op_info = "making checkpoint";
1030
1031diff -ruN a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c
1032--- a/innobase/srv/srv0start.c 2009-05-08 06:12:12.000000000 +0900
1033+++ b/innobase/srv/srv0start.c 2009-07-02 16:44:49.000000000 +0900
1034@@ -1205,24 +1205,28 @@
1035 return(DB_ERROR);
1036 }
1037
1038+ /* override innodb_file_io_threads: 2 + read threads + write threads */
1039+ srv_n_file_io_threads = 2 + srv_n_read_io_threads + srv_n_write_io_threads;
1040+
1041 /* Restrict the maximum number of file i/o threads */
1042 if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
1043
1044 srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
1045+ srv_n_read_io_threads = srv_n_write_io_threads = (SRV_MAX_N_IO_THREADS - 2) / 2;
1046 }
1047
1048 if (!os_aio_use_native_aio) {
1049 /* In simulated aio we currently have use only for 4 threads */
1050- srv_n_file_io_threads = 4;
1051+ /*srv_n_file_io_threads = 4;*/
1052
1053 os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
1054 * srv_n_file_io_threads,
1055- srv_n_file_io_threads,
1056+ srv_n_read_io_threads, srv_n_write_io_threads,
1057 SRV_MAX_N_PENDING_SYNC_IOS);
1058 } else {
1059 os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
1060 * srv_n_file_io_threads,
1061- srv_n_file_io_threads,
1062+ srv_n_read_io_threads, srv_n_write_io_threads,
1063 SRV_MAX_N_PENDING_SYNC_IOS);
1064 }
1065
1066diff -ruN a/patch_info/innodb_io_patches.info b/patch_info/innodb_io_patches.info
1067--- /dev/null 1970-01-01 09:00:00.000000000 +0900
1068+++ b/patch_info/innodb_io_patches.info 2009-07-02 16:44:49.000000000 +0900
1069@@ -0,0 +1,11 @@
1070+File=innodb_io_patches.patch
1071+Name=Cluster of past InnoDB IO patches
1072+Version=1.1
1073+Author=Percona
1074+License=GPL
1075+Comment=This patch contains fixed (control_flush_and_merge_and_read, control_io-threads, adaptive_flush)
1076+ChangeLog=
1077+2008-11-06
1078+YK: Initial release
1079+2009-01-09
1080+YK: Some parameters are added
1081diff -ruN a/sql/ha_innodb.cc b/sql/ha_innodb.cc
1082--- a/sql/ha_innodb.cc 2009-07-02 16:43:23.000000000 +0900
1083+++ b/sql/ha_innodb.cc 2009-07-02 16:44:49.000000000 +0900
1084@@ -149,6 +149,7 @@
1085 innobase_lock_wait_timeout, innobase_force_recovery,
1086 innobase_open_files;
1087
1088+long innobase_read_io_threads, innobase_write_io_threads;
1089 longlong innobase_buffer_pool_size, innobase_log_file_size;
1090
1091 /* The default values for the following char* start-up parameters
1092@@ -1417,6 +1418,8 @@
1093 srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
1094
1095 srv_n_file_io_threads = (ulint) innobase_file_io_threads;
1096+ srv_n_read_io_threads = (ulint) innobase_read_io_threads;
1097+ srv_n_write_io_threads = (ulint) innobase_write_io_threads;
1098
1099 srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
1100 srv_force_recovery = (ulint) innobase_force_recovery;
1101@@ -7330,6 +7333,10 @@
1102 trx_t* trx = check_trx_exists(thd);
1103
1104 if (thd->lex->sql_command != SQLCOM_XA_PREPARE) {
1105+ if (srv_enable_unsafe_group_commit && !thd->variables.innodb_support_xa) {
1106+ /* choose group commit rather than binlog order */
1107+ return(0);
1108+ }
1109
1110 /* For ibbackup to work the order of transactions in binlog
1111 and InnoDB must be the same. Consider the situation
1112diff -ruN a/sql/ha_innodb.h b/sql/ha_innodb.h
1113--- a/sql/ha_innodb.h 2009-07-02 16:43:23.000000000 +0900
1114+++ b/sql/ha_innodb.h 2009-07-02 18:10:51.000000000 +0900
1115@@ -204,6 +204,7 @@
1116 extern long innobase_additional_mem_pool_size;
1117 extern long innobase_buffer_pool_awe_mem_mb;
1118 extern long innobase_file_io_threads, innobase_lock_wait_timeout;
1119+extern long innobase_read_io_threads, innobase_write_io_threads;
1120 extern long innobase_force_recovery;
1121 extern long innobase_open_files;
1122 extern char *innobase_data_home_dir, *innobase_data_file_path;
1123@@ -234,6 +235,15 @@
1124 extern ulong srv_thread_concurrency;
1125 extern ulong srv_commit_concurrency;
1126 extern ulong srv_flush_log_at_trx_commit;
1127+extern ulong srv_io_capacity;
1128+extern long long srv_ibuf_max_size;
1129+extern ulong srv_ibuf_active_contract;
1130+extern ulong srv_ibuf_accel_rate;
1131+extern ulong srv_flush_neighbor_pages;
1132+extern ulong srv_enable_unsafe_group_commit;
1133+extern uint srv_read_ahead;
1134+extern uint srv_adaptive_checkpoint;
1135+
1136 /* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does
1137 NOT update cardinality for indexes of InnoDB table". By default we are
1138 running with the fix disabled because MySQL 5.1 is frozen for such
1139diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
1140--- a/sql/mysqld.cc 2009-07-02 16:43:23.000000000 +0900
1141+++ b/sql/mysqld.cc 2009-07-02 18:00:04.000000000 +0900
1142@@ -5086,6 +5086,16 @@
1143 OPT_INNODB_ROLLBACK_ON_TIMEOUT,
1144 OPT_SECURE_FILE_PRIV,
1145 OPT_KEEP_FILES_ON_CREATE,
1146+ OPT_INNODB_IO_CAPACITY,
1147+ OPT_INNODB_IBUF_MAX_SIZE,
1148+ OPT_INNODB_IBUF_ACTIVE_CONTRACT,
1149+ OPT_INNODB_IBUF_ACCEL_RATE,
1150+ OPT_INNODB_FLUSH_NEIGHBOR_PAGES,
1151+ OPT_INNODB_ENABLE_UNSAFE_GROUP_COMMIT,
1152+ OPT_INNODB_READ_AHEAD,
1153+ OPT_INNODB_ADAPTIVE_CHECKPOINT,
1154+ OPT_INNODB_READ_IO_THREADS,
1155+ OPT_INNODB_WRITE_IO_THREADS,
1156 OPT_INNODB_ADAPTIVE_HASH_INDEX,
1157 OPT_FEDERATED,
1158 OPT_INNODB_USE_LEGACY_CARDINALITY_ALGORITHM
1159@@ -5403,6 +5413,44 @@
1160 (gptr*) &srv_use_legacy_cardinality_algorithm,
1161 (gptr*) &srv_use_legacy_cardinality_algorithm,
1162 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
1163+ {"innodb_io_capacity", OPT_INNODB_IO_CAPACITY,
1164+ "Number of IO operations per second the server can do. Tunes background IO rate.",
1165+ (gptr*) &srv_io_capacity, (gptr*) &srv_io_capacity,
1166+ 0, GET_ULONG, REQUIRED_ARG, 200, 100, 999999999, 0, 0, 0},
1167+ {"innodb_ibuf_max_size", OPT_INNODB_IBUF_MAX_SIZE,
1168+ "The maximum size of the insert buffer. (in bytes)",
1169+ (gptr*) &srv_ibuf_max_size, (gptr*) &srv_ibuf_max_size, 0,
1170+ GET_LL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0},
1171+ {"innodb_ibuf_active_contract", OPT_INNODB_IBUF_ACTIVE_CONTRACT,
1172+ "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
1173+ (gptr*) &srv_ibuf_active_contract, (gptr*) &srv_ibuf_active_contract,
1174+ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
1175+ {"innodb_ibuf_accel_rate", OPT_INNODB_IBUF_ACCEL_RATE,
1176+ "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
1177+ (gptr*) &srv_ibuf_accel_rate, (gptr*) &srv_ibuf_accel_rate,
1178+ 0, GET_ULONG, REQUIRED_ARG, 100, 100, 999999999, 0, 0, 0},
1179+ {"innodb_flush_neighbor_pages", OPT_INNODB_FLUSH_NEIGHBOR_PAGES,
1180+ "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
1181+ (gptr*) &srv_flush_neighbor_pages, (gptr*) &srv_flush_neighbor_pages,
1182+ 0, GET_ULONG, REQUIRED_ARG, 1, 0, 1, 0, 0, 0},
1183+ {"innodb_read_ahead", OPT_INNODB_READ_AHEAD,
1184+ "Control read ahead activity. (none, random, linear, [both])",
1185+ 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
1186+ {"innodb_adaptive_checkpoint", OPT_INNODB_ADAPTIVE_CHECKPOINT,
1187+ "Enable/Disable flushing along modified age. ([none], reflex, estimate)",
1188+ 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, /* no variable pointers here: the text is parsed by the OPT_INNODB_ADAPTIVE_CHECKPOINT case */
1189+ {"innodb_enable_unsafe_group_commit", OPT_INNODB_ENABLE_UNSAFE_GROUP_COMMIT,
1190+ "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
1191+ (gptr*) &srv_enable_unsafe_group_commit, (gptr*) &srv_enable_unsafe_group_commit,
1192+ 0, GET_ULONG, REQUIRED_ARG, 0, 0, 1, 0, 0, 0},
1193+ {"innodb_read_io_threads", OPT_INNODB_READ_IO_THREADS,
1194+ "Number of background read I/O threads in InnoDB.",
1195+ (gptr*) &innobase_read_io_threads, (gptr*) &innobase_read_io_threads,
1196+ 0, GET_LONG, REQUIRED_ARG, 8, 1, 64, 0, 0, 0},
1197+ {"innodb_write_io_threads", OPT_INNODB_WRITE_IO_THREADS,
1198+ "Number of background write I/O threads in InnoDB.",
1199+ (gptr*) &innobase_write_io_threads, (gptr*) &innobase_write_io_threads,
1200+ 0, GET_LONG, REQUIRED_ARG, 8, 1, 64, 0, 0, 0},
1201 #endif /* End HAVE_INNOBASE_DB */
1202 {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.",
1203 (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0,
1204@@ -7644,6 +7692,38 @@
1205 case OPT_INNODB_LOG_ARCHIVE:
1206 innobase_log_archive= argument ? test(atoi(argument)) : 1;
1207 break;
1208+ case OPT_INNODB_READ_AHEAD: /* translate the option text into the srv_read_ahead value (0..3) */
1209+ if (argument == disabled_my_option)
1210+ srv_read_ahead = 0; /* 0 is "none" in innodb_read_ahead_names */
1211+ else if (! argument)
1212+ srv_read_ahead = 3; /* no argument given: 3 is "both" in innodb_read_ahead_names */
1213+ else
1214+ {
1215+ int type;
1216+ if ((type=find_type(argument, &innodb_read_ahead_typelib, 2)) <= 0) /* a non-positive result is rejected as unknown */
1217+ {
1218+ fprintf(stderr,"Unknown innodb_read_ahead type: %s\n",argument);
1219+ exit(1);
1220+ }
1221+ srv_read_ahead = (uint) ((type - 1) & 3); /* type - 1 is used as the name index; & 3 folds the "0".."3" aliases (4..7) onto 0..3 */
1222+ }
1223+ break;
1224+ case OPT_INNODB_ADAPTIVE_CHECKPOINT: /* translate the option text into the srv_adaptive_checkpoint value (0..2) */
1225+ if (argument == disabled_my_option)
1226+ srv_adaptive_checkpoint = 0; /* 0 is "none" in innodb_adaptive_checkpoint_names */
1227+ else if (! argument)
1228+ srv_adaptive_checkpoint = 0; /* no argument given: also "none", same as the disabled case */
1229+ else
1230+ {
1231+ int type;
1232+ if ((type=find_type(argument, &innodb_adaptive_checkpoint_typelib, 2)) <= 0) /* a non-positive result is rejected as unknown */
1233+ {
1234+ fprintf(stderr,"Unknown innodb_adaptive_checkpoint type: %s\n",argument);
1235+ exit(1);
1236+ }
1237+ srv_adaptive_checkpoint = (uint) ((type - 1) % 3); /* type - 1 is used as the name index; % 3 folds the "0".."2" aliases (3..5) onto 0..2 */
1238+ }
1239+ break;
1240 #endif /* HAVE_INNOBASE_DB */
1241 case OPT_MYISAM_RECOVER:
1242 {
1243diff -ruN a/sql/set_var.cc b/sql/set_var.cc
1244--- a/sql/set_var.cc 2009-07-02 16:43:23.000000000 +0900
1245+++ b/sql/set_var.cc 2009-07-02 17:45:29.000000000 +0900
1246@@ -489,6 +489,57 @@
1247 sys_var_long_ptr sys_innodb_flush_log_at_trx_commit(
1248 "innodb_flush_log_at_trx_commit",
1249 &srv_flush_log_at_trx_commit);
1250+sys_var_long_ptr sys_innodb_io_capacity("innodb_io_capacity",
1251+ &srv_io_capacity);
1252+sys_var_long_ptr sys_innodb_ibuf_active_contract("innodb_ibuf_active_contract",
1253+ &srv_ibuf_active_contract);
1254+sys_var_long_ptr sys_innodb_ibuf_accel_rate("innodb_ibuf_accel_rate",
1255+ &srv_ibuf_accel_rate);
1256+sys_var_long_ptr sys_innodb_flush_neighbor_pages("innodb_flush_neighbor_pages",
1257+ &srv_flush_neighbor_pages);
1258+
1259+const char *innodb_read_ahead_names[]= /* accepted spellings of innodb_read_ahead, in index order */
1260+{
1261+ "none", /* 0 */
1262+ "random", /* 1 */
1263+ "linear", /* 2 */
1264+ "both", /* 3 */
1265+ /* For compatibility with the older patch: numeric spellings of the same values */
1266+ "0", /* 4 ("none" + 4) */
1267+ "1", /* 5 ("random" + 4) */
1268+ "2", /* 6 ("linear" + 4) */
1269+ "3", /* 7 ("both" + 4) */
1270+ NullS
1271+};
1272+TYPELIB innodb_read_ahead_typelib=
1273+{
1274+ array_elements(innodb_read_ahead_names) - 1, "innodb_read_ahead_typelib", /* - 1 leaves the trailing NullS out of the count */
1275+ innodb_read_ahead_names, NULL
1276+};
1277+sys_var_enum sys_innodb_read_ahead("innodb_read_ahead", &srv_read_ahead,
1278+ &innodb_read_ahead_typelib, fix_innodb_read_ahead); /* fix_innodb_read_ahead masks srv_read_ahead with 3 */
1279+sys_var_long_ptr sys_innodb_enable_unsafe_group_commit("innodb_enable_unsafe_group_commit",
1280+ &srv_enable_unsafe_group_commit);
1281+
1282+const char *innodb_adaptive_checkpoint_names[]= /* accepted spellings of innodb_adaptive_checkpoint, in index order */
1283+{
1284+ "none", /* 0 */
1285+ "reflex", /* 1 */
1286+ "estimate", /* 2 */
1287+ /* For compatibility with the older patch: numeric spellings of the same values */
1288+ "0", /* 3 ("none" + 3) */
1289+ "1", /* 4 ("reflex" + 3) */
1290+ "2", /* 5 ("estimate" + 3) */
1291+ NullS
1292+};
1293+TYPELIB innodb_adaptive_checkpoint_typelib=
1294+{
1295+ array_elements(innodb_adaptive_checkpoint_names) - 1, "innodb_adaptive_checkpoint_typelib", /* - 1 leaves the trailing NullS out of the count */
1296+ innodb_adaptive_checkpoint_names, NULL
1297+};
1298+sys_var_enum sys_innodb_adaptive_checkpoint("innodb_adaptive_checkpoint",
1299+ &srv_adaptive_checkpoint,
1300+ &innodb_adaptive_checkpoint_typelib, fix_innodb_adaptive_checkpoint); /* fix_innodb_adaptive_checkpoint reduces srv_adaptive_checkpoint modulo 3 */
1301 sys_var_const_os_str_ptr sys_innodb_data_file_path("innodb_data_file_path",
1302 &innobase_data_file_path);
1303 sys_var_const_os_str_ptr sys_innodb_data_home_dir("innodb_data_home_dir",
1304@@ -860,6 +911,13 @@
1305 &sys_innodb_thread_concurrency,
1306 &sys_innodb_commit_concurrency,
1307 &sys_innodb_flush_log_at_trx_commit,
1308+ &sys_innodb_io_capacity,
1309+ &sys_innodb_ibuf_active_contract,
1310+ &sys_innodb_ibuf_accel_rate,
1311+ &sys_innodb_flush_neighbor_pages,
1312+ &sys_innodb_read_ahead,
1313+ &sys_innodb_enable_unsafe_group_commit,
1314+ &sys_innodb_adaptive_checkpoint,
1315 #endif
1316 &sys_trust_routine_creators,
1317 &sys_trust_function_creators,
1318@@ -997,6 +1055,16 @@
1319 {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS},
1320 {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS},
1321 {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS},
1322+ {sys_innodb_io_capacity.name, (char*) &sys_innodb_io_capacity, SHOW_SYS},
1323+ {"innodb_ibuf_max_size", (char*) &srv_ibuf_max_size, SHOW_LONGLONG},
1324+ {sys_innodb_ibuf_active_contract.name, (char*) &sys_innodb_ibuf_active_contract, SHOW_SYS},
1325+ {sys_innodb_ibuf_accel_rate.name, (char*) &sys_innodb_ibuf_accel_rate, SHOW_SYS},
1326+ {sys_innodb_flush_neighbor_pages.name, (char*) &sys_innodb_flush_neighbor_pages, SHOW_SYS},
1327+ {sys_innodb_read_ahead.name, (char*) &sys_innodb_read_ahead, SHOW_SYS},
1328+ {sys_innodb_enable_unsafe_group_commit.name, (char*) &sys_innodb_enable_unsafe_group_commit, SHOW_SYS},
1329+ {sys_innodb_adaptive_checkpoint.name, (char*) &sys_innodb_adaptive_checkpoint, SHOW_SYS},
1330+ {"innodb_read_io_threads", (char*) &innobase_read_io_threads, SHOW_LONG},
1331+ {"innodb_write_io_threads", (char*) &innobase_write_io_threads, SHOW_LONG},
1332 {sys_innodb_use_legacy_cardinality_algorithm.name,
1333 (char*) &sys_innodb_use_legacy_cardinality_algorithm, SHOW_SYS},
1334 #endif
1335@@ -1459,6 +1527,18 @@
1336 }
1337 }
1338
1339+#ifdef HAVE_INNOBASE_DB
1340+extern void fix_innodb_read_ahead(THD *thd, enum_var_type type) /* passed to the sys_innodb_read_ahead constructor; thd and type are not used */
1341+{
1342+ srv_read_ahead &= 3; /* keep the two low bits, so name indexes 4..7 ("0".."3") become 0..3 */
1343+}
1344+
1345+extern void fix_innodb_adaptive_checkpoint(THD *thd, enum_var_type type) /* passed to the sys_innodb_adaptive_checkpoint constructor; thd and type are not used */
1346+{
1347+ srv_adaptive_checkpoint %= 3; /* name indexes 3..5 ("0".."2") become 0..2 */
1348+}
1349+#endif /* HAVE_INNOBASE_DB */
1350+
1351 static void fix_max_binlog_size(THD *thd, enum_var_type type)
1352 {
1353 DBUG_ENTER("fix_max_binlog_size");
1354diff -ruN a/sql/set_var.h b/sql/set_var.h
1355--- a/sql/set_var.h 2009-07-02 16:43:23.000000000 +0900
1356+++ b/sql/set_var.h 2009-07-02 17:35:17.000000000 +0900
1357@@ -31,6 +31,11 @@
1358
1359 extern TYPELIB bool_typelib, delay_key_write_typelib, sql_mode_typelib;
1360
1361+#ifdef HAVE_INNOBASE_DB
1362+extern TYPELIB innodb_read_ahead_typelib;
1363+extern TYPELIB innodb_adaptive_checkpoint_typelib;
1364+#endif /* HAVE_INNOBASE_DB */
1365+
1366 typedef int (*sys_check_func)(THD *, set_var *);
1367 typedef bool (*sys_update_func)(THD *, set_var *);
1368 typedef void (*sys_after_update_func)(THD *,enum_var_type);
1369@@ -1148,6 +1153,10 @@
1370 int sql_set_variables(THD *thd, List<set_var_base> *var_list);
1371 bool not_all_support_one_shot(List<set_var_base> *var_list);
1372 void fix_delay_key_write(THD *thd, enum_var_type type);
1373+#ifdef HAVE_INNOBASE_DB
1374+void fix_innodb_read_ahead(THD *thd, enum_var_type type);
1375+void fix_innodb_adaptive_checkpoint(THD *thd, enum_var_type type);
1376+#endif /* HAVE_INNOBASE_DB */
1377 ulong fix_sql_mode(ulong sql_mode);
1378 extern sys_var_const_str sys_charset_system;
1379 extern sys_var_str sys_init_connect;
This page took 0.051149 seconds and 4 git commands to generate.