1 # name : innodb_io_patches.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
9 --- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:09:51.273986410 +0900
10 +++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:10:08.934990091 +0900
13 /* When we traverse all the flush lists we don't want another
14 thread to add a dirty page to any flush list. */
15 + if (srv_buf_pool_instances > 1)
16 log_flush_order_mutex_enter();
18 for (i = 0; i < srv_buf_pool_instances; i++) {
23 + if (srv_buf_pool_instances > 1)
24 log_flush_order_mutex_exit();
26 /* The returned answer may be out of date: the flush_list can
27 diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
28 --- a/storage/innobase/buf/buf0flu.c 2010-11-03 07:01:13.000000000 +0900
29 +++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:10:08.934990091 +0900
32 /* Now flush the doublewrite buffer data to disk */
34 - fil_flush(TRX_SYS_SPACE);
35 + fil_flush(TRX_SYS_SPACE, FALSE);
37 /* We know that the writes have been flushed to disk now
38 and in recovery we will find them in the doublewrite buffer
41 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
43 - if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
44 + if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) {
45 /* If there is little space, it is better not to flush
46 any block except from the end of the LRU list */
48 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
49 --- a/storage/innobase/buf/buf0rea.c 2010-11-03 07:01:13.000000000 +0900
50 +++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:10:08.937050537 +0900
52 = BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
55 + if (!(srv_read_ahead & 2)) {
59 if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
60 /* No read-ahead to avoid thread deadlocks */
62 diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
63 --- a/storage/innobase/fil/fil0fil.c 2011-06-29 17:48:24.797971571 +0900
64 +++ b/storage/innobase/fil/fil0fil.c 2011-06-29 18:04:02.548053286 +0900
67 os_thread_sleep(20000);
70 + fil_flush(id, TRUE);
78 - ret = os_file_flush(file);
79 + ret = os_file_flush(file, TRUE);
82 fputs("InnoDB: Error: file flush of tablespace ", stderr);
87 - success = os_file_flush(file);
88 + success = os_file_flush(file, TRUE);
96 - success = os_file_flush(file);
97 + success = os_file_flush(file, TRUE);
101 @@ -4005,7 +4005,7 @@
102 size_after_extend, *actual_size); */
103 mutex_exit(&fil_system->mutex);
105 - fil_flush(space_id);
106 + fil_flush(space_id, TRUE);
110 @@ -4576,8 +4576,9 @@
114 - ulint space_id) /*!< in: file space id (this can be a group of
115 + ulint space_id, /*!< in: file space id (this can be a group of
116 log files or a tablespace of the database) */
121 @@ -4648,7 +4649,7 @@
122 /* fprintf(stderr, "Flushing to file %s\n",
125 - os_file_flush(file);
126 + os_file_flush(file, metadata);
128 mutex_enter(&fil_system->mutex);
130 @@ -4731,7 +4732,7 @@
131 a non-existing space id. */
132 for (i = 0; i < n_space_ids; i++) {
134 - fil_flush(space_ids[i]);
135 + fil_flush(space_ids[i], TRUE);
139 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
140 --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:09:51.283956391 +0900
141 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:10:08.963980444 +0900
143 "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
144 NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
146 +static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
147 + "Set to 0 (write and flush once per second),"
148 + " 1 (write and flush at each commit)"
149 + " or 2 (write at commit, flush once per second).",
150 + NULL, NULL, 1, 0, 2, 0);
153 static handler *innobase_create_handler(handlerton *hton,
159 +/******************************************************************//**
161 +extern "C" UNIV_INTERN
163 +thd_flush_log_at_trx_commit(
164 +/*================================*/
167 + return(THDVAR((THD*) thd, flush_log_at_trx_commit));
170 /********************************************************************//**
171 Obtain the InnoDB transaction of a MySQL thread.
172 @return reference to transaction pointer */
173 @@ -2437,6 +2454,9 @@
174 srv_n_read_io_threads = (ulint) innobase_read_io_threads;
175 srv_n_write_io_threads = (ulint) innobase_write_io_threads;
177 + srv_read_ahead &= 3;
178 + srv_adaptive_flushing_method %= 3;
180 srv_force_recovery = (ulint) innobase_force_recovery;
182 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
183 @@ -11025,7 +11045,7 @@
184 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
185 "Purge threads can be either 0 or 1.",
187 - 0, /* Default setting */
188 + 1, /* Default setting */
189 0, /* Minimum value */
190 1, 0); /* Maximum value */
192 @@ -11067,12 +11087,18 @@
193 innodb_file_format_max_validate,
194 innodb_file_format_max_update, "Antelope");
196 -static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
197 - PLUGIN_VAR_OPCMDARG,
198 - "Set to 0 (write and flush once per second),"
199 - " 1 (write and flush at each commit)"
200 - " or 2 (write at commit, flush once per second).",
201 - NULL, NULL, 1, 0, 2, 0);
202 +/* Changed to the THDVAR */
203 +//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
204 +// PLUGIN_VAR_OPCMDARG,
205 +// "Set to 0 (write and flush once per second),"
206 +// " 1 (write and flush at each commit)"
207 +// " or 2 (write at commit, flush once per second).",
208 +// NULL, NULL, 1, 0, 2, 0);
210 +static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
211 + PLUGIN_VAR_NOCMDARG,
212 + "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
215 static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
216 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
217 @@ -11167,7 +11193,7 @@
218 static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
219 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
220 "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
221 - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
222 + NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
224 static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
225 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
226 @@ -11319,6 +11345,95 @@
227 "trigger a readahead.",
228 NULL, NULL, 56, 0, 64, 0);
230 +static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
231 + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
232 + "The maximum size of the insert buffer. (in bytes)",
233 + NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
235 +static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
236 + PLUGIN_VAR_RQCMDARG,
237 + "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
238 + NULL, NULL, 1, 0, 1, 0);
240 +static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
241 + PLUGIN_VAR_RQCMDARG,
242 + "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
243 + NULL, NULL, 100, 100, 999999999, 0);
245 +static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target,
246 + PLUGIN_VAR_RQCMDARG,
247 + "Control soft limit of checkpoint age. (0 : not control)",
248 + NULL, NULL, 0, 0, ~0UL, 0);
250 +static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages,
251 + PLUGIN_VAR_RQCMDARG,
252 + "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
253 + NULL, NULL, 1, 0, 1, 0);
257 +innodb_read_ahead_update(
259 + struct st_mysql_sys_var* var,
263 + *(long *)var_ptr= (*(long *)save) & 3;
265 +const char *read_ahead_names[]=
271 + /* For compatibility of the older patch */
272 + "0", /* 4 ("none" + 4) */
275 + "3", /* 7 ("both" + 4) */
278 +TYPELIB read_ahead_typelib=
280 + array_elements(read_ahead_names) - 1, "read_ahead_typelib",
281 + read_ahead_names, NULL
283 +static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
284 + PLUGIN_VAR_RQCMDARG,
285 + "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]",
286 + NULL, innodb_read_ahead_update, 2, &read_ahead_typelib);
290 +innodb_adaptive_flushing_method_update(
292 + struct st_mysql_sys_var* var,
296 + *(long *)var_ptr= (*(long *)save) % 3; /* fold legacy aliases 3..5 ("0","1","2") onto methods 0..2 */
298 +const char *adaptive_flushing_method_names[]=
301 + "estimate", /* 1 */
302 + "keep_average", /* 2 */
303 + /* For compatibility of the older patch */
304 + "0", /* 3 ("native" + 3) */
305 + "1", /* 4 ("estimate" + 3) */
306 + "2", /* 5 ("keep_average" + 3) */
309 +TYPELIB adaptive_flushing_method_typelib=
311 + array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib",
312 + adaptive_flushing_method_names, NULL
314 +static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method,
315 + PLUGIN_VAR_RQCMDARG,
316 + "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
317 + NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
319 static struct st_mysql_sys_var* innobase_system_variables[]= {
320 MYSQL_SYSVAR(additional_mem_pool_size),
321 MYSQL_SYSVAR(autoextend_increment),
322 @@ -11339,6 +11454,7 @@
323 MYSQL_SYSVAR(file_format_check),
324 MYSQL_SYSVAR(file_format_max),
325 MYSQL_SYSVAR(flush_log_at_trx_commit),
326 + MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
327 MYSQL_SYSVAR(flush_method),
328 MYSQL_SYSVAR(force_recovery),
329 MYSQL_SYSVAR(locks_unsafe_for_binlog),
330 @@ -11376,6 +11492,13 @@
331 MYSQL_SYSVAR(show_verbose_locks),
332 MYSQL_SYSVAR(show_locks_held),
333 MYSQL_SYSVAR(version),
334 + MYSQL_SYSVAR(ibuf_max_size),
335 + MYSQL_SYSVAR(ibuf_active_contract),
336 + MYSQL_SYSVAR(ibuf_accel_rate),
337 + MYSQL_SYSVAR(checkpoint_age_target),
338 + MYSQL_SYSVAR(flush_neighbor_pages),
339 + MYSQL_SYSVAR(read_ahead),
340 + MYSQL_SYSVAR(adaptive_flushing_method),
341 MYSQL_SYSVAR(use_sys_malloc),
342 MYSQL_SYSVAR(use_native_aio),
343 MYSQL_SYSVAR(change_buffering),
344 diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
345 --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-11-03 07:01:13.000000000 +0900
346 +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:10:09.073984282 +0900
348 grow in size, as the references on the upper levels of the tree can
351 - ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
352 - / IBUF_POOL_SIZE_PER_MAX_SIZE;
353 + ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
354 + / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) (srv_ibuf_max_size / UNIV_PAGE_SIZE)); /* convert bytes to pages in long long before narrowing to ulint */
356 + srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
358 mutex_create(ibuf_pessimistic_insert_mutex_key,
359 &ibuf_pessimistic_insert_mutex,
360 @@ -2753,9 +2755,11 @@
362 max_size = ibuf->max_size;
364 + if (!srv_ibuf_active_contract) {
365 if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
370 sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
372 diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
373 --- a/storage/innobase/include/buf0rea.h 2010-11-03 07:01:13.000000000 +0900
374 +++ b/storage/innobase/include/buf0rea.h 2010-12-03 15:10:09.076066335 +0900
377 /** The size in pages of the area which the read-ahead algorithms read if
379 -#define BUF_READ_AHEAD_AREA(b) \
380 - ut_min(64, ut_2_power_up((b)->curr_size / 32))
381 +#define BUF_READ_AHEAD_AREA(b) 64
383 /** @name Modes used in read-ahead @{ */
384 /** read only pages belonging to the insert buffer tree */
385 diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
386 --- a/storage/innobase/include/fil0fil.h 2011-06-29 17:48:24.818969583 +0900
387 +++ b/storage/innobase/include/fil0fil.h 2011-06-29 17:58:49.215971540 +0900
392 - ulint space_id); /*!< in: file space id (this can be a group of
393 + ulint space_id, /*!< in: file space id (this can be a group of
394 log files or a tablespace of the database) */
396 /**********************************************************************//**
397 Flushes to disk writes in file spaces of the given type possibly cached by
399 diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
400 --- a/storage/innobase/include/ha_prototypes.h 2010-11-03 07:01:13.000000000 +0900
401 +++ b/storage/innobase/include/ha_prototypes.h 2010-12-03 15:10:09.078026360 +0900
403 /*===================*/
404 void* thd, /*!< in: thread handle (THD*) */
405 ulint value); /*!< in: time waited for the lock */
406 +/******************************************************************//**
410 +thd_flush_log_at_trx_commit(
411 +/*================================*/
414 /**********************************************************************//**
415 Get the current setting of the lower_case_table_names global parameter from
416 diff -ruN a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
417 --- a/storage/innobase/include/os0file.h 2011-05-11 20:54:12.000000000 +0900
418 +++ b/storage/innobase/include/os0file.h 2011-06-29 17:55:21.947041132 +0900
420 pfs_os_file_write_func(name, file, buf, offset, offset_high, \
421 n, __FILE__, __LINE__)
423 -# define os_file_flush(file) \
424 - pfs_os_file_flush_func(file, __FILE__, __LINE__)
425 +# define os_file_flush(file, metadata) \
426 + pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
428 # define os_file_rename(key, oldpath, newpath) \
429 pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
431 # define os_file_write(name, file, buf, offset, offset_high, n) \
432 os_file_write_func(name, file, buf, offset, offset_high, n)
434 -# define os_file_flush(file) os_file_flush_func(file)
435 +# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
437 # define os_file_rename(key, oldpath, newpath) \
438 os_file_rename_func(oldpath, newpath)
440 pfs_os_file_flush_func(
441 /*===================*/
442 os_file_t file, /*!< in, own: handle to a file */
444 const char* src_file,/*!< in: file name where func invoked */
445 ulint src_line);/*!< in: line where the func invoked */
451 - os_file_t file); /*!< in, own: handle to a file */
452 + os_file_t file, /*!< in, own: handle to a file */
454 /***********************************************************************//**
455 Retrieves the last error number if an error occurs in a file io function.
456 The number should be retrieved before any other OS calls (because they may
457 diff -ruN a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
458 --- a/storage/innobase/include/os0file.ic 2011-05-11 20:54:12.000000000 +0900
459 +++ b/storage/innobase/include/os0file.ic 2011-06-29 17:56:01.510958172 +0900
461 pfs_os_file_flush_func(
462 /*===================*/
463 os_file_t file, /*!< in, own: handle to a file */
465 const char* src_file,/*!< in: file name where func invoked */
466 ulint src_line)/*!< in: line where the func invoked */
470 register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
472 - result = os_file_flush_func(file);
473 + result = os_file_flush_func(file, metadata);
475 register_pfs_file_io_end(locker, 0);
477 diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
478 --- a/storage/innobase/include/srv0srv.h 2010-12-03 15:09:51.291955835 +0900
479 +++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:10:09.079029047 +0900
481 extern ulint srv_n_log_files;
482 extern ulint srv_log_file_size;
483 extern ulint srv_log_buffer_size;
484 -extern ulong srv_flush_log_at_trx_commit;
485 +//extern ulong srv_flush_log_at_trx_commit;
486 +extern char srv_use_global_flush_log_at_trx_commit;
487 extern char srv_adaptive_flushing;
491 extern ulong srv_max_purge_lag;
493 extern ulong srv_replication_delay;
495 +extern long long srv_ibuf_max_size;
496 +extern ulint srv_ibuf_active_contract;
497 +extern ulint srv_ibuf_accel_rate;
498 +extern ulint srv_checkpoint_age_target;
499 +extern ulint srv_flush_neighbor_pages;
500 +extern ulint srv_enable_unsafe_group_commit;
501 +extern ulint srv_read_ahead;
502 +extern ulint srv_adaptive_flushing_method;
504 /*-------------------------------------------*/
506 extern ulint srv_n_rows_inserted;
508 when writing data files, but do flush
509 after writing to log files */
510 SRV_UNIX_NOSYNC, /*!< do not flush after writing */
511 - SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
512 + SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
514 + SRV_UNIX_ALL_O_DIRECT /* experimental method: log files are also opened with O_DIRECT */
517 /** Alternatives for file i/o in Windows */
518 diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
519 --- a/storage/innobase/log/log0log.c 2010-11-03 07:01:13.000000000 +0900
520 +++ b/storage/innobase/log/log0log.c 2010-12-03 15:10:09.084023562 +0900
522 #include "srv0start.h"
525 +#include "ha_prototypes.h"
528 General philosophy of InnoDB redo-logs:
532 /************************************************************//**
536 +log_max_modified_age_async()
538 + if (srv_checkpoint_age_target) {
539 + return(ut_min(log_sys->max_modified_age_async,
540 + srv_checkpoint_age_target
541 + - srv_checkpoint_age_target / 8));
543 + return(log_sys->max_modified_age_async);
549 +log_max_checkpoint_age_async()
551 + if (srv_checkpoint_age_target) {
552 + return(ut_min(log_sys->max_checkpoint_age_async,
553 + srv_checkpoint_age_target));
555 + return(log_sys->max_checkpoint_age_async);
559 +/************************************************************//**
567 - if (checkpoint_age <= log->max_modified_age_async) {
568 + if (checkpoint_age <= log_max_modified_age_async()) {
573 oldest_lsn = buf_pool_get_oldest_modification();
576 - || lsn - oldest_lsn > log->max_modified_age_async
577 - || checkpoint_age > log->max_checkpoint_age_async) {
578 + || lsn - oldest_lsn > log_max_modified_age_async()
579 + || checkpoint_age > log_max_checkpoint_age_async()) {
581 log->check_flush_or_checkpoint = TRUE;
583 @@ -1100,9 +1128,10 @@
584 group = (log_group_t*)((ulint)group - 1);
586 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
587 + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
588 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
590 - fil_flush(group->space_id);
591 + fil_flush(group->space_id, FALSE);
595 @@ -1121,10 +1150,11 @@
596 logs and cannot end up here! */
598 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
599 + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
600 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
601 - && srv_flush_log_at_trx_commit != 2) {
602 + && thd_flush_log_at_trx_commit(NULL) != 2) {
604 - fil_flush(group->space_id);
605 + fil_flush(group->space_id, FALSE);
608 mutex_enter(&(log_sys->mutex));
609 @@ -1501,7 +1531,8 @@
611 mutex_exit(&(log_sys->mutex));
613 - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
614 + if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
615 + || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
616 /* O_DSYNC means the OS did not buffer the log file at all:
617 so we have also flushed to disk what we have written */
619 @@ -1511,7 +1542,7 @@
621 group = UT_LIST_GET_FIRST(log_sys->log_groups);
623 - fil_flush(group->space_id);
624 + fil_flush(group->space_id, FALSE);
625 log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
628 @@ -2120,10 +2151,10 @@
631 advance = 2 * (age - log->max_modified_age_sync);
632 - } else if (age > log->max_modified_age_async) {
633 + } else if (age > log_max_modified_age_async()) {
635 /* A flush is not urgent: we do an asynchronous preflush */
636 - advance = age - log->max_modified_age_async;
637 + advance = age - log_max_modified_age_async();
641 @@ -2137,7 +2168,7 @@
643 do_checkpoint = TRUE;
645 - } else if (checkpoint_age > log->max_checkpoint_age_async) {
646 + } else if (checkpoint_age > log_max_checkpoint_age_async()) {
647 /* A checkpoint is not urgent: do it asynchronously */
649 do_checkpoint = TRUE;
650 @@ -2607,7 +2638,7 @@
652 mutex_exit(&(log_sys->mutex));
654 - fil_flush(group->archive_space_id);
655 + fil_flush(group->archive_space_id, TRUE);
657 mutex_enter(&(log_sys->mutex));
659 @@ -3349,6 +3380,17 @@
660 log_sys->flushed_to_disk_lsn,
661 log_sys->last_checkpoint_lsn);
664 + "Max checkpoint age %lu\n"
665 + "Checkpoint age target %lu\n"
666 + "Modified age %lu\n"
667 + "Checkpoint age %lu\n",
668 + (ulong) log_sys->max_checkpoint_age,
669 + (ulong) log_max_checkpoint_age_async(),
670 + (ulong) (log_sys->lsn -
671 + log_buf_pool_get_oldest_modification()),
672 + (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
674 current_time = time(NULL);
676 time_elapsed = 0.001 + difftime(current_time,
677 diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
678 --- a/storage/innobase/log/log0recv.c 2010-11-03 07:01:13.000000000 +0900
679 +++ b/storage/innobase/log/log0recv.c 2010-12-03 15:10:09.089024191 +0900
680 @@ -2906,9 +2906,12 @@
681 ib_uint64_t archived_lsn;
682 #endif /* UNIV_LOG_ARCHIVE */
684 - byte log_hdr_buf[LOG_FILE_HDR_SIZE];
686 + byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
689 + log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
691 #ifdef UNIV_LOG_ARCHIVE
692 ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
693 /** TRUE when recovering from a checkpoint */
694 @@ -3468,7 +3471,7 @@
698 - os_file_flush(log_file);
699 + os_file_flush(log_file, TRUE);
700 os_file_close(log_file);
703 @@ -3492,7 +3495,7 @@
705 os_file_write(name, log_file, buf, 0, 0,
706 LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
707 - os_file_flush(log_file);
708 + os_file_flush(log_file, TRUE);
709 os_file_close(log_file);
712 diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
713 --- a/storage/innobase/os/os0file.c 2010-11-03 07:01:13.000000000 +0900
714 +++ b/storage/innobase/os/os0file.c 2010-12-03 15:10:09.093023540 +0900
715 @@ -1424,7 +1424,7 @@
717 #ifdef UNIV_NON_BUFFERED_IO
718 # ifndef UNIV_HOTBACKUP
719 - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
720 + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
721 /* Do not use unbuffered i/o to log files because
722 value 2 denotes that we do not flush the log at every
723 commit, but only once per second */
724 @@ -1440,7 +1440,7 @@
726 #ifdef UNIV_NON_BUFFERED_IO
727 # ifndef UNIV_HOTBACKUP
728 - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
729 + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
730 /* Do not use unbuffered i/o to log files because
731 value 2 denotes that we do not flush the log at every
732 commit, but only once per second */
733 @@ -1585,6 +1585,11 @@
734 os_file_set_nocache(file, name, mode_str);
737 + /* ALL_O_DIRECT: O_DIRECT also for transaction log file */
738 + if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
739 + os_file_set_nocache(file, name, mode_str);
743 if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
745 @@ -2008,7 +2013,7 @@
749 - ret = os_file_flush(file);
750 + ret = os_file_flush(file, TRUE);
754 @@ -2046,7 +2051,8 @@
758 - os_file_t file) /*!< in: handle to a file */
759 + os_file_t file, /*!< in: handle to a file */
764 @@ -2055,7 +2061,15 @@
768 +#ifdef HAVE_FDATASYNC
772 + ret = fdatasync(file);
780 @@ -2092,7 +2106,8 @@
784 - os_file_t file) /*!< in, own: handle to a file */
785 + os_file_t file, /*!< in, own: handle to a file */
790 @@ -2142,18 +2157,18 @@
791 /* If we are not on an operating system that supports this,
792 then fall back to a plain fsync. */
794 - ret = os_file_fsync(file);
795 + ret = os_file_fsync(file, metadata);
797 ret = fcntl(file, F_FULLFSYNC, NULL);
800 /* If we are not on a file system that supports this,
801 then fall back to a plain fsync. */
802 - ret = os_file_fsync(file);
803 + ret = os_file_fsync(file, metadata);
807 - ret = os_file_fsync(file);
808 + ret = os_file_fsync(file, metadata);
812 @@ -2336,7 +2351,7 @@
813 the OS crashes, a database page is only partially
814 physically written to disk. */
816 - ut_a(TRUE == os_file_flush(file));
817 + ut_a(TRUE == os_file_flush(file, TRUE));
819 # endif /* UNIV_DO_FLUSH */
821 @@ -2378,7 +2393,7 @@
822 the OS crashes, a database page is only partially
823 physically written to disk. */
825 - ut_a(TRUE == os_file_flush(file));
826 + ut_a(TRUE == os_file_flush(file, TRUE));
828 # endif /* UNIV_DO_FLUSH */
830 @@ -2750,7 +2765,7 @@
832 # ifdef UNIV_DO_FLUSH
833 if (!os_do_not_call_flush_at_each_write) {
834 - ut_a(TRUE == os_file_flush(file));
835 + ut_a(TRUE == os_file_flush(file, TRUE));
837 # endif /* UNIV_DO_FLUSH */
839 @@ -4289,7 +4304,7 @@
841 if (slot->type == OS_FILE_WRITE
842 && !os_do_not_call_flush_at_each_write) {
843 - if (!os_file_flush(slot->file)) {
844 + if (!os_file_flush(slot->file, TRUE)) {
848 @@ -4590,7 +4605,7 @@
850 if (slot->type == OS_FILE_WRITE
851 && !os_do_not_call_flush_at_each_write)
852 - && !os_file_flush(slot->file) {
853 + && !os_file_flush(slot->file, TRUE) {
856 #endif /* UNIV_DO_FLUSH */
857 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
858 --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:09:51.301987792 +0900
859 +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:13:29.369986988 +0900
861 UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
862 /* size in database pages */
863 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
864 -UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
865 +//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
866 +UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
868 /* Try to flush dirty pages so as to avoid IO bursts at
872 UNIV_INTERN ulong srv_replication_delay = 0;
874 +UNIV_INTERN long long srv_ibuf_max_size = 0;
875 +UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
876 +UNIV_INTERN ulint srv_ibuf_accel_rate = 100;
877 +#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0)))
879 +UNIV_INTERN ulint srv_checkpoint_age_target = 0;
880 +UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
882 +UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
883 +UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
884 +UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
885 /*-------------------------------------------*/
886 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
887 UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
888 @@ -2742,6 +2754,7 @@
889 ulint n_pages_purged = 0;
890 ulint n_bytes_merged;
891 ulint n_pages_flushed;
892 + ulint n_pages_flushed_prev = 0;
893 ulint n_bytes_archived;
894 ulint n_tables_to_drop;
896 @@ -2749,7 +2762,20 @@
897 ulint n_ios_very_old;
900 + ulint prev_adaptive_flushing_method = ULINT_UNDEFINED;
901 + ulint inner_loop = 0;
902 + ibool skip_sleep = FALSE;
904 + struct t_prev_flush_info_struct {
907 + unsigned offset:32;
908 + ib_uint64_t oldest_modification;
909 + } prev_flush_info[MAX_BUFFER_POOLS];
911 + ib_uint64_t lsn_old;
913 + ib_uint64_t oldest_lsn;
915 #ifdef UNIV_DEBUG_THREAD_CREATION
916 fprintf(stderr, "Master thread starts, id %lu\n",
917 @@ -2771,6 +2797,9 @@
919 mutex_exit(&kernel_mutex);
921 + mutex_enter(&(log_sys->mutex));
922 + lsn_old = log_sys->lsn;
923 + mutex_exit(&(log_sys->mutex));
925 /*****************************************************************/
926 /* ---- When there is database activity by users, we cycle in this
927 @@ -2801,9 +2830,13 @@
928 /* Sleep for 1 second on entrying the for loop below the first time. */
929 next_itr_time = ut_time_ms() + 1000;
931 + skip_sleep = FALSE;
933 for (i = 0; i < 10; i++) {
934 ulint cur_time = ut_time_ms();
936 + n_pages_flushed = 0; /* initialize */
938 /* ALTER TABLE in MySQL requires on Unix that the table handler
939 can drop tables lazily after there no longer are SELECT
941 @@ -2827,6 +2860,7 @@
942 srv_main_thread_op_info = "sleeping";
943 srv_main_1_second_loops++;
946 if (next_itr_time > cur_time
947 && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
949 @@ -2837,10 +2871,26 @@
950 (next_itr_time - cur_time)
955 + mutex_enter(&(log_sys->mutex));
956 + oldest_lsn = buf_pool_get_oldest_modification();
957 + ib_uint64_t lsn = log_sys->lsn;
958 + mutex_exit(&(log_sys->mutex));
962 + "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
963 + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
968 /* Each iteration should happen at 1 second interval. */
969 next_itr_time = ut_time_ms() + 1000;
970 + } /* if (!skip_sleep) */
972 + skip_sleep = FALSE;
974 /* Flush logs if needed */
975 srv_sync_log_buffer_in_background();
976 @@ -2860,7 +2910,7 @@
977 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
978 && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
979 srv_main_thread_op_info = "doing insert buffer merge";
980 - ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
981 + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
983 /* Flush logs if needed */
984 srv_sync_log_buffer_in_background();
985 @@ -2877,7 +2927,11 @@
986 n_pages_flushed = buf_flush_list(
987 PCT_IO(100), IB_ULONGLONG_MAX);
989 - } else if (srv_adaptive_flushing) {
990 + mutex_enter(&(log_sys->mutex));
991 + lsn_old = log_sys->lsn;
992 + mutex_exit(&(log_sys->mutex));
993 + prev_adaptive_flushing_method = ULINT_UNDEFINED;
994 + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
996 /* Try to keep the rate of flushing of dirty
997 pages such that redo log generation does not
998 @@ -2893,6 +2947,224 @@
1003 + mutex_enter(&(log_sys->mutex));
1004 + lsn_old = log_sys->lsn;
1005 + mutex_exit(&(log_sys->mutex));
1006 + prev_adaptive_flushing_method = ULINT_UNDEFINED;
1007 + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
1009 + /* Try to keep modified age not to exceed
1010 + max_checkpoint_age * 7/8 line */
1012 + mutex_enter(&(log_sys->mutex));
1014 + oldest_lsn = buf_pool_get_oldest_modification();
1015 + if (oldest_lsn == 0) {
1016 + lsn_old = log_sys->lsn;
1017 + mutex_exit(&(log_sys->mutex));
1020 + if ((log_sys->lsn - oldest_lsn)
1021 + > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
1022 + /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
1023 + /* We should not flush from here. */
1024 + lsn_old = log_sys->lsn;
1025 + mutex_exit(&(log_sys->mutex));
1026 + } else if ((log_sys->lsn - oldest_lsn)
1027 + > (log_sys->max_checkpoint_age)/4 ) {
1029 + /* defence line (max_checkpoint_age * 1/2) */
1030 + ib_uint64_t lsn = log_sys->lsn;
1032 + ib_uint64_t level, bpl;
1033 + buf_page_t* bpage;
1036 + mutex_exit(&(log_sys->mutex));
1040 + for (j = 0; j < srv_buf_pool_instances; j++) {
1041 + buf_pool_t* buf_pool;
1044 + buf_pool = buf_pool_from_array(j);
1046 + /* The scanning flush_list is optimistic here */
1050 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1052 + while (bpage != NULL) {
1053 + ib_uint64_t oldest_modification = bpage->oldest_modification;
1054 + if (oldest_modification != 0) {
1055 + level += log_sys->max_checkpoint_age
1056 + - (lsn - oldest_modification);
1058 + bpage = UT_LIST_GET_NEXT(list, bpage);
1063 + bpl += ((ib_uint64_t) n_blocks * n_blocks
1064 + * (lsn - lsn_old)) / level;
1069 + if (!srv_use_doublewrite_buf) {
1070 + /* flush is faster than when doublewrite */
1071 + bpl = (bpl * 7) / 8;
1076 + n_pages_flushed = buf_flush_list(bpl,
1077 + oldest_lsn + (lsn - lsn_old));
1078 + if (n_pages_flushed == ULINT_UNDEFINED) {
1079 + os_thread_sleep(5000);
1080 + goto retry_flush_batch;
1087 + "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
1088 + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1089 + lsn - lsn_old, bpl);
1092 + lsn_old = log_sys->lsn;
1093 + mutex_exit(&(log_sys->mutex));
1096 + prev_adaptive_flushing_method = 1;
1097 + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
1098 + buf_pool_t* buf_pool;
1099 + buf_page_t* bpage;
1103 + mutex_enter(&(log_sys->mutex));
1104 + oldest_lsn = buf_pool_get_oldest_modification();
1105 + lsn = log_sys->lsn;
1106 + mutex_exit(&(log_sys->mutex));
1108 + /* upper loop/sec. (x10) */
1109 + next_itr_time -= 900; /* 1000 - 900 == 100 */
1111 + if (inner_loop < 10) {
1117 + if (prev_adaptive_flushing_method == 2) {
1120 + ulint new_blocks_sum, flushed_blocks_sum;
1122 + blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
1124 + /* prev_flush_info[j] should hold the values from the previous loop */
1125 + for (j = 0; j < srv_buf_pool_instances; j++) {
1126 + lint blocks_num, new_blocks_num, flushed_blocks_num;
1129 + buf_pool = buf_pool_from_array(j);
1131 + blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
1132 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1133 + new_blocks_num = 0;
1136 + while (bpage != NULL) {
1137 + if (prev_flush_info[j].space == bpage->space
1138 + && prev_flush_info[j].offset == bpage->offset
1139 + && prev_flush_info[j].oldest_modification
1140 + == bpage->oldest_modification) {
1144 + bpage = UT_LIST_GET_NEXT(list, bpage);
1148 + new_blocks_num = blocks_num;
1151 + flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
1153 + if (flushed_blocks_num < 0) {
1154 + flushed_blocks_num = 0;
1157 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1159 + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1161 + prev_flush_info[j].space = bpage->space;
1162 + prev_flush_info[j].offset = bpage->offset;
1163 + prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1165 + prev_flush_info[j].space = 0;
1166 + prev_flush_info[j].offset = 0;
1167 + prev_flush_info[j].oldest_modification = 0;
1170 + new_blocks_sum += new_blocks_num;
1171 + flushed_blocks_sum += flushed_blocks_num;
1172 + blocks_sum += blocks_num;
1175 + n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
1176 + if (flushed_blocks_sum > n_pages_flushed_prev) {
1177 + n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
1180 + if (n_flush > 0) {
1182 + n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
1184 + n_pages_flushed = 0;
1187 + /* remember the current first page of each flush_list for the next loop */
1188 + for (j = 0; j < srv_buf_pool_instances; j++) {
1189 + buf_pool = buf_pool_from_array(j);
1191 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1193 + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1195 + prev_flush_info[j].space = bpage->space;
1196 + prev_flush_info[j].offset = bpage->offset;
1197 + prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1199 + prev_flush_info[j].space = 0;
1200 + prev_flush_info[j].offset = 0;
1201 + prev_flush_info[j].oldest_modification = 0;
1204 + n_pages_flushed = 0;
1208 + prev_adaptive_flushing_method = 2;
1210 + mutex_enter(&(log_sys->mutex));
1211 + lsn_old = log_sys->lsn;
1212 + mutex_exit(&(log_sys->mutex));
1213 + prev_adaptive_flushing_method = ULINT_UNDEFINED;
1216 + if (n_pages_flushed == ULINT_UNDEFINED) {
1217 + n_pages_flushed_prev = 0;
1219 + n_pages_flushed_prev = n_pages_flushed;
1222 if (srv_activity_count == old_activity_count) {
1223 @@ -2941,7 +3213,7 @@
1224 even if the server were active */
1226 srv_main_thread_op_info = "doing insert buffer merge";
1227 - ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1228 + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1230 /* Flush logs if needed */
1231 srv_sync_log_buffer_in_background();
1232 @@ -3049,7 +3321,7 @@
1233 buf_flush_list below. Otherwise, the system favors
1234 clean pages over cleanup throughput. */
1235 n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
1237 + PCT_IBUF_IO(100));
1240 srv_main_thread_op_info = "reserving kernel mutex";
1241 @@ -3189,6 +3461,7 @@
1244 ulint n_total_purged = ULINT_UNDEFINED;
1245 + ulint next_itr_time;
1247 ut_a(srv_n_purge_threads == 1);
1249 @@ -3209,9 +3482,12 @@
1251 mutex_exit(&kernel_mutex);
1253 + next_itr_time = ut_time_ms();
1255 while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
1257 ulint n_pages_purged = 0;
1260 /* If there are very few records to purge or the last
1261 purge didn't purge any records then wait for activity.
1262 @@ -3258,6 +3534,16 @@
1263 } while (n_pages_purged > 0 && !srv_fast_shutdown);
1265 srv_sync_log_buffer_in_background();
1267 + cur_time = ut_time_ms();
1268 + if (next_itr_time > cur_time) {
1269 + os_thread_sleep(ut_min(1000000,
1270 + (next_itr_time - cur_time)
1272 + next_itr_time = ut_time_ms() + 1000;
1274 + next_itr_time = cur_time + 1000;
1278 mutex_enter(&kernel_mutex);
1279 diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
1280 --- a/storage/innobase/srv/srv0start.c 2010-11-03 07:01:13.000000000 +0900
1281 +++ b/storage/innobase/srv/srv0start.c 2010-12-03 15:10:09.103023543 +0900
1282 @@ -1217,6 +1217,9 @@
1283 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1284 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1286 + } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1287 + srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1289 } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1290 srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1292 diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
1293 --- a/storage/innobase/trx/trx0purge.c 2011-04-12 14:14:14.000000000 +0900
1294 +++ b/storage/innobase/trx/trx0purge.c 2011-04-12 14:15:44.000000000 +0900
1295 @@ -392,10 +392,10 @@
1296 trx_sys->rseg_history_len++;
1297 mutex_exit(&kernel_mutex);
1299 - if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
1300 +// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/
1301 /* Inform the purge thread that there is work to do. */
1302 srv_wake_purge_thread_if_not_active();
1307 /**********************************************************************//**
1308 diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
1309 --- a/storage/innobase/trx/trx0trx.c 2010-11-03 07:01:13.000000000 +0900
1310 +++ b/storage/innobase/trx/trx0trx.c 2010-12-03 15:10:09.106023937 +0900
1312 trx->read_view = NULL;
1315 + ulint flush_log_at_trx_commit;
1317 mutex_exit(&kernel_mutex);
1319 @@ -992,6 +993,12 @@
1320 trx_undo_insert_cleanup(trx);
1323 + if (srv_use_global_flush_log_at_trx_commit) {
1324 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1326 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1329 /* NOTE that we could possibly make a group commit more
1330 efficient here: call os_thread_yield here to allow also other
1331 trxs to come to commit! */
1332 @@ -1023,9 +1030,9 @@
1333 if (trx->flush_log_later) {
1334 /* Do nothing yet */
1335 trx->must_flush_log_later = TRUE;
1336 - } else if (srv_flush_log_at_trx_commit == 0) {
1337 + } else if (flush_log_at_trx_commit == 0) {
1339 - } else if (srv_flush_log_at_trx_commit == 1) {
1340 + } else if (flush_log_at_trx_commit == 1) {
1341 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1342 /* Write the log but do not flush it to disk */
1344 @@ -1037,7 +1044,7 @@
1346 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1348 - } else if (srv_flush_log_at_trx_commit == 2) {
1349 + } else if (flush_log_at_trx_commit == 2) {
1351 /* Write the log but do not flush it to disk */
1353 @@ -1701,16 +1708,23 @@
1354 trx_t* trx) /*!< in: trx handle */
1356 ib_uint64_t lsn = trx->commit_lsn;
1357 + ulint flush_log_at_trx_commit;
1361 trx->op_info = "flushing log";
1363 + if (srv_use_global_flush_log_at_trx_commit) {
1364 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1366 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1369 if (!trx->must_flush_log_later) {
1371 - } else if (srv_flush_log_at_trx_commit == 0) {
1372 + } else if (flush_log_at_trx_commit == 0) {
1374 - } else if (srv_flush_log_at_trx_commit == 1) {
1375 + } else if (flush_log_at_trx_commit == 1) {
1376 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1377 /* Write the log but do not flush it to disk */
1379 @@ -1721,7 +1735,7 @@
1381 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1383 - } else if (srv_flush_log_at_trx_commit == 2) {
1384 + } else if (flush_log_at_trx_commit == 2) {
1386 /* Write the log but do not flush it to disk */
1388 @@ -1969,6 +1983,8 @@
1389 /*--------------------------------------*/
1392 + ulint flush_log_at_trx_commit;
1394 /* Depending on the my.cnf options, we may now write the log
1395 buffer to the log files, making the prepared state of the
1396 transaction durable if the OS does not crash. We may also
1397 @@ -1988,9 +2004,15 @@
1399 mutex_exit(&kernel_mutex);
1401 - if (srv_flush_log_at_trx_commit == 0) {
1402 + if (srv_use_global_flush_log_at_trx_commit) {
1403 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1405 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1408 + if (flush_log_at_trx_commit == 0) {
1410 - } else if (srv_flush_log_at_trx_commit == 1) {
1411 + } else if (flush_log_at_trx_commit == 1) {
1412 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1413 /* Write the log but do not flush it to disk */
1415 @@ -2002,7 +2024,7 @@
1417 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1419 - } else if (srv_flush_log_at_trx_commit == 2) {
1420 + } else if (flush_log_at_trx_commit == 2) {
1422 /* Write the log but do not flush it to disk */