1 # name : innodb_io_patches.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 --- a/storage/innobase/buf/buf0buf.c
9 +++ b/storage/innobase/buf/buf0buf.c
12 /* When we traverse all the flush lists we don't want another
13 thread to add a dirty page to any flush list. */
14 + if (srv_buf_pool_instances > 1)
15 log_flush_order_mutex_enter();
17 for (i = 0; i < srv_buf_pool_instances; i++) {
22 + if (srv_buf_pool_instances > 1)
23 log_flush_order_mutex_exit();
25 /* The returned answer may be out of date: the flush_list can
26 --- a/storage/innobase/buf/buf0flu.c
27 +++ b/storage/innobase/buf/buf0flu.c
30 /* Now flush the doublewrite buffer data to disk */
32 - fil_flush(TRX_SYS_SPACE);
33 + fil_flush(TRX_SYS_SPACE, FALSE);
35 /* We know that the writes have been flushed to disk now
36 and in recovery we will find them in the doublewrite buffer
39 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
41 - if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
42 + if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) {
43 /* If there is little space, it is better not to flush
44 any block except from the end of the LRU list */
46 --- a/storage/innobase/buf/buf0rea.c
47 +++ b/storage/innobase/buf/buf0rea.c
49 = BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
52 + if (!(srv_read_ahead & 2)) {
56 if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
57 /* No read-ahead to avoid thread deadlocks */
59 --- a/storage/innobase/fil/fil0fil.c
60 +++ b/storage/innobase/fil/fil0fil.c
63 os_thread_sleep(20000);
66 + fil_flush(id, TRUE);
74 - ret = os_file_flush(file);
75 + ret = os_file_flush(file, TRUE);
78 fputs("InnoDB: Error: file flush of tablespace ", stderr);
83 - success = os_file_flush(file);
84 + success = os_file_flush(file, TRUE);
92 - success = os_file_flush(file);
93 + success = os_file_flush(file, TRUE);
98 size_after_extend, *actual_size); */
99 mutex_exit(&fil_system->mutex);
101 - fil_flush(space_id);
102 + fil_flush(space_id, TRUE);
106 @@ -4577,8 +4577,9 @@
110 - ulint space_id) /*!< in: file space id (this can be a group of
111 + ulint space_id, /*!< in: file space id (this can be a group of
112 log files or a tablespace of the database) */
117 @@ -4649,7 +4650,7 @@
118 /* fprintf(stderr, "Flushing to file %s\n",
121 - os_file_flush(file);
122 + os_file_flush(file, metadata);
124 mutex_enter(&fil_system->mutex);
126 @@ -4732,7 +4733,7 @@
127 a non-existing space id. */
128 for (i = 0; i < n_space_ids; i++) {
130 - fil_flush(space_ids[i]);
131 + fil_flush(space_ids[i], TRUE);
135 --- a/storage/innobase/handler/ha_innodb.cc
136 +++ b/storage/innobase/handler/ha_innodb.cc
138 "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
139 NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
141 +static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
142 + "Set to 0 (write and flush once per second),"
143 + " 1 (write and flush at each commit)"
144 + " or 2 (write at commit, flush once per second).",
145 + NULL, NULL, 1, 0, 2, 0);
148 static handler *innobase_create_handler(handlerton *hton,
154 +/******************************************************************//**
156 +extern "C" UNIV_INTERN
158 +thd_flush_log_at_trx_commit(
159 +/*================================*/
162 + return(THDVAR((THD*) thd, flush_log_at_trx_commit));
165 /********************************************************************//**
166 Obtain the InnoDB transaction of a MySQL thread.
167 @return reference to transaction pointer */
168 @@ -2442,6 +2459,9 @@
169 srv_n_read_io_threads = (ulint) innobase_read_io_threads;
170 srv_n_write_io_threads = (ulint) innobase_write_io_threads;
172 + srv_read_ahead &= 3;
173 + srv_adaptive_flushing_method %= 3;
175 srv_force_recovery = (ulint) innobase_force_recovery;
177 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
178 @@ -11036,7 +11056,7 @@
179 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
180 "Purge threads can be either 0 or 1.",
182 - 0, /* Default setting */
183 + 1, /* Default setting */
184 0, /* Minimum value */
185 1, 0); /* Maximum value */
187 @@ -11078,12 +11098,18 @@
188 innodb_file_format_max_validate,
189 innodb_file_format_max_update, "Antelope");
191 -static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
192 - PLUGIN_VAR_OPCMDARG,
193 - "Set to 0 (write and flush once per second),"
194 - " 1 (write and flush at each commit)"
195 - " or 2 (write at commit, flush once per second).",
196 - NULL, NULL, 1, 0, 2, 0);
197 +/* Changed to the THDVAR */
198 +//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
199 +// PLUGIN_VAR_OPCMDARG,
200 +// "Set to 0 (write and flush once per second),"
201 +// " 1 (write and flush at each commit)"
202 +// " or 2 (write at commit, flush once per second).",
203 +// NULL, NULL, 1, 0, 2, 0);
205 +static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
206 + PLUGIN_VAR_NOCMDARG,
207 + "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
210 static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
211 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
212 @@ -11183,7 +11209,7 @@
213 static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
214 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
215 "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
216 - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
217 + NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
219 static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
220 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
221 @@ -11335,6 +11361,95 @@
222 "trigger a readahead.",
223 NULL, NULL, 56, 0, 64, 0);
225 +static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
226 + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
227 + "The maximum size of the insert buffer. (in bytes)",
228 + NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
230 +static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
231 + PLUGIN_VAR_RQCMDARG,
232 + "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
233 + NULL, NULL, 1, 0, 1, 0);
235 +static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
236 + PLUGIN_VAR_RQCMDARG,
237 + "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
238 + NULL, NULL, 100, 100, 999999999, 0);
240 +static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target,
241 + PLUGIN_VAR_RQCMDARG,
242 + "Control soft limit of checkpoint age. (0 : not control)",
243 + NULL, NULL, 0, 0, ~0UL, 0);
245 +static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages,
246 + PLUGIN_VAR_RQCMDARG,
247 + "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
248 + NULL, NULL, 1, 0, 1, 0);
252 +innodb_read_ahead_update(
254 + struct st_mysql_sys_var* var,
258 + *(long *)var_ptr= (*(long *)save) & 3;
260 +const char *read_ahead_names[]=
266 + /* For compatibility of the older patch */
267 + "0", /* 4 ("none" + 4) */
270 + "3", /* 7 ("both" + 4) */
273 +TYPELIB read_ahead_typelib=
275 + array_elements(read_ahead_names) - 1, "read_ahead_typelib",
276 + read_ahead_names, NULL
278 +static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
279 + PLUGIN_VAR_RQCMDARG,
280 + "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]",
281 + NULL, innodb_read_ahead_update, 2, &read_ahead_typelib);
285 +innodb_adaptive_flushing_method_update(
287 + struct st_mysql_sys_var* var,
291 + *(long *)var_ptr= (*(long *)save) % 4;
293 +const char *adaptive_flushing_method_names[]=
296 + "estimate", /* 1 */
297 + "keep_average", /* 2 */
298 + /* For compatibility of the older patch */
299 + "0", /* 3 ("native" + 3) */
300 + "1", /* 4 ("estimate" + 3) */
301 + "2", /* 5 ("keep_average" + 3) */
304 +TYPELIB adaptive_flushing_method_typelib=
306 + array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib",
307 + adaptive_flushing_method_names, NULL
309 +static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method,
310 + PLUGIN_VAR_RQCMDARG,
311 + "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
312 + NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
314 static struct st_mysql_sys_var* innobase_system_variables[]= {
315 MYSQL_SYSVAR(additional_mem_pool_size),
316 MYSQL_SYSVAR(autoextend_increment),
317 @@ -11355,6 +11470,7 @@
318 MYSQL_SYSVAR(file_format_check),
319 MYSQL_SYSVAR(file_format_max),
320 MYSQL_SYSVAR(flush_log_at_trx_commit),
321 + MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
322 MYSQL_SYSVAR(flush_method),
323 MYSQL_SYSVAR(force_recovery),
324 MYSQL_SYSVAR(large_prefix),
325 @@ -11393,6 +11509,13 @@
326 MYSQL_SYSVAR(show_verbose_locks),
327 MYSQL_SYSVAR(show_locks_held),
328 MYSQL_SYSVAR(version),
329 + MYSQL_SYSVAR(ibuf_max_size),
330 + MYSQL_SYSVAR(ibuf_active_contract),
331 + MYSQL_SYSVAR(ibuf_accel_rate),
332 + MYSQL_SYSVAR(checkpoint_age_target),
333 + MYSQL_SYSVAR(flush_neighbor_pages),
334 + MYSQL_SYSVAR(read_ahead),
335 + MYSQL_SYSVAR(adaptive_flushing_method),
336 MYSQL_SYSVAR(use_sys_malloc),
337 MYSQL_SYSVAR(use_native_aio),
338 MYSQL_SYSVAR(change_buffering),
339 --- a/storage/innobase/ibuf/ibuf0ibuf.c
340 +++ b/storage/innobase/ibuf/ibuf0ibuf.c
342 grow in size, as the references on the upper levels of the tree can
345 - ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
346 - / IBUF_POOL_SIZE_PER_MAX_SIZE;
347 + ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
348 + / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
350 + srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
352 mutex_create(ibuf_pessimistic_insert_mutex_key,
353 &ibuf_pessimistic_insert_mutex,
354 @@ -2753,9 +2755,11 @@
356 max_size = ibuf->max_size;
358 + if (!srv_ibuf_active_contract) {
359 if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
364 sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
366 --- a/storage/innobase/include/buf0rea.h
367 +++ b/storage/innobase/include/buf0rea.h
370 /** The size in pages of the area which the read-ahead algorithms read if
372 -#define BUF_READ_AHEAD_AREA(b) \
373 - ut_min(64, ut_2_power_up((b)->curr_size / 32))
374 +#define BUF_READ_AHEAD_AREA(b) 64
376 /** @name Modes used in read-ahead @{ */
377 /** read only pages belonging to the insert buffer tree */
378 --- a/storage/innobase/include/fil0fil.h
379 +++ b/storage/innobase/include/fil0fil.h
384 - ulint space_id); /*!< in: file space id (this can be a group of
385 + ulint space_id, /*!< in: file space id (this can be a group of
386 log files or a tablespace of the database) */
388 /**********************************************************************//**
389 Flushes to disk writes in file spaces of the given type possibly cached by
391 --- a/storage/innobase/include/ha_prototypes.h
392 +++ b/storage/innobase/include/ha_prototypes.h
394 /*===================*/
395 void* thd, /*!< in: thread handle (THD*) */
396 ulint value); /*!< in: time waited for the lock */
397 +/******************************************************************//**
401 +thd_flush_log_at_trx_commit(
402 +/*================================*/
405 /**********************************************************************//**
406 Get the current setting of the lower_case_table_names global parameter from
407 --- a/storage/innobase/include/os0file.h
408 +++ b/storage/innobase/include/os0file.h
410 pfs_os_file_write_func(name, file, buf, offset, offset_high, \
411 n, __FILE__, __LINE__)
413 -# define os_file_flush(file) \
414 - pfs_os_file_flush_func(file, __FILE__, __LINE__)
415 +# define os_file_flush(file, metadata) \
416 + pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
418 # define os_file_rename(key, oldpath, newpath) \
419 pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
421 # define os_file_write(name, file, buf, offset, offset_high, n) \
422 os_file_write_func(name, file, buf, offset, offset_high, n)
424 -# define os_file_flush(file) os_file_flush_func(file)
425 +# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
427 # define os_file_rename(key, oldpath, newpath) \
428 os_file_rename_func(oldpath, newpath)
430 pfs_os_file_flush_func(
431 /*===================*/
432 os_file_t file, /*!< in, own: handle to a file */
434 const char* src_file,/*!< in: file name where func invoked */
435 ulint src_line);/*!< in: line where the func invoked */
441 - os_file_t file); /*!< in, own: handle to a file */
442 + os_file_t file, /*!< in, own: handle to a file */
444 /***********************************************************************//**
445 Retrieves the last error number if an error occurs in a file io function.
446 The number should be retrieved before any other OS calls (because they may
447 --- a/storage/innobase/include/os0file.ic
448 +++ b/storage/innobase/include/os0file.ic
450 pfs_os_file_flush_func(
451 /*===================*/
452 os_file_t file, /*!< in, own: handle to a file */
454 const char* src_file,/*!< in: file name where func invoked */
455 ulint src_line)/*!< in: line where the func invoked */
459 register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
461 - result = os_file_flush_func(file);
462 + result = os_file_flush_func(file, metadata);
464 register_pfs_file_io_end(locker, 0);
466 --- a/storage/innobase/include/srv0srv.h
467 +++ b/storage/innobase/include/srv0srv.h
469 extern ulint srv_n_log_files;
470 extern ulint srv_log_file_size;
471 extern ulint srv_log_buffer_size;
472 -extern ulong srv_flush_log_at_trx_commit;
473 +//extern ulong srv_flush_log_at_trx_commit;
474 +extern char srv_use_global_flush_log_at_trx_commit;
475 extern char srv_adaptive_flushing;
479 extern ulong srv_max_purge_lag;
481 extern ulong srv_replication_delay;
483 +extern long long srv_ibuf_max_size;
484 +extern ulint srv_ibuf_active_contract;
485 +extern ulint srv_ibuf_accel_rate;
486 +extern ulint srv_checkpoint_age_target;
487 +extern ulint srv_flush_neighbor_pages;
488 +extern ulint srv_enable_unsafe_group_commit;
489 +extern ulint srv_read_ahead;
490 +extern ulint srv_adaptive_flushing_method;
492 /*-------------------------------------------*/
494 extern ulint srv_n_rows_inserted;
496 when writing data files, but do flush
497 after writing to log files */
498 SRV_UNIX_NOSYNC, /*!< do not flush after writing */
499 - SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
500 + SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
502 + SRV_UNIX_ALL_O_DIRECT /* new method for examination: the log files are also opened with O_DIRECT */
505 /** Alternatives for file i/o in Windows */
506 --- a/storage/innobase/log/log0log.c
507 +++ b/storage/innobase/log/log0log.c
509 #include "srv0start.h"
512 +#include "ha_prototypes.h"
515 General philosophy of InnoDB redo-logs:
519 /************************************************************//**
523 +log_max_modified_age_async()
525 + if (srv_checkpoint_age_target) {
526 + return(ut_min(log_sys->max_modified_age_async,
527 + srv_checkpoint_age_target
528 + - srv_checkpoint_age_target / 8));
530 + return(log_sys->max_modified_age_async);
536 +log_max_checkpoint_age_async()
538 + if (srv_checkpoint_age_target) {
539 + return(ut_min(log_sys->max_checkpoint_age_async,
540 + srv_checkpoint_age_target));
542 + return(log_sys->max_checkpoint_age_async);
546 +/************************************************************//**
554 - if (checkpoint_age <= log->max_modified_age_async) {
555 + if (checkpoint_age <= log_max_modified_age_async()) {
560 oldest_lsn = buf_pool_get_oldest_modification();
563 - || lsn - oldest_lsn > log->max_modified_age_async
564 - || checkpoint_age > log->max_checkpoint_age_async) {
565 + || lsn - oldest_lsn > log_max_modified_age_async()
566 + || checkpoint_age > log_max_checkpoint_age_async()) {
568 log->check_flush_or_checkpoint = TRUE;
570 @@ -1100,9 +1128,10 @@
571 group = (log_group_t*)((ulint)group - 1);
573 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
574 + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
575 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
577 - fil_flush(group->space_id);
578 + fil_flush(group->space_id, FALSE);
582 @@ -1121,10 +1150,11 @@
583 logs and cannot end up here! */
585 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
586 + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
587 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
588 - && srv_flush_log_at_trx_commit != 2) {
589 + && thd_flush_log_at_trx_commit(NULL) != 2) {
591 - fil_flush(group->space_id);
592 + fil_flush(group->space_id, FALSE);
595 mutex_enter(&(log_sys->mutex));
596 @@ -1501,7 +1531,8 @@
598 mutex_exit(&(log_sys->mutex));
600 - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
601 + if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
602 + || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
603 /* O_DSYNC means the OS did not buffer the log file at all:
604 so we have also flushed to disk what we have written */
606 @@ -1511,7 +1542,7 @@
608 group = UT_LIST_GET_FIRST(log_sys->log_groups);
610 - fil_flush(group->space_id);
611 + fil_flush(group->space_id, FALSE);
612 log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
615 @@ -2120,10 +2151,10 @@
618 advance = 2 * (age - log->max_modified_age_sync);
619 - } else if (age > log->max_modified_age_async) {
620 + } else if (age > log_max_modified_age_async()) {
622 /* A flush is not urgent: we do an asynchronous preflush */
623 - advance = age - log->max_modified_age_async;
624 + advance = age - log_max_modified_age_async();
628 @@ -2137,7 +2168,7 @@
630 do_checkpoint = TRUE;
632 - } else if (checkpoint_age > log->max_checkpoint_age_async) {
633 + } else if (checkpoint_age > log_max_checkpoint_age_async()) {
634 /* A checkpoint is not urgent: do it asynchronously */
636 do_checkpoint = TRUE;
637 @@ -2607,7 +2638,7 @@
639 mutex_exit(&(log_sys->mutex));
641 - fil_flush(group->archive_space_id);
642 + fil_flush(group->archive_space_id, TRUE);
644 mutex_enter(&(log_sys->mutex));
646 @@ -3349,6 +3380,17 @@
647 log_sys->flushed_to_disk_lsn,
648 log_sys->last_checkpoint_lsn);
651 + "Max checkpoint age %lu\n"
652 + "Checkpoint age target %lu\n"
653 + "Modified age %lu\n"
654 + "Checkpoint age %lu\n",
655 + (ulong) log_sys->max_checkpoint_age,
656 + (ulong) log_max_checkpoint_age_async(),
657 + (ulong) (log_sys->lsn -
658 + log_buf_pool_get_oldest_modification()),
659 + (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
661 current_time = time(NULL);
663 time_elapsed = 0.001 + difftime(current_time,
664 --- a/storage/innobase/log/log0recv.c
665 +++ b/storage/innobase/log/log0recv.c
666 @@ -2906,9 +2906,12 @@
667 ib_uint64_t archived_lsn;
668 #endif /* UNIV_LOG_ARCHIVE */
670 - byte log_hdr_buf[LOG_FILE_HDR_SIZE];
672 + byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
675 + log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
677 #ifdef UNIV_LOG_ARCHIVE
678 ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
679 /** TRUE when recovering from a checkpoint */
680 @@ -3468,7 +3471,7 @@
684 - os_file_flush(log_file);
685 + os_file_flush(log_file, TRUE);
686 os_file_close(log_file);
689 @@ -3492,7 +3495,7 @@
691 os_file_write(name, log_file, buf, 0, 0,
692 LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
693 - os_file_flush(log_file);
694 + os_file_flush(log_file, TRUE);
695 os_file_close(log_file);
698 --- a/storage/innobase/os/os0file.c
699 +++ b/storage/innobase/os/os0file.c
700 @@ -1424,7 +1424,7 @@
702 #ifdef UNIV_NON_BUFFERED_IO
703 # ifndef UNIV_HOTBACKUP
704 - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
705 + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
706 /* Do not use unbuffered i/o to log files because
707 value 2 denotes that we do not flush the log at every
708 commit, but only once per second */
709 @@ -1440,7 +1440,7 @@
711 #ifdef UNIV_NON_BUFFERED_IO
712 # ifndef UNIV_HOTBACKUP
713 - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
714 + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
715 /* Do not use unbuffered i/o to log files because
716 value 2 denotes that we do not flush the log at every
717 commit, but only once per second */
718 @@ -1585,6 +1585,11 @@
719 os_file_set_nocache(file, name, mode_str);
722 + /* ALL_O_DIRECT: O_DIRECT also for transaction log file */
723 + if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
724 + os_file_set_nocache(file, name, mode_str);
728 if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
730 @@ -2008,7 +2013,7 @@
734 - ret = os_file_flush(file);
735 + ret = os_file_flush(file, TRUE);
739 @@ -2046,7 +2051,8 @@
743 - os_file_t file) /*!< in: handle to a file */
744 + os_file_t file, /*!< in: handle to a file */
749 @@ -2055,7 +2061,16 @@
753 +#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
757 + ret = fdatasync(file);
766 @@ -2092,7 +2107,8 @@
770 - os_file_t file) /*!< in, own: handle to a file */
771 + os_file_t file, /*!< in, own: handle to a file */
776 @@ -2142,18 +2158,18 @@
777 /* If we are not on an operating system that supports this,
778 then fall back to a plain fsync. */
780 - ret = os_file_fsync(file);
781 + ret = os_file_fsync(file, metadata);
783 ret = fcntl(file, F_FULLFSYNC, NULL);
786 /* If we are not on a file system that supports this,
787 then fall back to a plain fsync. */
788 - ret = os_file_fsync(file);
789 + ret = os_file_fsync(file, metadata);
793 - ret = os_file_fsync(file);
794 + ret = os_file_fsync(file, metadata);
798 @@ -2336,7 +2352,7 @@
799 the OS crashes, a database page is only partially
800 physically written to disk. */
802 - ut_a(TRUE == os_file_flush(file));
803 + ut_a(TRUE == os_file_flush(file, TRUE));
805 # endif /* UNIV_DO_FLUSH */
807 @@ -2378,7 +2394,7 @@
808 the OS crashes, a database page is only partially
809 physically written to disk. */
811 - ut_a(TRUE == os_file_flush(file));
812 + ut_a(TRUE == os_file_flush(file, TRUE));
814 # endif /* UNIV_DO_FLUSH */
816 @@ -2750,7 +2766,7 @@
818 # ifdef UNIV_DO_FLUSH
819 if (!os_do_not_call_flush_at_each_write) {
820 - ut_a(TRUE == os_file_flush(file));
821 + ut_a(TRUE == os_file_flush(file, TRUE));
823 # endif /* UNIV_DO_FLUSH */
825 @@ -4296,7 +4312,7 @@
827 if (slot->type == OS_FILE_WRITE
828 && !os_do_not_call_flush_at_each_write) {
829 - if (!os_file_flush(slot->file)) {
830 + if (!os_file_flush(slot->file, TRUE)) {
834 @@ -4597,7 +4613,7 @@
836 if (slot->type == OS_FILE_WRITE
837 && !os_do_not_call_flush_at_each_write)
838 - && !os_file_flush(slot->file) {
839 + && !os_file_flush(slot->file, TRUE) {
842 #endif /* UNIV_DO_FLUSH */
843 --- a/storage/innobase/srv/srv0srv.c
844 +++ b/storage/innobase/srv/srv0srv.c
846 UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
847 /* size in database pages */
848 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
849 -UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
850 +//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
851 +UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
853 /* Try to flush dirty pages so as to avoid IO bursts at
857 UNIV_INTERN ulong srv_replication_delay = 0;
859 +UNIV_INTERN long long srv_ibuf_max_size = 0;
860 +UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
861 +UNIV_INTERN ulint srv_ibuf_accel_rate = 100;
862 +#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0)))
864 +UNIV_INTERN ulint srv_checkpoint_age_target = 0;
865 +UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
867 +UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
868 +UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
869 +UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
870 /*-------------------------------------------*/
871 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
872 UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
873 @@ -2709,7 +2721,7 @@
875 ut_ad(!mutex_own(&kernel_mutex));
877 - ut_a(srv_n_purge_threads == 0);
878 + ut_a(srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0));
881 /* Check for shutdown and change in purge config. */
882 @@ -2742,6 +2754,7 @@
883 ulint n_pages_purged = 0;
884 ulint n_bytes_merged;
885 ulint n_pages_flushed;
886 + ulint n_pages_flushed_prev = 0;
887 ulint n_bytes_archived;
888 ulint n_tables_to_drop;
890 @@ -2749,7 +2762,20 @@
891 ulint n_ios_very_old;
894 + ulint prev_adaptive_flushing_method = ULINT_UNDEFINED;
895 + ulint inner_loop = 0;
896 + ibool skip_sleep = FALSE;
898 + struct t_prev_flush_info_struct {
901 + unsigned offset:32;
902 + ib_uint64_t oldest_modification;
903 + } prev_flush_info[MAX_BUFFER_POOLS];
905 + ib_uint64_t lsn_old;
907 + ib_uint64_t oldest_lsn;
909 #ifdef UNIV_DEBUG_THREAD_CREATION
910 fprintf(stderr, "Master thread starts, id %lu\n",
911 @@ -2771,6 +2797,9 @@
913 mutex_exit(&kernel_mutex);
915 + mutex_enter(&(log_sys->mutex));
916 + lsn_old = log_sys->lsn;
917 + mutex_exit(&(log_sys->mutex));
919 /*****************************************************************/
920 /* ---- When there is database activity by users, we cycle in this
921 @@ -2801,9 +2830,13 @@
922 /* Sleep for 1 second on entrying the for loop below the first time. */
923 next_itr_time = ut_time_ms() + 1000;
925 + skip_sleep = FALSE;
927 for (i = 0; i < 10; i++) {
928 ulint cur_time = ut_time_ms();
930 + n_pages_flushed = 0; /* initialize */
932 /* ALTER TABLE in MySQL requires on Unix that the table handler
933 can drop tables lazily after there no longer are SELECT
935 @@ -2827,6 +2860,7 @@
936 srv_main_thread_op_info = "sleeping";
937 srv_main_1_second_loops++;
940 if (next_itr_time > cur_time
941 && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
943 @@ -2837,10 +2871,26 @@
944 (next_itr_time - cur_time)
949 + mutex_enter(&(log_sys->mutex));
950 + oldest_lsn = buf_pool_get_oldest_modification();
951 + ib_uint64_t lsn = log_sys->lsn;
952 + mutex_exit(&(log_sys->mutex));
956 + "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
957 + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
962 /* Each iteration should happen at 1 second interval. */
963 next_itr_time = ut_time_ms() + 1000;
964 + } /* if (!skip_sleep) */
966 + skip_sleep = FALSE;
968 /* Flush logs if needed */
969 srv_sync_log_buffer_in_background();
970 @@ -2860,7 +2910,7 @@
971 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
972 && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
973 srv_main_thread_op_info = "doing insert buffer merge";
974 - ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
975 + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
977 /* Flush logs if needed */
978 srv_sync_log_buffer_in_background();
979 @@ -2877,7 +2927,11 @@
980 n_pages_flushed = buf_flush_list(
981 PCT_IO(100), IB_ULONGLONG_MAX);
983 - } else if (srv_adaptive_flushing) {
984 + mutex_enter(&(log_sys->mutex));
985 + lsn_old = log_sys->lsn;
986 + mutex_exit(&(log_sys->mutex));
987 + prev_adaptive_flushing_method = ULINT_UNDEFINED;
988 + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
990 /* Try to keep the rate of flushing of dirty
991 pages such that redo log generation does not
992 @@ -2893,6 +2947,224 @@
997 + mutex_enter(&(log_sys->mutex));
998 + lsn_old = log_sys->lsn;
999 + mutex_exit(&(log_sys->mutex));
1000 + prev_adaptive_flushing_method = ULINT_UNDEFINED;
1001 + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
1003 + /* Try to keep the modified age from exceeding
1004 + the max_checkpoint_age * 7/8 line */
1006 + mutex_enter(&(log_sys->mutex));
1008 + oldest_lsn = buf_pool_get_oldest_modification();
1009 + if (oldest_lsn == 0) {
1010 + lsn_old = log_sys->lsn;
1011 + mutex_exit(&(log_sys->mutex));
1014 + if ((log_sys->lsn - oldest_lsn)
1015 + > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
1016 + /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
1017 + /* We should not flush from here. */
1018 + lsn_old = log_sys->lsn;
1019 + mutex_exit(&(log_sys->mutex));
1020 + } else if ((log_sys->lsn - oldest_lsn)
1021 + > (log_sys->max_checkpoint_age)/4 ) {
1023 + /* defence line (max_checkpoint_age * 1/4) */
1024 + ib_uint64_t lsn = log_sys->lsn;
1026 + ib_uint64_t level, bpl;
1027 + buf_page_t* bpage;
1030 + mutex_exit(&(log_sys->mutex));
1034 + for (j = 0; j < srv_buf_pool_instances; j++) {
1035 + buf_pool_t* buf_pool;
1038 + buf_pool = buf_pool_from_array(j);
1040 + /* The scanning flush_list is optimistic here */
1044 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1046 + while (bpage != NULL) {
1047 + ib_uint64_t oldest_modification = bpage->oldest_modification;
1048 + if (oldest_modification != 0) {
1049 + level += log_sys->max_checkpoint_age
1050 + - (lsn - oldest_modification);
1052 + bpage = UT_LIST_GET_NEXT(list, bpage);
1057 + bpl += ((ib_uint64_t) n_blocks * n_blocks
1058 + * (lsn - lsn_old)) / level;
1063 + if (!srv_use_doublewrite_buf) {
1064 + /* flush is faster than when doublewrite */
1065 + bpl = (bpl * 7) / 8;
1070 + n_pages_flushed = buf_flush_list(bpl,
1071 + oldest_lsn + (lsn - lsn_old));
1072 + if (n_pages_flushed == ULINT_UNDEFINED) {
1073 + os_thread_sleep(5000);
1074 + goto retry_flush_batch;
1081 + "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
1082 + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1083 + lsn - lsn_old, bpl);
1086 + lsn_old = log_sys->lsn;
1087 + mutex_exit(&(log_sys->mutex));
1090 + prev_adaptive_flushing_method = 1;
1091 + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
1092 + buf_pool_t* buf_pool;
1093 + buf_page_t* bpage;
1097 + mutex_enter(&(log_sys->mutex));
1098 + oldest_lsn = buf_pool_get_oldest_modification();
1099 + lsn = log_sys->lsn;
1100 + mutex_exit(&(log_sys->mutex));
1102 + /* raise the loop rate x10: iterate every 100 ms instead of every 1000 ms */
1103 + next_itr_time -= 900; /* 1000 - 900 == 100 */
1105 + if (inner_loop < 10) {
1111 + if (prev_adaptive_flushing_method == 2) {
1114 + ulint new_blocks_sum, flushed_blocks_sum;
1116 + blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
1118 + /* prev_flush_info[j] should be the previous loop's */
1119 + for (j = 0; j < srv_buf_pool_instances; j++) {
1120 + lint blocks_num, new_blocks_num, flushed_blocks_num;
1123 + buf_pool = buf_pool_from_array(j);
1125 + blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
1126 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1127 + new_blocks_num = 0;
1130 + while (bpage != NULL) {
1131 + if (prev_flush_info[j].space == bpage->space
1132 + && prev_flush_info[j].offset == bpage->offset
1133 + && prev_flush_info[j].oldest_modification
1134 + == bpage->oldest_modification) {
1138 + bpage = UT_LIST_GET_NEXT(list, bpage);
1142 + new_blocks_num = blocks_num;
1145 + flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
1147 + if (flushed_blocks_num < 0) {
1148 + flushed_blocks_num = 0;
1151 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1153 + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1155 + prev_flush_info[j].space = bpage->space;
1156 + prev_flush_info[j].offset = bpage->offset;
1157 + prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1159 + prev_flush_info[j].space = 0;
1160 + prev_flush_info[j].offset = 0;
1161 + prev_flush_info[j].oldest_modification = 0;
1164 + new_blocks_sum += new_blocks_num;
1165 + flushed_blocks_sum += flushed_blocks_num;
1166 + blocks_sum += blocks_num;
1169 + n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
1170 + if (flushed_blocks_sum > n_pages_flushed_prev) {
1171 + n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
1174 + if (n_flush > 0) {
1176 + n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
1178 + n_pages_flushed = 0;
1181 + /* store the current first page of each flush_list for the next loop */
1182 + for (j = 0; j < srv_buf_pool_instances; j++) {
1183 + buf_pool = buf_pool_from_array(j);
1185 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1187 + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1189 + prev_flush_info[j].space = bpage->space;
1190 + prev_flush_info[j].offset = bpage->offset;
1191 + prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1193 + prev_flush_info[j].space = 0;
1194 + prev_flush_info[j].offset = 0;
1195 + prev_flush_info[j].oldest_modification = 0;
1198 + n_pages_flushed = 0;
1202 + prev_adaptive_flushing_method = 2;
1204 + mutex_enter(&(log_sys->mutex));
1205 + lsn_old = log_sys->lsn;
1206 + mutex_exit(&(log_sys->mutex));
1207 + prev_adaptive_flushing_method = ULINT_UNDEFINED;
1210 + if (n_pages_flushed == ULINT_UNDEFINED) {
1211 + n_pages_flushed_prev = 0;
1213 + n_pages_flushed_prev = n_pages_flushed;
1216 if (srv_activity_count == old_activity_count) {
1217 @@ -2941,12 +3213,12 @@
1218 even if the server were active */
1220 srv_main_thread_op_info = "doing insert buffer merge";
1221 - ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1222 + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1224 /* Flush logs if needed */
1225 srv_sync_log_buffer_in_background();
1227 - if (srv_n_purge_threads == 0) {
1228 + if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1229 srv_main_thread_op_info = "master purging";
1231 srv_master_do_purge();
1232 @@ -3024,7 +3296,7 @@
1236 - if (srv_n_purge_threads == 0) {
1237 + if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1238 srv_main_thread_op_info = "master purging";
1240 srv_master_do_purge();
1241 @@ -3049,7 +3321,7 @@
1242 buf_flush_list below. Otherwise, the system favors
1243 clean pages over cleanup throughput. */
1244 n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
1246 + PCT_IBUF_IO(100));
1249 srv_main_thread_op_info = "reserving kernel mutex";
1250 @@ -3189,6 +3461,7 @@
1253 ulint n_total_purged = ULINT_UNDEFINED;
1254 + ulint next_itr_time;
1256 ut_a(srv_n_purge_threads == 1);
1258 @@ -3209,9 +3482,12 @@
1260 mutex_exit(&kernel_mutex);
1262 + next_itr_time = ut_time_ms();
1264 while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
1266 ulint n_pages_purged = 0;
1269 /* If there are very few records to purge or the last
1270 purge didn't purge any records then wait for activity.
1271 @@ -3258,6 +3534,16 @@
1272 } while (n_pages_purged > 0 && !srv_fast_shutdown);
1274 srv_sync_log_buffer_in_background();
1276 + cur_time = ut_time_ms();
1277 + if (next_itr_time > cur_time) {
1278 + os_thread_sleep(ut_min(1000000,
1279 + (next_itr_time - cur_time)
1281 + next_itr_time = ut_time_ms() + 1000;
1283 + next_itr_time = cur_time + 1000;
1287 mutex_enter(&kernel_mutex);
1288 --- a/storage/innobase/srv/srv0start.c
1289 +++ b/storage/innobase/srv/srv0start.c
1290 @@ -1217,6 +1217,9 @@
1291 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1292 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1294 + } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1295 + srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1297 } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1298 srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1300 --- a/storage/innobase/trx/trx0purge.c
1301 +++ b/storage/innobase/trx/trx0purge.c
1302 @@ -392,10 +392,10 @@
1303 trx_sys->rseg_history_len++;
1304 mutex_exit(&kernel_mutex);
1306 - if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
1307 +// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /* should always wake up */
1308 /* Inform the purge thread that there is work to do. */
1309 srv_wake_purge_thread_if_not_active();
1314 /**********************************************************************//**
1315 --- a/storage/innobase/trx/trx0trx.c
1316 +++ b/storage/innobase/trx/trx0trx.c
1318 trx->read_view = NULL;
1321 + ulint flush_log_at_trx_commit;
1323 mutex_exit(&kernel_mutex);
1325 @@ -992,6 +993,12 @@
1326 trx_undo_insert_cleanup(trx);
1329 + if (srv_use_global_flush_log_at_trx_commit) {
1330 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1332 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1335 /* NOTE that we could possibly make a group commit more
1336 efficient here: call os_thread_yield here to allow also other
1337 trxs to come to commit! */
1338 @@ -1023,9 +1030,9 @@
1339 if (trx->flush_log_later) {
1340 /* Do nothing yet */
1341 trx->must_flush_log_later = TRUE;
1342 - } else if (srv_flush_log_at_trx_commit == 0) {
1343 + } else if (flush_log_at_trx_commit == 0) {
1345 - } else if (srv_flush_log_at_trx_commit == 1) {
1346 + } else if (flush_log_at_trx_commit == 1) {
1347 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1348 /* Write the log but do not flush it to disk */
1350 @@ -1037,7 +1044,7 @@
1352 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1354 - } else if (srv_flush_log_at_trx_commit == 2) {
1355 + } else if (flush_log_at_trx_commit == 2) {
1357 /* Write the log but do not flush it to disk */
1359 @@ -1701,16 +1708,23 @@
1360 trx_t* trx) /*!< in: trx handle */
1362 ib_uint64_t lsn = trx->commit_lsn;
1363 + ulint flush_log_at_trx_commit;
1367 trx->op_info = "flushing log";
1369 + if (srv_use_global_flush_log_at_trx_commit) {
1370 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1372 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1375 if (!trx->must_flush_log_later) {
1377 - } else if (srv_flush_log_at_trx_commit == 0) {
1378 + } else if (flush_log_at_trx_commit == 0) {
1380 - } else if (srv_flush_log_at_trx_commit == 1) {
1381 + } else if (flush_log_at_trx_commit == 1) {
1382 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1383 /* Write the log but do not flush it to disk */
1385 @@ -1721,7 +1735,7 @@
1387 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1389 - } else if (srv_flush_log_at_trx_commit == 2) {
1390 + } else if (flush_log_at_trx_commit == 2) {
1392 /* Write the log but do not flush it to disk */
1394 @@ -1969,6 +1983,8 @@
1395 /*--------------------------------------*/
1398 + ulint flush_log_at_trx_commit;
1400 /* Depending on the my.cnf options, we may now write the log
1401 buffer to the log files, making the prepared state of the
1402 transaction durable if the OS does not crash. We may also
1403 @@ -1988,9 +2004,15 @@
1405 mutex_exit(&kernel_mutex);
1407 - if (srv_flush_log_at_trx_commit == 0) {
1408 + if (srv_use_global_flush_log_at_trx_commit) {
1409 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1411 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1414 + if (flush_log_at_trx_commit == 0) {
1416 - } else if (srv_flush_log_at_trx_commit == 1) {
1417 + } else if (flush_log_at_trx_commit == 1) {
1418 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1419 /* Write the log but do not flush it to disk */
1421 @@ -2002,7 +2024,7 @@
1423 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1425 - } else if (srv_flush_log_at_trx_commit == 2) {
1426 + } else if (flush_log_at_trx_commit == 2) {
1428 /* Write the log but do not flush it to disk */