1 # name : innodb_io_patches.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 --- a/storage/innobase/buf/buf0buf.c
9 +++ b/storage/innobase/buf/buf0buf.c
12 /* When we traverse all the flush lists we don't want another
13 thread to add a dirty page to any flush list. */
14 + if (srv_buf_pool_instances > 1)
15 log_flush_order_mutex_enter();
17 for (i = 0; i < srv_buf_pool_instances; i++) {
22 + if (srv_buf_pool_instances > 1)
23 log_flush_order_mutex_exit();
25 /* The returned answer may be out of date: the flush_list can
26 --- a/storage/innobase/buf/buf0flu.c
27 +++ b/storage/innobase/buf/buf0flu.c
30 /* Now flush the doublewrite buffer data to disk */
32 - fil_flush(TRX_SYS_SPACE);
33 + fil_flush(TRX_SYS_SPACE, FALSE);
35 /* We know that the writes have been flushed to disk now
36 and in recovery we will find them in the doublewrite buffer
37 @@ -1375,10 +1375,11 @@
40 buf_pool_t* buf_pool = buf_pool_get(space, offset);
41 + ibool is_forward_scan;
43 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
45 - if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
46 + if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) {
47 /* If there is little space, it is better not to flush
48 any block except from the end of the LRU list */
50 @@ -1405,7 +1406,32 @@
51 high = fil_space_get_size(space);
54 - for (i = low; i < high; i++) {
55 + if (srv_flush_neighbor_pages == 2) {
57 + /* In the case of contiguous flush where the requested page
58 + does not fall at the start of flush area, first scan backward
59 + from the page and later forward from it. */
60 + is_forward_scan = (offset == low);
63 + is_forward_scan = TRUE;
67 + if (srv_flush_neighbor_pages == 2) {
68 + if (is_forward_scan) {
79 + for (; is_forward_scan ? (i < high) : (i >= low);
80 + is_forward_scan ? i++ : i--) {
84 @@ -1434,6 +1460,12 @@
87 buf_pool_mutex_exit(buf_pool);
88 + if (srv_flush_neighbor_pages == 2) {
90 + /* This is contiguous neighbor page flush and
91 + the pages here are not contiguous. */
97 @@ -1470,6 +1502,22 @@
100 buf_pool_mutex_exit(buf_pool);
102 + if (srv_flush_neighbor_pages == 2) {
104 + /* We are trying to do the contiguous neighbor page
105 + flush, but the last page we checked was unflushable,
106 + making a "hole" in the flush, so stop this attempt. */
111 + if (!is_forward_scan) {
113 + /* Backward scan done, now do the forward scan */
114 + ut_a (srv_flush_neighbor_pages == 2);
115 + is_forward_scan = TRUE;
120 --- a/storage/innobase/buf/buf0rea.c
121 +++ b/storage/innobase/buf/buf0rea.c
123 = BUF_READ_AHEAD_AREA(buf_pool);
126 + if (!(srv_read_ahead & 2)) {
130 if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
131 /* No read-ahead to avoid thread deadlocks */
133 --- a/storage/innobase/fil/fil0fil.c
134 +++ b/storage/innobase/fil/fil0fil.c
135 @@ -2609,7 +2609,7 @@
137 os_thread_sleep(20000);
140 + fil_flush(id, TRUE);
144 @@ -2823,7 +2823,7 @@
148 - ret = os_file_flush(file);
149 + ret = os_file_flush(file, TRUE);
152 fputs("InnoDB: Error: file flush of tablespace ", stderr);
153 @@ -3009,7 +3009,7 @@
157 - success = os_file_flush(file);
158 + success = os_file_flush(file, TRUE);
162 @@ -3031,7 +3031,7 @@
166 - success = os_file_flush(file);
167 + success = os_file_flush(file, TRUE);
171 @@ -4014,7 +4014,7 @@
172 size_after_extend, *actual_size); */
173 mutex_exit(&fil_system->mutex);
175 - fil_flush(space_id);
176 + fil_flush(space_id, TRUE);
180 @@ -4585,8 +4585,9 @@
184 - ulint space_id) /*!< in: file space id (this can be a group of
185 + ulint space_id, /*!< in: file space id (this can be a group of
186 log files or a tablespace of the database) */
191 @@ -4657,7 +4658,7 @@
192 /* fprintf(stderr, "Flushing to file %s\n",
195 - os_file_flush(file);
196 + os_file_flush(file, metadata);
198 mutex_enter(&fil_system->mutex);
200 @@ -4740,7 +4741,7 @@
201 a non-existing space id. */
202 for (i = 0; i < n_space_ids; i++) {
204 - fil_flush(space_ids[i]);
205 + fil_flush(space_ids[i], TRUE);
209 --- a/storage/innobase/handler/ha_innodb.cc
210 +++ b/storage/innobase/handler/ha_innodb.cc
212 "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
213 NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
215 +static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
216 + "Set to 0 (write and flush once per second),"
217 + " 1 (write and flush at each commit)"
218 + " or 2 (write at commit, flush once per second).",
219 + NULL, NULL, 1, 0, 2, 0);
222 static handler *innobase_create_handler(handlerton *hton,
228 +/******************************************************************//**
230 +extern "C" UNIV_INTERN
232 +thd_flush_log_at_trx_commit(
233 +/*================================*/
236 + return(THDVAR((THD*) thd, flush_log_at_trx_commit));
239 /********************************************************************//**
240 Obtain the InnoDB transaction of a MySQL thread.
241 @return reference to transaction pointer */
242 @@ -2471,6 +2488,9 @@
243 srv_n_read_io_threads = (ulint) innobase_read_io_threads;
244 srv_n_write_io_threads = (ulint) innobase_write_io_threads;
246 + srv_read_ahead &= 3;
247 + srv_adaptive_flushing_method %= 3;
249 srv_force_recovery = (ulint) innobase_force_recovery;
251 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
252 @@ -11141,7 +11161,7 @@
253 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
254 "Purge threads can be either 0 or 1.",
256 - 0, /* Default setting */
257 + 1, /* Default setting */
258 0, /* Minimum value */
259 1, 0); /* Maximum value */
261 @@ -11183,12 +11203,18 @@
262 innodb_file_format_max_validate,
263 innodb_file_format_max_update, "Antelope");
265 -static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
266 - PLUGIN_VAR_OPCMDARG,
267 - "Set to 0 (write and flush once per second),"
268 - " 1 (write and flush at each commit)"
269 - " or 2 (write at commit, flush once per second).",
270 - NULL, NULL, 1, 0, 2, 0);
271 +/* Changed to the THDVAR */
272 +//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
273 +// PLUGIN_VAR_OPCMDARG,
274 +// "Set to 0 (write and flush once per second),"
275 +// " 1 (write and flush at each commit)"
276 +// " or 2 (write at commit, flush once per second).",
277 +// NULL, NULL, 1, 0, 2, 0);
279 +static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
280 + PLUGIN_VAR_NOCMDARG,
281 + "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
284 static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
285 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
286 @@ -11293,7 +11319,7 @@
287 static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
288 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
289 "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
290 - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
291 + NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
293 static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
294 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
295 @@ -11442,6 +11468,127 @@
296 "trigger a readahead.",
297 NULL, NULL, 56, 0, 64, 0);
299 +static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
300 + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
301 + "The maximum size of the insert buffer. (in bytes)",
302 + NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
304 +static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
305 + PLUGIN_VAR_RQCMDARG,
306 + "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
307 + NULL, NULL, 1, 0, 1, 0);
309 +static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
310 + PLUGIN_VAR_RQCMDARG,
311 + "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
312 + NULL, NULL, 100, 100, 999999999, 0);
314 +static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target,
315 + PLUGIN_VAR_RQCMDARG,
316 + "Control soft limit of checkpoint age. (0 : not control)",
317 + NULL, NULL, 0, 0, ~0UL, 0);
321 +innodb_flush_neighbor_pages_update(
323 + struct st_mysql_sys_var* var,
327 + *(long *)var_ptr = (*(long *)save) % 3;
330 +const char *flush_neighbor_pages_names[]=
335 + /* For compatibility with the older patch */
336 + "0", /* "none" + 3 */
337 + "1", /* "area" + 3 */
338 + "2", /* "cont" + 3 */
342 +TYPELIB flush_neighbor_pages_typelib=
344 + array_elements(flush_neighbor_pages_names) - 1,
345 + "flush_neighbor_pages_typelib",
346 + flush_neighbor_pages_names,
350 +static MYSQL_SYSVAR_ENUM(flush_neighbor_pages, srv_flush_neighbor_pages,
351 + PLUGIN_VAR_RQCMDARG, "Neighbor page flushing behaviour: none: do not flush, "
352 + "[area]: flush selected pages one-by-one, "
353 + "cont: flush a contiguous block of pages", NULL,
354 + innodb_flush_neighbor_pages_update, 1, &flush_neighbor_pages_typelib);
358 +innodb_read_ahead_update(
360 + struct st_mysql_sys_var* var,
364 + *(long *)var_ptr= (*(long *)save) & 3;
366 +const char *read_ahead_names[]=
372 + /* For compatibility with the older patch */
373 + "0", /* 4 ("none" + 4) */
376 + "3", /* 7 ("both" + 4) */
379 +TYPELIB read_ahead_typelib=
381 + array_elements(read_ahead_names) - 1, "read_ahead_typelib",
382 + read_ahead_names, NULL
384 +static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
385 + PLUGIN_VAR_RQCMDARG,
386 + "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]",
387 + NULL, innodb_read_ahead_update, 2, &read_ahead_typelib);
391 +innodb_adaptive_flushing_method_update(
393 + struct st_mysql_sys_var* var,
397 + *(long *)var_ptr= (*(long *)save) % 3; /* typelib indexes 3..5 are the legacy aliases "0".."2" of methods 0..2 */
399 +const char *adaptive_flushing_method_names[]=
402 + "estimate", /* 1 */
403 + "keep_average", /* 2 */
404 + /* For compatibility with the older patch */
405 + "0", /* 3 ("none" + 3) */
406 + "1", /* 4 ("estimate" + 3) */
407 + "2", /* 5 ("keep_average" + 3) */
410 +TYPELIB adaptive_flushing_method_typelib=
412 + array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib",
413 + adaptive_flushing_method_names, NULL
415 +static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method,
416 + PLUGIN_VAR_RQCMDARG,
417 + "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
418 + NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
420 static struct st_mysql_sys_var* innobase_system_variables[]= {
421 MYSQL_SYSVAR(additional_mem_pool_size),
422 MYSQL_SYSVAR(autoextend_increment),
423 @@ -11462,6 +11609,7 @@
424 MYSQL_SYSVAR(file_format_check),
425 MYSQL_SYSVAR(file_format_max),
426 MYSQL_SYSVAR(flush_log_at_trx_commit),
427 + MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
428 MYSQL_SYSVAR(flush_method),
429 MYSQL_SYSVAR(force_recovery),
430 MYSQL_SYSVAR(large_prefix),
431 @@ -11501,6 +11649,13 @@
432 MYSQL_SYSVAR(show_verbose_locks),
433 MYSQL_SYSVAR(show_locks_held),
434 MYSQL_SYSVAR(version),
435 + MYSQL_SYSVAR(ibuf_max_size),
436 + MYSQL_SYSVAR(ibuf_active_contract),
437 + MYSQL_SYSVAR(ibuf_accel_rate),
438 + MYSQL_SYSVAR(checkpoint_age_target),
439 + MYSQL_SYSVAR(flush_neighbor_pages),
440 + MYSQL_SYSVAR(read_ahead),
441 + MYSQL_SYSVAR(adaptive_flushing_method),
442 MYSQL_SYSVAR(use_sys_malloc),
443 MYSQL_SYSVAR(use_native_aio),
444 MYSQL_SYSVAR(change_buffering),
445 --- a/storage/innobase/ibuf/ibuf0ibuf.c
446 +++ b/storage/innobase/ibuf/ibuf0ibuf.c
448 grow in size, as the references on the upper levels of the tree can
451 - ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
452 - / IBUF_POOL_SIZE_PER_MAX_SIZE;
453 + ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
454 + / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) (srv_ibuf_max_size / UNIV_PAGE_SIZE)); /* divide the long long byte count first, then narrow to ulint */
456 + srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
458 mutex_create(ibuf_pessimistic_insert_mutex_key,
459 &ibuf_pessimistic_insert_mutex,
460 @@ -2763,9 +2765,11 @@
462 max_size = ibuf->max_size;
464 + if (!srv_ibuf_active_contract) {
465 if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
470 sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
472 --- a/storage/innobase/include/buf0rea.h
473 +++ b/storage/innobase/include/buf0rea.h
476 /** The size in pages of the area which the read-ahead algorithms read if
478 -#define BUF_READ_AHEAD_AREA(b) \
479 - ut_min(64, ut_2_power_up((b)->curr_size / 32))
480 +#define BUF_READ_AHEAD_AREA(b) 64
482 /** @name Modes used in read-ahead @{ */
483 /** read only pages belonging to the insert buffer tree */
484 --- a/storage/innobase/include/fil0fil.h
485 +++ b/storage/innobase/include/fil0fil.h
490 - ulint space_id); /*!< in: file space id (this can be a group of
491 + ulint space_id, /*!< in: file space id (this can be a group of
492 log files or a tablespace of the database) */
494 /**********************************************************************//**
495 Flushes to disk writes in file spaces of the given type possibly cached by
497 --- a/storage/innobase/include/ha_prototypes.h
498 +++ b/storage/innobase/include/ha_prototypes.h
500 /*===================*/
501 void* thd, /*!< in: thread handle (THD*) */
502 ulint value); /*!< in: time waited for the lock */
503 +/******************************************************************//**
507 +thd_flush_log_at_trx_commit(
508 +/*================================*/
511 /**********************************************************************//**
512 Get the current setting of the lower_case_table_names global parameter from
513 --- a/storage/innobase/include/os0file.h
514 +++ b/storage/innobase/include/os0file.h
516 pfs_os_file_write_func(name, file, buf, offset, offset_high, \
517 n, __FILE__, __LINE__)
519 -# define os_file_flush(file) \
520 - pfs_os_file_flush_func(file, __FILE__, __LINE__)
521 +# define os_file_flush(file, metadata) \
522 + pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
524 # define os_file_rename(key, oldpath, newpath) \
525 pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
527 # define os_file_write(name, file, buf, offset, offset_high, n) \
528 os_file_write_func(name, file, buf, offset, offset_high, n)
530 -# define os_file_flush(file) os_file_flush_func(file)
531 +# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
533 # define os_file_rename(key, oldpath, newpath) \
534 os_file_rename_func(oldpath, newpath)
536 pfs_os_file_flush_func(
537 /*===================*/
538 os_file_t file, /*!< in, own: handle to a file */
540 const char* src_file,/*!< in: file name where func invoked */
541 ulint src_line);/*!< in: line where the func invoked */
547 - os_file_t file); /*!< in, own: handle to a file */
548 + os_file_t file, /*!< in, own: handle to a file */
550 /***********************************************************************//**
551 Retrieves the last error number if an error occurs in a file io function.
552 The number should be retrieved before any other OS calls (because they may
553 --- a/storage/innobase/include/os0file.ic
554 +++ b/storage/innobase/include/os0file.ic
556 pfs_os_file_flush_func(
557 /*===================*/
558 os_file_t file, /*!< in, own: handle to a file */
560 const char* src_file,/*!< in: file name where func invoked */
561 ulint src_line)/*!< in: line where the func invoked */
565 register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
567 - result = os_file_flush_func(file);
568 + result = os_file_flush_func(file, metadata);
570 register_pfs_file_io_end(locker, 0);
572 --- a/storage/innobase/include/srv0srv.h
573 +++ b/storage/innobase/include/srv0srv.h
575 extern ulint srv_n_log_files;
576 extern ulint srv_log_file_size;
577 extern ulint srv_log_buffer_size;
578 -extern ulong srv_flush_log_at_trx_commit;
579 +//extern ulong srv_flush_log_at_trx_commit;
580 +extern char srv_use_global_flush_log_at_trx_commit;
581 extern char srv_adaptive_flushing;
583 /* If this flag is TRUE, then we will load the indexes' (and tables') metadata
585 extern ulong srv_max_purge_lag;
587 extern ulong srv_replication_delay;
589 +extern long long srv_ibuf_max_size;
590 +extern ulint srv_ibuf_active_contract;
591 +extern ulint srv_ibuf_accel_rate;
592 +extern ulint srv_checkpoint_age_target;
593 +extern ulint srv_flush_neighbor_pages;
594 +extern ulint srv_enable_unsafe_group_commit;
595 +extern ulint srv_read_ahead;
596 +extern ulint srv_adaptive_flushing_method;
598 /*-------------------------------------------*/
600 extern ulint srv_n_rows_inserted;
602 when writing data files, but do flush
603 after writing to log files */
604 SRV_UNIX_NOSYNC, /*!< do not flush after writing */
605 - SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
606 + SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
608 + SRV_UNIX_ALL_O_DIRECT /* experimental method: the log files are also opened with O_DIRECT */
611 /** Alternatives for file i/o in Windows */
612 --- a/storage/innobase/log/log0log.c
613 +++ b/storage/innobase/log/log0log.c
615 #include "srv0start.h"
618 +#include "ha_prototypes.h"
621 General philosophy of InnoDB redo-logs:
625 /************************************************************//**
629 +log_max_modified_age_async()
631 + if (srv_checkpoint_age_target) {
632 + return(ut_min(log_sys->max_modified_age_async,
633 + srv_checkpoint_age_target
634 + - srv_checkpoint_age_target / 8));
636 + return(log_sys->max_modified_age_async);
642 +log_max_checkpoint_age_async()
644 + if (srv_checkpoint_age_target) {
645 + return(ut_min(log_sys->max_checkpoint_age_async,
646 + srv_checkpoint_age_target));
648 + return(log_sys->max_checkpoint_age_async);
652 +/************************************************************//**
660 - if (checkpoint_age <= log->max_modified_age_async) {
661 + if (checkpoint_age <= log_max_modified_age_async()) {
666 oldest_lsn = buf_pool_get_oldest_modification();
669 - || lsn - oldest_lsn > log->max_modified_age_async
670 - || checkpoint_age > log->max_checkpoint_age_async) {
671 + || lsn - oldest_lsn > log_max_modified_age_async()
672 + || checkpoint_age > log_max_checkpoint_age_async()) {
674 log->check_flush_or_checkpoint = TRUE;
676 @@ -1100,9 +1128,10 @@
677 group = (log_group_t*)((ulint)group - 1);
679 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
680 + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
681 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
683 - fil_flush(group->space_id);
684 + fil_flush(group->space_id, FALSE);
688 @@ -1121,10 +1150,11 @@
689 logs and cannot end up here! */
691 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
692 + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
693 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
694 - && srv_flush_log_at_trx_commit != 2) {
695 + && thd_flush_log_at_trx_commit(NULL) != 2) {
697 - fil_flush(group->space_id);
698 + fil_flush(group->space_id, FALSE);
701 mutex_enter(&(log_sys->mutex));
702 @@ -1501,7 +1531,8 @@
704 mutex_exit(&(log_sys->mutex));
706 - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
707 + if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
708 + || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
709 /* O_DSYNC means the OS did not buffer the log file at all:
710 so we have also flushed to disk what we have written */
712 @@ -1511,7 +1542,7 @@
714 group = UT_LIST_GET_FIRST(log_sys->log_groups);
716 - fil_flush(group->space_id);
717 + fil_flush(group->space_id, FALSE);
718 log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
721 @@ -2120,10 +2151,10 @@
724 advance = 2 * (age - log->max_modified_age_sync);
725 - } else if (age > log->max_modified_age_async) {
726 + } else if (age > log_max_modified_age_async()) {
728 /* A flush is not urgent: we do an asynchronous preflush */
729 - advance = age - log->max_modified_age_async;
730 + advance = age - log_max_modified_age_async();
734 @@ -2137,7 +2168,7 @@
736 do_checkpoint = TRUE;
738 - } else if (checkpoint_age > log->max_checkpoint_age_async) {
739 + } else if (checkpoint_age > log_max_checkpoint_age_async()) {
740 /* A checkpoint is not urgent: do it asynchronously */
742 do_checkpoint = TRUE;
743 @@ -2607,7 +2638,7 @@
745 mutex_exit(&(log_sys->mutex));
747 - fil_flush(group->archive_space_id);
748 + fil_flush(group->archive_space_id, TRUE);
750 mutex_enter(&(log_sys->mutex));
752 @@ -3349,6 +3380,17 @@
753 log_sys->flushed_to_disk_lsn,
754 log_sys->last_checkpoint_lsn);
757 + "Max checkpoint age %lu\n"
758 + "Checkpoint age target %lu\n"
759 + "Modified age %lu\n"
760 + "Checkpoint age %lu\n",
761 + (ulong) log_sys->max_checkpoint_age,
762 + (ulong) log_max_checkpoint_age_async(),
763 + (ulong) (log_sys->lsn -
764 + log_buf_pool_get_oldest_modification()),
765 + (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
767 current_time = time(NULL);
769 time_elapsed = 0.001 + difftime(current_time,
770 --- a/storage/innobase/log/log0recv.c
771 +++ b/storage/innobase/log/log0recv.c
772 @@ -2906,9 +2906,12 @@
773 ib_uint64_t archived_lsn;
774 #endif /* UNIV_LOG_ARCHIVE */
776 - byte log_hdr_buf[LOG_FILE_HDR_SIZE];
778 + byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
781 + log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
783 #ifdef UNIV_LOG_ARCHIVE
784 ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
785 /** TRUE when recovering from a checkpoint */
786 @@ -3468,7 +3471,7 @@
790 - os_file_flush(log_file);
791 + os_file_flush(log_file, TRUE);
792 os_file_close(log_file);
795 @@ -3492,7 +3495,7 @@
797 os_file_write(name, log_file, buf, 0, 0,
798 LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
799 - os_file_flush(log_file);
800 + os_file_flush(log_file, TRUE);
801 os_file_close(log_file);
804 --- a/storage/innobase/os/os0file.c
805 +++ b/storage/innobase/os/os0file.c
806 @@ -1424,7 +1424,7 @@
808 #ifdef UNIV_NON_BUFFERED_IO
809 # ifndef UNIV_HOTBACKUP
810 - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
811 + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
812 /* Do not use unbuffered i/o to log files because
813 value 2 denotes that we do not flush the log at every
814 commit, but only once per second */
815 @@ -1440,7 +1440,7 @@
817 #ifdef UNIV_NON_BUFFERED_IO
818 # ifndef UNIV_HOTBACKUP
819 - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
820 + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
821 /* Do not use unbuffered i/o to log files because
822 value 2 denotes that we do not flush the log at every
823 commit, but only once per second */
824 @@ -1585,6 +1585,11 @@
825 os_file_set_nocache(file, name, mode_str);
828 + /* ALL_O_DIRECT: O_DIRECT also for transaction log file */
829 + if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
830 + os_file_set_nocache(file, name, mode_str);
834 if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
836 @@ -2008,7 +2013,7 @@
840 - ret = os_file_flush(file);
841 + ret = os_file_flush(file, TRUE);
845 @@ -2046,7 +2051,8 @@
849 - os_file_t file) /*!< in: handle to a file */
850 + os_file_t file, /*!< in: handle to a file */
855 @@ -2055,7 +2061,16 @@
859 +#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
863 + ret = fdatasync(file);
872 @@ -2092,7 +2107,8 @@
876 - os_file_t file) /*!< in, own: handle to a file */
877 + os_file_t file, /*!< in, own: handle to a file */
882 @@ -2142,18 +2158,18 @@
883 /* If we are not on an operating system that supports this,
884 then fall back to a plain fsync. */
886 - ret = os_file_fsync(file);
887 + ret = os_file_fsync(file, metadata);
889 ret = fcntl(file, F_FULLFSYNC, NULL);
892 /* If we are not on a file system that supports this,
893 then fall back to a plain fsync. */
894 - ret = os_file_fsync(file);
895 + ret = os_file_fsync(file, metadata);
899 - ret = os_file_fsync(file);
900 + ret = os_file_fsync(file, metadata);
904 @@ -2336,7 +2352,7 @@
905 the OS crashes, a database page is only partially
906 physically written to disk. */
908 - ut_a(TRUE == os_file_flush(file));
909 + ut_a(TRUE == os_file_flush(file, TRUE));
911 # endif /* UNIV_DO_FLUSH */
913 @@ -2378,7 +2394,7 @@
914 the OS crashes, a database page is only partially
915 physically written to disk. */
917 - ut_a(TRUE == os_file_flush(file));
918 + ut_a(TRUE == os_file_flush(file, TRUE));
920 # endif /* UNIV_DO_FLUSH */
922 @@ -2750,7 +2766,7 @@
924 # ifdef UNIV_DO_FLUSH
925 if (!os_do_not_call_flush_at_each_write) {
926 - ut_a(TRUE == os_file_flush(file));
927 + ut_a(TRUE == os_file_flush(file, TRUE));
929 # endif /* UNIV_DO_FLUSH */
931 @@ -4296,7 +4312,7 @@
933 if (slot->type == OS_FILE_WRITE
934 && !os_do_not_call_flush_at_each_write) {
935 - if (!os_file_flush(slot->file)) {
936 + if (!os_file_flush(slot->file, TRUE)) {
940 @@ -4597,7 +4613,7 @@
942 if (slot->type == OS_FILE_WRITE
943 && !os_do_not_call_flush_at_each_write)
944 - && !os_file_flush(slot->file) {
945 + && !os_file_flush(slot->file, TRUE) {
948 #endif /* UNIV_DO_FLUSH */
949 --- a/storage/innobase/srv/srv0srv.c
950 +++ b/storage/innobase/srv/srv0srv.c
952 UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
953 /* size in database pages */
954 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
955 -UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
956 +//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
957 +UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
959 /* Try to flush dirty pages so as to avoid IO bursts at
963 UNIV_INTERN ulong srv_replication_delay = 0;
965 +UNIV_INTERN long long srv_ibuf_max_size = 0;
966 +UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
967 +UNIV_INTERN ulint srv_ibuf_accel_rate = 100;
968 +#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) (pct) / 10000.0))) /* pct percent of srv_io_capacity, scaled by srv_ibuf_accel_rate (also a percentage) */
970 +UNIV_INTERN ulint srv_checkpoint_age_target = 0;
971 +UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:area 2:contiguous */
973 +UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
974 +UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
975 +UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
976 /*-------------------------------------------*/
977 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
978 UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
979 @@ -2713,7 +2725,7 @@
981 ut_ad(!mutex_own(&kernel_mutex));
983 - ut_a(srv_n_purge_threads == 0);
984 + ut_a(srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0));
987 /* Check for shutdown and change in purge config. */
988 @@ -2746,6 +2758,7 @@
989 ulint n_pages_purged = 0;
990 ulint n_bytes_merged;
991 ulint n_pages_flushed;
992 + ulint n_pages_flushed_prev = 0;
993 ulint n_bytes_archived;
994 ulint n_tables_to_drop;
996 @@ -2753,7 +2766,20 @@
997 ulint n_ios_very_old;
1000 + ulint prev_adaptive_flushing_method = ULINT_UNDEFINED;
1001 + ulint inner_loop = 0;
1002 + ibool skip_sleep = FALSE;
1004 + struct t_prev_flush_info_struct {
1006 + unsigned space:32;
1007 + unsigned offset:32;
1008 + ib_uint64_t oldest_modification;
1009 + } prev_flush_info[MAX_BUFFER_POOLS];
1011 + ib_uint64_t lsn_old;
1013 + ib_uint64_t oldest_lsn;
1015 #ifdef UNIV_DEBUG_THREAD_CREATION
1016 fprintf(stderr, "Master thread starts, id %lu\n",
1017 @@ -2775,6 +2801,9 @@
1019 mutex_exit(&kernel_mutex);
1021 + mutex_enter(&(log_sys->mutex));
1022 + lsn_old = log_sys->lsn;
1023 + mutex_exit(&(log_sys->mutex));
1025 /*****************************************************************/
1026 /* ---- When there is database activity by users, we cycle in this
1027 @@ -2805,9 +2834,13 @@
1028 /* Sleep for 1 second on entrying the for loop below the first time. */
1029 next_itr_time = ut_time_ms() + 1000;
1031 + skip_sleep = FALSE;
1033 for (i = 0; i < 10; i++) {
1034 ulint cur_time = ut_time_ms();
1036 + n_pages_flushed = 0; /* initialize */
1038 /* ALTER TABLE in MySQL requires on Unix that the table handler
1039 can drop tables lazily after there no longer are SELECT
1041 @@ -2831,6 +2864,7 @@
1042 srv_main_thread_op_info = "sleeping";
1043 srv_main_1_second_loops++;
1045 + if (!skip_sleep) {
1046 if (next_itr_time > cur_time
1047 && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
1049 @@ -2841,10 +2875,26 @@
1050 (next_itr_time - cur_time)
1055 + mutex_enter(&(log_sys->mutex));
1056 + oldest_lsn = buf_pool_get_oldest_modification();
1057 + ib_uint64_t lsn = log_sys->lsn;
1058 + mutex_exit(&(log_sys->mutex));
1062 + "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
1063 + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1068 /* Each iteration should happen at 1 second interval. */
1069 next_itr_time = ut_time_ms() + 1000;
1070 + } /* if (!skip_sleep) */
1072 + skip_sleep = FALSE;
1074 /* Flush logs if needed */
1075 srv_sync_log_buffer_in_background();
1076 @@ -2864,7 +2914,7 @@
1077 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
1078 && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
1079 srv_main_thread_op_info = "doing insert buffer merge";
1080 - ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1081 + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1083 /* Flush logs if needed */
1084 srv_sync_log_buffer_in_background();
1085 @@ -2881,7 +2931,11 @@
1086 n_pages_flushed = buf_flush_list(
1087 PCT_IO(100), IB_ULONGLONG_MAX);
1089 - } else if (srv_adaptive_flushing) {
1090 + mutex_enter(&(log_sys->mutex));
1091 + lsn_old = log_sys->lsn;
1092 + mutex_exit(&(log_sys->mutex));
1093 + prev_adaptive_flushing_method = ULINT_UNDEFINED;
1094 + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
1096 /* Try to keep the rate of flushing of dirty
1097 pages such that redo log generation does not
1098 @@ -2897,6 +2951,224 @@
1103 + mutex_enter(&(log_sys->mutex));
1104 + lsn_old = log_sys->lsn;
1105 + mutex_exit(&(log_sys->mutex));
1106 + prev_adaptive_flushing_method = ULINT_UNDEFINED;
1107 + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
1109 + /* Try to keep the modified age from exceeding the
1110 + max_checkpoint_age * 7/8 line */
1112 + mutex_enter(&(log_sys->mutex));
1114 + oldest_lsn = buf_pool_get_oldest_modification();
1115 + if (oldest_lsn == 0) {
1116 + lsn_old = log_sys->lsn;
1117 + mutex_exit(&(log_sys->mutex));
1120 + if ((log_sys->lsn - oldest_lsn)
1121 + > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
1122 + /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
1123 + /* We should not flush from here. */
1124 + lsn_old = log_sys->lsn;
1125 + mutex_exit(&(log_sys->mutex));
1126 + } else if ((log_sys->lsn - oldest_lsn)
1127 + > (log_sys->max_checkpoint_age)/4 ) {
1129 + /* defence line (max_checkpoint_age * 1/4, as tested above) */
1130 + ib_uint64_t lsn = log_sys->lsn;
1132 + ib_uint64_t level, bpl;
1133 + buf_page_t* bpage;
1136 + mutex_exit(&(log_sys->mutex));
1140 + for (j = 0; j < srv_buf_pool_instances; j++) {
1141 + buf_pool_t* buf_pool;
1144 + buf_pool = buf_pool_from_array(j);
1146 + /* Scanning the flush_list is optimistic here */
1150 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1152 + while (bpage != NULL) {
1153 + ib_uint64_t oldest_modification = bpage->oldest_modification;
1154 + if (oldest_modification != 0) {
1155 + level += log_sys->max_checkpoint_age
1156 + - (lsn - oldest_modification);
1158 + bpage = UT_LIST_GET_NEXT(list, bpage);
1163 + bpl += ((ib_uint64_t) n_blocks * n_blocks
1164 + * (lsn - lsn_old)) / level;
1169 + if (!srv_use_doublewrite_buf) {
1170 + /* flush is faster than when doublewrite */
1171 + bpl = (bpl * 7) / 8;
1176 + n_pages_flushed = buf_flush_list(bpl,
1177 + oldest_lsn + (lsn - lsn_old));
1178 + if (n_pages_flushed == ULINT_UNDEFINED) {
1179 + os_thread_sleep(5000);
1180 + goto retry_flush_batch;
1187 + "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
1188 + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1189 + lsn - lsn_old, bpl);
1192 + lsn_old = log_sys->lsn;
1193 + mutex_exit(&(log_sys->mutex));
1196 + prev_adaptive_flushing_method = 1;
1197 + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
1198 + buf_pool_t* buf_pool;
1199 + buf_page_t* bpage;
1203 + mutex_enter(&(log_sys->mutex));
1204 + oldest_lsn = buf_pool_get_oldest_modification();
1205 + lsn = log_sys->lsn;
1206 + mutex_exit(&(log_sys->mutex));
1208 + /* upper loop/sec. (x10) */
1209 + next_itr_time -= 900; /* 1000 - 900 == 100 */
1211 + if (inner_loop < 10) {
1217 + if (prev_adaptive_flushing_method == 2) {
1220 + ulint new_blocks_sum, flushed_blocks_sum;
1222 + blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
1224 + /* prev_flush_info[j] should hold the values stored by the previous loop */
1225 + for (j = 0; j < srv_buf_pool_instances; j++) {
1226 + lint blocks_num, new_blocks_num, flushed_blocks_num;
1229 + buf_pool = buf_pool_from_array(j);
1231 + blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
1232 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1233 + new_blocks_num = 0;
1236 + while (bpage != NULL) {
1237 + if (prev_flush_info[j].space == bpage->space
1238 + && prev_flush_info[j].offset == bpage->offset
1239 + && prev_flush_info[j].oldest_modification
1240 + == bpage->oldest_modification) {
1244 + bpage = UT_LIST_GET_NEXT(list, bpage);
1248 + new_blocks_num = blocks_num;
1251 + flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
1253 + if (flushed_blocks_num < 0) {
1254 + flushed_blocks_num = 0;
1257 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1259 + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1261 + prev_flush_info[j].space = bpage->space;
1262 + prev_flush_info[j].offset = bpage->offset;
1263 + prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1265 + prev_flush_info[j].space = 0;
1266 + prev_flush_info[j].offset = 0;
1267 + prev_flush_info[j].oldest_modification = 0;
1270 + new_blocks_sum += new_blocks_num;
1271 + flushed_blocks_sum += flushed_blocks_num;
1272 + blocks_sum += blocks_num;
1275 + n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
1276 + if (flushed_blocks_sum > n_pages_flushed_prev) {
1277 + n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
1280 + if (n_flush > 0) {
1282 + n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
1284 + n_pages_flushed = 0;
1287 + /* store previous first pages of the flush_list */
1288 + for (j = 0; j < srv_buf_pool_instances; j++) {
1289 + buf_pool = buf_pool_from_array(j);
1291 + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1293 + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1295 + prev_flush_info[j].space = bpage->space;
1296 + prev_flush_info[j].offset = bpage->offset;
1297 + prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1299 + prev_flush_info[j].space = 0;
1300 + prev_flush_info[j].offset = 0;
1301 + prev_flush_info[j].oldest_modification = 0;
1304 + n_pages_flushed = 0;
1308 + prev_adaptive_flushing_method = 2;
1310 + mutex_enter(&(log_sys->mutex));
1311 + lsn_old = log_sys->lsn;
1312 + mutex_exit(&(log_sys->mutex));
1313 + prev_adaptive_flushing_method = ULINT_UNDEFINED;
1316 + if (n_pages_flushed == ULINT_UNDEFINED) {
1317 + n_pages_flushed_prev = 0;
1319 + n_pages_flushed_prev = n_pages_flushed;
1322 if (srv_activity_count == old_activity_count) {
1323 @@ -2945,12 +3217,12 @@
1324 even if the server were active */
1326 srv_main_thread_op_info = "doing insert buffer merge";
1327 - ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1328 + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1330 /* Flush logs if needed */
1331 srv_sync_log_buffer_in_background();
1333 - if (srv_n_purge_threads == 0) {
1334 + if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1335 srv_main_thread_op_info = "master purging";
1337 srv_master_do_purge();
1338 @@ -3028,7 +3300,7 @@
1342 - if (srv_n_purge_threads == 0) {
1343 + if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1344 srv_main_thread_op_info = "master purging";
1346 srv_master_do_purge();
1347 @@ -3053,7 +3325,7 @@
1348 buf_flush_list below. Otherwise, the system favors
1349 clean pages over cleanup throughput. */
1350 n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
1352 + PCT_IBUF_IO(100));
1355 srv_main_thread_op_info = "reserving kernel mutex";
1356 @@ -3193,6 +3465,7 @@
1359 ulint n_total_purged = ULINT_UNDEFINED;
1360 + ulint next_itr_time;
1362 ut_a(srv_n_purge_threads == 1);
1364 @@ -3213,9 +3486,12 @@
1366 mutex_exit(&kernel_mutex);
1368 + next_itr_time = ut_time_ms();
1370 while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
1372 ulint n_pages_purged = 0;
1375 /* If there are very few records to purge or the last
1376 purge didn't purge any records then wait for activity.
1377 @@ -3262,6 +3538,16 @@
1378 } while (n_pages_purged > 0 && !srv_fast_shutdown);
1380 srv_sync_log_buffer_in_background();
1382 + cur_time = ut_time_ms();
1383 + if (next_itr_time > cur_time) {
1384 + os_thread_sleep(ut_min(1000000,
1385 + (next_itr_time - cur_time)
1387 + next_itr_time = ut_time_ms() + 1000;
1389 + next_itr_time = cur_time + 1000;
1393 mutex_enter(&kernel_mutex);
1394 --- a/storage/innobase/srv/srv0start.c
1395 +++ b/storage/innobase/srv/srv0start.c
1396 @@ -1237,6 +1237,9 @@
1397 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1398 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1400 + } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1401 + srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1403 } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1404 srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1406 --- a/storage/innobase/trx/trx0purge.c
1407 +++ b/storage/innobase/trx/trx0purge.c
1408 @@ -392,10 +392,10 @@
1409 trx_sys->rseg_history_len++;
1410 mutex_exit(&kernel_mutex);
1412 - if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
1413 +// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/
1414 /* Inform the purge thread that there is work to do. */
1415 srv_wake_purge_thread_if_not_active();
1420 /**********************************************************************//**
1421 --- a/storage/innobase/trx/trx0trx.c
1422 +++ b/storage/innobase/trx/trx0trx.c
1424 trx->read_view = NULL;
1427 + ulint flush_log_at_trx_commit;
1429 mutex_exit(&kernel_mutex);
1431 @@ -992,6 +993,12 @@
1432 trx_undo_insert_cleanup(trx);
1435 + if (srv_use_global_flush_log_at_trx_commit) {
1436 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1438 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1441 /* NOTE that we could possibly make a group commit more
1442 efficient here: call os_thread_yield here to allow also other
1443 trxs to come to commit! */
1444 @@ -1023,9 +1030,9 @@
1445 if (trx->flush_log_later) {
1446 /* Do nothing yet */
1447 trx->must_flush_log_later = TRUE;
1448 - } else if (srv_flush_log_at_trx_commit == 0) {
1449 + } else if (flush_log_at_trx_commit == 0) {
1451 - } else if (srv_flush_log_at_trx_commit == 1) {
1452 + } else if (flush_log_at_trx_commit == 1) {
1453 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1454 /* Write the log but do not flush it to disk */
1456 @@ -1037,7 +1044,7 @@
1458 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1460 - } else if (srv_flush_log_at_trx_commit == 2) {
1461 + } else if (flush_log_at_trx_commit == 2) {
1463 /* Write the log but do not flush it to disk */
1465 @@ -1701,16 +1708,23 @@
1466 trx_t* trx) /*!< in: trx handle */
1468 ib_uint64_t lsn = trx->commit_lsn;
1469 + ulint flush_log_at_trx_commit;
1473 trx->op_info = "flushing log";
1475 + if (srv_use_global_flush_log_at_trx_commit) {
1476 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1478 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1481 if (!trx->must_flush_log_later) {
1483 - } else if (srv_flush_log_at_trx_commit == 0) {
1484 + } else if (flush_log_at_trx_commit == 0) {
1486 - } else if (srv_flush_log_at_trx_commit == 1) {
1487 + } else if (flush_log_at_trx_commit == 1) {
1488 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1489 /* Write the log but do not flush it to disk */
1491 @@ -1721,7 +1735,7 @@
1493 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1495 - } else if (srv_flush_log_at_trx_commit == 2) {
1496 + } else if (flush_log_at_trx_commit == 2) {
1498 /* Write the log but do not flush it to disk */
1500 @@ -1969,6 +1983,8 @@
1501 /*--------------------------------------*/
1504 + ulint flush_log_at_trx_commit;
1506 /* Depending on the my.cnf options, we may now write the log
1507 buffer to the log files, making the prepared state of the
1508 transaction durable if the OS does not crash. We may also
1509 @@ -1988,9 +2004,15 @@
1511 mutex_exit(&kernel_mutex);
1513 - if (srv_flush_log_at_trx_commit == 0) {
1514 + if (srv_use_global_flush_log_at_trx_commit) {
1515 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1517 + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1520 + if (flush_log_at_trx_commit == 0) {
1522 - } else if (srv_flush_log_at_trx_commit == 1) {
1523 + } else if (flush_log_at_trx_commit == 1) {
1524 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1525 /* Write the log but do not flush it to disk */
1527 @@ -2002,7 +2024,7 @@
1529 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1531 - } else if (srv_flush_log_at_trx_commit == 2) {
1532 + } else if (flush_log_at_trx_commit == 2) {
1534 /* Write the log but do not flush it to disk */
1536 --- a/mysql-test/include/default_mysqld.cnf
1537 +++ b/mysql-test/include/default_mysqld.cnf
1539 max_heap_table_size= 1M
1541 loose-innodb_data_file_path= ibdata1:10M:autoextend
1542 -loose-innodb_buffer_pool_size= 8M
1543 +loose-innodb_buffer_pool_size= 32M
1544 loose-innodb_write_io_threads= 2
1545 loose-innodb_read_io_threads= 2
1546 loose-innodb_log_buffer_size= 1M
1547 --- a/mysql-test/suite/innodb/r/innodb.result
1548 +++ b/mysql-test/suite/innodb/r/innodb.result
1549 @@ -1678,7 +1678,7 @@
1551 SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total';
1555 SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size';
1559 +++ b/mysql-test/suite/innodb/r/percona_flush_contiguous_neighbors.result
1561 +DROP TABLE IF EXISTS t1;
1562 +CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB;
1563 +INSERT INTO t1(foo) VALUES ('a'), ('b');
1564 +INSERT INTO t1(foo) SELECT foo FROM t1;
1565 +INSERT INTO t1(foo) SELECT foo FROM t1;
1566 +INSERT INTO t1(foo) SELECT foo FROM t1;
1567 +INSERT INTO t1(foo) SELECT foo FROM t1;
1568 +INSERT INTO t1(foo) SELECT foo FROM t1;
1569 +INSERT INTO t1(foo) SELECT foo FROM t1;
1570 +INSERT INTO t1(foo) SELECT foo FROM t1;
1571 +INSERT INTO t1(foo) SELECT foo FROM t1;
1572 +INSERT INTO t1(foo) SELECT foo FROM t1;
1573 +INSERT INTO t1(foo) SELECT foo FROM t1;
1574 +INSERT INTO t1(foo) SELECT foo FROM t1;
1575 +INSERT INTO t1(foo) SELECT foo FROM t1;
1576 +INSERT INTO t1(foo) SELECT foo FROM t1;
1577 +INSERT INTO t1(foo) SELECT foo FROM t1;
1578 +INSERT INTO t1(foo) SELECT foo FROM t1;
1579 +INSERT INTO t1(foo) SELECT foo FROM t1;
1580 +INSERT INTO t1(foo) SELECT foo FROM t1;
1583 +++ b/mysql-test/suite/innodb/t/percona_flush_contiguous_neighbors-master.opt
1585 +--innodb_flush_neighbor_pages=cont
1587 +++ b/mysql-test/suite/innodb/t/percona_flush_contiguous_neighbors.test
1589 +# Test for innodb_flush_neighbor_pages=contiguous.
1590 +# The test is very crude: we simply overflow the buffer pool with such a number of
1591 +# new/modified pages that some flushing is bound to happen.
1593 +--source include/have_innodb.inc
1596 +DROP TABLE IF EXISTS t1;
1599 +CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB;
1601 +INSERT INTO t1(foo) VALUES ('a'), ('b');
1602 +INSERT INTO t1(foo) SELECT foo FROM t1;
1603 +INSERT INTO t1(foo) SELECT foo FROM t1;
1604 +INSERT INTO t1(foo) SELECT foo FROM t1;
1605 +INSERT INTO t1(foo) SELECT foo FROM t1;
1606 +INSERT INTO t1(foo) SELECT foo FROM t1;
1607 +INSERT INTO t1(foo) SELECT foo FROM t1;
1608 +INSERT INTO t1(foo) SELECT foo FROM t1;
1609 +INSERT INTO t1(foo) SELECT foo FROM t1;
1610 +INSERT INTO t1(foo) SELECT foo FROM t1;
1611 +INSERT INTO t1(foo) SELECT foo FROM t1;
1612 +INSERT INTO t1(foo) SELECT foo FROM t1;
1613 +INSERT INTO t1(foo) SELECT foo FROM t1;
1614 +INSERT INTO t1(foo) SELECT foo FROM t1;
1615 +INSERT INTO t1(foo) SELECT foo FROM t1;
1616 +INSERT INTO t1(foo) SELECT foo FROM t1;
1617 +INSERT INTO t1(foo) SELECT foo FROM t1;
1618 +INSERT INTO t1(foo) SELECT foo FROM t1;
1620 +# TODO: cannot record a stable value here. A check of > 0 should be enough,
1621 +# but the variable is not accessible through INFORMATION_SCHEMA currently.
1622 +# SHOW GLOBAL STATUS LIKE 'Innodb_buffer_pool_pages_flushed';
1625 --- a/mysql-test/suite/innodb/t/innodb_cmp_drop_table-master.opt
1626 +++ b/mysql-test/suite/innodb/t/innodb_cmp_drop_table-master.opt
1628 ---innodb-buffer-pool-size=8M
1629 +--innodb-buffer-pool-size=32M
1630 --- a/mysql-test/suite/innodb/t/innodb_cmp_drop_table.test
1631 +++ b/mysql-test/suite/innodb/t/innodb_cmp_drop_table.test
1634 -- disable_query_log
1641 insert into t2 values(repeat('abcdefghijklmnopqrstuvwxyz',1000));
1648 # now there should be no 8K pages in the buffer pool