X-Git-Url: https://git.pld-linux.org/?a=blobdiff_plain;f=innodb_io_patches.patch;h=97e546bfd9f52f1ffaf17be9be97f24f8759fea0;hb=6350313fb618733598e985b127d0f65545235414;hp=c81c90953b2ad4dedb19a66fdf2f24fd7be826e5;hpb=adf0fb138dfeff9e736ccfac9ea148f9e1d53738;p=packages%2Fmysql.git diff --git a/innodb_io_patches.patch b/innodb_io_patches.patch index c81c909..97e546b 100644 --- a/innodb_io_patches.patch +++ b/innodb_io_patches.patch @@ -5,9 +5,8 @@ #!!! notice !!! # Any small change to this file in the main branch # should be done or reviewed by the maintainer! -diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c ---- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:09:51.273986410 +0900 -+++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:10:08.934990091 +0900 +--- a/storage/innobase/buf/buf0buf.c ++++ b/storage/innobase/buf/buf0buf.c @@ -320,6 +320,7 @@ /* When we traverse all the flush lists we don't want another @@ -24,10 +23,22 @@ diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c log_flush_order_mutex_exit(); /* The returned answer may be out of date: the flush_list can -diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c ---- a/storage/innobase/buf/buf0flu.c 2010-11-03 07:01:13.000000000 +0900 -+++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:10:08.934990091 +0900 -@@ -1376,7 +1376,7 @@ +--- a/storage/innobase/buf/buf0flu.c ++++ b/storage/innobase/buf/buf0flu.c +@@ -857,7 +857,7 @@ + flush: + /* Now flush the doublewrite buffer data to disk */ + +- fil_flush(TRX_SYS_SPACE); ++ fil_flush(TRX_SYS_SPACE, FALSE); + + /* We know that the writes have been flushed to disk now + and in recovery we will find them in the doublewrite buffer +@@ -1375,10 +1375,11 @@ + ulint high; + ulint count = 0; + buf_pool_t* buf_pool = buf_pool_get(space, offset); ++ ibool is_forward_scan; ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); @@ -36,11 +47,80 @@ diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c /* If there is little space, it is better not to flush any block except from the end of the LRU list */ -diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c ---- a/storage/innobase/buf/buf0rea.c 2010-11-03 07:01:13.000000000 +0900 -+++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:10:08.937050537 +0900 -@@ -260,6 +260,10 @@ - = BUF_READ_AHEAD_LINEAR_AREA(buf_pool); +@@ -1405,7 +1406,32 @@ + high = fil_space_get_size(space); + } + +- for (i = low; i < high; i++) { ++ if (srv_flush_neighbor_pages == 2) { ++ ++ /* In the case of contiguous flush where the requested page ++ does not fall at the start of flush area, first scan backward ++ from the page and later forward from it. */ ++ is_forward_scan = (offset == low); ++ } ++ else { ++ is_forward_scan = TRUE; ++ } ++ ++scan: ++ if (srv_flush_neighbor_pages == 2) { ++ if (is_forward_scan) { ++ i = offset; ++ } ++ else { ++ i = offset - 1; ++ } ++ } ++ else { ++ i = low; ++ } ++ ++ for (; is_forward_scan ? (i < high) : (i >= low); ++ is_forward_scan ? i++ : i--) { + + buf_page_t* bpage; + +@@ -1434,6 +1460,12 @@ + if (!bpage) { + + buf_pool_mutex_exit(buf_pool); ++ if (srv_flush_neighbor_pages == 2) { ++ ++ /* This is contiguous neighbor page flush and ++ the pages here are not contiguous. */ ++ break; ++ } + continue; + } + +@@ -1470,6 +1502,22 @@ + } + } + buf_pool_mutex_exit(buf_pool); ++ ++ if (srv_flush_neighbor_pages == 2) { ++ ++ /* We are trying to do the contiguous neighbor page ++ flush, but the last page we checked was unflushable, ++ making a "hole" in the flush, so stop this attempt. */ ++ break; ++ } ++ } ++ ++ if (!is_forward_scan) { ++ ++ /* Backward scan done, now do the forward scan */ ++ ut_a (srv_flush_neighbor_pages == 2); ++ is_forward_scan = TRUE; ++ goto scan; + } + + return(count); +--- a/storage/innobase/buf/buf0rea.c ++++ b/storage/innobase/buf/buf0rea.c +@@ -427,6 +427,10 @@ + = BUF_READ_AHEAD_AREA(buf_pool); ulint threshold; + if (!(srv_read_ahead & 2)) { @@ -50,10 +130,85 @@ diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) { /* No read-ahead to avoid thread deadlocks */ return(0); -diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc ---- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:09:51.283956391 +0900 -+++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:10:08.963980444 +0900 -@@ -444,6 +444,12 @@ +--- a/storage/innobase/fil/fil0fil.c ++++ b/storage/innobase/fil/fil0fil.c +@@ -2609,7 +2609,7 @@ + + os_thread_sleep(20000); + +- fil_flush(id); ++ fil_flush(id, TRUE); + + goto retry; + +@@ -2823,7 +2823,7 @@ + goto error_exit; + } + +- ret = os_file_flush(file); ++ ret = os_file_flush(file, TRUE); + + if (!ret) { + fputs("InnoDB: Error: file flush of tablespace ", stderr); +@@ -3009,7 +3009,7 @@ + } + } + +- success = os_file_flush(file); ++ success = os_file_flush(file, TRUE); + if (!success) { + + goto func_exit; +@@ -3031,7 +3031,7 @@ + + goto func_exit; + } +- success = os_file_flush(file); ++ success = os_file_flush(file, TRUE); + func_exit: + os_file_close(file); + ut_free(buf2); +@@ -4014,7 +4014,7 @@ + size_after_extend, *actual_size); */ + mutex_exit(&fil_system->mutex); + +- fil_flush(space_id); ++ fil_flush(space_id, TRUE); + + return(success); + } +@@ -4585,8 +4585,9 @@ + void + fil_flush( + /*======*/ +- ulint space_id) /*!< in: file space id (this can be a group of ++ ulint space_id, /*!< in: file space id (this can be a group of + log files or a tablespace of the database) */ ++ ibool metadata) + { + fil_space_t* space; + fil_node_t* node; +@@ -4657,7 +4658,7 @@ + /* fprintf(stderr, "Flushing to file %s\n", + node->name); */ + +- os_file_flush(file); ++ os_file_flush(file, metadata); + + mutex_enter(&fil_system->mutex); + +@@ -4740,7 +4741,7 @@ + a non-existing space id. */ + for (i = 0; i < n_space_ids; i++) { + +- fil_flush(space_ids[i]); ++ fil_flush(space_ids[i], TRUE); + } + + mem_free(space_ids); +--- a/storage/innobase/handler/ha_innodb.cc ++++ b/storage/innobase/handler/ha_innodb.cc +@@ -445,6 +445,12 @@ "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0); @@ -66,7 +221,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_ static handler *innobase_create_handler(handlerton *hton, TABLE_SHARE *table, -@@ -838,6 +844,17 @@ +@@ -841,6 +847,17 @@ } } @@ -84,7 +239,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_ /********************************************************************//** Obtain the InnoDB transaction of a MySQL thread. @return reference to transaction pointer */ -@@ -2437,6 +2454,9 @@ +@@ -2471,6 +2488,9 @@ srv_n_read_io_threads = (ulint) innobase_read_io_threads; srv_n_write_io_threads = (ulint) innobase_write_io_threads; @@ -94,7 +249,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_ srv_force_recovery = (ulint) innobase_force_recovery; srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; -@@ -11025,7 +11045,7 @@ +@@ -11141,7 +11161,7 @@ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, "Purge threads can be either 0 or 1.", NULL, NULL, @@ -103,7 +258,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_ 0, /* Minimum value */ 1, 0); /* Maximum value */ -@@ -11067,12 +11087,18 @@ +@@ -11183,12 +11203,18 @@ innodb_file_format_max_validate, innodb_file_format_max_update, "Antelope"); @@ -128,7 +283,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_ static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, -@@ -11167,7 +11193,7 @@ +@@ -11293,7 +11319,7 @@ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", @@ -137,7 +292,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_ static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, -@@ -11319,6 +11345,95 @@ +@@ -11442,6 +11468,127 @@ "trigger a readahead.", NULL, NULL, 56, 0, 64, 0); @@ -161,10 +316,42 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_ + "Control soft limit of checkpoint age. (0 : not control)", + NULL, NULL, 0, 0, ~0UL, 0); + -+static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages, -+ PLUGIN_VAR_RQCMDARG, -+ "Enable/Disable flushing also neighbor pages. 0:disable 1:enable", -+ NULL, NULL, 1, 0, 1, 0); ++static ++void ++innodb_flush_neighbor_pages_update( ++ THD* thd, ++ struct st_mysql_sys_var* var, ++ void* var_ptr, ++ const void* save) ++{ ++ *(long *)var_ptr = (*(long *)save) % 3; ++} ++ ++const char *flush_neighbor_pages_names[]= ++{ ++ "none", /* 0 */ ++ "area", ++ "cont", /* 2 */ ++ /* For compatibility with the older patch */ ++ "0", /* "none" + 3 */ ++ "1", /* "area" + 3 */ ++ "2", /* "cont" + 3 */ ++ NullS ++}; ++ ++TYPELIB flush_neighbor_pages_typelib= ++{ ++ array_elements(flush_neighbor_pages_names) - 1, ++ "flush_neighbor_pages_typelib", ++ flush_neighbor_pages_names, ++ NULL ++}; ++ ++static MYSQL_SYSVAR_ENUM(flush_neighbor_pages, srv_flush_neighbor_pages, ++ PLUGIN_VAR_RQCMDARG, "Neighbor page flushing behaviour: none: do not flush, " ++ "[area]: flush selected pages one-by-one, " ++ "cont: flush a contiguous block of pages", NULL, ++ innodb_flush_neighbor_pages_update, 1, &flush_neighbor_pages_typelib); + +static +void @@ -233,15 +420,15 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(autoextend_increment), -@@ -11339,6 +11454,7 @@ +@@ -11462,6 +11609,7 @@ MYSQL_SYSVAR(file_format_check), MYSQL_SYSVAR(file_format_max), MYSQL_SYSVAR(flush_log_at_trx_commit), + MYSQL_SYSVAR(use_global_flush_log_at_trx_commit), MYSQL_SYSVAR(flush_method), MYSQL_SYSVAR(force_recovery), - MYSQL_SYSVAR(locks_unsafe_for_binlog), -@@ -11376,6 +11492,13 @@ + MYSQL_SYSVAR(large_prefix), +@@ -11501,6 +11649,13 @@ MYSQL_SYSVAR(show_verbose_locks), MYSQL_SYSVAR(show_locks_held), MYSQL_SYSVAR(version), @@ -255,10 +442,9 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_ MYSQL_SYSVAR(use_sys_malloc), MYSQL_SYSVAR(use_native_aio), MYSQL_SYSVAR(change_buffering), -diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c ---- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-11-03 07:01:13.000000000 +0900 -+++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:10:09.073984282 +0900 -@@ -514,8 +514,10 @@ +--- a/storage/innobase/ibuf/ibuf0ibuf.c ++++ b/storage/innobase/ibuf/ibuf0ibuf.c +@@ -523,8 +523,10 @@ grow in size, as the references on the upper levels of the tree can change */ @@ -271,7 +457,7 @@ diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf. mutex_create(ibuf_pessimistic_insert_mutex_key, &ibuf_pessimistic_insert_mutex, -@@ -2753,9 +2755,11 @@ +@@ -2763,9 +2765,11 @@ size = ibuf->size; max_size = ibuf->max_size; @@ -283,10 +469,9 @@ diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf. sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC); -diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h ---- a/storage/innobase/include/buf0rea.h 2010-11-03 07:01:13.000000000 +0900 -+++ b/storage/innobase/include/buf0rea.h 2010-12-03 15:10:09.076066335 +0900 -@@ -124,8 +124,7 @@ +--- a/storage/innobase/include/buf0rea.h ++++ b/storage/innobase/include/buf0rea.h +@@ -149,8 +149,7 @@ /** The size in pages of the area which the read-ahead algorithms read if invoked */ @@ -296,9 +481,21 @@ diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0re /** @name Modes used in read-ahead @{ */ /** read only pages belonging to the insert buffer tree */ -diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h ---- a/storage/innobase/include/ha_prototypes.h 2010-11-03 07:01:13.000000000 +0900 -+++ b/storage/innobase/include/ha_prototypes.h 2010-12-03 15:10:09.078026360 +0900 +--- a/storage/innobase/include/fil0fil.h ++++ b/storage/innobase/include/fil0fil.h +@@ -663,8 +663,9 @@ + void + fil_flush( + /*======*/ +- ulint space_id); /*!< in: file space id (this can be a group of ++ ulint space_id, /*!< in: file space id (this can be a group of + log files or a tablespace of the database) */ ++ ibool metadata); + /**********************************************************************//** + Flushes to disk writes in file spaces of the given type possibly cached by + the OS. */ +--- a/storage/innobase/include/ha_prototypes.h ++++ b/storage/innobase/include/ha_prototypes.h @@ -284,6 +284,13 @@ /*===================*/ void* thd, /*!< in: thread handle (THD*) */ @@ -313,9 +510,67 @@ diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ /**********************************************************************//** Get the current setting of the lower_case_table_names global parameter from -diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h ---- a/storage/innobase/include/srv0srv.h 2010-12-03 15:09:51.291955835 +0900 -+++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:10:09.079029047 +0900 +--- a/storage/innobase/include/os0file.h ++++ b/storage/innobase/include/os0file.h +@@ -296,8 +296,8 @@ + pfs_os_file_write_func(name, file, buf, offset, offset_high, \ + n, __FILE__, __LINE__) + +-# define os_file_flush(file) \ +- pfs_os_file_flush_func(file, __FILE__, __LINE__) ++# define os_file_flush(file, metadata) \ ++ pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__) + + # define os_file_rename(key, oldpath, newpath) \ + pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__) +@@ -333,7 +333,7 @@ + # define os_file_write(name, file, buf, offset, offset_high, n) \ + os_file_write_func(name, file, buf, offset, offset_high, n) + +-# define os_file_flush(file) os_file_flush_func(file) ++# define os_file_flush(file, metadata) os_file_flush_func(file, metadata) + + # define os_file_rename(key, oldpath, newpath) \ + os_file_rename_func(oldpath, newpath) +@@ -781,6 +781,7 @@ + pfs_os_file_flush_func( + /*===================*/ + os_file_t file, /*!< in, own: handle to a file */ ++ ibool metadata, + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ + +@@ -860,7 +861,8 @@ + ibool + os_file_flush_func( + /*===============*/ +- os_file_t file); /*!< in, own: handle to a file */ ++ os_file_t file, /*!< in, own: handle to a file */ ++ ibool metadata); + /***********************************************************************//** + Retrieves the last error number if an error occurs in a file io function. + The number should be retrieved before any other OS calls (because they may +--- a/storage/innobase/include/os0file.ic ++++ b/storage/innobase/include/os0file.ic +@@ -369,6 +369,7 @@ + pfs_os_file_flush_func( + /*===================*/ + os_file_t file, /*!< in, own: handle to a file */ ++ ibool metadata, + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ + { +@@ -378,7 +379,7 @@ + + register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC, + src_file, src_line); +- result = os_file_flush_func(file); ++ result = os_file_flush_func(file, metadata); + + register_pfs_file_io_end(locker, 0); + +--- a/storage/innobase/include/srv0srv.h ++++ b/storage/innobase/include/srv0srv.h @@ -138,7 +138,8 @@ extern ulint srv_n_log_files; extern ulint srv_log_file_size; @@ -325,8 +580,8 @@ diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0sr +extern char srv_use_global_flush_log_at_trx_commit; extern char srv_adaptive_flushing; - -@@ -216,6 +217,16 @@ + /* If this flag is TRUE, then we will load the indexes' (and tables') metadata +@@ -221,6 +222,16 @@ extern ulong srv_max_purge_lag; extern ulong srv_replication_delay; @@ -343,7 +598,7 @@ diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0sr /*-------------------------------------------*/ extern ulint srv_n_rows_inserted; -@@ -394,8 +405,9 @@ +@@ -399,8 +410,9 @@ when writing data files, but do flush after writing to log files */ SRV_UNIX_NOSYNC, /*!< do not flush after writing */ @@ -354,9 +609,8 @@ diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0sr }; /** Alternatives for file i/o in Windows */ -diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c ---- a/storage/innobase/log/log0log.c 2010-11-03 07:01:13.000000000 +0900 -+++ b/storage/innobase/log/log0log.c 2010-12-03 15:10:09.084023562 +0900 +--- a/storage/innobase/log/log0log.c ++++ b/storage/innobase/log/log0log.c @@ -48,6 +48,7 @@ #include "srv0start.h" #include "trx0sys.h" @@ -419,15 +673,19 @@ diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c log->check_flush_or_checkpoint = TRUE; } -@@ -1100,6 +1128,7 @@ +@@ -1100,9 +1128,10 @@ group = (log_group_t*)((ulint)group - 1); if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { - fil_flush(group->space_id); -@@ -1121,8 +1150,9 @@ +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FALSE); + } + + #ifdef UNIV_DEBUG +@@ -1121,10 +1150,11 @@ logs and cannot end up here! */ if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC @@ -436,8 +694,11 @@ diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c - && srv_flush_log_at_trx_commit != 2) { + && thd_flush_log_at_trx_commit(NULL) != 2) { - fil_flush(group->space_id); +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FALSE); } + + mutex_enter(&(log_sys->mutex)); @@ -1501,7 +1531,8 @@ mutex_exit(&(log_sys->mutex)); @@ -448,6 +709,15 @@ diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c /* O_DSYNC means the OS did not buffer the log file at all: so we have also flushed to disk what we have written */ +@@ -1511,7 +1542,7 @@ + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + +- fil_flush(group->space_id); ++ fil_flush(group->space_id, FALSE); + log_sys->flushed_to_disk_lsn = log_sys->write_lsn; + } + @@ -2120,10 +2151,10 @@ sync = TRUE; @@ -470,6 +740,15 @@ diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c /* A checkpoint is not urgent: do it asynchronously */ do_checkpoint = TRUE; +@@ -2607,7 +2638,7 @@ + + mutex_exit(&(log_sys->mutex)); + +- fil_flush(group->archive_space_id); ++ fil_flush(group->archive_space_id, TRUE); + + mutex_enter(&(log_sys->mutex)); + @@ -3349,6 +3380,17 @@ log_sys->flushed_to_disk_lsn, log_sys->last_checkpoint_lsn); @@ -488,9 +767,8 @@ diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c current_time = time(NULL); time_elapsed = 0.001 + difftime(current_time, -diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c ---- a/storage/innobase/log/log0recv.c 2010-11-03 07:01:13.000000000 +0900 -+++ b/storage/innobase/log/log0recv.c 2010-12-03 15:10:09.089024191 +0900 +--- a/storage/innobase/log/log0recv.c ++++ b/storage/innobase/log/log0recv.c @@ -2906,9 +2906,12 @@ ib_uint64_t archived_lsn; #endif /* UNIV_LOG_ARCHIVE */ @@ -505,9 +783,26 @@ diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c #ifdef UNIV_LOG_ARCHIVE ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX); /** TRUE when recovering from a checkpoint */ -diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c ---- a/storage/innobase/os/os0file.c 2010-11-03 07:01:13.000000000 +0900 -+++ b/storage/innobase/os/os0file.c 2010-12-03 15:10:09.093023540 +0900 +@@ -3468,7 +3471,7 @@ + exit(1); + } + +- os_file_flush(log_file); ++ os_file_flush(log_file, TRUE); + os_file_close(log_file); + } + +@@ -3492,7 +3495,7 @@ + + os_file_write(name, log_file, buf, 0, 0, + LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE); +- os_file_flush(log_file); ++ os_file_flush(log_file, TRUE); + os_file_close(log_file); + + ut_free(buf); +--- a/storage/innobase/os/os0file.c ++++ b/storage/innobase/os/os0file.c @@ -1424,7 +1424,7 @@ #endif #ifdef UNIV_NON_BUFFERED_IO @@ -538,9 +833,121 @@ diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c #ifdef USE_FILE_LOCK if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) { -diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c ---- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:09:51.301987792 +0900 -+++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:13:29.369986988 +0900 +@@ -2008,7 +2013,7 @@ + + ut_free(buf2); + +- ret = os_file_flush(file); ++ ret = os_file_flush(file, TRUE); + + if (ret) { + return(TRUE); +@@ -2046,7 +2051,8 @@ + int + os_file_fsync( + /*==========*/ +- os_file_t file) /*!< in: handle to a file */ ++ os_file_t file, /*!< in: handle to a file */ ++ ibool metadata) + { + int ret; + int failures; +@@ -2055,7 +2061,16 @@ + failures = 0; + + do { ++#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC ++ if (metadata) { ++ ret = fsync(file); ++ } else { ++ ret = fdatasync(file); ++ } ++#else ++ (void) metadata; + ret = fsync(file); ++#endif + + os_n_fsyncs++; + +@@ -2092,7 +2107,8 @@ + ibool + os_file_flush_func( + /*===============*/ +- os_file_t file) /*!< in, own: handle to a file */ ++ os_file_t file, /*!< in, own: handle to a file */ ++ ibool metadata) + { + #ifdef __WIN__ + BOOL ret; +@@ -2142,18 +2158,18 @@ + /* If we are not on an operating system that supports this, + then fall back to a plain fsync. */ + +- ret = os_file_fsync(file); ++ ret = os_file_fsync(file, metadata); + } else { + ret = fcntl(file, F_FULLFSYNC, NULL); + + if (ret) { + /* If we are not on a file system that supports this, + then fall back to a plain fsync. */ +- ret = os_file_fsync(file); ++ ret = os_file_fsync(file, metadata); + } + } + #else +- ret = os_file_fsync(file); ++ ret = os_file_fsync(file, metadata); + #endif + + if (ret == 0) { +@@ -2336,7 +2352,7 @@ + the OS crashes, a database page is only partially + physically written to disk. */ + +- ut_a(TRUE == os_file_flush(file)); ++ ut_a(TRUE == os_file_flush(file, TRUE)); + } + # endif /* UNIV_DO_FLUSH */ + +@@ -2378,7 +2394,7 @@ + the OS crashes, a database page is only partially + physically written to disk. */ + +- ut_a(TRUE == os_file_flush(file)); ++ ut_a(TRUE == os_file_flush(file, TRUE)); + } + # endif /* UNIV_DO_FLUSH */ + +@@ -2750,7 +2766,7 @@ + + # ifdef UNIV_DO_FLUSH + if (!os_do_not_call_flush_at_each_write) { +- ut_a(TRUE == os_file_flush(file)); ++ ut_a(TRUE == os_file_flush(file, TRUE)); + } + # endif /* UNIV_DO_FLUSH */ + +@@ -4296,7 +4312,7 @@ + #ifdef UNIV_DO_FLUSH + if (slot->type == OS_FILE_WRITE + && !os_do_not_call_flush_at_each_write) { +- if (!os_file_flush(slot->file)) { ++ if (!os_file_flush(slot->file, TRUE)) { + ut_error; + } + } +@@ -4597,7 +4613,7 @@ + #ifdef UNIV_DO_FLUSH + if (slot->type == OS_FILE_WRITE + && !os_do_not_call_flush_at_each_write) +- && !os_file_flush(slot->file) { ++ && !os_file_flush(slot->file, TRUE) { + ut_error; + } + #endif /* UNIV_DO_FLUSH */ +--- a/storage/innobase/srv/srv0srv.c ++++ b/storage/innobase/srv/srv0srv.c @@ -183,7 +183,8 @@ UNIV_INTERN ulint srv_log_file_size = ULINT_MAX; /* size in database pages */ @@ -551,7 +958,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c /* Try to flush dirty pages so as to avoid IO bursts at the checkpoints. */ -@@ -402,6 +403,17 @@ +@@ -404,6 +405,17 @@ UNIV_INTERN ulong srv_replication_delay = 0; @@ -561,7 +968,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c +#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0))) + +UNIV_INTERN ulint srv_checkpoint_age_target = 0; -+UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */ ++UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:area 2:contiguous */ + +UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */ +UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ @@ -569,7 +976,16 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c /*-------------------------------------------*/ UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; -@@ -2742,6 +2754,7 @@ +@@ -2713,7 +2725,7 @@ + + ut_ad(!mutex_own(&kernel_mutex)); + +- ut_a(srv_n_purge_threads == 0); ++ ut_a(srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)); + + do { + /* Check for shutdown and change in purge config. */ +@@ -2746,6 +2758,7 @@ ulint n_pages_purged = 0; ulint n_bytes_merged; ulint n_pages_flushed; @@ -577,7 +993,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c ulint n_bytes_archived; ulint n_tables_to_drop; ulint n_ios; -@@ -2749,7 +2762,20 @@ +@@ -2753,7 +2766,20 @@ ulint n_ios_very_old; ulint n_pend_ios; ulint next_itr_time; @@ -598,7 +1014,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c #ifdef UNIV_DEBUG_THREAD_CREATION fprintf(stderr, "Master thread starts, id %lu\n", -@@ -2771,6 +2797,9 @@ +@@ -2775,6 +2801,9 @@ mutex_exit(&kernel_mutex); @@ -608,7 +1024,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c loop: /*****************************************************************/ /* ---- When there is database activity by users, we cycle in this -@@ -2801,9 +2830,13 @@ +@@ -2805,9 +2834,13 @@ /* Sleep for 1 second on entrying the for loop below the first time. */ next_itr_time = ut_time_ms() + 1000; @@ -622,7 +1038,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c /* ALTER TABLE in MySQL requires on Unix that the table handler can drop tables lazily after there no longer are SELECT queries to them. */ -@@ -2827,6 +2860,7 @@ +@@ -2831,6 +2864,7 @@ srv_main_thread_op_info = "sleeping"; srv_main_1_second_loops++; @@ -630,7 +1046,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c if (next_itr_time > cur_time && srv_shutdown_state == SRV_SHUTDOWN_NONE) { -@@ -2837,10 +2871,26 @@ +@@ -2841,10 +2875,26 @@ (next_itr_time - cur_time) * 1000)); srv_main_sleeps++; @@ -657,7 +1073,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c /* Flush logs if needed */ srv_sync_log_buffer_in_background(); -@@ -2860,7 +2910,7 @@ +@@ -2864,7 +2914,7 @@ if (n_pend_ios < SRV_PEND_IO_THRESHOLD && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) { srv_main_thread_op_info = "doing insert buffer merge"; @@ -666,7 +1082,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c /* Flush logs if needed */ srv_sync_log_buffer_in_background(); -@@ -2877,7 +2927,11 @@ +@@ -2881,7 +2931,11 @@ n_pages_flushed = buf_flush_list( PCT_IO(100), IB_ULONGLONG_MAX); @@ -679,7 +1095,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c /* Try to keep the rate of flushing of dirty pages such that redo log generation does not -@@ -2893,6 +2947,224 @@ +@@ -2897,6 +2951,224 @@ n_flush, IB_ULONGLONG_MAX); } @@ -904,7 +1320,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c } if (srv_activity_count == old_activity_count) { -@@ -2941,7 +3213,7 @@ +@@ -2945,12 +3217,12 @@ even if the server were active */ srv_main_thread_op_info = "doing insert buffer merge"; @@ -913,7 +1329,22 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c /* Flush logs if needed */ srv_sync_log_buffer_in_background(); -@@ -3049,7 +3321,7 @@ + +- if (srv_n_purge_threads == 0) { ++ if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) { + srv_main_thread_op_info = "master purging"; + + srv_master_do_purge(); +@@ -3028,7 +3300,7 @@ + } + } + +- if (srv_n_purge_threads == 0) { ++ if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) { + srv_main_thread_op_info = "master purging"; + + srv_master_do_purge(); +@@ -3053,7 +3325,7 @@ buf_flush_list below. Otherwise, the system favors clean pages over cleanup throughput. */ n_bytes_merged = ibuf_contract_for_n_pages(FALSE, @@ -922,7 +1353,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c } srv_main_thread_op_info = "reserving kernel mutex"; -@@ -3189,6 +3461,7 @@ +@@ -3193,6 +3465,7 @@ srv_slot_t* slot; ulint retries = 0; ulint n_total_purged = ULINT_UNDEFINED; @@ -930,7 +1361,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c ut_a(srv_n_purge_threads == 1); -@@ -3209,9 +3482,12 @@ +@@ -3213,9 +3486,12 @@ mutex_exit(&kernel_mutex); @@ -943,7 +1374,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c /* If there are very few records to purge or the last purge didn't purge any records then wait for activity. -@@ -3258,6 +3534,16 @@ +@@ -3262,6 +3538,16 @@ } while (n_pages_purged > 0 && !srv_fast_shutdown); srv_sync_log_buffer_in_background(); @@ -960,10 +1391,9 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c } mutex_enter(&kernel_mutex); -diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c ---- a/storage/innobase/srv/srv0start.c 2010-11-03 07:01:13.000000000 +0900 -+++ b/storage/innobase/srv/srv0start.c 2010-12-03 15:10:09.103023543 +0900 -@@ -1217,6 +1217,9 @@ +--- a/storage/innobase/srv/srv0start.c ++++ b/storage/innobase/srv/srv0start.c +@@ -1237,6 +1237,9 @@ } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) { srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; @@ -973,9 +1403,8 @@ diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) { srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; -diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c ---- a/storage/innobase/trx/trx0purge.c 2011-04-12 14:14:14.000000000 +0900 -+++ b/storage/innobase/trx/trx0purge.c 2011-04-12 14:15:44.000000000 +0900 +--- a/storage/innobase/trx/trx0purge.c ++++ b/storage/innobase/trx/trx0purge.c @@ -392,10 +392,10 @@ trx_sys->rseg_history_len++; mutex_exit(&kernel_mutex); @@ -989,9 +1418,8 @@ diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c } /**********************************************************************//** -diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c ---- a/storage/innobase/trx/trx0trx.c 2010-11-03 07:01:13.000000000 +0900 -+++ b/storage/innobase/trx/trx0trx.c 2010-12-03 15:10:09.106023937 +0900 +--- a/storage/innobase/trx/trx0trx.c ++++ b/storage/innobase/trx/trx0trx.c @@ -984,6 +984,7 @@ trx->read_view = NULL; @@ -1105,3 +1533,116 @@ diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c /* Write the log but do not flush it to disk */ +--- a/mysql-test/include/default_mysqld.cnf ++++ b/mysql-test/include/default_mysqld.cnf +@@ -29,7 +29,7 @@ + max_heap_table_size= 1M + + loose-innodb_data_file_path= ibdata1:10M:autoextend +-loose-innodb_buffer_pool_size= 8M ++loose-innodb_buffer_pool_size= 32M + loose-innodb_write_io_threads= 2 + loose-innodb_read_io_threads= 2 + loose-innodb_log_buffer_size= 1M +--- a/mysql-test/suite/innodb/r/innodb.result ++++ b/mysql-test/suite/innodb/r/innodb.result +@@ -1678,7 +1678,7 @@ + drop table t1; + SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total'; + variable_value +-511 ++2047 + SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size'; + variable_value + 16384 +--- /dev/null ++++ b/mysql-test/suite/innodb/r/percona_flush_contiguous_neighbors.result +@@ -0,0 +1,21 @@ ++DROP TABLE IF EXISTS t1; ++CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB; ++INSERT INTO t1(foo) VALUES ('a'), ('b'); ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++DROP TABLE t1; +--- /dev/null ++++ b/mysql-test/suite/innodb/t/percona_flush_contiguous_neighbors-master.opt +@@ -0,0 +1 @@ ++--innodb_flush_neighbor_pages=cont +--- /dev/null ++++ b/mysql-test/suite/innodb/t/percona_flush_contiguous_neighbors.test +@@ -0,0 +1,36 @@ ++# Test for innodb_flush_neighbor_pages=contiguous. ++# The test is very crude: we simply overflow the buffer pool with such a number of ++# new/modified pages that some flushing is bound to happen. ++ ++--source include/have_innodb.inc ++ ++--disable_warnings ++DROP TABLE IF EXISTS t1; ++--enable_warnings ++ ++CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB; ++ ++INSERT INTO t1(foo) VALUES ('a'), ('b'); ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++INSERT INTO t1(foo) SELECT foo FROM t1; ++ ++# TODO: cannot record a stable value here. A check of > 0 should be enough, ++# but the variable is not accessible through INFORMATION_SCHEMA currently. ++# SHOW GLOBAL STATUS LIKE 'Innodb_buffer_pool_pages_flushed'; ++ ++DROP TABLE t1; +--- a/mysql-test/suite/innodb/t/innodb_cmp_drop_table-master.opt ++++ b/mysql-test/suite/innodb/t/innodb_cmp_drop_table-master.opt +@@ -1 +1 @@ +---innodb-buffer-pool-size=8M ++--innodb-buffer-pool-size=32M +--- a/mysql-test/suite/innodb/t/innodb_cmp_drop_table.test ++++ b/mysql-test/suite/innodb/t/innodb_cmp_drop_table.test +@@ -36,13 +36,14 @@ + + -- disable_query_log + +--- let $i = 400 ++-- let $i = 4000 ++begin; + while ($i) + { + insert into t2 values(repeat('abcdefghijklmnopqrstuvwxyz',1000)); + dec $i; + } +- ++commit; + -- enable_query_log + + # now there should be no 8K pages in the buffer pool