git.pld-linux.org Git - packages/mysql.git/blobdiff - innodb_io_patches.patch
Small reorganization: move slave-related options together
[packages/mysql.git] / innodb_io_patches.patch
index c81c90953b2ad4dedb19a66fdf2f24fd7be826e5..97e546bfd9f52f1ffaf17be9be97f24f8759fea0 100644 (file)
@@ -5,9 +5,8 @@
 #!!! notice !!!
 # Any small change to this file in the main branch
 # should be done or reviewed by the maintainer!
-diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
---- a/storage/innobase/buf/buf0buf.c   2010-12-03 15:09:51.273986410 +0900
-+++ b/storage/innobase/buf/buf0buf.c   2010-12-03 15:10:08.934990091 +0900
+--- a/storage/innobase/buf/buf0buf.c
++++ b/storage/innobase/buf/buf0buf.c
 @@ -320,6 +320,7 @@
  
        /* When we traverse all the flush lists we don't want another
@@ -24,10 +23,22 @@ diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
        log_flush_order_mutex_exit();
  
        /* The returned answer may be out of date: the flush_list can
-diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
---- a/storage/innobase/buf/buf0flu.c   2010-11-03 07:01:13.000000000 +0900
-+++ b/storage/innobase/buf/buf0flu.c   2010-12-03 15:10:08.934990091 +0900
-@@ -1376,7 +1376,7 @@
+--- a/storage/innobase/buf/buf0flu.c
++++ b/storage/innobase/buf/buf0flu.c
+@@ -857,7 +857,7 @@
+ flush:
+       /* Now flush the doublewrite buffer data to disk */
+-      fil_flush(TRX_SYS_SPACE);
++      fil_flush(TRX_SYS_SPACE, FALSE);
+       /* We know that the writes have been flushed to disk now
+       and in recovery we will find them in the doublewrite buffer
+@@ -1375,10 +1375,11 @@
+       ulint           high;
+       ulint           count = 0;
+       buf_pool_t*     buf_pool = buf_pool_get(space, offset);
++      ibool           is_forward_scan;
  
        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
  
@@ -36,11 +47,80 @@ diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
                /* If there is little space, it is better not to flush
                any block except from the end of the LRU list */
  
-diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
---- a/storage/innobase/buf/buf0rea.c   2010-11-03 07:01:13.000000000 +0900
-+++ b/storage/innobase/buf/buf0rea.c   2010-12-03 15:10:08.937050537 +0900
-@@ -260,6 +260,10 @@
-               = BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
+@@ -1405,7 +1406,32 @@
+               high = fil_space_get_size(space);
+       }
+-      for (i = low; i < high; i++) {
++      if (srv_flush_neighbor_pages == 2) {
++
++              /* In the case of contiguous flush where the requested page
++              does not fall at the start of flush area, first scan backward
++              from the page and later forward from it. */
++              is_forward_scan = (offset == low);
++      }
++      else {
++              is_forward_scan = TRUE;
++      }
++
++scan:
++      if (srv_flush_neighbor_pages == 2) {
++              if (is_forward_scan) {
++                      i = offset;
++              }
++              else {
++                      i = offset - 1;
++              }
++      }
++      else {
++              i = low;
++      }
++
++      for (; is_forward_scan ? (i < high) : (i >= low);
++           is_forward_scan ? i++ : i--) {
+               buf_page_t*     bpage;
+@@ -1434,6 +1460,12 @@
+               if (!bpage) {
+                       buf_pool_mutex_exit(buf_pool);
++                      if (srv_flush_neighbor_pages == 2) {
++
++                              /* This is contiguous neighbor page flush and
++                              the pages here are not contiguous. */
++                              break;
++                      }
+                       continue;
+               }
+@@ -1470,6 +1502,22 @@
+                       }
+               }
+               buf_pool_mutex_exit(buf_pool);
++
++              if (srv_flush_neighbor_pages == 2) {
++
++                      /* We are trying to do the contiguous neighbor page
++                      flush, but the last page we checked was unflushable,
++                      making a "hole" in the flush, so stop this attempt. */
++                      break;
++              }
++      }
++
++      if (!is_forward_scan) {
++
++              /* Backward scan done, now do the forward scan */
++              ut_a (srv_flush_neighbor_pages == 2);
++              is_forward_scan = TRUE;
++              goto scan;
+       }
+       return(count);
+--- a/storage/innobase/buf/buf0rea.c
++++ b/storage/innobase/buf/buf0rea.c
+@@ -427,6 +427,10 @@
+               = BUF_READ_AHEAD_AREA(buf_pool);
        ulint           threshold;
  
 +      if (!(srv_read_ahead & 2)) {
@@ -50,10 +130,85 @@ diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
        if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
                /* No read-ahead to avoid thread deadlocks */
                return(0);
-diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
---- a/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:09:51.283956391 +0900
-+++ b/storage/innobase/handler/ha_innodb.cc    2010-12-03 15:10:08.963980444 +0900
-@@ -444,6 +444,12 @@
+--- a/storage/innobase/fil/fil0fil.c
++++ b/storage/innobase/fil/fil0fil.c
+@@ -2609,7 +2609,7 @@
+               os_thread_sleep(20000);
+-              fil_flush(id);
++              fil_flush(id, TRUE);
+               goto retry;
+@@ -2823,7 +2823,7 @@
+               goto error_exit;
+       }
+-      ret = os_file_flush(file);
++      ret = os_file_flush(file, TRUE);
+       if (!ret) {
+               fputs("InnoDB: Error: file flush of tablespace ", stderr);
+@@ -3009,7 +3009,7 @@
+               }
+       }
+-      success = os_file_flush(file);
++      success = os_file_flush(file, TRUE);
+       if (!success) {
+               goto func_exit;
+@@ -3031,7 +3031,7 @@
+               goto func_exit;
+       }
+-      success = os_file_flush(file);
++      success = os_file_flush(file, TRUE);
+ func_exit:
+       os_file_close(file);
+       ut_free(buf2);
+@@ -4014,7 +4014,7 @@
+       size_after_extend, *actual_size); */
+       mutex_exit(&fil_system->mutex);
+-      fil_flush(space_id);
++      fil_flush(space_id, TRUE);
+       return(success);
+ }
+@@ -4585,8 +4585,9 @@
+ void
+ fil_flush(
+ /*======*/
+-      ulint   space_id)       /*!< in: file space id (this can be a group of
++      ulint   space_id,       /*!< in: file space id (this can be a group of
+                               log files or a tablespace of the database) */
++      ibool   metadata)
+ {
+       fil_space_t*    space;
+       fil_node_t*     node;
+@@ -4657,7 +4658,7 @@
+                       /* fprintf(stderr, "Flushing to file %s\n",
+                       node->name); */
+-                      os_file_flush(file);
++                      os_file_flush(file, metadata);
+                       mutex_enter(&fil_system->mutex);
+@@ -4740,7 +4741,7 @@
+       a non-existing space id. */
+       for (i = 0; i < n_space_ids; i++) {
+-              fil_flush(space_ids[i]);
++              fil_flush(space_ids[i], TRUE);
+       }
+       mem_free(space_ids);
+--- a/storage/innobase/handler/ha_innodb.cc
++++ b/storage/innobase/handler/ha_innodb.cc
+@@ -445,6 +445,12 @@
    "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
    NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
  
@@ -66,7 +221,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_
  
  static handler *innobase_create_handler(handlerton *hton,
                                          TABLE_SHARE *table,
-@@ -838,6 +844,17 @@
+@@ -841,6 +847,17 @@
        }
  }
  
@@ -84,7 +239,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_
  /********************************************************************//**
  Obtain the InnoDB transaction of a MySQL thread.
  @return       reference to transaction pointer */
-@@ -2437,6 +2454,9 @@
+@@ -2471,6 +2488,9 @@
        srv_n_read_io_threads = (ulint) innobase_read_io_threads;
        srv_n_write_io_threads = (ulint) innobase_write_io_threads;
  
@@ -94,7 +249,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_
        srv_force_recovery = (ulint) innobase_force_recovery;
  
        srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
-@@ -11025,7 +11045,7 @@
+@@ -11141,7 +11161,7 @@
    PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
    "Purge threads can be either 0 or 1.",
    NULL, NULL,
@@ -103,7 +258,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_
    0,                  /* Minimum value */
    1, 0);              /* Maximum value */
  
-@@ -11067,12 +11087,18 @@
+@@ -11183,12 +11203,18 @@
    innodb_file_format_max_validate,
    innodb_file_format_max_update, "Antelope");
  
@@ -128,7 +283,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_
  
  static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
    PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
-@@ -11167,7 +11193,7 @@
+@@ -11293,7 +11319,7 @@
  static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
    PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
    "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
@@ -137,7 +292,7 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_
  
  static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
    PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
-@@ -11319,6 +11345,95 @@
+@@ -11442,6 +11468,127 @@
    "trigger a readahead.",
    NULL, NULL, 56, 0, 64, 0);
  
@@ -161,10 +316,42 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_
 +  "Control soft limit of checkpoint age. (0 : not control)",
 +  NULL, NULL, 0, 0, ~0UL, 0);
 +
-+static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages,
-+  PLUGIN_VAR_RQCMDARG,
-+  "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
-+  NULL, NULL, 1, 0, 1, 0);
++static
++void
++innodb_flush_neighbor_pages_update(
++  THD* thd,
++  struct st_mysql_sys_var* var,
++  void* var_ptr,
++  const void* save)
++{
++  *(long *)var_ptr = (*(long *)save) % 3;
++}
++
++const char *flush_neighbor_pages_names[]=
++{
++  "none", /* 0 */
++  "area",
++  "cont", /* 2 */
++  /* For compatibility with the older patch */
++  "0", /* "none" + 3 */
++  "1", /* "area" + 3 */
++  "2", /* "cont" + 3 */
++  NullS
++};
++
++TYPELIB flush_neighbor_pages_typelib=
++{
++  array_elements(flush_neighbor_pages_names) - 1,
++  "flush_neighbor_pages_typelib",
++  flush_neighbor_pages_names,
++  NULL
++};
++
++static MYSQL_SYSVAR_ENUM(flush_neighbor_pages, srv_flush_neighbor_pages,
++  PLUGIN_VAR_RQCMDARG, "Neighbor page flushing behaviour: none: do not flush, "
++                       "[area]: flush selected pages one-by-one, "
++                       "cont: flush a contiguous block of pages", NULL,
++  innodb_flush_neighbor_pages_update, 1, &flush_neighbor_pages_typelib);
 +
 +static
 +void
@@ -233,15 +420,15 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_
  static struct st_mysql_sys_var* innobase_system_variables[]= {
    MYSQL_SYSVAR(additional_mem_pool_size),
    MYSQL_SYSVAR(autoextend_increment),
-@@ -11339,6 +11454,7 @@
+@@ -11462,6 +11609,7 @@
    MYSQL_SYSVAR(file_format_check),
    MYSQL_SYSVAR(file_format_max),
    MYSQL_SYSVAR(flush_log_at_trx_commit),
 +  MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
    MYSQL_SYSVAR(flush_method),
    MYSQL_SYSVAR(force_recovery),
-   MYSQL_SYSVAR(locks_unsafe_for_binlog),
-@@ -11376,6 +11492,13 @@
+   MYSQL_SYSVAR(large_prefix),
+@@ -11501,6 +11649,13 @@
    MYSQL_SYSVAR(show_verbose_locks),
    MYSQL_SYSVAR(show_locks_held),
    MYSQL_SYSVAR(version),
@@ -255,10 +442,9 @@ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_
    MYSQL_SYSVAR(use_sys_malloc),
    MYSQL_SYSVAR(use_native_aio),
    MYSQL_SYSVAR(change_buffering),
-diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
---- a/storage/innobase/ibuf/ibuf0ibuf.c        2010-11-03 07:01:13.000000000 +0900
-+++ b/storage/innobase/ibuf/ibuf0ibuf.c        2010-12-03 15:10:09.073984282 +0900
-@@ -514,8 +514,10 @@
+--- a/storage/innobase/ibuf/ibuf0ibuf.c
++++ b/storage/innobase/ibuf/ibuf0ibuf.c
+@@ -523,8 +523,10 @@
        grow in size, as the references on the upper levels of the tree can
        change */
  
@@ -271,7 +457,7 @@ diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.
  
        mutex_create(ibuf_pessimistic_insert_mutex_key,
                     &ibuf_pessimistic_insert_mutex,
-@@ -2753,9 +2755,11 @@
+@@ -2763,9 +2765,11 @@
        size = ibuf->size;
        max_size = ibuf->max_size;
  
@@ -283,10 +469,9 @@ diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.
  
        sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
  
-diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
---- a/storage/innobase/include/buf0rea.h       2010-11-03 07:01:13.000000000 +0900
-+++ b/storage/innobase/include/buf0rea.h       2010-12-03 15:10:09.076066335 +0900
-@@ -124,8 +124,7 @@
+--- a/storage/innobase/include/buf0rea.h
++++ b/storage/innobase/include/buf0rea.h
+@@ -149,8 +149,7 @@
  
  /** The size in pages of the area which the read-ahead algorithms read if
  invoked */
@@ -296,9 +481,21 @@ diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0re
  
  /** @name Modes used in read-ahead @{ */
  /** read only pages belonging to the insert buffer tree */
-diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
---- a/storage/innobase/include/ha_prototypes.h 2010-11-03 07:01:13.000000000 +0900
-+++ b/storage/innobase/include/ha_prototypes.h 2010-12-03 15:10:09.078026360 +0900
+--- a/storage/innobase/include/fil0fil.h
++++ b/storage/innobase/include/fil0fil.h
+@@ -663,8 +663,9 @@
+ void
+ fil_flush(
+ /*======*/
+-      ulint   space_id);      /*!< in: file space id (this can be a group of
++      ulint   space_id,       /*!< in: file space id (this can be a group of
+                               log files or a tablespace of the database) */
++      ibool   metadata);
+ /**********************************************************************//**
+ Flushes to disk writes in file spaces of the given type possibly cached by
+ the OS. */
+--- a/storage/innobase/include/ha_prototypes.h
++++ b/storage/innobase/include/ha_prototypes.h
 @@ -284,6 +284,13 @@
  /*===================*/
          void*   thd,  /*!< in: thread handle (THD*) */
@@ -313,9 +510,67 @@ diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/
  
  /**********************************************************************//**
  Get the current setting of the lower_case_table_names global parameter from
-diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
---- a/storage/innobase/include/srv0srv.h       2010-12-03 15:09:51.291955835 +0900
-+++ b/storage/innobase/include/srv0srv.h       2010-12-03 15:10:09.079029047 +0900
+--- a/storage/innobase/include/os0file.h
++++ b/storage/innobase/include/os0file.h
+@@ -296,8 +296,8 @@
+       pfs_os_file_write_func(name, file, buf, offset, offset_high,    \
+                              n, __FILE__, __LINE__)
+-# define os_file_flush(file)                                          \
+-      pfs_os_file_flush_func(file, __FILE__, __LINE__)
++# define os_file_flush(file, metadata)                                        \
++      pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
+ # define os_file_rename(key, oldpath, newpath)                                \
+       pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
+@@ -333,7 +333,7 @@
+ # define os_file_write(name, file, buf, offset, offset_high, n)               \
+       os_file_write_func(name, file, buf, offset, offset_high, n)
+-# define os_file_flush(file)  os_file_flush_func(file)
++# define os_file_flush(file, metadata)        os_file_flush_func(file, metadata)
+ # define os_file_rename(key, oldpath, newpath)                                \
+       os_file_rename_func(oldpath, newpath)
+@@ -781,6 +781,7 @@
+ pfs_os_file_flush_func(
+ /*===================*/
+       os_file_t       file,   /*!< in, own: handle to a file */
++      ibool           metadata,
+       const char*     src_file,/*!< in: file name where func invoked */
+       ulint           src_line);/*!< in: line where the func invoked */
+@@ -860,7 +861,8 @@
+ ibool
+ os_file_flush_func(
+ /*===============*/
+-      os_file_t       file);  /*!< in, own: handle to a file */
++      os_file_t       file,   /*!< in, own: handle to a file */
++      ibool           metadata);
+ /***********************************************************************//**
+ Retrieves the last error number if an error occurs in a file io function.
+ The number should be retrieved before any other OS calls (because they may
+--- a/storage/innobase/include/os0file.ic
++++ b/storage/innobase/include/os0file.ic
+@@ -369,6 +369,7 @@
+ pfs_os_file_flush_func(
+ /*===================*/
+       os_file_t       file,   /*!< in, own: handle to a file */
++      ibool           metadata,
+       const char*     src_file,/*!< in: file name where func invoked */
+       ulint           src_line)/*!< in: line where the func invoked */
+ {
+@@ -378,7 +379,7 @@
+       register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
+                                  src_file, src_line);
+-      result = os_file_flush_func(file);
++      result = os_file_flush_func(file, metadata);
+       register_pfs_file_io_end(locker, 0);
+--- a/storage/innobase/include/srv0srv.h
++++ b/storage/innobase/include/srv0srv.h
 @@ -138,7 +138,8 @@
  extern ulint  srv_n_log_files;
  extern ulint  srv_log_file_size;
@@ -325,8 +580,8 @@ diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0sr
 +extern char   srv_use_global_flush_log_at_trx_commit;
  extern char   srv_adaptive_flushing;
  
-@@ -216,6 +217,16 @@
+ /* If this flag is TRUE, then we will load the indexes' (and tables') metadata
+@@ -221,6 +222,16 @@
  extern ulong  srv_max_purge_lag;
  
  extern ulong  srv_replication_delay;
@@ -343,7 +598,7 @@ diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0sr
  /*-------------------------------------------*/
  
  extern ulint  srv_n_rows_inserted;
-@@ -394,8 +405,9 @@
+@@ -399,8 +410,9 @@
                                when writing data files, but do flush
                                after writing to log files */
        SRV_UNIX_NOSYNC,        /*!< do not flush after writing */
@@ -354,9 +609,8 @@ diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0sr
  };
  
  /** Alternatives for file i/o in Windows */
-diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
---- a/storage/innobase/log/log0log.c   2010-11-03 07:01:13.000000000 +0900
-+++ b/storage/innobase/log/log0log.c   2010-12-03 15:10:09.084023562 +0900
+--- a/storage/innobase/log/log0log.c
++++ b/storage/innobase/log/log0log.c
 @@ -48,6 +48,7 @@
  #include "srv0start.h"
  #include "trx0sys.h"
@@ -419,15 +673,19 @@ diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
  
                log->check_flush_or_checkpoint = TRUE;
        }
-@@ -1100,6 +1128,7 @@
+@@ -1100,9 +1128,10 @@
                group = (log_group_t*)((ulint)group - 1);
  
                if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
 +                  && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
                    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
  
-                       fil_flush(group->space_id);
-@@ -1121,8 +1150,9 @@
+-                      fil_flush(group->space_id);
++                      fil_flush(group->space_id, FALSE);
+               }
+ #ifdef UNIV_DEBUG
+@@ -1121,10 +1150,11 @@
                        logs and cannot end up here! */
  
        if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
@@ -436,8 +694,11 @@ diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
 -          && srv_flush_log_at_trx_commit != 2) {
 +          && thd_flush_log_at_trx_commit(NULL) != 2) {
  
-               fil_flush(group->space_id);
+-              fil_flush(group->space_id);
++              fil_flush(group->space_id, FALSE);
        }
+       mutex_enter(&(log_sys->mutex));
 @@ -1501,7 +1531,8 @@
  
        mutex_exit(&(log_sys->mutex));
@@ -448,6 +709,15 @@ diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
                /* O_DSYNC means the OS did not buffer the log file at all:
                so we have also flushed to disk what we have written */
  
+@@ -1511,7 +1542,7 @@
+               group = UT_LIST_GET_FIRST(log_sys->log_groups);
+-              fil_flush(group->space_id);
++              fil_flush(group->space_id, FALSE);
+               log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
+       }
 @@ -2120,10 +2151,10 @@
  
                sync = TRUE;
@@ -470,6 +740,15 @@ diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
                /* A checkpoint is not urgent: do it asynchronously */
  
                do_checkpoint = TRUE;
+@@ -2607,7 +2638,7 @@
+       mutex_exit(&(log_sys->mutex));
+-      fil_flush(group->archive_space_id);
++      fil_flush(group->archive_space_id, TRUE);
+       mutex_enter(&(log_sys->mutex));
 @@ -3349,6 +3380,17 @@
                log_sys->flushed_to_disk_lsn,
                log_sys->last_checkpoint_lsn);
@@ -488,9 +767,8 @@ diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
        current_time = time(NULL);
  
        time_elapsed = 0.001 + difftime(current_time,
-diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
---- a/storage/innobase/log/log0recv.c  2010-11-03 07:01:13.000000000 +0900
-+++ b/storage/innobase/log/log0recv.c  2010-12-03 15:10:09.089024191 +0900
+--- a/storage/innobase/log/log0recv.c
++++ b/storage/innobase/log/log0recv.c
 @@ -2906,9 +2906,12 @@
        ib_uint64_t     archived_lsn;
  #endif /* UNIV_LOG_ARCHIVE */
@@ -505,9 +783,26 @@ diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
  #ifdef UNIV_LOG_ARCHIVE
        ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
  /** TRUE when recovering from a checkpoint */
-diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
---- a/storage/innobase/os/os0file.c    2010-11-03 07:01:13.000000000 +0900
-+++ b/storage/innobase/os/os0file.c    2010-12-03 15:10:09.093023540 +0900
+@@ -3468,7 +3471,7 @@
+                       exit(1);
+               }
+-              os_file_flush(log_file);
++              os_file_flush(log_file, TRUE);
+               os_file_close(log_file);
+       }
+@@ -3492,7 +3495,7 @@
+       os_file_write(name, log_file, buf, 0, 0,
+                     LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
+-      os_file_flush(log_file);
++      os_file_flush(log_file, TRUE);
+       os_file_close(log_file);
+       ut_free(buf);
+--- a/storage/innobase/os/os0file.c
++++ b/storage/innobase/os/os0file.c
 @@ -1424,7 +1424,7 @@
  #endif
  #ifdef UNIV_NON_BUFFERED_IO
@@ -538,9 +833,121 @@ diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
  #ifdef USE_FILE_LOCK
        if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
  
-diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
---- a/storage/innobase/srv/srv0srv.c   2010-12-03 15:09:51.301987792 +0900
-+++ b/storage/innobase/srv/srv0srv.c   2010-12-03 15:13:29.369986988 +0900
+@@ -2008,7 +2013,7 @@
+       ut_free(buf2);
+-      ret = os_file_flush(file);
++      ret = os_file_flush(file, TRUE);
+       if (ret) {
+               return(TRUE);
+@@ -2046,7 +2051,8 @@
+ int
+ os_file_fsync(
+ /*==========*/
+-      os_file_t       file)   /*!< in: handle to a file */
++      os_file_t       file,   /*!< in: handle to a file */
++      ibool           metadata)
+ {
+       int     ret;
+       int     failures;
+@@ -2055,7 +2061,16 @@
+       failures = 0;
+       do {
++#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
++              if (metadata) {
++                      ret = fsync(file);
++              } else {
++                      ret = fdatasync(file);
++              }
++#else
++              (void) metadata;
+               ret = fsync(file);
++#endif
+               os_n_fsyncs++;
+@@ -2092,7 +2107,8 @@
+ ibool
+ os_file_flush_func(
+ /*===============*/
+-      os_file_t       file)   /*!< in, own: handle to a file */
++      os_file_t       file,   /*!< in, own: handle to a file */
++      ibool           metadata)
+ {
+ #ifdef __WIN__
+       BOOL    ret;
+@@ -2142,18 +2158,18 @@
+               /* If we are not on an operating system that supports this,
+               then fall back to a plain fsync. */
+-              ret = os_file_fsync(file);
++              ret = os_file_fsync(file, metadata);
+       } else {
+               ret = fcntl(file, F_FULLFSYNC, NULL);
+               if (ret) {
+                       /* If we are not on a file system that supports this,
+                       then fall back to a plain fsync. */
+-                      ret = os_file_fsync(file);
++                      ret = os_file_fsync(file, metadata);
+               }
+       }
+ #else
+-      ret = os_file_fsync(file);
++      ret = os_file_fsync(file, metadata);
+ #endif
+       if (ret == 0) {
+@@ -2336,7 +2352,7 @@
+               the OS crashes, a database page is only partially
+               physically written to disk. */
+-              ut_a(TRUE == os_file_flush(file));
++              ut_a(TRUE == os_file_flush(file, TRUE));
+       }
+ # endif /* UNIV_DO_FLUSH */
+@@ -2378,7 +2394,7 @@
+                       the OS crashes, a database page is only partially
+                       physically written to disk. */
+-                      ut_a(TRUE == os_file_flush(file));
++                      ut_a(TRUE == os_file_flush(file, TRUE));
+               }
+ # endif /* UNIV_DO_FLUSH */
+@@ -2750,7 +2766,7 @@
+ # ifdef UNIV_DO_FLUSH
+       if (!os_do_not_call_flush_at_each_write) {
+-              ut_a(TRUE == os_file_flush(file));
++              ut_a(TRUE == os_file_flush(file, TRUE));
+       }
+ # endif /* UNIV_DO_FLUSH */
+@@ -4296,7 +4312,7 @@
+ #ifdef UNIV_DO_FLUSH
+               if (slot->type == OS_FILE_WRITE
+                   && !os_do_not_call_flush_at_each_write) {
+-                      if (!os_file_flush(slot->file)) {
++                      if (!os_file_flush(slot->file, TRUE)) {
+                               ut_error;
+                       }
+               }
+@@ -4597,7 +4613,7 @@
+ #ifdef UNIV_DO_FLUSH
+               if (slot->type == OS_FILE_WRITE
+                   && !os_do_not_call_flush_at_each_write)
+-                  && !os_file_flush(slot->file) {
++                  && !os_file_flush(slot->file, TRUE) {
+                       ut_error;
+               }
+ #endif /* UNIV_DO_FLUSH */
+--- a/storage/innobase/srv/srv0srv.c
++++ b/storage/innobase/srv/srv0srv.c
 @@ -183,7 +183,8 @@
  UNIV_INTERN ulint     srv_log_file_size       = ULINT_MAX;
  /* size in database pages */
@@ -551,7 +958,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
  
  /* Try to flush dirty pages so as to avoid IO bursts at
  the checkpoints. */
-@@ -402,6 +403,17 @@
+@@ -404,6 +405,17 @@
  
  UNIV_INTERN ulong     srv_replication_delay           = 0;
  
@@ -561,7 +968,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
 +#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0)))
 +
 +UNIV_INTERN ulint     srv_checkpoint_age_target = 0;
-+UNIV_INTERN ulint     srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
++UNIV_INTERN ulint     srv_flush_neighbor_pages = 1; /* 0:disable 1:area 2:contiguous */
 +
 +UNIV_INTERN ulint     srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
 +UNIV_INTERN ulint     srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
@@ -569,7 +976,16 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
  /*-------------------------------------------*/
  UNIV_INTERN ulong     srv_n_spin_wait_rounds  = 30;
  UNIV_INTERN ulong     srv_n_free_tickets_to_enter = 500;
-@@ -2742,6 +2754,7 @@
+@@ -2713,7 +2725,7 @@
+       ut_ad(!mutex_own(&kernel_mutex));
+-      ut_a(srv_n_purge_threads == 0);
++      ut_a(srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0));
+       do {
+               /* Check for shutdown and change in purge config. */
+@@ -2746,6 +2758,7 @@
        ulint           n_pages_purged  = 0;
        ulint           n_bytes_merged;
        ulint           n_pages_flushed;
@@ -577,7 +993,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
        ulint           n_bytes_archived;
        ulint           n_tables_to_drop;
        ulint           n_ios;
-@@ -2749,7 +2762,20 @@
+@@ -2753,7 +2766,20 @@
        ulint           n_ios_very_old;
        ulint           n_pend_ios;
        ulint           next_itr_time;
@@ -598,7 +1014,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
  
  #ifdef UNIV_DEBUG_THREAD_CREATION
        fprintf(stderr, "Master thread starts, id %lu\n",
-@@ -2771,6 +2797,9 @@
+@@ -2775,6 +2801,9 @@
  
        mutex_exit(&kernel_mutex);
  
@@ -608,7 +1024,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
  loop:
        /*****************************************************************/
        /* ---- When there is database activity by users, we cycle in this
-@@ -2801,9 +2830,13 @@
+@@ -2805,9 +2834,13 @@
        /* Sleep for 1 second on entrying the for loop below the first time. */
        next_itr_time = ut_time_ms() + 1000;
  
@@ -622,7 +1038,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
                /* ALTER TABLE in MySQL requires on Unix that the table handler
                can drop tables lazily after there no longer are SELECT
                queries to them. */
-@@ -2827,6 +2860,7 @@
+@@ -2831,6 +2864,7 @@
                srv_main_thread_op_info = "sleeping";
                srv_main_1_second_loops++;
  
@@ -630,7 +1046,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
                if (next_itr_time > cur_time
                    && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
  
-@@ -2837,10 +2871,26 @@
+@@ -2841,10 +2875,26 @@
                                        (next_itr_time - cur_time)
                                         * 1000));
                        srv_main_sleeps++;
@@ -657,7 +1073,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
  
                /* Flush logs if needed */
                srv_sync_log_buffer_in_background();
-@@ -2860,7 +2910,7 @@
+@@ -2864,7 +2914,7 @@
                if (n_pend_ios < SRV_PEND_IO_THRESHOLD
                    && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
                        srv_main_thread_op_info = "doing insert buffer merge";
@@ -666,7 +1082,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
  
                        /* Flush logs if needed */
                        srv_sync_log_buffer_in_background();
-@@ -2877,7 +2927,11 @@
+@@ -2881,7 +2931,11 @@
                        n_pages_flushed = buf_flush_list(
                                PCT_IO(100), IB_ULONGLONG_MAX);
  
@@ -679,7 +1095,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
  
                        /* Try to keep the rate of flushing of dirty
                        pages such that redo log generation does not
-@@ -2893,6 +2947,224 @@
+@@ -2897,6 +2951,224 @@
                                                n_flush,
                                                IB_ULONGLONG_MAX);
                        }
@@ -904,7 +1320,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
                }
  
                if (srv_activity_count == old_activity_count) {
-@@ -2941,7 +3213,7 @@
+@@ -2945,12 +3217,12 @@
        even if the server were active */
  
        srv_main_thread_op_info = "doing insert buffer merge";
@@ -913,7 +1329,22 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
  
        /* Flush logs if needed */
        srv_sync_log_buffer_in_background();
-@@ -3049,7 +3321,7 @@
+-      if (srv_n_purge_threads == 0) {
++      if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
+               srv_main_thread_op_info = "master purging";
+               srv_master_do_purge();
+@@ -3028,7 +3300,7 @@
+               }
+       }
+-      if (srv_n_purge_threads == 0) {
++      if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
+               srv_main_thread_op_info = "master purging";
+               srv_master_do_purge();
+@@ -3053,7 +3325,7 @@
                buf_flush_list below. Otherwise, the system favors
                clean pages over cleanup throughput. */
                n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
@@ -922,7 +1353,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
        }
  
        srv_main_thread_op_info = "reserving kernel mutex";
-@@ -3189,6 +3461,7 @@
+@@ -3193,6 +3465,7 @@
        srv_slot_t*     slot;
        ulint           retries = 0;
        ulint           n_total_purged = ULINT_UNDEFINED;
@@ -930,7 +1361,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
  
        ut_a(srv_n_purge_threads == 1);
  
-@@ -3209,9 +3482,12 @@
+@@ -3213,9 +3486,12 @@
  
        mutex_exit(&kernel_mutex);
  
@@ -943,7 +1374,7 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
  
                /* If there are very few records to purge or the last
                purge didn't purge any records then wait for activity.
-@@ -3258,6 +3534,16 @@
+@@ -3262,6 +3538,16 @@
                } while (n_pages_purged > 0 && !srv_fast_shutdown);
  
                srv_sync_log_buffer_in_background();
@@ -960,10 +1391,9 @@ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
        }
  
        mutex_enter(&kernel_mutex);
-diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
---- a/storage/innobase/srv/srv0start.c 2010-11-03 07:01:13.000000000 +0900
-+++ b/storage/innobase/srv/srv0start.c 2010-12-03 15:10:09.103023543 +0900
-@@ -1217,6 +1217,9 @@
+--- a/storage/innobase/srv/srv0start.c
++++ b/storage/innobase/srv/srv0start.c
+@@ -1237,6 +1237,9 @@
        } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
                srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
  
@@ -973,9 +1403,8 @@ diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
        } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
                srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
  
-diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
---- a/storage/innobase/trx/trx0purge.c 2011-04-12 14:14:14.000000000 +0900
-+++ b/storage/innobase/trx/trx0purge.c 2011-04-12 14:15:44.000000000 +0900
+--- a/storage/innobase/trx/trx0purge.c
++++ b/storage/innobase/trx/trx0purge.c
 @@ -392,10 +392,10 @@
        trx_sys->rseg_history_len++;
        mutex_exit(&kernel_mutex);
@@ -989,9 +1418,8 @@ diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
  }
  
  /**********************************************************************//**
-diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
---- a/storage/innobase/trx/trx0trx.c   2010-11-03 07:01:13.000000000 +0900
-+++ b/storage/innobase/trx/trx0trx.c   2010-12-03 15:10:09.106023937 +0900
+--- a/storage/innobase/trx/trx0trx.c
++++ b/storage/innobase/trx/trx0trx.c
 @@ -984,6 +984,7 @@
        trx->read_view = NULL;
  
@@ -1105,3 +1533,116 @@ diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
  
                        /* Write the log but do not flush it to disk */
  
+--- a/mysql-test/include/default_mysqld.cnf
++++ b/mysql-test/include/default_mysqld.cnf
+@@ -29,7 +29,7 @@
+ max_heap_table_size=        1M
+ loose-innodb_data_file_path=      ibdata1:10M:autoextend
+-loose-innodb_buffer_pool_size=    8M
++loose-innodb_buffer_pool_size=    32M
+ loose-innodb_write_io_threads=    2
+ loose-innodb_read_io_threads=     2
+ loose-innodb_log_buffer_size=     1M
+--- a/mysql-test/suite/innodb/r/innodb.result
++++ b/mysql-test/suite/innodb/r/innodb.result
+@@ -1678,7 +1678,7 @@
+ drop table t1;
+ SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total';
+ variable_value
+-511
++2047
+ SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size';
+ variable_value
+ 16384
+--- /dev/null
++++ b/mysql-test/suite/innodb/r/percona_flush_contiguous_neighbors.result
+@@ -0,0 +1,21 @@
++DROP TABLE IF EXISTS t1;
++CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB;
++INSERT INTO t1(foo) VALUES ('a'), ('b');
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++DROP TABLE t1;
+--- /dev/null
++++ b/mysql-test/suite/innodb/t/percona_flush_contiguous_neighbors-master.opt
+@@ -0,0 +1 @@
++--innodb_flush_neighbor_pages=cont
+--- /dev/null
++++ b/mysql-test/suite/innodb/t/percona_flush_contiguous_neighbors.test
+@@ -0,0 +1,36 @@
++# Test for innodb_flush_neighbor_pages=contiguous.
++# The test is very crude: we simply overflow the buffer pool with such a number of
++# new/modified pages that some flushing is bound to happen.
++
++--source include/have_innodb.inc
++
++--disable_warnings
++DROP TABLE IF EXISTS t1;
++--enable_warnings
++
++CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB;
++
++INSERT INTO t1(foo) VALUES ('a'), ('b');
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++INSERT INTO t1(foo) SELECT foo FROM t1;
++
++# TODO: cannot record a stable value here.  A check of > 0 should be enough,
++# but the variable is not accessible through INFORMATION_SCHEMA currently.
++# SHOW GLOBAL STATUS LIKE 'Innodb_buffer_pool_pages_flushed';
++
++DROP TABLE t1;
+--- a/mysql-test/suite/innodb/t/innodb_cmp_drop_table-master.opt
++++ b/mysql-test/suite/innodb/t/innodb_cmp_drop_table-master.opt
+@@ -1 +1 @@
+---innodb-buffer-pool-size=8M
++--innodb-buffer-pool-size=32M
+--- a/mysql-test/suite/innodb/t/innodb_cmp_drop_table.test
++++ b/mysql-test/suite/innodb/t/innodb_cmp_drop_table.test
+@@ -36,13 +36,14 @@
+ -- disable_query_log
+--- let $i = 400
++-- let $i = 4000
++begin;
+ while ($i)
+ {
+   insert into t2 values(repeat('abcdefghijklmnopqrstuvwxyz',1000));
+   dec $i;
+ }
+-
++commit;
+ -- enable_query_log
+ # now there should be no 8K pages in the buffer pool
This page took 0.0661 seconds and 4 git commands to generate.