]> git.pld-linux.org Git - packages/mysql.git/blob - innodb_io_patches.patch
- up to 5.5.15
[packages/mysql.git] / innodb_io_patches.patch
1 # name       : innodb_io_patches.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
4 #
5 #!!! notice !!!
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 --- a/storage/innobase/buf/buf0buf.c
9 +++ b/storage/innobase/buf/buf0buf.c
10 @@ -320,6 +320,7 @@
11  
12         /* When we traverse all the flush lists we don't want another
13         thread to add a dirty page to any flush list. */
14 +       if (srv_buf_pool_instances > 1)
15         log_flush_order_mutex_enter();
16  
17         for (i = 0; i < srv_buf_pool_instances; i++) {
18 @@ -343,6 +344,7 @@
19                 }
20         }
21  
22 +       if (srv_buf_pool_instances > 1)
23         log_flush_order_mutex_exit();
24  
25         /* The returned answer may be out of date: the flush_list can
26 --- a/storage/innobase/buf/buf0flu.c
27 +++ b/storage/innobase/buf/buf0flu.c
28 @@ -857,7 +857,7 @@
29  flush:
30         /* Now flush the doublewrite buffer data to disk */
31  
32 -       fil_flush(TRX_SYS_SPACE);
33 +       fil_flush(TRX_SYS_SPACE, FALSE);
34  
35         /* We know that the writes have been flushed to disk now
36         and in recovery we will find them in the doublewrite buffer
37 @@ -1378,7 +1378,7 @@
38  
39         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
40  
41 -       if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
42 +       if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) {
43                 /* If there is little space, it is better not to flush
44                 any block except from the end of the LRU list */
45  
46 --- a/storage/innobase/buf/buf0rea.c
47 +++ b/storage/innobase/buf/buf0rea.c
48 @@ -260,6 +260,10 @@
49                 = BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
50         ulint           threshold;
51  
52 +       if (!(srv_read_ahead & 2)) {
53 +               return(0);
54 +       }
55 +
56         if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
57                 /* No read-ahead to avoid thread deadlocks */
58                 return(0);
59 --- a/storage/innobase/fil/fil0fil.c
60 +++ b/storage/innobase/fil/fil0fil.c
61 @@ -2601,7 +2601,7 @@
62  
63                 os_thread_sleep(20000);
64  
65 -               fil_flush(id);
66 +               fil_flush(id, TRUE);
67  
68                 goto retry;
69  
70 @@ -2815,7 +2815,7 @@
71                 goto error_exit;
72         }
73  
74 -       ret = os_file_flush(file);
75 +       ret = os_file_flush(file, TRUE);
76  
77         if (!ret) {
78                 fputs("InnoDB: Error: file flush of tablespace ", stderr);
79 @@ -3001,7 +3001,7 @@
80                 }
81         }
82  
83 -       success = os_file_flush(file);
84 +       success = os_file_flush(file, TRUE);
85         if (!success) {
86  
87                 goto func_exit;
88 @@ -3023,7 +3023,7 @@
89  
90                 goto func_exit;
91         }
92 -       success = os_file_flush(file);
93 +       success = os_file_flush(file, TRUE);
94  func_exit:
95         os_file_close(file);
96         ut_free(buf2);
97 @@ -4006,7 +4006,7 @@
98         size_after_extend, *actual_size); */
99         mutex_exit(&fil_system->mutex);
100  
101 -       fil_flush(space_id);
102 +       fil_flush(space_id, TRUE);
103  
104         return(success);
105  }
106 @@ -4577,8 +4577,9 @@
107  void
108  fil_flush(
109  /*======*/
110 -       ulint   space_id)       /*!< in: file space id (this can be a group of
111 +       ulint   space_id,       /*!< in: file space id (this can be a group of
112                                 log files or a tablespace of the database) */
113 +       ibool   metadata)
114  {
115         fil_space_t*    space;
116         fil_node_t*     node;
117 @@ -4649,7 +4650,7 @@
118                         /* fprintf(stderr, "Flushing to file %s\n",
119                         node->name); */
120  
121 -                       os_file_flush(file);
122 +                       os_file_flush(file, metadata);
123  
124                         mutex_enter(&fil_system->mutex);
125  
126 @@ -4732,7 +4733,7 @@
127         a non-existing space id. */
128         for (i = 0; i < n_space_ids; i++) {
129  
130 -               fil_flush(space_ids[i]);
131 +               fil_flush(space_ids[i], TRUE);
132         }
133  
134         mem_free(space_ids);
135 --- a/storage/innobase/handler/ha_innodb.cc
136 +++ b/storage/innobase/handler/ha_innodb.cc
137 @@ -445,6 +445,12 @@
138    "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
139    NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
140  
141 +static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
142 +  "Set to 0 (write and flush once per second),"
143 +  " 1 (write and flush at each commit)"
144 +  " or 2 (write at commit, flush once per second).",
145 +  NULL, NULL, 1, 0, 2, 0);
146 +
147  
148  static handler *innobase_create_handler(handlerton *hton,
149                                          TABLE_SHARE *table,
150 @@ -839,6 +845,17 @@
151         }
152  }
153  
154 +/******************************************************************//**
155 +Reads the flush_log_at_trx_commit THDVAR through the given thread handle (callers in this patch pass NULL). */
156 +extern "C" UNIV_INTERN
157 +ulong
158 +thd_flush_log_at_trx_commit(
159 +/*================================*/
160 +       void*   thd)
161 +{
162 +       return(THDVAR((THD*) thd, flush_log_at_trx_commit));
163 +}
164 +
165  /********************************************************************//**
166  Obtain the InnoDB transaction of a MySQL thread.
167  @return        reference to transaction pointer */
168 @@ -2442,6 +2459,9 @@
169         srv_n_read_io_threads = (ulint) innobase_read_io_threads;
170         srv_n_write_io_threads = (ulint) innobase_write_io_threads;
171  
172 +       srv_read_ahead &= 3;
173 +       srv_adaptive_flushing_method %= 3;
174 +
175         srv_force_recovery = (ulint) innobase_force_recovery;
176  
177         srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
178 @@ -11036,7 +11056,7 @@
179    PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
180    "Purge threads can be either 0 or 1.",
181    NULL, NULL,
182 -  0,                   /* Default setting */
183 +  1,                   /* Default setting */
184    0,                   /* Minimum value */
185    1, 0);               /* Maximum value */
186  
187 @@ -11078,12 +11098,18 @@
188    innodb_file_format_max_validate,
189    innodb_file_format_max_update, "Antelope");
190  
191 -static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
192 -  PLUGIN_VAR_OPCMDARG,
193 -  "Set to 0 (write and flush once per second),"
194 -  " 1 (write and flush at each commit)"
195 -  " or 2 (write at commit, flush once per second).",
196 -  NULL, NULL, 1, 0, 2, 0);
197 +/* Changed to the THDVAR */
198 +//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
199 +//  PLUGIN_VAR_OPCMDARG,
200 +//  "Set to 0 (write and flush once per second),"
201 +//  " 1 (write and flush at each commit)"
202 +//  " or 2 (write at commit, flush once per second).",
203 +//  NULL, NULL, 1, 0, 2, 0);
204 +
205 +static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
206 +  PLUGIN_VAR_NOCMDARG,
207 +  "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
208 +  NULL, NULL, TRUE);
209  
210  static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
211    PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
212 @@ -11183,7 +11209,7 @@
213  static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
214    PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
215    "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
216 -  NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
217 +  NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
218  
219  static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
220    PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
221 @@ -11335,6 +11361,95 @@
222    "trigger a readahead.",
223    NULL, NULL, 56, 0, 64, 0);
224  
225 +static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
226 +  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
227 +  "The maximum size of the insert buffer. (in bytes)",
228 +  NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
229 +
230 +static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
231 +  PLUGIN_VAR_RQCMDARG,
232 +  "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
233 +  NULL, NULL, 1, 0, 1, 0);
234 +
235 +static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
236 +  PLUGIN_VAR_RQCMDARG,
237 +  "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
238 +  NULL, NULL, 100, 100, 999999999, 0);
239 +
240 +static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target,
241 +  PLUGIN_VAR_RQCMDARG,
242 +  "Control soft limit of checkpoint age. (0 : not control)",
243 +  NULL, NULL, 0, 0, ~0UL, 0);
244 +
245 +static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages,
246 +  PLUGIN_VAR_RQCMDARG,
247 +  "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
248 +  NULL, NULL, 1, 0, 1, 0);
249 +
250 +static
251 +void
252 +innodb_read_ahead_update(
253 +  THD* thd,
254 +  struct st_mysql_sys_var*     var,
255 +  void*        var_ptr,
256 +  const void*  save)
257 +{
258 +  *(long *)var_ptr= (*(long *)save) & 3; /* keep the low two bits: compatibility aliases 4..7 ("0".."3") fold onto 0..3 */
259 +}
260 +const char *read_ahead_names[]=
261 +{
262 +  "none", /* 0 */
263 +  "random",
264 +  "linear",
265 +  "both", /* 3 */
266 +  /* For compatibility of the older patch */
267 +  "0", /* 4 ("none" + 4) */
268 +  "1",
269 +  "2",
270 +  "3", /* 7 ("both" + 4) */
271 +  NullS
272 +};
273 +TYPELIB read_ahead_typelib=
274 +{
275 +  array_elements(read_ahead_names) - 1, "read_ahead_typelib",
276 +  read_ahead_names, NULL
277 +};
278 +static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
279 +  PLUGIN_VAR_RQCMDARG,
280 +  "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]",
281 +  NULL, innodb_read_ahead_update, 2, &read_ahead_typelib);
282 +
283 +static /* update callback registered with MYSQL_SYSVAR_ENUM(adaptive_flushing_method, ...) */
284 +void
285 +innodb_adaptive_flushing_method_update(
286 +  THD* thd,
287 +  struct st_mysql_sys_var*     var,
288 +  void*        var_ptr,
289 +  const void*  save)
290 +{
291 +  *(long *)var_ptr= (*(long *)save) % 3; /* 3 methods + 3 numeric aliases: fold 3..5 onto 0..2, matching the "%= 3" done at startup */
292 +}
293 +const char *adaptive_flushing_method_names[]=
294 +{
295 +  "native", /* 0 */
296 +  "estimate", /* 1 */
297 +  "keep_average", /* 2 */
298 +  /* Numeric aliases kept for compatibility with the older patch */
299 +  "0", /* 3 ("native" + 3) */
300 +  "1", /* 4 ("estimate" + 3) */
301 +  "2", /* 5 ("keep_average" + 3) */
302 +  NullS
303 +};
304 +TYPELIB adaptive_flushing_method_typelib=
305 +{
306 +  array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib",
307 +  adaptive_flushing_method_names, NULL
308 +};
309 +static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method,
310 +  PLUGIN_VAR_RQCMDARG,
311 +  "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
312 +  NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
313 +
314  static struct st_mysql_sys_var* innobase_system_variables[]= {
315    MYSQL_SYSVAR(additional_mem_pool_size),
316    MYSQL_SYSVAR(autoextend_increment),
317 @@ -11355,6 +11470,7 @@
318    MYSQL_SYSVAR(file_format_check),
319    MYSQL_SYSVAR(file_format_max),
320    MYSQL_SYSVAR(flush_log_at_trx_commit),
321 +  MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
322    MYSQL_SYSVAR(flush_method),
323    MYSQL_SYSVAR(force_recovery),
324    MYSQL_SYSVAR(large_prefix),
325 @@ -11393,6 +11509,13 @@
326    MYSQL_SYSVAR(show_verbose_locks),
327    MYSQL_SYSVAR(show_locks_held),
328    MYSQL_SYSVAR(version),
329 +  MYSQL_SYSVAR(ibuf_max_size),
330 +  MYSQL_SYSVAR(ibuf_active_contract),
331 +  MYSQL_SYSVAR(ibuf_accel_rate),
332 +  MYSQL_SYSVAR(checkpoint_age_target),
333 +  MYSQL_SYSVAR(flush_neighbor_pages),
334 +  MYSQL_SYSVAR(read_ahead),
335 +  MYSQL_SYSVAR(adaptive_flushing_method),
336    MYSQL_SYSVAR(use_sys_malloc),
337    MYSQL_SYSVAR(use_native_aio),
338    MYSQL_SYSVAR(change_buffering),
339 --- a/storage/innobase/ibuf/ibuf0ibuf.c
340 +++ b/storage/innobase/ibuf/ibuf0ibuf.c
341 @@ -514,8 +514,10 @@
342         grow in size, as the references on the upper levels of the tree can
343         change */
344  
345 -       ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
346 -               / IBUF_POOL_SIZE_PER_MAX_SIZE;
347 +       ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
348 +               / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
349 +
350 +       srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
351  
352         mutex_create(ibuf_pessimistic_insert_mutex_key,
353                      &ibuf_pessimistic_insert_mutex,
354 @@ -2753,9 +2755,11 @@
355         size = ibuf->size;
356         max_size = ibuf->max_size;
357  
358 +       if (!srv_ibuf_active_contract) {
359         if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
360                 return;
361         }
362 +       }
363  
364         sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
365  
366 --- a/storage/innobase/include/buf0rea.h
367 +++ b/storage/innobase/include/buf0rea.h
368 @@ -124,8 +124,7 @@
369  
370  /** The size in pages of the area which the read-ahead algorithms read if
371  invoked */
372 -#define        BUF_READ_AHEAD_AREA(b)                                  \
373 -       ut_min(64, ut_2_power_up((b)->curr_size / 32))
374 +#define        BUF_READ_AHEAD_AREA(b)          64
375  
376  /** @name Modes used in read-ahead @{ */
377  /** read only pages belonging to the insert buffer tree */
378 --- a/storage/innobase/include/fil0fil.h
379 +++ b/storage/innobase/include/fil0fil.h
380 @@ -658,8 +658,9 @@
381  void
382  fil_flush(
383  /*======*/
384 -       ulint   space_id);      /*!< in: file space id (this can be a group of
385 +       ulint   space_id,       /*!< in: file space id (this can be a group of
386                                 log files or a tablespace of the database) */
387 +       ibool   metadata);
388  /**********************************************************************//**
389  Flushes to disk writes in file spaces of the given type possibly cached by
390  the OS. */
391 --- a/storage/innobase/include/ha_prototypes.h
392 +++ b/storage/innobase/include/ha_prototypes.h
393 @@ -284,6 +284,13 @@
394  /*===================*/
395          void*   thd,   /*!< in: thread handle (THD*) */
396          ulint   value);        /*!< in: time waited for the lock */
397 +/******************************************************************//**
398 +*/
399 +
400 +ulong
401 +thd_flush_log_at_trx_commit(
402 +/*================================*/
403 +       void*   thd);
404  
405  /**********************************************************************//**
406  Get the current setting of the lower_case_table_names global parameter from
407 --- a/storage/innobase/include/os0file.h
408 +++ b/storage/innobase/include/os0file.h
409 @@ -296,8 +296,8 @@
410         pfs_os_file_write_func(name, file, buf, offset, offset_high,    \
411                                n, __FILE__, __LINE__)
412  
413 -# define os_file_flush(file)                                           \
414 -       pfs_os_file_flush_func(file, __FILE__, __LINE__)
415 +# define os_file_flush(file, metadata)                                 \
416 +       pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
417  
418  # define os_file_rename(key, oldpath, newpath)                         \
419         pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
420 @@ -333,7 +333,7 @@
421  # define os_file_write(name, file, buf, offset, offset_high, n)                \
422         os_file_write_func(name, file, buf, offset, offset_high, n)
423  
424 -# define os_file_flush(file)   os_file_flush_func(file)
425 +# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
426  
427  # define os_file_rename(key, oldpath, newpath)                         \
428         os_file_rename_func(oldpath, newpath)
429 @@ -781,6 +781,7 @@
430  pfs_os_file_flush_func(
431  /*===================*/
432         os_file_t       file,   /*!< in, own: handle to a file */
433 +       ibool           metadata,
434         const char*     src_file,/*!< in: file name where func invoked */
435         ulint           src_line);/*!< in: line where the func invoked */
436  
437 @@ -860,7 +861,8 @@
438  ibool
439  os_file_flush_func(
440  /*===============*/
441 -       os_file_t       file);  /*!< in, own: handle to a file */
442 +       os_file_t       file,   /*!< in, own: handle to a file */
443 +       ibool           metadata);
444  /***********************************************************************//**
445  Retrieves the last error number if an error occurs in a file io function.
446  The number should be retrieved before any other OS calls (because they may
447 --- a/storage/innobase/include/os0file.ic
448 +++ b/storage/innobase/include/os0file.ic
449 @@ -369,6 +369,7 @@
450  pfs_os_file_flush_func(
451  /*===================*/
452         os_file_t       file,   /*!< in, own: handle to a file */
453 +       ibool           metadata,
454         const char*     src_file,/*!< in: file name where func invoked */
455         ulint           src_line)/*!< in: line where the func invoked */
456  {
457 @@ -378,7 +379,7 @@
458  
459         register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
460                                    src_file, src_line);
461 -       result = os_file_flush_func(file);
462 +       result = os_file_flush_func(file, metadata);
463  
464         register_pfs_file_io_end(locker, 0);
465  
466 --- a/storage/innobase/include/srv0srv.h
467 +++ b/storage/innobase/include/srv0srv.h
468 @@ -138,7 +138,8 @@
469  extern ulint   srv_n_log_files;
470  extern ulint   srv_log_file_size;
471  extern ulint   srv_log_buffer_size;
472 -extern ulong   srv_flush_log_at_trx_commit;
473 +//extern ulong srv_flush_log_at_trx_commit;
474 +extern char    srv_use_global_flush_log_at_trx_commit;
475  extern char    srv_adaptive_flushing;
476  
477  
478 @@ -216,6 +217,16 @@
479  extern ulong   srv_max_purge_lag;
480  
481  extern ulong   srv_replication_delay;
482 +
483 +extern long long       srv_ibuf_max_size;
484 +extern ulint   srv_ibuf_active_contract;
485 +extern ulint   srv_ibuf_accel_rate;
486 +extern ulint   srv_checkpoint_age_target;
487 +extern ulint   srv_flush_neighbor_pages;
488 +extern ulint   srv_enable_unsafe_group_commit;
489 +extern ulint   srv_read_ahead;
490 +extern ulint   srv_adaptive_flushing_method;
491 +
492  /*-------------------------------------------*/
493  
494  extern ulint   srv_n_rows_inserted;
495 @@ -394,8 +405,9 @@
496                                 when writing data files, but do flush
497                                 after writing to log files */
498         SRV_UNIX_NOSYNC,        /*!< do not flush after writing */
499 -       SRV_UNIX_O_DIRECT       /*!< invoke os_file_set_nocache() on
500 +       SRV_UNIX_O_DIRECT,      /*!< invoke os_file_set_nocache() on
501                                 data files */
502 +       SRV_UNIX_ALL_O_DIRECT   /* new method for examination: logfile also open O_DIRECT */
503  };
504  
505  /** Alternatives for file i/o in Windows */
506 --- a/storage/innobase/log/log0log.c
507 +++ b/storage/innobase/log/log0log.c
508 @@ -48,6 +48,7 @@
509  #include "srv0start.h"
510  #include "trx0sys.h"
511  #include "trx0trx.h"
512 +#include "ha_prototypes.h"
513  
514  /*
515  General philosophy of InnoDB redo-logs:
516 @@ -359,6 +360,33 @@
517  }
518  
519  /************************************************************//**
520 +Returns log_sys->max_modified_age_async, bounded (via ut_min) by srv_checkpoint_age_target minus one eighth of it when that target is nonzero. */
521 +UNIV_INLINE
522 +ulint
523 +log_max_modified_age_async()
524 +{
525 +       if (srv_checkpoint_age_target) {
526 +               return(ut_min(log_sys->max_modified_age_async,
527 +                               srv_checkpoint_age_target
528 +                               - srv_checkpoint_age_target / 8));
529 +       } else {
530 +               return(log_sys->max_modified_age_async);
531 +       }
532 +}
533 +
534 +UNIV_INLINE
535 +ulint
536 +log_max_checkpoint_age_async()
537 +{
538 +       if (srv_checkpoint_age_target) { /* nonzero target: bound the async checkpoint age by it */
539 +               return(ut_min(log_sys->max_checkpoint_age_async,
540 +                               srv_checkpoint_age_target));
541 +       } else { /* target of 0 means "not controlled": use the log system's own limit */
542 +               return(log_sys->max_checkpoint_age_async);
543 +       }
544 +}
545 +
546 +/************************************************************//**
547  Closes the log.
548  @return        lsn */
549  UNIV_INTERN
550 @@ -427,7 +455,7 @@
551                 }
552         }
553  
554 -       if (checkpoint_age <= log->max_modified_age_async) {
555 +       if (checkpoint_age <= log_max_modified_age_async()) {
556  
557                 goto function_exit;
558         }
559 @@ -435,8 +463,8 @@
560         oldest_lsn = buf_pool_get_oldest_modification();
561  
562         if (!oldest_lsn
563 -           || lsn - oldest_lsn > log->max_modified_age_async
564 -           || checkpoint_age > log->max_checkpoint_age_async) {
565 +           || lsn - oldest_lsn > log_max_modified_age_async()
566 +           || checkpoint_age > log_max_checkpoint_age_async()) {
567  
568                 log->check_flush_or_checkpoint = TRUE;
569         }
570 @@ -1100,9 +1128,10 @@
571                 group = (log_group_t*)((ulint)group - 1);
572  
573                 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
574 +                   && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
575                     && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
576  
577 -                       fil_flush(group->space_id);
578 +                       fil_flush(group->space_id, FALSE);
579                 }
580  
581  #ifdef UNIV_DEBUG
582 @@ -1121,10 +1150,11 @@
583                         logs and cannot end up here! */
584  
585         if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
586 +           && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
587             && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
588 -           && srv_flush_log_at_trx_commit != 2) {
589 +           && thd_flush_log_at_trx_commit(NULL) != 2) {
590  
591 -               fil_flush(group->space_id);
592 +               fil_flush(group->space_id, FALSE);
593         }
594  
595         mutex_enter(&(log_sys->mutex));
596 @@ -1501,7 +1531,8 @@
597  
598         mutex_exit(&(log_sys->mutex));
599  
600 -       if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
601 +       if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
602 +           || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
603                 /* O_DSYNC means the OS did not buffer the log file at all:
604                 so we have also flushed to disk what we have written */
605  
606 @@ -1511,7 +1542,7 @@
607  
608                 group = UT_LIST_GET_FIRST(log_sys->log_groups);
609  
610 -               fil_flush(group->space_id);
611 +               fil_flush(group->space_id, FALSE);
612                 log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
613         }
614  
615 @@ -2120,10 +2151,10 @@
616  
617                 sync = TRUE;
618                 advance = 2 * (age - log->max_modified_age_sync);
619 -       } else if (age > log->max_modified_age_async) {
620 +       } else if (age > log_max_modified_age_async()) {
621  
622                 /* A flush is not urgent: we do an asynchronous preflush */
623 -               advance = age - log->max_modified_age_async;
624 +               advance = age - log_max_modified_age_async();
625         } else {
626                 advance = 0;
627         }
628 @@ -2137,7 +2168,7 @@
629  
630                 do_checkpoint = TRUE;
631  
632 -       } else if (checkpoint_age > log->max_checkpoint_age_async) {
633 +       } else if (checkpoint_age > log_max_checkpoint_age_async()) {
634                 /* A checkpoint is not urgent: do it asynchronously */
635  
636                 do_checkpoint = TRUE;
637 @@ -2607,7 +2638,7 @@
638  
639         mutex_exit(&(log_sys->mutex));
640  
641 -       fil_flush(group->archive_space_id);
642 +       fil_flush(group->archive_space_id, TRUE);
643  
644         mutex_enter(&(log_sys->mutex));
645  
646 @@ -3349,6 +3380,17 @@
647                 log_sys->flushed_to_disk_lsn,
648                 log_sys->last_checkpoint_lsn);
649  
650 +       fprintf(file,
651 +               "Max checkpoint age    %lu\n"
652 +               "Checkpoint age target %lu\n"
653 +               "Modified age          %lu\n"
654 +               "Checkpoint age        %lu\n",
655 +               (ulong) log_sys->max_checkpoint_age,
656 +               (ulong) log_max_checkpoint_age_async(),
657 +               (ulong) (log_sys->lsn -
658 +                               log_buf_pool_get_oldest_modification()),
659 +               (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
660 +
661         current_time = time(NULL);
662  
663         time_elapsed = 0.001 + difftime(current_time,
664 --- a/storage/innobase/log/log0recv.c
665 +++ b/storage/innobase/log/log0recv.c
666 @@ -2906,9 +2906,12 @@
667         ib_uint64_t     archived_lsn;
668  #endif /* UNIV_LOG_ARCHIVE */
669         byte*           buf;
670 -       byte            log_hdr_buf[LOG_FILE_HDR_SIZE];
671 +       byte*           log_hdr_buf;
672 +       byte            log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
673         ulint           err;
674  
675 +       log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
676 +
677  #ifdef UNIV_LOG_ARCHIVE
678         ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
679  /** TRUE when recovering from a checkpoint */
680 @@ -3468,7 +3471,7 @@
681                         exit(1);
682                 }
683  
684 -               os_file_flush(log_file);
685 +               os_file_flush(log_file, TRUE);
686                 os_file_close(log_file);
687         }
688  
689 @@ -3492,7 +3495,7 @@
690  
691         os_file_write(name, log_file, buf, 0, 0,
692                       LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
693 -       os_file_flush(log_file);
694 +       os_file_flush(log_file, TRUE);
695         os_file_close(log_file);
696  
697         ut_free(buf);
698 --- a/storage/innobase/os/os0file.c
699 +++ b/storage/innobase/os/os0file.c
700 @@ -1424,7 +1424,7 @@
701  #endif
702  #ifdef UNIV_NON_BUFFERED_IO
703  # ifndef UNIV_HOTBACKUP
704 -               if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
705 +               if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
706                         /* Do not use unbuffered i/o to log files because
707                         value 2 denotes that we do not flush the log at every
708                         commit, but only once per second */
709 @@ -1440,7 +1440,7 @@
710                 attributes = 0;
711  #ifdef UNIV_NON_BUFFERED_IO
712  # ifndef UNIV_HOTBACKUP
713 -               if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
714 +               if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
715                         /* Do not use unbuffered i/o to log files because
716                         value 2 denotes that we do not flush the log at every
717                         commit, but only once per second */
718 @@ -1585,6 +1585,11 @@
719                 os_file_set_nocache(file, name, mode_str);
720         }
721  
722 +       /* ALL_O_DIRECT: O_DIRECT also for transaction log file */
723 +       if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
724 +               os_file_set_nocache(file, name, mode_str);
725 +       }
726 +
727  #ifdef USE_FILE_LOCK
728         if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
729  
730 @@ -2008,7 +2013,7 @@
731  
732         ut_free(buf2);
733  
734 -       ret = os_file_flush(file);
735 +       ret = os_file_flush(file, TRUE);
736  
737         if (ret) {
738                 return(TRUE);
739 @@ -2046,7 +2051,8 @@
740  int
741  os_file_fsync(
742  /*==========*/
743 -       os_file_t       file)   /*!< in: handle to a file */
744 +       os_file_t       file,   /*!< in: handle to a file */
745 +       ibool           metadata)
746  {
747         int     ret;
748         int     failures;
749 @@ -2055,7 +2061,16 @@
750         failures = 0;
751  
752         do {
753 +#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
754 +               if (metadata) {
755 +                       ret = fsync(file);
756 +               } else {
757 +                       ret = fdatasync(file);
758 +               }
759 +#else
760 +               (void) metadata;
761                 ret = fsync(file);
762 +#endif
763  
764                 os_n_fsyncs++;
765  
766 @@ -2092,7 +2107,8 @@
767  ibool
768  os_file_flush_func(
769  /*===============*/
770 -       os_file_t       file)   /*!< in, own: handle to a file */
771 +       os_file_t       file,   /*!< in, own: handle to a file */
772 +       ibool           metadata)
773  {
774  #ifdef __WIN__
775         BOOL    ret;
776 @@ -2142,18 +2158,18 @@
777                 /* If we are not on an operating system that supports this,
778                 then fall back to a plain fsync. */
779  
780 -               ret = os_file_fsync(file);
781 +               ret = os_file_fsync(file, metadata);
782         } else {
783                 ret = fcntl(file, F_FULLFSYNC, NULL);
784  
785                 if (ret) {
786                         /* If we are not on a file system that supports this,
787                         then fall back to a plain fsync. */
788 -                       ret = os_file_fsync(file);
789 +                       ret = os_file_fsync(file, metadata);
790                 }
791         }
792  #else
793 -       ret = os_file_fsync(file);
794 +       ret = os_file_fsync(file, metadata);
795  #endif
796  
797         if (ret == 0) {
798 @@ -2336,7 +2352,7 @@
799                 the OS crashes, a database page is only partially
800                 physically written to disk. */
801  
802 -               ut_a(TRUE == os_file_flush(file));
803 +               ut_a(TRUE == os_file_flush(file, TRUE));
804         }
805  # endif /* UNIV_DO_FLUSH */
806  
807 @@ -2378,7 +2394,7 @@
808                         the OS crashes, a database page is only partially
809                         physically written to disk. */
810  
811 -                       ut_a(TRUE == os_file_flush(file));
812 +                       ut_a(TRUE == os_file_flush(file, TRUE));
813                 }
814  # endif /* UNIV_DO_FLUSH */
815  
816 @@ -2750,7 +2766,7 @@
817  
818  # ifdef UNIV_DO_FLUSH
819         if (!os_do_not_call_flush_at_each_write) {
820 -               ut_a(TRUE == os_file_flush(file));
821 +               ut_a(TRUE == os_file_flush(file, TRUE));
822         }
823  # endif /* UNIV_DO_FLUSH */
824  
825 @@ -4296,7 +4312,7 @@
826  #ifdef UNIV_DO_FLUSH
827                 if (slot->type == OS_FILE_WRITE
828                     && !os_do_not_call_flush_at_each_write) {
829 -                       if (!os_file_flush(slot->file)) {
830 +                       if (!os_file_flush(slot->file, TRUE)) {
831                                 ut_error;
832                         }
833                 }
834 @@ -4597,7 +4613,7 @@
835  #ifdef UNIV_DO_FLUSH
836                 if (slot->type == OS_FILE_WRITE
837                     && !os_do_not_call_flush_at_each_write)
838 -                   && !os_file_flush(slot->file) {
839 +                   && !os_file_flush(slot->file, TRUE) {
840                         ut_error;
841                 }
842  #endif /* UNIV_DO_FLUSH */
843 --- a/storage/innobase/srv/srv0srv.c
844 +++ b/storage/innobase/srv/srv0srv.c
845 @@ -183,7 +183,8 @@
846  UNIV_INTERN ulint      srv_log_file_size       = ULINT_MAX;
847  /* size in database pages */
848  UNIV_INTERN ulint      srv_log_buffer_size     = ULINT_MAX;
849 -UNIV_INTERN ulong      srv_flush_log_at_trx_commit = 1;
850 +//UNIV_INTERN ulong    srv_flush_log_at_trx_commit = 1;
851 +UNIV_INTERN char       srv_use_global_flush_log_at_trx_commit  = TRUE;
852  
853  /* Try to flush dirty pages so as to avoid IO bursts at
854  the checkpoints. */
855 @@ -402,6 +403,17 @@
856  
857  UNIV_INTERN ulong      srv_replication_delay           = 0;
858  
859 +UNIV_INTERN long long  srv_ibuf_max_size = 0;
860 +UNIV_INTERN ulint      srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
861 +UNIV_INTERN ulint      srv_ibuf_accel_rate = 100;
862 +#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0)))
863 +
864 +UNIV_INTERN ulint      srv_checkpoint_age_target = 0;
865 +UNIV_INTERN ulint      srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
866 +
867 +UNIV_INTERN ulint      srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
868 +UNIV_INTERN ulint      srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
869 +UNIV_INTERN ulint      srv_adaptive_flushing_method = 0; /* 0: native  1: estimate  2: keep_average */
870  /*-------------------------------------------*/
871  UNIV_INTERN ulong      srv_n_spin_wait_rounds  = 30;
872  UNIV_INTERN ulong      srv_n_free_tickets_to_enter = 500;
873 @@ -2709,7 +2721,7 @@
874  
875         ut_ad(!mutex_own(&kernel_mutex));
876  
877 -       ut_a(srv_n_purge_threads == 0);
878 +       ut_a(srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0));
879  
880         do {
881                 /* Check for shutdown and change in purge config. */
882 @@ -2742,6 +2754,7 @@
883         ulint           n_pages_purged  = 0;
884         ulint           n_bytes_merged;
885         ulint           n_pages_flushed;
886 +       ulint           n_pages_flushed_prev = 0;
887         ulint           n_bytes_archived;
888         ulint           n_tables_to_drop;
889         ulint           n_ios;
890 @@ -2749,7 +2762,20 @@
891         ulint           n_ios_very_old;
892         ulint           n_pend_ios;
893         ulint           next_itr_time;
894 +       ulint           prev_adaptive_flushing_method = ULINT_UNDEFINED;
895 +       ulint           inner_loop = 0;
896 +       ibool           skip_sleep      = FALSE;
897         ulint           i;
898 +       struct t_prev_flush_info_struct {
899 +               ulint           count;
900 +               unsigned        space:32;
901 +               unsigned        offset:32;
902 +               ib_uint64_t     oldest_modification;
903 +       } prev_flush_info[MAX_BUFFER_POOLS];
904 +
905 +       ib_uint64_t     lsn_old;
906 +
907 +       ib_uint64_t     oldest_lsn;
908  
909  #ifdef UNIV_DEBUG_THREAD_CREATION
910         fprintf(stderr, "Master thread starts, id %lu\n",
911 @@ -2771,6 +2797,9 @@
912  
913         mutex_exit(&kernel_mutex);
914  
915 +       mutex_enter(&(log_sys->mutex));
916 +       lsn_old = log_sys->lsn;
917 +       mutex_exit(&(log_sys->mutex));
918  loop:
919         /*****************************************************************/
920         /* ---- When there is database activity by users, we cycle in this
921 @@ -2801,9 +2830,13 @@
922         /* Sleep for 1 second on entrying the for loop below the first time. */
923         next_itr_time = ut_time_ms() + 1000;
924  
925 +       skip_sleep = FALSE;
926 +
927         for (i = 0; i < 10; i++) {
928                 ulint   cur_time = ut_time_ms();
929  
930 +               n_pages_flushed = 0; /* initialize */
931 +
932                 /* ALTER TABLE in MySQL requires on Unix that the table handler
933                 can drop tables lazily after there no longer are SELECT
934                 queries to them. */
935 @@ -2827,6 +2860,7 @@
936                 srv_main_thread_op_info = "sleeping";
937                 srv_main_1_second_loops++;
938  
939 +               if (!skip_sleep) {
940                 if (next_itr_time > cur_time
941                     && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
942  
943 @@ -2837,10 +2871,26 @@
944                                         (next_itr_time - cur_time)
945                                          * 1000));
946                         srv_main_sleeps++;
947 +
948 +                       /*
949 +                       mutex_enter(&(log_sys->mutex));
950 +                       oldest_lsn = buf_pool_get_oldest_modification();
951 +                       ib_uint64_t     lsn = log_sys->lsn;
952 +                       mutex_exit(&(log_sys->mutex));
953 +
954 +                       if(oldest_lsn)
955 +                       fprintf(stderr,
956 +                               "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
957 +                               (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
958 +                               lsn - lsn_old);
959 +                       */
960                 }
961  
962                 /* Each iteration should happen at 1 second interval. */
963                 next_itr_time = ut_time_ms() + 1000;
964 +               } /* if (!skip_sleep) */
965 +
966 +               skip_sleep = FALSE;
967  
968                 /* Flush logs if needed */
969                 srv_sync_log_buffer_in_background();
970 @@ -2860,7 +2910,7 @@
971                 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
972                     && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
973                         srv_main_thread_op_info = "doing insert buffer merge";
974 -                       ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
975 +                       ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
976  
977                         /* Flush logs if needed */
978                         srv_sync_log_buffer_in_background();
979 @@ -2877,7 +2927,11 @@
980                         n_pages_flushed = buf_flush_list(
981                                 PCT_IO(100), IB_ULONGLONG_MAX);
982  
983 -               } else if (srv_adaptive_flushing) {
984 +                       mutex_enter(&(log_sys->mutex));
985 +                       lsn_old = log_sys->lsn;
986 +                       mutex_exit(&(log_sys->mutex));
987 +                       prev_adaptive_flushing_method = ULINT_UNDEFINED;
988 +               } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
989  
990                         /* Try to keep the rate of flushing of dirty
991                         pages such that redo log generation does not
992 @@ -2893,6 +2947,224 @@
993                                                 n_flush,
994                                                 IB_ULONGLONG_MAX);
995                         }
996 +
997 +                       mutex_enter(&(log_sys->mutex));
998 +                       lsn_old = log_sys->lsn;
999 +                       mutex_exit(&(log_sys->mutex));
1000 +                       prev_adaptive_flushing_method = ULINT_UNDEFINED;
1001 +               } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
1002 +
1003 +                       /* Try to keep the modified age from exceeding
1004 +                       the max_checkpoint_age * 7/8 line */
1005 +
1006 +                       mutex_enter(&(log_sys->mutex));
1007 +
1008 +                       oldest_lsn = buf_pool_get_oldest_modification();
1009 +                       if (oldest_lsn == 0) {
1010 +                               lsn_old = log_sys->lsn;
1011 +                               mutex_exit(&(log_sys->mutex));
1012 +
1013 +                       } else {
1014 +                               if ((log_sys->lsn - oldest_lsn)
1015 +                                   > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
1016 +                                       /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
1017 +                                       /* We should not flush from here. */
1018 +                                       lsn_old = log_sys->lsn;
1019 +                                       mutex_exit(&(log_sys->mutex));
1020 +                               } else if ((log_sys->lsn - oldest_lsn)
1021 +                                          > (log_sys->max_checkpoint_age)/4 ) {
1022 +
1023 +                                       /* defence line (max_checkpoint_age * 1/4) */
1024 +                                       ib_uint64_t     lsn = log_sys->lsn;
1025 +
1026 +                                       ib_uint64_t     level, bpl;
1027 +                                       buf_page_t*     bpage;
1028 +                                       ulint           j;
1029 +
1030 +                                       mutex_exit(&(log_sys->mutex));
1031 +
1032 +                                       bpl = 0;
1033 +
1034 +                                       for (j = 0; j < srv_buf_pool_instances; j++) {
1035 +                                               buf_pool_t*     buf_pool;
1036 +                                               ulint           n_blocks;
1037 +
1038 +                                               buf_pool = buf_pool_from_array(j);
1039 +
1040 +                                               /* The flush_list is scanned optimistically here */
1041 +
1042 +                                               level = 0;
1043 +                                               n_blocks = 0;
1044 +                                               bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1045 +
1046 +                                               while (bpage != NULL) {
1047 +                                                       ib_uint64_t     oldest_modification = bpage->oldest_modification;
1048 +                                                       if (oldest_modification != 0) {
1049 +                                                               level += log_sys->max_checkpoint_age
1050 +                                                                        - (lsn - oldest_modification);
1051 +                                                       }
1052 +                                                       bpage = UT_LIST_GET_NEXT(list, bpage);
1053 +                                                       n_blocks++;
1054 +                                               }
1055 +
1056 +                                               if (level) {
1057 +                                                       bpl += ((ib_uint64_t) n_blocks * n_blocks
1058 +                                                               * (lsn - lsn_old)) / level;
1059 +                                               }
1060 +
1061 +                                       }
1062 +
1063 +                                       if (!srv_use_doublewrite_buf) {
1064 +                                               /* flushing is faster when the doublewrite buffer is not used */
1065 +                                               bpl = (bpl * 7) / 8;
1066 +                                       }
1067 +
1068 +                                       if (bpl) {
1069 +retry_flush_batch:
1070 +                                               n_pages_flushed = buf_flush_list(bpl,
1071 +                                                                       oldest_lsn + (lsn - lsn_old));
1072 +                                               if (n_pages_flushed == ULINT_UNDEFINED) {
1073 +                                                       os_thread_sleep(5000);
1074 +                                                       goto retry_flush_batch;
1075 +                                               }
1076 +                                       }
1077 +
1078 +                                       lsn_old = lsn;
1079 +                                       /*
1080 +                                       fprintf(stderr,
1081 +                                               "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
1082 +                                               (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1083 +                                               lsn - lsn_old, bpl);
1084 +                                       */
1085 +                               } else {
1086 +                                       lsn_old = log_sys->lsn;
1087 +                                       mutex_exit(&(log_sys->mutex));
1088 +                               }
1089 +                       }
1090 +                       prev_adaptive_flushing_method = 1;
1091 +               } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
1092 +                       buf_pool_t*     buf_pool;
1093 +                       buf_page_t*     bpage;
1094 +                       ib_uint64_t     lsn;
1095 +                       ulint           j;
1096 +
1097 +                       mutex_enter(&(log_sys->mutex));
1098 +                       oldest_lsn = buf_pool_get_oldest_modification();
1099 +                       lsn = log_sys->lsn;
1100 +                       mutex_exit(&(log_sys->mutex));
1101 +
1102 +                       /* run this loop 10x more often (every 100 ms instead of every second) */
1103 +                       next_itr_time -= 900; /* 1000 - 900 == 100 */
1104 +                       inner_loop++;
1105 +                       if (inner_loop < 10) {
1106 +                               i--;
1107 +                       } else {
1108 +                               inner_loop = 0;
1109 +                       }
1110 +
1111 +                       if (prev_adaptive_flushing_method == 2) {
1112 +                               lint    n_flush;
1113 +                               lint    blocks_sum;
1114 +                               ulint   new_blocks_sum, flushed_blocks_sum;
1115 +
1116 +                               blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
1117 +
1118 +                               /* prev_flush_info[j] should hold the values saved by the previous iteration */
1119 +                               for (j = 0; j < srv_buf_pool_instances; j++) {
1120 +                                       lint    blocks_num, new_blocks_num, flushed_blocks_num;
1121 +                                       ibool   found;
1122 +
1123 +                                       buf_pool = buf_pool_from_array(j);
1124 +
1125 +                                       blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
1126 +                                       bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1127 +                                       new_blocks_num = 0;
1128 +
1129 +                                       found = FALSE;
1130 +                                       while (bpage != NULL) {
1131 +                                               if (prev_flush_info[j].space == bpage->space
1132 +                                                   && prev_flush_info[j].offset == bpage->offset
1133 +                                                   && prev_flush_info[j].oldest_modification
1134 +                                                               == bpage->oldest_modification) {
1135 +                                                       found = TRUE;
1136 +                                                       break;
1137 +                                               }
1138 +                                               bpage = UT_LIST_GET_NEXT(list, bpage);
1139 +                                               new_blocks_num++;
1140 +                                       }
1141 +                                       if (!found) {
1142 +                                               new_blocks_num = blocks_num;
1143 +                                       }
1144 +
1145 +                                       flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
1146 +                                                               - blocks_num;
1147 +                                       if (flushed_blocks_num < 0) {
1148 +                                               flushed_blocks_num = 0;
1149 +                                       }
1150 +
1151 +                                       bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1152 +
1153 +                                       prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1154 +                                       if (bpage) {
1155 +                                               prev_flush_info[j].space = bpage->space;
1156 +                                               prev_flush_info[j].offset = bpage->offset;
1157 +                                               prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1158 +                                       } else {
1159 +                                               prev_flush_info[j].space = 0;
1160 +                                               prev_flush_info[j].offset = 0;
1161 +                                               prev_flush_info[j].oldest_modification = 0;
1162 +                                       }
1163 +
1164 +                                       new_blocks_sum += new_blocks_num;
1165 +                                       flushed_blocks_sum += flushed_blocks_num;
1166 +                                       blocks_sum += blocks_num;
1167 +                               }
1168 +
1169 +                               n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
1170 +                               if (flushed_blocks_sum > n_pages_flushed_prev) {
1171 +                                       n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
1172 +                               }
1173 +
1174 +                               if (n_flush > 0) {
1175 +                                       n_flush++;
1176 +                                       n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
1177 +                               } else {
1178 +                                       n_pages_flushed = 0;
1179 +                               }                                       
1180 +                       } else {
1181 +                               /* save the current first page of each flush_list for the next iteration */
1182 +                               for (j = 0; j < srv_buf_pool_instances; j++) {
1183 +                                       buf_pool = buf_pool_from_array(j);
1184 +
1185 +                                       bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1186 +
1187 +                                       prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1188 +                                       if (bpage) {
1189 +                                               prev_flush_info[j].space = bpage->space;
1190 +                                               prev_flush_info[j].offset = bpage->offset;
1191 +                                               prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1192 +                                       } else {
1193 +                                               prev_flush_info[j].space = 0;
1194 +                                               prev_flush_info[j].offset = 0;
1195 +                                               prev_flush_info[j].oldest_modification = 0;
1196 +                                       }
1197 +                               }
1198 +                               n_pages_flushed = 0;
1199 +                       }
1200 +
1201 +                       lsn_old = lsn;
1202 +                       prev_adaptive_flushing_method = 2;
1203 +               } else {
1204 +                       mutex_enter(&(log_sys->mutex));
1205 +                       lsn_old = log_sys->lsn;
1206 +                       mutex_exit(&(log_sys->mutex));
1207 +                       prev_adaptive_flushing_method = ULINT_UNDEFINED;
1208 +               }
1209 +
1210 +               if (n_pages_flushed == ULINT_UNDEFINED) {
1211 +                       n_pages_flushed_prev = 0;
1212 +               } else {
1213 +                       n_pages_flushed_prev = n_pages_flushed;
1214                 }
1215  
1216                 if (srv_activity_count == old_activity_count) {
1217 @@ -2941,12 +3213,12 @@
1218         even if the server were active */
1219  
1220         srv_main_thread_op_info = "doing insert buffer merge";
1221 -       ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1222 +       ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1223  
1224         /* Flush logs if needed */
1225         srv_sync_log_buffer_in_background();
1226  
1227 -       if (srv_n_purge_threads == 0) {
1228 +       if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1229                 srv_main_thread_op_info = "master purging";
1230  
1231                 srv_master_do_purge();
1232 @@ -3024,7 +3296,7 @@
1233                 }
1234         }
1235  
1236 -       if (srv_n_purge_threads == 0) {
1237 +       if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1238                 srv_main_thread_op_info = "master purging";
1239  
1240                 srv_master_do_purge();
1241 @@ -3049,7 +3321,7 @@
1242                 buf_flush_list below. Otherwise, the system favors
1243                 clean pages over cleanup throughput. */
1244                 n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
1245 -                                                          PCT_IO(100));
1246 +                                                          PCT_IBUF_IO(100));
1247         }
1248  
1249         srv_main_thread_op_info = "reserving kernel mutex";
1250 @@ -3189,6 +3461,7 @@
1251         srv_slot_t*     slot;
1252         ulint           retries = 0;
1253         ulint           n_total_purged = ULINT_UNDEFINED;
1254 +       ulint           next_itr_time;
1255  
1256         ut_a(srv_n_purge_threads == 1);
1257  
1258 @@ -3209,9 +3482,12 @@
1259  
1260         mutex_exit(&kernel_mutex);
1261  
1262 +       next_itr_time = ut_time_ms();
1263 +
1264         while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
1265  
1266                 ulint   n_pages_purged = 0;
1267 +               ulint   cur_time;
1268  
1269                 /* If there are very few records to purge or the last
1270                 purge didn't purge any records then wait for activity.
1271 @@ -3258,6 +3534,16 @@
1272                 } while (n_pages_purged > 0 && !srv_fast_shutdown);
1273  
1274                 srv_sync_log_buffer_in_background();
1275 +
1276 +               cur_time = ut_time_ms();
1277 +               if (next_itr_time > cur_time) {
1278 +                       os_thread_sleep(ut_min(1000000,
1279 +                                       (next_itr_time - cur_time)
1280 +                                        * 1000));
1281 +                       next_itr_time = ut_time_ms() + 1000;
1282 +               } else {
1283 +                       next_itr_time = cur_time + 1000;
1284 +               }
1285         }
1286  
1287         mutex_enter(&kernel_mutex);
1288 --- a/storage/innobase/srv/srv0start.c
1289 +++ b/storage/innobase/srv/srv0start.c
1290 @@ -1217,6 +1217,9 @@
1291         } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1292                 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1293  
1294 +       } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1295 +               srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1296 +
1297         } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1298                 srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1299  
1300 --- a/storage/innobase/trx/trx0purge.c
1301 +++ b/storage/innobase/trx/trx0purge.c
1302 @@ -392,10 +392,10 @@
1303         trx_sys->rseg_history_len++;
1304         mutex_exit(&kernel_mutex);
1305  
1306 -       if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
1307 +//     if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/
1308                 /* Inform the purge thread that there is work to do. */
1309                 srv_wake_purge_thread_if_not_active();
1310 -       }
1311 +//     }
1312  }
1313  
1314  /**********************************************************************//**
1315 --- a/storage/innobase/trx/trx0trx.c
1316 +++ b/storage/innobase/trx/trx0trx.c
1317 @@ -984,6 +984,7 @@
1318         trx->read_view = NULL;
1319  
1320         if (lsn) {
1321 +               ulint   flush_log_at_trx_commit;
1322  
1323                 mutex_exit(&kernel_mutex);
1324  
1325 @@ -992,6 +993,12 @@
1326                         trx_undo_insert_cleanup(trx);
1327                 }
1328  
1329 +               if (srv_use_global_flush_log_at_trx_commit) {
1330 +                       flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1331 +               } else {
1332 +                       flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1333 +               }
1334 +
1335                 /* NOTE that we could possibly make a group commit more
1336                 efficient here: call os_thread_yield here to allow also other
1337                 trxs to come to commit! */
1338 @@ -1023,9 +1030,9 @@
1339                 if (trx->flush_log_later) {
1340                         /* Do nothing yet */
1341                         trx->must_flush_log_later = TRUE;
1342 -               } else if (srv_flush_log_at_trx_commit == 0) {
1343 +               } else if (flush_log_at_trx_commit == 0) {
1344                         /* Do nothing */
1345 -               } else if (srv_flush_log_at_trx_commit == 1) {
1346 +               } else if (flush_log_at_trx_commit == 1) {
1347                         if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1348                                 /* Write the log but do not flush it to disk */
1349  
1350 @@ -1037,7 +1044,7 @@
1351  
1352                                 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1353                         }
1354 -               } else if (srv_flush_log_at_trx_commit == 2) {
1355 +               } else if (flush_log_at_trx_commit == 2) {
1356  
1357                         /* Write the log but do not flush it to disk */
1358  
1359 @@ -1701,16 +1708,23 @@
1360         trx_t*  trx)    /*!< in: trx handle */
1361  {
1362         ib_uint64_t     lsn     = trx->commit_lsn;
1363 +       ulint           flush_log_at_trx_commit;
1364  
1365         ut_a(trx);
1366  
1367         trx->op_info = "flushing log";
1368  
1369 +       if (srv_use_global_flush_log_at_trx_commit) {
1370 +               flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1371 +       } else {
1372 +               flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1373 +       }
1374 +
1375         if (!trx->must_flush_log_later) {
1376                 /* Do nothing */
1377 -       } else if (srv_flush_log_at_trx_commit == 0) {
1378 +       } else if (flush_log_at_trx_commit == 0) {
1379                 /* Do nothing */
1380 -       } else if (srv_flush_log_at_trx_commit == 1) {
1381 +       } else if (flush_log_at_trx_commit == 1) {
1382                 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1383                         /* Write the log but do not flush it to disk */
1384  
1385 @@ -1721,7 +1735,7 @@
1386  
1387                         log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1388                 }
1389 -       } else if (srv_flush_log_at_trx_commit == 2) {
1390 +       } else if (flush_log_at_trx_commit == 2) {
1391  
1392                 /* Write the log but do not flush it to disk */
1393  
1394 @@ -1969,6 +1983,8 @@
1395         /*--------------------------------------*/
1396  
1397         if (lsn) {
1398 +               ulint   flush_log_at_trx_commit;
1399 +
1400                 /* Depending on the my.cnf options, we may now write the log
1401                 buffer to the log files, making the prepared state of the
1402                 transaction durable if the OS does not crash. We may also
1403 @@ -1988,9 +2004,15 @@
1404  
1405                 mutex_exit(&kernel_mutex);
1406  
1407 -               if (srv_flush_log_at_trx_commit == 0) {
1408 +               if (srv_use_global_flush_log_at_trx_commit) {
1409 +                       flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1410 +               } else {
1411 +                       flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1412 +               }
1413 +
1414 +               if (flush_log_at_trx_commit == 0) {
1415                         /* Do nothing */
1416 -               } else if (srv_flush_log_at_trx_commit == 1) {
1417 +               } else if (flush_log_at_trx_commit == 1) {
1418                         if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1419                                 /* Write the log but do not flush it to disk */
1420  
1421 @@ -2002,7 +2024,7 @@
1422  
1423                                 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1424                         }
1425 -               } else if (srv_flush_log_at_trx_commit == 2) {
1426 +               } else if (flush_log_at_trx_commit == 2) {
1427  
1428                         /* Write the log but do not flush it to disk */
1429  
This page took 0.174374 seconds and 4 git commands to generate.