]> git.pld-linux.org Git - packages/mysql.git/blame - innodb_io_patches.patch
- mention innodb_file_per_table
[packages/mysql.git] / innodb_io_patches.patch
CommitLineData
b4e1fa2c
AM
1# name : innodb_io_patches.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
db82db79
AM
8--- a/storage/innobase/buf/buf0buf.c
9+++ b/storage/innobase/buf/buf0buf.c
b4e1fa2c
AM
10@@ -320,6 +320,7 @@
11
12 /* When we traverse all the flush lists we don't want another
13 thread to add a dirty page to any flush list. */
14+ if (srv_buf_pool_instances > 1)
15 log_flush_order_mutex_enter();
16
17 for (i = 0; i < srv_buf_pool_instances; i++) {
18@@ -343,6 +344,7 @@
19 }
20 }
21
22+ if (srv_buf_pool_instances > 1)
23 log_flush_order_mutex_exit();
24
25 /* The returned answer may be out of date: the flush_list can
db82db79
AM
26--- a/storage/innobase/buf/buf0flu.c
27+++ b/storage/innobase/buf/buf0flu.c
28@@ -857,7 +857,7 @@
413cadc7
AM
29 flush:
30 /* Now flush the doublewrite buffer data to disk */
31
32- fil_flush(TRX_SYS_SPACE);
33+ fil_flush(TRX_SYS_SPACE, FALSE);
34
35 /* We know that the writes have been flushed to disk now
36 and in recovery we will find them in the doublewrite buffer
db82db79 37@@ -1378,7 +1378,7 @@
b4e1fa2c
AM
38
39 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
40
41- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
42+ if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) {
43 /* If there is little space, it is better not to flush
44 any block except from the end of the LRU list */
45
db82db79
AM
46--- a/storage/innobase/buf/buf0rea.c
47+++ b/storage/innobase/buf/buf0rea.c
b4e1fa2c
AM
48@@ -260,6 +260,10 @@
49 = BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
50 ulint threshold;
51
52+ if (!(srv_read_ahead & 2)) {
53+ return(0);
54+ }
55+
56 if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
57 /* No read-ahead to avoid thread deadlocks */
58 return(0);
db82db79
AM
59--- a/storage/innobase/fil/fil0fil.c
60+++ b/storage/innobase/fil/fil0fil.c
61@@ -2601,7 +2601,7 @@
413cadc7
AM
62
63 os_thread_sleep(20000);
64
65- fil_flush(id);
66+ fil_flush(id, TRUE);
67
68 goto retry;
69
db82db79 70@@ -2815,7 +2815,7 @@
413cadc7
AM
71 goto error_exit;
72 }
73
74- ret = os_file_flush(file);
75+ ret = os_file_flush(file, TRUE);
76
77 if (!ret) {
78 fputs("InnoDB: Error: file flush of tablespace ", stderr);
db82db79 79@@ -3001,7 +3001,7 @@
413cadc7
AM
80 }
81 }
82
83- success = os_file_flush(file);
84+ success = os_file_flush(file, TRUE);
85 if (!success) {
86
87 goto func_exit;
db82db79 88@@ -3023,7 +3023,7 @@
413cadc7
AM
89
90 goto func_exit;
91 }
92- success = os_file_flush(file);
93+ success = os_file_flush(file, TRUE);
94 func_exit:
95 os_file_close(file);
96 ut_free(buf2);
db82db79 97@@ -4006,7 +4006,7 @@
413cadc7
AM
98 size_after_extend, *actual_size); */
99 mutex_exit(&fil_system->mutex);
100
101- fil_flush(space_id);
102+ fil_flush(space_id, TRUE);
103
104 return(success);
105 }
db82db79 106@@ -4577,8 +4577,9 @@
413cadc7
AM
107 void
108 fil_flush(
109 /*======*/
110- ulint space_id) /*!< in: file space id (this can be a group of
111+ ulint space_id, /*!< in: file space id (this can be a group of
112 log files or a tablespace of the database) */
113+ ibool metadata)
114 {
115 fil_space_t* space;
116 fil_node_t* node;
db82db79 117@@ -4649,7 +4650,7 @@
413cadc7
AM
118 /* fprintf(stderr, "Flushing to file %s\n",
119 node->name); */
120
121- os_file_flush(file);
122+ os_file_flush(file, metadata);
123
124 mutex_enter(&fil_system->mutex);
125
db82db79 126@@ -4732,7 +4733,7 @@
413cadc7
AM
127 a non-existing space id. */
128 for (i = 0; i < n_space_ids; i++) {
129
130- fil_flush(space_ids[i]);
131+ fil_flush(space_ids[i], TRUE);
132 }
133
134 mem_free(space_ids);
db82db79
AM
135--- a/storage/innobase/handler/ha_innodb.cc
136+++ b/storage/innobase/handler/ha_innodb.cc
137@@ -445,6 +445,12 @@
b4e1fa2c
AM
138 "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
139 NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
140
141+static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG, /* per-thread variable; read via thd_flush_log_at_trx_commit() */
142+ "Set to 0 (write and flush once per second),"
143+ " 1 (write and flush at each commit)"
144+ " or 2 (write at commit, flush once per second).",
145+ NULL, NULL, 1, 0, 2, 0); /* presumably: no check/update hooks, default 1, min 0, max 2 -- confirm against plugin.h */
146+
147
148 static handler *innobase_create_handler(handlerton *hton,
149 TABLE_SHARE *table,
db82db79 150@@ -839,6 +845,17 @@
b4e1fa2c
AM
151 }
152 }
153
154+/******************************************************************//**
155+Reads the flush_log_at_trx_commit THDVAR through the given thread handle. */
156+extern "C" UNIV_INTERN
157+ulong
158+thd_flush_log_at_trx_commit(
159+/*================================*/
160+ void* thd) /*!< in: thread handle, cast to THD* below */
161+{
162+ return(THDVAR((THD*) thd, flush_log_at_trx_commit)); /* NOTE(review): log0log.c and os0file.c pass NULL; presumably THDVAR then yields the global value -- confirm */
163+}
164+
165 /********************************************************************//**
166 Obtain the InnoDB transaction of a MySQL thread.
167 @return reference to transaction pointer */
db82db79 168@@ -2442,6 +2459,9 @@
b4e1fa2c
AM
169 srv_n_read_io_threads = (ulint) innobase_read_io_threads;
170 srv_n_write_io_threads = (ulint) innobase_write_io_threads;
171
172+ srv_read_ahead &= 3;
173+ srv_adaptive_flushing_method %= 3;
174+
175 srv_force_recovery = (ulint) innobase_force_recovery;
176
177 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
db82db79 178@@ -11036,7 +11056,7 @@
b4e1fa2c 179 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
11822e22 180 "Purge threads can be either 0 or 1.",
b4e1fa2c
AM
181 NULL, NULL,
182- 0, /* Default setting */
183+ 1, /* Default setting */
184 0, /* Minimum value */
185 1, 0); /* Maximum value */
186
db82db79 187@@ -11078,12 +11098,18 @@
b4e1fa2c
AM
188 innodb_file_format_max_validate,
189 innodb_file_format_max_update, "Antelope");
190
191-static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
192- PLUGIN_VAR_OPCMDARG,
193- "Set to 0 (write and flush once per second),"
194- " 1 (write and flush at each commit)"
195- " or 2 (write at commit, flush once per second).",
196- NULL, NULL, 1, 0, 2, 0);
197+/* Changed to the THDVAR */
198+//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
199+// PLUGIN_VAR_OPCMDARG,
200+// "Set to 0 (write and flush once per second),"
201+// " 1 (write and flush at each commit)"
202+// " or 2 (write at commit, flush once per second).",
203+// NULL, NULL, 1, 0, 2, 0);
204+
205+static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit, /* backed by the char global declared in srv0srv.h */
206+ PLUGIN_VAR_NOCMDARG,
207+ "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
208+ NULL, NULL, TRUE); /* default TRUE, same as the initializer in srv0srv.c */
209
210 static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
211 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
db82db79 212@@ -11183,7 +11209,7 @@
b4e1fa2c
AM
213 static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
214 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
215 "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
216- NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
217+ NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
218
219 static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
220 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
db82db79 221@@ -11335,6 +11361,95 @@
b4e1fa2c
AM
222 "trigger a readahead.",
223 NULL, NULL, 56, 0, 64, 0);
224
225+static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
226+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
227+ "The maximum size of the insert buffer. (in bytes)",
228+ NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0); /* ibuf0ibuf.c takes the min with its buffer-pool-derived limit and writes the result back */
229+
230+static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
231+ PLUGIN_VAR_RQCMDARG,
232+ "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
233+ NULL, NULL, 1, 0, 1, 0); /* ibuf0ibuf.c only takes its size-based early return when this is 0 */
234+
235+static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
236+ PLUGIN_VAR_RQCMDARG,
237+ "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
238+ NULL, NULL, 100, 100, 999999999, 0); /* multiplier in PCT_IBUF_IO(): srv_io_capacity * rate * pct / 10000 */
239+
240+static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target,
241+ PLUGIN_VAR_RQCMDARG,
242+ "Control soft limit of checkpoint age. (0 : not control)",
243+ NULL, NULL, 0, 0, ~0UL, 0); /* when 0, the log0log.c helpers return the log_sys async limits unchanged */
244+
245+static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages,
246+ PLUGIN_VAR_RQCMDARG,
247+ "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
248+ NULL, NULL, 1, 0, 1, 0); /* when 0, buf0flu.c takes the same branch it uses for a short LRU list */
249+
250+static /* update callback registered for the read_ahead enum sysvar */
251+void
252+innodb_read_ahead_update(
253+ THD* thd, /*!< in: not used in this body */
254+ struct st_mysql_sys_var* var, /*!< in: not used in this body */
255+ void* var_ptr, /*!< out: storage that receives the new value */
256+ const void* save) /*!< in: selected index into read_ahead_names */
257+{
258+ *(long *)var_ptr= (*(long *)save) & 3; /* folds compat aliases 4..7 ("0".."3") onto 0..3 */
259+}
260+const char *read_ahead_names[]= /* index is the enum value; entries 4..7 alias 0..3 */
261+{
262+ "none", /* 0 */
263+ "random", /* 1 */
264+ "linear", /* 2 */
265+ "both", /* 3 */
266+ /* For compatibility of the older patch */
267+ "0", /* 4 ("none" + 4) */
268+ "1", /* 5 ("random" + 4) */
269+ "2", /* 6 ("linear" + 4) */
270+ "3", /* 7 ("both" + 4) */
271+ NullS
272+};
273+TYPELIB read_ahead_typelib=
274+{
275+ array_elements(read_ahead_names) - 1, "read_ahead_typelib", /* count excludes the NullS terminator */
276+ read_ahead_names, NULL
277+};
278+static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
279+ PLUGIN_VAR_RQCMDARG,
280+ "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]",
281+ NULL, innodb_read_ahead_update, 2, &read_ahead_typelib); /* 2 is the index of "linear", the bracketed default in the help text */
282+
283+static /* update callback registered for the adaptive_flushing_method enum sysvar */
284+void
285+innodb_adaptive_flushing_method_update(
286+ THD* thd, /*!< in: not used in this body */
287+ struct st_mysql_sys_var* var, /*!< in: not used in this body */
288+ void* var_ptr, /*!< out: storage that receives the new value */
289+ const void* save) /*!< in: selected index into adaptive_flushing_method_names */
290+{
291+ *(long *)var_ptr= (*(long *)save) % 3; /* 3 methods: folds compat aliases 3..5 ("0".."2") onto 0..2, matching the %= 3 done at startup */
292+}
293+const char *adaptive_flushing_method_names[]= /* index is the enum value; entries 3..5 alias 0..2 */
294+{
295+ "native", /* 0 */
296+ "estimate", /* 1 */
297+ "keep_average", /* 2 */
298+ /* For compatibility of the older patch */
299+ "0", /* 3 ("native" + 3) */
300+ "1", /* 4 ("estimate" + 3) */
301+ "2", /* 5 ("keep_average" + 3) */
302+ NullS
303+};
304+TYPELIB adaptive_flushing_method_typelib=
305+{
306+ array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib", /* count excludes the NullS terminator */
307+ adaptive_flushing_method_names, NULL
308+};
309+static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method,
310+ PLUGIN_VAR_RQCMDARG,
311+ "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
312+ NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib); /* 1 is the index of "estimate", the bracketed default in the help text */
b4e1fa2c
AM
313+
314 static struct st_mysql_sys_var* innobase_system_variables[]= {
315 MYSQL_SYSVAR(additional_mem_pool_size),
316 MYSQL_SYSVAR(autoextend_increment),
db82db79 317@@ -11355,6 +11470,7 @@
b4e1fa2c
AM
318 MYSQL_SYSVAR(file_format_check),
319 MYSQL_SYSVAR(file_format_max),
320 MYSQL_SYSVAR(flush_log_at_trx_commit),
321+ MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
322 MYSQL_SYSVAR(flush_method),
323 MYSQL_SYSVAR(force_recovery),
db82db79
AM
324 MYSQL_SYSVAR(large_prefix),
325@@ -11393,6 +11509,13 @@
b4e1fa2c
AM
326 MYSQL_SYSVAR(show_verbose_locks),
327 MYSQL_SYSVAR(show_locks_held),
328 MYSQL_SYSVAR(version),
329+ MYSQL_SYSVAR(ibuf_max_size),
330+ MYSQL_SYSVAR(ibuf_active_contract),
331+ MYSQL_SYSVAR(ibuf_accel_rate),
332+ MYSQL_SYSVAR(checkpoint_age_target),
333+ MYSQL_SYSVAR(flush_neighbor_pages),
334+ MYSQL_SYSVAR(read_ahead),
335+ MYSQL_SYSVAR(adaptive_flushing_method),
b4e1fa2c
AM
336 MYSQL_SYSVAR(use_sys_malloc),
337 MYSQL_SYSVAR(use_native_aio),
338 MYSQL_SYSVAR(change_buffering),
db82db79
AM
339--- a/storage/innobase/ibuf/ibuf0ibuf.c
340+++ b/storage/innobase/ibuf/ibuf0ibuf.c
adf0fb13 341@@ -514,8 +514,10 @@
b4e1fa2c
AM
342 grow in size, as the references on the upper levels of the tree can
343 change */
344
345- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
346- / IBUF_POOL_SIZE_PER_MAX_SIZE;
347+ ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
348+ / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
349+
350+ srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
351
352 mutex_create(ibuf_pessimistic_insert_mutex_key,
353 &ibuf_pessimistic_insert_mutex,
adf0fb13 354@@ -2753,9 +2755,11 @@
b4e1fa2c
AM
355 size = ibuf->size;
356 max_size = ibuf->max_size;
357
358+ if (!srv_ibuf_active_contract) {
359 if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
360 return;
361 }
362+ }
363
364 sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
365
db82db79
AM
366--- a/storage/innobase/include/buf0rea.h
367+++ b/storage/innobase/include/buf0rea.h
b4e1fa2c
AM
368@@ -124,8 +124,7 @@
369
370 /** The size in pages of the area which the read-ahead algorithms read if
371 invoked */
372-#define BUF_READ_AHEAD_AREA(b) \
373- ut_min(64, ut_2_power_up((b)->curr_size / 32))
374+#define BUF_READ_AHEAD_AREA(b) 64
375
376 /** @name Modes used in read-ahead @{ */
377 /** read only pages belonging to the insert buffer tree */
db82db79
AM
378--- a/storage/innobase/include/fil0fil.h
379+++ b/storage/innobase/include/fil0fil.h
413cadc7
AM
380@@ -658,8 +658,9 @@
381 void
382 fil_flush(
383 /*======*/
384- ulint space_id); /*!< in: file space id (this can be a group of
385+ ulint space_id, /*!< in: file space id (this can be a group of
386 log files or a tablespace of the database) */
387+ ibool metadata);
388 /**********************************************************************//**
389 Flushes to disk writes in file spaces of the given type possibly cached by
390 the OS. */
db82db79
AM
391--- a/storage/innobase/include/ha_prototypes.h
392+++ b/storage/innobase/include/ha_prototypes.h
adf0fb13 393@@ -284,6 +284,13 @@
b4e1fa2c
AM
394 /*===================*/
395 void* thd, /*!< in: thread handle (THD*) */
396 ulint value); /*!< in: time waited for the lock */
397+/******************************************************************//**
398+Reads the flush_log_at_trx_commit THDVAR; defined in ha_innodb.cc. */
399+
400+ulong
401+thd_flush_log_at_trx_commit(
402+/*================================*/
403+ void* thd); /*!< in: thread handle (THD*) */
404
adf0fb13
AM
405 /**********************************************************************//**
406 Get the current setting of the lower_case_table_names global parameter from
db82db79
AM
407--- a/storage/innobase/include/os0file.h
408+++ b/storage/innobase/include/os0file.h
413cadc7
AM
409@@ -296,8 +296,8 @@
410 pfs_os_file_write_func(name, file, buf, offset, offset_high, \
411 n, __FILE__, __LINE__)
412
413-# define os_file_flush(file) \
414- pfs_os_file_flush_func(file, __FILE__, __LINE__)
415+# define os_file_flush(file, metadata) \
416+ pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
417
418 # define os_file_rename(key, oldpath, newpath) \
419 pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
420@@ -333,7 +333,7 @@
421 # define os_file_write(name, file, buf, offset, offset_high, n) \
422 os_file_write_func(name, file, buf, offset, offset_high, n)
423
424-# define os_file_flush(file) os_file_flush_func(file)
425+# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
426
427 # define os_file_rename(key, oldpath, newpath) \
428 os_file_rename_func(oldpath, newpath)
429@@ -781,6 +781,7 @@
430 pfs_os_file_flush_func(
431 /*===================*/
432 os_file_t file, /*!< in, own: handle to a file */
433+ ibool metadata,
434 const char* src_file,/*!< in: file name where func invoked */
435 ulint src_line);/*!< in: line where the func invoked */
436
437@@ -860,7 +861,8 @@
438 ibool
439 os_file_flush_func(
440 /*===============*/
441- os_file_t file); /*!< in, own: handle to a file */
442+ os_file_t file, /*!< in, own: handle to a file */
443+ ibool metadata);
444 /***********************************************************************//**
445 Retrieves the last error number if an error occurs in a file io function.
446 The number should be retrieved before any other OS calls (because they may
db82db79
AM
447--- a/storage/innobase/include/os0file.ic
448+++ b/storage/innobase/include/os0file.ic
413cadc7
AM
449@@ -369,6 +369,7 @@
450 pfs_os_file_flush_func(
451 /*===================*/
452 os_file_t file, /*!< in, own: handle to a file */
453+ ibool metadata,
454 const char* src_file,/*!< in: file name where func invoked */
455 ulint src_line)/*!< in: line where the func invoked */
456 {
457@@ -378,7 +379,7 @@
458
459 register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
460 src_file, src_line);
461- result = os_file_flush_func(file);
462+ result = os_file_flush_func(file, metadata);
463
464 register_pfs_file_io_end(locker, 0);
465
db82db79
AM
466--- a/storage/innobase/include/srv0srv.h
467+++ b/storage/innobase/include/srv0srv.h
adf0fb13 468@@ -138,7 +138,8 @@
b4e1fa2c
AM
469 extern ulint srv_n_log_files;
470 extern ulint srv_log_file_size;
471 extern ulint srv_log_buffer_size;
472-extern ulong srv_flush_log_at_trx_commit;
473+//extern ulong srv_flush_log_at_trx_commit;
474+extern char srv_use_global_flush_log_at_trx_commit;
475 extern char srv_adaptive_flushing;
476
477
adf0fb13 478@@ -216,6 +217,16 @@
b4e1fa2c
AM
479 extern ulong srv_max_purge_lag;
480
481 extern ulong srv_replication_delay;
482+
483+extern long long srv_ibuf_max_size;
484+extern ulint srv_ibuf_active_contract;
485+extern ulint srv_ibuf_accel_rate;
486+extern ulint srv_checkpoint_age_target;
487+extern ulint srv_flush_neighbor_pages;
488+extern ulint srv_enable_unsafe_group_commit;
489+extern ulint srv_read_ahead;
490+extern ulint srv_adaptive_flushing_method;
491+
492 /*-------------------------------------------*/
493
494 extern ulint srv_n_rows_inserted;
adf0fb13 495@@ -394,8 +405,9 @@
b4e1fa2c
AM
496 when writing data files, but do flush
497 after writing to log files */
498 SRV_UNIX_NOSYNC, /*!< do not flush after writing */
499- SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
500+ SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
501 data files */
502+ SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */
503 };
504
505 /** Alternatives for file i/o in Windows */
db82db79
AM
506--- a/storage/innobase/log/log0log.c
507+++ b/storage/innobase/log/log0log.c
d8778560
AM
508@@ -48,6 +48,7 @@
509 #include "srv0start.h"
510 #include "trx0sys.h"
511 #include "trx0trx.h"
512+#include "ha_prototypes.h"
513
514 /*
515 General philosophy of InnoDB redo-logs:
516@@ -359,6 +360,33 @@
b4e1fa2c
AM
517 }
518
519 /************************************************************//**
520+Returns log_sys->max_modified_age_async, capped at 7/8 of srv_checkpoint_age_target when that is nonzero. */
521+UNIV_INLINE
522+ulint
523+log_max_modified_age_async()
524+{
525+ if (srv_checkpoint_age_target) { /* nonzero: a soft target has been configured */
526+ return(ut_min(log_sys->max_modified_age_async,
527+ srv_checkpoint_age_target
528+ - srv_checkpoint_age_target / 8)); /* target minus one eighth of it */
529+ } else {
530+ return(log_sys->max_modified_age_async); /* no target: built-in limit unchanged */
531+ }
532+}
533+
534+UNIV_INLINE /* returns log_sys->max_checkpoint_age_async, capped at srv_checkpoint_age_target when that is nonzero */
535+ulint
536+log_max_checkpoint_age_async()
537+{
538+ if (srv_checkpoint_age_target) { /* nonzero: a soft target has been configured */
539+ return(ut_min(log_sys->max_checkpoint_age_async,
540+ srv_checkpoint_age_target)); /* the smaller of the two limits wins */
541+ } else {
542+ return(log_sys->max_checkpoint_age_async); /* no target: built-in limit unchanged */
543+ }
544+}
545+
546+/************************************************************//**
547 Closes the log.
548 @return lsn */
549 UNIV_INTERN
d8778560 550@@ -427,7 +455,7 @@
b4e1fa2c
AM
551 }
552 }
553
554- if (checkpoint_age <= log->max_modified_age_async) {
555+ if (checkpoint_age <= log_max_modified_age_async()) {
556
557 goto function_exit;
558 }
d8778560 559@@ -435,8 +463,8 @@
b4e1fa2c
AM
560 oldest_lsn = buf_pool_get_oldest_modification();
561
562 if (!oldest_lsn
563- || lsn - oldest_lsn > log->max_modified_age_async
564- || checkpoint_age > log->max_checkpoint_age_async) {
565+ || lsn - oldest_lsn > log_max_modified_age_async()
566+ || checkpoint_age > log_max_checkpoint_age_async()) {
567
568 log->check_flush_or_checkpoint = TRUE;
569 }
413cadc7 570@@ -1100,9 +1128,10 @@
b4e1fa2c
AM
571 group = (log_group_t*)((ulint)group - 1);
572
573 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
574+ && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
575 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
576
413cadc7
AM
577- fil_flush(group->space_id);
578+ fil_flush(group->space_id, FALSE);
579 }
580
581 #ifdef UNIV_DEBUG
582@@ -1121,10 +1150,11 @@
b4e1fa2c
AM
583 logs and cannot end up here! */
584
585 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
586+ && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
587 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
588- && srv_flush_log_at_trx_commit != 2) {
589+ && thd_flush_log_at_trx_commit(NULL) != 2) {
590
413cadc7
AM
591- fil_flush(group->space_id);
592+ fil_flush(group->space_id, FALSE);
b4e1fa2c 593 }
413cadc7
AM
594
595 mutex_enter(&(log_sys->mutex));
d8778560 596@@ -1501,7 +1531,8 @@
b4e1fa2c
AM
597
598 mutex_exit(&(log_sys->mutex));
599
600- if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
601+ if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
602+ || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
603 /* O_DSYNC means the OS did not buffer the log file at all:
604 so we have also flushed to disk what we have written */
605
413cadc7
AM
606@@ -1511,7 +1542,7 @@
607
608 group = UT_LIST_GET_FIRST(log_sys->log_groups);
609
610- fil_flush(group->space_id);
611+ fil_flush(group->space_id, FALSE);
612 log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
613 }
614
d8778560 615@@ -2120,10 +2151,10 @@
b4e1fa2c
AM
616
617 sync = TRUE;
618 advance = 2 * (age - log->max_modified_age_sync);
619- } else if (age > log->max_modified_age_async) {
620+ } else if (age > log_max_modified_age_async()) {
621
622 /* A flush is not urgent: we do an asynchronous preflush */
623- advance = age - log->max_modified_age_async;
624+ advance = age - log_max_modified_age_async();
625 } else {
626 advance = 0;
627 }
d8778560 628@@ -2137,7 +2168,7 @@
b4e1fa2c
AM
629
630 do_checkpoint = TRUE;
631
632- } else if (checkpoint_age > log->max_checkpoint_age_async) {
633+ } else if (checkpoint_age > log_max_checkpoint_age_async()) {
634 /* A checkpoint is not urgent: do it asynchronously */
635
636 do_checkpoint = TRUE;
413cadc7
AM
637@@ -2607,7 +2638,7 @@
638
639 mutex_exit(&(log_sys->mutex));
640
641- fil_flush(group->archive_space_id);
642+ fil_flush(group->archive_space_id, TRUE);
643
644 mutex_enter(&(log_sys->mutex));
645
d8778560 646@@ -3349,6 +3380,17 @@
b4e1fa2c
AM
647 log_sys->flushed_to_disk_lsn,
648 log_sys->last_checkpoint_lsn);
649
650+ fprintf(file,
651+ "Max checkpoint age %lu\n"
652+ "Checkpoint age target %lu\n"
653+ "Modified age %lu\n"
654+ "Checkpoint age %lu\n",
655+ (ulong) log_sys->max_checkpoint_age,
656+ (ulong) log_max_checkpoint_age_async(),
657+ (ulong) (log_sys->lsn -
658+ log_buf_pool_get_oldest_modification()),
659+ (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
660+
661 current_time = time(NULL);
662
663 time_elapsed = 0.001 + difftime(current_time,
db82db79
AM
664--- a/storage/innobase/log/log0recv.c
665+++ b/storage/innobase/log/log0recv.c
b4e1fa2c
AM
666@@ -2906,9 +2906,12 @@
667 ib_uint64_t archived_lsn;
668 #endif /* UNIV_LOG_ARCHIVE */
669 byte* buf;
670- byte log_hdr_buf[LOG_FILE_HDR_SIZE];
671+ byte* log_hdr_buf;
672+ byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
673 ulint err;
674
675+ log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
676+
677 #ifdef UNIV_LOG_ARCHIVE
678 ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
679 /** TRUE when recovering from a checkpoint */
413cadc7
AM
680@@ -3468,7 +3471,7 @@
681 exit(1);
682 }
683
684- os_file_flush(log_file);
685+ os_file_flush(log_file, TRUE);
686 os_file_close(log_file);
687 }
688
689@@ -3492,7 +3495,7 @@
690
691 os_file_write(name, log_file, buf, 0, 0,
692 LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
693- os_file_flush(log_file);
694+ os_file_flush(log_file, TRUE);
695 os_file_close(log_file);
696
697 ut_free(buf);
db82db79
AM
698--- a/storage/innobase/os/os0file.c
699+++ b/storage/innobase/os/os0file.c
d8778560 700@@ -1424,7 +1424,7 @@
b4e1fa2c
AM
701 #endif
702 #ifdef UNIV_NON_BUFFERED_IO
703 # ifndef UNIV_HOTBACKUP
704- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
705+ if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
706 /* Do not use unbuffered i/o to log files because
707 value 2 denotes that we do not flush the log at every
708 commit, but only once per second */
d8778560 709@@ -1440,7 +1440,7 @@
b4e1fa2c
AM
710 attributes = 0;
711 #ifdef UNIV_NON_BUFFERED_IO
712 # ifndef UNIV_HOTBACKUP
713- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
714+ if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
715 /* Do not use unbuffered i/o to log files because
716 value 2 denotes that we do not flush the log at every
717 commit, but only once per second */
d8778560 718@@ -1585,6 +1585,11 @@
b4e1fa2c
AM
719 os_file_set_nocache(file, name, mode_str);
720 }
721
722+ /* ALL_O_DIRECT: O_DIRECT also for transaction log file */
723+ if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
724+ os_file_set_nocache(file, name, mode_str);
725+ }
726+
727 #ifdef USE_FILE_LOCK
728 if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
729
413cadc7
AM
730@@ -2008,7 +2013,7 @@
731
732 ut_free(buf2);
733
734- ret = os_file_flush(file);
735+ ret = os_file_flush(file, TRUE);
736
737 if (ret) {
738 return(TRUE);
739@@ -2046,7 +2051,8 @@
740 int
741 os_file_fsync(
742 /*==========*/
743- os_file_t file) /*!< in: handle to a file */
744+ os_file_t file, /*!< in: handle to a file */
745+ ibool metadata)
746 {
747 int ret;
748 int failures;
db82db79 749@@ -2055,7 +2061,16 @@
413cadc7
AM
750 failures = 0;
751
752 do {
db82db79 753+#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
413cadc7
AM
754+ if (metadata) {
755+ ret = fsync(file);
756+ } else {
757+ ret = fdatasync(file);
758+ }
759+#else
db82db79 760+ (void) metadata;
413cadc7
AM
761 ret = fsync(file);
762+#endif
763
764 os_n_fsyncs++;
765
db82db79 766@@ -2092,7 +2107,8 @@
413cadc7
AM
767 ibool
768 os_file_flush_func(
769 /*===============*/
770- os_file_t file) /*!< in, own: handle to a file */
771+ os_file_t file, /*!< in, own: handle to a file */
772+ ibool metadata)
773 {
774 #ifdef __WIN__
775 BOOL ret;
db82db79 776@@ -2142,18 +2158,18 @@
413cadc7
AM
777 /* If we are not on an operating system that supports this,
778 then fall back to a plain fsync. */
779
780- ret = os_file_fsync(file);
781+ ret = os_file_fsync(file, metadata);
782 } else {
783 ret = fcntl(file, F_FULLFSYNC, NULL);
784
785 if (ret) {
786 /* If we are not on a file system that supports this,
787 then fall back to a plain fsync. */
788- ret = os_file_fsync(file);
789+ ret = os_file_fsync(file, metadata);
790 }
791 }
792 #else
793- ret = os_file_fsync(file);
794+ ret = os_file_fsync(file, metadata);
795 #endif
796
797 if (ret == 0) {
db82db79 798@@ -2336,7 +2352,7 @@
413cadc7
AM
799 the OS crashes, a database page is only partially
800 physically written to disk. */
801
802- ut_a(TRUE == os_file_flush(file));
803+ ut_a(TRUE == os_file_flush(file, TRUE));
804 }
805 # endif /* UNIV_DO_FLUSH */
806
db82db79 807@@ -2378,7 +2394,7 @@
413cadc7
AM
808 the OS crashes, a database page is only partially
809 physically written to disk. */
810
811- ut_a(TRUE == os_file_flush(file));
812+ ut_a(TRUE == os_file_flush(file, TRUE));
813 }
814 # endif /* UNIV_DO_FLUSH */
815
db82db79 816@@ -2750,7 +2766,7 @@
413cadc7
AM
817
818 # ifdef UNIV_DO_FLUSH
819 if (!os_do_not_call_flush_at_each_write) {
820- ut_a(TRUE == os_file_flush(file));
821+ ut_a(TRUE == os_file_flush(file, TRUE));
822 }
823 # endif /* UNIV_DO_FLUSH */
824
db82db79 825@@ -4296,7 +4312,7 @@
413cadc7
AM
826 #ifdef UNIV_DO_FLUSH
827 if (slot->type == OS_FILE_WRITE
828 && !os_do_not_call_flush_at_each_write) {
829- if (!os_file_flush(slot->file)) {
830+ if (!os_file_flush(slot->file, TRUE)) {
831 ut_error;
832 }
833 }
db82db79 834@@ -4597,7 +4613,7 @@
413cadc7
AM
835 #ifdef UNIV_DO_FLUSH
836 if (slot->type == OS_FILE_WRITE
837 && !os_do_not_call_flush_at_each_write)
838- && !os_file_flush(slot->file) {
839+ && !os_file_flush(slot->file, TRUE) {
840 ut_error;
841 }
842 #endif /* UNIV_DO_FLUSH */
db82db79
AM
843--- a/storage/innobase/srv/srv0srv.c
844+++ b/storage/innobase/srv/srv0srv.c
adf0fb13 845@@ -183,7 +183,8 @@
b4e1fa2c
AM
846 UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
847 /* size in database pages */
848 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
849-UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
850+//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
851+UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
852
853 /* Try to flush dirty pages so as to avoid IO bursts at
854 the checkpoints. */
adf0fb13 855@@ -402,6 +403,17 @@
b4e1fa2c
AM
856
857 UNIV_INTERN ulong srv_replication_delay = 0;
858
859+UNIV_INTERN long long srv_ibuf_max_size = 0; /* bytes; ibuf0ibuf.c rewrites it with the effective limit */
860+UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable; NOTE(review): sysvar default in ha_innodb.cc is 1 */
861+UNIV_INTERN ulint srv_ibuf_accel_rate = 100; /* percent; multiplier used by PCT_IBUF_IO() */
862+#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) (pct) / 10000.0))) /* pct parenthesised so the cast covers a compound argument */
863+
864+UNIV_INTERN ulint srv_checkpoint_age_target = 0; /* 0: not control */
865+UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
866+
867+UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
868+UNIV_INTERN ulint srv_read_ahead = 3; /* 0: none 1: random 2: linear 3: both; NOTE(review): sysvar default in ha_innodb.cc is 2 */
869+UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average; NOTE(review): sysvar default in ha_innodb.cc is 1 */
870 /*-------------------------------------------*/
871 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
872 UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
db82db79
AM
873@@ -2709,7 +2721,7 @@
874
875 ut_ad(!mutex_own(&kernel_mutex));
876
877- ut_a(srv_n_purge_threads == 0);
878+ ut_a(srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0));
879
880 do {
881 /* Check for shutdown and change in purge config. */
adf0fb13 882@@ -2742,6 +2754,7 @@
b4e1fa2c
AM
883 ulint n_pages_purged = 0;
884 ulint n_bytes_merged;
885 ulint n_pages_flushed;
886+ ulint n_pages_flushed_prev = 0;
887 ulint n_bytes_archived;
888 ulint n_tables_to_drop;
889 ulint n_ios;
adf0fb13 890@@ -2749,7 +2762,20 @@
b4e1fa2c
AM
891 ulint n_ios_very_old;
892 ulint n_pend_ios;
893 ulint next_itr_time;
894+ ulint prev_adaptive_flushing_method = ULINT_UNDEFINED;
895+ ulint inner_loop = 0;
896+ ibool skip_sleep = FALSE;
897 ulint i;
898+ struct t_prev_flush_info_struct {
899+ ulint count;
900+ unsigned space:32;
901+ unsigned offset:32;
902+ ib_uint64_t oldest_modification;
903+ } prev_flush_info[MAX_BUFFER_POOLS];
904+
905+ ib_uint64_t lsn_old;
906+
907+ ib_uint64_t oldest_lsn;
908
909 #ifdef UNIV_DEBUG_THREAD_CREATION
910 fprintf(stderr, "Master thread starts, id %lu\n",
adf0fb13 911@@ -2771,6 +2797,9 @@
b4e1fa2c
AM
912
913 mutex_exit(&kernel_mutex);
914
915+ mutex_enter(&(log_sys->mutex));
916+ lsn_old = log_sys->lsn;
917+ mutex_exit(&(log_sys->mutex));
918 loop:
919 /*****************************************************************/
920 /* ---- When there is database activity by users, we cycle in this
adf0fb13 921@@ -2801,9 +2830,13 @@
b4e1fa2c
AM
922 /* Sleep for 1 second on entrying the for loop below the first time. */
923 next_itr_time = ut_time_ms() + 1000;
924
925+ skip_sleep = FALSE;
926+
927 for (i = 0; i < 10; i++) {
928 ulint cur_time = ut_time_ms();
929
930+ n_pages_flushed = 0; /* initialize */
931+
932 /* ALTER TABLE in MySQL requires on Unix that the table handler
933 can drop tables lazily after there no longer are SELECT
934 queries to them. */
adf0fb13 935@@ -2827,6 +2860,7 @@
b4e1fa2c
AM
936 srv_main_thread_op_info = "sleeping";
937 srv_main_1_second_loops++;
938
939+ if (!skip_sleep) {
940 if (next_itr_time > cur_time
941 && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
942
adf0fb13 943@@ -2837,10 +2871,26 @@
b4e1fa2c
AM
944 (next_itr_time - cur_time)
945 * 1000));
946 srv_main_sleeps++;
947+
948+ /*
949+ mutex_enter(&(log_sys->mutex));
950+ oldest_lsn = buf_pool_get_oldest_modification();
951+ ib_uint64_t lsn = log_sys->lsn;
952+ mutex_exit(&(log_sys->mutex));
953+
954+ if(oldest_lsn)
955+ fprintf(stderr,
956+ "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
957+ (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
958+ lsn - lsn_old);
959+ */
960 }
961
962 /* Each iteration should happen at 1 second interval. */
963 next_itr_time = ut_time_ms() + 1000;
964+ } /* if (!skip_sleep) */
965+
966+ skip_sleep = FALSE;
967
968 /* Flush logs if needed */
969 srv_sync_log_buffer_in_background();
adf0fb13 970@@ -2860,7 +2910,7 @@
b4e1fa2c
AM
971 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
972 && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
973 srv_main_thread_op_info = "doing insert buffer merge";
974- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
975+ ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
976
977 /* Flush logs if needed */
978 srv_sync_log_buffer_in_background();
adf0fb13 979@@ -2877,7 +2927,11 @@
b4e1fa2c
AM
980 n_pages_flushed = buf_flush_list(
981 PCT_IO(100), IB_ULONGLONG_MAX);
982
983- } else if (srv_adaptive_flushing) {
984+ mutex_enter(&(log_sys->mutex));
985+ lsn_old = log_sys->lsn;
986+ mutex_exit(&(log_sys->mutex));
987+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
988+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
989
990 /* Try to keep the rate of flushing of dirty
991 pages such that redo log generation does not
adf0fb13 992@@ -2893,6 +2947,224 @@
b4e1fa2c
AM
993 n_flush,
994 IB_ULONGLONG_MAX);
995 }
996+
997+ mutex_enter(&(log_sys->mutex));
998+ lsn_old = log_sys->lsn;
999+ mutex_exit(&(log_sys->mutex));
1000+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
1001+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
1002+
1003+ /* Try to keep the modified age from exceeding
1004+ the max_checkpoint_age * 7/8 line */
1006+ mutex_enter(&(log_sys->mutex));
1007+
1008+ oldest_lsn = buf_pool_get_oldest_modification();
1009+ if (oldest_lsn == 0) {
1010+ lsn_old = log_sys->lsn;
1011+ mutex_exit(&(log_sys->mutex));
1012+
1013+ } else {
1014+ if ((log_sys->lsn - oldest_lsn)
1015+ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
1016+ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
1017+ /* We should not flush from here. */
1018+ lsn_old = log_sys->lsn;
1019+ mutex_exit(&(log_sys->mutex));
1020+ } else if ((log_sys->lsn - oldest_lsn)
1021+ > (log_sys->max_checkpoint_age)/4 ) {
1022+
1023+ /* defence line (max_checkpoint_age * 1/4) */
1024+ ib_uint64_t lsn = log_sys->lsn;
1025+
1026+ ib_uint64_t level, bpl;
1027+ buf_page_t* bpage;
1028+ ulint j;
1029+
1030+ mutex_exit(&(log_sys->mutex));
1031+
1032+ bpl = 0;
1033+
1034+ for (j = 0; j < srv_buf_pool_instances; j++) {
1035+ buf_pool_t* buf_pool;
1036+ ulint n_blocks;
1037+
1038+ buf_pool = buf_pool_from_array(j);
1039+
1040+ /* The flush_list is scanned optimistically here */
1041+
1042+ level = 0;
1043+ n_blocks = 0;
1044+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1045+
1046+ while (bpage != NULL) {
1047+ ib_uint64_t oldest_modification = bpage->oldest_modification;
1048+ if (oldest_modification != 0) {
1049+ level += log_sys->max_checkpoint_age
1050+ - (lsn - oldest_modification);
1051+ }
1052+ bpage = UT_LIST_GET_NEXT(list, bpage);
1053+ n_blocks++;
1054+ }
1055+
1056+ if (level) {
1057+ bpl += ((ib_uint64_t) n_blocks * n_blocks
1058+ * (lsn - lsn_old)) / level;
1059+ }
1060+
1061+ }
1062+
1063+ if (!srv_use_doublewrite_buf) {
1064+ /* flush is faster than when doublewrite */
1065+ bpl = (bpl * 7) / 8;
1066+ }
1067+
1068+ if (bpl) {
1069+retry_flush_batch:
1070+ n_pages_flushed = buf_flush_list(bpl,
1071+ oldest_lsn + (lsn - lsn_old));
1072+ if (n_pages_flushed == ULINT_UNDEFINED) {
1073+ os_thread_sleep(5000);
1074+ goto retry_flush_batch;
1075+ }
1076+ }
1077+
1078+ lsn_old = lsn;
1079+ /*
1080+ fprintf(stderr,
1081+ "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
1082+ (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1083+ lsn - lsn_old, bpl);
1084+ */
1085+ } else {
1086+ lsn_old = log_sys->lsn;
1087+ mutex_exit(&(log_sys->mutex));
1088+ }
1089+ }
1090+ prev_adaptive_flushing_method = 1;
1091+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
1092+ buf_pool_t* buf_pool;
1093+ buf_page_t* bpage;
1094+ ib_uint64_t lsn;
1095+ ulint j;
1096+
1097+ mutex_enter(&(log_sys->mutex));
1098+ oldest_lsn = buf_pool_get_oldest_modification();
1099+ lsn = log_sys->lsn;
1100+ mutex_exit(&(log_sys->mutex));
1101+
1102+ /* raise the loop rate to 10 iterations per second (x10) */
1103+ next_itr_time -= 900; /* 1000 - 900 == 100 */
1104+ inner_loop++;
1105+ if (inner_loop < 10) {
1106+ i--;
1107+ } else {
1108+ inner_loop = 0;
1109+ }
1110+
1111+ if (prev_adaptive_flushing_method == 2) {
1112+ lint n_flush;
d8778560
AM
1113+ lint blocks_sum;
1114+ ulint new_blocks_sum, flushed_blocks_sum;
b4e1fa2c
AM
1115+
1116+ blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
1117+
1118+ /* prev_flush_info[j] should hold the values stored by the previous iteration */
1119+ for (j = 0; j < srv_buf_pool_instances; j++) {
1120+ lint blocks_num, new_blocks_num, flushed_blocks_num;
1121+ ibool found;
1122+
1123+ buf_pool = buf_pool_from_array(j);
1124+
1125+ blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
1126+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1127+ new_blocks_num = 0;
1128+
1129+ found = FALSE;
1130+ while (bpage != NULL) {
1131+ if (prev_flush_info[j].space == bpage->space
1132+ && prev_flush_info[j].offset == bpage->offset
1133+ && prev_flush_info[j].oldest_modification
1134+ == bpage->oldest_modification) {
1135+ found = TRUE;
1136+ break;
1137+ }
1138+ bpage = UT_LIST_GET_NEXT(list, bpage);
1139+ new_blocks_num++;
1140+ }
1141+ if (!found) {
1142+ new_blocks_num = blocks_num;
1143+ }
1144+
1145+ flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
1146+ - blocks_num;
1147+ if (flushed_blocks_num < 0) {
1148+ flushed_blocks_num = 0;
1149+ }
1150+
1151+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1152+
1153+ prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1154+ if (bpage) {
1155+ prev_flush_info[j].space = bpage->space;
1156+ prev_flush_info[j].offset = bpage->offset;
1157+ prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1158+ } else {
1159+ prev_flush_info[j].space = 0;
1160+ prev_flush_info[j].offset = 0;
1161+ prev_flush_info[j].oldest_modification = 0;
1162+ }
1163+
1164+ new_blocks_sum += new_blocks_num;
1165+ flushed_blocks_sum += flushed_blocks_num;
1166+ blocks_sum += blocks_num;
1167+ }
1168+
1169+ n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
1170+ if (flushed_blocks_sum > n_pages_flushed_prev) {
1171+ n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
1172+ }
1173+
1174+ if (n_flush > 0) {
1175+ n_flush++;
1176+ n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
1177+ } else {
1178+ n_pages_flushed = 0;
1179+ }
1180+ } else {
1181+ /* store previous first pages of the flush_list */
1182+ for (j = 0; j < srv_buf_pool_instances; j++) {
1183+ buf_pool = buf_pool_from_array(j);
1184+
1185+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1186+
1187+ prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1188+ if (bpage) {
1189+ prev_flush_info[j].space = bpage->space;
1190+ prev_flush_info[j].offset = bpage->offset;
1191+ prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1192+ } else {
1193+ prev_flush_info[j].space = 0;
1194+ prev_flush_info[j].offset = 0;
1195+ prev_flush_info[j].oldest_modification = 0;
1196+ }
1197+ }
1198+ n_pages_flushed = 0;
1199+ }
1200+
1201+ lsn_old = lsn;
1202+ prev_adaptive_flushing_method = 2;
1203+ } else {
1204+ mutex_enter(&(log_sys->mutex));
1205+ lsn_old = log_sys->lsn;
1206+ mutex_exit(&(log_sys->mutex));
1207+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
1208+ }
1209+
1210+ if (n_pages_flushed == ULINT_UNDEFINED) {
1211+ n_pages_flushed_prev = 0;
1212+ } else {
1213+ n_pages_flushed_prev = n_pages_flushed;
1214 }
1215
1216 if (srv_activity_count == old_activity_count) {
db82db79 1217@@ -2941,12 +3213,12 @@
b4e1fa2c
AM
1218 even if the server were active */
1219
1220 srv_main_thread_op_info = "doing insert buffer merge";
1221- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1222+ ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1223
1224 /* Flush logs if needed */
1225 srv_sync_log_buffer_in_background();
db82db79
AM
1226
1227- if (srv_n_purge_threads == 0) {
1228+ if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1229 srv_main_thread_op_info = "master purging";
1230
1231 srv_master_do_purge();
1232@@ -3024,7 +3296,7 @@
1233 }
1234 }
1235
1236- if (srv_n_purge_threads == 0) {
1237+ if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1238 srv_main_thread_op_info = "master purging";
1239
1240 srv_master_do_purge();
adf0fb13 1241@@ -3049,7 +3321,7 @@
b4e1fa2c
AM
1242 buf_flush_list below. Otherwise, the system favors
1243 clean pages over cleanup throughput. */
1244 n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
1245- PCT_IO(100));
1246+ PCT_IBUF_IO(100));
1247 }
1248
1249 srv_main_thread_op_info = "reserving kernel mutex";
adf0fb13
AM
1250@@ -3189,6 +3461,7 @@
1251 srv_slot_t* slot;
11822e22 1252 ulint retries = 0;
b4e1fa2c
AM
1253 ulint n_total_purged = ULINT_UNDEFINED;
1254+ ulint next_itr_time;
1255
1256 ut_a(srv_n_purge_threads == 1);
1257
adf0fb13 1258@@ -3209,9 +3482,12 @@
b4e1fa2c
AM
1259
1260 mutex_exit(&kernel_mutex);
1261
1262+ next_itr_time = ut_time_ms();
1263+
1264 while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
1265
11822e22 1266 ulint n_pages_purged = 0;
b4e1fa2c
AM
1267+ ulint cur_time;
1268
1269 /* If there are very few records to purge or the last
1270 purge didn't purge any records then wait for activity.
adf0fb13 1271@@ -3258,6 +3534,16 @@
b4e1fa2c
AM
1272 } while (n_pages_purged > 0 && !srv_fast_shutdown);
1273
1274 srv_sync_log_buffer_in_background();
1275+
1276+ cur_time = ut_time_ms();
1277+ if (next_itr_time > cur_time) {
1278+ os_thread_sleep(ut_min(1000000,
1279+ (next_itr_time - cur_time)
1280+ * 1000));
1281+ next_itr_time = ut_time_ms() + 1000;
1282+ } else {
1283+ next_itr_time = cur_time + 1000;
1284+ }
1285 }
1286
1287 mutex_enter(&kernel_mutex);
db82db79
AM
1288--- a/storage/innobase/srv/srv0start.c
1289+++ b/storage/innobase/srv/srv0start.c
adf0fb13 1290@@ -1217,6 +1217,9 @@
b4e1fa2c
AM
1291 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1292 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1293
1294+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1295+ srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1296+
1297 } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1298 srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1299
db82db79
AM
1300--- a/storage/innobase/trx/trx0purge.c
1301+++ b/storage/innobase/trx/trx0purge.c
11822e22
AM
1302@@ -392,10 +392,10 @@
1303 trx_sys->rseg_history_len++;
1304 mutex_exit(&kernel_mutex);
1305
1306- if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
1307+// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/
1308 /* Inform the purge thread that there is work to do. */
1309 srv_wake_purge_thread_if_not_active();
1310- }
1311+// }
1312 }
1313
1314 /**********************************************************************//**
db82db79
AM
1315--- a/storage/innobase/trx/trx0trx.c
1316+++ b/storage/innobase/trx/trx0trx.c
adf0fb13 1317@@ -984,6 +984,7 @@
b4e1fa2c
AM
1318 trx->read_view = NULL;
1319
1320 if (lsn) {
1321+ ulint flush_log_at_trx_commit;
1322
1323 mutex_exit(&kernel_mutex);
1324
adf0fb13 1325@@ -992,6 +993,12 @@
b4e1fa2c
AM
1326 trx_undo_insert_cleanup(trx);
1327 }
1328
1329+ if (srv_use_global_flush_log_at_trx_commit) {
1330+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1331+ } else {
1332+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1333+ }
1334+
1335 /* NOTE that we could possibly make a group commit more
1336 efficient here: call os_thread_yield here to allow also other
1337 trxs to come to commit! */
adf0fb13 1338@@ -1023,9 +1030,9 @@
b4e1fa2c
AM
1339 if (trx->flush_log_later) {
1340 /* Do nothing yet */
1341 trx->must_flush_log_later = TRUE;
1342- } else if (srv_flush_log_at_trx_commit == 0) {
1343+ } else if (flush_log_at_trx_commit == 0) {
1344 /* Do nothing */
1345- } else if (srv_flush_log_at_trx_commit == 1) {
1346+ } else if (flush_log_at_trx_commit == 1) {
1347 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1348 /* Write the log but do not flush it to disk */
1349
adf0fb13 1350@@ -1037,7 +1044,7 @@
b4e1fa2c
AM
1351
1352 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1353 }
1354- } else if (srv_flush_log_at_trx_commit == 2) {
1355+ } else if (flush_log_at_trx_commit == 2) {
1356
1357 /* Write the log but do not flush it to disk */
1358
adf0fb13 1359@@ -1701,16 +1708,23 @@
b4e1fa2c
AM
1360 trx_t* trx) /*!< in: trx handle */
1361 {
1362 ib_uint64_t lsn = trx->commit_lsn;
1363+ ulint flush_log_at_trx_commit;
1364
1365 ut_a(trx);
1366
1367 trx->op_info = "flushing log";
1368
1369+ if (srv_use_global_flush_log_at_trx_commit) {
1370+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1371+ } else {
1372+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1373+ }
1374+
1375 if (!trx->must_flush_log_later) {
1376 /* Do nothing */
1377- } else if (srv_flush_log_at_trx_commit == 0) {
1378+ } else if (flush_log_at_trx_commit == 0) {
1379 /* Do nothing */
1380- } else if (srv_flush_log_at_trx_commit == 1) {
1381+ } else if (flush_log_at_trx_commit == 1) {
1382 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1383 /* Write the log but do not flush it to disk */
1384
adf0fb13 1385@@ -1721,7 +1735,7 @@
b4e1fa2c
AM
1386
1387 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1388 }
1389- } else if (srv_flush_log_at_trx_commit == 2) {
1390+ } else if (flush_log_at_trx_commit == 2) {
1391
1392 /* Write the log but do not flush it to disk */
1393
adf0fb13 1394@@ -1969,6 +1983,8 @@
b4e1fa2c
AM
1395 /*--------------------------------------*/
1396
1397 if (lsn) {
1398+ ulint flush_log_at_trx_commit;
1399+
1400 /* Depending on the my.cnf options, we may now write the log
1401 buffer to the log files, making the prepared state of the
1402 transaction durable if the OS does not crash. We may also
adf0fb13 1403@@ -1988,9 +2004,15 @@
b4e1fa2c
AM
1404
1405 mutex_exit(&kernel_mutex);
1406
1407- if (srv_flush_log_at_trx_commit == 0) {
1408+ if (srv_use_global_flush_log_at_trx_commit) {
1409+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1410+ } else {
1411+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1412+ }
1413+
1414+ if (flush_log_at_trx_commit == 0) {
1415 /* Do nothing */
1416- } else if (srv_flush_log_at_trx_commit == 1) {
1417+ } else if (flush_log_at_trx_commit == 1) {
1418 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1419 /* Write the log but do not flush it to disk */
1420
adf0fb13 1421@@ -2002,7 +2024,7 @@
b4e1fa2c
AM
1422
1423 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1424 }
1425- } else if (srv_flush_log_at_trx_commit == 2) {
1426+ } else if (flush_log_at_trx_commit == 2) {
1427
1428 /* Write the log but do not flush it to disk */
1429
This page took 0.253411 seconds and 4 git commands to generate.