git.pld-linux.org Git - packages/mysql.git/blame_incremental - innodb_io_patches.patch
- updated to 5.5.32
[packages/mysql.git] / innodb_io_patches.patch
... / ...
CommitLineData
1# name : innodb_io_patches.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
8--- a/storage/innobase/buf/buf0buf.c
9+++ b/storage/innobase/buf/buf0buf.c
10@@ -320,6 +320,7 @@
11
12 /* When we traverse all the flush lists we don't want another
13 thread to add a dirty page to any flush list. */
14+ if (srv_buf_pool_instances > 1)
15 log_flush_order_mutex_enter();
16
17 for (i = 0; i < srv_buf_pool_instances; i++) {
18@@ -343,6 +344,7 @@
19 }
20 }
21
22+ if (srv_buf_pool_instances > 1)
23 log_flush_order_mutex_exit();
24
25 /* The returned answer may be out of date: the flush_list can
26--- a/storage/innobase/buf/buf0flu.c
27+++ b/storage/innobase/buf/buf0flu.c
28@@ -857,7 +857,7 @@
29 flush:
30 /* Now flush the doublewrite buffer data to disk */
31
32- fil_flush(TRX_SYS_SPACE);
33+ fil_flush(TRX_SYS_SPACE, FALSE);
34
35 /* We know that the writes have been flushed to disk now
36 and in recovery we will find them in the doublewrite buffer
37@@ -1375,10 +1375,11 @@
38 ulint high;
39 ulint count = 0;
40 buf_pool_t* buf_pool = buf_pool_get(space, offset);
41+ ibool is_forward_scan;
42
43 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
44
45- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
46+ if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) {
47 /* If there is little space, it is better not to flush
48 any block except from the end of the LRU list */
49
50@@ -1405,7 +1406,32 @@
51 high = fil_space_get_size(space);
52 }
53
54- for (i = low; i < high; i++) {
55+ if (srv_flush_neighbor_pages == 2) {
56+
57+ /* In the case of contiguous flush where the requested page
58+ does not fall at the start of flush area, first scan backward
59+ from the page and later forward from it. */
60+ is_forward_scan = (offset == low);
61+ }
62+ else {
63+ is_forward_scan = TRUE;
64+ }
65+
66+scan:
67+ if (srv_flush_neighbor_pages == 2) {
68+ if (is_forward_scan) {
69+ i = offset;
70+ }
71+ else {
72+ i = offset - 1;
73+ }
74+ }
75+ else {
76+ i = low;
77+ }
78+
79+ for (; is_forward_scan ? (i < high) : (i >= low);
80+ is_forward_scan ? i++ : i--) {
81
82 buf_page_t* bpage;
83
84@@ -1434,6 +1460,12 @@
85 if (!bpage) {
86
87 buf_pool_mutex_exit(buf_pool);
88+ if (srv_flush_neighbor_pages == 2) {
89+
90+ /* This is contiguous neighbor page flush and
91+ the pages here are not contiguous. */
92+ break;
93+ }
94 continue;
95 }
96
97@@ -1470,6 +1502,22 @@
98 }
99 }
100 buf_pool_mutex_exit(buf_pool);
101+
102+ if (srv_flush_neighbor_pages == 2) {
103+
104+ /* We are trying to do the contiguous neighbor page
105+ flush, but the last page we checked was unflushable,
106+ making a "hole" in the flush, so stop this attempt. */
107+ break;
108+ }
109+ }
110+
111+ if (!is_forward_scan) {
112+
113+ /* Backward scan done, now do the forward scan */
114+ ut_a (srv_flush_neighbor_pages == 2);
115+ is_forward_scan = TRUE;
116+ goto scan;
117 }
118
119 return(count);
120--- a/storage/innobase/buf/buf0rea.c
121+++ b/storage/innobase/buf/buf0rea.c
122@@ -427,6 +427,10 @@
123 = BUF_READ_AHEAD_AREA(buf_pool);
124 ulint threshold;
125
126+ if (!(srv_read_ahead & 2)) {
127+ return(0);
128+ }
129+
130 if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
131 /* No read-ahead to avoid thread deadlocks */
132 return(0);
133--- a/storage/innobase/fil/fil0fil.c
134+++ b/storage/innobase/fil/fil0fil.c
135@@ -2609,7 +2609,7 @@
136
137 os_thread_sleep(20000);
138
139- fil_flush(id);
140+ fil_flush(id, TRUE);
141
142 goto retry;
143
144@@ -2823,7 +2823,7 @@
145 goto error_exit;
146 }
147
148- ret = os_file_flush(file);
149+ ret = os_file_flush(file, TRUE);
150
151 if (!ret) {
152 fputs("InnoDB: Error: file flush of tablespace ", stderr);
153@@ -3009,7 +3009,7 @@
154 }
155 }
156
157- success = os_file_flush(file);
158+ success = os_file_flush(file, TRUE);
159 if (!success) {
160
161 goto func_exit;
162@@ -3031,7 +3031,7 @@
163
164 goto func_exit;
165 }
166- success = os_file_flush(file);
167+ success = os_file_flush(file, TRUE);
168 func_exit:
169 os_file_close(file);
170 ut_free(buf2);
171@@ -4014,7 +4014,7 @@
172 size_after_extend, *actual_size); */
173 mutex_exit(&fil_system->mutex);
174
175- fil_flush(space_id);
176+ fil_flush(space_id, TRUE);
177
178 return(success);
179 }
180@@ -4585,8 +4585,9 @@
181 void
182 fil_flush(
183 /*======*/
184- ulint space_id) /*!< in: file space id (this can be a group of
185+ ulint space_id, /*!< in: file space id (this can be a group of
186 log files or a tablespace of the database) */
187+ ibool metadata)
188 {
189 fil_space_t* space;
190 fil_node_t* node;
191@@ -4657,7 +4658,7 @@
192 /* fprintf(stderr, "Flushing to file %s\n",
193 node->name); */
194
195- os_file_flush(file);
196+ os_file_flush(file, metadata);
197
198 mutex_enter(&fil_system->mutex);
199
200@@ -4740,7 +4741,7 @@
201 a non-existing space id. */
202 for (i = 0; i < n_space_ids; i++) {
203
204- fil_flush(space_ids[i]);
205+ fil_flush(space_ids[i], TRUE);
206 }
207
208 mem_free(space_ids);
209--- a/storage/innobase/handler/ha_innodb.cc
210+++ b/storage/innobase/handler/ha_innodb.cc
211@@ -445,6 +445,12 @@
212 "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
213 NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
214
215+static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
216+ "Set to 0 (write and flush once per second),"
217+ " 1 (write and flush at each commit)"
218+ " or 2 (write at commit, flush once per second).",
219+ NULL, NULL, 1, 0, 2, 0);
220+
221
222 static handler *innobase_create_handler(handlerton *hton,
223 TABLE_SHARE *table,
224@@ -841,6 +847,17 @@
225 }
226 }
227
228+/******************************************************************//**
229+Returns the flush_log_at_trx_commit THDVAR for the given thread handle. @return 0, 1 or 2 (the range declared for the THDVAR) */
230+extern "C" UNIV_INTERN
231+ulong
232+thd_flush_log_at_trx_commit(
233+/*================================*/
234+ void* thd) /*!< in: thread handle, cast to THD*; callers in this patch pass NULL -- presumably that yields the global value, TODO confirm */
235+{
236+ return(THDVAR((THD*) thd, flush_log_at_trx_commit));
237+}
238+
239 /********************************************************************//**
240 Obtain the InnoDB transaction of a MySQL thread.
241 @return reference to transaction pointer */
242@@ -2471,6 +2488,9 @@
243 srv_n_read_io_threads = (ulint) innobase_read_io_threads;
244 srv_n_write_io_threads = (ulint) innobase_write_io_threads;
245
246+ srv_read_ahead &= 3;
247+ srv_adaptive_flushing_method %= 3;
248+
249 srv_force_recovery = (ulint) innobase_force_recovery;
250
251 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
252@@ -11141,7 +11161,7 @@
253 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
254 "Purge threads can be either 0 or 1.",
255 NULL, NULL,
256- 0, /* Default setting */
257+ 1, /* Default setting */
258 0, /* Minimum value */
259 1, 0); /* Maximum value */
260
261@@ -11183,12 +11203,18 @@
262 innodb_file_format_max_validate,
263 innodb_file_format_max_update, "Antelope");
264
265-static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
266- PLUGIN_VAR_OPCMDARG,
267- "Set to 0 (write and flush once per second),"
268- " 1 (write and flush at each commit)"
269- " or 2 (write at commit, flush once per second).",
270- NULL, NULL, 1, 0, 2, 0);
271+/* Changed to the THDVAR */
272+//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
273+// PLUGIN_VAR_OPCMDARG,
274+// "Set to 0 (write and flush once per second),"
275+// " 1 (write and flush at each commit)"
276+// " or 2 (write at commit, flush once per second).",
277+// NULL, NULL, 1, 0, 2, 0);
278+
279+static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
280+ PLUGIN_VAR_NOCMDARG,
281+ "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
282+ NULL, NULL, TRUE);
283
284 static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
285 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
286@@ -11293,7 +11319,7 @@
287 static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
288 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
289 "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
290- NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
291+ NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
292
293 static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
294 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
295@@ -11442,6 +11468,127 @@
296 "trigger a readahead.",
297 NULL, NULL, 56, 0, 64, 0);
298
299+static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
300+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
301+ "The maximum size of the insert buffer. (in bytes)",
302+ NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
303+
304+static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
305+ PLUGIN_VAR_RQCMDARG,
306+ "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
307+ NULL, NULL, 1, 0, 1, 0);
308+
309+static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
310+ PLUGIN_VAR_RQCMDARG,
311+ "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
312+ NULL, NULL, 100, 100, 999999999, 0);
313+
314+static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target,
315+ PLUGIN_VAR_RQCMDARG,
316+ "Control soft limit of checkpoint age. (0 : not control)",
317+ NULL, NULL, 0, 0, ~0UL, 0);
318+
319+static /* Update function passed to MYSQL_SYSVAR_ENUM(flush_neighbor_pages, ...). */
320+void
321+innodb_flush_neighbor_pages_update(
322+ THD* thd, /*!< in: not used here */
323+ struct st_mysql_sys_var* var, /*!< in: not used here */
324+ void* var_ptr, /*!< out: written as a long with the folded value */
325+ const void* save) /*!< in: read as a long; presumably an index into flush_neighbor_pages_names */
326+{
327+ *(long *)var_ptr = (*(long *)save) % 3; /* fold the aliases "0".."2" (indices 3..5) onto 0..2 */
328+}
329+
330+const char *flush_neighbor_pages_names[]=
331+{
332+ "none", /* 0 */
333+ "area",
334+ "cont", /* 2 */
335+ /* For compatibility with the older patch */
336+ "0", /* "none" + 3 */
337+ "1", /* "area" + 3 */
338+ "2", /* "cont" + 3 */
339+ NullS
340+};
341+
342+TYPELIB flush_neighbor_pages_typelib=
343+{
344+ array_elements(flush_neighbor_pages_names) - 1,
345+ "flush_neighbor_pages_typelib",
346+ flush_neighbor_pages_names,
347+ NULL
348+};
349+
350+static MYSQL_SYSVAR_ENUM(flush_neighbor_pages, srv_flush_neighbor_pages,
351+ PLUGIN_VAR_RQCMDARG, "Neighbor page flushing behaviour: none: do not flush, "
352+ "[area]: flush selected pages one-by-one, "
353+ "cont: flush a contiguous block of pages", NULL,
354+ innodb_flush_neighbor_pages_update, 1, &flush_neighbor_pages_typelib);
355+
356+static /* Update function passed to MYSQL_SYSVAR_ENUM(read_ahead, ...). */
357+void
358+innodb_read_ahead_update(
359+ THD* thd, /*!< in: not used here */
360+ struct st_mysql_sys_var* var, /*!< in: not used here */
361+ void* var_ptr, /*!< out: written as a long with the folded value */
362+ const void* save) /*!< in: read as a long; presumably an index into read_ahead_names */
363+{
364+ *(long *)var_ptr= (*(long *)save) & 3; /* fold the aliases "0".."3" (indices 4..7) onto 0..3 */
365+}
366+const char *read_ahead_names[]=
367+{
368+ "none", /* 0 */
369+ "random",
370+ "linear",
371+ "both", /* 3 */
372+ /* For compatibility of the older patch */
373+ "0", /* 4 ("none" + 4) */
374+ "1",
375+ "2",
376+ "3", /* 7 ("both" + 4) */
377+ NullS
378+};
379+TYPELIB read_ahead_typelib=
380+{
381+ array_elements(read_ahead_names) - 1, "read_ahead_typelib",
382+ read_ahead_names, NULL
383+};
384+static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
385+ PLUGIN_VAR_RQCMDARG,
386+ "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]",
387+ NULL, innodb_read_ahead_update, 2, &read_ahead_typelib);
388+
389+static /* Update function passed to MYSQL_SYSVAR_ENUM(adaptive_flushing_method, ...). */
390+void
391+innodb_adaptive_flushing_method_update(
392+ THD* thd, /*!< in: not used here */
393+ struct st_mysql_sys_var* var, /*!< in: not used here */
394+ void* var_ptr, /*!< out: written as a long with the method, 0..2 */
395+ const void* save) /*!< in: read as a long; presumably an index into adaptive_flushing_method_names */
396+{
397+ *(long *)var_ptr= (*(long *)save) % 3; /* 3 methods: fold the aliases "0".."2" (indices 3..5) onto 0..2 */
398+}
399+const char *adaptive_flushing_method_names[]=
400+{
401+ "native", /* 0 */
402+ "estimate", /* 1 */
403+ "keep_average", /* 2 */
404+ /* For compatibility with the older patch */
405+ "0", /* 3 ("native" + 3) */
406+ "1", /* 4 ("estimate" + 3) */
407+ "2", /* 5 ("keep_average" + 3) */
408+ NullS
409+};
410+TYPELIB adaptive_flushing_method_typelib=
411+{
412+ array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib",
413+ adaptive_flushing_method_names, NULL
414+};
415+static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method,
416+ PLUGIN_VAR_RQCMDARG,
417+ "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
418+ NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
419+
420 static struct st_mysql_sys_var* innobase_system_variables[]= {
421 MYSQL_SYSVAR(additional_mem_pool_size),
422 MYSQL_SYSVAR(autoextend_increment),
423@@ -11462,6 +11609,7 @@
424 MYSQL_SYSVAR(file_format_check),
425 MYSQL_SYSVAR(file_format_max),
426 MYSQL_SYSVAR(flush_log_at_trx_commit),
427+ MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
428 MYSQL_SYSVAR(flush_method),
429 MYSQL_SYSVAR(force_recovery),
430 MYSQL_SYSVAR(large_prefix),
431@@ -11501,6 +11649,13 @@
432 MYSQL_SYSVAR(show_verbose_locks),
433 MYSQL_SYSVAR(show_locks_held),
434 MYSQL_SYSVAR(version),
435+ MYSQL_SYSVAR(ibuf_max_size),
436+ MYSQL_SYSVAR(ibuf_active_contract),
437+ MYSQL_SYSVAR(ibuf_accel_rate),
438+ MYSQL_SYSVAR(checkpoint_age_target),
439+ MYSQL_SYSVAR(flush_neighbor_pages),
440+ MYSQL_SYSVAR(read_ahead),
441+ MYSQL_SYSVAR(adaptive_flushing_method),
442 MYSQL_SYSVAR(use_sys_malloc),
443 MYSQL_SYSVAR(use_native_aio),
444 MYSQL_SYSVAR(change_buffering),
445--- a/storage/innobase/ibuf/ibuf0ibuf.c
446+++ b/storage/innobase/ibuf/ibuf0ibuf.c
447@@ -523,8 +523,10 @@
448 grow in size, as the references on the upper levels of the tree can
449 change */
450
451- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
452- / IBUF_POOL_SIZE_PER_MAX_SIZE;
453+ ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
454+ / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
455+
456+ srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
457
458 mutex_create(ibuf_pessimistic_insert_mutex_key,
459 &ibuf_pessimistic_insert_mutex,
460@@ -2763,9 +2765,11 @@
461 size = ibuf->size;
462 max_size = ibuf->max_size;
463
464+ if (!srv_ibuf_active_contract) {
465 if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
466 return;
467 }
468+ }
469
470 sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
471
472--- a/storage/innobase/include/buf0rea.h
473+++ b/storage/innobase/include/buf0rea.h
474@@ -149,8 +149,7 @@
475
476 /** The size in pages of the area which the read-ahead algorithms read if
477 invoked */
478-#define BUF_READ_AHEAD_AREA(b) \
479- ut_min(64, ut_2_power_up((b)->curr_size / 32))
480+#define BUF_READ_AHEAD_AREA(b) 64
481
482 /** @name Modes used in read-ahead @{ */
483 /** read only pages belonging to the insert buffer tree */
484--- a/storage/innobase/include/fil0fil.h
485+++ b/storage/innobase/include/fil0fil.h
486@@ -663,8 +663,9 @@
487 void
488 fil_flush(
489 /*======*/
490- ulint space_id); /*!< in: file space id (this can be a group of
491+ ulint space_id, /*!< in: file space id (this can be a group of
492 log files or a tablespace of the database) */
493+ ibool metadata);
494 /**********************************************************************//**
495 Flushes to disk writes in file spaces of the given type possibly cached by
496 the OS. */
497--- a/storage/innobase/include/ha_prototypes.h
498+++ b/storage/innobase/include/ha_prototypes.h
499@@ -284,6 +284,13 @@
500 /*===================*/
501 void* thd, /*!< in: thread handle (THD*) */
502 ulint value); /*!< in: time waited for the lock */
503+/******************************************************************//**
504+Returns the flush_log_at_trx_commit THDVAR for the given thread handle. @return 0, 1 or 2 (the range declared for the THDVAR) */
505+
506+ulong
507+thd_flush_log_at_trx_commit(
508+/*================================*/
509+ void* thd); /*!< in: thread handle (THD*) */
510
511 /**********************************************************************//**
512 Get the current setting of the lower_case_table_names global parameter from
513--- a/storage/innobase/include/os0file.h
514+++ b/storage/innobase/include/os0file.h
515@@ -296,8 +296,8 @@
516 pfs_os_file_write_func(name, file, buf, offset, offset_high, \
517 n, __FILE__, __LINE__)
518
519-# define os_file_flush(file) \
520- pfs_os_file_flush_func(file, __FILE__, __LINE__)
521+# define os_file_flush(file, metadata) \
522+ pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
523
524 # define os_file_rename(key, oldpath, newpath) \
525 pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
526@@ -333,7 +333,7 @@
527 # define os_file_write(name, file, buf, offset, offset_high, n) \
528 os_file_write_func(name, file, buf, offset, offset_high, n)
529
530-# define os_file_flush(file) os_file_flush_func(file)
531+# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
532
533 # define os_file_rename(key, oldpath, newpath) \
534 os_file_rename_func(oldpath, newpath)
535@@ -781,6 +781,7 @@
536 pfs_os_file_flush_func(
537 /*===================*/
538 os_file_t file, /*!< in, own: handle to a file */
539+ ibool metadata,
540 const char* src_file,/*!< in: file name where func invoked */
541 ulint src_line);/*!< in: line where the func invoked */
542
543@@ -860,7 +861,8 @@
544 ibool
545 os_file_flush_func(
546 /*===============*/
547- os_file_t file); /*!< in, own: handle to a file */
548+ os_file_t file, /*!< in, own: handle to a file */
549+ ibool metadata);
550 /***********************************************************************//**
551 Retrieves the last error number if an error occurs in a file io function.
552 The number should be retrieved before any other OS calls (because they may
553--- a/storage/innobase/include/os0file.ic
554+++ b/storage/innobase/include/os0file.ic
555@@ -369,6 +369,7 @@
556 pfs_os_file_flush_func(
557 /*===================*/
558 os_file_t file, /*!< in, own: handle to a file */
559+ ibool metadata,
560 const char* src_file,/*!< in: file name where func invoked */
561 ulint src_line)/*!< in: line where the func invoked */
562 {
563@@ -378,7 +379,7 @@
564
565 register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
566 src_file, src_line);
567- result = os_file_flush_func(file);
568+ result = os_file_flush_func(file, metadata);
569
570 register_pfs_file_io_end(locker, 0);
571
572--- a/storage/innobase/include/srv0srv.h
573+++ b/storage/innobase/include/srv0srv.h
574@@ -138,7 +138,8 @@
575 extern ulint srv_n_log_files;
576 extern ulint srv_log_file_size;
577 extern ulint srv_log_buffer_size;
578-extern ulong srv_flush_log_at_trx_commit;
579+//extern ulong srv_flush_log_at_trx_commit;
580+extern char srv_use_global_flush_log_at_trx_commit;
581 extern char srv_adaptive_flushing;
582
583 /* If this flag is TRUE, then we will load the indexes' (and tables') metadata
584@@ -221,6 +222,16 @@
585 extern ulong srv_max_purge_lag;
586
587 extern ulong srv_replication_delay;
588+
589+extern long long srv_ibuf_max_size;
590+extern ulint srv_ibuf_active_contract;
591+extern ulint srv_ibuf_accel_rate;
592+extern ulint srv_checkpoint_age_target;
593+extern ulint srv_flush_neighbor_pages;
594+extern ulint srv_enable_unsafe_group_commit;
595+extern ulint srv_read_ahead;
596+extern ulint srv_adaptive_flushing_method;
597+
598 /*-------------------------------------------*/
599
600 extern ulint srv_n_rows_inserted;
601@@ -399,8 +410,9 @@
602 when writing data files, but do flush
603 after writing to log files */
604 SRV_UNIX_NOSYNC, /*!< do not flush after writing */
605- SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
606+ SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
607 data files */
608+ SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */
609 };
610
611 /** Alternatives for file i/o in Windows */
612--- a/storage/innobase/log/log0log.c
613+++ b/storage/innobase/log/log0log.c
614@@ -48,6 +48,7 @@
615 #include "srv0start.h"
616 #include "trx0sys.h"
617 #include "trx0trx.h"
618+#include "ha_prototypes.h"
619
620 /*
621 General philosophy of InnoDB redo-logs:
622@@ -359,6 +360,33 @@
623 }
624
625 /************************************************************//**
626+Async modified-age limit: log_sys->max_modified_age_async, capped by srv_checkpoint_age_target when that target is nonzero. */
627+UNIV_INLINE
628+ulint
629+log_max_modified_age_async()
630+{
631+ if (srv_checkpoint_age_target) { /* nonzero target: apply the cap */
632+ return(ut_min(log_sys->max_modified_age_async,
633+ srv_checkpoint_age_target
634+ - srv_checkpoint_age_target / 8)); /* cap is the target minus one eighth of it */
635+ } else {
636+ return(log_sys->max_modified_age_async);
637+ }
638+}
639+
640+UNIV_INLINE /* Async checkpoint-age limit: log_sys->max_checkpoint_age_async, capped at srv_checkpoint_age_target when that target is nonzero. */
641+ulint
642+log_max_checkpoint_age_async()
643+{
644+ if (srv_checkpoint_age_target) { /* nonzero target: apply the cap */
645+ return(ut_min(log_sys->max_checkpoint_age_async,
646+ srv_checkpoint_age_target));
647+ } else {
648+ return(log_sys->max_checkpoint_age_async);
649+ }
650+}
651+
652+/************************************************************//**
653 Closes the log.
654 @return lsn */
655 UNIV_INTERN
656@@ -427,7 +455,7 @@
657 }
658 }
659
660- if (checkpoint_age <= log->max_modified_age_async) {
661+ if (checkpoint_age <= log_max_modified_age_async()) {
662
663 goto function_exit;
664 }
665@@ -435,8 +463,8 @@
666 oldest_lsn = buf_pool_get_oldest_modification();
667
668 if (!oldest_lsn
669- || lsn - oldest_lsn > log->max_modified_age_async
670- || checkpoint_age > log->max_checkpoint_age_async) {
671+ || lsn - oldest_lsn > log_max_modified_age_async()
672+ || checkpoint_age > log_max_checkpoint_age_async()) {
673
674 log->check_flush_or_checkpoint = TRUE;
675 }
676@@ -1100,9 +1128,10 @@
677 group = (log_group_t*)((ulint)group - 1);
678
679 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
680+ && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
681 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
682
683- fil_flush(group->space_id);
684+ fil_flush(group->space_id, FALSE);
685 }
686
687 #ifdef UNIV_DEBUG
688@@ -1121,10 +1150,11 @@
689 logs and cannot end up here! */
690
691 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
692+ && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
693 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
694- && srv_flush_log_at_trx_commit != 2) {
695+ && thd_flush_log_at_trx_commit(NULL) != 2) {
696
697- fil_flush(group->space_id);
698+ fil_flush(group->space_id, FALSE);
699 }
700
701 mutex_enter(&(log_sys->mutex));
702@@ -1501,7 +1531,8 @@
703
704 mutex_exit(&(log_sys->mutex));
705
706- if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
707+ if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
708+ || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
709 /* O_DSYNC means the OS did not buffer the log file at all:
710 so we have also flushed to disk what we have written */
711
712@@ -1511,7 +1542,7 @@
713
714 group = UT_LIST_GET_FIRST(log_sys->log_groups);
715
716- fil_flush(group->space_id);
717+ fil_flush(group->space_id, FALSE);
718 log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
719 }
720
721@@ -2120,10 +2151,10 @@
722
723 sync = TRUE;
724 advance = 2 * (age - log->max_modified_age_sync);
725- } else if (age > log->max_modified_age_async) {
726+ } else if (age > log_max_modified_age_async()) {
727
728 /* A flush is not urgent: we do an asynchronous preflush */
729- advance = age - log->max_modified_age_async;
730+ advance = age - log_max_modified_age_async();
731 } else {
732 advance = 0;
733 }
734@@ -2137,7 +2168,7 @@
735
736 do_checkpoint = TRUE;
737
738- } else if (checkpoint_age > log->max_checkpoint_age_async) {
739+ } else if (checkpoint_age > log_max_checkpoint_age_async()) {
740 /* A checkpoint is not urgent: do it asynchronously */
741
742 do_checkpoint = TRUE;
743@@ -2607,7 +2638,7 @@
744
745 mutex_exit(&(log_sys->mutex));
746
747- fil_flush(group->archive_space_id);
748+ fil_flush(group->archive_space_id, TRUE);
749
750 mutex_enter(&(log_sys->mutex));
751
752@@ -3349,6 +3380,17 @@
753 log_sys->flushed_to_disk_lsn,
754 log_sys->last_checkpoint_lsn);
755
756+ fprintf(file,
757+ "Max checkpoint age %lu\n"
758+ "Checkpoint age target %lu\n"
759+ "Modified age %lu\n"
760+ "Checkpoint age %lu\n",
761+ (ulong) log_sys->max_checkpoint_age,
762+ (ulong) log_max_checkpoint_age_async(),
763+ (ulong) (log_sys->lsn -
764+ log_buf_pool_get_oldest_modification()),
765+ (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
766+
767 current_time = time(NULL);
768
769 time_elapsed = 0.001 + difftime(current_time,
770--- a/storage/innobase/log/log0recv.c
771+++ b/storage/innobase/log/log0recv.c
772@@ -2906,9 +2906,12 @@
773 ib_uint64_t archived_lsn;
774 #endif /* UNIV_LOG_ARCHIVE */
775 byte* buf;
776- byte log_hdr_buf[LOG_FILE_HDR_SIZE];
777+ byte* log_hdr_buf;
778+ byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
779 ulint err;
780
781+ log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
782+
783 #ifdef UNIV_LOG_ARCHIVE
784 ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
785 /** TRUE when recovering from a checkpoint */
786@@ -3468,7 +3471,7 @@
787 exit(1);
788 }
789
790- os_file_flush(log_file);
791+ os_file_flush(log_file, TRUE);
792 os_file_close(log_file);
793 }
794
795@@ -3492,7 +3495,7 @@
796
797 os_file_write(name, log_file, buf, 0, 0,
798 LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
799- os_file_flush(log_file);
800+ os_file_flush(log_file, TRUE);
801 os_file_close(log_file);
802
803 ut_free(buf);
804--- a/storage/innobase/os/os0file.c
805+++ b/storage/innobase/os/os0file.c
806@@ -1424,7 +1424,7 @@
807 #endif
808 #ifdef UNIV_NON_BUFFERED_IO
809 # ifndef UNIV_HOTBACKUP
810- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
811+ if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
812 /* Do not use unbuffered i/o to log files because
813 value 2 denotes that we do not flush the log at every
814 commit, but only once per second */
815@@ -1440,7 +1440,7 @@
816 attributes = 0;
817 #ifdef UNIV_NON_BUFFERED_IO
818 # ifndef UNIV_HOTBACKUP
819- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
820+ if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
821 /* Do not use unbuffered i/o to log files because
822 value 2 denotes that we do not flush the log at every
823 commit, but only once per second */
824@@ -1585,6 +1585,11 @@
825 os_file_set_nocache(file, name, mode_str);
826 }
827
828+ /* ALL_O_DIRECT: O_DIRECT also for transaction log file */
829+ if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
830+ os_file_set_nocache(file, name, mode_str);
831+ }
832+
833 #ifdef USE_FILE_LOCK
834 if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
835
836@@ -2008,7 +2013,7 @@
837
838 ut_free(buf2);
839
840- ret = os_file_flush(file);
841+ ret = os_file_flush(file, TRUE);
842
843 if (ret) {
844 return(TRUE);
845@@ -2046,7 +2051,8 @@
846 int
847 os_file_fsync(
848 /*==========*/
849- os_file_t file) /*!< in: handle to a file */
850+ os_file_t file, /*!< in: handle to a file */
851+ ibool metadata)
852 {
853 int ret;
854 int failures;
855@@ -2055,7 +2061,16 @@
856 failures = 0;
857
858 do {
859+#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
860+ if (metadata) {
861+ ret = fsync(file);
862+ } else {
863+ ret = fdatasync(file);
864+ }
865+#else
866+ (void) metadata;
867 ret = fsync(file);
868+#endif
869
870 os_n_fsyncs++;
871
872@@ -2092,7 +2107,8 @@
873 ibool
874 os_file_flush_func(
875 /*===============*/
876- os_file_t file) /*!< in, own: handle to a file */
877+ os_file_t file, /*!< in, own: handle to a file */
878+ ibool metadata)
879 {
880 #ifdef __WIN__
881 BOOL ret;
882@@ -2142,18 +2158,18 @@
883 /* If we are not on an operating system that supports this,
884 then fall back to a plain fsync. */
885
886- ret = os_file_fsync(file);
887+ ret = os_file_fsync(file, metadata);
888 } else {
889 ret = fcntl(file, F_FULLFSYNC, NULL);
890
891 if (ret) {
892 /* If we are not on a file system that supports this,
893 then fall back to a plain fsync. */
894- ret = os_file_fsync(file);
895+ ret = os_file_fsync(file, metadata);
896 }
897 }
898 #else
899- ret = os_file_fsync(file);
900+ ret = os_file_fsync(file, metadata);
901 #endif
902
903 if (ret == 0) {
904@@ -2336,7 +2352,7 @@
905 the OS crashes, a database page is only partially
906 physically written to disk. */
907
908- ut_a(TRUE == os_file_flush(file));
909+ ut_a(TRUE == os_file_flush(file, TRUE));
910 }
911 # endif /* UNIV_DO_FLUSH */
912
913@@ -2378,7 +2394,7 @@
914 the OS crashes, a database page is only partially
915 physically written to disk. */
916
917- ut_a(TRUE == os_file_flush(file));
918+ ut_a(TRUE == os_file_flush(file, TRUE));
919 }
920 # endif /* UNIV_DO_FLUSH */
921
922@@ -2750,7 +2766,7 @@
923
924 # ifdef UNIV_DO_FLUSH
925 if (!os_do_not_call_flush_at_each_write) {
926- ut_a(TRUE == os_file_flush(file));
927+ ut_a(TRUE == os_file_flush(file, TRUE));
928 }
929 # endif /* UNIV_DO_FLUSH */
930
931@@ -4296,7 +4312,7 @@
932 #ifdef UNIV_DO_FLUSH
933 if (slot->type == OS_FILE_WRITE
934 && !os_do_not_call_flush_at_each_write) {
935- if (!os_file_flush(slot->file)) {
936+ if (!os_file_flush(slot->file, TRUE)) {
937 ut_error;
938 }
939 }
940@@ -4597,7 +4613,7 @@
941 #ifdef UNIV_DO_FLUSH
942 if (slot->type == OS_FILE_WRITE
943 && !os_do_not_call_flush_at_each_write)
944- && !os_file_flush(slot->file) {
945+ && !os_file_flush(slot->file, TRUE) {
946 ut_error;
947 }
948 #endif /* UNIV_DO_FLUSH */
949--- a/storage/innobase/srv/srv0srv.c
950+++ b/storage/innobase/srv/srv0srv.c
951@@ -183,7 +183,8 @@
952 UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
953 /* size in database pages */
954 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
955-UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
956+//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
957+UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
958
959 /* Try to flush dirty pages so as to avoid IO bursts at
960 the checkpoints. */
961@@ -404,6 +405,17 @@
962
963 UNIV_INTERN ulong srv_replication_delay = 0;
964
965+UNIV_INTERN long long srv_ibuf_max_size = 0;
966+UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
967+UNIV_INTERN ulint srv_ibuf_accel_rate = 100;
968+#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) (pct) / 10000.0))) /* pct percent of srv_io_capacity, scaled by srv_ibuf_accel_rate percent; (pct) is parenthesized so the cast covers the whole argument */
969+
970+UNIV_INTERN ulint srv_checkpoint_age_target = 0;
971+UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:area 2:contiguous */
972+
973+UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
974+UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
975+UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
976 /*-------------------------------------------*/
977 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
978 UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
979@@ -2713,7 +2725,7 @@
980
981 ut_ad(!mutex_own(&kernel_mutex));
982
983- ut_a(srv_n_purge_threads == 0);
984+ ut_a(srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0));
985
986 do {
987 /* Check for shutdown and change in purge config. */
988@@ -2746,6 +2758,7 @@
989 ulint n_pages_purged = 0;
990 ulint n_bytes_merged;
991 ulint n_pages_flushed;
992+ ulint n_pages_flushed_prev = 0;
993 ulint n_bytes_archived;
994 ulint n_tables_to_drop;
995 ulint n_ios;
996@@ -2753,7 +2766,20 @@
997 ulint n_ios_very_old;
998 ulint n_pend_ios;
999 ulint next_itr_time;
1000+ ulint prev_adaptive_flushing_method = ULINT_UNDEFINED;
1001+ ulint inner_loop = 0;
1002+ ibool skip_sleep = FALSE;
1003 ulint i;
1004+ struct t_prev_flush_info_struct {
1005+ ulint count;
1006+ unsigned space:32;
1007+ unsigned offset:32;
1008+ ib_uint64_t oldest_modification;
1009+ } prev_flush_info[MAX_BUFFER_POOLS];
1010+
1011+ ib_uint64_t lsn_old;
1012+
1013+ ib_uint64_t oldest_lsn;
1014
1015 #ifdef UNIV_DEBUG_THREAD_CREATION
1016 fprintf(stderr, "Master thread starts, id %lu\n",
1017@@ -2775,6 +2801,9 @@
1018
1019 mutex_exit(&kernel_mutex);
1020
1021+ mutex_enter(&(log_sys->mutex));
1022+ lsn_old = log_sys->lsn;
1023+ mutex_exit(&(log_sys->mutex));
1024 loop:
1025 /*****************************************************************/
1026 /* ---- When there is database activity by users, we cycle in this
1027@@ -2805,9 +2834,13 @@
1028 /* Sleep for 1 second on entrying the for loop below the first time. */
1029 next_itr_time = ut_time_ms() + 1000;
1030
1031+ skip_sleep = FALSE;
1032+
1033 for (i = 0; i < 10; i++) {
1034 ulint cur_time = ut_time_ms();
1035
1036+ n_pages_flushed = 0; /* initialize */
1037+
1038 /* ALTER TABLE in MySQL requires on Unix that the table handler
1039 can drop tables lazily after there no longer are SELECT
1040 queries to them. */
1041@@ -2831,6 +2864,7 @@
1042 srv_main_thread_op_info = "sleeping";
1043 srv_main_1_second_loops++;
1044
1045+ if (!skip_sleep) {
1046 if (next_itr_time > cur_time
1047 && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
1048
1049@@ -2841,10 +2875,26 @@
1050 (next_itr_time - cur_time)
1051 * 1000));
1052 srv_main_sleeps++;
1053+
1054+ /*
1055+ mutex_enter(&(log_sys->mutex));
1056+ oldest_lsn = buf_pool_get_oldest_modification();
1057+ ib_uint64_t lsn = log_sys->lsn;
1058+ mutex_exit(&(log_sys->mutex));
1059+
1060+ if(oldest_lsn)
1061+ fprintf(stderr,
1062+ "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
1063+ (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1064+ lsn - lsn_old);
1065+ */
1066 }
1067
1068 /* Each iteration should happen at 1 second interval. */
1069 next_itr_time = ut_time_ms() + 1000;
1070+ } /* if (!skip_sleep) */
1071+
1072+ skip_sleep = FALSE;
1073
1074 /* Flush logs if needed */
1075 srv_sync_log_buffer_in_background();
1076@@ -2864,7 +2914,7 @@
1077 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
1078 && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
1079 srv_main_thread_op_info = "doing insert buffer merge";
1080- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1081+ ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1082
1083 /* Flush logs if needed */
1084 srv_sync_log_buffer_in_background();
1085@@ -2881,7 +2931,11 @@
1086 n_pages_flushed = buf_flush_list(
1087 PCT_IO(100), IB_ULONGLONG_MAX);
1088
1089- } else if (srv_adaptive_flushing) {
1090+ mutex_enter(&(log_sys->mutex));
1091+ lsn_old = log_sys->lsn;
1092+ mutex_exit(&(log_sys->mutex));
1093+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
1094+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
1095
1096 /* Try to keep the rate of flushing of dirty
1097 pages such that redo log generation does not
1098@@ -2897,6 +2951,224 @@
1099 n_flush,
1100 IB_ULONGLONG_MAX);
1101 }
1102+
1103+ mutex_enter(&(log_sys->mutex));
1104+ lsn_old = log_sys->lsn;
1105+ mutex_exit(&(log_sys->mutex));
1106+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
1107+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
1108+
1109+ /* Try to keep the modified age from exceeding
1110+ max_checkpoint_age * 7/8 */
1111+
1112+ mutex_enter(&(log_sys->mutex));
1113+
1114+ oldest_lsn = buf_pool_get_oldest_modification();
1115+ if (oldest_lsn == 0) {
1116+ lsn_old = log_sys->lsn;
1117+ mutex_exit(&(log_sys->mutex));
1118+
1119+ } else {
1120+ if ((log_sys->lsn - oldest_lsn)
1121+ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
1122+ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
1123+ /* We should not flush from here. */
1124+ lsn_old = log_sys->lsn;
1125+ mutex_exit(&(log_sys->mutex));
1126+ } else if ((log_sys->lsn - oldest_lsn)
1127+ > (log_sys->max_checkpoint_age)/4 ) {
1128+
1129+ /* defence line (max_checkpoint_age * 1/4) */
1130+ ib_uint64_t lsn = log_sys->lsn;
1131+
1132+ ib_uint64_t level, bpl;
1133+ buf_page_t* bpage;
1134+ ulint j;
1135+
1136+ mutex_exit(&(log_sys->mutex));
1137+
1138+ bpl = 0;
1139+
1140+ for (j = 0; j < srv_buf_pool_instances; j++) {
1141+ buf_pool_t* buf_pool;
1142+ ulint n_blocks;
1143+
1144+ buf_pool = buf_pool_from_array(j);
1145+
1146+ /* The flush_list is scanned optimistically here */
1147+
1148+ level = 0;
1149+ n_blocks = 0;
1150+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1151+
1152+ while (bpage != NULL) {
1153+ ib_uint64_t oldest_modification = bpage->oldest_modification;
1154+ if (oldest_modification != 0) {
1155+ level += log_sys->max_checkpoint_age
1156+ - (lsn - oldest_modification);
1157+ }
1158+ bpage = UT_LIST_GET_NEXT(list, bpage);
1159+ n_blocks++;
1160+ }
1161+
1162+ if (level) {
1163+ bpl += ((ib_uint64_t) n_blocks * n_blocks
1164+ * (lsn - lsn_old)) / level;
1165+ }
1166+
1167+ }
1168+
1169+ if (!srv_use_doublewrite_buf) {
1170+ /* flush is faster than when doublewrite */
1171+ bpl = (bpl * 7) / 8;
1172+ }
1173+
1174+ if (bpl) {
1175+retry_flush_batch:
1176+ n_pages_flushed = buf_flush_list(bpl,
1177+ oldest_lsn + (lsn - lsn_old));
1178+ if (n_pages_flushed == ULINT_UNDEFINED) {
1179+ os_thread_sleep(5000);
1180+ goto retry_flush_batch;
1181+ }
1182+ }
1183+
1184+ lsn_old = lsn;
1185+ /*
1186+ fprintf(stderr,
1187+ "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
1188+ (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1189+ lsn - lsn_old, bpl);
1190+ */
1191+ } else {
1192+ lsn_old = log_sys->lsn;
1193+ mutex_exit(&(log_sys->mutex));
1194+ }
1195+ }
1196+ prev_adaptive_flushing_method = 1;
1197+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
1198+ buf_pool_t* buf_pool;
1199+ buf_page_t* bpage;
1200+ ib_uint64_t lsn;
1201+ ulint j;
1202+
1203+ mutex_enter(&(log_sys->mutex));
1204+ oldest_lsn = buf_pool_get_oldest_modification();
1205+ lsn = log_sys->lsn;
1206+ mutex_exit(&(log_sys->mutex));
1207+
1208+ /* run this loop 10 times per second (every 100 ms) */
1209+ next_itr_time -= 900; /* 1000 - 900 == 100 */
1210+ inner_loop++;
1211+ if (inner_loop < 10) {
1212+ i--;
1213+ } else {
1214+ inner_loop = 0;
1215+ }
1216+
1217+ if (prev_adaptive_flushing_method == 2) {
1218+ lint n_flush;
1219+ lint blocks_sum;
1220+ ulint new_blocks_sum, flushed_blocks_sum;
1221+
1222+ blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
1223+
1224+ /* prev_flush_info[j] should hold the values saved by the previous loop iteration */
1225+ for (j = 0; j < srv_buf_pool_instances; j++) {
1226+ lint blocks_num, new_blocks_num, flushed_blocks_num;
1227+ ibool found;
1228+
1229+ buf_pool = buf_pool_from_array(j);
1230+
1231+ blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
1232+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1233+ new_blocks_num = 0;
1234+
1235+ found = FALSE;
1236+ while (bpage != NULL) {
1237+ if (prev_flush_info[j].space == bpage->space
1238+ && prev_flush_info[j].offset == bpage->offset
1239+ && prev_flush_info[j].oldest_modification
1240+ == bpage->oldest_modification) {
1241+ found = TRUE;
1242+ break;
1243+ }
1244+ bpage = UT_LIST_GET_NEXT(list, bpage);
1245+ new_blocks_num++;
1246+ }
1247+ if (!found) {
1248+ new_blocks_num = blocks_num;
1249+ }
1250+
1251+ flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
1252+ - blocks_num;
1253+ if (flushed_blocks_num < 0) {
1254+ flushed_blocks_num = 0;
1255+ }
1256+
1257+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1258+
1259+ prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1260+ if (bpage) {
1261+ prev_flush_info[j].space = bpage->space;
1262+ prev_flush_info[j].offset = bpage->offset;
1263+ prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1264+ } else {
1265+ prev_flush_info[j].space = 0;
1266+ prev_flush_info[j].offset = 0;
1267+ prev_flush_info[j].oldest_modification = 0;
1268+ }
1269+
1270+ new_blocks_sum += new_blocks_num;
1271+ flushed_blocks_sum += flushed_blocks_num;
1272+ blocks_sum += blocks_num;
1273+ }
1274+
1275+ n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
1276+ if (flushed_blocks_sum > n_pages_flushed_prev) {
1277+ n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
1278+ }
1279+
1280+ if (n_flush > 0) {
1281+ n_flush++;
1282+ n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
1283+ } else {
1284+ n_pages_flushed = 0;
1285+ }
1286+ } else {
1287+ /* store previous first pages of the flush_list */
1288+ for (j = 0; j < srv_buf_pool_instances; j++) {
1289+ buf_pool = buf_pool_from_array(j);
1290+
1291+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1292+
1293+ prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1294+ if (bpage) {
1295+ prev_flush_info[j].space = bpage->space;
1296+ prev_flush_info[j].offset = bpage->offset;
1297+ prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1298+ } else {
1299+ prev_flush_info[j].space = 0;
1300+ prev_flush_info[j].offset = 0;
1301+ prev_flush_info[j].oldest_modification = 0;
1302+ }
1303+ }
1304+ n_pages_flushed = 0;
1305+ }
1306+
1307+ lsn_old = lsn;
1308+ prev_adaptive_flushing_method = 2;
1309+ } else {
1310+ mutex_enter(&(log_sys->mutex));
1311+ lsn_old = log_sys->lsn;
1312+ mutex_exit(&(log_sys->mutex));
1313+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
1314+ }
1315+
1316+ if (n_pages_flushed == ULINT_UNDEFINED) {
1317+ n_pages_flushed_prev = 0;
1318+ } else {
1319+ n_pages_flushed_prev = n_pages_flushed;
1320 }
1321
1322 if (srv_activity_count == old_activity_count) {
1323@@ -2945,12 +3217,12 @@
1324 even if the server were active */
1325
1326 srv_main_thread_op_info = "doing insert buffer merge";
1327- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1328+ ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1329
1330 /* Flush logs if needed */
1331 srv_sync_log_buffer_in_background();
1332
1333- if (srv_n_purge_threads == 0) {
1334+ if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1335 srv_main_thread_op_info = "master purging";
1336
1337 srv_master_do_purge();
1338@@ -3028,7 +3300,7 @@
1339 }
1340 }
1341
1342- if (srv_n_purge_threads == 0) {
1343+ if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1344 srv_main_thread_op_info = "master purging";
1345
1346 srv_master_do_purge();
1347@@ -3053,7 +3325,7 @@
1348 buf_flush_list below. Otherwise, the system favors
1349 clean pages over cleanup throughput. */
1350 n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
1351- PCT_IO(100));
1352+ PCT_IBUF_IO(100));
1353 }
1354
1355 srv_main_thread_op_info = "reserving kernel mutex";
1356@@ -3193,6 +3465,7 @@
1357 srv_slot_t* slot;
1358 ulint retries = 0;
1359 ulint n_total_purged = ULINT_UNDEFINED;
1360+ ulint next_itr_time;
1361
1362 ut_a(srv_n_purge_threads == 1);
1363
1364@@ -3213,9 +3486,12 @@
1365
1366 mutex_exit(&kernel_mutex);
1367
1368+ next_itr_time = ut_time_ms();
1369+
1370 while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
1371
1372 ulint n_pages_purged = 0;
1373+ ulint cur_time;
1374
1375 /* If there are very few records to purge or the last
1376 purge didn't purge any records then wait for activity.
1377@@ -3262,6 +3538,16 @@
1378 } while (n_pages_purged > 0 && !srv_fast_shutdown);
1379
1380 srv_sync_log_buffer_in_background();
1381+
1382+ cur_time = ut_time_ms();
1383+ if (next_itr_time > cur_time) {
1384+ os_thread_sleep(ut_min(1000000,
1385+ (next_itr_time - cur_time)
1386+ * 1000));
1387+ next_itr_time = ut_time_ms() + 1000;
1388+ } else {
1389+ next_itr_time = cur_time + 1000;
1390+ }
1391 }
1392
1393 mutex_enter(&kernel_mutex);
1394--- a/storage/innobase/srv/srv0start.c
1395+++ b/storage/innobase/srv/srv0start.c
1396@@ -1237,6 +1237,9 @@
1397 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1398 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1399
1400+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1401+ srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1402+
1403 } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1404 srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1405
1406--- a/storage/innobase/trx/trx0purge.c
1407+++ b/storage/innobase/trx/trx0purge.c
1408@@ -392,10 +392,10 @@
1409 trx_sys->rseg_history_len++;
1410 mutex_exit(&kernel_mutex);
1411
1412- if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
1413+// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/
1414 /* Inform the purge thread that there is work to do. */
1415 srv_wake_purge_thread_if_not_active();
1416- }
1417+// }
1418 }
1419
1420 /**********************************************************************//**
1421--- a/storage/innobase/trx/trx0trx.c
1422+++ b/storage/innobase/trx/trx0trx.c
1423@@ -984,6 +984,7 @@
1424 trx->read_view = NULL;
1425
1426 if (lsn) {
1427+ ulint flush_log_at_trx_commit;
1428
1429 mutex_exit(&kernel_mutex);
1430
1431@@ -992,6 +993,12 @@
1432 trx_undo_insert_cleanup(trx);
1433 }
1434
1435+ if (srv_use_global_flush_log_at_trx_commit) {
1436+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1437+ } else {
1438+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1439+ }
1440+
1441 /* NOTE that we could possibly make a group commit more
1442 efficient here: call os_thread_yield here to allow also other
1443 trxs to come to commit! */
1444@@ -1023,9 +1030,9 @@
1445 if (trx->flush_log_later) {
1446 /* Do nothing yet */
1447 trx->must_flush_log_later = TRUE;
1448- } else if (srv_flush_log_at_trx_commit == 0) {
1449+ } else if (flush_log_at_trx_commit == 0) {
1450 /* Do nothing */
1451- } else if (srv_flush_log_at_trx_commit == 1) {
1452+ } else if (flush_log_at_trx_commit == 1) {
1453 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1454 /* Write the log but do not flush it to disk */
1455
1456@@ -1037,7 +1044,7 @@
1457
1458 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1459 }
1460- } else if (srv_flush_log_at_trx_commit == 2) {
1461+ } else if (flush_log_at_trx_commit == 2) {
1462
1463 /* Write the log but do not flush it to disk */
1464
1465@@ -1701,16 +1708,23 @@
1466 trx_t* trx) /*!< in: trx handle */
1467 {
1468 ib_uint64_t lsn = trx->commit_lsn;
1469+ ulint flush_log_at_trx_commit;
1470
1471 ut_a(trx);
1472
1473 trx->op_info = "flushing log";
1474
1475+ if (srv_use_global_flush_log_at_trx_commit) {
1476+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1477+ } else {
1478+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1479+ }
1480+
1481 if (!trx->must_flush_log_later) {
1482 /* Do nothing */
1483- } else if (srv_flush_log_at_trx_commit == 0) {
1484+ } else if (flush_log_at_trx_commit == 0) {
1485 /* Do nothing */
1486- } else if (srv_flush_log_at_trx_commit == 1) {
1487+ } else if (flush_log_at_trx_commit == 1) {
1488 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1489 /* Write the log but do not flush it to disk */
1490
1491@@ -1721,7 +1735,7 @@
1492
1493 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1494 }
1495- } else if (srv_flush_log_at_trx_commit == 2) {
1496+ } else if (flush_log_at_trx_commit == 2) {
1497
1498 /* Write the log but do not flush it to disk */
1499
1500@@ -1969,6 +1983,8 @@
1501 /*--------------------------------------*/
1502
1503 if (lsn) {
1504+ ulint flush_log_at_trx_commit;
1505+
1506 /* Depending on the my.cnf options, we may now write the log
1507 buffer to the log files, making the prepared state of the
1508 transaction durable if the OS does not crash. We may also
1509@@ -1988,9 +2004,15 @@
1510
1511 mutex_exit(&kernel_mutex);
1512
1513- if (srv_flush_log_at_trx_commit == 0) {
1514+ if (srv_use_global_flush_log_at_trx_commit) {
1515+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1516+ } else {
1517+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1518+ }
1519+
1520+ if (flush_log_at_trx_commit == 0) {
1521 /* Do nothing */
1522- } else if (srv_flush_log_at_trx_commit == 1) {
1523+ } else if (flush_log_at_trx_commit == 1) {
1524 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1525 /* Write the log but do not flush it to disk */
1526
1527@@ -2002,7 +2024,7 @@
1528
1529 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1530 }
1531- } else if (srv_flush_log_at_trx_commit == 2) {
1532+ } else if (flush_log_at_trx_commit == 2) {
1533
1534 /* Write the log but do not flush it to disk */
1535
1536--- a/mysql-test/include/default_mysqld.cnf
1537+++ b/mysql-test/include/default_mysqld.cnf
1538@@ -29,7 +29,7 @@
1539 max_heap_table_size= 1M
1540
1541 loose-innodb_data_file_path= ibdata1:10M:autoextend
1542-loose-innodb_buffer_pool_size= 8M
1543+loose-innodb_buffer_pool_size= 32M
1544 loose-innodb_write_io_threads= 2
1545 loose-innodb_read_io_threads= 2
1546 loose-innodb_log_buffer_size= 1M
1547--- a/mysql-test/suite/innodb/r/innodb.result
1548+++ b/mysql-test/suite/innodb/r/innodb.result
1549@@ -1678,7 +1678,7 @@
1550 drop table t1;
1551 SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total';
1552 variable_value
1553-511
1554+2047
1555 SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size';
1556 variable_value
1557 16384
1558--- /dev/null
1559+++ b/mysql-test/suite/innodb/r/percona_flush_contiguous_neighbors.result
1560@@ -0,0 +1,21 @@
1561+DROP TABLE IF EXISTS t1;
1562+CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB;
1563+INSERT INTO t1(foo) VALUES ('a'), ('b');
1564+INSERT INTO t1(foo) SELECT foo FROM t1;
1565+INSERT INTO t1(foo) SELECT foo FROM t1;
1566+INSERT INTO t1(foo) SELECT foo FROM t1;
1567+INSERT INTO t1(foo) SELECT foo FROM t1;
1568+INSERT INTO t1(foo) SELECT foo FROM t1;
1569+INSERT INTO t1(foo) SELECT foo FROM t1;
1570+INSERT INTO t1(foo) SELECT foo FROM t1;
1571+INSERT INTO t1(foo) SELECT foo FROM t1;
1572+INSERT INTO t1(foo) SELECT foo FROM t1;
1573+INSERT INTO t1(foo) SELECT foo FROM t1;
1574+INSERT INTO t1(foo) SELECT foo FROM t1;
1575+INSERT INTO t1(foo) SELECT foo FROM t1;
1576+INSERT INTO t1(foo) SELECT foo FROM t1;
1577+INSERT INTO t1(foo) SELECT foo FROM t1;
1578+INSERT INTO t1(foo) SELECT foo FROM t1;
1579+INSERT INTO t1(foo) SELECT foo FROM t1;
1580+INSERT INTO t1(foo) SELECT foo FROM t1;
1581+DROP TABLE t1;
1582--- /dev/null
1583+++ b/mysql-test/suite/innodb/t/percona_flush_contiguous_neighbors-master.opt
1584@@ -0,0 +1 @@
1585+--innodb_flush_neighbor_pages=cont
1586--- /dev/null
1587+++ b/mysql-test/suite/innodb/t/percona_flush_contiguous_neighbors.test
1588@@ -0,0 +1,36 @@
1589+# Test for innodb_flush_neighbor_pages=contiguous.
1590+# The test is very crude: we simply overflow the buffer pool with such a number of
1591+# new/modified pages that some flushing is bound to happen.
1592+
1593+--source include/have_innodb.inc
1594+
1595+--disable_warnings
1596+DROP TABLE IF EXISTS t1;
1597+--enable_warnings
1598+
1599+CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB;
1600+
1601+INSERT INTO t1(foo) VALUES ('a'), ('b');
1602+INSERT INTO t1(foo) SELECT foo FROM t1;
1603+INSERT INTO t1(foo) SELECT foo FROM t1;
1604+INSERT INTO t1(foo) SELECT foo FROM t1;
1605+INSERT INTO t1(foo) SELECT foo FROM t1;
1606+INSERT INTO t1(foo) SELECT foo FROM t1;
1607+INSERT INTO t1(foo) SELECT foo FROM t1;
1608+INSERT INTO t1(foo) SELECT foo FROM t1;
1609+INSERT INTO t1(foo) SELECT foo FROM t1;
1610+INSERT INTO t1(foo) SELECT foo FROM t1;
1611+INSERT INTO t1(foo) SELECT foo FROM t1;
1612+INSERT INTO t1(foo) SELECT foo FROM t1;
1613+INSERT INTO t1(foo) SELECT foo FROM t1;
1614+INSERT INTO t1(foo) SELECT foo FROM t1;
1615+INSERT INTO t1(foo) SELECT foo FROM t1;
1616+INSERT INTO t1(foo) SELECT foo FROM t1;
1617+INSERT INTO t1(foo) SELECT foo FROM t1;
1618+INSERT INTO t1(foo) SELECT foo FROM t1;
1619+
1620+# TODO: cannot record a stable value here. A check of > 0 should be enough,
1621+# but the variable is not accessible through INFORMATION_SCHEMA currently.
1622+# SHOW GLOBAL STATUS LIKE 'Innodb_buffer_pool_pages_flushed';
1623+
1624+DROP TABLE t1;
1625--- a/mysql-test/suite/innodb/t/innodb_cmp_drop_table-master.opt
1626+++ b/mysql-test/suite/innodb/t/innodb_cmp_drop_table-master.opt
1627@@ -1 +1 @@
1628---innodb-buffer-pool-size=8M
1629+--innodb-buffer-pool-size=32M
1630--- a/mysql-test/suite/innodb/t/innodb_cmp_drop_table.test
1631+++ b/mysql-test/suite/innodb/t/innodb_cmp_drop_table.test
1632@@ -36,13 +36,14 @@
1633
1634 -- disable_query_log
1635
1636--- let $i = 400
1637+-- let $i = 4000
1638+begin;
1639 while ($i)
1640 {
1641 insert into t2 values(repeat('abcdefghijklmnopqrstuvwxyz',1000));
1642 dec $i;
1643 }
1644-
1645+commit;
1646 -- enable_query_log
1647
1648 # now there should be no 8K pages in the buffer pool
This page took 0.414449 seconds and 4 git commands to generate.