]> git.pld-linux.org Git - packages/mysql.git/blame - innodb_io_patches.patch
- no need for su, log dir is not writable by non-root uid/gid
[packages/mysql.git] / innodb_io_patches.patch
CommitLineData
b4e1fa2c
AM
1# name : innodb_io_patches.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
db82db79
AM
8--- a/storage/innobase/buf/buf0buf.c
9+++ b/storage/innobase/buf/buf0buf.c
b4e1fa2c
AM
10@@ -320,6 +320,7 @@
11
12 /* When we traverse all the flush lists we don't want another
13 thread to add a dirty page to any flush list. */
14+ if (srv_buf_pool_instances > 1)
15 log_flush_order_mutex_enter();
16
17 for (i = 0; i < srv_buf_pool_instances; i++) {
18@@ -343,6 +344,7 @@
19 }
20 }
21
22+ if (srv_buf_pool_instances > 1)
23 log_flush_order_mutex_exit();
24
25 /* The returned answer may be out of date: the flush_list can
db82db79
AM
26--- a/storage/innobase/buf/buf0flu.c
27+++ b/storage/innobase/buf/buf0flu.c
28@@ -857,7 +857,7 @@
413cadc7
AM
29 flush:
30 /* Now flush the doublewrite buffer data to disk */
31
32- fil_flush(TRX_SYS_SPACE);
33+ fil_flush(TRX_SYS_SPACE, FALSE);
34
35 /* We know that the writes have been flushed to disk now
36 and in recovery we will find them in the doublewrite buffer
1bfc1981
AM
37@@ -1375,10 +1375,11 @@
38 ulint high;
39 ulint count = 0;
40 buf_pool_t* buf_pool = buf_pool_get(space, offset);
41+ ibool is_forward_scan;
b4e1fa2c
AM
42
43 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
44
45- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
46+ if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) {
47 /* If there is little space, it is better not to flush
48 any block except from the end of the LRU list */
49
1bfc1981
AM
50@@ -1405,7 +1406,32 @@
51 high = fil_space_get_size(space);
52 }
53
54- for (i = low; i < high; i++) {
55+ if (srv_flush_neighbor_pages == 2) {
56+
57+ /* In the case of contiguous flush where the requested page
58+ does not fall at the start of flush area, first scan backward
59+ from the page and later forward from it. */
60+ is_forward_scan = (offset == low);
61+ }
62+ else {
63+ is_forward_scan = TRUE;
64+ }
65+
66+scan:
67+ if (srv_flush_neighbor_pages == 2) {
68+ if (is_forward_scan) {
69+ i = offset;
70+ }
71+ else {
72+ i = offset - 1;
73+ }
74+ }
75+ else {
76+ i = low;
77+ }
78+
79+ for (; is_forward_scan ? (i < high) : (i >= low);
80+ is_forward_scan ? i++ : i--) {
81
82 buf_page_t* bpage;
83
84@@ -1434,6 +1460,12 @@
85 if (!bpage) {
86
87 buf_pool_mutex_exit(buf_pool);
88+ if (srv_flush_neighbor_pages == 2) {
89+
90+ /* This is contiguous neighbor page flush and
91+ the pages here are not contiguous. */
92+ break;
93+ }
94 continue;
95 }
96
97@@ -1470,6 +1502,22 @@
98 }
99 }
100 buf_pool_mutex_exit(buf_pool);
101+
102+ if (srv_flush_neighbor_pages == 2) {
103+
104+ /* We are trying to do the contiguous neighbor page
105+ flush, but the last page we checked was unflushable,
106+ making a "hole" in the flush, so stop this attempt. */
107+ break;
108+ }
109+ }
110+
111+ if (!is_forward_scan) {
112+
113+ /* Backward scan done, now do the forward scan */
114+ ut_a (srv_flush_neighbor_pages == 2);
115+ is_forward_scan = TRUE;
116+ goto scan;
117 }
118
119 return(count);
120@@ -1940,6 +1988,22 @@
121
122 buf_pool = buf_pool_from_array(i);
123
124+ if (lsn_limit != IB_ULONGLONG_MAX) {
125+ buf_page_t* bpage;
126+
127+ buf_flush_list_mutex_enter(buf_pool);
128+ bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
129+ if (!bpage
130+ || bpage->oldest_modification >= lsn_limit) {
131+
132+ buf_flush_list_mutex_exit(buf_pool);
133+ continue;
134+ } else {
135+
136+ buf_flush_list_mutex_exit(buf_pool);
137+ }
138+ }
139+
140 if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
141 /* We have two choices here. If lsn_limit was
142 specified then skipping an instance of buffer
db82db79
AM
143--- a/storage/innobase/buf/buf0rea.c
144+++ b/storage/innobase/buf/buf0rea.c
734d6226
AM
145@@ -427,6 +427,10 @@
146 = BUF_READ_AHEAD_AREA(buf_pool);
b4e1fa2c
AM
147 ulint threshold;
148
149+ if (!(srv_read_ahead & 2)) {
150+ return(0);
151+ }
152+
153 if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
154 /* No read-ahead to avoid thread deadlocks */
155 return(0);
db82db79
AM
156--- a/storage/innobase/fil/fil0fil.c
157+++ b/storage/innobase/fil/fil0fil.c
29ffd636 158@@ -2609,7 +2609,7 @@
413cadc7
AM
159
160 os_thread_sleep(20000);
161
162- fil_flush(id);
163+ fil_flush(id, TRUE);
164
165 goto retry;
166
29ffd636 167@@ -2823,7 +2823,7 @@
413cadc7
AM
168 goto error_exit;
169 }
170
171- ret = os_file_flush(file);
172+ ret = os_file_flush(file, TRUE);
173
174 if (!ret) {
175 fputs("InnoDB: Error: file flush of tablespace ", stderr);
29ffd636 176@@ -3009,7 +3009,7 @@
413cadc7
AM
177 }
178 }
179
180- success = os_file_flush(file);
181+ success = os_file_flush(file, TRUE);
182 if (!success) {
183
184 goto func_exit;
29ffd636 185@@ -3031,7 +3031,7 @@
413cadc7
AM
186
187 goto func_exit;
188 }
189- success = os_file_flush(file);
190+ success = os_file_flush(file, TRUE);
191 func_exit:
192 os_file_close(file);
193 ut_free(buf2);
29ffd636 194@@ -4014,7 +4014,7 @@
413cadc7
AM
195 size_after_extend, *actual_size); */
196 mutex_exit(&fil_system->mutex);
197
198- fil_flush(space_id);
199+ fil_flush(space_id, TRUE);
200
201 return(success);
202 }
29ffd636 203@@ -4585,8 +4585,9 @@
413cadc7
AM
204 void
205 fil_flush(
206 /*======*/
207- ulint space_id) /*!< in: file space id (this can be a group of
208+ ulint space_id, /*!< in: file space id (this can be a group of
209 log files or a tablespace of the database) */
210+ ibool metadata)
211 {
212 fil_space_t* space;
213 fil_node_t* node;
29ffd636 214@@ -4657,7 +4658,7 @@
413cadc7
AM
215 /* fprintf(stderr, "Flushing to file %s\n",
216 node->name); */
217
218- os_file_flush(file);
219+ os_file_flush(file, metadata);
220
221 mutex_enter(&fil_system->mutex);
222
29ffd636 223@@ -4740,7 +4741,7 @@
413cadc7
AM
224 a non-existing space id. */
225 for (i = 0; i < n_space_ids; i++) {
226
227- fil_flush(space_ids[i]);
228+ fil_flush(space_ids[i], TRUE);
229 }
230
231 mem_free(space_ids);
db82db79
AM
232--- a/storage/innobase/handler/ha_innodb.cc
233+++ b/storage/innobase/handler/ha_innodb.cc
234@@ -445,6 +445,12 @@
b4e1fa2c
AM
235 "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
236 NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
237
238+static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
239+ "Set to 0 (write and flush once per second),"
240+ " 1 (write and flush at each commit)"
241+ " or 2 (write at commit, flush once per second).",
242+ NULL, NULL, 1, 0, 2, 0);
243+
244
245 static handler *innobase_create_handler(handlerton *hton,
246 TABLE_SHARE *table,
734d6226 247@@ -841,6 +847,17 @@
b4e1fa2c
AM
248 }
249 }
250
251+/******************************************************************//**
252+Returns the flush_log_at_trx_commit THDVAR value read for the given thread handle. */
253+extern "C" UNIV_INTERN
254+ulong
255+thd_flush_log_at_trx_commit(
256+/*================================*/
257+ void* thd)
258+{
259+ return(THDVAR((THD*) thd, flush_log_at_trx_commit));
260+}
261+
262 /********************************************************************//**
263 Obtain the InnoDB transaction of a MySQL thread.
264 @return reference to transaction pointer */
734d6226 265@@ -2471,6 +2488,9 @@
b4e1fa2c
AM
266 srv_n_read_io_threads = (ulint) innobase_read_io_threads;
267 srv_n_write_io_threads = (ulint) innobase_write_io_threads;
268
269+ srv_read_ahead &= 3;
270+ srv_adaptive_flushing_method %= 3;
271+
272 srv_force_recovery = (ulint) innobase_force_recovery;
273
274 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
29ffd636 275@@ -11141,7 +11161,7 @@
b4e1fa2c 276 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
11822e22 277 "Purge threads can be either 0 or 1.",
b4e1fa2c
AM
278 NULL, NULL,
279- 0, /* Default setting */
280+ 1, /* Default setting */
281 0, /* Minimum value */
282 1, 0); /* Maximum value */
283
29ffd636 284@@ -11183,12 +11203,18 @@
b4e1fa2c
AM
285 innodb_file_format_max_validate,
286 innodb_file_format_max_update, "Antelope");
287
288-static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
289- PLUGIN_VAR_OPCMDARG,
290- "Set to 0 (write and flush once per second),"
291- " 1 (write and flush at each commit)"
292- " or 2 (write at commit, flush once per second).",
293- NULL, NULL, 1, 0, 2, 0);
294+/* Changed to the THDVAR */
295+//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
296+// PLUGIN_VAR_OPCMDARG,
297+// "Set to 0 (write and flush once per second),"
298+// " 1 (write and flush at each commit)"
299+// " or 2 (write at commit, flush once per second).",
300+// NULL, NULL, 1, 0, 2, 0);
301+
302+static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
303+ PLUGIN_VAR_NOCMDARG,
304+ "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
305+ NULL, NULL, TRUE);
306
307 static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
308 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
29ffd636 309@@ -11293,7 +11319,7 @@
b4e1fa2c
AM
310 static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
311 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
312 "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
313- NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
314+ NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
315
316 static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
317 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
29ffd636 318@@ -11450,6 +11476,135 @@
b4e1fa2c
AM
319 "trigger a readahead.",
320 NULL, NULL, 56, 0, 64, 0);
321
322+static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
323+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
324+ "The maximum size of the insert buffer. (in bytes)",
325+ NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
326+
327+static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
328+ PLUGIN_VAR_RQCMDARG,
329+ "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
330+ NULL, NULL, 1, 0, 1, 0);
331+
332+static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
333+ PLUGIN_VAR_RQCMDARG,
334+ "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
335+ NULL, NULL, 100, 100, 999999999, 0);
336+
337+static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target,
338+ PLUGIN_VAR_RQCMDARG,
339+ "Control soft limit of checkpoint age. (0 : not control)",
340+ NULL, NULL, 0, 0, ~0UL, 0);
341+
1bfc1981
AM
342+static
343+void
344+innodb_flush_neighbor_pages_update(
345+ THD* thd,
346+ struct st_mysql_sys_var* var,
347+ void* var_ptr,
348+ const void* save)
349+{
350+ *(long *)var_ptr = (*(long *)save) % 3;
351+}
352+
353+const char *flush_neighbor_pages_names[]=
354+{
355+ "none", /* 0 */
356+ "area",
357+ "cont", /* 2 */
358+ /* For compatibility with the older patch */
359+ "0", /* "none" + 3 */
360+ "1", /* "area" + 3 */
361+ "2", /* "cont" + 3 */
362+ NullS
363+};
364+
365+TYPELIB flush_neighbor_pages_typelib=
366+{
367+ array_elements(flush_neighbor_pages_names) - 1,
368+ "flush_neighbor_pages_typelib",
369+ flush_neighbor_pages_names,
370+ NULL
371+};
372+
373+static MYSQL_SYSVAR_ENUM(flush_neighbor_pages, srv_flush_neighbor_pages,
374+ PLUGIN_VAR_RQCMDARG, "Neighbor page flushing behaviour: none: do not flush, "
375+ "[area]: flush selected pages one-by-one, "
376+ "cont: flush a contiguous block of pages", NULL,
377+ innodb_flush_neighbor_pages_update, 1, &flush_neighbor_pages_typelib);
b4e1fa2c
AM
378+
379+static
380+void
381+innodb_read_ahead_update(
382+ THD* thd,
383+ struct st_mysql_sys_var* var,
384+ void* var_ptr,
385+ const void* save)
386+{
387+ *(long *)var_ptr= (*(long *)save) & 3;
388+}
389+const char *read_ahead_names[]=
390+{
391+ "none", /* 0 */
392+ "random",
393+ "linear",
394+ "both", /* 3 */
395+ /* For compatibility with the older patch */
396+ "0", /* 4 ("none" + 4) */
397+ "1",
398+ "2",
399+ "3", /* 7 ("both" + 4) */
400+ NullS
401+};
402+TYPELIB read_ahead_typelib=
403+{
404+ array_elements(read_ahead_names) - 1, "read_ahead_typelib",
405+ read_ahead_names, NULL
406+};
407+static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
408+ PLUGIN_VAR_RQCMDARG,
409+ "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]",
410+ NULL, innodb_read_ahead_update, 2, &read_ahead_typelib);
411+
412+static
413+void
414+innodb_adaptive_flushing_method_update(
415+ THD* thd, /*!< in: thread handle (unused) */
416+ struct st_mysql_sys_var* var, /*!< in: system variable descriptor (unused) */
417+ void* var_ptr, /*!< out: where the normalized value is stored */
418+ const void* save) /*!< in: new value, read as long */
419+{
420+ *(long *)var_ptr= (*(long *)save) % 3; /* 3..5 ("0".."2") alias 0..2; was % 4 */
421+}
422+const char *adaptive_flushing_method_names[]=
423+{
424+ "native", /* 0 */
425+ "estimate", /* 1 */
426+ "keep_average", /* 2 */
427+ /* For compatibility with the older patch */
428+ "0", /* 3 ("native" + 3) */
429+ "1", /* 4 ("estimate" + 3) */
430+ "2", /* 5 ("keep_average" + 3) */
431+ NullS
432+};
433+TYPELIB adaptive_flushing_method_typelib=
434+{
435+ array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib",
436+ adaptive_flushing_method_names, NULL
437+};
438+static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method,
439+ PLUGIN_VAR_RQCMDARG,
440+ "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
441+ NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
1bfc1981
AM
442+
443+#ifdef UNIV_DEBUG
444+static MYSQL_SYSVAR_ULONG(flush_checkpoint_debug, srv_flush_checkpoint_debug,
445+ PLUGIN_VAR_RQCMDARG,
446+ "Debug flags for InnoDB flushing and checkpointing (0=none,"
447+ "1=stop preflush and checkpointing)",
448+ NULL, NULL, 0, 0, 1, 0);
449+#endif
b4e1fa2c
AM
450+
451 static struct st_mysql_sys_var* innobase_system_variables[]= {
452 MYSQL_SYSVAR(additional_mem_pool_size),
453 MYSQL_SYSVAR(autoextend_increment),
29ffd636 454@@ -11470,6 +11625,7 @@
b4e1fa2c
AM
455 MYSQL_SYSVAR(file_format_check),
456 MYSQL_SYSVAR(file_format_max),
457 MYSQL_SYSVAR(flush_log_at_trx_commit),
458+ MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
459 MYSQL_SYSVAR(flush_method),
460 MYSQL_SYSVAR(force_recovery),
db82db79 461 MYSQL_SYSVAR(large_prefix),
29ffd636 462@@ -11509,6 +11665,13 @@
b4e1fa2c
AM
463 MYSQL_SYSVAR(show_verbose_locks),
464 MYSQL_SYSVAR(show_locks_held),
465 MYSQL_SYSVAR(version),
466+ MYSQL_SYSVAR(ibuf_max_size),
467+ MYSQL_SYSVAR(ibuf_active_contract),
468+ MYSQL_SYSVAR(ibuf_accel_rate),
469+ MYSQL_SYSVAR(checkpoint_age_target),
470+ MYSQL_SYSVAR(flush_neighbor_pages),
471+ MYSQL_SYSVAR(read_ahead),
472+ MYSQL_SYSVAR(adaptive_flushing_method),
b4e1fa2c
AM
473 MYSQL_SYSVAR(use_sys_malloc),
474 MYSQL_SYSVAR(use_native_aio),
475 MYSQL_SYSVAR(change_buffering),
29ffd636 476@@ -11521,6 +11684,9 @@
1bfc1981
AM
477 MYSQL_SYSVAR(purge_threads),
478 MYSQL_SYSVAR(purge_batch_size),
479 MYSQL_SYSVAR(rollback_segments),
480+#ifdef UNIV_DEBUG
481+ MYSQL_SYSVAR(flush_checkpoint_debug),
482+#endif
483 NULL
484 };
485
db82db79
AM
486--- a/storage/innobase/ibuf/ibuf0ibuf.c
487+++ b/storage/innobase/ibuf/ibuf0ibuf.c
1bfc1981 488@@ -523,8 +523,10 @@
b4e1fa2c
AM
489 grow in size, as the references on the upper levels of the tree can
490 change */
491
492- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
493- / IBUF_POOL_SIZE_PER_MAX_SIZE;
494+ ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
495+ / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
496+
497+ srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
498
499 mutex_create(ibuf_pessimistic_insert_mutex_key,
500 &ibuf_pessimistic_insert_mutex,
1bfc1981 501@@ -2763,9 +2765,11 @@
b4e1fa2c
AM
502 size = ibuf->size;
503 max_size = ibuf->max_size;
504
505+ if (!srv_ibuf_active_contract) {
506 if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
507 return;
508 }
509+ }
510
511 sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
512
db82db79
AM
513--- a/storage/innobase/include/buf0rea.h
514+++ b/storage/innobase/include/buf0rea.h
734d6226 515@@ -149,8 +149,7 @@
b4e1fa2c
AM
516
517 /** The size in pages of the area which the read-ahead algorithms read if
518 invoked */
519-#define BUF_READ_AHEAD_AREA(b) \
520- ut_min(64, ut_2_power_up((b)->curr_size / 32))
521+#define BUF_READ_AHEAD_AREA(b) 64
522
523 /** @name Modes used in read-ahead @{ */
524 /** read only pages belonging to the insert buffer tree */
db82db79
AM
525--- a/storage/innobase/include/fil0fil.h
526+++ b/storage/innobase/include/fil0fil.h
29ffd636 527@@ -663,8 +663,9 @@
413cadc7
AM
528 void
529 fil_flush(
530 /*======*/
531- ulint space_id); /*!< in: file space id (this can be a group of
532+ ulint space_id, /*!< in: file space id (this can be a group of
533 log files or a tablespace of the database) */
534+ ibool metadata);
535 /**********************************************************************//**
536 Flushes to disk writes in file spaces of the given type possibly cached by
537 the OS. */
db82db79
AM
538--- a/storage/innobase/include/ha_prototypes.h
539+++ b/storage/innobase/include/ha_prototypes.h
adf0fb13 540@@ -284,6 +284,13 @@
b4e1fa2c
AM
541 /*===================*/
542 void* thd, /*!< in: thread handle (THD*) */
543 ulint value); /*!< in: time waited for the lock */
544+/******************************************************************//**
545+*/
546+
547+ulong
548+thd_flush_log_at_trx_commit(
549+/*================================*/
550+ void* thd);
551
adf0fb13
AM
552 /**********************************************************************//**
553 Get the current setting of the lower_case_table_names global parameter from
db82db79
AM
554--- a/storage/innobase/include/os0file.h
555+++ b/storage/innobase/include/os0file.h
413cadc7
AM
556@@ -296,8 +296,8 @@
557 pfs_os_file_write_func(name, file, buf, offset, offset_high, \
558 n, __FILE__, __LINE__)
559
560-# define os_file_flush(file) \
561- pfs_os_file_flush_func(file, __FILE__, __LINE__)
562+# define os_file_flush(file, metadata) \
563+ pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
564
565 # define os_file_rename(key, oldpath, newpath) \
566 pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
567@@ -333,7 +333,7 @@
568 # define os_file_write(name, file, buf, offset, offset_high, n) \
569 os_file_write_func(name, file, buf, offset, offset_high, n)
570
571-# define os_file_flush(file) os_file_flush_func(file)
572+# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
573
574 # define os_file_rename(key, oldpath, newpath) \
575 os_file_rename_func(oldpath, newpath)
576@@ -781,6 +781,7 @@
577 pfs_os_file_flush_func(
578 /*===================*/
579 os_file_t file, /*!< in, own: handle to a file */
580+ ibool metadata,
581 const char* src_file,/*!< in: file name where func invoked */
582 ulint src_line);/*!< in: line where the func invoked */
583
584@@ -860,7 +861,8 @@
585 ibool
586 os_file_flush_func(
587 /*===============*/
588- os_file_t file); /*!< in, own: handle to a file */
589+ os_file_t file, /*!< in, own: handle to a file */
590+ ibool metadata);
591 /***********************************************************************//**
592 Retrieves the last error number if an error occurs in a file io function.
593 The number should be retrieved before any other OS calls (because they may
db82db79
AM
594--- a/storage/innobase/include/os0file.ic
595+++ b/storage/innobase/include/os0file.ic
413cadc7
AM
596@@ -369,6 +369,7 @@
597 pfs_os_file_flush_func(
598 /*===================*/
599 os_file_t file, /*!< in, own: handle to a file */
600+ ibool metadata,
601 const char* src_file,/*!< in: file name where func invoked */
602 ulint src_line)/*!< in: line where the func invoked */
603 {
604@@ -378,7 +379,7 @@
605
606 register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
607 src_file, src_line);
608- result = os_file_flush_func(file);
609+ result = os_file_flush_func(file, metadata);
610
611 register_pfs_file_io_end(locker, 0);
612
db82db79
AM
613--- a/storage/innobase/include/srv0srv.h
614+++ b/storage/innobase/include/srv0srv.h
adf0fb13 615@@ -138,7 +138,8 @@
b4e1fa2c
AM
616 extern ulint srv_n_log_files;
617 extern ulint srv_log_file_size;
618 extern ulint srv_log_buffer_size;
619-extern ulong srv_flush_log_at_trx_commit;
620+//extern ulong srv_flush_log_at_trx_commit;
621+extern char srv_use_global_flush_log_at_trx_commit;
622 extern char srv_adaptive_flushing;
623
734d6226
AM
624 /* If this flag is TRUE, then we will load the indexes' (and tables') metadata
625@@ -221,6 +222,16 @@
b4e1fa2c
AM
626 extern ulong srv_max_purge_lag;
627
628 extern ulong srv_replication_delay;
629+
630+extern long long srv_ibuf_max_size;
631+extern ulint srv_ibuf_active_contract;
632+extern ulint srv_ibuf_accel_rate;
633+extern ulint srv_checkpoint_age_target;
634+extern ulint srv_flush_neighbor_pages;
635+extern ulint srv_enable_unsafe_group_commit;
636+extern ulint srv_read_ahead;
637+extern ulint srv_adaptive_flushing_method;
638+
639 /*-------------------------------------------*/
640
641 extern ulint srv_n_rows_inserted;
1bfc1981
AM
642@@ -255,6 +266,9 @@
643 extern ibool srv_print_buf_io;
644 extern ibool srv_print_log_io;
645 extern ibool srv_print_latch_waits;
646+
647+extern ulint srv_flush_checkpoint_debug;
648+
649 #else /* UNIV_DEBUG */
650 # define srv_print_thread_releases FALSE
651 # define srv_print_lock_waits FALSE
652@@ -399,8 +413,9 @@
b4e1fa2c
AM
653 when writing data files, but do flush
654 after writing to log files */
655 SRV_UNIX_NOSYNC, /*!< do not flush after writing */
656- SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
657+ SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
658 data files */
659+ SRV_UNIX_ALL_O_DIRECT /*!< experimental: also open the log files with O_DIRECT */
660 };
661
662 /** Alternatives for file i/o in Windows */
db82db79
AM
663--- a/storage/innobase/log/log0log.c
664+++ b/storage/innobase/log/log0log.c
d8778560
AM
665@@ -48,6 +48,7 @@
666 #include "srv0start.h"
667 #include "trx0sys.h"
668 #include "trx0trx.h"
669+#include "ha_prototypes.h"
670
671 /*
672 General philosophy of InnoDB redo-logs:
673@@ -359,6 +360,33 @@
b4e1fa2c
AM
674 }
675
676 /************************************************************//**
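677+Returns log_sys->max_modified_age_async, capped at srv_checkpoint_age_target minus one eighth of it when that target is non-zero. */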
678+UNIV_INLINE
679+ulint
680+log_max_modified_age_async()
681+{
682+ if (srv_checkpoint_age_target) {
683+ return(ut_min(log_sys->max_modified_age_async,
684+ srv_checkpoint_age_target
685+ - srv_checkpoint_age_target / 8));
686+ } else {
687+ return(log_sys->max_modified_age_async);
688+ }
689+}
690+
691+UNIV_INLINE
692+ulint
693+log_max_checkpoint_age_async()
694+{
695+ if (srv_checkpoint_age_target) {
696+ return(ut_min(log_sys->max_checkpoint_age_async,
697+ srv_checkpoint_age_target));
698+ } else {
699+ return(log_sys->max_checkpoint_age_async);
700+ }
701+}
702+
703+/************************************************************//**
704 Closes the log.
705 @return lsn */
706 UNIV_INTERN
d8778560 707@@ -427,7 +455,7 @@
b4e1fa2c
AM
708 }
709 }
710
711- if (checkpoint_age <= log->max_modified_age_async) {
712+ if (checkpoint_age <= log_max_modified_age_async()) {
713
714 goto function_exit;
715 }
d8778560 716@@ -435,8 +463,8 @@
b4e1fa2c
AM
717 oldest_lsn = buf_pool_get_oldest_modification();
718
719 if (!oldest_lsn
720- || lsn - oldest_lsn > log->max_modified_age_async
721- || checkpoint_age > log->max_checkpoint_age_async) {
722+ || lsn - oldest_lsn > log_max_modified_age_async()
723+ || checkpoint_age > log_max_checkpoint_age_async()) {
724
725 log->check_flush_or_checkpoint = TRUE;
726 }
413cadc7 727@@ -1100,9 +1128,10 @@
b4e1fa2c
AM
728 group = (log_group_t*)((ulint)group - 1);
729
730 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
731+ && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
732 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
733
413cadc7
AM
734- fil_flush(group->space_id);
735+ fil_flush(group->space_id, FALSE);
736 }
737
738 #ifdef UNIV_DEBUG
739@@ -1121,10 +1150,11 @@
b4e1fa2c
AM
740 logs and cannot end up here! */
741
742 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
743+ && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
744 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
745- && srv_flush_log_at_trx_commit != 2) {
746+ && thd_flush_log_at_trx_commit(NULL) != 2) {
747
413cadc7
AM
748- fil_flush(group->space_id);
749+ fil_flush(group->space_id, FALSE);
b4e1fa2c 750 }
413cadc7
AM
751
752 mutex_enter(&(log_sys->mutex));
d8778560 753@@ -1501,7 +1531,8 @@
b4e1fa2c
AM
754
755 mutex_exit(&(log_sys->mutex));
756
757- if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
758+ if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
759+ || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
760 /* O_DSYNC means the OS did not buffer the log file at all:
761 so we have also flushed to disk what we have written */
762
413cadc7
AM
763@@ -1511,7 +1542,7 @@
764
765 group = UT_LIST_GET_FIRST(log_sys->log_groups);
766
767- fil_flush(group->space_id);
768+ fil_flush(group->space_id, FALSE);
769 log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
770 }
771
1bfc1981
AM
772@@ -1655,10 +1686,13 @@
773 recv_apply_hashed_log_recs(TRUE);
774 }
775
776+ retry:
777 n_pages = buf_flush_list(ULINT_MAX, new_oldest);
778
779- if (sync) {
780- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
781+ if (sync && n_pages != 0) {
782+ //buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
783+ os_thread_sleep(100000);
784+ goto retry;
785 }
786
787 if (n_pages == ULINT_UNDEFINED) {
788@@ -1979,6 +2013,13 @@
789 {
790 ib_uint64_t oldest_lsn;
791
792+#ifdef UNIV_DEBUG
793+ if (srv_flush_checkpoint_debug == 1) {
794+
795+ return TRUE;
796+ }
797+#endif
798+
799 if (recv_recovery_is_on()) {
800 recv_apply_hashed_log_recs(TRUE);
801 }
802@@ -2070,7 +2111,11 @@
803 physical write will always be made to
804 log files */
805 {
806- /* Preflush pages synchronously */
807+#ifdef UNIV_DEBUG
808+ if (srv_flush_checkpoint_debug == 1)
809+ return;
810+#endif
811+/* Preflush pages synchronously */
812
813 while (!log_preflush_pool_modified_pages(lsn, TRUE));
814
815@@ -2096,7 +2141,13 @@
816 ibool checkpoint_sync;
817 ibool do_checkpoint;
818 ibool success;
819-loop:
820+
821+#ifdef UNIV_DEBUG
822+ if (srv_flush_checkpoint_debug == 1)
823+ return;
824+#endif
825+
826+ loop:
827 sync = FALSE;
828 checkpoint_sync = FALSE;
829 do_checkpoint = FALSE;
830@@ -2119,13 +2170,15 @@
831 /* A flush is urgent: we have to do a synchronous preflush */
b4e1fa2c
AM
832
833 sync = TRUE;
1bfc1981 834- advance = 2 * (age - log->max_modified_age_sync);
b4e1fa2c 835- } else if (age > log->max_modified_age_async) {
1bfc1981 836+ advance = age - log->max_modified_age_sync;
b4e1fa2c
AM
837+ } else if (age > log_max_modified_age_async()) {
838
839 /* A flush is not urgent: we do an asynchronous preflush */
840- advance = age - log->max_modified_age_async;
841+ advance = age - log_max_modified_age_async();
1bfc1981 842+ log->check_flush_or_checkpoint = FALSE;
b4e1fa2c
AM
843 } else {
844 advance = 0;
1bfc1981 845+ log->check_flush_or_checkpoint = FALSE;
b4e1fa2c 846 }
1bfc1981
AM
847
848 checkpoint_age = log->lsn - log->last_checkpoint_lsn;
849@@ -2137,14 +2190,14 @@
b4e1fa2c
AM
850
851 do_checkpoint = TRUE;
852
853- } else if (checkpoint_age > log->max_checkpoint_age_async) {
854+ } else if (checkpoint_age > log_max_checkpoint_age_async()) {
855 /* A checkpoint is not urgent: do it asynchronously */
856
857 do_checkpoint = TRUE;
1bfc1981
AM
858
859- log->check_flush_or_checkpoint = FALSE;
860+ //log->check_flush_or_checkpoint = FALSE;
861 } else {
862- log->check_flush_or_checkpoint = FALSE;
863+ //log->check_flush_or_checkpoint = FALSE;
864 }
865
866 mutex_exit(&(log->mutex));
867@@ -2152,6 +2205,7 @@
868 if (advance) {
869 ib_uint64_t new_oldest = oldest_lsn + advance;
870
871+retry:
872 success = log_preflush_pool_modified_pages(new_oldest, sync);
873
874 /* If the flush succeeded, this thread has done its part
875@@ -2166,7 +2220,7 @@
876 log->check_flush_or_checkpoint = TRUE;
877
878 mutex_exit(&(log->mutex));
879- goto loop;
880+ goto retry;
881 }
882 }
883
884@@ -2607,7 +2661,7 @@
413cadc7
AM
885
886 mutex_exit(&(log_sys->mutex));
887
888- fil_flush(group->archive_space_id);
889+ fil_flush(group->archive_space_id, TRUE);
890
891 mutex_enter(&(log_sys->mutex));
892
1bfc1981
AM
893@@ -3044,7 +3098,11 @@
894 log_check_margins(void)
895 /*===================*/
896 {
897-loop:
898+#ifdef UNIV_DEBUG
899+ if (srv_flush_checkpoint_debug == 1)
900+ return;
901+#endif
902+ loop:
903 log_flush_margin();
904
905 log_checkpoint_margin();
906@@ -3349,6 +3407,17 @@
b4e1fa2c
AM
907 log_sys->flushed_to_disk_lsn,
908 log_sys->last_checkpoint_lsn);
909
910+ fprintf(file,
911+ "Max checkpoint age %lu\n"
912+ "Checkpoint age target %lu\n"
913+ "Modified age %lu\n"
914+ "Checkpoint age %lu\n",
915+ (ulong) log_sys->max_checkpoint_age,
916+ (ulong) log_max_checkpoint_age_async(),
917+ (ulong) (log_sys->lsn -
918+ log_buf_pool_get_oldest_modification()),
919+ (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
920+
921 current_time = time(NULL);
922
923 time_elapsed = 0.001 + difftime(current_time,
db82db79
AM
924--- a/storage/innobase/log/log0recv.c
925+++ b/storage/innobase/log/log0recv.c
b4e1fa2c
AM
926@@ -2906,9 +2906,12 @@
927 ib_uint64_t archived_lsn;
928 #endif /* UNIV_LOG_ARCHIVE */
929 byte* buf;
930- byte log_hdr_buf[LOG_FILE_HDR_SIZE];
931+ byte* log_hdr_buf;
932+ byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
933 ulint err;
934
935+ log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
936+
937 #ifdef UNIV_LOG_ARCHIVE
938 ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
939 /** TRUE when recovering from a checkpoint */
413cadc7
AM
940@@ -3468,7 +3471,7 @@
941 exit(1);
942 }
943
944- os_file_flush(log_file);
945+ os_file_flush(log_file, TRUE);
946 os_file_close(log_file);
947 }
948
949@@ -3492,7 +3495,7 @@
950
951 os_file_write(name, log_file, buf, 0, 0,
952 LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
953- os_file_flush(log_file);
954+ os_file_flush(log_file, TRUE);
955 os_file_close(log_file);
956
957 ut_free(buf);
db82db79
AM
958--- a/storage/innobase/os/os0file.c
959+++ b/storage/innobase/os/os0file.c
d8778560 960@@ -1424,7 +1424,7 @@
b4e1fa2c
AM
961 #endif
962 #ifdef UNIV_NON_BUFFERED_IO
963 # ifndef UNIV_HOTBACKUP
964- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
965+ if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
966 /* Do not use unbuffered i/o to log files because
967 value 2 denotes that we do not flush the log at every
968 commit, but only once per second */
d8778560 969@@ -1440,7 +1440,7 @@
b4e1fa2c
AM
970 attributes = 0;
971 #ifdef UNIV_NON_BUFFERED_IO
972 # ifndef UNIV_HOTBACKUP
973- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
974+ if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
975 /* Do not use unbuffered i/o to log files because
976 value 2 denotes that we do not flush the log at every
977 commit, but only once per second */
d8778560 978@@ -1585,6 +1585,11 @@
b4e1fa2c
AM
979 os_file_set_nocache(file, name, mode_str);
980 }
981
982+ /* ALL_O_DIRECT: O_DIRECT also for transaction log file */
983+ if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
984+ os_file_set_nocache(file, name, mode_str);
985+ }
986+
987 #ifdef USE_FILE_LOCK
988 if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
989
413cadc7
AM
990@@ -2008,7 +2013,7 @@
991
992 ut_free(buf2);
993
994- ret = os_file_flush(file);
995+ ret = os_file_flush(file, TRUE);
996
997 if (ret) {
998 return(TRUE);
999@@ -2046,7 +2051,8 @@
1000 int
1001 os_file_fsync(
1002 /*==========*/
1003- os_file_t file) /*!< in: handle to a file */
1004+ os_file_t file, /*!< in: handle to a file */
1005+ ibool metadata)
1006 {
1007 int ret;
1008 int failures;
db82db79 1009@@ -2055,7 +2061,16 @@
413cadc7
AM
1010 failures = 0;
1011
1012 do {
db82db79 1013+#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
413cadc7
AM
1014+ if (metadata) {
1015+ ret = fsync(file);
1016+ } else {
1017+ ret = fdatasync(file);
1018+ }
1019+#else
db82db79 1020+ (void) metadata;
413cadc7
AM
1021 ret = fsync(file);
1022+#endif
1023
1024 os_n_fsyncs++;
1025
db82db79 1026@@ -2092,7 +2107,8 @@
413cadc7
AM
1027 ibool
1028 os_file_flush_func(
1029 /*===============*/
1030- os_file_t file) /*!< in, own: handle to a file */
1031+ os_file_t file, /*!< in, own: handle to a file */
1032+ ibool metadata)
1033 {
1034 #ifdef __WIN__
1035 BOOL ret;
db82db79 1036@@ -2142,18 +2158,18 @@
413cadc7
AM
1037 /* If we are not on an operating system that supports this,
1038 then fall back to a plain fsync. */
1039
1040- ret = os_file_fsync(file);
1041+ ret = os_file_fsync(file, metadata);
1042 } else {
1043 ret = fcntl(file, F_FULLFSYNC, NULL);
1044
1045 if (ret) {
1046 /* If we are not on a file system that supports this,
1047 then fall back to a plain fsync. */
1048- ret = os_file_fsync(file);
1049+ ret = os_file_fsync(file, metadata);
1050 }
1051 }
1052 #else
1053- ret = os_file_fsync(file);
1054+ ret = os_file_fsync(file, metadata);
1055 #endif
1056
1057 if (ret == 0) {
db82db79 1058@@ -2336,7 +2352,7 @@
413cadc7
AM
1059 the OS crashes, a database page is only partially
1060 physically written to disk. */
1061
1062- ut_a(TRUE == os_file_flush(file));
1063+ ut_a(TRUE == os_file_flush(file, TRUE));
1064 }
1065 # endif /* UNIV_DO_FLUSH */
1066
db82db79 1067@@ -2378,7 +2394,7 @@
413cadc7
AM
1068 the OS crashes, a database page is only partially
1069 physically written to disk. */
1070
1071- ut_a(TRUE == os_file_flush(file));
1072+ ut_a(TRUE == os_file_flush(file, TRUE));
1073 }
1074 # endif /* UNIV_DO_FLUSH */
1075
db82db79 1076@@ -2750,7 +2766,7 @@
413cadc7
AM
1077
1078 # ifdef UNIV_DO_FLUSH
1079 if (!os_do_not_call_flush_at_each_write) {
1080- ut_a(TRUE == os_file_flush(file));
1081+ ut_a(TRUE == os_file_flush(file, TRUE));
1082 }
1083 # endif /* UNIV_DO_FLUSH */
1084
db82db79 1085@@ -4296,7 +4312,7 @@
413cadc7
AM
1086 #ifdef UNIV_DO_FLUSH
1087 if (slot->type == OS_FILE_WRITE
1088 && !os_do_not_call_flush_at_each_write) {
1089- if (!os_file_flush(slot->file)) {
1090+ if (!os_file_flush(slot->file, TRUE)) {
1091 ut_error;
1092 }
1093 }
db82db79 1094@@ -4597,7 +4613,7 @@
413cadc7
AM
1095 #ifdef UNIV_DO_FLUSH
1096 if (slot->type == OS_FILE_WRITE
1097 && !os_do_not_call_flush_at_each_write)
1098- && !os_file_flush(slot->file) {
1099+ && !os_file_flush(slot->file, TRUE) {
1100 ut_error;
1101 }
1102 #endif /* UNIV_DO_FLUSH */
db82db79
AM
1103--- a/storage/innobase/srv/srv0srv.c
1104+++ b/storage/innobase/srv/srv0srv.c
adf0fb13 1105@@ -183,7 +183,8 @@
b4e1fa2c
AM
1106 UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
1107 /* size in database pages */
1108 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
1109-UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
1110+//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
1111+UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
1112
1113 /* Try to flush dirty pages so as to avoid IO bursts at
1114 the checkpoints. */
734d6226 1115@@ -404,6 +405,17 @@
b4e1fa2c
AM
1116
1117 UNIV_INTERN ulong srv_replication_delay = 0;
1118
1119+UNIV_INTERN long long srv_ibuf_max_size = 0;
1120+UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
1121+UNIV_INTERN ulint srv_ibuf_accel_rate = 100;
1122+#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0)))
1123+
1124+UNIV_INTERN ulint srv_checkpoint_age_target = 0;
1bfc1981 1125+UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:area 2:contiguous */
b4e1fa2c
AM
1126+
1127+UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
1128+UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
1129+UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
1130 /*-------------------------------------------*/
1131 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
1132 UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
1bfc1981
AM
1133@@ -417,6 +429,9 @@
1134 UNIV_INTERN ibool srv_print_buf_io = FALSE;
1135 UNIV_INTERN ibool srv_print_log_io = FALSE;
1136 UNIV_INTERN ibool srv_print_latch_waits = FALSE;
1137+
1138+UNIV_INTERN ulong srv_flush_checkpoint_debug = 0;
1139+
1140 #endif /* UNIV_DEBUG */
1141
1142 UNIV_INTERN ulint srv_n_rows_inserted = 0;
1143@@ -2713,7 +2728,7 @@
db82db79
AM
1144
1145 ut_ad(!mutex_own(&kernel_mutex));
1146
1147- ut_a(srv_n_purge_threads == 0);
1148+ ut_a(srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0));
1149
1150 do {
1151 /* Check for shutdown and change in purge config. */
1bfc1981 1152@@ -2746,6 +2761,7 @@
b4e1fa2c
AM
1153 ulint n_pages_purged = 0;
1154 ulint n_bytes_merged;
1155 ulint n_pages_flushed;
1156+ ulint n_pages_flushed_prev = 0;
1157 ulint n_bytes_archived;
1158 ulint n_tables_to_drop;
1159 ulint n_ios;
1bfc1981 1160@@ -2753,7 +2769,20 @@
b4e1fa2c
AM
1161 ulint n_ios_very_old;
1162 ulint n_pend_ios;
1163 ulint next_itr_time;
1164+ ulint prev_adaptive_flushing_method = ULINT_UNDEFINED;
1165+ ulint inner_loop = 0;
1166+ ibool skip_sleep = FALSE;
1167 ulint i;
1168+ struct t_prev_flush_info_struct {
1169+ ulint count;
1170+ unsigned space:32;
1171+ unsigned offset:32;
1172+ ib_uint64_t oldest_modification;
1173+ } prev_flush_info[MAX_BUFFER_POOLS];
1174+
1175+ ib_uint64_t lsn_old;
1176+
1177+ ib_uint64_t oldest_lsn;
1178
1179 #ifdef UNIV_DEBUG_THREAD_CREATION
1180 fprintf(stderr, "Master thread starts, id %lu\n",
1bfc1981 1181@@ -2775,6 +2804,9 @@
b4e1fa2c
AM
1182
1183 mutex_exit(&kernel_mutex);
1184
1185+ mutex_enter(&(log_sys->mutex));
1186+ lsn_old = log_sys->lsn;
1187+ mutex_exit(&(log_sys->mutex));
1188 loop:
1189 /*****************************************************************/
1190 /* ---- When there is database activity by users, we cycle in this
1bfc1981 1191@@ -2805,9 +2837,13 @@
b4e1fa2c
AM
1192 /* Sleep for 1 second on entrying the for loop below the first time. */
1193 next_itr_time = ut_time_ms() + 1000;
1194
1195+ skip_sleep = FALSE;
1196+
1197 for (i = 0; i < 10; i++) {
1198 ulint cur_time = ut_time_ms();
1199
1200+ n_pages_flushed = 0; /* initialize */
1201+
1202 /* ALTER TABLE in MySQL requires on Unix that the table handler
1203 can drop tables lazily after there no longer are SELECT
1204 queries to them. */
1bfc1981 1205@@ -2831,6 +2867,7 @@
b4e1fa2c
AM
1206 srv_main_thread_op_info = "sleeping";
1207 srv_main_1_second_loops++;
1208
1209+ if (!skip_sleep) {
1210 if (next_itr_time > cur_time
1211 && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
1212
1bfc1981 1213@@ -2841,10 +2878,26 @@
b4e1fa2c
AM
1214 (next_itr_time - cur_time)
1215 * 1000));
1216 srv_main_sleeps++;
1217+
1218+ /*
1219+ mutex_enter(&(log_sys->mutex));
1220+ oldest_lsn = buf_pool_get_oldest_modification();
1221+ ib_uint64_t lsn = log_sys->lsn;
1222+ mutex_exit(&(log_sys->mutex));
1223+
1224+ if(oldest_lsn)
1225+ fprintf(stderr,
1226+ "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
1227+ (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1228+ lsn - lsn_old);
1229+ */
1230 }
1231
1232 /* Each iteration should happen at 1 second interval. */
1233 next_itr_time = ut_time_ms() + 1000;
1234+ } /* if (!skip_sleep) */
1235+
1236+ skip_sleep = FALSE;
1237
1238 /* Flush logs if needed */
1239 srv_sync_log_buffer_in_background();
1bfc1981 1240@@ -2864,7 +2917,7 @@
b4e1fa2c
AM
1241 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
1242 && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
1243 srv_main_thread_op_info = "doing insert buffer merge";
1244- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1245+ ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1246
1247 /* Flush logs if needed */
1248 srv_sync_log_buffer_in_background();
1bfc1981 1249@@ -2881,7 +2934,11 @@
b4e1fa2c
AM
1250 n_pages_flushed = buf_flush_list(
1251 PCT_IO(100), IB_ULONGLONG_MAX);
1252
1253- } else if (srv_adaptive_flushing) {
1254+ mutex_enter(&(log_sys->mutex));
1255+ lsn_old = log_sys->lsn;
1256+ mutex_exit(&(log_sys->mutex));
1257+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
1258+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
1259
1260 /* Try to keep the rate of flushing of dirty
1261 pages such that redo log generation does not
1bfc1981 1262@@ -2897,6 +2954,224 @@
b4e1fa2c
AM
1263 n_flush,
1264 IB_ULONGLONG_MAX);
1265 }
1266+
1267+ mutex_enter(&(log_sys->mutex));
1268+ lsn_old = log_sys->lsn;
1269+ mutex_exit(&(log_sys->mutex));
1270+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
1271+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
1272+
1273+ /* Try to keep the modified age from exceeding
1274+ the max_checkpoint_age * 7/8 line */
1275+
1276+ mutex_enter(&(log_sys->mutex));
1277+
1278+ oldest_lsn = buf_pool_get_oldest_modification();
1279+ if (oldest_lsn == 0) {
1280+ lsn_old = log_sys->lsn;
1281+ mutex_exit(&(log_sys->mutex));
1282+
1283+ } else {
1284+ if ((log_sys->lsn - oldest_lsn)
1285+ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
1286+ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
1287+ /* We should not flush from here. */
1288+ lsn_old = log_sys->lsn;
1289+ mutex_exit(&(log_sys->mutex));
1290+ } else if ((log_sys->lsn - oldest_lsn)
1291+ > (log_sys->max_checkpoint_age)/4 ) {
1292+
1293+ /* defence line (max_checkpoint_age * 1/4) */
1294+ ib_uint64_t lsn = log_sys->lsn;
1295+
1296+ ib_uint64_t level, bpl;
1297+ buf_page_t* bpage;
1298+ ulint j;
1299+
1300+ mutex_exit(&(log_sys->mutex));
1301+
1302+ bpl = 0;
1303+
1304+ for (j = 0; j < srv_buf_pool_instances; j++) {
1305+ buf_pool_t* buf_pool;
1306+ ulint n_blocks;
1307+
1308+ buf_pool = buf_pool_from_array(j);
1309+
1310+ /* The flush_list is scanned optimistically here */
1311+
1312+ level = 0;
1313+ n_blocks = 0;
1314+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1315+
1316+ while (bpage != NULL) {
1317+ ib_uint64_t oldest_modification = bpage->oldest_modification;
1318+ if (oldest_modification != 0) {
1319+ level += log_sys->max_checkpoint_age
1320+ - (lsn - oldest_modification);
1321+ }
1322+ bpage = UT_LIST_GET_NEXT(list, bpage);
1323+ n_blocks++;
1324+ }
1325+
1326+ if (level) {
1327+ bpl += ((ib_uint64_t) n_blocks * n_blocks
1328+ * (lsn - lsn_old)) / level;
1329+ }
1330+
1331+ }
1332+
1333+ if (!srv_use_doublewrite_buf) {
1334+ /* flushing is faster without the doublewrite buffer */
1335+ bpl = (bpl * 7) / 8;
1336+ }
1337+
1338+ if (bpl) {
1339+retry_flush_batch:
1340+ n_pages_flushed = buf_flush_list(bpl,
1341+ oldest_lsn + (lsn - lsn_old));
1342+ if (n_pages_flushed == ULINT_UNDEFINED) {
1343+ os_thread_sleep(5000);
1344+ goto retry_flush_batch;
1345+ }
1346+ }
1347+
1348+ lsn_old = lsn;
1349+ /*
1350+ fprintf(stderr,
1351+ "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
1352+ (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1353+ lsn - lsn_old, bpl);
1354+ */
1355+ } else {
1356+ lsn_old = log_sys->lsn;
1357+ mutex_exit(&(log_sys->mutex));
1358+ }
1359+ }
1360+ prev_adaptive_flushing_method = 1;
1361+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
1362+ buf_pool_t* buf_pool;
1363+ buf_page_t* bpage;
1364+ ib_uint64_t lsn;
1365+ ulint j;
1366+
1367+ mutex_enter(&(log_sys->mutex));
1368+ oldest_lsn = buf_pool_get_oldest_modification();
1369+ lsn = log_sys->lsn;
1370+ mutex_exit(&(log_sys->mutex));
1371+
1372+ /* run the outer loop 10 times per second (x10) */
1373+ next_itr_time -= 900; /* 1000 - 900 == 100 */
1374+ inner_loop++;
1375+ if (inner_loop < 10) {
1376+ i--;
1377+ } else {
1378+ inner_loop = 0;
1379+ }
1380+
1381+ if (prev_adaptive_flushing_method == 2) {
1382+ lint n_flush;
d8778560
AM
1383+ lint blocks_sum;
1384+ ulint new_blocks_sum, flushed_blocks_sum;
b4e1fa2c
AM
1385+
1386+ blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
1387+
1388+ /* prev_flush_info[j] should hold the values saved by the previous loop iteration */
1389+ for (j = 0; j < srv_buf_pool_instances; j++) {
1390+ lint blocks_num, new_blocks_num, flushed_blocks_num;
1391+ ibool found;
1392+
1393+ buf_pool = buf_pool_from_array(j);
1394+
1395+ blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
1396+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1397+ new_blocks_num = 0;
1398+
1399+ found = FALSE;
1400+ while (bpage != NULL) {
1401+ if (prev_flush_info[j].space == bpage->space
1402+ && prev_flush_info[j].offset == bpage->offset
1403+ && prev_flush_info[j].oldest_modification
1404+ == bpage->oldest_modification) {
1405+ found = TRUE;
1406+ break;
1407+ }
1408+ bpage = UT_LIST_GET_NEXT(list, bpage);
1409+ new_blocks_num++;
1410+ }
1411+ if (!found) {
1412+ new_blocks_num = blocks_num;
1413+ }
1414+
1415+ flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
1416+ - blocks_num;
1417+ if (flushed_blocks_num < 0) {
1418+ flushed_blocks_num = 0;
1419+ }
1420+
1421+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1422+
1423+ prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1424+ if (bpage) {
1425+ prev_flush_info[j].space = bpage->space;
1426+ prev_flush_info[j].offset = bpage->offset;
1427+ prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1428+ } else {
1429+ prev_flush_info[j].space = 0;
1430+ prev_flush_info[j].offset = 0;
1431+ prev_flush_info[j].oldest_modification = 0;
1432+ }
1433+
1434+ new_blocks_sum += new_blocks_num;
1435+ flushed_blocks_sum += flushed_blocks_num;
1436+ blocks_sum += blocks_num;
1437+ }
1438+
1439+ n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
1440+ if (flushed_blocks_sum > n_pages_flushed_prev) {
1441+ n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
1442+ }
1443+
1444+ if (n_flush > 0) {
1445+ n_flush++;
1446+ n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
1447+ } else {
1448+ n_pages_flushed = 0;
1449+ }
1450+ } else {
1451+ /* store previous first pages of the flush_list */
1452+ for (j = 0; j < srv_buf_pool_instances; j++) {
1453+ buf_pool = buf_pool_from_array(j);
1454+
1455+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1456+
1457+ prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1458+ if (bpage) {
1459+ prev_flush_info[j].space = bpage->space;
1460+ prev_flush_info[j].offset = bpage->offset;
1461+ prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1462+ } else {
1463+ prev_flush_info[j].space = 0;
1464+ prev_flush_info[j].offset = 0;
1465+ prev_flush_info[j].oldest_modification = 0;
1466+ }
1467+ }
1468+ n_pages_flushed = 0;
1469+ }
1470+
1471+ lsn_old = lsn;
1472+ prev_adaptive_flushing_method = 2;
1473+ } else {
1474+ mutex_enter(&(log_sys->mutex));
1475+ lsn_old = log_sys->lsn;
1476+ mutex_exit(&(log_sys->mutex));
1477+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
1478+ }
1479+
1480+ if (n_pages_flushed == ULINT_UNDEFINED) {
1481+ n_pages_flushed_prev = 0;
1482+ } else {
1483+ n_pages_flushed_prev = n_pages_flushed;
1484 }
1485
1486 if (srv_activity_count == old_activity_count) {
1bfc1981 1487@@ -2945,12 +3220,12 @@
b4e1fa2c
AM
1488 even if the server were active */
1489
1490 srv_main_thread_op_info = "doing insert buffer merge";
1491- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1492+ ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1493
1494 /* Flush logs if needed */
1495 srv_sync_log_buffer_in_background();
db82db79
AM
1496
1497- if (srv_n_purge_threads == 0) {
1498+ if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1499 srv_main_thread_op_info = "master purging";
1500
1501 srv_master_do_purge();
1bfc1981
AM
1502@@ -2982,11 +3257,18 @@
1503 PCT_IO(10), IB_ULONGLONG_MAX);
1504 }
1505
1506- srv_main_thread_op_info = "making checkpoint";
1507+#ifdef UNIV_DEBUG
1508+ if (srv_flush_checkpoint_debug != 1) {
1509+#endif
1510
1511- /* Make a new checkpoint about once in 10 seconds */
1512+ srv_main_thread_op_info = "making checkpoint";
1513
1514- log_checkpoint(TRUE, FALSE);
1515+ /* Make a new checkpoint about once in 10 seconds */
1516+
1517+ log_checkpoint(TRUE, FALSE);
1518+#ifdef UNIV_DEBUG
1519+ }
1520+#endif
1521
1522 srv_main_thread_op_info = "reserving kernel mutex";
1523
1524@@ -3028,7 +3310,7 @@
db82db79
AM
1525 }
1526 }
1527
1528- if (srv_n_purge_threads == 0) {
1529+ if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
1530 srv_main_thread_op_info = "master purging";
1531
1532 srv_master_do_purge();
1bfc1981 1533@@ -3053,7 +3335,7 @@
b4e1fa2c
AM
1534 buf_flush_list below. Otherwise, the system favors
1535 clean pages over cleanup throughput. */
1536 n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
1537- PCT_IO(100));
1538+ PCT_IBUF_IO(100));
1539 }
1540
1541 srv_main_thread_op_info = "reserving kernel mutex";
1bfc1981
AM
1542@@ -3065,6 +3347,10 @@
1543 }
1544 mutex_exit(&kernel_mutex);
1545
1546+#ifdef UNIV_DEBUG
1547+ if (srv_flush_checkpoint_debug == 1)
1548+ goto skip_flush;
1549+#endif
1550 flush_loop:
1551 srv_main_thread_op_info = "flushing buffer pool pages";
1552 srv_main_flush_loops++;
1553@@ -3105,6 +3391,9 @@
1554 goto flush_loop;
1555 }
1556
1557+#ifdef UNIV_DEBUG
1558+skip_flush:
1559+#endif
1560 srv_main_thread_op_info = "reserving kernel mutex";
1561
1562 mutex_enter(&kernel_mutex);
1563@@ -3193,6 +3482,7 @@
adf0fb13 1564 srv_slot_t* slot;
11822e22 1565 ulint retries = 0;
b4e1fa2c
AM
1566 ulint n_total_purged = ULINT_UNDEFINED;
1567+ ulint next_itr_time;
1568
1569 ut_a(srv_n_purge_threads == 1);
1570
1bfc1981 1571@@ -3213,9 +3503,12 @@
b4e1fa2c
AM
1572
1573 mutex_exit(&kernel_mutex);
1574
1575+ next_itr_time = ut_time_ms();
1576+
1577 while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
1578
11822e22 1579 ulint n_pages_purged = 0;
b4e1fa2c
AM
1580+ ulint cur_time;
1581
1582 /* If there are very few records to purge or the last
1583 purge didn't purge any records then wait for activity.
1bfc1981 1584@@ -3262,6 +3555,16 @@
b4e1fa2c
AM
1585 } while (n_pages_purged > 0 && !srv_fast_shutdown);
1586
1587 srv_sync_log_buffer_in_background();
1588+
1589+ cur_time = ut_time_ms();
1590+ if (next_itr_time > cur_time) {
1591+ os_thread_sleep(ut_min(1000000,
1592+ (next_itr_time - cur_time)
1593+ * 1000));
1594+ next_itr_time = ut_time_ms() + 1000;
1595+ } else {
1596+ next_itr_time = cur_time + 1000;
1597+ }
1598 }
1599
1600 mutex_enter(&kernel_mutex);
db82db79
AM
1601--- a/storage/innobase/srv/srv0start.c
1602+++ b/storage/innobase/srv/srv0start.c
29ffd636 1603@@ -1237,6 +1237,9 @@
b4e1fa2c
AM
1604 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1605 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1606
1607+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1608+ srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1609+
1610 } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1611 srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1612
db82db79
AM
1613--- a/storage/innobase/trx/trx0purge.c
1614+++ b/storage/innobase/trx/trx0purge.c
11822e22
AM
1615@@ -392,10 +392,10 @@
1616 trx_sys->rseg_history_len++;
1617 mutex_exit(&kernel_mutex);
1618
1619- if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
1620+// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/
1621 /* Inform the purge thread that there is work to do. */
1622 srv_wake_purge_thread_if_not_active();
1623- }
1624+// }
1625 }
1626
1627 /**********************************************************************//**
db82db79
AM
1628--- a/storage/innobase/trx/trx0trx.c
1629+++ b/storage/innobase/trx/trx0trx.c
adf0fb13 1630@@ -984,6 +984,7 @@
b4e1fa2c
AM
1631 trx->read_view = NULL;
1632
1633 if (lsn) {
1634+ ulint flush_log_at_trx_commit;
1635
1636 mutex_exit(&kernel_mutex);
1637
adf0fb13 1638@@ -992,6 +993,12 @@
b4e1fa2c
AM
1639 trx_undo_insert_cleanup(trx);
1640 }
1641
1642+ if (srv_use_global_flush_log_at_trx_commit) {
1643+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1644+ } else {
1645+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1646+ }
1647+
1648 /* NOTE that we could possibly make a group commit more
1649 efficient here: call os_thread_yield here to allow also other
1650 trxs to come to commit! */
adf0fb13 1651@@ -1023,9 +1030,9 @@
b4e1fa2c
AM
1652 if (trx->flush_log_later) {
1653 /* Do nothing yet */
1654 trx->must_flush_log_later = TRUE;
1655- } else if (srv_flush_log_at_trx_commit == 0) {
1656+ } else if (flush_log_at_trx_commit == 0) {
1657 /* Do nothing */
1658- } else if (srv_flush_log_at_trx_commit == 1) {
1659+ } else if (flush_log_at_trx_commit == 1) {
1660 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1661 /* Write the log but do not flush it to disk */
1662
adf0fb13 1663@@ -1037,7 +1044,7 @@
b4e1fa2c
AM
1664
1665 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1666 }
1667- } else if (srv_flush_log_at_trx_commit == 2) {
1668+ } else if (flush_log_at_trx_commit == 2) {
1669
1670 /* Write the log but do not flush it to disk */
1671
adf0fb13 1672@@ -1701,16 +1708,23 @@
b4e1fa2c
AM
1673 trx_t* trx) /*!< in: trx handle */
1674 {
1675 ib_uint64_t lsn = trx->commit_lsn;
1676+ ulint flush_log_at_trx_commit;
1677
1678 ut_a(trx);
1679
1680 trx->op_info = "flushing log";
1681
1682+ if (srv_use_global_flush_log_at_trx_commit) {
1683+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1684+ } else {
1685+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1686+ }
1687+
1688 if (!trx->must_flush_log_later) {
1689 /* Do nothing */
1690- } else if (srv_flush_log_at_trx_commit == 0) {
1691+ } else if (flush_log_at_trx_commit == 0) {
1692 /* Do nothing */
1693- } else if (srv_flush_log_at_trx_commit == 1) {
1694+ } else if (flush_log_at_trx_commit == 1) {
1695 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1696 /* Write the log but do not flush it to disk */
1697
adf0fb13 1698@@ -1721,7 +1735,7 @@
b4e1fa2c
AM
1699
1700 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1701 }
1702- } else if (srv_flush_log_at_trx_commit == 2) {
1703+ } else if (flush_log_at_trx_commit == 2) {
1704
1705 /* Write the log but do not flush it to disk */
1706
adf0fb13 1707@@ -1969,6 +1983,8 @@
b4e1fa2c
AM
1708 /*--------------------------------------*/
1709
1710 if (lsn) {
1711+ ulint flush_log_at_trx_commit;
1712+
1713 /* Depending on the my.cnf options, we may now write the log
1714 buffer to the log files, making the prepared state of the
1715 transaction durable if the OS does not crash. We may also
adf0fb13 1716@@ -1988,9 +2004,15 @@
b4e1fa2c
AM
1717
1718 mutex_exit(&kernel_mutex);
1719
1720- if (srv_flush_log_at_trx_commit == 0) {
1721+ if (srv_use_global_flush_log_at_trx_commit) {
1722+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1723+ } else {
1724+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1725+ }
1726+
1727+ if (flush_log_at_trx_commit == 0) {
1728 /* Do nothing */
1729- } else if (srv_flush_log_at_trx_commit == 1) {
1730+ } else if (flush_log_at_trx_commit == 1) {
1731 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1732 /* Write the log but do not flush it to disk */
1733
adf0fb13 1734@@ -2002,7 +2024,7 @@
b4e1fa2c
AM
1735
1736 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1737 }
1738- } else if (srv_flush_log_at_trx_commit == 2) {
1739+ } else if (flush_log_at_trx_commit == 2) {
1740
1741 /* Write the log but do not flush it to disk */
1742
13ceb006
AM
1743--- a/mysql-test/include/default_mysqld.cnf
1744+++ b/mysql-test/include/default_mysqld.cnf
1745@@ -29,7 +29,7 @@
1746 max_heap_table_size= 1M
1747
1748 loose-innodb_data_file_path= ibdata1:10M:autoextend
1749-loose-innodb_buffer_pool_size= 8M
1750+loose-innodb_buffer_pool_size= 32M
1751 loose-innodb_write_io_threads= 2
1752 loose-innodb_read_io_threads= 2
1753 loose-innodb_log_buffer_size= 1M
1754--- a/mysql-test/suite/innodb/r/innodb.result
1755+++ b/mysql-test/suite/innodb/r/innodb.result
1756@@ -1678,7 +1678,7 @@
1757 drop table t1;
1758 SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total';
1759 variable_value
1760-511
1761+2047
1762 SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size';
1763 variable_value
1764 16384
1bfc1981
AM
1765--- /dev/null
1766+++ b/mysql-test/suite/innodb/r/percona_flush_contiguous_neighbors.result
1767@@ -0,0 +1,21 @@
1768+DROP TABLE IF EXISTS t1;
1769+CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB;
1770+INSERT INTO t1(foo) VALUES ('a'), ('b');
1771+INSERT INTO t1(foo) SELECT foo FROM t1;
1772+INSERT INTO t1(foo) SELECT foo FROM t1;
1773+INSERT INTO t1(foo) SELECT foo FROM t1;
1774+INSERT INTO t1(foo) SELECT foo FROM t1;
1775+INSERT INTO t1(foo) SELECT foo FROM t1;
1776+INSERT INTO t1(foo) SELECT foo FROM t1;
1777+INSERT INTO t1(foo) SELECT foo FROM t1;
1778+INSERT INTO t1(foo) SELECT foo FROM t1;
1779+INSERT INTO t1(foo) SELECT foo FROM t1;
1780+INSERT INTO t1(foo) SELECT foo FROM t1;
1781+INSERT INTO t1(foo) SELECT foo FROM t1;
1782+INSERT INTO t1(foo) SELECT foo FROM t1;
1783+INSERT INTO t1(foo) SELECT foo FROM t1;
1784+INSERT INTO t1(foo) SELECT foo FROM t1;
1785+INSERT INTO t1(foo) SELECT foo FROM t1;
1786+INSERT INTO t1(foo) SELECT foo FROM t1;
1787+INSERT INTO t1(foo) SELECT foo FROM t1;
1788+DROP TABLE t1;
1789--- /dev/null
1790+++ b/mysql-test/suite/innodb/t/percona_flush_contiguous_neighbors-master.opt
1791@@ -0,0 +1 @@
1792+--innodb_flush_neighbor_pages=cont
1793--- /dev/null
1794+++ b/mysql-test/suite/innodb/t/percona_flush_contiguous_neighbors.test
1795@@ -0,0 +1,36 @@
1796+# Test for innodb_flush_neighbor_pages=contiguous.
1797+# The test is very crude: we simply overflow the buffer pool with such a number of
1798+# new/modified pages that some flushing is bound to happen.
1799+
1800+--source include/have_innodb.inc
1801+
1802+--disable_warnings
1803+DROP TABLE IF EXISTS t1;
1804+--enable_warnings
1805+
1806+CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB;
1807+
1808+INSERT INTO t1(foo) VALUES ('a'), ('b');
1809+INSERT INTO t1(foo) SELECT foo FROM t1;
1810+INSERT INTO t1(foo) SELECT foo FROM t1;
1811+INSERT INTO t1(foo) SELECT foo FROM t1;
1812+INSERT INTO t1(foo) SELECT foo FROM t1;
1813+INSERT INTO t1(foo) SELECT foo FROM t1;
1814+INSERT INTO t1(foo) SELECT foo FROM t1;
1815+INSERT INTO t1(foo) SELECT foo FROM t1;
1816+INSERT INTO t1(foo) SELECT foo FROM t1;
1817+INSERT INTO t1(foo) SELECT foo FROM t1;
1818+INSERT INTO t1(foo) SELECT foo FROM t1;
1819+INSERT INTO t1(foo) SELECT foo FROM t1;
1820+INSERT INTO t1(foo) SELECT foo FROM t1;
1821+INSERT INTO t1(foo) SELECT foo FROM t1;
1822+INSERT INTO t1(foo) SELECT foo FROM t1;
1823+INSERT INTO t1(foo) SELECT foo FROM t1;
1824+INSERT INTO t1(foo) SELECT foo FROM t1;
1825+INSERT INTO t1(foo) SELECT foo FROM t1;
1826+
1827+# TODO: cannot record a stable value here. A check of > 0 should be enough,
1828+# but the variable is not accessible through INFORMATION_SCHEMA currently.
1829+# SHOW GLOBAL STATUS LIKE 'Innodb_buffer_pool_pages_flushed';
1830+
1831+DROP TABLE t1;
1832--- /dev/null
1833+++ b/mysql-test/suite/innodb/r/percona_sync_flush.result
1834@@ -0,0 +1,35 @@
1835+DROP TABLE IF EXISTS t1;
1836+CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB;
1837+SET @@global.innodb_flush_checkpoint_debug=1;
1838+INSERT INTO t1(foo) VALUES ('a'), ('b');
1839+INSERT INTO t1(foo) SELECT foo FROM t1;
1840+UPDATE t1 SET foo='c';
1841+INSERT INTO t1(foo) SELECT foo FROM t1;
1842+UPDATE t1 SET foo='c';
1843+INSERT INTO t1(foo) SELECT foo FROM t1;
1844+UPDATE t1 SET foo='c';
1845+INSERT INTO t1(foo) SELECT foo FROM t1;
1846+UPDATE t1 SET foo='c';
1847+INSERT INTO t1(foo) SELECT foo FROM t1;
1848+UPDATE t1 SET foo='c';
1849+INSERT INTO t1(foo) SELECT foo FROM t1;
1850+UPDATE t1 SET foo='c';
1851+INSERT INTO t1(foo) SELECT foo FROM t1;
1852+UPDATE t1 SET foo='c';
1853+INSERT INTO t1(foo) SELECT foo FROM t1;
1854+UPDATE t1 SET foo='c';
1855+INSERT INTO t1(foo) SELECT foo FROM t1;
1856+UPDATE t1 SET foo='c';
1857+INSERT INTO t1(foo) SELECT foo FROM t1;
1858+UPDATE t1 SET foo='c';
1859+INSERT INTO t1(foo) SELECT foo FROM t1;
1860+UPDATE t1 SET foo='c';
1861+INSERT INTO t1(foo) SELECT foo FROM t1;
1862+UPDATE t1 SET foo='c';
1863+INSERT INTO t1(foo) SELECT foo FROM t1;
1864+UPDATE t1 SET foo='c';
1865+INSERT INTO t1(foo) SELECT foo FROM t1;
1866+UPDATE t1 SET foo='c';
1867+SET @@global.innodb_flush_checkpoint_debug=0;
1868+UPDATE t1 SET foo='d' WHERE foo='c';
1869+DROP TABLE t1;
1870--- /dev/null
1871+++ b/mysql-test/suite/innodb/t/percona_sync_flush.test
1872@@ -0,0 +1,33 @@
1873+# Test for InnoDB sync state flushing.
1874+
1875+--source include/have_innodb.inc
1876+--source include/have_debug.inc
1877+
1878+--disable_warnings
1879+DROP TABLE IF EXISTS t1;
1880+--enable_warnings
1881+
1882+CREATE TABLE t1 (id INT AUTO_INCREMENT, foo CHAR(255), PRIMARY KEY (id)) ENGINE=InnoDB;
1883+
1884+# It is hard to get to InnoDB sync state flushing in MTR with regular workload. Perhaps
1885+# it is possible with many parallel connections, but that would be brittle anyway.
1886+# So, just disable preflushing and checkpointing and issue simple workload.
1887+SET @@global.innodb_flush_checkpoint_debug=1;
1888+
1889+INSERT INTO t1(foo) VALUES ('a'), ('b');
1890+
1891+let $rep=0;
1892+while ($rep < 14)
1893+{
1894+ INSERT INTO t1(foo) SELECT foo FROM t1;
1895+ UPDATE t1 SET foo='c';
1896+ inc $rep;
1897+}
1898+
1899+# By now checkpoint age should be well past sync flush point. Allow
1900+# preflushing/checkpointing again and do some work in order to do the sync flush.
1901+SET @@global.innodb_flush_checkpoint_debug=0;
1902+
1903+UPDATE t1 SET foo='d' WHERE foo='c';
1904+
1905+DROP TABLE t1;
1906--- a/mysql-test/suite/sys_vars/r/all_vars.result
1907+++ b/mysql-test/suite/sys_vars/r/all_vars.result
1908@@ -4,6 +4,7 @@
1909 insert into t2 select variable_name from information_schema.global_variables;
1910 insert into t2 select variable_name from information_schema.session_variables;
1911 delete from t2 where variable_name='innodb_change_buffering_debug';
1912+delete from t2 where variable_name='innodb_flush_checkpoint_debug';
1913 update t2 set variable_name= replace(variable_name, "PERFORMANCE_SCHEMA_", "PFS_");
1914 select variable_name as `There should be *no* long test name listed below:` from t2
1915 where length(variable_name) > 50;
1916--- a/mysql-test/suite/sys_vars/t/all_vars.test
1917+++ b/mysql-test/suite/sys_vars/t/all_vars.test
1918@@ -47,8 +47,9 @@
1919 insert into t2 select variable_name from information_schema.global_variables;
1920 insert into t2 select variable_name from information_schema.session_variables;
1921
1922-# This is only present in debug builds.
1923+# These are only present in debug builds.
1924 delete from t2 where variable_name='innodb_change_buffering_debug';
1925+delete from t2 where variable_name='innodb_flush_checkpoint_debug';
1926
1927 # Performance schema variables are too long for files named
1928 # 'mysql-test/suite/sys_vars/t/' ...
29ffd636
AM
1929--- a/mysql-test/suite/innodb/t/innodb_cmp_drop_table-master.opt
1930+++ b/mysql-test/suite/innodb/t/innodb_cmp_drop_table-master.opt
1931@@ -1 +1 @@
1932---innodb-buffer-pool-size=8M
1933+--innodb-buffer-pool-size=32M
1934--- a/mysql-test/suite/innodb/t/innodb_cmp_drop_table.test
1935+++ b/mysql-test/suite/innodb/t/innodb_cmp_drop_table.test
1936@@ -36,13 +36,14 @@
1937
1938 -- disable_query_log
1939
1940--- let $i = 400
1941+-- let $i = 4000
1942+begin;
1943 while ($i)
1944 {
1945 insert into t2 values(repeat('abcdefghijklmnopqrstuvwxyz',1000));
1946 dec $i;
1947 }
1948-
1949+commit;
1950 -- enable_query_log
1951
1952 # now there should be no 8K pages in the buffer pool
This page took 0.332094 seconds and 4 git commands to generate.