]> git.pld-linux.org Git - packages/mysql.git/blame - innodb_io_patches.patch
- make mysql.init a bit more lsb-compatible
[packages/mysql.git] / innodb_io_patches.patch
CommitLineData
b4e1fa2c
AM
1# name : innodb_io_patches.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
8diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
9--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:09:51.273986410 +0900
10+++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:10:08.934990091 +0900
11@@ -320,6 +320,7 @@
12
13 /* When we traverse all the flush lists we don't want another
14 thread to add a dirty page to any flush list. */
15+ if (srv_buf_pool_instances > 1)
16 log_flush_order_mutex_enter();
17
18 for (i = 0; i < srv_buf_pool_instances; i++) {
19@@ -343,6 +344,7 @@
20 }
21 }
22
23+ if (srv_buf_pool_instances > 1)
24 log_flush_order_mutex_exit();
25
26 /* The returned answer may be out of date: the flush_list can
27diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
28--- a/storage/innobase/buf/buf0flu.c 2010-11-03 07:01:13.000000000 +0900
29+++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:10:08.934990091 +0900
413cadc7
AM
30@@ -855,7 +855,7 @@
31 flush:
32 /* Now flush the doublewrite buffer data to disk */
33
34- fil_flush(TRX_SYS_SPACE);
35+ fil_flush(TRX_SYS_SPACE, FALSE);
36
37 /* We know that the writes have been flushed to disk now
38 and in recovery we will find them in the doublewrite buffer
d8778560 39@@ -1376,7 +1376,7 @@
b4e1fa2c
AM
40
41 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
42
43- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
44+ if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) {
45 /* If there is little space, it is better not to flush
46 any block except from the end of the LRU list */
47
48diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
49--- a/storage/innobase/buf/buf0rea.c 2010-11-03 07:01:13.000000000 +0900
50+++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:10:08.937050537 +0900
51@@ -260,6 +260,10 @@
52 = BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
53 ulint threshold;
54
55+ if (!(srv_read_ahead & 2)) {
56+ return(0);
57+ }
58+
59 if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
60 /* No read-ahead to avoid thread deadlocks */
61 return(0);
413cadc7
AM
62diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
63--- a/storage/innobase/fil/fil0fil.c 2011-06-29 17:48:24.797971571 +0900
64+++ b/storage/innobase/fil/fil0fil.c 2011-06-29 18:04:02.548053286 +0900
65@@ -2600,7 +2600,7 @@
66
67 os_thread_sleep(20000);
68
69- fil_flush(id);
70+ fil_flush(id, TRUE);
71
72 goto retry;
73
74@@ -2814,7 +2814,7 @@
75 goto error_exit;
76 }
77
78- ret = os_file_flush(file);
79+ ret = os_file_flush(file, TRUE);
80
81 if (!ret) {
82 fputs("InnoDB: Error: file flush of tablespace ", stderr);
83@@ -3000,7 +3000,7 @@
84 }
85 }
86
87- success = os_file_flush(file);
88+ success = os_file_flush(file, TRUE);
89 if (!success) {
90
91 goto func_exit;
92@@ -3022,7 +3022,7 @@
93
94 goto func_exit;
95 }
96- success = os_file_flush(file);
97+ success = os_file_flush(file, TRUE);
98 func_exit:
99 os_file_close(file);
100 ut_free(buf2);
101@@ -4005,7 +4005,7 @@
102 size_after_extend, *actual_size); */
103 mutex_exit(&fil_system->mutex);
104
105- fil_flush(space_id);
106+ fil_flush(space_id, TRUE);
107
108 return(success);
109 }
110@@ -4576,8 +4576,9 @@
111 void
112 fil_flush(
113 /*======*/
114- ulint space_id) /*!< in: file space id (this can be a group of
115+ ulint space_id, /*!< in: file space id (this can be a group of
116 log files or a tablespace of the database) */
117+ ibool metadata)
118 {
119 fil_space_t* space;
120 fil_node_t* node;
121@@ -4648,7 +4649,7 @@
122 /* fprintf(stderr, "Flushing to file %s\n",
123 node->name); */
124
125- os_file_flush(file);
126+ os_file_flush(file, metadata);
127
128 mutex_enter(&fil_system->mutex);
129
130@@ -4731,7 +4732,7 @@
131 a non-existing space id. */
132 for (i = 0; i < n_space_ids; i++) {
133
134- fil_flush(space_ids[i]);
135+ fil_flush(space_ids[i], TRUE);
136 }
137
138 mem_free(space_ids);
b4e1fa2c
AM
139diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
140--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:09:51.283956391 +0900
141+++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:10:08.963980444 +0900
adf0fb13 142@@ -444,6 +444,12 @@
b4e1fa2c
AM
143 "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
144 NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
145
146+static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
147+ "Set to 0 (write and flush once per second),"
148+ " 1 (write and flush at each commit)"
149+ " or 2 (write at commit, flush once per second).",
150+ NULL, NULL, 1, 0, 2, 0);
151+
152
153 static handler *innobase_create_handler(handlerton *hton,
154 TABLE_SHARE *table,
adf0fb13 155@@ -838,6 +844,17 @@
b4e1fa2c
AM
156 }
157 }
158
159+/******************************************************************//**
160+Reads the flush_log_at_trx_commit THDVAR for the given thread handle (thd is cast to THD*). */
161+extern "C" UNIV_INTERN
162+ulong
163+thd_flush_log_at_trx_commit(
164+/*================================*/
165+ void* thd)
166+{
167+ return(THDVAR((THD*) thd, flush_log_at_trx_commit));
168+}
169+
170 /********************************************************************//**
171 Obtain the InnoDB transaction of a MySQL thread.
172 @return reference to transaction pointer */
adf0fb13 173@@ -2437,6 +2454,9 @@
b4e1fa2c
AM
174 srv_n_read_io_threads = (ulint) innobase_read_io_threads;
175 srv_n_write_io_threads = (ulint) innobase_write_io_threads;
176
177+ srv_read_ahead &= 3;
178+ srv_adaptive_flushing_method %= 3;
179+
180 srv_force_recovery = (ulint) innobase_force_recovery;
181
182 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
adf0fb13 183@@ -11025,7 +11045,7 @@
b4e1fa2c 184 PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
11822e22 185 "Purge threads can be either 0 or 1.",
b4e1fa2c
AM
186 NULL, NULL,
187- 0, /* Default setting */
188+ 1, /* Default setting */
189 0, /* Minimum value */
190 1, 0); /* Maximum value */
191
adf0fb13 192@@ -11067,12 +11087,18 @@
b4e1fa2c
AM
193 innodb_file_format_max_validate,
194 innodb_file_format_max_update, "Antelope");
195
196-static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
197- PLUGIN_VAR_OPCMDARG,
198- "Set to 0 (write and flush once per second),"
199- " 1 (write and flush at each commit)"
200- " or 2 (write at commit, flush once per second).",
201- NULL, NULL, 1, 0, 2, 0);
202+/* Changed to the THDVAR */
203+//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
204+// PLUGIN_VAR_OPCMDARG,
205+// "Set to 0 (write and flush once per second),"
206+// " 1 (write and flush at each commit)"
207+// " or 2 (write at commit, flush once per second).",
208+// NULL, NULL, 1, 0, 2, 0);
209+
210+static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
211+ PLUGIN_VAR_NOCMDARG,
212+ "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
213+ NULL, NULL, TRUE);
214
215 static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
216 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
adf0fb13 217@@ -11167,7 +11193,7 @@
b4e1fa2c
AM
218 static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
219 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
220 "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
221- NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
222+ NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
223
224 static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
225 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
adf0fb13 226@@ -11319,6 +11345,95 @@
b4e1fa2c
AM
227 "trigger a readahead.",
228 NULL, NULL, 56, 0, 64, 0);
229
230+static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
231+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
232+ "The maximum size of the insert buffer. (in bytes)",
233+ NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
234+
235+static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
236+ PLUGIN_VAR_RQCMDARG,
237+ "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
238+ NULL, NULL, 1, 0, 1, 0);
239+
240+static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
241+ PLUGIN_VAR_RQCMDARG,
242+ "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
243+ NULL, NULL, 100, 100, 999999999, 0);
244+
245+static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target,
246+ PLUGIN_VAR_RQCMDARG,
247+ "Control soft limit of checkpoint age. (0 : not control)",
248+ NULL, NULL, 0, 0, ~0UL, 0);
249+
250+static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages,
251+ PLUGIN_VAR_RQCMDARG,
252+ "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
253+ NULL, NULL, 1, 0, 1, 0);
254+
255+static
256+void
257+innodb_read_ahead_update(
258+ THD* thd,
259+ struct st_mysql_sys_var* var,
260+ void* var_ptr,
261+ const void* save)
262+{
263+ *(long *)var_ptr= (*(long *)save) & 3;
264+}
265+const char *read_ahead_names[]=
266+{
267+ "none", /* 0 */
268+ "random",
269+ "linear",
270+ "both", /* 3 */
271+ /* For compatibility of the older patch */
272+ "0", /* 4 ("none" + 4) */
273+ "1",
274+ "2",
275+ "3", /* 7 ("both" + 4) */
276+ NullS
277+};
278+TYPELIB read_ahead_typelib=
279+{
280+ array_elements(read_ahead_names) - 1, "read_ahead_typelib",
281+ read_ahead_names, NULL
282+};
283+static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
284+ PLUGIN_VAR_RQCMDARG,
285+ "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]",
286+ NULL, innodb_read_ahead_update, 2, &read_ahead_typelib);
287+
288+static
289+void
290+innodb_adaptive_flushing_method_update(
291+ THD* thd, /* in: thread handle (unused here) */
292+ struct st_mysql_sys_var* var, /* in: system variable descriptor (unused here) */
293+ void* var_ptr, /* out: where the normalized value is stored (written as long) */
294+ const void* save) /* in: value being set (read as long) */
295+{
296+ *(long *)var_ptr= (*(long *)save) % 3; /* fold the legacy "0".."2" aliases (indexes 3..5) onto 0..2, matching the %= 3 done at startup */
297+}
298+const char *adaptive_flushing_method_names[]=
299+{
300+ "native", /* 0 */
301+ "estimate", /* 1 */
302+ "keep_average", /* 2 */
303+ /* Numeric aliases kept for compatibility with the older patch */
304+ "0", /* 3 ("native" + 3) */
305+ "1", /* 4 ("estimate" + 3) */
306+ "2", /* 5 ("keep_average" + 3) */
307+ NullS
308+};
309+TYPELIB adaptive_flushing_method_typelib=
310+{
311+ array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib",
312+ adaptive_flushing_method_names, NULL
313+};
314+static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method,
315+ PLUGIN_VAR_RQCMDARG,
316+ "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
317+ NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
b4e1fa2c
AM
318+
319 static struct st_mysql_sys_var* innobase_system_variables[]= {
320 MYSQL_SYSVAR(additional_mem_pool_size),
321 MYSQL_SYSVAR(autoextend_increment),
adf0fb13 322@@ -11339,6 +11454,7 @@
b4e1fa2c
AM
323 MYSQL_SYSVAR(file_format_check),
324 MYSQL_SYSVAR(file_format_max),
325 MYSQL_SYSVAR(flush_log_at_trx_commit),
326+ MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
327 MYSQL_SYSVAR(flush_method),
328 MYSQL_SYSVAR(force_recovery),
329 MYSQL_SYSVAR(locks_unsafe_for_binlog),
adf0fb13 330@@ -11376,6 +11492,13 @@
b4e1fa2c
AM
331 MYSQL_SYSVAR(show_verbose_locks),
332 MYSQL_SYSVAR(show_locks_held),
333 MYSQL_SYSVAR(version),
334+ MYSQL_SYSVAR(ibuf_max_size),
335+ MYSQL_SYSVAR(ibuf_active_contract),
336+ MYSQL_SYSVAR(ibuf_accel_rate),
337+ MYSQL_SYSVAR(checkpoint_age_target),
338+ MYSQL_SYSVAR(flush_neighbor_pages),
339+ MYSQL_SYSVAR(read_ahead),
340+ MYSQL_SYSVAR(adaptive_flushing_method),
b4e1fa2c
AM
341 MYSQL_SYSVAR(use_sys_malloc),
342 MYSQL_SYSVAR(use_native_aio),
343 MYSQL_SYSVAR(change_buffering),
344diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
345--- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-11-03 07:01:13.000000000 +0900
346+++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:10:09.073984282 +0900
adf0fb13 347@@ -514,8 +514,10 @@
b4e1fa2c
AM
348 grow in size, as the references on the upper levels of the tree can
349 change */
350
351- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
352- / IBUF_POOL_SIZE_PER_MAX_SIZE;
353+ ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
354+ / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
355+
356+ srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
357
358 mutex_create(ibuf_pessimistic_insert_mutex_key,
359 &ibuf_pessimistic_insert_mutex,
adf0fb13 360@@ -2753,9 +2755,11 @@
b4e1fa2c
AM
361 size = ibuf->size;
362 max_size = ibuf->max_size;
363
364+ if (!srv_ibuf_active_contract) {
365 if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
366 return;
367 }
368+ }
369
370 sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
371
372diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
373--- a/storage/innobase/include/buf0rea.h 2010-11-03 07:01:13.000000000 +0900
374+++ b/storage/innobase/include/buf0rea.h 2010-12-03 15:10:09.076066335 +0900
375@@ -124,8 +124,7 @@
376
377 /** The size in pages of the area which the read-ahead algorithms read if
378 invoked */
379-#define BUF_READ_AHEAD_AREA(b) \
380- ut_min(64, ut_2_power_up((b)->curr_size / 32))
381+#define BUF_READ_AHEAD_AREA(b) 64
382
383 /** @name Modes used in read-ahead @{ */
384 /** read only pages belonging to the insert buffer tree */
413cadc7
AM
385diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
386--- a/storage/innobase/include/fil0fil.h 2011-06-29 17:48:24.818969583 +0900
387+++ b/storage/innobase/include/fil0fil.h 2011-06-29 17:58:49.215971540 +0900
388@@ -658,8 +658,9 @@
389 void
390 fil_flush(
391 /*======*/
392- ulint space_id); /*!< in: file space id (this can be a group of
393+ ulint space_id, /*!< in: file space id (this can be a group of
394 log files or a tablespace of the database) */
395+ ibool metadata);
396 /**********************************************************************//**
397 Flushes to disk writes in file spaces of the given type possibly cached by
398 the OS. */
b4e1fa2c
AM
399diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
400--- a/storage/innobase/include/ha_prototypes.h 2010-11-03 07:01:13.000000000 +0900
401+++ b/storage/innobase/include/ha_prototypes.h 2010-12-03 15:10:09.078026360 +0900
adf0fb13 402@@ -284,6 +284,13 @@
b4e1fa2c
AM
403 /*===================*/
404 void* thd, /*!< in: thread handle (THD*) */
405 ulint value); /*!< in: time waited for the lock */
406+/******************************************************************//**
407+Reads the flush_log_at_trx_commit THDVAR for the given thread handle; defined in ha_innodb.cc. */
408+
409+ulong
410+thd_flush_log_at_trx_commit(
411+/*================================*/
412+ void* thd);
413
adf0fb13
AM
414 /**********************************************************************//**
415 Get the current setting of the lower_case_table_names global parameter from
413cadc7
AM
416diff -ruN a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
417--- a/storage/innobase/include/os0file.h 2011-05-11 20:54:12.000000000 +0900
418+++ b/storage/innobase/include/os0file.h 2011-06-29 17:55:21.947041132 +0900
419@@ -296,8 +296,8 @@
420 pfs_os_file_write_func(name, file, buf, offset, offset_high, \
421 n, __FILE__, __LINE__)
422
423-# define os_file_flush(file) \
424- pfs_os_file_flush_func(file, __FILE__, __LINE__)
425+# define os_file_flush(file, metadata) \
426+ pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
427
428 # define os_file_rename(key, oldpath, newpath) \
429 pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
430@@ -333,7 +333,7 @@
431 # define os_file_write(name, file, buf, offset, offset_high, n) \
432 os_file_write_func(name, file, buf, offset, offset_high, n)
433
434-# define os_file_flush(file) os_file_flush_func(file)
435+# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
436
437 # define os_file_rename(key, oldpath, newpath) \
438 os_file_rename_func(oldpath, newpath)
439@@ -781,6 +781,7 @@
440 pfs_os_file_flush_func(
441 /*===================*/
442 os_file_t file, /*!< in, own: handle to a file */
443+ ibool metadata,
444 const char* src_file,/*!< in: file name where func invoked */
445 ulint src_line);/*!< in: line where the func invoked */
446
447@@ -860,7 +861,8 @@
448 ibool
449 os_file_flush_func(
450 /*===============*/
451- os_file_t file); /*!< in, own: handle to a file */
452+ os_file_t file, /*!< in, own: handle to a file */
453+ ibool metadata);
454 /***********************************************************************//**
455 Retrieves the last error number if an error occurs in a file io function.
456 The number should be retrieved before any other OS calls (because they may
457diff -ruN a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
458--- a/storage/innobase/include/os0file.ic 2011-05-11 20:54:12.000000000 +0900
459+++ b/storage/innobase/include/os0file.ic 2011-06-29 17:56:01.510958172 +0900
460@@ -369,6 +369,7 @@
461 pfs_os_file_flush_func(
462 /*===================*/
463 os_file_t file, /*!< in, own: handle to a file */
464+ ibool metadata,
465 const char* src_file,/*!< in: file name where func invoked */
466 ulint src_line)/*!< in: line where the func invoked */
467 {
468@@ -378,7 +379,7 @@
469
470 register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
471 src_file, src_line);
472- result = os_file_flush_func(file);
473+ result = os_file_flush_func(file, metadata);
474
475 register_pfs_file_io_end(locker, 0);
476
b4e1fa2c
AM
477diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
478--- a/storage/innobase/include/srv0srv.h 2010-12-03 15:09:51.291955835 +0900
479+++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:10:09.079029047 +0900
adf0fb13 480@@ -138,7 +138,8 @@
b4e1fa2c
AM
481 extern ulint srv_n_log_files;
482 extern ulint srv_log_file_size;
483 extern ulint srv_log_buffer_size;
484-extern ulong srv_flush_log_at_trx_commit;
485+//extern ulong srv_flush_log_at_trx_commit;
486+extern char srv_use_global_flush_log_at_trx_commit;
487 extern char srv_adaptive_flushing;
488
489
adf0fb13 490@@ -216,6 +217,16 @@
b4e1fa2c
AM
491 extern ulong srv_max_purge_lag;
492
493 extern ulong srv_replication_delay;
494+
495+extern long long srv_ibuf_max_size;
496+extern ulint srv_ibuf_active_contract;
497+extern ulint srv_ibuf_accel_rate;
498+extern ulint srv_checkpoint_age_target;
499+extern ulint srv_flush_neighbor_pages;
500+extern ulint srv_enable_unsafe_group_commit;
501+extern ulint srv_read_ahead;
502+extern ulint srv_adaptive_flushing_method;
503+
504 /*-------------------------------------------*/
505
506 extern ulint srv_n_rows_inserted;
adf0fb13 507@@ -394,8 +405,9 @@
b4e1fa2c
AM
508 when writing data files, but do flush
509 after writing to log files */
510 SRV_UNIX_NOSYNC, /*!< do not flush after writing */
511- SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
512+ SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
513 data files */
514+ SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */
515 };
516
517 /** Alternatives for file i/o in Windows */
518diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
519--- a/storage/innobase/log/log0log.c 2010-11-03 07:01:13.000000000 +0900
520+++ b/storage/innobase/log/log0log.c 2010-12-03 15:10:09.084023562 +0900
d8778560
AM
521@@ -48,6 +48,7 @@
522 #include "srv0start.h"
523 #include "trx0sys.h"
524 #include "trx0trx.h"
525+#include "ha_prototypes.h"
526
527 /*
528 General philosophy of InnoDB redo-logs:
529@@ -359,6 +360,33 @@
b4e1fa2c
AM
530 }
531
532 /************************************************************//**
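533+Returns log_sys->max_modified_age_async, lowered to srv_checkpoint_age_target minus one eighth of it when that target is nonzero and smaller. */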
534+UNIV_INLINE
535+ulint
536+log_max_modified_age_async()
537+{
538+ if (srv_checkpoint_age_target) {
539+ return(ut_min(log_sys->max_modified_age_async,
540+ srv_checkpoint_age_target
541+ - srv_checkpoint_age_target / 8));
542+ } else {
543+ return(log_sys->max_modified_age_async);
544+ }
545+}
546+
547+UNIV_INLINE
548+ulint
549+log_max_checkpoint_age_async()
550+{
551+ if (srv_checkpoint_age_target) {
552+ return(ut_min(log_sys->max_checkpoint_age_async,
553+ srv_checkpoint_age_target));
554+ } else {
555+ return(log_sys->max_checkpoint_age_async);
556+ }
557+}
558+
559+/************************************************************//**
560 Closes the log.
561 @return lsn */
562 UNIV_INTERN
d8778560 563@@ -427,7 +455,7 @@
b4e1fa2c
AM
564 }
565 }
566
567- if (checkpoint_age <= log->max_modified_age_async) {
568+ if (checkpoint_age <= log_max_modified_age_async()) {
569
570 goto function_exit;
571 }
d8778560 572@@ -435,8 +463,8 @@
b4e1fa2c
AM
573 oldest_lsn = buf_pool_get_oldest_modification();
574
575 if (!oldest_lsn
576- || lsn - oldest_lsn > log->max_modified_age_async
577- || checkpoint_age > log->max_checkpoint_age_async) {
578+ || lsn - oldest_lsn > log_max_modified_age_async()
579+ || checkpoint_age > log_max_checkpoint_age_async()) {
580
581 log->check_flush_or_checkpoint = TRUE;
582 }
413cadc7 583@@ -1100,9 +1128,10 @@
b4e1fa2c
AM
584 group = (log_group_t*)((ulint)group - 1);
585
586 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
587+ && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
588 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
589
413cadc7
AM
590- fil_flush(group->space_id);
591+ fil_flush(group->space_id, FALSE);
592 }
593
594 #ifdef UNIV_DEBUG
595@@ -1121,10 +1150,11 @@
b4e1fa2c
AM
596 logs and cannot end up here! */
597
598 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
599+ && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
600 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
601- && srv_flush_log_at_trx_commit != 2) {
602+ && thd_flush_log_at_trx_commit(NULL) != 2) {
603
413cadc7
AM
604- fil_flush(group->space_id);
605+ fil_flush(group->space_id, FALSE);
b4e1fa2c 606 }
413cadc7
AM
607
608 mutex_enter(&(log_sys->mutex));
d8778560 609@@ -1501,7 +1531,8 @@
b4e1fa2c
AM
610
611 mutex_exit(&(log_sys->mutex));
612
613- if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
614+ if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
615+ || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
616 /* O_DSYNC means the OS did not buffer the log file at all:
617 so we have also flushed to disk what we have written */
618
413cadc7
AM
619@@ -1511,7 +1542,7 @@
620
621 group = UT_LIST_GET_FIRST(log_sys->log_groups);
622
623- fil_flush(group->space_id);
624+ fil_flush(group->space_id, FALSE);
625 log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
626 }
627
d8778560 628@@ -2120,10 +2151,10 @@
b4e1fa2c
AM
629
630 sync = TRUE;
631 advance = 2 * (age - log->max_modified_age_sync);
632- } else if (age > log->max_modified_age_async) {
633+ } else if (age > log_max_modified_age_async()) {
634
635 /* A flush is not urgent: we do an asynchronous preflush */
636- advance = age - log->max_modified_age_async;
637+ advance = age - log_max_modified_age_async();
638 } else {
639 advance = 0;
640 }
d8778560 641@@ -2137,7 +2168,7 @@
b4e1fa2c
AM
642
643 do_checkpoint = TRUE;
644
645- } else if (checkpoint_age > log->max_checkpoint_age_async) {
646+ } else if (checkpoint_age > log_max_checkpoint_age_async()) {
647 /* A checkpoint is not urgent: do it asynchronously */
648
649 do_checkpoint = TRUE;
413cadc7
AM
650@@ -2607,7 +2638,7 @@
651
652 mutex_exit(&(log_sys->mutex));
653
654- fil_flush(group->archive_space_id);
655+ fil_flush(group->archive_space_id, TRUE);
656
657 mutex_enter(&(log_sys->mutex));
658
d8778560 659@@ -3349,6 +3380,17 @@
b4e1fa2c
AM
660 log_sys->flushed_to_disk_lsn,
661 log_sys->last_checkpoint_lsn);
662
663+ fprintf(file,
664+ "Max checkpoint age %lu\n"
665+ "Checkpoint age target %lu\n"
666+ "Modified age %lu\n"
667+ "Checkpoint age %lu\n",
668+ (ulong) log_sys->max_checkpoint_age,
669+ (ulong) log_max_checkpoint_age_async(),
670+ (ulong) (log_sys->lsn -
671+ log_buf_pool_get_oldest_modification()),
672+ (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
673+
674 current_time = time(NULL);
675
676 time_elapsed = 0.001 + difftime(current_time,
677diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
678--- a/storage/innobase/log/log0recv.c 2010-11-03 07:01:13.000000000 +0900
679+++ b/storage/innobase/log/log0recv.c 2010-12-03 15:10:09.089024191 +0900
680@@ -2906,9 +2906,12 @@
681 ib_uint64_t archived_lsn;
682 #endif /* UNIV_LOG_ARCHIVE */
683 byte* buf;
684- byte log_hdr_buf[LOG_FILE_HDR_SIZE];
685+ byte* log_hdr_buf;
686+ byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
687 ulint err;
688
689+ log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
690+
691 #ifdef UNIV_LOG_ARCHIVE
692 ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
693 /** TRUE when recovering from a checkpoint */
413cadc7
AM
694@@ -3468,7 +3471,7 @@
695 exit(1);
696 }
697
698- os_file_flush(log_file);
699+ os_file_flush(log_file, TRUE);
700 os_file_close(log_file);
701 }
702
703@@ -3492,7 +3495,7 @@
704
705 os_file_write(name, log_file, buf, 0, 0,
706 LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
707- os_file_flush(log_file);
708+ os_file_flush(log_file, TRUE);
709 os_file_close(log_file);
710
711 ut_free(buf);
b4e1fa2c
AM
712diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
713--- a/storage/innobase/os/os0file.c 2010-11-03 07:01:13.000000000 +0900
714+++ b/storage/innobase/os/os0file.c 2010-12-03 15:10:09.093023540 +0900
d8778560 715@@ -1424,7 +1424,7 @@
b4e1fa2c
AM
716 #endif
717 #ifdef UNIV_NON_BUFFERED_IO
718 # ifndef UNIV_HOTBACKUP
719- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
720+ if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
721 /* Do not use unbuffered i/o to log files because
722 value 2 denotes that we do not flush the log at every
723 commit, but only once per second */
d8778560 724@@ -1440,7 +1440,7 @@
b4e1fa2c
AM
725 attributes = 0;
726 #ifdef UNIV_NON_BUFFERED_IO
727 # ifndef UNIV_HOTBACKUP
728- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
729+ if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
730 /* Do not use unbuffered i/o to log files because
731 value 2 denotes that we do not flush the log at every
732 commit, but only once per second */
d8778560 733@@ -1585,6 +1585,11 @@
b4e1fa2c
AM
734 os_file_set_nocache(file, name, mode_str);
735 }
736
737+ /* ALL_O_DIRECT: O_DIRECT also for transaction log file */
738+ if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
739+ os_file_set_nocache(file, name, mode_str);
740+ }
741+
742 #ifdef USE_FILE_LOCK
743 if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
744
413cadc7
AM
745@@ -2008,7 +2013,7 @@
746
747 ut_free(buf2);
748
749- ret = os_file_flush(file);
750+ ret = os_file_flush(file, TRUE);
751
752 if (ret) {
753 return(TRUE);
754@@ -2046,7 +2051,8 @@
755 int
756 os_file_fsync(
757 /*==========*/
758- os_file_t file) /*!< in: handle to a file */
759+ os_file_t file, /*!< in: handle to a file */
760+ ibool metadata)
761 {
762 int ret;
763 int failures;
764@@ -2055,7 +2061,15 @@
765 failures = 0;
766
767 do {
768+#ifdef HAVE_FDATASYNC
769+ if (metadata) {
770+ ret = fsync(file);
771+ } else {
772+ ret = fdatasync(file);
773+ }
774+#else
775 ret = fsync(file);
776+#endif
777
778 os_n_fsyncs++;
779
780@@ -2092,7 +2106,8 @@
781 ibool
782 os_file_flush_func(
783 /*===============*/
784- os_file_t file) /*!< in, own: handle to a file */
785+ os_file_t file, /*!< in, own: handle to a file */
786+ ibool metadata)
787 {
788 #ifdef __WIN__
789 BOOL ret;
790@@ -2142,18 +2157,18 @@
791 /* If we are not on an operating system that supports this,
792 then fall back to a plain fsync. */
793
794- ret = os_file_fsync(file);
795+ ret = os_file_fsync(file, metadata);
796 } else {
797 ret = fcntl(file, F_FULLFSYNC, NULL);
798
799 if (ret) {
800 /* If we are not on a file system that supports this,
801 then fall back to a plain fsync. */
802- ret = os_file_fsync(file);
803+ ret = os_file_fsync(file, metadata);
804 }
805 }
806 #else
807- ret = os_file_fsync(file);
808+ ret = os_file_fsync(file, metadata);
809 #endif
810
811 if (ret == 0) {
812@@ -2336,7 +2351,7 @@
813 the OS crashes, a database page is only partially
814 physically written to disk. */
815
816- ut_a(TRUE == os_file_flush(file));
817+ ut_a(TRUE == os_file_flush(file, TRUE));
818 }
819 # endif /* UNIV_DO_FLUSH */
820
821@@ -2378,7 +2393,7 @@
822 the OS crashes, a database page is only partially
823 physically written to disk. */
824
825- ut_a(TRUE == os_file_flush(file));
826+ ut_a(TRUE == os_file_flush(file, TRUE));
827 }
828 # endif /* UNIV_DO_FLUSH */
829
830@@ -2750,7 +2765,7 @@
831
832 # ifdef UNIV_DO_FLUSH
833 if (!os_do_not_call_flush_at_each_write) {
834- ut_a(TRUE == os_file_flush(file));
835+ ut_a(TRUE == os_file_flush(file, TRUE));
836 }
837 # endif /* UNIV_DO_FLUSH */
838
839@@ -4289,7 +4304,7 @@
840 #ifdef UNIV_DO_FLUSH
841 if (slot->type == OS_FILE_WRITE
842 && !os_do_not_call_flush_at_each_write) {
843- if (!os_file_flush(slot->file)) {
844+ if (!os_file_flush(slot->file, TRUE)) {
845 ut_error;
846 }
847 }
848@@ -4590,7 +4605,7 @@
849 #ifdef UNIV_DO_FLUSH
850 if (slot->type == OS_FILE_WRITE
851 && !os_do_not_call_flush_at_each_write)
852- && !os_file_flush(slot->file) {
853+ && !os_file_flush(slot->file, TRUE) {
854 ut_error;
855 }
856 #endif /* UNIV_DO_FLUSH */
b4e1fa2c
AM
857diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
858--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:09:51.301987792 +0900
859+++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:13:29.369986988 +0900
adf0fb13 860@@ -183,7 +183,8 @@
b4e1fa2c
AM
861 UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
862 /* size in database pages */
863 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
864-UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
865+//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
866+UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
867
868 /* Try to flush dirty pages so as to avoid IO bursts at
869 the checkpoints. */
adf0fb13 870@@ -402,6 +403,17 @@
b4e1fa2c
AM
871
872 UNIV_INTERN ulong srv_replication_delay = 0;
873
874+UNIV_INTERN long long srv_ibuf_max_size = 0;
875+UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
876+UNIV_INTERN ulint srv_ibuf_accel_rate = 100;
877+#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0)))
878+
879+UNIV_INTERN ulint srv_checkpoint_age_target = 0;
880+UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
881+
882+UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
883+UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
884+UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
885 /*-------------------------------------------*/
886 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
887 UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
adf0fb13 888@@ -2742,6 +2754,7 @@
b4e1fa2c
AM
889 ulint n_pages_purged = 0;
890 ulint n_bytes_merged;
891 ulint n_pages_flushed;
892+ ulint n_pages_flushed_prev = 0;
893 ulint n_bytes_archived;
894 ulint n_tables_to_drop;
895 ulint n_ios;
adf0fb13 896@@ -2749,7 +2762,20 @@
b4e1fa2c
AM
897 ulint n_ios_very_old;
898 ulint n_pend_ios;
899 ulint next_itr_time;
900+ ulint prev_adaptive_flushing_method = ULINT_UNDEFINED;
901+ ulint inner_loop = 0;
902+ ibool skip_sleep = FALSE;
903 ulint i;
904+ struct t_prev_flush_info_struct {
905+ ulint count;
906+ unsigned space:32;
907+ unsigned offset:32;
908+ ib_uint64_t oldest_modification;
909+ } prev_flush_info[MAX_BUFFER_POOLS];
910+
911+ ib_uint64_t lsn_old;
912+
913+ ib_uint64_t oldest_lsn;
914
915 #ifdef UNIV_DEBUG_THREAD_CREATION
916 fprintf(stderr, "Master thread starts, id %lu\n",
adf0fb13 917@@ -2771,6 +2797,9 @@
b4e1fa2c
AM
918
919 mutex_exit(&kernel_mutex);
920
921+ mutex_enter(&(log_sys->mutex));
922+ lsn_old = log_sys->lsn;
923+ mutex_exit(&(log_sys->mutex));
924 loop:
925 /*****************************************************************/
926 /* ---- When there is database activity by users, we cycle in this
adf0fb13 927@@ -2801,9 +2830,13 @@
b4e1fa2c
AM
928 /* Sleep for 1 second on entrying the for loop below the first time. */
929 next_itr_time = ut_time_ms() + 1000;
930
931+ skip_sleep = FALSE;
932+
933 for (i = 0; i < 10; i++) {
934 ulint cur_time = ut_time_ms();
935
936+ n_pages_flushed = 0; /* initialize */
937+
938 /* ALTER TABLE in MySQL requires on Unix that the table handler
939 can drop tables lazily after there no longer are SELECT
940 queries to them. */
adf0fb13 941@@ -2827,6 +2860,7 @@
b4e1fa2c
AM
942 srv_main_thread_op_info = "sleeping";
943 srv_main_1_second_loops++;
944
945+ if (!skip_sleep) {
946 if (next_itr_time > cur_time
947 && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
948
adf0fb13 949@@ -2837,10 +2871,26 @@
b4e1fa2c
AM
950 (next_itr_time - cur_time)
951 * 1000));
952 srv_main_sleeps++;
953+
954+ /*
955+ mutex_enter(&(log_sys->mutex));
956+ oldest_lsn = buf_pool_get_oldest_modification();
957+ ib_uint64_t lsn = log_sys->lsn;
958+ mutex_exit(&(log_sys->mutex));
959+
960+ if(oldest_lsn)
961+ fprintf(stderr,
962+ "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
963+ (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
964+ lsn - lsn_old);
965+ */
966 }
967
968 /* Each iteration should happen at 1 second interval. */
969 next_itr_time = ut_time_ms() + 1000;
970+ } /* if (!skip_sleep) */
971+
972+ skip_sleep = FALSE;
973
974 /* Flush logs if needed */
975 srv_sync_log_buffer_in_background();
adf0fb13 976@@ -2860,7 +2910,7 @@
b4e1fa2c
AM
977 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
978 && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
979 srv_main_thread_op_info = "doing insert buffer merge";
980- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
981+ ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
982
983 /* Flush logs if needed */
984 srv_sync_log_buffer_in_background();
adf0fb13 985@@ -2877,7 +2927,11 @@
b4e1fa2c
AM
986 n_pages_flushed = buf_flush_list(
987 PCT_IO(100), IB_ULONGLONG_MAX);
988
989- } else if (srv_adaptive_flushing) {
990+ mutex_enter(&(log_sys->mutex));
991+ lsn_old = log_sys->lsn;
992+ mutex_exit(&(log_sys->mutex));
993+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
994+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
995
996 /* Try to keep the rate of flushing of dirty
997 pages such that redo log generation does not
adf0fb13 998@@ -2893,6 +2947,224 @@
b4e1fa2c
AM
999 n_flush,
1000 IB_ULONGLONG_MAX);
1001 }
1002+
1003+ mutex_enter(&(log_sys->mutex));
1004+ lsn_old = log_sys->lsn;
1005+ mutex_exit(&(log_sys->mutex));
1006+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
1007+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
1008+
1009+ /* Try to keep the modified age from exceeding
1010+ the max_checkpoint_age * 7/8 line */
1011+
1012+ mutex_enter(&(log_sys->mutex));
1013+
1014+ oldest_lsn = buf_pool_get_oldest_modification();
1015+ if (oldest_lsn == 0) {
1016+ lsn_old = log_sys->lsn;
1017+ mutex_exit(&(log_sys->mutex));
1018+
1019+ } else {
1020+ if ((log_sys->lsn - oldest_lsn)
1021+ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
1022+ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
1023+ /* We should not flush from here. */
1024+ lsn_old = log_sys->lsn;
1025+ mutex_exit(&(log_sys->mutex));
1026+ } else if ((log_sys->lsn - oldest_lsn)
1027+ > (log_sys->max_checkpoint_age)/4 ) {
1028+
1029+ /* defence line (modified age exceeds max_checkpoint_age * 1/4) */
1030+ ib_uint64_t lsn = log_sys->lsn;
1031+
1032+ ib_uint64_t level, bpl;
1033+ buf_page_t* bpage;
1034+ ulint j;
1035+
1036+ mutex_exit(&(log_sys->mutex));
1037+
1038+ bpl = 0;
1039+
1040+ for (j = 0; j < srv_buf_pool_instances; j++) {
1041+ buf_pool_t* buf_pool;
1042+ ulint n_blocks;
1043+
1044+ buf_pool = buf_pool_from_array(j);
1045+
1046+ /* Scanning the flush_list here is optimistic */
1047+
1048+ level = 0;
1049+ n_blocks = 0;
1050+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1051+
1052+ while (bpage != NULL) {
1053+ ib_uint64_t oldest_modification = bpage->oldest_modification;
1054+ if (oldest_modification != 0) {
1055+ level += log_sys->max_checkpoint_age
1056+ - (lsn - oldest_modification);
1057+ }
1058+ bpage = UT_LIST_GET_NEXT(list, bpage);
1059+ n_blocks++;
1060+ }
1061+
1062+ if (level) {
1063+ bpl += ((ib_uint64_t) n_blocks * n_blocks
1064+ * (lsn - lsn_old)) / level;
1065+ }
1066+
1067+ }
1068+
1069+ if (!srv_use_doublewrite_buf) {
1070+ /* flushing is faster than with the doublewrite buffer */
1071+ bpl = (bpl * 7) / 8;
1072+ }
1073+
1074+ if (bpl) {
1075+retry_flush_batch:
1076+ n_pages_flushed = buf_flush_list(bpl,
1077+ oldest_lsn + (lsn - lsn_old));
1078+ if (n_pages_flushed == ULINT_UNDEFINED) {
1079+ os_thread_sleep(5000);
1080+ goto retry_flush_batch;
1081+ }
1082+ }
1083+
1084+ lsn_old = lsn;
1085+ /*
1086+ fprintf(stderr,
1087+ "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
1088+ (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
1089+ lsn - lsn_old, bpl);
1090+ */
1091+ } else {
1092+ lsn_old = log_sys->lsn;
1093+ mutex_exit(&(log_sys->mutex));
1094+ }
1095+ }
1096+ prev_adaptive_flushing_method = 1;
1097+ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
1098+ buf_pool_t* buf_pool;
1099+ buf_page_t* bpage;
1100+ ib_uint64_t lsn;
1101+ ulint j;
1102+
1103+ mutex_enter(&(log_sys->mutex));
1104+ oldest_lsn = buf_pool_get_oldest_modification();
1105+ lsn = log_sys->lsn;
1106+ mutex_exit(&(log_sys->mutex));
1107+
1108+ /* run this loop 10 times per second (x10) */
1109+ next_itr_time -= 900; /* 1000 - 900 == 100 */
1110+ inner_loop++;
1111+ if (inner_loop < 10) {
1112+ i--;
1113+ } else {
1114+ inner_loop = 0;
1115+ }
1116+
1117+ if (prev_adaptive_flushing_method == 2) {
1118+ lint n_flush;
d8778560
AM
1119+ lint blocks_sum;
1120+ ulint new_blocks_sum, flushed_blocks_sum;
b4e1fa2c
AM
1121+
1122+ blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
1123+
1124+ /* prev_flush_info[j] should hold the previous loop's values */
1125+ for (j = 0; j < srv_buf_pool_instances; j++) {
1126+ lint blocks_num, new_blocks_num, flushed_blocks_num;
1127+ ibool found;
1128+
1129+ buf_pool = buf_pool_from_array(j);
1130+
1131+ blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
1132+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1133+ new_blocks_num = 0;
1134+
1135+ found = FALSE;
1136+ while (bpage != NULL) {
1137+ if (prev_flush_info[j].space == bpage->space
1138+ && prev_flush_info[j].offset == bpage->offset
1139+ && prev_flush_info[j].oldest_modification
1140+ == bpage->oldest_modification) {
1141+ found = TRUE;
1142+ break;
1143+ }
1144+ bpage = UT_LIST_GET_NEXT(list, bpage);
1145+ new_blocks_num++;
1146+ }
1147+ if (!found) {
1148+ new_blocks_num = blocks_num;
1149+ }
1150+
1151+ flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
1152+ - blocks_num;
1153+ if (flushed_blocks_num < 0) {
1154+ flushed_blocks_num = 0;
1155+ }
1156+
1157+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1158+
1159+ prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1160+ if (bpage) {
1161+ prev_flush_info[j].space = bpage->space;
1162+ prev_flush_info[j].offset = bpage->offset;
1163+ prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1164+ } else {
1165+ prev_flush_info[j].space = 0;
1166+ prev_flush_info[j].offset = 0;
1167+ prev_flush_info[j].oldest_modification = 0;
1168+ }
1169+
1170+ new_blocks_sum += new_blocks_num;
1171+ flushed_blocks_sum += flushed_blocks_num;
1172+ blocks_sum += blocks_num;
1173+ }
1174+
1175+ n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
1176+ if (flushed_blocks_sum > n_pages_flushed_prev) {
1177+ n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
1178+ }
1179+
1180+ if (n_flush > 0) {
1181+ n_flush++;
1182+ n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
1183+ } else {
1184+ n_pages_flushed = 0;
1185+ }
1186+ } else {
1187+ /* store previous first pages of the flush_list */
1188+ for (j = 0; j < srv_buf_pool_instances; j++) {
1189+ buf_pool = buf_pool_from_array(j);
1190+
1191+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
1192+
1193+ prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
1194+ if (bpage) {
1195+ prev_flush_info[j].space = bpage->space;
1196+ prev_flush_info[j].offset = bpage->offset;
1197+ prev_flush_info[j].oldest_modification = bpage->oldest_modification;
1198+ } else {
1199+ prev_flush_info[j].space = 0;
1200+ prev_flush_info[j].offset = 0;
1201+ prev_flush_info[j].oldest_modification = 0;
1202+ }
1203+ }
1204+ n_pages_flushed = 0;
1205+ }
1206+
1207+ lsn_old = lsn;
1208+ prev_adaptive_flushing_method = 2;
1209+ } else {
1210+ mutex_enter(&(log_sys->mutex));
1211+ lsn_old = log_sys->lsn;
1212+ mutex_exit(&(log_sys->mutex));
1213+ prev_adaptive_flushing_method = ULINT_UNDEFINED;
1214+ }
1215+
1216+ if (n_pages_flushed == ULINT_UNDEFINED) {
1217+ n_pages_flushed_prev = 0;
1218+ } else {
1219+ n_pages_flushed_prev = n_pages_flushed;
1220 }
1221
1222 if (srv_activity_count == old_activity_count) {
adf0fb13 1223@@ -2941,7 +3213,7 @@
b4e1fa2c
AM
1224 even if the server were active */
1225
1226 srv_main_thread_op_info = "doing insert buffer merge";
1227- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
1228+ ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
1229
1230 /* Flush logs if needed */
1231 srv_sync_log_buffer_in_background();
adf0fb13 1232@@ -3049,7 +3321,7 @@
b4e1fa2c
AM
1233 buf_flush_list below. Otherwise, the system favors
1234 clean pages over cleanup throughput. */
1235 n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
1236- PCT_IO(100));
1237+ PCT_IBUF_IO(100));
1238 }
1239
1240 srv_main_thread_op_info = "reserving kernel mutex";
adf0fb13
AM
1241@@ -3189,6 +3461,7 @@
1242 srv_slot_t* slot;
11822e22 1243 ulint retries = 0;
b4e1fa2c
AM
1244 ulint n_total_purged = ULINT_UNDEFINED;
1245+ ulint next_itr_time;
1246
1247 ut_a(srv_n_purge_threads == 1);
1248
adf0fb13 1249@@ -3209,9 +3482,12 @@
b4e1fa2c
AM
1250
1251 mutex_exit(&kernel_mutex);
1252
1253+ next_itr_time = ut_time_ms();
1254+
1255 while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
1256
11822e22 1257 ulint n_pages_purged = 0;
b4e1fa2c
AM
1258+ ulint cur_time;
1259
1260 /* If there are very few records to purge or the last
1261 purge didn't purge any records then wait for activity.
adf0fb13 1262@@ -3258,6 +3534,16 @@
b4e1fa2c
AM
1263 } while (n_pages_purged > 0 && !srv_fast_shutdown);
1264
1265 srv_sync_log_buffer_in_background();
1266+
1267+ cur_time = ut_time_ms();
1268+ if (next_itr_time > cur_time) {
1269+ os_thread_sleep(ut_min(1000000,
1270+ (next_itr_time - cur_time)
1271+ * 1000));
1272+ next_itr_time = ut_time_ms() + 1000;
1273+ } else {
1274+ next_itr_time = cur_time + 1000;
1275+ }
1276 }
1277
1278 mutex_enter(&kernel_mutex);
1279diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
1280--- a/storage/innobase/srv/srv0start.c 2010-11-03 07:01:13.000000000 +0900
1281+++ b/storage/innobase/srv/srv0start.c 2010-12-03 15:10:09.103023543 +0900
adf0fb13 1282@@ -1217,6 +1217,9 @@
b4e1fa2c
AM
1283 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1284 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1285
1286+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1287+ srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1288+
1289 } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1290 srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1291
11822e22
AM
1292diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
1293--- a/storage/innobase/trx/trx0purge.c 2011-04-12 14:14:14.000000000 +0900
1294+++ b/storage/innobase/trx/trx0purge.c 2011-04-12 14:15:44.000000000 +0900
1295@@ -392,10 +392,10 @@
1296 trx_sys->rseg_history_len++;
1297 mutex_exit(&kernel_mutex);
1298
1299- if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
1300+// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/
1301 /* Inform the purge thread that there is work to do. */
1302 srv_wake_purge_thread_if_not_active();
1303- }
1304+// }
1305 }
1306
1307 /**********************************************************************//**
b4e1fa2c
AM
1308diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
1309--- a/storage/innobase/trx/trx0trx.c 2010-11-03 07:01:13.000000000 +0900
1310+++ b/storage/innobase/trx/trx0trx.c 2010-12-03 15:10:09.106023937 +0900
adf0fb13 1311@@ -984,6 +984,7 @@
b4e1fa2c
AM
1312 trx->read_view = NULL;
1313
1314 if (lsn) {
1315+ ulint flush_log_at_trx_commit;
1316
1317 mutex_exit(&kernel_mutex);
1318
adf0fb13 1319@@ -992,6 +993,12 @@
b4e1fa2c
AM
1320 trx_undo_insert_cleanup(trx);
1321 }
1322
1323+ if (srv_use_global_flush_log_at_trx_commit) {
1324+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1325+ } else {
1326+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1327+ }
1328+
1329 /* NOTE that we could possibly make a group commit more
1330 efficient here: call os_thread_yield here to allow also other
1331 trxs to come to commit! */
adf0fb13 1332@@ -1023,9 +1030,9 @@
b4e1fa2c
AM
1333 if (trx->flush_log_later) {
1334 /* Do nothing yet */
1335 trx->must_flush_log_later = TRUE;
1336- } else if (srv_flush_log_at_trx_commit == 0) {
1337+ } else if (flush_log_at_trx_commit == 0) {
1338 /* Do nothing */
1339- } else if (srv_flush_log_at_trx_commit == 1) {
1340+ } else if (flush_log_at_trx_commit == 1) {
1341 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1342 /* Write the log but do not flush it to disk */
1343
adf0fb13 1344@@ -1037,7 +1044,7 @@
b4e1fa2c
AM
1345
1346 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1347 }
1348- } else if (srv_flush_log_at_trx_commit == 2) {
1349+ } else if (flush_log_at_trx_commit == 2) {
1350
1351 /* Write the log but do not flush it to disk */
1352
adf0fb13 1353@@ -1701,16 +1708,23 @@
b4e1fa2c
AM
1354 trx_t* trx) /*!< in: trx handle */
1355 {
1356 ib_uint64_t lsn = trx->commit_lsn;
1357+ ulint flush_log_at_trx_commit;
1358
1359 ut_a(trx);
1360
1361 trx->op_info = "flushing log";
1362
1363+ if (srv_use_global_flush_log_at_trx_commit) {
1364+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1365+ } else {
1366+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1367+ }
1368+
1369 if (!trx->must_flush_log_later) {
1370 /* Do nothing */
1371- } else if (srv_flush_log_at_trx_commit == 0) {
1372+ } else if (flush_log_at_trx_commit == 0) {
1373 /* Do nothing */
1374- } else if (srv_flush_log_at_trx_commit == 1) {
1375+ } else if (flush_log_at_trx_commit == 1) {
1376 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1377 /* Write the log but do not flush it to disk */
1378
adf0fb13 1379@@ -1721,7 +1735,7 @@
b4e1fa2c
AM
1380
1381 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1382 }
1383- } else if (srv_flush_log_at_trx_commit == 2) {
1384+ } else if (flush_log_at_trx_commit == 2) {
1385
1386 /* Write the log but do not flush it to disk */
1387
adf0fb13 1388@@ -1969,6 +1983,8 @@
b4e1fa2c
AM
1389 /*--------------------------------------*/
1390
1391 if (lsn) {
1392+ ulint flush_log_at_trx_commit;
1393+
1394 /* Depending on the my.cnf options, we may now write the log
1395 buffer to the log files, making the prepared state of the
1396 transaction durable if the OS does not crash. We may also
adf0fb13 1397@@ -1988,9 +2004,15 @@
b4e1fa2c
AM
1398
1399 mutex_exit(&kernel_mutex);
1400
1401- if (srv_flush_log_at_trx_commit == 0) {
1402+ if (srv_use_global_flush_log_at_trx_commit) {
1403+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1404+ } else {
1405+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1406+ }
1407+
1408+ if (flush_log_at_trx_commit == 0) {
1409 /* Do nothing */
1410- } else if (srv_flush_log_at_trx_commit == 1) {
1411+ } else if (flush_log_at_trx_commit == 1) {
1412 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1413 /* Write the log but do not flush it to disk */
1414
adf0fb13 1415@@ -2002,7 +2024,7 @@
b4e1fa2c
AM
1416
1417 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1418 }
1419- } else if (srv_flush_log_at_trx_commit == 2) {
1420+ } else if (flush_log_at_trx_commit == 2) {
1421
1422 /* Write the log but do not flush it to disk */
1423
This page took 0.286223 seconds and 4 git commands to generate.