]>
Commit | Line | Data |
---|---|---|
b4e1fa2c AM |
1 | # name : innodb_io_patches.patch |
2 | # introduced : 11 or before | |
3 | # maintainer : Yasufumi | |
4 | # | |
5 | #!!! notice !!! | |
6 | # Any small change to this file in the main branch | |
7 | # should be done or reviewed by the maintainer! | |
8 | diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c | |
9 | --- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:09:51.273986410 +0900 | |
10 | +++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:10:08.934990091 +0900 | |
11 | @@ -320,6 +320,7 @@ | |
12 | ||
13 | /* When we traverse all the flush lists we don't want another | |
14 | thread to add a dirty page to any flush list. */ | |
15 | + if (srv_buf_pool_instances > 1) | |
16 | log_flush_order_mutex_enter(); | |
17 | ||
18 | for (i = 0; i < srv_buf_pool_instances; i++) { | |
19 | @@ -343,6 +344,7 @@ | |
20 | } | |
21 | } | |
22 | ||
23 | + if (srv_buf_pool_instances > 1) | |
24 | log_flush_order_mutex_exit(); | |
25 | ||
26 | /* The returned answer may be out of date: the flush_list can | |
27 | diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c | |
28 | --- a/storage/innobase/buf/buf0flu.c 2010-11-03 07:01:13.000000000 +0900 | |
29 | +++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:10:08.934990091 +0900 | |
d8778560 | 30 | @@ -1376,7 +1376,7 @@ |
b4e1fa2c AM |
31 | |
32 | ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); | |
33 | ||
34 | - if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { | |
35 | + if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) { | |
36 | /* If there is little space, it is better not to flush | |
37 | any block except from the end of the LRU list */ | |
38 | ||
39 | diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c | |
40 | --- a/storage/innobase/buf/buf0rea.c 2010-11-03 07:01:13.000000000 +0900 | |
41 | +++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:10:08.937050537 +0900 | |
42 | @@ -260,6 +260,10 @@ | |
43 | = BUF_READ_AHEAD_LINEAR_AREA(buf_pool); | |
44 | ulint threshold; | |
45 | ||
46 | + if (!(srv_read_ahead & 2)) { | |
47 | + return(0); | |
48 | + } | |
49 | + | |
50 | if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) { | |
51 | /* No read-ahead to avoid thread deadlocks */ | |
52 | return(0); | |
53 | diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc | |
54 | --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:09:51.283956391 +0900 | |
55 | +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:10:08.963980444 +0900 | |
df1b5770 | 56 | @@ -445,6 +445,12 @@ |
b4e1fa2c AM |
57 | "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", |
58 | NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0); | |
59 | ||
60 | +static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG, | |
61 | + "Set to 0 (write and flush once per second)," | |
62 | + " 1 (write and flush at each commit)" | |
63 | + " or 2 (write at commit, flush once per second).", | |
64 | + NULL, NULL, 1, 0, 2, 0); | |
65 | + | |
66 | ||
67 | static handler *innobase_create_handler(handlerton *hton, | |
68 | TABLE_SHARE *table, | |
df1b5770 | 69 | @@ -839,6 +845,17 @@ |
b4e1fa2c AM |
70 | } |
71 | } | |
72 | ||
73 | +/******************************************************************//** | |
74 | +Returns the flush_log_at_trx_commit THDVAR value read through the given thread handle. */ | |
75 | +extern "C" UNIV_INTERN | |
76 | +ulong | |
77 | +thd_flush_log_at_trx_commit( | |
78 | +/*================================*/ | |
79 | + void* thd) /*!< in: thread handle, cast to THD* */ | |
80 | +{ | |
81 | + return(THDVAR((THD*) thd, flush_log_at_trx_commit)); | |
82 | +} | |
83 | + | |
84 | /********************************************************************//** | |
85 | Obtain the InnoDB transaction of a MySQL thread. | |
86 | @return reference to transaction pointer */ | |
df1b5770 | 87 | @@ -2410,6 +2427,9 @@ |
b4e1fa2c AM |
88 | srv_n_read_io_threads = (ulint) innobase_read_io_threads; |
89 | srv_n_write_io_threads = (ulint) innobase_write_io_threads; | |
90 | ||
91 | + srv_read_ahead &= 3; | |
92 | + srv_adaptive_flushing_method %= 3; | |
93 | + | |
94 | srv_force_recovery = (ulint) innobase_force_recovery; | |
95 | ||
96 | srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; | |
df1b5770 | 97 | @@ -10992,9 +11012,9 @@ |
b4e1fa2c AM |
98 | |
99 | static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads, | |
100 | PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, | |
101 | - "Purge threads can be either 0 or 1. Default is 0.", | |
102 | + "Purge threads can be either 0 or 1. Default is 1.", | |
103 | NULL, NULL, | |
104 | - 0, /* Default setting */ | |
105 | + 1, /* Default setting */ | |
106 | 0, /* Minimum value */ | |
107 | 1, 0); /* Maximum value */ | |
108 | ||
df1b5770 | 109 | @@ -11036,12 +11056,18 @@ |
b4e1fa2c AM |
110 | innodb_file_format_max_validate, |
111 | innodb_file_format_max_update, "Antelope"); | |
112 | ||
113 | -static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit, | |
114 | - PLUGIN_VAR_OPCMDARG, | |
115 | - "Set to 0 (write and flush once per second)," | |
116 | - " 1 (write and flush at each commit)" | |
117 | - " or 2 (write at commit, flush once per second).", | |
118 | - NULL, NULL, 1, 0, 2, 0); | |
119 | +/* Changed to the THDVAR */ | |
120 | +//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit, | |
121 | +// PLUGIN_VAR_OPCMDARG, | |
122 | +// "Set to 0 (write and flush once per second)," | |
123 | +// " 1 (write and flush at each commit)" | |
124 | +// " or 2 (write at commit, flush once per second).", | |
125 | +// NULL, NULL, 1, 0, 2, 0); | |
126 | + | |
127 | +static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit, | |
128 | + PLUGIN_VAR_NOCMDARG, | |
129 | + "Use global innodb_flush_log_at_trx_commit value. (default: ON).", | |
130 | + NULL, NULL, TRUE); | |
131 | ||
132 | static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method, | |
133 | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
df1b5770 | 134 | @@ -11136,7 +11162,7 @@ |
b4e1fa2c AM |
135 | static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, |
136 | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
137 | "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", | |
138 | - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L); | |
139 | + NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L); | |
140 | ||
141 | static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances, | |
142 | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
df1b5770 | 143 | @@ -11288,6 +11314,95 @@ |
b4e1fa2c AM |
144 | "trigger a readahead.", |
145 | NULL, NULL, 56, 0, 64, 0); | |
146 | ||
147 | +static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size, | |
148 | + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
149 | + "The maximum size of the insert buffer. (in bytes)", | |
150 | + NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0); | |
151 | + | |
152 | +static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract, | |
153 | + PLUGIN_VAR_RQCMDARG, | |
154 | + "Enable/Disable active_contract of insert buffer. 0:disable 1:enable", | |
155 | + NULL, NULL, 1, 0, 1, 0); | |
156 | + | |
157 | +static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate, | |
158 | + PLUGIN_VAR_RQCMDARG, | |
159 | + "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)", | |
160 | + NULL, NULL, 100, 100, 999999999, 0); | |
161 | + | |
162 | +static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target, | |
163 | + PLUGIN_VAR_RQCMDARG, | |
164 | + "Control soft limit of checkpoint age. (0 : not control)", | |
165 | + NULL, NULL, 0, 0, ~0UL, 0); | |
166 | + | |
167 | +static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages, | |
168 | + PLUGIN_VAR_RQCMDARG, | |
169 | + "Enable/Disable flushing also neighbor pages. 0:disable 1:enable", | |
170 | + NULL, NULL, 1, 0, 1, 0); | |
171 | + | |
172 | +static | |
173 | +void | |
174 | +innodb_read_ahead_update( | |
175 | + THD* thd, /*!< in: unused */ | |
176 | + struct st_mysql_sys_var* var, /*!< in: unused */ | |
177 | + void* var_ptr, /*!< out: variable storage to update */ | |
178 | + const void* save) /*!< in: requested value */ | |
179 | +{ | |
180 | + *(long *)var_ptr= (*(long *)save) & 3; /* fold compat aliases "0".."3" (indexes 4..7) onto 0..3 */ | |
181 | +} | |
182 | +const char *read_ahead_names[]= | |
183 | +{ | |
184 | + "none", /* 0 */ | |
185 | + "random", | |
186 | + "linear", | |
187 | + "both", /* 3 */ | |
188 | + /* For compatibility of the older patch */ | |
189 | + "0", /* 4 ("none" + 4) */ | |
190 | + "1", | |
191 | + "2", | |
192 | + "3", /* 7 ("both" + 4) */ | |
193 | + NullS | |
194 | +}; | |
195 | +TYPELIB read_ahead_typelib= | |
196 | +{ | |
197 | + array_elements(read_ahead_names) - 1, "read_ahead_typelib", | |
198 | + read_ahead_names, NULL | |
199 | +}; | |
200 | +static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead, | |
201 | + PLUGIN_VAR_RQCMDARG, | |
202 | + "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]", | |
203 | + NULL, innodb_read_ahead_update, 2, &read_ahead_typelib); | |
204 | + | |
205 | +static | |
206 | +void | |
207 | +innodb_adaptive_flushing_method_update( | |
208 | + THD* thd, /*!< in: unused */ | |
209 | + struct st_mysql_sys_var* var, /*!< in: unused */ | |
210 | + void* var_ptr, /*!< out: variable storage to update */ | |
211 | + const void* save) /*!< in: requested value */ | |
212 | +{ | |
213 | + *(long *)var_ptr= (*(long *)save) % 3; /* 3 methods: fold compat aliases "0".."2" (indexes 3..5) onto 0..2 */ | |
214 | +} | |
215 | +const char *adaptive_flushing_method_names[]= | |
216 | +{ | |
217 | + "native", /* 0 */ | |
218 | + "estimate", /* 1 */ | |
219 | + "keep_average", /* 2 */ | |
220 | + /* Numeric aliases kept for compatibility with the older patch */ | |
221 | + "0", /* 3 ("native" + 3) */ | |
222 | + "1", /* 4 ("estimate" + 3) */ | |
223 | + "2", /* 5 ("keep_average" + 3) */ | |
224 | + NullS | |
225 | +}; | |
226 | +TYPELIB adaptive_flushing_method_typelib= | |
227 | +{ | |
228 | + array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib", | |
229 | + adaptive_flushing_method_names, NULL | |
230 | +}; | |
231 | +static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method, | |
232 | + PLUGIN_VAR_RQCMDARG, | |
233 | + "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)", | |
234 | + NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib); | |
b4e1fa2c AM |
235 | + |
236 | static struct st_mysql_sys_var* innobase_system_variables[]= { | |
237 | MYSQL_SYSVAR(additional_mem_pool_size), | |
238 | MYSQL_SYSVAR(autoextend_increment), | |
df1b5770 | 239 | @@ -11308,6 +11423,7 @@ |
b4e1fa2c AM |
240 | MYSQL_SYSVAR(file_format_check), |
241 | MYSQL_SYSVAR(file_format_max), | |
242 | MYSQL_SYSVAR(flush_log_at_trx_commit), | |
243 | + MYSQL_SYSVAR(use_global_flush_log_at_trx_commit), | |
244 | MYSQL_SYSVAR(flush_method), | |
245 | MYSQL_SYSVAR(force_recovery), | |
246 | MYSQL_SYSVAR(locks_unsafe_for_binlog), | |
df1b5770 | 247 | @@ -11345,6 +11461,13 @@ |
b4e1fa2c AM |
248 | MYSQL_SYSVAR(show_verbose_locks), |
249 | MYSQL_SYSVAR(show_locks_held), | |
250 | MYSQL_SYSVAR(version), | |
251 | + MYSQL_SYSVAR(ibuf_max_size), | |
252 | + MYSQL_SYSVAR(ibuf_active_contract), | |
253 | + MYSQL_SYSVAR(ibuf_accel_rate), | |
254 | + MYSQL_SYSVAR(checkpoint_age_target), | |
255 | + MYSQL_SYSVAR(flush_neighbor_pages), | |
256 | + MYSQL_SYSVAR(read_ahead), | |
257 | + MYSQL_SYSVAR(adaptive_flushing_method), | |
b4e1fa2c AM |
258 | MYSQL_SYSVAR(use_sys_malloc), |
259 | MYSQL_SYSVAR(use_native_aio), | |
260 | MYSQL_SYSVAR(change_buffering), | |
261 | diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c | |
262 | --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-11-03 07:01:13.000000000 +0900 | |
263 | +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:10:09.073984282 +0900 | |
264 | @@ -524,8 +524,10 @@ | |
265 | grow in size, as the references on the upper levels of the tree can | |
266 | change */ | |
267 | ||
268 | - ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE | |
269 | - / IBUF_POOL_SIZE_PER_MAX_SIZE; | |
270 | + ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE | |
271 | + / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE); | |
272 | + | |
273 | + srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE; | |
274 | ||
275 | mutex_create(ibuf_pessimistic_insert_mutex_key, | |
276 | &ibuf_pessimistic_insert_mutex, | |
df1b5770 | 277 | @@ -2712,9 +2714,11 @@ |
b4e1fa2c AM |
278 | size = ibuf->size; |
279 | max_size = ibuf->max_size; | |
280 | ||
281 | + if (!srv_ibuf_active_contract) { | |
282 | if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { | |
283 | return; | |
284 | } | |
285 | + } | |
286 | ||
287 | sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC); | |
288 | ||
289 | diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h | |
290 | --- a/storage/innobase/include/buf0rea.h 2010-11-03 07:01:13.000000000 +0900 | |
291 | +++ b/storage/innobase/include/buf0rea.h 2010-12-03 15:10:09.076066335 +0900 | |
292 | @@ -124,8 +124,7 @@ | |
293 | ||
294 | /** The size in pages of the area which the read-ahead algorithms read if | |
295 | invoked */ | |
296 | -#define BUF_READ_AHEAD_AREA(b) \ | |
297 | - ut_min(64, ut_2_power_up((b)->curr_size / 32)) | |
298 | +#define BUF_READ_AHEAD_AREA(b) 64 | |
299 | ||
300 | /** @name Modes used in read-ahead @{ */ | |
301 | /** read only pages belonging to the insert buffer tree */ | |
302 | diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h | |
303 | --- a/storage/innobase/include/ha_prototypes.h 2010-11-03 07:01:13.000000000 +0900 | |
304 | +++ b/storage/innobase/include/ha_prototypes.h 2010-12-03 15:10:09.078026360 +0900 | |
305 | @@ -275,5 +275,12 @@ | |
306 | /*===================*/ | |
307 | void* thd, /*!< in: thread handle (THD*) */ | |
308 | ulint value); /*!< in: time waited for the lock */ | |
309 | +/******************************************************************//** | |
310 | +Returns the flush_log_at_trx_commit value for the given thread handle. */ | |
311 | + | |
312 | +ulong | |
313 | +thd_flush_log_at_trx_commit( | |
314 | +/*================================*/ | |
315 | + void* thd); /*!< in: thread handle (THD*) */ | |
316 | ||
317 | #endif | |
318 | diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h | |
319 | --- a/storage/innobase/include/srv0srv.h 2010-12-03 15:09:51.291955835 +0900 | |
320 | +++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:10:09.079029047 +0900 | |
321 | @@ -141,7 +141,8 @@ | |
322 | extern ulint srv_n_log_files; | |
323 | extern ulint srv_log_file_size; | |
324 | extern ulint srv_log_buffer_size; | |
325 | -extern ulong srv_flush_log_at_trx_commit; | |
326 | +//extern ulong srv_flush_log_at_trx_commit; | |
327 | +extern char srv_use_global_flush_log_at_trx_commit; | |
328 | extern char srv_adaptive_flushing; | |
329 | ||
330 | ||
df1b5770 | 331 | @@ -219,6 +220,16 @@ |
b4e1fa2c AM |
332 | extern ulong srv_max_purge_lag; |
333 | ||
334 | extern ulong srv_replication_delay; | |
335 | + | |
336 | +extern long long srv_ibuf_max_size; | |
337 | +extern ulint srv_ibuf_active_contract; | |
338 | +extern ulint srv_ibuf_accel_rate; | |
339 | +extern ulint srv_checkpoint_age_target; | |
340 | +extern ulint srv_flush_neighbor_pages; | |
341 | +extern ulint srv_enable_unsafe_group_commit; | |
342 | +extern ulint srv_read_ahead; | |
343 | +extern ulint srv_adaptive_flushing_method; | |
344 | + | |
345 | /*-------------------------------------------*/ | |
346 | ||
347 | extern ulint srv_n_rows_inserted; | |
df1b5770 | 348 | @@ -394,8 +405,9 @@ |
b4e1fa2c AM |
349 | when writing data files, but do flush |
350 | after writing to log files */ | |
351 | SRV_UNIX_NOSYNC, /*!< do not flush after writing */ | |
352 | - SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on | |
353 | + SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on | |
354 | data files */ | |
355 | + SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */ | |
356 | }; | |
357 | ||
358 | /** Alternatives for file i/o in Windows */ | |
359 | diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c | |
360 | --- a/storage/innobase/log/log0log.c 2010-11-03 07:01:13.000000000 +0900 | |
361 | +++ b/storage/innobase/log/log0log.c 2010-12-03 15:10:09.084023562 +0900 | |
d8778560 AM |
362 | @@ -48,6 +48,7 @@ |
363 | #include "srv0start.h" | |
364 | #include "trx0sys.h" | |
365 | #include "trx0trx.h" | |
366 | +#include "ha_prototypes.h" | |
367 | ||
368 | /* | |
369 | General philosophy of InnoDB redo-logs: | |
370 | @@ -359,6 +360,33 @@ | |
b4e1fa2c AM |
371 | } |
372 | ||
373 | /************************************************************//** | |
374 | +Async preflush threshold for modified age: log_sys->max_modified_age_async, or the ut_min() of it and 7/8 of srv_checkpoint_age_target when that target is nonzero. */ | |
375 | +UNIV_INLINE | |
376 | +ulint | |
377 | +log_max_modified_age_async() | |
378 | +{ | |
379 | + if (srv_checkpoint_age_target) { | |
380 | + return(ut_min(log_sys->max_modified_age_async, | |
381 | + srv_checkpoint_age_target | |
382 | + - srv_checkpoint_age_target / 8)); | |
383 | + } else { | |
384 | + return(log_sys->max_modified_age_async); | |
385 | + } | |
386 | +} | |
387 | +/* Async checkpoint threshold: log_sys->max_checkpoint_age_async, or the ut_min() of it and srv_checkpoint_age_target when that target is nonzero. */ | |
388 | +UNIV_INLINE | |
389 | +ulint | |
390 | +log_max_checkpoint_age_async() | |
391 | +{ | |
392 | + if (srv_checkpoint_age_target) { | |
393 | + return(ut_min(log_sys->max_checkpoint_age_async, | |
394 | + srv_checkpoint_age_target)); | |
395 | + } else { | |
396 | + return(log_sys->max_checkpoint_age_async); | |
397 | + } | |
398 | +} | |
399 | + | |
400 | +/************************************************************//** | |
401 | Closes the log. | |
402 | @return lsn */ | |
403 | UNIV_INTERN | |
d8778560 | 404 | @@ -427,7 +455,7 @@ |
b4e1fa2c AM |
405 | } |
406 | } | |
407 | ||
408 | - if (checkpoint_age <= log->max_modified_age_async) { | |
409 | + if (checkpoint_age <= log_max_modified_age_async()) { | |
410 | ||
411 | goto function_exit; | |
412 | } | |
d8778560 | 413 | @@ -435,8 +463,8 @@ |
b4e1fa2c AM |
414 | oldest_lsn = buf_pool_get_oldest_modification(); |
415 | ||
416 | if (!oldest_lsn | |
417 | - || lsn - oldest_lsn > log->max_modified_age_async | |
418 | - || checkpoint_age > log->max_checkpoint_age_async) { | |
419 | + || lsn - oldest_lsn > log_max_modified_age_async() | |
420 | + || checkpoint_age > log_max_checkpoint_age_async()) { | |
421 | ||
422 | log->check_flush_or_checkpoint = TRUE; | |
423 | } | |
d8778560 | 424 | @@ -1100,6 +1128,7 @@ |
b4e1fa2c AM |
425 | group = (log_group_t*)((ulint)group - 1); |
426 | ||
427 | if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC | |
428 | + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT | |
429 | && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { | |
430 | ||
431 | fil_flush(group->space_id); | |
d8778560 | 432 | @@ -1121,8 +1150,9 @@ |
b4e1fa2c AM |
433 | logs and cannot end up here! */ |
434 | ||
435 | if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC | |
436 | + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT | |
437 | && srv_unix_file_flush_method != SRV_UNIX_NOSYNC | |
438 | - && srv_flush_log_at_trx_commit != 2) { | |
439 | + && thd_flush_log_at_trx_commit(NULL) != 2) { | |
440 | ||
441 | fil_flush(group->space_id); | |
442 | } | |
d8778560 | 443 | @@ -1501,7 +1531,8 @@ |
b4e1fa2c AM |
444 | |
445 | mutex_exit(&(log_sys->mutex)); | |
446 | ||
447 | - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { | |
448 | + if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC | |
449 | + || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { | |
450 | /* O_DSYNC means the OS did not buffer the log file at all: | |
451 | so we have also flushed to disk what we have written */ | |
452 | ||
d8778560 | 453 | @@ -2120,10 +2151,10 @@ |
b4e1fa2c AM |
454 | |
455 | sync = TRUE; | |
456 | advance = 2 * (age - log->max_modified_age_sync); | |
457 | - } else if (age > log->max_modified_age_async) { | |
458 | + } else if (age > log_max_modified_age_async()) { | |
459 | ||
460 | /* A flush is not urgent: we do an asynchronous preflush */ | |
461 | - advance = age - log->max_modified_age_async; | |
462 | + advance = age - log_max_modified_age_async(); | |
463 | } else { | |
464 | advance = 0; | |
465 | } | |
d8778560 | 466 | @@ -2137,7 +2168,7 @@ |
b4e1fa2c AM |
467 | |
468 | do_checkpoint = TRUE; | |
469 | ||
470 | - } else if (checkpoint_age > log->max_checkpoint_age_async) { | |
471 | + } else if (checkpoint_age > log_max_checkpoint_age_async()) { | |
472 | /* A checkpoint is not urgent: do it asynchronously */ | |
473 | ||
474 | do_checkpoint = TRUE; | |
d8778560 | 475 | @@ -3349,6 +3380,17 @@ |
b4e1fa2c AM |
476 | log_sys->flushed_to_disk_lsn, |
477 | log_sys->last_checkpoint_lsn); | |
478 | ||
479 | + fprintf(file, | |
480 | + "Max checkpoint age %lu\n" | |
481 | + "Checkpoint age target %lu\n" | |
482 | + "Modified age %lu\n" | |
483 | + "Checkpoint age %lu\n", | |
484 | + (ulong) log_sys->max_checkpoint_age, | |
485 | + (ulong) log_max_checkpoint_age_async(), | |
486 | + (ulong) (log_sys->lsn - | |
487 | + log_buf_pool_get_oldest_modification()), | |
488 | + (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn)); | |
489 | + | |
490 | current_time = time(NULL); | |
491 | ||
492 | time_elapsed = 0.001 + difftime(current_time, | |
493 | diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c | |
494 | --- a/storage/innobase/log/log0recv.c 2010-11-03 07:01:13.000000000 +0900 | |
495 | +++ b/storage/innobase/log/log0recv.c 2010-12-03 15:10:09.089024191 +0900 | |
496 | @@ -2906,9 +2906,12 @@ | |
497 | ib_uint64_t archived_lsn; | |
498 | #endif /* UNIV_LOG_ARCHIVE */ | |
499 | byte* buf; | |
500 | - byte log_hdr_buf[LOG_FILE_HDR_SIZE]; | |
501 | + byte* log_hdr_buf; | |
502 | + byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE]; | |
503 | ulint err; | |
504 | ||
505 | + log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE); | |
506 | + | |
507 | #ifdef UNIV_LOG_ARCHIVE | |
508 | ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX); | |
509 | /** TRUE when recovering from a checkpoint */ | |
510 | diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c | |
511 | --- a/storage/innobase/os/os0file.c 2010-11-03 07:01:13.000000000 +0900 | |
512 | +++ b/storage/innobase/os/os0file.c 2010-12-03 15:10:09.093023540 +0900 | |
d8778560 | 513 | @@ -1424,7 +1424,7 @@ |
b4e1fa2c AM |
514 | #endif |
515 | #ifdef UNIV_NON_BUFFERED_IO | |
516 | # ifndef UNIV_HOTBACKUP | |
517 | - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) { | |
518 | + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) { | |
519 | /* Do not use unbuffered i/o to log files because | |
520 | value 2 denotes that we do not flush the log at every | |
521 | commit, but only once per second */ | |
d8778560 | 522 | @@ -1440,7 +1440,7 @@ |
b4e1fa2c AM |
523 | attributes = 0; |
524 | #ifdef UNIV_NON_BUFFERED_IO | |
525 | # ifndef UNIV_HOTBACKUP | |
526 | - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) { | |
527 | + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) { | |
528 | /* Do not use unbuffered i/o to log files because | |
529 | value 2 denotes that we do not flush the log at every | |
530 | commit, but only once per second */ | |
d8778560 | 531 | @@ -1585,6 +1585,11 @@ |
b4e1fa2c AM |
532 | os_file_set_nocache(file, name, mode_str); |
533 | } | |
534 | ||
535 | + /* ALL_O_DIRECT: O_DIRECT also for transaction log file */ | |
536 | + if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { | |
537 | + os_file_set_nocache(file, name, mode_str); | |
538 | + } | |
539 | + | |
540 | #ifdef USE_FILE_LOCK | |
541 | if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) { | |
542 | ||
543 | diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c | |
544 | --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:09:51.301987792 +0900 | |
545 | +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:13:29.369986988 +0900 | |
d8778560 | 546 | @@ -190,7 +190,8 @@ |
b4e1fa2c AM |
547 | UNIV_INTERN ulint srv_log_file_size = ULINT_MAX; |
548 | /* size in database pages */ | |
549 | UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX; | |
550 | -UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1; | |
551 | +//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1; | |
552 | +UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE; | |
553 | ||
554 | /* Try to flush dirty pages so as to avoid IO bursts at | |
555 | the checkpoints. */ | |
df1b5770 | 556 | @@ -406,6 +407,17 @@ |
b4e1fa2c AM |
557 | |
558 | UNIV_INTERN ulong srv_replication_delay = 0; | |
559 | ||
560 | +UNIV_INTERN long long srv_ibuf_max_size = 0; | |
561 | +UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */ | |
562 | +UNIV_INTERN ulint srv_ibuf_accel_rate = 100; /* in percent; scales PCT_IBUF_IO below */ | |
563 | +#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0))) | |
564 | + | |
565 | +UNIV_INTERN ulint srv_checkpoint_age_target = 0; | |
566 | +UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */ | |
567 | + | |
568 | +UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */ | |
569 | +UNIV_INTERN ulint srv_read_ahead = 3; /* 0: none 1: random 2: linear 3: both */ | |
570 | +UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */ | |
571 | /*-------------------------------------------*/ | |
572 | UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; | |
573 | UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; | |
df1b5770 | 574 | @@ -2742,6 +2754,7 @@ |
b4e1fa2c AM |
575 | ulint n_pages_purged = 0; |
576 | ulint n_bytes_merged; | |
577 | ulint n_pages_flushed; | |
578 | + ulint n_pages_flushed_prev = 0; | |
579 | ulint n_bytes_archived; | |
580 | ulint n_tables_to_drop; | |
581 | ulint n_ios; | |
df1b5770 | 582 | @@ -2749,7 +2762,20 @@ |
b4e1fa2c AM |
583 | ulint n_ios_very_old; |
584 | ulint n_pend_ios; | |
585 | ulint next_itr_time; | |
586 | + ulint prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
587 | + ulint inner_loop = 0; | |
588 | + ibool skip_sleep = FALSE; | |
589 | ulint i; | |
590 | + struct t_prev_flush_info_struct { | |
591 | + ulint count; | |
592 | + unsigned space:32; | |
593 | + unsigned offset:32; | |
594 | + ib_uint64_t oldest_modification; | |
595 | + } prev_flush_info[MAX_BUFFER_POOLS]; | |
596 | + | |
597 | + ib_uint64_t lsn_old; | |
598 | + | |
599 | + ib_uint64_t oldest_lsn; | |
600 | ||
601 | #ifdef UNIV_DEBUG_THREAD_CREATION | |
602 | fprintf(stderr, "Master thread starts, id %lu\n", | |
df1b5770 | 603 | @@ -2771,6 +2797,9 @@ |
b4e1fa2c AM |
604 | |
605 | mutex_exit(&kernel_mutex); | |
606 | ||
607 | + mutex_enter(&(log_sys->mutex)); | |
608 | + lsn_old = log_sys->lsn; | |
609 | + mutex_exit(&(log_sys->mutex)); | |
610 | loop: | |
611 | /*****************************************************************/ | |
612 | /* ---- When there is database activity by users, we cycle in this | |
df1b5770 | 613 | @@ -2801,9 +2830,13 @@ |
b4e1fa2c AM |
614 | /* Sleep for 1 second on entrying the for loop below the first time. */ |
615 | next_itr_time = ut_time_ms() + 1000; | |
616 | ||
617 | + skip_sleep = FALSE; | |
618 | + | |
619 | for (i = 0; i < 10; i++) { | |
620 | ulint cur_time = ut_time_ms(); | |
621 | ||
622 | + n_pages_flushed = 0; /* initialize */ | |
623 | + | |
624 | /* ALTER TABLE in MySQL requires on Unix that the table handler | |
625 | can drop tables lazily after there no longer are SELECT | |
626 | queries to them. */ | |
df1b5770 | 627 | @@ -2827,6 +2860,7 @@ |
b4e1fa2c AM |
628 | srv_main_thread_op_info = "sleeping"; |
629 | srv_main_1_second_loops++; | |
630 | ||
631 | + if (!skip_sleep) { | |
632 | if (next_itr_time > cur_time | |
633 | && srv_shutdown_state == SRV_SHUTDOWN_NONE) { | |
634 | ||
df1b5770 | 635 | @@ -2837,10 +2871,26 @@ |
b4e1fa2c AM |
636 | (next_itr_time - cur_time) |
637 | * 1000)); | |
638 | srv_main_sleeps++; | |
639 | + | |
640 | + /* | |
641 | + mutex_enter(&(log_sys->mutex)); | |
642 | + oldest_lsn = buf_pool_get_oldest_modification(); | |
643 | + ib_uint64_t lsn = log_sys->lsn; | |
644 | + mutex_exit(&(log_sys->mutex)); | |
645 | + | |
646 | + if(oldest_lsn) | |
647 | + fprintf(stderr, | |
648 | + "InnoDB flush: age pct: %lu, lsn progress: %lu\n", | |
649 | + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age, | |
650 | + lsn - lsn_old); | |
651 | + */ | |
652 | } | |
653 | ||
654 | /* Each iteration should happen at 1 second interval. */ | |
655 | next_itr_time = ut_time_ms() + 1000; | |
656 | + } /* if (!skip_sleep) */ | |
657 | + | |
658 | + skip_sleep = FALSE; | |
659 | ||
660 | /* Flush logs if needed */ | |
661 | srv_sync_log_buffer_in_background(); | |
df1b5770 | 662 | @@ -2860,7 +2910,7 @@ |
b4e1fa2c AM |
663 | if (n_pend_ios < SRV_PEND_IO_THRESHOLD |
664 | && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) { | |
665 | srv_main_thread_op_info = "doing insert buffer merge"; | |
666 | - ibuf_contract_for_n_pages(FALSE, PCT_IO(5)); | |
667 | + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5)); | |
668 | ||
669 | /* Flush logs if needed */ | |
670 | srv_sync_log_buffer_in_background(); | |
df1b5770 | 671 | @@ -2877,7 +2927,11 @@ |
b4e1fa2c AM |
672 | n_pages_flushed = buf_flush_list( |
673 | PCT_IO(100), IB_ULONGLONG_MAX); | |
674 | ||
675 | - } else if (srv_adaptive_flushing) { | |
676 | + mutex_enter(&(log_sys->mutex)); | |
677 | + lsn_old = log_sys->lsn; | |
678 | + mutex_exit(&(log_sys->mutex)); | |
679 | + prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
680 | + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) { | |
681 | ||
682 | /* Try to keep the rate of flushing of dirty | |
683 | pages such that redo log generation does not | |
df1b5770 | 684 | @@ -2893,6 +2947,224 @@ |
b4e1fa2c AM |
685 | n_flush, |
686 | IB_ULONGLONG_MAX); | |
687 | } | |
688 | + | |
689 | + mutex_enter(&(log_sys->mutex)); | |
690 | + lsn_old = log_sys->lsn; | |
691 | + mutex_exit(&(log_sys->mutex)); | |
692 | + prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
693 | + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) { | |
694 | + | |
695 | + /* Try to keep modified age not to exceed | |
696 | + max_checkpoint_age * 7/8 line */ | |
697 | + | |
698 | + mutex_enter(&(log_sys->mutex)); | |
699 | + | |
700 | + oldest_lsn = buf_pool_get_oldest_modification(); | |
701 | + if (oldest_lsn == 0) { | |
702 | + lsn_old = log_sys->lsn; | |
703 | + mutex_exit(&(log_sys->mutex)); | |
704 | + | |
705 | + } else { | |
706 | + if ((log_sys->lsn - oldest_lsn) | |
707 | + > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) { | |
708 | + /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */ | |
709 | + /* We should not flush from here. */ | |
710 | + lsn_old = log_sys->lsn; | |
711 | + mutex_exit(&(log_sys->mutex)); | |
712 | + } else if ((log_sys->lsn - oldest_lsn) | |
713 | + > (log_sys->max_checkpoint_age)/4 ) { | |
714 | + | |
715 | + /* defence line (max_checkpoint_age * 1/4) */ | |
716 | + ib_uint64_t lsn = log_sys->lsn; | |
717 | + | |
718 | + ib_uint64_t level, bpl; | |
719 | + buf_page_t* bpage; | |
720 | + ulint j; | |
721 | + | |
722 | + mutex_exit(&(log_sys->mutex)); | |
723 | + | |
724 | + bpl = 0; | |
725 | + | |
726 | + for (j = 0; j < srv_buf_pool_instances; j++) { | |
727 | + buf_pool_t* buf_pool; | |
728 | + ulint n_blocks; | |
729 | + | |
730 | + buf_pool = buf_pool_from_array(j); | |
731 | + | |
732 | + /* The scanning flush_list is optimistic here */ | |
733 | + | |
734 | + level = 0; | |
735 | + n_blocks = 0; | |
736 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
737 | + | |
738 | + while (bpage != NULL) { | |
739 | + ib_uint64_t oldest_modification = bpage->oldest_modification; | |
740 | + if (oldest_modification != 0) { | |
741 | + level += log_sys->max_checkpoint_age | |
742 | + - (lsn - oldest_modification); | |
743 | + } | |
744 | + bpage = UT_LIST_GET_NEXT(list, bpage); | |
745 | + n_blocks++; | |
746 | + } | |
747 | + | |
748 | + if (level) { | |
749 | + bpl += ((ib_uint64_t) n_blocks * n_blocks | |
750 | + * (lsn - lsn_old)) / level; | |
751 | + } | |
752 | + | |
753 | + } | |
754 | + | |
755 | + if (!srv_use_doublewrite_buf) { | |
756 | + /* flushing is faster without the doublewrite buffer */ | |
757 | + bpl = (bpl * 7) / 8; | |
758 | + } | |
759 | + | |
760 | + if (bpl) { | |
761 | +retry_flush_batch: | |
762 | + n_pages_flushed = buf_flush_list(bpl, | |
763 | + oldest_lsn + (lsn - lsn_old)); | |
764 | + if (n_pages_flushed == ULINT_UNDEFINED) { | |
765 | + os_thread_sleep(5000); | |
766 | + goto retry_flush_batch; | |
767 | + } | |
768 | + } | |
769 | + | |
770 | + lsn_old = lsn; | |
771 | + /* | |
772 | + fprintf(stderr, | |
773 | + "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n", | |
774 | + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age, | |
775 | + lsn - lsn_old, bpl); | |
776 | + */ | |
777 | + } else { | |
778 | + lsn_old = log_sys->lsn; | |
779 | + mutex_exit(&(log_sys->mutex)); | |
780 | + } | |
781 | + } | |
782 | + prev_adaptive_flushing_method = 1; | |
783 | + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) { | |
784 | + buf_pool_t* buf_pool; | |
785 | + buf_page_t* bpage; | |
786 | + ib_uint64_t lsn; | |
787 | + ulint j; | |
788 | + | |
789 | + mutex_enter(&(log_sys->mutex)); | |
790 | + oldest_lsn = buf_pool_get_oldest_modification(); | |
791 | + lsn = log_sys->lsn; | |
792 | + mutex_exit(&(log_sys->mutex)); | |
793 | + | |
794 | + /* run this loop more often per second (x10) */ | |
795 | + next_itr_time -= 900; /* 1000 - 900 == 100 */ | |
796 | + inner_loop++; | |
797 | + if (inner_loop < 10) { | |
798 | + i--; | |
799 | + } else { | |
800 | + inner_loop = 0; | |
801 | + } | |
802 | + | |
803 | + if (prev_adaptive_flushing_method == 2) { | |
804 | + lint n_flush; | |
d8778560 AM |
805 | + lint blocks_sum; |
806 | + ulint new_blocks_sum, flushed_blocks_sum; | |
b4e1fa2c AM |
807 | + |
808 | + blocks_sum = new_blocks_sum = flushed_blocks_sum = 0; | |
809 | + | |
810 | + /* prev_flush_info[j] should hold the previous loop's values */ | |
811 | + for (j = 0; j < srv_buf_pool_instances; j++) { | |
812 | + lint blocks_num, new_blocks_num, flushed_blocks_num; | |
813 | + ibool found; | |
814 | + | |
815 | + buf_pool = buf_pool_from_array(j); | |
816 | + | |
817 | + blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list); | |
818 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
819 | + new_blocks_num = 0; | |
820 | + | |
821 | + found = FALSE; | |
822 | + while (bpage != NULL) { | |
823 | + if (prev_flush_info[j].space == bpage->space | |
824 | + && prev_flush_info[j].offset == bpage->offset | |
825 | + && prev_flush_info[j].oldest_modification | |
826 | + == bpage->oldest_modification) { | |
827 | + found = TRUE; | |
828 | + break; | |
829 | + } | |
830 | + bpage = UT_LIST_GET_NEXT(list, bpage); | |
831 | + new_blocks_num++; | |
832 | + } | |
833 | + if (!found) { | |
834 | + new_blocks_num = blocks_num; | |
835 | + } | |
836 | + | |
837 | + flushed_blocks_num = new_blocks_num + prev_flush_info[j].count | |
838 | + - blocks_num; | |
839 | + if (flushed_blocks_num < 0) { | |
840 | + flushed_blocks_num = 0; | |
841 | + } | |
842 | + | |
843 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
844 | + | |
845 | + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list); | |
846 | + if (bpage) { | |
847 | + prev_flush_info[j].space = bpage->space; | |
848 | + prev_flush_info[j].offset = bpage->offset; | |
849 | + prev_flush_info[j].oldest_modification = bpage->oldest_modification; | |
850 | + } else { | |
851 | + prev_flush_info[j].space = 0; | |
852 | + prev_flush_info[j].offset = 0; | |
853 | + prev_flush_info[j].oldest_modification = 0; | |
854 | + } | |
855 | + | |
856 | + new_blocks_sum += new_blocks_num; | |
857 | + flushed_blocks_sum += flushed_blocks_num; | |
858 | + blocks_sum += blocks_num; | |
859 | + } | |
860 | + | |
861 | + n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async; | |
862 | + if (flushed_blocks_sum > n_pages_flushed_prev) { | |
863 | + n_flush -= (flushed_blocks_sum - n_pages_flushed_prev); | |
864 | + } | |
865 | + | |
866 | + if (n_flush > 0) { | |
867 | + n_flush++; | |
868 | + n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old)); | |
869 | + } else { | |
870 | + n_pages_flushed = 0; | |
871 | + } | |
872 | + } else { | |
873 | + /* store previous first pages of the flush_list */ | |
874 | + for (j = 0; j < srv_buf_pool_instances; j++) { | |
875 | + buf_pool = buf_pool_from_array(j); | |
876 | + | |
877 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
878 | + | |
879 | + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list); | |
880 | + if (bpage) { | |
881 | + prev_flush_info[j].space = bpage->space; | |
882 | + prev_flush_info[j].offset = bpage->offset; | |
883 | + prev_flush_info[j].oldest_modification = bpage->oldest_modification; | |
884 | + } else { | |
885 | + prev_flush_info[j].space = 0; | |
886 | + prev_flush_info[j].offset = 0; | |
887 | + prev_flush_info[j].oldest_modification = 0; | |
888 | + } | |
889 | + } | |
890 | + n_pages_flushed = 0; | |
891 | + } | |
892 | + | |
893 | + lsn_old = lsn; | |
894 | + prev_adaptive_flushing_method = 2; | |
895 | + } else { | |
896 | + mutex_enter(&(log_sys->mutex)); | |
897 | + lsn_old = log_sys->lsn; | |
898 | + mutex_exit(&(log_sys->mutex)); | |
899 | + prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
900 | + } | |
901 | + | |
902 | + if (n_pages_flushed == ULINT_UNDEFINED) { | |
903 | + n_pages_flushed_prev = 0; | |
904 | + } else { | |
905 | + n_pages_flushed_prev = n_pages_flushed; | |
906 | } | |
907 | ||
908 | if (srv_activity_count == old_activity_count) { | |
df1b5770 | 909 | @@ -2941,7 +3213,7 @@ |
b4e1fa2c AM |
910 | even if the server were active */ |
911 | ||
912 | srv_main_thread_op_info = "doing insert buffer merge"; | |
913 | - ibuf_contract_for_n_pages(FALSE, PCT_IO(5)); | |
914 | + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5)); | |
915 | ||
916 | /* Flush logs if needed */ | |
917 | srv_sync_log_buffer_in_background(); | |
df1b5770 | 918 | @@ -3049,7 +3321,7 @@ |
b4e1fa2c AM |
919 | buf_flush_list below. Otherwise, the system favors |
920 | clean pages over cleanup throughput. */ | |
921 | n_bytes_merged = ibuf_contract_for_n_pages(FALSE, | |
922 | - PCT_IO(100)); | |
923 | + PCT_IBUF_IO(100)); | |
924 | } | |
925 | ||
926 | srv_main_thread_op_info = "reserving kernel mutex"; | |
df1b5770 | 927 | @@ -3195,6 +3467,7 @@ |
b4e1fa2c AM |
928 | srv_slot_t* slot; |
929 | ulint slot_no = ULINT_UNDEFINED; | |
930 | ulint n_total_purged = ULINT_UNDEFINED; | |
931 | + ulint next_itr_time; | |
932 | ||
933 | ut_a(srv_n_purge_threads == 1); | |
934 | ||
df1b5770 | 935 | @@ -3217,9 +3490,12 @@ |
b4e1fa2c AM |
936 | |
937 | mutex_exit(&kernel_mutex); | |
938 | ||
939 | + next_itr_time = ut_time_ms(); | |
940 | + | |
941 | while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { | |
942 | ||
943 | ulint n_pages_purged; | |
944 | + ulint cur_time; | |
945 | ||
946 | /* If there are very few records to purge or the last | |
947 | purge didn't purge any records then wait for activity. | |
df1b5770 | 948 | @@ -3260,6 +3536,16 @@ |
b4e1fa2c AM |
949 | } while (n_pages_purged > 0 && !srv_fast_shutdown); |
950 | ||
951 | srv_sync_log_buffer_in_background(); | |
952 | + | |
953 | + cur_time = ut_time_ms(); | |
954 | + if (next_itr_time > cur_time) { | |
955 | + os_thread_sleep(ut_min(1000000, | |
956 | + (next_itr_time - cur_time) | |
957 | + * 1000)); | |
958 | + next_itr_time = ut_time_ms() + 1000; | |
959 | + } else { | |
960 | + next_itr_time = cur_time + 1000; | |
961 | + } | |
962 | } | |
963 | ||
964 | mutex_enter(&kernel_mutex); | |
965 | diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c | |
966 | --- a/storage/innobase/srv/srv0start.c 2010-11-03 07:01:13.000000000 +0900 | |
967 | +++ b/storage/innobase/srv/srv0start.c 2010-12-03 15:10:09.103023543 +0900 | |
d8778560 | 968 | @@ -1212,6 +1212,9 @@ |
b4e1fa2c AM |
969 | } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) { |
970 | srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; | |
971 | ||
972 | + } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) { | |
973 | + srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT; | |
974 | + | |
975 | } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) { | |
976 | srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; | |
977 | ||
978 | diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c | |
979 | --- a/storage/innobase/trx/trx0trx.c 2010-11-03 07:01:13.000000000 +0900 | |
980 | +++ b/storage/innobase/trx/trx0trx.c 2010-12-03 15:10:09.106023937 +0900 | |
981 | @@ -865,6 +865,7 @@ | |
982 | trx->read_view = NULL; | |
983 | ||
984 | if (lsn) { | |
985 | + ulint flush_log_at_trx_commit; | |
986 | ||
987 | mutex_exit(&kernel_mutex); | |
988 | ||
989 | @@ -873,6 +874,12 @@ | |
990 | trx_undo_insert_cleanup(trx); | |
991 | } | |
992 | ||
993 | + if (srv_use_global_flush_log_at_trx_commit) { | |
994 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); | |
995 | + } else { | |
996 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); | |
997 | + } | |
998 | + | |
999 | /* NOTE that we could possibly make a group commit more | |
1000 | efficient here: call os_thread_yield here to allow also other | |
1001 | trxs to come to commit! */ | |
1002 | @@ -904,9 +911,9 @@ | |
1003 | if (trx->flush_log_later) { | |
1004 | /* Do nothing yet */ | |
1005 | trx->must_flush_log_later = TRUE; | |
1006 | - } else if (srv_flush_log_at_trx_commit == 0) { | |
1007 | + } else if (flush_log_at_trx_commit == 0) { | |
1008 | /* Do nothing */ | |
1009 | - } else if (srv_flush_log_at_trx_commit == 1) { | |
1010 | + } else if (flush_log_at_trx_commit == 1) { | |
1011 | if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { | |
1012 | /* Write the log but do not flush it to disk */ | |
1013 | ||
1014 | @@ -918,7 +925,7 @@ | |
1015 | ||
1016 | log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); | |
1017 | } | |
1018 | - } else if (srv_flush_log_at_trx_commit == 2) { | |
1019 | + } else if (flush_log_at_trx_commit == 2) { | |
1020 | ||
1021 | /* Write the log but do not flush it to disk */ | |
1022 | ||
1023 | @@ -1582,16 +1589,23 @@ | |
1024 | trx_t* trx) /*!< in: trx handle */ | |
1025 | { | |
1026 | ib_uint64_t lsn = trx->commit_lsn; | |
1027 | + ulint flush_log_at_trx_commit; | |
1028 | ||
1029 | ut_a(trx); | |
1030 | ||
1031 | trx->op_info = "flushing log"; | |
1032 | ||
1033 | + if (srv_use_global_flush_log_at_trx_commit) { | |
1034 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); | |
1035 | + } else { | |
1036 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); | |
1037 | + } | |
1038 | + | |
1039 | if (!trx->must_flush_log_later) { | |
1040 | /* Do nothing */ | |
1041 | - } else if (srv_flush_log_at_trx_commit == 0) { | |
1042 | + } else if (flush_log_at_trx_commit == 0) { | |
1043 | /* Do nothing */ | |
1044 | - } else if (srv_flush_log_at_trx_commit == 1) { | |
1045 | + } else if (flush_log_at_trx_commit == 1) { | |
1046 | if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { | |
1047 | /* Write the log but do not flush it to disk */ | |
1048 | ||
1049 | @@ -1602,7 +1616,7 @@ | |
1050 | ||
1051 | log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); | |
1052 | } | |
1053 | - } else if (srv_flush_log_at_trx_commit == 2) { | |
1054 | + } else if (flush_log_at_trx_commit == 2) { | |
1055 | ||
1056 | /* Write the log but do not flush it to disk */ | |
1057 | ||
1058 | @@ -1855,6 +1869,8 @@ | |
1059 | /*--------------------------------------*/ | |
1060 | ||
1061 | if (lsn) { | |
1062 | + ulint flush_log_at_trx_commit; | |
1063 | + | |
1064 | /* Depending on the my.cnf options, we may now write the log | |
1065 | buffer to the log files, making the prepared state of the | |
1066 | transaction durable if the OS does not crash. We may also | |
1067 | @@ -1874,9 +1890,15 @@ | |
1068 | ||
1069 | mutex_exit(&kernel_mutex); | |
1070 | ||
1071 | - if (srv_flush_log_at_trx_commit == 0) { | |
1072 | + if (srv_use_global_flush_log_at_trx_commit) { | |
1073 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); | |
1074 | + } else { | |
1075 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); | |
1076 | + } | |
1077 | + | |
1078 | + if (flush_log_at_trx_commit == 0) { | |
1079 | /* Do nothing */ | |
1080 | - } else if (srv_flush_log_at_trx_commit == 1) { | |
1081 | + } else if (flush_log_at_trx_commit == 1) { | |
1082 | if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { | |
1083 | /* Write the log but do not flush it to disk */ | |
1084 | ||
1085 | @@ -1888,7 +1910,7 @@ | |
1086 | ||
1087 | log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); | |
1088 | } | |
1089 | - } else if (srv_flush_log_at_trx_commit == 2) { | |
1090 | + } else if (flush_log_at_trx_commit == 2) { | |
1091 | ||
1092 | /* Write the log but do not flush it to disk */ | |
1093 |