]>
Commit | Line | Data |
---|---|---|
b4e1fa2c AM |
1 | # name : innodb_io_patches.patch |
2 | # introduced : 11 or before | |
3 | # maintainer : Yasufumi | |
4 | # | |
5 | #!!! notice !!! | |
6 | # Any small change to this file in the main branch | |
7 | # should be done or reviewed by the maintainer! | |
8 | diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c | |
9 | --- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:09:51.273986410 +0900 | |
10 | +++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:10:08.934990091 +0900 | |
11 | @@ -320,6 +320,7 @@ | |
12 | ||
13 | /* When we traverse all the flush lists we don't want another | |
14 | thread to add a dirty page to any flush list. */ | |
15 | + if (srv_buf_pool_instances > 1) | |
16 | log_flush_order_mutex_enter(); | |
17 | ||
18 | for (i = 0; i < srv_buf_pool_instances; i++) { | |
19 | @@ -343,6 +344,7 @@ | |
20 | } | |
21 | } | |
22 | ||
23 | + if (srv_buf_pool_instances > 1) | |
24 | log_flush_order_mutex_exit(); | |
25 | ||
26 | /* The returned answer may be out of date: the flush_list can | |
27 | diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c | |
28 | --- a/storage/innobase/buf/buf0flu.c 2010-11-03 07:01:13.000000000 +0900 | |
29 | +++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:10:08.934990091 +0900 | |
d8778560 | 30 | @@ -1376,7 +1376,7 @@ |
b4e1fa2c AM |
31 | |
32 | ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); | |
33 | ||
34 | - if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { | |
35 | + if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) { | |
36 | /* If there is little space, it is better not to flush | |
37 | any block except from the end of the LRU list */ | |
38 | ||
39 | diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c | |
40 | --- a/storage/innobase/buf/buf0rea.c 2010-11-03 07:01:13.000000000 +0900 | |
41 | +++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:10:08.937050537 +0900 | |
42 | @@ -260,6 +260,10 @@ | |
43 | = BUF_READ_AHEAD_LINEAR_AREA(buf_pool); | |
44 | ulint threshold; | |
45 | ||
46 | + if (!(srv_read_ahead & 2)) { | |
47 | + return(0); | |
48 | + } | |
49 | + | |
50 | if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) { | |
51 | /* No read-ahead to avoid thread deadlocks */ | |
52 | return(0); | |
53 | diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc | |
54 | --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:09:51.283956391 +0900 | |
55 | +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:10:08.963980444 +0900 | |
df1b5770 | 56 | @@ -445,6 +445,12 @@ |
b4e1fa2c AM |
57 | "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", |
58 | NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0); | |
59 | ||
60 | +static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG, | |
61 | + "Set to 0 (write and flush once per second)," | |
62 | + " 1 (write and flush at each commit)" | |
63 | + " or 2 (write at commit, flush once per second).", | |
64 | + NULL, NULL, 1, 0, 2, 0); | |
65 | + | |
66 | ||
67 | static handler *innobase_create_handler(handlerton *hton, | |
68 | TABLE_SHARE *table, | |
df1b5770 | 69 | @@ -839,6 +845,17 @@ |
b4e1fa2c AM |
70 | } |
71 | } | |
72 | ||
73 | +/******************************************************************//** | |
74 | +@return the flush_log_at_trx_commit THDVAR value read for the given thd */ | |
75 | +extern "C" UNIV_INTERN | |
76 | +ulong | |
77 | +thd_flush_log_at_trx_commit( | |
78 | +/*================================*/ | |
79 | + void* thd) /*!< in: thread handle, cast to THD*; callers in this patch pass NULL */ | |
80 | +{ | |
81 | + return(THDVAR((THD*) thd, flush_log_at_trx_commit)); | |
82 | +} | |
83 | + | |
84 | /********************************************************************//** | |
85 | Obtain the InnoDB transaction of a MySQL thread. | |
86 | @return reference to transaction pointer */ | |
df1b5770 | 87 | @@ -2410,6 +2427,9 @@ |
b4e1fa2c AM |
88 | srv_n_read_io_threads = (ulint) innobase_read_io_threads; |
89 | srv_n_write_io_threads = (ulint) innobase_write_io_threads; | |
90 | ||
91 | + srv_read_ahead &= 3; | |
92 | + srv_adaptive_flushing_method %= 3; | |
93 | + | |
94 | srv_force_recovery = (ulint) innobase_force_recovery; | |
95 | ||
96 | srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; | |
11822e22 | 97 | @@ -11001,7 +11021,7 @@ |
b4e1fa2c | 98 | PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, |
11822e22 | 99 | "Purge threads can be either 0 or 1.", |
b4e1fa2c AM |
100 | NULL, NULL, |
101 | - 0, /* Default setting */ | |
102 | + 1, /* Default setting */ | |
103 | 0, /* Minimum value */ | |
104 | 1, 0); /* Maximum value */ | |
105 | ||
11822e22 | 106 | @@ -11043,12 +11063,18 @@ |
b4e1fa2c AM |
107 | innodb_file_format_max_validate, |
108 | innodb_file_format_max_update, "Antelope"); | |
109 | ||
110 | -static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit, | |
111 | - PLUGIN_VAR_OPCMDARG, | |
112 | - "Set to 0 (write and flush once per second)," | |
113 | - " 1 (write and flush at each commit)" | |
114 | - " or 2 (write at commit, flush once per second).", | |
115 | - NULL, NULL, 1, 0, 2, 0); | |
116 | +/* Changed to the THDVAR */ | |
117 | +//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit, | |
118 | +// PLUGIN_VAR_OPCMDARG, | |
119 | +// "Set to 0 (write and flush once per second)," | |
120 | +// " 1 (write and flush at each commit)" | |
121 | +// " or 2 (write at commit, flush once per second).", | |
122 | +// NULL, NULL, 1, 0, 2, 0); | |
123 | + | |
124 | +static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit, | |
125 | + PLUGIN_VAR_NOCMDARG, | |
126 | + "Use global innodb_flush_log_at_trx_commit value. (default: ON).", | |
127 | + NULL, NULL, TRUE); | |
128 | ||
129 | static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method, | |
130 | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
11822e22 | 131 | @@ -11143,7 +11169,7 @@ |
b4e1fa2c AM |
132 | static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, |
133 | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
134 | "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", | |
135 | - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L); | |
136 | + NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L); | |
137 | ||
138 | static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances, | |
139 | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
11822e22 | 140 | @@ -11295,6 +11321,95 @@ |
b4e1fa2c AM |
141 | "trigger a readahead.", |
142 | NULL, NULL, 56, 0, 64, 0); | |
143 | ||
144 | +static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size, | |
145 | + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
146 | + "The maximum size of the insert buffer. (in bytes)", | |
147 | + NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0); | |
148 | + | |
149 | +static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract, | |
150 | + PLUGIN_VAR_RQCMDARG, | |
151 | + "Enable/Disable active_contract of insert buffer. 0:disable 1:enable", | |
152 | + NULL, NULL, 1, 0, 1, 0); | |
153 | + | |
154 | +static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate, | |
155 | + PLUGIN_VAR_RQCMDARG, | |
156 | + "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)", | |
157 | + NULL, NULL, 100, 100, 999999999, 0); | |
158 | + | |
159 | +static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target, | |
160 | + PLUGIN_VAR_RQCMDARG, | |
161 | + "Control soft limit of checkpoint age. (0 : not control)", | |
162 | + NULL, NULL, 0, 0, ~0UL, 0); | |
163 | + | |
164 | +static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages, | |
165 | + PLUGIN_VAR_RQCMDARG, | |
166 | + "Enable/Disable flushing also neighbor pages. 0:disable 1:enable", | |
167 | + NULL, NULL, 1, 0, 1, 0); | |
168 | + | |
169 | +static /* update hook passed to MYSQL_SYSVAR_ENUM(read_ahead, ...) below */ | |
170 | +void | |
171 | +innodb_read_ahead_update( | |
172 | + THD* thd, /*!< in: not used here */ | |
173 | + struct st_mysql_sys_var* var, /*!< in: not used here */ | |
174 | + void* var_ptr, /*!< out: written as long with the masked value */ | |
175 | + const void* save) /*!< in: new value, read as long */ | |
176 | +{ | |
177 | + *(long *)var_ptr= (*(long *)save) & 3; /* keep low 2 bits: alias indexes 4..7 ("0".."3") become 0..3 */ | |
178 | +} | |
179 | +const char *read_ahead_names[]= | |
180 | +{ | |
181 | + "none", /* 0 */ | |
182 | + "random", | |
183 | + "linear", | |
184 | + "both", /* 3 */ | |
185 | + /* For compatibility of the older patch */ | |
186 | + "0", /* 4 ("none" + 4) */ | |
187 | + "1", | |
188 | + "2", | |
189 | + "3", /* 7 ("both" + 4) */ | |
190 | + NullS | |
191 | +}; | |
192 | +TYPELIB read_ahead_typelib= | |
193 | +{ | |
194 | + array_elements(read_ahead_names) - 1, "read_ahead_typelib", | |
195 | + read_ahead_names, NULL | |
196 | +}; | |
197 | +static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead, | |
198 | + PLUGIN_VAR_RQCMDARG, | |
199 | + "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]", | |
200 | + NULL, innodb_read_ahead_update, 2, &read_ahead_typelib); | |
201 | + | |
202 | +static /* update hook passed to MYSQL_SYSVAR_ENUM(adaptive_flushing_method, ...) below */ | |
203 | +void | |
204 | +innodb_adaptive_flushing_method_update( | |
205 | + THD* thd, /*!< in: not used here */ | |
206 | + struct st_mysql_sys_var* var, /*!< in: not used here */ | |
207 | + void* var_ptr, /*!< out: written as long with the reduced value */ | |
208 | + const void* save) /*!< in: new value, read as long */ | |
209 | +{ | |
210 | + *(long *)var_ptr= (*(long *)save) % 3; /* 3 methods; legacy aliases "0".."2" at index 3..5 fold onto 0..2 */ | |
211 | +} | |
212 | +const char *adaptive_flushing_method_names[]= /* referenced by adaptive_flushing_method_typelib */ | |
213 | +{ | |
214 | + "native", /* 0 */ | |
215 | + "estimate", /* 1 */ | |
216 | + "keep_average", /* 2 */ | |
217 | + /* For compatibility of the older patch */ | |
218 | + "0", /* 3 ("native" + 3) */ | |
219 | + "1", /* 4 ("estimate" + 3) */ | |
220 | + "2", /* 5 ("keep_average" + 3) */ | |
221 | + NullS | |
222 | +}; | |
223 | +TYPELIB adaptive_flushing_method_typelib= | |
224 | +{ | |
225 | + array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib", | |
226 | + adaptive_flushing_method_names, NULL | |
227 | +}; | |
228 | +static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method, | |
229 | + PLUGIN_VAR_RQCMDARG, | |
230 | + "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)", | |
231 | + NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib); | |
b4e1fa2c AM |
232 | + |
233 | static struct st_mysql_sys_var* innobase_system_variables[]= { | |
234 | MYSQL_SYSVAR(additional_mem_pool_size), | |
235 | MYSQL_SYSVAR(autoextend_increment), | |
11822e22 | 236 | @@ -11315,6 +11430,7 @@ |
b4e1fa2c AM |
237 | MYSQL_SYSVAR(file_format_check), |
238 | MYSQL_SYSVAR(file_format_max), | |
239 | MYSQL_SYSVAR(flush_log_at_trx_commit), | |
240 | + MYSQL_SYSVAR(use_global_flush_log_at_trx_commit), | |
241 | MYSQL_SYSVAR(flush_method), | |
242 | MYSQL_SYSVAR(force_recovery), | |
243 | MYSQL_SYSVAR(locks_unsafe_for_binlog), | |
11822e22 | 244 | @@ -11352,6 +11468,13 @@ |
b4e1fa2c AM |
245 | MYSQL_SYSVAR(show_verbose_locks), |
246 | MYSQL_SYSVAR(show_locks_held), | |
247 | MYSQL_SYSVAR(version), | |
248 | + MYSQL_SYSVAR(ibuf_max_size), | |
249 | + MYSQL_SYSVAR(ibuf_active_contract), | |
250 | + MYSQL_SYSVAR(ibuf_accel_rate), | |
251 | + MYSQL_SYSVAR(checkpoint_age_target), | |
252 | + MYSQL_SYSVAR(flush_neighbor_pages), | |
253 | + MYSQL_SYSVAR(read_ahead), | |
254 | + MYSQL_SYSVAR(adaptive_flushing_method), | |
b4e1fa2c AM |
255 | MYSQL_SYSVAR(use_sys_malloc), |
256 | MYSQL_SYSVAR(use_native_aio), | |
257 | MYSQL_SYSVAR(change_buffering), | |
258 | diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c | |
259 | --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-11-03 07:01:13.000000000 +0900 | |
260 | +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:10:09.073984282 +0900 | |
261 | @@ -524,8 +524,10 @@ | |
262 | grow in size, as the references on the upper levels of the tree can | |
263 | change */ | |
264 | ||
265 | - ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE | |
266 | - / IBUF_POOL_SIZE_PER_MAX_SIZE; | |
267 | + ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE | |
268 | + / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE); | |
269 | + | |
270 | + srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE; | |
271 | ||
272 | mutex_create(ibuf_pessimistic_insert_mutex_key, | |
273 | &ibuf_pessimistic_insert_mutex, | |
df1b5770 | 274 | @@ -2712,9 +2714,11 @@ |
b4e1fa2c AM |
275 | size = ibuf->size; |
276 | max_size = ibuf->max_size; | |
277 | ||
278 | + if (!srv_ibuf_active_contract) { | |
279 | if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { | |
280 | return; | |
281 | } | |
282 | + } | |
283 | ||
284 | sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC); | |
285 | ||
286 | diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h | |
287 | --- a/storage/innobase/include/buf0rea.h 2010-11-03 07:01:13.000000000 +0900 | |
288 | +++ b/storage/innobase/include/buf0rea.h 2010-12-03 15:10:09.076066335 +0900 | |
289 | @@ -124,8 +124,7 @@ | |
290 | ||
291 | /** The size in pages of the area which the read-ahead algorithms read if | |
292 | invoked */ | |
293 | -#define BUF_READ_AHEAD_AREA(b) \ | |
294 | - ut_min(64, ut_2_power_up((b)->curr_size / 32)) | |
295 | +#define BUF_READ_AHEAD_AREA(b) 64 | |
296 | ||
297 | /** @name Modes used in read-ahead @{ */ | |
298 | /** read only pages belonging to the insert buffer tree */ | |
299 | diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h | |
300 | --- a/storage/innobase/include/ha_prototypes.h 2010-11-03 07:01:13.000000000 +0900 | |
301 | +++ b/storage/innobase/include/ha_prototypes.h 2010-12-03 15:10:09.078026360 +0900 | |
302 | @@ -275,5 +275,12 @@ | |
303 | /*===================*/ | |
304 | void* thd, /*!< in: thread handle (THD*) */ | |
305 | ulint value); /*!< in: time waited for the lock */ | |
306 | +/******************************************************************//** | |
307 | +@return flush_log_at_trx_commit THDVAR of thd (defined in ha_innodb.cc) */ | |
308 | + | |
309 | +ulong | |
310 | +thd_flush_log_at_trx_commit( | |
311 | +/*================================*/ | |
312 | + void* thd); /*!< in: thread handle (THD*) */ | |
313 | ||
314 | #endif | |
315 | diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h | |
316 | --- a/storage/innobase/include/srv0srv.h 2010-12-03 15:09:51.291955835 +0900 | |
317 | +++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:10:09.079029047 +0900 | |
318 | @@ -141,7 +141,8 @@ | |
319 | extern ulint srv_n_log_files; | |
320 | extern ulint srv_log_file_size; | |
321 | extern ulint srv_log_buffer_size; | |
322 | -extern ulong srv_flush_log_at_trx_commit; | |
323 | +//extern ulong srv_flush_log_at_trx_commit; | |
324 | +extern char srv_use_global_flush_log_at_trx_commit; | |
325 | extern char srv_adaptive_flushing; | |
326 | ||
327 | ||
df1b5770 | 328 | @@ -219,6 +220,16 @@ |
b4e1fa2c AM |
329 | extern ulong srv_max_purge_lag; |
330 | ||
331 | extern ulong srv_replication_delay; | |
332 | + | |
333 | +extern long long srv_ibuf_max_size; | |
334 | +extern ulint srv_ibuf_active_contract; | |
335 | +extern ulint srv_ibuf_accel_rate; | |
336 | +extern ulint srv_checkpoint_age_target; | |
337 | +extern ulint srv_flush_neighbor_pages; | |
338 | +extern ulint srv_enable_unsafe_group_commit; | |
339 | +extern ulint srv_read_ahead; | |
340 | +extern ulint srv_adaptive_flushing_method; | |
341 | + | |
342 | /*-------------------------------------------*/ | |
343 | ||
344 | extern ulint srv_n_rows_inserted; | |
11822e22 | 345 | @@ -397,8 +408,9 @@ |
b4e1fa2c AM |
346 | when writing data files, but do flush |
347 | after writing to log files */ | |
348 | SRV_UNIX_NOSYNC, /*!< do not flush after writing */ | |
349 | - SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on | |
350 | + SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on | |
351 | data files */ | |
352 | + SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */ | |
353 | }; | |
354 | ||
355 | /** Alternatives for file i/o in Windows */ | |
356 | diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c | |
357 | --- a/storage/innobase/log/log0log.c 2010-11-03 07:01:13.000000000 +0900 | |
358 | +++ b/storage/innobase/log/log0log.c 2010-12-03 15:10:09.084023562 +0900 | |
d8778560 AM |
359 | @@ -48,6 +48,7 @@ |
360 | #include "srv0start.h" | |
361 | #include "trx0sys.h" | |
362 | #include "trx0trx.h" | |
363 | +#include "ha_prototypes.h" | |
364 | ||
365 | /* | |
366 | General philosophy of InnoDB redo-logs: | |
367 | @@ -359,6 +360,33 @@ | |
b4e1fa2c AM |
368 | } |
369 | ||
370 | /************************************************************//** | |
371 | +@return log_sys->max_modified_age_async, lowered via ut_min when srv_checkpoint_age_target is nonzero */ | |
372 | +UNIV_INLINE | |
373 | +ulint | |
374 | +log_max_modified_age_async() | |
375 | +{ | |
376 | + if (srv_checkpoint_age_target) { /* 0 means "not control" per the sysvar help text */ | |
377 | + return(ut_min(log_sys->max_modified_age_async, | |
378 | + srv_checkpoint_age_target | |
379 | + - srv_checkpoint_age_target / 8)); /* target minus one eighth of it */ | |
380 | + } else { | |
381 | + return(log_sys->max_modified_age_async); | |
382 | + } | |
383 | +} | |
384 | + | |
385 | +UNIV_INLINE /* returns log_sys->max_checkpoint_age_async, lowered via ut_min by a nonzero target */ | |
386 | +ulint | |
387 | +log_max_checkpoint_age_async() | |
388 | +{ | |
389 | + if (srv_checkpoint_age_target) { /* 0 means "not control" per the sysvar help text */ | |
390 | + return(ut_min(log_sys->max_checkpoint_age_async, | |
391 | + srv_checkpoint_age_target)); | |
392 | + } else { | |
393 | + return(log_sys->max_checkpoint_age_async); | |
394 | + } | |
395 | +} | |
396 | + | |
397 | +/************************************************************//** | |
398 | Closes the log. | |
399 | @return lsn */ | |
400 | UNIV_INTERN | |
d8778560 | 401 | @@ -427,7 +455,7 @@ |
b4e1fa2c AM |
402 | } |
403 | } | |
404 | ||
405 | - if (checkpoint_age <= log->max_modified_age_async) { | |
406 | + if (checkpoint_age <= log_max_modified_age_async()) { | |
407 | ||
408 | goto function_exit; | |
409 | } | |
d8778560 | 410 | @@ -435,8 +463,8 @@ |
b4e1fa2c AM |
411 | oldest_lsn = buf_pool_get_oldest_modification(); |
412 | ||
413 | if (!oldest_lsn | |
414 | - || lsn - oldest_lsn > log->max_modified_age_async | |
415 | - || checkpoint_age > log->max_checkpoint_age_async) { | |
416 | + || lsn - oldest_lsn > log_max_modified_age_async() | |
417 | + || checkpoint_age > log_max_checkpoint_age_async()) { | |
418 | ||
419 | log->check_flush_or_checkpoint = TRUE; | |
420 | } | |
d8778560 | 421 | @@ -1100,6 +1128,7 @@ |
b4e1fa2c AM |
422 | group = (log_group_t*)((ulint)group - 1); |
423 | ||
424 | if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC | |
425 | + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT | |
426 | && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { | |
427 | ||
428 | fil_flush(group->space_id); | |
d8778560 | 429 | @@ -1121,8 +1150,9 @@ |
b4e1fa2c AM |
430 | logs and cannot end up here! */ |
431 | ||
432 | if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC | |
433 | + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT | |
434 | && srv_unix_file_flush_method != SRV_UNIX_NOSYNC | |
435 | - && srv_flush_log_at_trx_commit != 2) { | |
436 | + && thd_flush_log_at_trx_commit(NULL) != 2) { | |
437 | ||
438 | fil_flush(group->space_id); | |
439 | } | |
d8778560 | 440 | @@ -1501,7 +1531,8 @@ |
b4e1fa2c AM |
441 | |
442 | mutex_exit(&(log_sys->mutex)); | |
443 | ||
444 | - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { | |
445 | + if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC | |
446 | + || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { | |
447 | /* O_DSYNC means the OS did not buffer the log file at all: | |
448 | so we have also flushed to disk what we have written */ | |
449 | ||
d8778560 | 450 | @@ -2120,10 +2151,10 @@ |
b4e1fa2c AM |
451 | |
452 | sync = TRUE; | |
453 | advance = 2 * (age - log->max_modified_age_sync); | |
454 | - } else if (age > log->max_modified_age_async) { | |
455 | + } else if (age > log_max_modified_age_async()) { | |
456 | ||
457 | /* A flush is not urgent: we do an asynchronous preflush */ | |
458 | - advance = age - log->max_modified_age_async; | |
459 | + advance = age - log_max_modified_age_async(); | |
460 | } else { | |
461 | advance = 0; | |
462 | } | |
d8778560 | 463 | @@ -2137,7 +2168,7 @@ |
b4e1fa2c AM |
464 | |
465 | do_checkpoint = TRUE; | |
466 | ||
467 | - } else if (checkpoint_age > log->max_checkpoint_age_async) { | |
468 | + } else if (checkpoint_age > log_max_checkpoint_age_async()) { | |
469 | /* A checkpoint is not urgent: do it asynchronously */ | |
470 | ||
471 | do_checkpoint = TRUE; | |
d8778560 | 472 | @@ -3349,6 +3380,17 @@ |
b4e1fa2c AM |
473 | log_sys->flushed_to_disk_lsn, |
474 | log_sys->last_checkpoint_lsn); | |
475 | ||
476 | + fprintf(file, | |
477 | + "Max checkpoint age %lu\n" | |
478 | + "Checkpoint age target %lu\n" | |
479 | + "Modified age %lu\n" | |
480 | + "Checkpoint age %lu\n", | |
481 | + (ulong) log_sys->max_checkpoint_age, | |
482 | + (ulong) log_max_checkpoint_age_async(), | |
483 | + (ulong) (log_sys->lsn - | |
484 | + log_buf_pool_get_oldest_modification()), | |
485 | + (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn)); | |
486 | + | |
487 | current_time = time(NULL); | |
488 | ||
489 | time_elapsed = 0.001 + difftime(current_time, | |
490 | diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c | |
491 | --- a/storage/innobase/log/log0recv.c 2010-11-03 07:01:13.000000000 +0900 | |
492 | +++ b/storage/innobase/log/log0recv.c 2010-12-03 15:10:09.089024191 +0900 | |
493 | @@ -2906,9 +2906,12 @@ | |
494 | ib_uint64_t archived_lsn; | |
495 | #endif /* UNIV_LOG_ARCHIVE */ | |
496 | byte* buf; | |
497 | - byte log_hdr_buf[LOG_FILE_HDR_SIZE]; | |
498 | + byte* log_hdr_buf; | |
499 | + byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE]; | |
500 | ulint err; | |
501 | ||
502 | + log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE); | |
503 | + | |
504 | #ifdef UNIV_LOG_ARCHIVE | |
505 | ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX); | |
506 | /** TRUE when recovering from a checkpoint */ | |
507 | diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c | |
508 | --- a/storage/innobase/os/os0file.c 2010-11-03 07:01:13.000000000 +0900 | |
509 | +++ b/storage/innobase/os/os0file.c 2010-12-03 15:10:09.093023540 +0900 | |
d8778560 | 510 | @@ -1424,7 +1424,7 @@ |
b4e1fa2c AM |
511 | #endif |
512 | #ifdef UNIV_NON_BUFFERED_IO | |
513 | # ifndef UNIV_HOTBACKUP | |
514 | - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) { | |
515 | + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) { | |
516 | /* Do not use unbuffered i/o to log files because | |
517 | value 2 denotes that we do not flush the log at every | |
518 | commit, but only once per second */ | |
d8778560 | 519 | @@ -1440,7 +1440,7 @@ |
b4e1fa2c AM |
520 | attributes = 0; |
521 | #ifdef UNIV_NON_BUFFERED_IO | |
522 | # ifndef UNIV_HOTBACKUP | |
523 | - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) { | |
524 | + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) { | |
525 | /* Do not use unbuffered i/o to log files because | |
526 | value 2 denotes that we do not flush the log at every | |
527 | commit, but only once per second */ | |
d8778560 | 528 | @@ -1585,6 +1585,11 @@ |
b4e1fa2c AM |
529 | os_file_set_nocache(file, name, mode_str); |
530 | } | |
531 | ||
532 | + /* ALL_O_DIRECT: O_DIRECT also for transaction log file */ | |
533 | + if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { | |
534 | + os_file_set_nocache(file, name, mode_str); | |
535 | + } | |
536 | + | |
537 | #ifdef USE_FILE_LOCK | |
538 | if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) { | |
539 | ||
540 | diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c | |
541 | --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:09:51.301987792 +0900 | |
542 | +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:13:29.369986988 +0900 | |
d8778560 | 543 | @@ -190,7 +190,8 @@ |
b4e1fa2c AM |
544 | UNIV_INTERN ulint srv_log_file_size = ULINT_MAX; |
545 | /* size in database pages */ | |
546 | UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX; | |
547 | -UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1; | |
548 | +//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1; | |
549 | +UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE; | |
550 | ||
551 | /* Try to flush dirty pages so as to avoid IO bursts at | |
552 | the checkpoints. */ | |
11822e22 | 553 | @@ -409,6 +410,17 @@ |
b4e1fa2c AM |
554 | |
555 | UNIV_INTERN ulong srv_replication_delay = 0; | |
556 | ||
557 | +UNIV_INTERN long long srv_ibuf_max_size = 0; | |
558 | +UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */ | |
559 | +UNIV_INTERN ulint srv_ibuf_accel_rate = 100; | |
560 | +#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0))) | |
561 | + | |
562 | +UNIV_INTERN ulint srv_checkpoint_age_target = 0; | |
563 | +UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */ | |
564 | + | |
565 | +UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */ | |
566 | +UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */ | |
567 | +UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */ | |
568 | /*-------------------------------------------*/ | |
569 | UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; | |
570 | UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; | |
11822e22 | 571 | @@ -2745,6 +2757,7 @@ |
b4e1fa2c AM |
572 | ulint n_pages_purged = 0; |
573 | ulint n_bytes_merged; | |
574 | ulint n_pages_flushed; | |
575 | + ulint n_pages_flushed_prev = 0; | |
576 | ulint n_bytes_archived; | |
577 | ulint n_tables_to_drop; | |
578 | ulint n_ios; | |
11822e22 | 579 | @@ -2752,7 +2765,20 @@ |
b4e1fa2c AM |
580 | ulint n_ios_very_old; |
581 | ulint n_pend_ios; | |
582 | ulint next_itr_time; | |
583 | + ulint prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
584 | + ulint inner_loop = 0; | |
585 | + ibool skip_sleep = FALSE; | |
586 | ulint i; | |
587 | + struct t_prev_flush_info_struct { | |
588 | + ulint count; | |
589 | + unsigned space:32; | |
590 | + unsigned offset:32; | |
591 | + ib_uint64_t oldest_modification; | |
592 | + } prev_flush_info[MAX_BUFFER_POOLS]; | |
593 | + | |
594 | + ib_uint64_t lsn_old; | |
595 | + | |
596 | + ib_uint64_t oldest_lsn; | |
597 | ||
598 | #ifdef UNIV_DEBUG_THREAD_CREATION | |
599 | fprintf(stderr, "Master thread starts, id %lu\n", | |
11822e22 | 600 | @@ -2774,6 +2800,9 @@ |
b4e1fa2c AM |
601 | |
602 | mutex_exit(&kernel_mutex); | |
603 | ||
604 | + mutex_enter(&(log_sys->mutex)); | |
605 | + lsn_old = log_sys->lsn; | |
606 | + mutex_exit(&(log_sys->mutex)); | |
607 | loop: | |
608 | /*****************************************************************/ | |
609 | /* ---- When there is database activity by users, we cycle in this | |
11822e22 | 610 | @@ -2804,9 +2833,13 @@ |
b4e1fa2c AM |
611 | /* Sleep for 1 second on entrying the for loop below the first time. */ |
612 | next_itr_time = ut_time_ms() + 1000; | |
613 | ||
614 | + skip_sleep = FALSE; | |
615 | + | |
616 | for (i = 0; i < 10; i++) { | |
617 | ulint cur_time = ut_time_ms(); | |
618 | ||
619 | + n_pages_flushed = 0; /* initialize */ | |
620 | + | |
621 | /* ALTER TABLE in MySQL requires on Unix that the table handler | |
622 | can drop tables lazily after there no longer are SELECT | |
623 | queries to them. */ | |
11822e22 | 624 | @@ -2830,6 +2863,7 @@ |
b4e1fa2c AM |
625 | srv_main_thread_op_info = "sleeping"; |
626 | srv_main_1_second_loops++; | |
627 | ||
628 | + if (!skip_sleep) { | |
629 | if (next_itr_time > cur_time | |
630 | && srv_shutdown_state == SRV_SHUTDOWN_NONE) { | |
631 | ||
11822e22 | 632 | @@ -2840,10 +2874,26 @@ |
b4e1fa2c AM |
633 | (next_itr_time - cur_time) |
634 | * 1000)); | |
635 | srv_main_sleeps++; | |
636 | + | |
637 | + /* | |
638 | + mutex_enter(&(log_sys->mutex)); | |
639 | + oldest_lsn = buf_pool_get_oldest_modification(); | |
640 | + ib_uint64_t lsn = log_sys->lsn; | |
641 | + mutex_exit(&(log_sys->mutex)); | |
642 | + | |
643 | + if(oldest_lsn) | |
644 | + fprintf(stderr, | |
645 | + "InnoDB flush: age pct: %lu, lsn progress: %lu\n", | |
646 | + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age, | |
647 | + lsn - lsn_old); | |
648 | + */ | |
649 | } | |
650 | ||
651 | /* Each iteration should happen at 1 second interval. */ | |
652 | next_itr_time = ut_time_ms() + 1000; | |
653 | + } /* if (!skip_sleep) */ | |
654 | + | |
655 | + skip_sleep = FALSE; | |
656 | ||
657 | /* Flush logs if needed */ | |
658 | srv_sync_log_buffer_in_background(); | |
11822e22 | 659 | @@ -2863,7 +2913,7 @@ |
b4e1fa2c AM |
660 | if (n_pend_ios < SRV_PEND_IO_THRESHOLD |
661 | && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) { | |
662 | srv_main_thread_op_info = "doing insert buffer merge"; | |
663 | - ibuf_contract_for_n_pages(FALSE, PCT_IO(5)); | |
664 | + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5)); | |
665 | ||
666 | /* Flush logs if needed */ | |
667 | srv_sync_log_buffer_in_background(); | |
11822e22 | 668 | @@ -2880,7 +2930,11 @@ |
b4e1fa2c AM |
669 | n_pages_flushed = buf_flush_list( |
670 | PCT_IO(100), IB_ULONGLONG_MAX); | |
671 | ||
672 | - } else if (srv_adaptive_flushing) { | |
673 | + mutex_enter(&(log_sys->mutex)); | |
674 | + lsn_old = log_sys->lsn; | |
675 | + mutex_exit(&(log_sys->mutex)); | |
676 | + prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
677 | + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) { | |
678 | ||
679 | /* Try to keep the rate of flushing of dirty | |
680 | pages such that redo log generation does not | |
11822e22 | 681 | @@ -2896,6 +2950,224 @@ |
b4e1fa2c AM |
682 | n_flush, |
683 | IB_ULONGLONG_MAX); | |
684 | } | |
685 | + | |
686 | + mutex_enter(&(log_sys->mutex)); | |
687 | + lsn_old = log_sys->lsn; | |
688 | + mutex_exit(&(log_sys->mutex)); | |
689 | + prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
690 | + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) { | |
691 | + | |
692 | + /* Try to keep modified age not to exceed | |
693 | + max_checkpoint_age * 7/8 line */ | |
694 | + | |
695 | + mutex_enter(&(log_sys->mutex)); | |
696 | + | |
697 | + oldest_lsn = buf_pool_get_oldest_modification(); | |
698 | + if (oldest_lsn == 0) { | |
699 | + lsn_old = log_sys->lsn; | |
700 | + mutex_exit(&(log_sys->mutex)); | |
701 | + | |
702 | + } else { | |
703 | + if ((log_sys->lsn - oldest_lsn) | |
704 | + > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) { | |
705 | + /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */ | |
706 | + /* We should not flush from here. */ | |
707 | + lsn_old = log_sys->lsn; | |
708 | + mutex_exit(&(log_sys->mutex)); | |
709 | + } else if ((log_sys->lsn - oldest_lsn) | |
710 | + > (log_sys->max_checkpoint_age)/4 ) { | |
711 | + | |
712 | + /* defence line (max_checkpoint_age * 1/4) */ | |
713 | + ib_uint64_t lsn = log_sys->lsn; | |
714 | + | |
715 | + ib_uint64_t level, bpl; | |
716 | + buf_page_t* bpage; | |
717 | + ulint j; | |
718 | + | |
719 | + mutex_exit(&(log_sys->mutex)); | |
720 | + | |
721 | + bpl = 0; | |
722 | + | |
723 | + for (j = 0; j < srv_buf_pool_instances; j++) { | |
724 | + buf_pool_t* buf_pool; | |
725 | + ulint n_blocks; | |
726 | + | |
727 | + buf_pool = buf_pool_from_array(j); | |
728 | + | |
729 | + /* The scanning flush_list is optimistic here */ | |
730 | + | |
731 | + level = 0; | |
732 | + n_blocks = 0; | |
733 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
734 | + | |
735 | + while (bpage != NULL) { | |
736 | + ib_uint64_t oldest_modification = bpage->oldest_modification; | |
737 | + if (oldest_modification != 0) { | |
738 | + level += log_sys->max_checkpoint_age | |
739 | + - (lsn - oldest_modification); | |
740 | + } | |
741 | + bpage = UT_LIST_GET_NEXT(list, bpage); | |
742 | + n_blocks++; | |
743 | + } | |
744 | + | |
745 | + if (level) { | |
746 | + bpl += ((ib_uint64_t) n_blocks * n_blocks | |
747 | + * (lsn - lsn_old)) / level; | |
748 | + } | |
749 | + | |
750 | + } | |
751 | + | |
752 | + if (!srv_use_doublewrite_buf) { | |
753 | + /* flushing is faster than with the doublewrite buffer */ | |
754 | + bpl = (bpl * 7) / 8; | |
755 | + } | |
756 | + | |
757 | + if (bpl) { | |
758 | +retry_flush_batch: | |
759 | + n_pages_flushed = buf_flush_list(bpl, | |
760 | + oldest_lsn + (lsn - lsn_old)); | |
761 | + if (n_pages_flushed == ULINT_UNDEFINED) { | |
762 | + os_thread_sleep(5000); | |
763 | + goto retry_flush_batch; | |
764 | + } | |
765 | + } | |
766 | + | |
767 | + lsn_old = lsn; | |
768 | + /* | |
769 | + fprintf(stderr, | |
770 | + "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n", | |
771 | + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age, | |
772 | + lsn - lsn_old, bpl); | |
773 | + */ | |
774 | + } else { | |
775 | + lsn_old = log_sys->lsn; | |
776 | + mutex_exit(&(log_sys->mutex)); | |
777 | + } | |
778 | + } | |
779 | + prev_adaptive_flushing_method = 1; | |
780 | + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) { | |
781 | + buf_pool_t* buf_pool; | |
782 | + buf_page_t* bpage; | |
783 | + ib_uint64_t lsn; | |
784 | + ulint j; | |
785 | + | |
786 | + mutex_enter(&(log_sys->mutex)); | |
787 | + oldest_lsn = buf_pool_get_oldest_modification(); | |
788 | + lsn = log_sys->lsn; | |
789 | + mutex_exit(&(log_sys->mutex)); | |
790 | + | |
791 | + /* raise the loop rate to 10 iterations/sec. (x10) */ | |
792 | + next_itr_time -= 900; /* 1000 - 900 == 100 */ | |
793 | + inner_loop++; | |
794 | + if (inner_loop < 10) { | |
795 | + i--; | |
796 | + } else { | |
797 | + inner_loop = 0; | |
798 | + } | |
799 | + | |
800 | + if (prev_adaptive_flushing_method == 2) { | |
801 | + lint n_flush; | |
d8778560 AM |
802 | + lint blocks_sum; |
803 | + ulint new_blocks_sum, flushed_blocks_sum; | |
b4e1fa2c AM |
804 | + |
805 | + blocks_sum = new_blocks_sum = flushed_blocks_sum = 0; | |
806 | + | |
807 | + /* prev_flush_info[j] should be the previous loop's */ | |
808 | + for (j = 0; j < srv_buf_pool_instances; j++) { | |
809 | + lint blocks_num, new_blocks_num, flushed_blocks_num; | |
810 | + ibool found; | |
811 | + | |
812 | + buf_pool = buf_pool_from_array(j); | |
813 | + | |
814 | + blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list); | |
815 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
816 | + new_blocks_num = 0; | |
817 | + | |
818 | + found = FALSE; | |
819 | + while (bpage != NULL) { | |
820 | + if (prev_flush_info[j].space == bpage->space | |
821 | + && prev_flush_info[j].offset == bpage->offset | |
822 | + && prev_flush_info[j].oldest_modification | |
823 | + == bpage->oldest_modification) { | |
824 | + found = TRUE; | |
825 | + break; | |
826 | + } | |
827 | + bpage = UT_LIST_GET_NEXT(list, bpage); | |
828 | + new_blocks_num++; | |
829 | + } | |
830 | + if (!found) { | |
831 | + new_blocks_num = blocks_num; | |
832 | + } | |
833 | + | |
834 | + flushed_blocks_num = new_blocks_num + prev_flush_info[j].count | |
835 | + - blocks_num; | |
836 | + if (flushed_blocks_num < 0) { | |
837 | + flushed_blocks_num = 0; | |
838 | + } | |
839 | + | |
840 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
841 | + | |
842 | + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list); | |
843 | + if (bpage) { | |
844 | + prev_flush_info[j].space = bpage->space; | |
845 | + prev_flush_info[j].offset = bpage->offset; | |
846 | + prev_flush_info[j].oldest_modification = bpage->oldest_modification; | |
847 | + } else { | |
848 | + prev_flush_info[j].space = 0; | |
849 | + prev_flush_info[j].offset = 0; | |
850 | + prev_flush_info[j].oldest_modification = 0; | |
851 | + } | |
852 | + | |
853 | + new_blocks_sum += new_blocks_num; | |
854 | + flushed_blocks_sum += flushed_blocks_num; | |
855 | + blocks_sum += blocks_num; | |
856 | + } | |
857 | + | |
858 | + n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async; | |
859 | + if (flushed_blocks_sum > n_pages_flushed_prev) { | |
860 | + n_flush -= (flushed_blocks_sum - n_pages_flushed_prev); | |
861 | + } | |
862 | + | |
863 | + if (n_flush > 0) { | |
864 | + n_flush++; | |
865 | + n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old)); | |
866 | + } else { | |
867 | + n_pages_flushed = 0; | |
868 | + } | |
869 | + } else { | |
870 | + /* store previous first pages of the flush_list */ | |
871 | + for (j = 0; j < srv_buf_pool_instances; j++) { | |
872 | + buf_pool = buf_pool_from_array(j); | |
873 | + | |
874 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
875 | + | |
876 | + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list); | |
877 | + if (bpage) { | |
878 | + prev_flush_info[j].space = bpage->space; | |
879 | + prev_flush_info[j].offset = bpage->offset; | |
880 | + prev_flush_info[j].oldest_modification = bpage->oldest_modification; | |
881 | + } else { | |
882 | + prev_flush_info[j].space = 0; | |
883 | + prev_flush_info[j].offset = 0; | |
884 | + prev_flush_info[j].oldest_modification = 0; | |
885 | + } | |
886 | + } | |
887 | + n_pages_flushed = 0; | |
888 | + } | |
889 | + | |
890 | + lsn_old = lsn; | |
891 | + prev_adaptive_flushing_method = 2; | |
892 | + } else { | |
893 | + mutex_enter(&(log_sys->mutex)); | |
894 | + lsn_old = log_sys->lsn; | |
895 | + mutex_exit(&(log_sys->mutex)); | |
896 | + prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
897 | + } | |
898 | + | |
899 | + if (n_pages_flushed == ULINT_UNDEFINED) { | |
900 | + n_pages_flushed_prev = 0; | |
901 | + } else { | |
902 | + n_pages_flushed_prev = n_pages_flushed; | |
903 | } | |
904 | ||
905 | if (srv_activity_count == old_activity_count) { | |
11822e22 | 906 | @@ -2944,7 +3216,7 @@ |
b4e1fa2c AM |
907 | even if the server were active */ |
908 | ||
909 | srv_main_thread_op_info = "doing insert buffer merge"; | |
910 | - ibuf_contract_for_n_pages(FALSE, PCT_IO(5)); | |
911 | + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5)); | |
912 | ||
913 | /* Flush logs if needed */ | |
914 | srv_sync_log_buffer_in_background(); | |
11822e22 | 915 | @@ -3052,7 +3324,7 @@ |
b4e1fa2c AM |
916 | buf_flush_list below. Otherwise, the system favors |
917 | clean pages over cleanup throughput. */ | |
918 | n_bytes_merged = ibuf_contract_for_n_pages(FALSE, | |
919 | - PCT_IO(100)); | |
920 | + PCT_IBUF_IO(100)); | |
921 | } | |
922 | ||
923 | srv_main_thread_op_info = "reserving kernel mutex"; | |
11822e22 AM |
924 | @@ -3199,6 +3471,7 @@ |
925 | ulint retries = 0; | |
b4e1fa2c AM |
926 | ulint slot_no = ULINT_UNDEFINED; |
927 | ulint n_total_purged = ULINT_UNDEFINED; | |
928 | + ulint next_itr_time; | |
929 | ||
930 | ut_a(srv_n_purge_threads == 1); | |
931 | ||
11822e22 | 932 | @@ -3221,9 +3494,12 @@ |
b4e1fa2c AM |
933 | |
934 | mutex_exit(&kernel_mutex); | |
935 | ||
936 | + next_itr_time = ut_time_ms(); | |
937 | + | |
938 | while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { | |
939 | ||
11822e22 | 940 | ulint n_pages_purged = 0; |
b4e1fa2c AM |
941 | + ulint cur_time; |
942 | ||
943 | /* If there are very few records to purge or the last | |
944 | purge didn't purge any records then wait for activity. | |
11822e22 | 945 | @@ -3272,6 +3548,16 @@ |
b4e1fa2c AM |
946 | } while (n_pages_purged > 0 && !srv_fast_shutdown); |
947 | ||
948 | srv_sync_log_buffer_in_background(); | |
949 | + | |
950 | + cur_time = ut_time_ms(); | |
951 | + if (next_itr_time > cur_time) { | |
952 | + os_thread_sleep(ut_min(1000000, | |
953 | + (next_itr_time - cur_time) | |
954 | + * 1000)); | |
955 | + next_itr_time = ut_time_ms() + 1000; | |
956 | + } else { | |
957 | + next_itr_time = cur_time + 1000; | |
958 | + } | |
959 | } | |
960 | ||
961 | mutex_enter(&kernel_mutex); | |
962 | diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c | |
963 | --- a/storage/innobase/srv/srv0start.c 2010-11-03 07:01:13.000000000 +0900 | |
964 | +++ b/storage/innobase/srv/srv0start.c 2010-12-03 15:10:09.103023543 +0900 | |
11822e22 | 965 | @@ -1218,6 +1218,9 @@ |
b4e1fa2c AM |
966 | } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) { |
967 | srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; | |
968 | ||
969 | + } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) { | |
970 | + srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT; | |
971 | + | |
972 | } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) { | |
973 | srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; | |
974 | ||
11822e22 AM |
975 | diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c |
976 | --- a/storage/innobase/trx/trx0purge.c 2011-04-12 14:14:14.000000000 +0900 | |
977 | +++ b/storage/innobase/trx/trx0purge.c 2011-04-12 14:15:44.000000000 +0900 | |
978 | @@ -392,10 +392,10 @@ | |
979 | trx_sys->rseg_history_len++; | |
980 | mutex_exit(&kernel_mutex); | |
981 | ||
982 | - if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { | |
983 | +// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/ | |
984 | /* Inform the purge thread that there is work to do. */ | |
985 | srv_wake_purge_thread_if_not_active(); | |
986 | - } | |
987 | +// } | |
988 | } | |
989 | ||
990 | /**********************************************************************//** | |
b4e1fa2c AM |
991 | diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c |
992 | --- a/storage/innobase/trx/trx0trx.c 2010-11-03 07:01:13.000000000 +0900 | |
993 | +++ b/storage/innobase/trx/trx0trx.c 2010-12-03 15:10:09.106023937 +0900 | |
11822e22 | 994 | @@ -925,6 +925,7 @@ |
b4e1fa2c AM |
995 | trx->read_view = NULL; |
996 | ||
997 | if (lsn) { | |
998 | + ulint flush_log_at_trx_commit; | |
999 | ||
1000 | mutex_exit(&kernel_mutex); | |
1001 | ||
11822e22 | 1002 | @@ -933,6 +934,12 @@ |
b4e1fa2c AM |
1003 | trx_undo_insert_cleanup(trx); |
1004 | } | |
1005 | ||
1006 | + if (srv_use_global_flush_log_at_trx_commit) { | |
1007 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); | |
1008 | + } else { | |
1009 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); | |
1010 | + } | |
1011 | + | |
1012 | /* NOTE that we could possibly make a group commit more | |
1013 | efficient here: call os_thread_yield here to allow also other | |
1014 | trxs to come to commit! */ | |
11822e22 | 1015 | @@ -964,9 +971,9 @@ |
b4e1fa2c AM |
1016 | if (trx->flush_log_later) { |
1017 | /* Do nothing yet */ | |
1018 | trx->must_flush_log_later = TRUE; | |
1019 | - } else if (srv_flush_log_at_trx_commit == 0) { | |
1020 | + } else if (flush_log_at_trx_commit == 0) { | |
1021 | /* Do nothing */ | |
1022 | - } else if (srv_flush_log_at_trx_commit == 1) { | |
1023 | + } else if (flush_log_at_trx_commit == 1) { | |
1024 | if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { | |
1025 | /* Write the log but do not flush it to disk */ | |
1026 | ||
11822e22 | 1027 | @@ -978,7 +985,7 @@ |
b4e1fa2c AM |
1028 | |
1029 | log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); | |
1030 | } | |
1031 | - } else if (srv_flush_log_at_trx_commit == 2) { | |
1032 | + } else if (flush_log_at_trx_commit == 2) { | |
1033 | ||
1034 | /* Write the log but do not flush it to disk */ | |
1035 | ||
11822e22 | 1036 | @@ -1642,16 +1649,23 @@ |
b4e1fa2c AM |
1037 | trx_t* trx) /*!< in: trx handle */ |
1038 | { | |
1039 | ib_uint64_t lsn = trx->commit_lsn; | |
1040 | + ulint flush_log_at_trx_commit; | |
1041 | ||
1042 | ut_a(trx); | |
1043 | ||
1044 | trx->op_info = "flushing log"; | |
1045 | ||
1046 | + if (srv_use_global_flush_log_at_trx_commit) { | |
1047 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); | |
1048 | + } else { | |
1049 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); | |
1050 | + } | |
1051 | + | |
1052 | if (!trx->must_flush_log_later) { | |
1053 | /* Do nothing */ | |
1054 | - } else if (srv_flush_log_at_trx_commit == 0) { | |
1055 | + } else if (flush_log_at_trx_commit == 0) { | |
1056 | /* Do nothing */ | |
1057 | - } else if (srv_flush_log_at_trx_commit == 1) { | |
1058 | + } else if (flush_log_at_trx_commit == 1) { | |
1059 | if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { | |
1060 | /* Write the log but do not flush it to disk */ | |
1061 | ||
11822e22 | 1062 | @@ -1662,7 +1676,7 @@ |
b4e1fa2c AM |
1063 | |
1064 | log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); | |
1065 | } | |
1066 | - } else if (srv_flush_log_at_trx_commit == 2) { | |
1067 | + } else if (flush_log_at_trx_commit == 2) { | |
1068 | ||
1069 | /* Write the log but do not flush it to disk */ | |
1070 | ||
11822e22 | 1071 | @@ -1915,6 +1929,8 @@ |
b4e1fa2c AM |
1072 | /*--------------------------------------*/ |
1073 | ||
1074 | if (lsn) { | |
1075 | + ulint flush_log_at_trx_commit; | |
1076 | + | |
1077 | /* Depending on the my.cnf options, we may now write the log | |
1078 | buffer to the log files, making the prepared state of the | |
1079 | transaction durable if the OS does not crash. We may also | |
11822e22 | 1080 | @@ -1934,9 +1950,15 @@ |
b4e1fa2c AM |
1081 | |
1082 | mutex_exit(&kernel_mutex); | |
1083 | ||
1084 | - if (srv_flush_log_at_trx_commit == 0) { | |
1085 | + if (srv_use_global_flush_log_at_trx_commit) { | |
1086 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); | |
1087 | + } else { | |
1088 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); | |
1089 | + } | |
1090 | + | |
1091 | + if (flush_log_at_trx_commit == 0) { | |
1092 | /* Do nothing */ | |
1093 | - } else if (srv_flush_log_at_trx_commit == 1) { | |
1094 | + } else if (flush_log_at_trx_commit == 1) { | |
1095 | if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { | |
1096 | /* Write the log but do not flush it to disk */ | |
1097 | ||
11822e22 | 1098 | @@ -1948,7 +1970,7 @@ |
b4e1fa2c AM |
1099 | |
1100 | log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); | |
1101 | } | |
1102 | - } else if (srv_flush_log_at_trx_commit == 2) { | |
1103 | + } else if (flush_log_at_trx_commit == 2) { | |
1104 | ||
1105 | /* Write the log but do not flush it to disk */ | |
1106 |