]>
Commit | Line | Data |
---|---|---|
b4e1fa2c AM |
1 | # name : innodb_io_patches.patch |
2 | # introduced : 11 or before | |
3 | # maintainer : Yasufumi | |
4 | # | |
5 | #!!! notice !!! | |
6 | # Any small change to this file in the main branch | |
7 | # should be done or reviewed by the maintainer! | |
8 | diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c | |
9 | --- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:09:51.273986410 +0900 | |
10 | +++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:10:08.934990091 +0900 | |
11 | @@ -320,6 +320,7 @@ | |
12 | ||
13 | /* When we traverse all the flush lists we don't want another | |
14 | thread to add a dirty page to any flush list. */ | |
15 | + if (srv_buf_pool_instances > 1) | |
16 | log_flush_order_mutex_enter(); | |
17 | ||
18 | for (i = 0; i < srv_buf_pool_instances; i++) { | |
19 | @@ -343,6 +344,7 @@ | |
20 | } | |
21 | } | |
22 | ||
23 | + if (srv_buf_pool_instances > 1) | |
24 | log_flush_order_mutex_exit(); | |
25 | ||
26 | /* The returned answer may be out of date: the flush_list can | |
27 | diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c | |
28 | --- a/storage/innobase/buf/buf0flu.c 2010-11-03 07:01:13.000000000 +0900 | |
29 | +++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:10:08.934990091 +0900 | |
d8778560 | 30 | @@ -1376,7 +1376,7 @@ |
b4e1fa2c AM |
31 | |
32 | ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); | |
33 | ||
34 | - if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { | |
35 | + if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) { | |
36 | /* If there is little space, it is better not to flush | |
37 | any block except from the end of the LRU list */ | |
38 | ||
39 | diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c | |
40 | --- a/storage/innobase/buf/buf0rea.c 2010-11-03 07:01:13.000000000 +0900 | |
41 | +++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:10:08.937050537 +0900 | |
42 | @@ -260,6 +260,10 @@ | |
43 | = BUF_READ_AHEAD_LINEAR_AREA(buf_pool); | |
44 | ulint threshold; | |
45 | ||
46 | + if (!(srv_read_ahead & 2)) { | |
47 | + return(0); | |
48 | + } | |
49 | + | |
50 | if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) { | |
51 | /* No read-ahead to avoid thread deadlocks */ | |
52 | return(0); | |
53 | diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc | |
54 | --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:09:51.283956391 +0900 | |
55 | +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:10:08.963980444 +0900 | |
adf0fb13 | 56 | @@ -444,6 +444,12 @@ |
b4e1fa2c AM |
57 | "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", |
58 | NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0); | |
59 | ||
60 | +static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG, | |
61 | + "Set to 0 (write and flush once per second)," | |
62 | + " 1 (write and flush at each commit)" | |
63 | + " or 2 (write at commit, flush once per second).", | |
64 | + NULL, NULL, 1, 0, 2, 0); | |
65 | + | |
66 | ||
67 | static handler *innobase_create_handler(handlerton *hton, | |
68 | TABLE_SHARE *table, | |
adf0fb13 | 69 | @@ -838,6 +844,17 @@ |
b4e1fa2c AM |
70 | } |
71 | } | |
72 | ||
73 | +/******************************************************************//** | |
74 | +Reads the flush_log_at_trx_commit THDVAR for a thread. @return its value */ | |
75 | +extern "C" UNIV_INTERN | |
76 | +ulong | |
77 | +thd_flush_log_at_trx_commit( | |
78 | +/*================================*/ | |
79 | + void* thd) /*!< in: thread handle (THD*); some callers pass NULL */ | |
80 | +{ | |
81 | + return(THDVAR((THD*) thd, flush_log_at_trx_commit)); | |
82 | +} | |
83 | + | |
84 | /********************************************************************//** | |
85 | Obtain the InnoDB transaction of a MySQL thread. | |
86 | @return reference to transaction pointer */ | |
adf0fb13 | 87 | @@ -2437,6 +2454,9 @@ |
b4e1fa2c AM |
88 | srv_n_read_io_threads = (ulint) innobase_read_io_threads; |
89 | srv_n_write_io_threads = (ulint) innobase_write_io_threads; | |
90 | ||
91 | + srv_read_ahead &= 3; | |
92 | + srv_adaptive_flushing_method %= 3; | |
93 | + | |
94 | srv_force_recovery = (ulint) innobase_force_recovery; | |
95 | ||
96 | srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; | |
adf0fb13 | 97 | @@ -11025,7 +11045,7 @@ |
b4e1fa2c | 98 | PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, |
11822e22 | 99 | "Purge threads can be either 0 or 1.", |
b4e1fa2c AM |
100 | NULL, NULL, |
101 | - 0, /* Default setting */ | |
102 | + 1, /* Default setting */ | |
103 | 0, /* Minimum value */ | |
104 | 1, 0); /* Maximum value */ | |
105 | ||
adf0fb13 | 106 | @@ -11067,12 +11087,18 @@ |
b4e1fa2c AM |
107 | innodb_file_format_max_validate, |
108 | innodb_file_format_max_update, "Antelope"); | |
109 | ||
110 | -static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit, | |
111 | - PLUGIN_VAR_OPCMDARG, | |
112 | - "Set to 0 (write and flush once per second)," | |
113 | - " 1 (write and flush at each commit)" | |
114 | - " or 2 (write at commit, flush once per second).", | |
115 | - NULL, NULL, 1, 0, 2, 0); | |
116 | +/* Replaced by the MYSQL_THDVAR_ULONG(flush_log_at_trx_commit) definition */ | |
117 | +//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit, | |
118 | +// PLUGIN_VAR_OPCMDARG, | |
119 | +// "Set to 0 (write and flush once per second)," | |
120 | +// " 1 (write and flush at each commit)" | |
121 | +// " or 2 (write at commit, flush once per second).", | |
122 | +// NULL, NULL, 1, 0, 2, 0); | |
123 | + | |
124 | +static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit, | |
125 | + PLUGIN_VAR_NOCMDARG, | |
126 | + "Use global innodb_flush_log_at_trx_commit value. (default: ON).", | |
127 | + NULL, NULL, TRUE); | |
128 | ||
129 | static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method, | |
130 | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
adf0fb13 | 131 | @@ -11167,7 +11193,7 @@ |
b4e1fa2c AM |
132 | static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, |
133 | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
134 | "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", | |
135 | - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L); | |
136 | + NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L); | |
137 | ||
138 | static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances, | |
139 | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
adf0fb13 | 140 | @@ -11319,6 +11345,95 @@ |
b4e1fa2c AM |
141 | "trigger a readahead.", |
142 | NULL, NULL, 56, 0, 64, 0); | |
143 | ||
144 | +static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size, | |
145 | + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
146 | + "The maximum size of the insert buffer. (in bytes)", | |
147 | + NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0); | |
148 | + | |
149 | +static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract, | |
150 | + PLUGIN_VAR_RQCMDARG, | |
151 | + "Enable/Disable active_contract of insert buffer. 0:disable 1:enable", | |
152 | + NULL, NULL, 1, 0, 1, 0); | |
153 | + | |
154 | +static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate, | |
155 | + PLUGIN_VAR_RQCMDARG, | |
156 | + "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)", | |
157 | + NULL, NULL, 100, 100, 999999999, 0); | |
158 | + | |
159 | +static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target, | |
160 | + PLUGIN_VAR_RQCMDARG, | |
161 | + "Control soft limit of checkpoint age. (0 : not control)", | |
162 | + NULL, NULL, 0, 0, ~0UL, 0); | |
163 | + | |
164 | +static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages, | |
165 | + PLUGIN_VAR_RQCMDARG, | |
166 | + "Enable/Disable flushing also neighbor pages. 0:disable 1:enable", | |
167 | + NULL, NULL, 1, 0, 1, 0); | |
168 | + | |
169 | +static /* update callback passed to MYSQL_SYSVAR_ENUM(read_ahead, ...) */ | |
170 | +void | |
171 | +innodb_read_ahead_update( | |
172 | + THD* thd, /* not referenced */ | |
173 | + struct st_mysql_sys_var* var, /* not referenced */ | |
174 | + void* var_ptr, /* out: receives the masked value (written as long) */ | |
175 | + const void* save) /* in: new value (read as long) */ | |
176 | +{ | |
177 | + *(long *)var_ptr= (*(long *)save) & 3; /* keep low 2 bits: aliases "0".."3" (4..7) fold onto 0..3 */ | |
178 | +} | |
179 | +const char *read_ahead_names[]= | |
180 | +{ | |
181 | + "none", /* 0 */ | |
182 | + "random", | |
183 | + "linear", | |
184 | + "both", /* 3 */ | |
185 | + /* For compatibility of the older patch */ | |
186 | + "0", /* 4 ("none" + 4) */ | |
187 | + "1", | |
188 | + "2", | |
189 | + "3", /* 7 ("both" + 4) */ | |
190 | + NullS | |
191 | +}; | |
192 | +TYPELIB read_ahead_typelib= | |
193 | +{ | |
194 | + array_elements(read_ahead_names) - 1, "read_ahead_typelib", | |
195 | + read_ahead_names, NULL | |
196 | +}; | |
197 | +static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead, | |
198 | + PLUGIN_VAR_RQCMDARG, | |
199 | + "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]", | |
200 | + NULL, innodb_read_ahead_update, 2, &read_ahead_typelib); | |
201 | + | |
202 | +static /* update callback passed to MYSQL_SYSVAR_ENUM(adaptive_flushing_method, ...) */ | |
203 | +void | |
204 | +innodb_adaptive_flushing_method_update( | |
205 | + THD* thd, /* not referenced */ | |
206 | + struct st_mysql_sys_var* var, /* not referenced */ | |
207 | + void* var_ptr, /* out: receives the normalised value (written as long) */ | |
208 | + const void* save) /* in: new typelib index (read as long) */ | |
209 | +{ | |
210 | + *(long *)var_ptr= (*(long *)save) % 3; /* 3 methods: aliases "0".."2" (3..5) fold onto 0..2 */ | |
211 | +} | |
212 | +const char *adaptive_flushing_method_names[]= | |
213 | +{ | |
214 | + "native", /* 0 */ | |
215 | + "estimate", /* 1 */ | |
216 | + "keep_average", /* 2 */ | |
217 | + /* Numeric aliases, for compatibility with the older patch */ | |
218 | + "0", /* 3 ("native" + 3) */ | |
219 | + "1", /* 4 ("estimate" + 3) */ | |
220 | + "2", /* 5 ("keep_average" + 3) */ | |
221 | + NullS /* terminator; excluded from the typelib count */ | |
222 | +}; | |
223 | +TYPELIB adaptive_flushing_method_typelib= | |
224 | +{ | |
225 | + array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib", | |
226 | + adaptive_flushing_method_names, NULL | |
227 | +}; | |
228 | +static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method, | |
229 | + PLUGIN_VAR_RQCMDARG, | |
230 | + "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)", | |
231 | + NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib); | |
b4e1fa2c AM |
232 | + |
233 | static struct st_mysql_sys_var* innobase_system_variables[]= { | |
234 | MYSQL_SYSVAR(additional_mem_pool_size), | |
235 | MYSQL_SYSVAR(autoextend_increment), | |
adf0fb13 | 236 | @@ -11339,6 +11454,7 @@ |
b4e1fa2c AM |
237 | MYSQL_SYSVAR(file_format_check), |
238 | MYSQL_SYSVAR(file_format_max), | |
239 | MYSQL_SYSVAR(flush_log_at_trx_commit), | |
240 | + MYSQL_SYSVAR(use_global_flush_log_at_trx_commit), | |
241 | MYSQL_SYSVAR(flush_method), | |
242 | MYSQL_SYSVAR(force_recovery), | |
243 | MYSQL_SYSVAR(locks_unsafe_for_binlog), | |
adf0fb13 | 244 | @@ -11376,6 +11492,13 @@ |
b4e1fa2c AM |
245 | MYSQL_SYSVAR(show_verbose_locks), |
246 | MYSQL_SYSVAR(show_locks_held), | |
247 | MYSQL_SYSVAR(version), | |
248 | + MYSQL_SYSVAR(ibuf_max_size), | |
249 | + MYSQL_SYSVAR(ibuf_active_contract), | |
250 | + MYSQL_SYSVAR(ibuf_accel_rate), | |
251 | + MYSQL_SYSVAR(checkpoint_age_target), | |
252 | + MYSQL_SYSVAR(flush_neighbor_pages), | |
253 | + MYSQL_SYSVAR(read_ahead), | |
254 | + MYSQL_SYSVAR(adaptive_flushing_method), | |
b4e1fa2c AM |
255 | MYSQL_SYSVAR(use_sys_malloc), |
256 | MYSQL_SYSVAR(use_native_aio), | |
257 | MYSQL_SYSVAR(change_buffering), | |
258 | diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c | |
259 | --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-11-03 07:01:13.000000000 +0900 | |
260 | +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:10:09.073984282 +0900 | |
adf0fb13 | 261 | @@ -514,8 +514,10 @@ |
b4e1fa2c AM |
262 | grow in size, as the references on the upper levels of the tree can |
263 | change */ | |
264 | ||
265 | - ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE | |
266 | - / IBUF_POOL_SIZE_PER_MAX_SIZE; | |
267 | + ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE | |
268 | + / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE); | |
269 | + | |
270 | + srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE; | |
271 | ||
272 | mutex_create(ibuf_pessimistic_insert_mutex_key, | |
273 | &ibuf_pessimistic_insert_mutex, | |
adf0fb13 | 274 | @@ -2753,9 +2755,11 @@ |
b4e1fa2c AM |
275 | size = ibuf->size; |
276 | max_size = ibuf->max_size; | |
277 | ||
278 | + if (!srv_ibuf_active_contract) { | |
279 | if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { | |
280 | return; | |
281 | } | |
282 | + } | |
283 | ||
284 | sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC); | |
285 | ||
286 | diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h | |
287 | --- a/storage/innobase/include/buf0rea.h 2010-11-03 07:01:13.000000000 +0900 | |
288 | +++ b/storage/innobase/include/buf0rea.h 2010-12-03 15:10:09.076066335 +0900 | |
289 | @@ -124,8 +124,7 @@ | |
290 | ||
291 | /** The size in pages of the area which the read-ahead algorithms read if | |
292 | invoked */ | |
293 | -#define BUF_READ_AHEAD_AREA(b) \ | |
294 | - ut_min(64, ut_2_power_up((b)->curr_size / 32)) | |
295 | +#define BUF_READ_AHEAD_AREA(b) 64 | |
296 | ||
297 | /** @name Modes used in read-ahead @{ */ | |
298 | /** read only pages belonging to the insert buffer tree */ | |
299 | diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h | |
300 | --- a/storage/innobase/include/ha_prototypes.h 2010-11-03 07:01:13.000000000 +0900 | |
301 | +++ b/storage/innobase/include/ha_prototypes.h 2010-12-03 15:10:09.078026360 +0900 | |
adf0fb13 | 302 | @@ -284,6 +284,13 @@ |
b4e1fa2c AM |
303 | /*===================*/ |
304 | void* thd, /*!< in: thread handle (THD*) */ | |
305 | ulint value); /*!< in: time waited for the lock */ | |
306 | +/******************************************************************//** | |
307 | +*/ | |
308 | + | |
309 | +ulong | |
310 | +thd_flush_log_at_trx_commit( | |
311 | +/*================================*/ | |
312 | + void* thd); | |
313 | ||
adf0fb13 AM |
314 | /**********************************************************************//** |
315 | Get the current setting of the lower_case_table_names global parameter from | |
b4e1fa2c AM |
316 | diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h |
317 | --- a/storage/innobase/include/srv0srv.h 2010-12-03 15:09:51.291955835 +0900 | |
318 | +++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:10:09.079029047 +0900 | |
adf0fb13 | 319 | @@ -138,7 +138,8 @@ |
b4e1fa2c AM |
320 | extern ulint srv_n_log_files; |
321 | extern ulint srv_log_file_size; | |
322 | extern ulint srv_log_buffer_size; | |
323 | -extern ulong srv_flush_log_at_trx_commit; | |
324 | +//extern ulong srv_flush_log_at_trx_commit; | |
325 | +extern char srv_use_global_flush_log_at_trx_commit; | |
326 | extern char srv_adaptive_flushing; | |
327 | ||
328 | ||
adf0fb13 | 329 | @@ -216,6 +217,16 @@ |
b4e1fa2c AM |
330 | extern ulong srv_max_purge_lag; |
331 | ||
332 | extern ulong srv_replication_delay; | |
333 | + | |
334 | +extern long long srv_ibuf_max_size; | |
335 | +extern ulint srv_ibuf_active_contract; | |
336 | +extern ulint srv_ibuf_accel_rate; | |
337 | +extern ulint srv_checkpoint_age_target; | |
338 | +extern ulint srv_flush_neighbor_pages; | |
339 | +extern ulint srv_enable_unsafe_group_commit; | |
340 | +extern ulint srv_read_ahead; | |
341 | +extern ulint srv_adaptive_flushing_method; | |
342 | + | |
343 | /*-------------------------------------------*/ | |
344 | ||
345 | extern ulint srv_n_rows_inserted; | |
adf0fb13 | 346 | @@ -394,8 +405,9 @@ |
b4e1fa2c AM |
347 | when writing data files, but do flush |
348 | after writing to log files */ | |
349 | SRV_UNIX_NOSYNC, /*!< do not flush after writing */ | |
350 | - SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on | |
351 | + SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on | |
352 | data files */ | |
353 | + SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */ | |
354 | }; | |
355 | ||
356 | /** Alternatives for file i/o in Windows */ | |
357 | diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c | |
358 | --- a/storage/innobase/log/log0log.c 2010-11-03 07:01:13.000000000 +0900 | |
359 | +++ b/storage/innobase/log/log0log.c 2010-12-03 15:10:09.084023562 +0900 | |
d8778560 AM |
360 | @@ -48,6 +48,7 @@ |
361 | #include "srv0start.h" | |
362 | #include "trx0sys.h" | |
363 | #include "trx0trx.h" | |
364 | +#include "ha_prototypes.h" | |
365 | ||
366 | /* | |
367 | General philosophy of InnoDB redo-logs: | |
368 | @@ -359,6 +360,33 @@ | |
b4e1fa2c AM |
369 | } |
370 | ||
371 | /************************************************************//** | |
372 | +Async modified-age limit, capped at 7/8 of srv_checkpoint_age_target if set. */ | |
373 | +UNIV_INLINE | |
374 | +ulint | |
375 | +log_max_modified_age_async() | |
376 | +{ | |
377 | + if (srv_checkpoint_age_target) { /* non-zero: a target is configured */ | |
378 | + return(ut_min(log_sys->max_modified_age_async, | |
379 | + srv_checkpoint_age_target | |
380 | + - srv_checkpoint_age_target / 8)); | |
381 | + } else { | |
382 | + return(log_sys->max_modified_age_async); /* no target: built-in limit */ | |
383 | + } | |
384 | +} | |
385 | + | |
386 | +UNIV_INLINE /* async checkpoint-age limit, capped by srv_checkpoint_age_target if set */ | |
387 | +ulint | |
388 | +log_max_checkpoint_age_async() | |
389 | +{ | |
390 | + if (srv_checkpoint_age_target) { /* non-zero: a target is configured */ | |
391 | + return(ut_min(log_sys->max_checkpoint_age_async, | |
392 | + srv_checkpoint_age_target)); | |
393 | + } else { | |
394 | + return(log_sys->max_checkpoint_age_async); /* no target: built-in limit */ | |
395 | + } | |
396 | +} | |
397 | + | |
398 | +/************************************************************//** | |
399 | Closes the log. | |
400 | @return lsn */ | |
401 | UNIV_INTERN | |
d8778560 | 402 | @@ -427,7 +455,7 @@ |
b4e1fa2c AM |
403 | } |
404 | } | |
405 | ||
406 | - if (checkpoint_age <= log->max_modified_age_async) { | |
407 | + if (checkpoint_age <= log_max_modified_age_async()) { | |
408 | ||
409 | goto function_exit; | |
410 | } | |
d8778560 | 411 | @@ -435,8 +463,8 @@ |
b4e1fa2c AM |
412 | oldest_lsn = buf_pool_get_oldest_modification(); |
413 | ||
414 | if (!oldest_lsn | |
415 | - || lsn - oldest_lsn > log->max_modified_age_async | |
416 | - || checkpoint_age > log->max_checkpoint_age_async) { | |
417 | + || lsn - oldest_lsn > log_max_modified_age_async() | |
418 | + || checkpoint_age > log_max_checkpoint_age_async()) { | |
419 | ||
420 | log->check_flush_or_checkpoint = TRUE; | |
421 | } | |
d8778560 | 422 | @@ -1100,6 +1128,7 @@ |
b4e1fa2c AM |
423 | group = (log_group_t*)((ulint)group - 1); |
424 | ||
425 | if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC | |
426 | + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT | |
427 | && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { | |
428 | ||
429 | fil_flush(group->space_id); | |
d8778560 | 430 | @@ -1121,8 +1150,9 @@ |
b4e1fa2c AM |
431 | logs and cannot end up here! */ |
432 | ||
433 | if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC | |
434 | + && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT | |
435 | && srv_unix_file_flush_method != SRV_UNIX_NOSYNC | |
436 | - && srv_flush_log_at_trx_commit != 2) { | |
437 | + && thd_flush_log_at_trx_commit(NULL) != 2) { | |
438 | ||
439 | fil_flush(group->space_id); | |
440 | } | |
d8778560 | 441 | @@ -1501,7 +1531,8 @@ |
b4e1fa2c AM |
442 | |
443 | mutex_exit(&(log_sys->mutex)); | |
444 | ||
445 | - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { | |
446 | + if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC | |
447 | + || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { | |
448 | /* O_DSYNC means the OS did not buffer the log file at all: | |
449 | so we have also flushed to disk what we have written */ | |
450 | ||
d8778560 | 451 | @@ -2120,10 +2151,10 @@ |
b4e1fa2c AM |
452 | |
453 | sync = TRUE; | |
454 | advance = 2 * (age - log->max_modified_age_sync); | |
455 | - } else if (age > log->max_modified_age_async) { | |
456 | + } else if (age > log_max_modified_age_async()) { | |
457 | ||
458 | /* A flush is not urgent: we do an asynchronous preflush */ | |
459 | - advance = age - log->max_modified_age_async; | |
460 | + advance = age - log_max_modified_age_async(); | |
461 | } else { | |
462 | advance = 0; | |
463 | } | |
d8778560 | 464 | @@ -2137,7 +2168,7 @@ |
b4e1fa2c AM |
465 | |
466 | do_checkpoint = TRUE; | |
467 | ||
468 | - } else if (checkpoint_age > log->max_checkpoint_age_async) { | |
469 | + } else if (checkpoint_age > log_max_checkpoint_age_async()) { | |
470 | /* A checkpoint is not urgent: do it asynchronously */ | |
471 | ||
472 | do_checkpoint = TRUE; | |
d8778560 | 473 | @@ -3349,6 +3380,17 @@ |
b4e1fa2c AM |
474 | log_sys->flushed_to_disk_lsn, |
475 | log_sys->last_checkpoint_lsn); | |
476 | ||
477 | + fprintf(file, | |
478 | + "Max checkpoint age %lu\n" | |
479 | + "Checkpoint age target %lu\n" | |
480 | + "Modified age %lu\n" | |
481 | + "Checkpoint age %lu\n", | |
482 | + (ulong) log_sys->max_checkpoint_age, | |
483 | + (ulong) log_max_checkpoint_age_async(), | |
484 | + (ulong) (log_sys->lsn - | |
485 | + log_buf_pool_get_oldest_modification()), | |
486 | + (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn)); | |
487 | + | |
488 | current_time = time(NULL); | |
489 | ||
490 | time_elapsed = 0.001 + difftime(current_time, | |
491 | diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c | |
492 | --- a/storage/innobase/log/log0recv.c 2010-11-03 07:01:13.000000000 +0900 | |
493 | +++ b/storage/innobase/log/log0recv.c 2010-12-03 15:10:09.089024191 +0900 | |
494 | @@ -2906,9 +2906,12 @@ | |
495 | ib_uint64_t archived_lsn; | |
496 | #endif /* UNIV_LOG_ARCHIVE */ | |
497 | byte* buf; | |
498 | - byte log_hdr_buf[LOG_FILE_HDR_SIZE]; | |
499 | + byte* log_hdr_buf; | |
500 | + byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE]; | |
501 | ulint err; | |
502 | ||
503 | + log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE); | |
504 | + | |
505 | #ifdef UNIV_LOG_ARCHIVE | |
506 | ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX); | |
507 | /** TRUE when recovering from a checkpoint */ | |
508 | diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c | |
509 | --- a/storage/innobase/os/os0file.c 2010-11-03 07:01:13.000000000 +0900 | |
510 | +++ b/storage/innobase/os/os0file.c 2010-12-03 15:10:09.093023540 +0900 | |
d8778560 | 511 | @@ -1424,7 +1424,7 @@ |
b4e1fa2c AM |
512 | #endif |
513 | #ifdef UNIV_NON_BUFFERED_IO | |
514 | # ifndef UNIV_HOTBACKUP | |
515 | - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) { | |
516 | + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) { | |
517 | /* Do not use unbuffered i/o to log files because | |
518 | value 2 denotes that we do not flush the log at every | |
519 | commit, but only once per second */ | |
d8778560 | 520 | @@ -1440,7 +1440,7 @@ |
b4e1fa2c AM |
521 | attributes = 0; |
522 | #ifdef UNIV_NON_BUFFERED_IO | |
523 | # ifndef UNIV_HOTBACKUP | |
524 | - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) { | |
525 | + if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) { | |
526 | /* Do not use unbuffered i/o to log files because | |
527 | value 2 denotes that we do not flush the log at every | |
528 | commit, but only once per second */ | |
d8778560 | 529 | @@ -1585,6 +1585,11 @@ |
b4e1fa2c AM |
530 | os_file_set_nocache(file, name, mode_str); |
531 | } | |
532 | ||
533 | + /* ALL_O_DIRECT: O_DIRECT also for transaction log file */ | |
534 | + if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { | |
535 | + os_file_set_nocache(file, name, mode_str); | |
536 | + } | |
537 | + | |
538 | #ifdef USE_FILE_LOCK | |
539 | if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) { | |
540 | ||
541 | diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c | |
542 | --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:09:51.301987792 +0900 | |
543 | +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:13:29.369986988 +0900 | |
adf0fb13 | 544 | @@ -183,7 +183,8 @@ |
b4e1fa2c AM |
545 | UNIV_INTERN ulint srv_log_file_size = ULINT_MAX; |
546 | /* size in database pages */ | |
547 | UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX; | |
548 | -UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1; | |
549 | +//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1; | |
550 | +UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE; | |
551 | ||
552 | /* Try to flush dirty pages so as to avoid IO bursts at | |
553 | the checkpoints. */ | |
adf0fb13 | 554 | @@ -402,6 +403,17 @@ |
b4e1fa2c AM |
555 | |
556 | UNIV_INTERN ulong srv_replication_delay = 0; | |
557 | ||
558 | +UNIV_INTERN long long srv_ibuf_max_size = 0; | |
559 | +UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */ | |
560 | +UNIV_INTERN ulint srv_ibuf_accel_rate = 100; | |
561 | +#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0))) | |
562 | + | |
563 | +UNIV_INTERN ulint srv_checkpoint_age_target = 0; | |
564 | +UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */ | |
565 | + | |
566 | +UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */ | |
567 | +UNIV_INTERN ulint srv_read_ahead = 3; /* 0: none 1: random 2: linear 3: both */ | |
568 | +UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */ | |
569 | /*-------------------------------------------*/ | |
570 | UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; | |
571 | UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; | |
adf0fb13 | 572 | @@ -2742,6 +2754,7 @@ |
b4e1fa2c AM |
573 | ulint n_pages_purged = 0; |
574 | ulint n_bytes_merged; | |
575 | ulint n_pages_flushed; | |
576 | + ulint n_pages_flushed_prev = 0; | |
577 | ulint n_bytes_archived; | |
578 | ulint n_tables_to_drop; | |
579 | ulint n_ios; | |
adf0fb13 | 580 | @@ -2749,7 +2762,20 @@ |
b4e1fa2c AM |
581 | ulint n_ios_very_old; |
582 | ulint n_pend_ios; | |
583 | ulint next_itr_time; | |
584 | + ulint prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
585 | + ulint inner_loop = 0; | |
586 | + ibool skip_sleep = FALSE; | |
587 | ulint i; | |
588 | + struct t_prev_flush_info_struct { | |
589 | + ulint count; | |
590 | + unsigned space:32; | |
591 | + unsigned offset:32; | |
592 | + ib_uint64_t oldest_modification; | |
593 | + } prev_flush_info[MAX_BUFFER_POOLS]; | |
594 | + | |
595 | + ib_uint64_t lsn_old; | |
596 | + | |
597 | + ib_uint64_t oldest_lsn; | |
598 | ||
599 | #ifdef UNIV_DEBUG_THREAD_CREATION | |
600 | fprintf(stderr, "Master thread starts, id %lu\n", | |
adf0fb13 | 601 | @@ -2771,6 +2797,9 @@ |
b4e1fa2c AM |
602 | |
603 | mutex_exit(&kernel_mutex); | |
604 | ||
605 | + mutex_enter(&(log_sys->mutex)); | |
606 | + lsn_old = log_sys->lsn; | |
607 | + mutex_exit(&(log_sys->mutex)); | |
608 | loop: | |
609 | /*****************************************************************/ | |
610 | /* ---- When there is database activity by users, we cycle in this | |
adf0fb13 | 611 | @@ -2801,9 +2830,13 @@ |
b4e1fa2c AM |
612 | /* Sleep for 1 second on entrying the for loop below the first time. */ |
613 | next_itr_time = ut_time_ms() + 1000; | |
614 | ||
615 | + skip_sleep = FALSE; | |
616 | + | |
617 | for (i = 0; i < 10; i++) { | |
618 | ulint cur_time = ut_time_ms(); | |
619 | ||
620 | + n_pages_flushed = 0; /* initialize */ | |
621 | + | |
622 | /* ALTER TABLE in MySQL requires on Unix that the table handler | |
623 | can drop tables lazily after there no longer are SELECT | |
624 | queries to them. */ | |
adf0fb13 | 625 | @@ -2827,6 +2860,7 @@ |
b4e1fa2c AM |
626 | srv_main_thread_op_info = "sleeping"; |
627 | srv_main_1_second_loops++; | |
628 | ||
629 | + if (!skip_sleep) { | |
630 | if (next_itr_time > cur_time | |
631 | && srv_shutdown_state == SRV_SHUTDOWN_NONE) { | |
632 | ||
adf0fb13 | 633 | @@ -2837,10 +2871,26 @@ |
b4e1fa2c AM |
634 | (next_itr_time - cur_time) |
635 | * 1000)); | |
636 | srv_main_sleeps++; | |
637 | + | |
638 | + /* | |
639 | + mutex_enter(&(log_sys->mutex)); | |
640 | + oldest_lsn = buf_pool_get_oldest_modification(); | |
641 | + ib_uint64_t lsn = log_sys->lsn; | |
642 | + mutex_exit(&(log_sys->mutex)); | |
643 | + | |
644 | + if(oldest_lsn) | |
645 | + fprintf(stderr, | |
646 | + "InnoDB flush: age pct: %lu, lsn progress: %lu\n", | |
647 | + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age, | |
648 | + lsn - lsn_old); | |
649 | + */ | |
650 | } | |
651 | ||
652 | /* Each iteration should happen at 1 second interval. */ | |
653 | next_itr_time = ut_time_ms() + 1000; | |
654 | + } /* if (!skip_sleep) */ | |
655 | + | |
656 | + skip_sleep = FALSE; | |
657 | ||
658 | /* Flush logs if needed */ | |
659 | srv_sync_log_buffer_in_background(); | |
adf0fb13 | 660 | @@ -2860,7 +2910,7 @@ |
b4e1fa2c AM |
661 | if (n_pend_ios < SRV_PEND_IO_THRESHOLD |
662 | && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) { | |
663 | srv_main_thread_op_info = "doing insert buffer merge"; | |
664 | - ibuf_contract_for_n_pages(FALSE, PCT_IO(5)); | |
665 | + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5)); | |
666 | ||
667 | /* Flush logs if needed */ | |
668 | srv_sync_log_buffer_in_background(); | |
adf0fb13 | 669 | @@ -2877,7 +2927,11 @@ |
b4e1fa2c AM |
670 | n_pages_flushed = buf_flush_list( |
671 | PCT_IO(100), IB_ULONGLONG_MAX); | |
672 | ||
673 | - } else if (srv_adaptive_flushing) { | |
674 | + mutex_enter(&(log_sys->mutex)); | |
675 | + lsn_old = log_sys->lsn; | |
676 | + mutex_exit(&(log_sys->mutex)); | |
677 | + prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
678 | + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) { | |
679 | ||
680 | /* Try to keep the rate of flushing of dirty | |
681 | pages such that redo log generation does not | |
adf0fb13 | 682 | @@ -2893,6 +2947,224 @@ |
b4e1fa2c AM |
683 | n_flush, |
684 | IB_ULONGLONG_MAX); | |
685 | } | |
686 | + | |
687 | + mutex_enter(&(log_sys->mutex)); | |
688 | + lsn_old = log_sys->lsn; | |
689 | + mutex_exit(&(log_sys->mutex)); | |
690 | + prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
691 | + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) { | |
692 | + | |
693 | + /* Try to keep the modified age from exceeding | |
694 | + 7/8 of max_checkpoint_age */ | |
695 | + | |
696 | + mutex_enter(&(log_sys->mutex)); | |
697 | + | |
698 | + oldest_lsn = buf_pool_get_oldest_modification(); | |
699 | + if (oldest_lsn == 0) { | |
700 | + lsn_old = log_sys->lsn; | |
701 | + mutex_exit(&(log_sys->mutex)); | |
702 | + | |
703 | + } else { | |
704 | + if ((log_sys->lsn - oldest_lsn) | |
705 | + > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) { | |
706 | + /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */ | |
707 | + /* We should not flush from here. */ | |
708 | + lsn_old = log_sys->lsn; | |
709 | + mutex_exit(&(log_sys->mutex)); | |
710 | + } else if ((log_sys->lsn - oldest_lsn) | |
711 | + > (log_sys->max_checkpoint_age)/4 ) { | |
712 | + | |
713 | + /* defence line (max_checkpoint_age * 1/4) */ | |
714 | + ib_uint64_t lsn = log_sys->lsn; | |
715 | + | |
716 | + ib_uint64_t level, bpl; | |
717 | + buf_page_t* bpage; | |
718 | + ulint j; | |
719 | + | |
720 | + mutex_exit(&(log_sys->mutex)); | |
721 | + | |
722 | + bpl = 0; | |
723 | + | |
724 | + for (j = 0; j < srv_buf_pool_instances; j++) { | |
725 | + buf_pool_t* buf_pool; | |
726 | + ulint n_blocks; | |
727 | + | |
728 | + buf_pool = buf_pool_from_array(j); | |
729 | + | |
730 | + /* The scanning flush_list is optimistic here */ | |
731 | + | |
732 | + level = 0; | |
733 | + n_blocks = 0; | |
734 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
735 | + | |
736 | + while (bpage != NULL) { | |
737 | + ib_uint64_t oldest_modification = bpage->oldest_modification; | |
738 | + if (oldest_modification != 0) { | |
739 | + level += log_sys->max_checkpoint_age | |
740 | + - (lsn - oldest_modification); | |
741 | + } | |
742 | + bpage = UT_LIST_GET_NEXT(list, bpage); | |
743 | + n_blocks++; | |
744 | + } | |
745 | + | |
746 | + if (level) { | |
747 | + bpl += ((ib_uint64_t) n_blocks * n_blocks | |
748 | + * (lsn - lsn_old)) / level; | |
749 | + } | |
750 | + | |
751 | + } | |
752 | + | |
753 | + if (!srv_use_doublewrite_buf) { | |
754 | + /* flush is faster than when doublewrite */ | |
755 | + bpl = (bpl * 7) / 8; | |
756 | + } | |
757 | + | |
758 | + if (bpl) { | |
759 | +retry_flush_batch: | |
760 | + n_pages_flushed = buf_flush_list(bpl, | |
761 | + oldest_lsn + (lsn - lsn_old)); | |
762 | + if (n_pages_flushed == ULINT_UNDEFINED) { | |
763 | + os_thread_sleep(5000); | |
764 | + goto retry_flush_batch; | |
765 | + } | |
766 | + } | |
767 | + | |
768 | + lsn_old = lsn; | |
769 | + /* | |
770 | + fprintf(stderr, | |
771 | + "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n", | |
772 | + (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age, | |
773 | + lsn - lsn_old, bpl); | |
774 | + */ | |
775 | + } else { | |
776 | + lsn_old = log_sys->lsn; | |
777 | + mutex_exit(&(log_sys->mutex)); | |
778 | + } | |
779 | + } | |
780 | + prev_adaptive_flushing_method = 1; | |
781 | + } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) { | |
782 | + buf_pool_t* buf_pool; | |
783 | + buf_page_t* bpage; | |
784 | + ib_uint64_t lsn; | |
785 | + ulint j; | |
786 | + | |
787 | + mutex_enter(&(log_sys->mutex)); | |
788 | + oldest_lsn = buf_pool_get_oldest_modification(); | |
789 | + lsn = log_sys->lsn; | |
790 | + mutex_exit(&(log_sys->mutex)); | |
791 | + | |
792 | + /* upper loop/sec. (x10) */ | |
793 | + next_itr_time -= 900; /* 1000 - 900 == 100 */ | |
794 | + inner_loop++; | |
795 | + if (inner_loop < 10) { | |
796 | + i--; | |
797 | + } else { | |
798 | + inner_loop = 0; | |
799 | + } | |
800 | + | |
801 | + if (prev_adaptive_flushing_method == 2) { | |
802 | + lint n_flush; | |
d8778560 AM |
803 | + lint blocks_sum; |
804 | + ulint new_blocks_sum, flushed_blocks_sum; | |
b4e1fa2c AM |
805 | + |
806 | + blocks_sum = new_blocks_sum = flushed_blocks_sum = 0; | |
807 | + | |
808 | + /* prev_flush_info[j] should hold the previous loop's values */ | |
809 | + for (j = 0; j < srv_buf_pool_instances; j++) { | |
810 | + lint blocks_num, new_blocks_num, flushed_blocks_num; | |
811 | + ibool found; | |
812 | + | |
813 | + buf_pool = buf_pool_from_array(j); | |
814 | + | |
815 | + blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list); | |
816 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
817 | + new_blocks_num = 0; | |
818 | + | |
819 | + found = FALSE; | |
820 | + while (bpage != NULL) { | |
821 | + if (prev_flush_info[j].space == bpage->space | |
822 | + && prev_flush_info[j].offset == bpage->offset | |
823 | + && prev_flush_info[j].oldest_modification | |
824 | + == bpage->oldest_modification) { | |
825 | + found = TRUE; | |
826 | + break; | |
827 | + } | |
828 | + bpage = UT_LIST_GET_NEXT(list, bpage); | |
829 | + new_blocks_num++; | |
830 | + } | |
831 | + if (!found) { | |
832 | + new_blocks_num = blocks_num; | |
833 | + } | |
834 | + | |
835 | + flushed_blocks_num = new_blocks_num + prev_flush_info[j].count | |
836 | + - blocks_num; | |
837 | + if (flushed_blocks_num < 0) { | |
838 | + flushed_blocks_num = 0; | |
839 | + } | |
840 | + | |
841 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
842 | + | |
843 | + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list); | |
844 | + if (bpage) { | |
845 | + prev_flush_info[j].space = bpage->space; | |
846 | + prev_flush_info[j].offset = bpage->offset; | |
847 | + prev_flush_info[j].oldest_modification = bpage->oldest_modification; | |
848 | + } else { | |
849 | + prev_flush_info[j].space = 0; | |
850 | + prev_flush_info[j].offset = 0; | |
851 | + prev_flush_info[j].oldest_modification = 0; | |
852 | + } | |
853 | + | |
854 | + new_blocks_sum += new_blocks_num; | |
855 | + flushed_blocks_sum += flushed_blocks_num; | |
856 | + blocks_sum += blocks_num; | |
857 | + } | |
858 | + | |
859 | + n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async; | |
860 | + if (flushed_blocks_sum > n_pages_flushed_prev) { | |
861 | + n_flush -= (flushed_blocks_sum - n_pages_flushed_prev); | |
862 | + } | |
863 | + | |
864 | + if (n_flush > 0) { | |
865 | + n_flush++; | |
866 | + n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old)); | |
867 | + } else { | |
868 | + n_pages_flushed = 0; | |
869 | + } | |
870 | + } else { | |
871 | + /* store previous first pages of the flush_list */ | |
872 | + for (j = 0; j < srv_buf_pool_instances; j++) { | |
873 | + buf_pool = buf_pool_from_array(j); | |
874 | + | |
875 | + bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); | |
876 | + | |
877 | + prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list); | |
878 | + if (bpage) { | |
879 | + prev_flush_info[j].space = bpage->space; | |
880 | + prev_flush_info[j].offset = bpage->offset; | |
881 | + prev_flush_info[j].oldest_modification = bpage->oldest_modification; | |
882 | + } else { | |
883 | + prev_flush_info[j].space = 0; | |
884 | + prev_flush_info[j].offset = 0; | |
885 | + prev_flush_info[j].oldest_modification = 0; | |
886 | + } | |
887 | + } | |
888 | + n_pages_flushed = 0; | |
889 | + } | |
890 | + | |
891 | + lsn_old = lsn; | |
892 | + prev_adaptive_flushing_method = 2; | |
893 | + } else { | |
894 | + mutex_enter(&(log_sys->mutex)); | |
895 | + lsn_old = log_sys->lsn; | |
896 | + mutex_exit(&(log_sys->mutex)); | |
897 | + prev_adaptive_flushing_method = ULINT_UNDEFINED; | |
898 | + } | |
899 | + | |
900 | + if (n_pages_flushed == ULINT_UNDEFINED) { | |
901 | + n_pages_flushed_prev = 0; | |
902 | + } else { | |
903 | + n_pages_flushed_prev = n_pages_flushed; | |
904 | } | |
905 | ||
906 | if (srv_activity_count == old_activity_count) { | |
adf0fb13 | 907 | @@ -2941,7 +3213,7 @@ |
b4e1fa2c AM |
908 | even if the server were active */ |
909 | ||
910 | srv_main_thread_op_info = "doing insert buffer merge"; | |
911 | - ibuf_contract_for_n_pages(FALSE, PCT_IO(5)); | |
912 | + ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5)); | |
913 | ||
914 | /* Flush logs if needed */ | |
915 | srv_sync_log_buffer_in_background(); | |
adf0fb13 | 916 | @@ -3049,7 +3321,7 @@ |
b4e1fa2c AM |
917 | buf_flush_list below. Otherwise, the system favors |
918 | clean pages over cleanup throughput. */ | |
919 | n_bytes_merged = ibuf_contract_for_n_pages(FALSE, | |
920 | - PCT_IO(100)); | |
921 | + PCT_IBUF_IO(100)); | |
922 | } | |
923 | ||
924 | srv_main_thread_op_info = "reserving kernel mutex"; | |
adf0fb13 AM |
925 | @@ -3189,6 +3461,7 @@ |
926 | srv_slot_t* slot; | |
11822e22 | 927 | ulint retries = 0; |
b4e1fa2c AM |
928 | ulint n_total_purged = ULINT_UNDEFINED; |
929 | + ulint next_itr_time; | |
930 | ||
931 | ut_a(srv_n_purge_threads == 1); | |
932 | ||
adf0fb13 | 933 | @@ -3209,9 +3482,12 @@ |
b4e1fa2c AM |
934 | |
935 | mutex_exit(&kernel_mutex); | |
936 | ||
937 | + next_itr_time = ut_time_ms(); | |
938 | + | |
939 | while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { | |
940 | ||
11822e22 | 941 | ulint n_pages_purged = 0; |
b4e1fa2c AM |
942 | + ulint cur_time; |
943 | ||
944 | /* If there are very few records to purge or the last | |
945 | purge didn't purge any records then wait for activity. | |
adf0fb13 | 946 | @@ -3258,6 +3534,16 @@ |
b4e1fa2c AM |
947 | } while (n_pages_purged > 0 && !srv_fast_shutdown); |
948 | ||
949 | srv_sync_log_buffer_in_background(); | |
950 | + | |
951 | + cur_time = ut_time_ms(); | |
952 | + if (next_itr_time > cur_time) { | |
953 | + os_thread_sleep(ut_min(1000000, | |
954 | + (next_itr_time - cur_time) | |
955 | + * 1000)); | |
956 | + next_itr_time = ut_time_ms() + 1000; | |
957 | + } else { | |
958 | + next_itr_time = cur_time + 1000; | |
959 | + } | |
960 | } | |
961 | ||
962 | mutex_enter(&kernel_mutex); | |
963 | diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c | |
964 | --- a/storage/innobase/srv/srv0start.c 2010-11-03 07:01:13.000000000 +0900 | |
965 | +++ b/storage/innobase/srv/srv0start.c 2010-12-03 15:10:09.103023543 +0900 | |
adf0fb13 | 966 | @@ -1217,6 +1217,9 @@ |
b4e1fa2c AM |
967 | } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) { |
968 | srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; | |
969 | ||
970 | + } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) { | |
971 | + srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT; | |
972 | + | |
973 | } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) { | |
974 | srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; | |
975 | ||
11822e22 AM |
976 | diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c |
977 | --- a/storage/innobase/trx/trx0purge.c 2011-04-12 14:14:14.000000000 +0900 | |
978 | +++ b/storage/innobase/trx/trx0purge.c 2011-04-12 14:15:44.000000000 +0900 | |
979 | @@ -392,10 +392,10 @@ | |
980 | trx_sys->rseg_history_len++; | |
981 | mutex_exit(&kernel_mutex); | |
982 | ||
983 | - if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { | |
984 | +// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/ | |
985 | /* Inform the purge thread that there is work to do. */ | |
986 | srv_wake_purge_thread_if_not_active(); | |
987 | - } | |
988 | +// } | |
989 | } | |
990 | ||
991 | /**********************************************************************//** | |
b4e1fa2c AM |
992 | diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c |
993 | --- a/storage/innobase/trx/trx0trx.c 2010-11-03 07:01:13.000000000 +0900 | |
994 | +++ b/storage/innobase/trx/trx0trx.c 2010-12-03 15:10:09.106023937 +0900 | |
adf0fb13 | 995 | @@ -984,6 +984,7 @@ |
b4e1fa2c AM |
996 | trx->read_view = NULL; |
997 | ||
998 | if (lsn) { | |
999 | + ulint flush_log_at_trx_commit; | |
1000 | ||
1001 | mutex_exit(&kernel_mutex); | |
1002 | ||
adf0fb13 | 1003 | @@ -992,6 +993,12 @@ |
b4e1fa2c AM |
1004 | trx_undo_insert_cleanup(trx); |
1005 | } | |
1006 | ||
1007 | + if (srv_use_global_flush_log_at_trx_commit) { | |
1008 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); | |
1009 | + } else { | |
1010 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); | |
1011 | + } | |
1012 | + | |
1013 | /* NOTE that we could possibly make a group commit more | |
1014 | efficient here: call os_thread_yield here to allow also other | |
1015 | trxs to come to commit! */ | |
adf0fb13 | 1016 | @@ -1023,9 +1030,9 @@ |
b4e1fa2c AM |
1017 | if (trx->flush_log_later) { |
1018 | /* Do nothing yet */ | |
1019 | trx->must_flush_log_later = TRUE; | |
1020 | - } else if (srv_flush_log_at_trx_commit == 0) { | |
1021 | + } else if (flush_log_at_trx_commit == 0) { | |
1022 | /* Do nothing */ | |
1023 | - } else if (srv_flush_log_at_trx_commit == 1) { | |
1024 | + } else if (flush_log_at_trx_commit == 1) { | |
1025 | if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { | |
1026 | /* Write the log but do not flush it to disk */ | |
1027 | ||
adf0fb13 | 1028 | @@ -1037,7 +1044,7 @@ |
b4e1fa2c AM |
1029 | |
1030 | log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); | |
1031 | } | |
1032 | - } else if (srv_flush_log_at_trx_commit == 2) { | |
1033 | + } else if (flush_log_at_trx_commit == 2) { | |
1034 | ||
1035 | /* Write the log but do not flush it to disk */ | |
1036 | ||
adf0fb13 | 1037 | @@ -1701,16 +1708,23 @@ |
b4e1fa2c AM |
1038 | trx_t* trx) /*!< in: trx handle */ |
1039 | { | |
1040 | ib_uint64_t lsn = trx->commit_lsn; | |
1041 | + ulint flush_log_at_trx_commit; | |
1042 | ||
1043 | ut_a(trx); | |
1044 | ||
1045 | trx->op_info = "flushing log"; | |
1046 | ||
1047 | + if (srv_use_global_flush_log_at_trx_commit) { | |
1048 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); | |
1049 | + } else { | |
1050 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); | |
1051 | + } | |
1052 | + | |
1053 | if (!trx->must_flush_log_later) { | |
1054 | /* Do nothing */ | |
1055 | - } else if (srv_flush_log_at_trx_commit == 0) { | |
1056 | + } else if (flush_log_at_trx_commit == 0) { | |
1057 | /* Do nothing */ | |
1058 | - } else if (srv_flush_log_at_trx_commit == 1) { | |
1059 | + } else if (flush_log_at_trx_commit == 1) { | |
1060 | if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { | |
1061 | /* Write the log but do not flush it to disk */ | |
1062 | ||
adf0fb13 | 1063 | @@ -1721,7 +1735,7 @@ |
b4e1fa2c AM |
1064 | |
1065 | log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); | |
1066 | } | |
1067 | - } else if (srv_flush_log_at_trx_commit == 2) { | |
1068 | + } else if (flush_log_at_trx_commit == 2) { | |
1069 | ||
1070 | /* Write the log but do not flush it to disk */ | |
1071 | ||
adf0fb13 | 1072 | @@ -1969,6 +1983,8 @@ |
b4e1fa2c AM |
1073 | /*--------------------------------------*/ |
1074 | ||
1075 | if (lsn) { | |
1076 | + ulint flush_log_at_trx_commit; | |
1077 | + | |
1078 | /* Depending on the my.cnf options, we may now write the log | |
1079 | buffer to the log files, making the prepared state of the | |
1080 | transaction durable if the OS does not crash. We may also | |
adf0fb13 | 1081 | @@ -1988,9 +2004,15 @@ |
b4e1fa2c AM |
1082 | |
1083 | mutex_exit(&kernel_mutex); | |
1084 | ||
1085 | - if (srv_flush_log_at_trx_commit == 0) { | |
1086 | + if (srv_use_global_flush_log_at_trx_commit) { | |
1087 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); | |
1088 | + } else { | |
1089 | + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); | |
1090 | + } | |
1091 | + | |
1092 | + if (flush_log_at_trx_commit == 0) { | |
1093 | /* Do nothing */ | |
1094 | - } else if (srv_flush_log_at_trx_commit == 1) { | |
1095 | + } else if (flush_log_at_trx_commit == 1) { | |
1096 | if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { | |
1097 | /* Write the log but do not flush it to disk */ | |
1098 | ||
adf0fb13 | 1099 | @@ -2002,7 +2024,7 @@ |
b4e1fa2c AM |
1100 | |
1101 | log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); | |
1102 | } | |
1103 | - } else if (srv_flush_log_at_trx_commit == 2) { | |
1104 | + } else if (flush_log_at_trx_commit == 2) { | |
1105 | ||
1106 | /* Write the log but do not flush it to disk */ | |
1107 |