1 diff -ruN a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c
2 --- a/innobase/buf/buf0flu.c Fri Jul 03 15:41:25 2009 -0700
3 +++ b/innobase/buf/buf0flu.c Fri Jul 03 15:41:32 2009 -0700
6 /* Now flush the doublewrite buffer data to disk */
8 - fil_flush(TRX_SYS_SPACE);
9 + fil_flush(TRX_SYS_SPACE, FLUSH_FROM_DIRTY_BUFFER);
11 /* We know that the writes have been flushed to disk now
12 and in recovery we will find them in the doublewrite buffer
15 /* Now we flush the data to disk (for example, with fsync) */
17 - fil_flush_file_spaces(FIL_TABLESPACE);
18 + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_DIRTY_BUFFER);
20 /* We can now reuse the doublewrite memory buffer: */
25 /* Force the log to the disk before writing the modified block */
26 - log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
27 + log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE,
28 + LOG_WRITE_FROM_DIRTY_BUFFER);
30 buf_flush_init_for_writing(block->frame, block->newest_modification,
31 block->space, block->offset);
32 diff -ruN a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c
33 --- a/innobase/fil/fil0fil.c Fri Jul 03 15:41:25 2009 -0700
34 +++ b/innobase/fil/fil0fil.c Fri Jul 03 15:41:32 2009 -0700
37 UT_LIST_BASE_NODE_T(fil_space_t) space_list;
38 /* list of all file spaces */
39 + ulint flush_types[FLUSH_FROM_NUMBER];/* calls to fil_flush by caller */
42 /* The tablespace memory cache. This variable is NULL before the module is
44 /* Flush tablespaces so that we can close modified files in the LRU
47 - fil_flush_file_spaces(FIL_TABLESPACE);
48 + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
52 @@ -1309,7 +1310,10 @@
54 UT_LIST_INIT(system->unflushed_spaces);
55 UT_LIST_INIT(system->space_list);
59 + for (x = 0; x < FLUSH_FROM_NUMBER; ++x) system->flush_types[x] = 0;
64 @@ -1440,6 +1444,23 @@
67 /********************************************************************
68 +Prints internal counters */
71 +fil_print(FILE *file)
74 + "fsync callers: %lu buffer pool, %lu other, %lu checkpoint, "
75 + "%lu log aio, %lu log sync, %lu archive\n",
76 + fil_system->flush_types[FLUSH_FROM_DIRTY_BUFFER],
77 + fil_system->flush_types[FLUSH_FROM_OTHER],
78 + fil_system->flush_types[FLUSH_FROM_CHECKPOINT],
79 + fil_system->flush_types[FLUSH_FROM_LOG_IO_COMPLETE],
80 + fil_system->flush_types[FLUSH_FROM_LOG_WRITE_UP_TO],
81 + fil_system->flush_types[FLUSH_FROM_ARCHIVE]);
84 +/********************************************************************
85 Initializes the ibuf data structure for space 0 == the system tablespace.
86 This can be called after the file space headers have been created and the
87 dictionary system has been initialized. */
90 os_thread_sleep(20000);
93 + fil_flush(id, FLUSH_FROM_OTHER);
98 size_after_extend, *actual_size); */
99 mutex_exit(&(system->mutex));
101 - fil_flush(space_id);
102 + fil_flush(space_id, FLUSH_FROM_OTHER);
106 @@ -4167,8 +4188,9 @@
110 - ulint space_id) /* in: file space id (this can be a group of
111 + ulint space_id, /* in: file space id (this can be a group of
112 log files or a tablespace of the database) */
113 + flush_from_type flush_type)/* in: identifies the caller */
115 fil_system_t* system = fil_system;
117 @@ -4177,7 +4199,7 @@
118 ib_longlong old_mod_counter;
120 mutex_enter(&(system->mutex));
122 + system->flush_types[flush_type]++;
123 HASH_SEARCH(hash, system->spaces, space_id, space,
124 space->id == space_id);
125 if (!space || space->is_being_deleted) {
126 @@ -4282,7 +4304,8 @@
128 fil_flush_file_spaces(
129 /*==================*/
130 - ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */
131 + ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */
132 + flush_from_type flush_type)/* in: identifies the caller */
134 fil_system_t* system = fil_system;
136 @@ -4323,7 +4346,7 @@
137 a non-existing space id. */
138 for (i = 0; i < n_space_ids; i++) {
140 - fil_flush(space_ids[i]);
141 + fil_flush(space_ids[i], flush_type);
145 diff -ruN a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
146 --- a/innobase/include/fil0fil.h Fri Jul 03 15:41:25 2009 -0700
147 +++ b/innobase/include/fil0fil.h Fri Jul 03 15:41:32 2009 -0700
151 ulint max_n_open); /* in: max number of open files */
152 +/********************************************************************
153 + * Prints internal counters. */
158 + FILE* file); /* in: output stream */
159 /***********************************************************************
160 Opens all log files and system tablespace data files. They stay open until the
161 database server shutdown. This should be called at a server startup after the
162 @@ -625,14 +632,26 @@
163 ulint segment); /* in: the number of the segment in the aio
165 /**************************************************************************
166 +Identifies the caller of fil_flush. */
168 + FLUSH_FROM_DIRTY_BUFFER,
170 + FLUSH_FROM_CHECKPOINT,
171 + FLUSH_FROM_LOG_IO_COMPLETE,
172 + FLUSH_FROM_LOG_WRITE_UP_TO,
173 + FLUSH_FROM_ARCHIVE,
176 +/**************************************************************************
177 Flushes to disk possible writes cached by the OS. If the space does not exist
178 or is being dropped, does not do anything. */
183 - ulint space_id); /* in: file space id (this can be a group of
184 + ulint space_id, /* in: file space id (this can be a group of
185 log files or a tablespace of the database) */
186 + flush_from_type flush_type);/* in: identifies the caller */
187 /**************************************************************************
188 Flushes to disk writes in file spaces of the given type possibly cached by
192 fil_flush_file_spaces(
193 /*==================*/
194 - ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */
195 + ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */
196 + flush_from_type flush_type);/* in: identifies the caller */
197 /**********************************************************************
198 Checks the consistency of the tablespace cache. */
200 diff -ruN a/innobase/include/log0log.h b/innobase/include/log0log.h
201 --- a/innobase/include/log0log.h Fri Jul 03 15:41:25 2009 -0700
202 +++ b/innobase/include/log0log.h Fri Jul 03 15:41:32 2009 -0700
206 log_group_t* group); /* in: log group */
208 +/**********************************************************
209 +Describes the caller of log_write_up_to. */
212 + LOG_WRITE_FROM_DIRTY_BUFFER,
213 + LOG_WRITE_FROM_BACKGROUND_SYNC,
214 + LOG_WRITE_FROM_BACKGROUND_ASYNC,
215 + LOG_WRITE_FROM_INTERNAL,
216 + LOG_WRITE_FROM_CHECKPOINT_SYNC,
217 + LOG_WRITE_FROM_CHECKPOINT_ASYNC,
218 + LOG_WRITE_FROM_LOG_ARCHIVE,
219 + LOG_WRITE_FROM_COMMIT_SYNC,
220 + LOG_WRITE_FROM_COMMIT_ASYNC,
221 + LOG_WRITE_FROM_NUMBER
223 /**********************************************************
224 This function is called, e.g., when a transaction wants to commit. It checks
225 that the log has been written to the log file up to the last log entry written
227 be written, ut_dulint_max if not specified */
228 ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
229 or LOG_WAIT_ALL_GROUPS */
230 - ibool flush_to_disk);
231 - /* in: TRUE if we want the written log also to be
233 + ibool flush_to_disk,
234 + /* in: TRUE if we want the written log also to be flushed to disk */
235 + log_sync_type caller);/* in: identifies the caller */
236 /********************************************************************
237 Does a syncronous flush of the log buffer to disk. */
242 log_buffer_flush_maybe_sync(void);
243 +/*==========================*/
244 +/********************************************************************
245 +Flushes the log buffer. Forces it to disk depending on the value of
246 +the configuration parameter innodb_flush_log_at_trx_commit. */
249 +log_buffer_flush_maybe_sync(void);
250 /*=============================*/
251 /********************************************************************
252 Advances the smallest lsn for which there are unflushed dirty blocks in the
254 AND flushed to disk */
255 ulint n_pending_writes;/* number of currently pending flushes
257 + ulint log_sync_callers[LOG_WRITE_FROM_NUMBER];
258 + /* counts calls to log_write_up_to */
259 + ulint log_sync_syncers[LOG_WRITE_FROM_NUMBER];
260 + /* counts calls to log_write_up_to when log file is sync'd */
261 + ulint n_syncs; /* number of fsyncs done for log file */
262 + ulint n_checkpoints; /* number of calls to log_checkpoint */
263 /* NOTE on the 'flush' in names of the fields below: starting from
264 4.0.14, we separate the write of the log file and the actual fsync()
265 or other method to flush it to disk. The names below shhould really
266 diff -ruN a/innobase/log/log0log.c b/innobase/log/log0log.c
267 --- a/innobase/log/log0log.c Fri Jul 03 15:41:25 2009 -0700
268 +++ b/innobase/log/log0log.c Fri Jul 03 15:41:32 2009 -0700
270 log_sys->written_to_all_lsn = log_sys->lsn;
272 log_sys->n_pending_writes = 0;
275 + for (x = 0; x < LOG_WRITE_FROM_NUMBER; ++x) {
276 + log_sys->log_sync_callers[x] = 0;
277 + log_sys->log_sync_syncers[x] = 0;
280 + log_sys->n_syncs = 0;
281 + log_sys->n_checkpoints = 0;
283 log_sys->no_flush_event = os_event_create(NULL);
285 @@ -1066,7 +1075,7 @@
286 if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
287 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
289 - fil_flush(group->space_id);
290 + fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE);
294 @@ -1088,7 +1097,7 @@
295 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
296 && srv_flush_log_at_trx_commit != 2) {
298 - fil_flush(group->space_id);
299 + fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE);
302 mutex_enter(&(log_sys->mutex));
303 @@ -1303,9 +1312,10 @@
304 be written, ut_dulint_max if not specified */
305 ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
306 or LOG_WAIT_ALL_GROUPS */
307 - ibool flush_to_disk)
308 + ibool flush_to_disk,
309 /* in: TRUE if we want the written log also to be
311 + log_sync_type caller) /* in: identifies caller */
315 @@ -1315,6 +1325,7 @@
319 + log_sys->log_sync_callers[caller]++;
320 if (recv_no_ibuf_operations) {
321 /* Recovery is running and no operations on the log files are
322 allowed yet (the variable name .._no_ibuf_.. is misleading) */
323 @@ -1465,13 +1476,17 @@
324 so we have also flushed to disk what we have written */
326 log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
327 + log_sys->n_syncs++;
328 + log_sys->log_sync_syncers[caller]++;
330 } else if (flush_to_disk) {
332 group = UT_LIST_GET_FIRST(log_sys->log_groups);
334 - fil_flush(group->space_id);
335 + fil_flush(group->space_id, FLUSH_FROM_LOG_WRITE_UP_TO);
336 log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
337 + log_sys->n_syncs++;
338 + log_sys->log_sync_syncers[caller]++;
341 mutex_enter(&(log_sys->mutex));
342 @@ -1520,7 +1535,8 @@
344 mutex_exit(&(log_sys->mutex));
346 - log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
347 + log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE,
348 + LOG_WRITE_FROM_BACKGROUND_SYNC);
351 /********************************************************************
352 @@ -1574,7 +1590,7 @@
353 mutex_exit(&(log->mutex));
356 - log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
357 + log_write_up_to(lsn, LOG_NO_WAIT, FALSE, LOG_WRITE_FROM_INTERNAL);
361 @@ -1944,11 +1960,11 @@
364 if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
365 - fil_flush_file_spaces(FIL_TABLESPACE);
366 + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_CHECKPOINT);
369 mutex_enter(&(log_sys->mutex));
371 + log_sys->n_checkpoints++;
372 oldest_lsn = log_buf_pool_get_oldest_modification();
374 mutex_exit(&(log_sys->mutex));
375 @@ -1961,7 +1977,8 @@
376 write-ahead-logging algorithm ensures that the log has been flushed
379 - log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
380 + log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE,
381 + LOG_WRITE_FROM_CHECKPOINT_SYNC);
383 mutex_enter(&(log_sys->mutex));
385 @@ -2589,7 +2606,7 @@
387 mutex_exit(&(log_sys->mutex));
389 - fil_flush(group->archive_space_id);
390 + fil_flush(group->archive_space_id, FLUSH_FROM_ARCHIVE);
392 mutex_enter(&(log_sys->mutex));
394 @@ -2670,7 +2687,8 @@
396 mutex_exit(&(log_sys->mutex));
398 - log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
399 + log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE,
400 + LOG_WRITE_FROM_LOG_ARCHIVE);
402 calc_new_limit = FALSE;
404 @@ -3207,8 +3225,8 @@
406 mutex_exit(&kernel_mutex);
408 - fil_flush_file_spaces(FIL_TABLESPACE);
409 - fil_flush_file_spaces(FIL_LOG);
410 + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
411 + fil_flush_file_spaces(FIL_LOG, FLUSH_FROM_OTHER);
413 /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
414 pool: therefore it is essential that the buffer pool has been
415 @@ -3241,7 +3259,7 @@
417 fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
419 - fil_flush_file_spaces(FIL_TABLESPACE);
420 + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
422 fil_close_all_files();
424 @@ -3363,16 +3381,46 @@
425 time_elapsed = 0.001 + difftime(current_time,
426 log_sys->last_printout_time);
428 - "%lu pending log writes, %lu pending chkp writes\n"
429 - "%lu log i/o's done, %.2f log i/o's/second\n",
430 - (ulong) log_sys->n_pending_writes,
431 - (ulong) log_sys->n_pending_checkpoint_writes,
432 - (ulong) log_sys->n_log_ios,
433 - ((log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed));
434 + "%lu pending log writes, %lu pending chkp writes\n"
435 + "%lu log i/o's done, %.2f log i/o's/second, %lu syncs, %lu checkpoints\n",
436 + (ulong) log_sys->n_pending_writes,
437 + (ulong) log_sys->n_pending_checkpoint_writes,
438 + (ulong) log_sys->n_log_ios,
439 + (log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed,
441 + log_sys->n_checkpoints);
443 log_sys->n_log_ios_old = log_sys->n_log_ios;
444 log_sys->last_printout_time = current_time;
447 + "log sync callers: %lu buffer pool, background %lu sync and %lu async, "
448 + "%lu internal, checkpoint %lu sync and %lu async, %lu archive, "
449 + "commit %lu sync and %lu async\n",
450 + log_sys->log_sync_callers[LOG_WRITE_FROM_DIRTY_BUFFER],
451 + log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_SYNC],
452 + log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_ASYNC],
453 + log_sys->log_sync_callers[LOG_WRITE_FROM_INTERNAL],
454 + log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_SYNC],
455 + log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_ASYNC],
456 + log_sys->log_sync_callers[LOG_WRITE_FROM_LOG_ARCHIVE],
457 + log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_SYNC],
458 + log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_ASYNC]);
461 + "log sync syncers: %lu buffer pool, background %lu sync and %lu async, "
462 + "%lu internal, checkpoint %lu sync and %lu async, %lu archive, "
463 + "commit %lu sync and %lu async\n",
464 + log_sys->log_sync_syncers[LOG_WRITE_FROM_DIRTY_BUFFER],
465 + log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_SYNC],
466 + log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_ASYNC],
467 + log_sys->log_sync_syncers[LOG_WRITE_FROM_INTERNAL],
468 + log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_SYNC],
469 + log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_ASYNC],
470 + log_sys->log_sync_syncers[LOG_WRITE_FROM_LOG_ARCHIVE],
471 + log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_SYNC],
472 + log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_ASYNC]);
474 mutex_exit(&(log_sys->mutex));
477 diff -ruN a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
478 --- a/innobase/srv/srv0srv.c Fri Jul 03 15:41:25 2009 -0700
479 +++ b/innobase/srv/srv0srv.c Fri Jul 03 15:41:32 2009 -0700
480 @@ -1698,6 +1698,12 @@
481 (ulong)time_elapsed);
484 + "BACKGROUND THREAD\n"
485 + "----------\n", file);
489 + fputs("----------\n"
491 "----------\n", file);
493 diff -ruN a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c
494 --- a/innobase/trx/trx0sys.c Fri Jul 03 15:41:25 2009 -0700
495 +++ b/innobase/trx/trx0sys.c Fri Jul 03 15:41:32 2009 -0700
497 page += UNIV_PAGE_SIZE;
500 - fil_flush_file_spaces(FIL_TABLESPACE);
501 + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER);
504 ut_free(unaligned_read_buf);
505 diff -ruN a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c
506 --- a/innobase/trx/trx0trx.c Fri Jul 03 15:41:25 2009 -0700
507 +++ b/innobase/trx/trx0trx.c Fri Jul 03 15:41:32 2009 -0700
508 @@ -942,19 +942,21 @@
509 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
510 /* Write the log but do not flush it to disk */
512 - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
514 + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
515 + LOG_WRITE_FROM_COMMIT_ASYNC);
517 /* Write the log to the log files AND flush
520 - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
521 + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE,
522 + LOG_WRITE_FROM_COMMIT_SYNC);
524 } else if (srv_flush_log_at_trx_commit == 2) {
526 /* Write the log but do not flush it to disk */
528 - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
529 + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
530 + LOG_WRITE_FROM_COMMIT_ASYNC);
534 @@ -1701,18 +1703,21 @@
535 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
536 /* Write the log but do not flush it to disk */
538 - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
539 + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
540 + LOG_WRITE_FROM_COMMIT_ASYNC);
542 /* Write the log to the log files AND flush them to
545 - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
546 + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE,
547 + LOG_WRITE_FROM_COMMIT_SYNC);
549 } else if (srv_flush_log_at_trx_commit == 2) {
551 /* Write the log but do not flush it to disk */
553 - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
554 + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
555 + LOG_WRITE_FROM_COMMIT_ASYNC);
559 @@ -1946,19 +1951,21 @@
560 if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
561 /* Write the log but do not flush it to disk */
563 - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
565 + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
566 + LOG_WRITE_FROM_COMMIT_ASYNC);
568 /* Write the log to the log files AND flush
571 - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
572 + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE,
573 + LOG_WRITE_FROM_COMMIT_SYNC);
575 } else if (srv_flush_log_at_trx_commit == 2) {
577 /* Write the log but do not flush it to disk */
579 - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
580 + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE,
581 + LOG_WRITE_FROM_COMMIT_ASYNC);
585 diff -ruN /dev/null b/patch_info/innodb_fsync_source.info
586 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
587 +++ b/patch_info/innodb_fsync_source.info Fri Jul 03 15:41:32 2009 -0700
589 +File=innodb_fsync_source.patch
590 +Name=Information of fsync callers in InnoDB