diff -r ef44d8017b6b innobase/buf/buf0flu.c --- a/innobase/buf/buf0flu.c Fri Jul 03 15:41:25 2009 -0700 +++ b/innobase/buf/buf0flu.c Fri Jul 03 15:41:32 2009 -0700 @@ -341,7 +341,7 @@ /* Now flush the doublewrite buffer data to disk */ - fil_flush(TRX_SYS_SPACE); + fil_flush(TRX_SYS_SPACE, FLUSH_FROM_DIRTY_BUFFER); /* We know that the writes have been flushed to disk now and in recovery we will find them in the doublewrite buffer @@ -381,7 +381,7 @@ /* Now we flush the data to disk (for example, with fsync) */ - fil_flush_file_spaces(FIL_TABLESPACE); + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_DIRTY_BUFFER); /* We can now reuse the doublewrite memory buffer: */ @@ -501,7 +501,8 @@ } #else /* Force the log to the disk before writing the modified block */ - log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE); + log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE, + LOG_WRITE_FROM_DIRTY_BUFFER); #endif buf_flush_init_for_writing(block->frame, block->newest_modification, block->space, block->offset); diff -r ef44d8017b6b innobase/fil/fil0fil.c --- a/innobase/fil/fil0fil.c Fri Jul 03 15:41:25 2009 -0700 +++ b/innobase/fil/fil0fil.c Fri Jul 03 15:41:32 2009 -0700 @@ -245,6 +245,7 @@ request */ UT_LIST_BASE_NODE_T(fil_space_t) space_list; /* list of all file spaces */ + ulint flush_types[FLUSH_FROM_NUMBER];/* calls to fil_flush by caller */ }; /* The tablespace memory cache. This variable is NULL before the module is @@ -849,7 +850,7 @@ /* Flush tablespaces so that we can close modified files in the LRU list */ - fil_flush_file_spaces(FIL_TABLESPACE); + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); count++; @@ -1309,7 +1310,10 @@ UT_LIST_INIT(system->unflushed_spaces); UT_LIST_INIT(system->space_list); - + { + int x; + for (x = 0; x < FLUSH_FROM_NUMBER; ++x) system->flush_types[x] = 0; + } return(system); } @@ -1437,6 +1441,23 @@ } mutex_exit(&(system->mutex)); +} + +/******************************************************************** +Prints internal counters */ + +void +fil_print(FILE *file) +{ + fprintf(file, + "fsync callers: %lu buffer pool, %lu other, %lu checkpoint, " + "%lu log aio, %lu log sync, %lu archive\n", + fil_system->flush_types[FLUSH_FROM_DIRTY_BUFFER], + fil_system->flush_types[FLUSH_FROM_OTHER], + fil_system->flush_types[FLUSH_FROM_CHECKPOINT], + fil_system->flush_types[FLUSH_FROM_LOG_IO_COMPLETE], + fil_system->flush_types[FLUSH_FROM_LOG_WRITE_UP_TO], + fil_system->flush_types[FLUSH_FROM_ARCHIVE]); } /******************************************************************** @@ -2256,7 +2277,7 @@ os_thread_sleep(20000); - fil_flush(id); + fil_flush(id, FLUSH_FROM_OTHER); goto retry; @@ -3574,7 +3595,7 @@ size_after_extend, *actual_size); */ mutex_exit(&(system->mutex)); - fil_flush(space_id); + fil_flush(space_id, FLUSH_FROM_OTHER); return(success); } @@ -4167,8 +4188,9 @@ void fil_flush( /*======*/ - ulint space_id) /* in: file space id (this can be a group of + ulint space_id, /* in: file space id (this can be a group of log files or a tablespace of the database) */ + flush_from_type flush_type)/* in: identifies the caller */ { fil_system_t* system = fil_system; fil_space_t* space; @@ -4177,7 +4199,7 @@ ib_longlong old_mod_counter; mutex_enter(&(system->mutex)); - + system->flush_types[flush_type]++; HASH_SEARCH(hash, system->spaces, space_id, space, space->id == space_id); if (!space || space->is_being_deleted) { @@ -4282,7 +4304,8 @@ void fil_flush_file_spaces( /*==================*/ - ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */ + ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */ + flush_from_type flush_type)/* in: identifies the caller */ { fil_system_t* system = fil_system; fil_space_t* space; @@ -4323,7 +4346,7 @@ a non-existing space id. */ for (i = 0; i < n_space_ids; i++) { - fil_flush(space_ids[i]); + fil_flush(space_ids[i], flush_type); } mem_free(space_ids); diff -r ef44d8017b6b innobase/include/fil0fil.h --- a/innobase/include/fil0fil.h Fri Jul 03 15:41:25 2009 -0700 +++ b/innobase/include/fil0fil.h Fri Jul 03 15:41:32 2009 -0700 @@ -197,6 +197,13 @@ fil_init( /*=====*/ ulint max_n_open); /* in: max number of open files */ +/******************************************************************** + * Prints internal counters. */ + +void +fil_print( + /*=====*/ + FILE* file); /* in: output stream */ /*********************************************************************** Opens all log files and system tablespace data files. They stay open until the database server shutdown. This should be called at a server startup after the @@ -625,14 +632,26 @@ ulint segment); /* in: the number of the segment in the aio array to wait for */ /************************************************************************** +Identifies the caller of fil_flush. */ +typedef enum { + FLUSH_FROM_DIRTY_BUFFER, + FLUSH_FROM_OTHER, + FLUSH_FROM_CHECKPOINT, + FLUSH_FROM_LOG_IO_COMPLETE, + FLUSH_FROM_LOG_WRITE_UP_TO, + FLUSH_FROM_ARCHIVE, + FLUSH_FROM_NUMBER +} flush_from_type; +/************************************************************************** Flushes to disk possible writes cached by the OS. If the space does not exist or is being dropped, does not do anything. */ void fil_flush( /*======*/ - ulint space_id); /* in: file space id (this can be a group of + ulint space_id, /* in: file space id (this can be a group of log files or a tablespace of the database) */ + flush_from_type flush_type);/* in: identifies the caller */ /************************************************************************** Flushes to disk writes in file spaces of the given type possibly cached by the OS. */ @@ -640,7 +659,8 @@ void fil_flush_file_spaces( /*==================*/ - ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */ + ulint purpose, /* in: FIL_TABLESPACE, FIL_LOG */ + flush_from_type flush_type);/* in: identifies the caller */ /********************************************************************** Checks the consistency of the tablespace cache. */ diff -r ef44d8017b6b innobase/include/log0log.h --- a/innobase/include/log0log.h Fri Jul 03 15:41:25 2009 -0700 +++ b/innobase/include/log0log.h Fri Jul 03 15:41:32 2009 -0700 @@ -146,6 +146,22 @@ log_io_complete( /*============*/ log_group_t* group); /* in: log group */ + +/********************************************************** +Describes the caller of log_write_up_to. */ + +typedef enum { + LOG_WRITE_FROM_DIRTY_BUFFER, + LOG_WRITE_FROM_BACKGROUND_SYNC, + LOG_WRITE_FROM_BACKGROUND_ASYNC, + LOG_WRITE_FROM_INTERNAL, + LOG_WRITE_FROM_CHECKPOINT_SYNC, + LOG_WRITE_FROM_CHECKPOINT_ASYNC, + LOG_WRITE_FROM_LOG_ARCHIVE, + LOG_WRITE_FROM_COMMIT_SYNC, + LOG_WRITE_FROM_COMMIT_ASYNC, + LOG_WRITE_FROM_NUMBER +} log_sync_type; /********************************************************** This function is called, e.g., when a transaction wants to commit. It checks that the log has been written to the log file up to the last log entry written @@ -159,14 +175,21 @@ be written, ut_dulint_max if not specified */ ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, or LOG_WAIT_ALL_GROUPS */ - ibool flush_to_disk); - /* in: TRUE if we want the written log also to be - flushed to disk */ + ibool flush_to_disk, + /* in: TRUE if we want the written log also to be flushed to disk */ + log_sync_type caller);/* in: identifies the caller */ /******************************************************************** Does a syncronous flush of the log buffer to disk. */ void log_buffer_flush_to_disk(void); +/*==========================*/ +/******************************************************************** +Flushes the log buffer. Forces it to disk depending on the value of +the configuration parameter innodb_flush_log_at_trx_commit. */ + +void +log_buffer_flush_maybe_sync(void); /*==========================*/ /******************************************************************** Flushes the log buffer. Forces it to disk depending on the value of @@ -751,6 +774,12 @@ AND flushed to disk */ ulint n_pending_writes;/* number of currently pending flushes or writes */ + ulint log_sync_callers[LOG_WRITE_FROM_NUMBER]; + /* counts calls to log_write_up_to */ + ulint log_sync_syncers[LOG_WRITE_FROM_NUMBER]; + /* counts calls to log_write_up_to when log file is sync'd */ + ulint n_syncs; /* number of fsyncs done for log file */ + ulint n_checkpoints; /* number of calls to log_checkpoint */ /* NOTE on the 'flush' in names of the fields below: starting from 4.0.14, we separate the write of the log file and the actual fsync() or other method to flush it to disk. The names below shhould really diff -r ef44d8017b6b innobase/log/log0log.c --- a/innobase/log/log0log.c Fri Jul 03 15:41:25 2009 -0700 +++ b/innobase/log/log0log.c Fri Jul 03 15:41:32 2009 -0700 @@ -782,6 +782,15 @@ log_sys->written_to_all_lsn = log_sys->lsn; log_sys->n_pending_writes = 0; + { + int x; + for (x = 0; x < LOG_WRITE_FROM_NUMBER; ++x) { + log_sys->log_sync_callers[x] = 0; + log_sys->log_sync_syncers[x] = 0; + } + } + log_sys->n_syncs = 0; + log_sys->n_checkpoints = 0; log_sys->no_flush_event = os_event_create(NULL); @@ -1066,7 +1075,7 @@ if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { - fil_flush(group->space_id); + fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE); } #ifdef UNIV_DEBUG @@ -1088,7 +1097,7 @@ && srv_unix_file_flush_method != SRV_UNIX_NOSYNC && srv_flush_log_at_trx_commit != 2) { - fil_flush(group->space_id); + fil_flush(group->space_id, FLUSH_FROM_LOG_IO_COMPLETE); } mutex_enter(&(log_sys->mutex)); @@ -1303,9 +1312,10 @@ be written, ut_dulint_max if not specified */ ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, or LOG_WAIT_ALL_GROUPS */ - ibool flush_to_disk) + ibool flush_to_disk, /* in: TRUE if we want the written log also to be flushed to disk */ + log_sync_type caller) /* in: identifies caller */ { log_group_t* group; ulint start_offset; @@ -1315,6 +1325,7 @@ ulint loop_count; ulint unlock; + log_sys->log_sync_callers[caller]++; if (recv_no_ibuf_operations) { /* Recovery is running and no operations on the log files are allowed yet (the variable name .._no_ibuf_.. is misleading) */ @@ -1465,13 +1476,17 @@ so we have also flushed to disk what we have written */ log_sys->flushed_to_disk_lsn = log_sys->write_lsn; + log_sys->n_syncs++; + log_sys->log_sync_syncers[caller]++; } else if (flush_to_disk) { group = UT_LIST_GET_FIRST(log_sys->log_groups); - fil_flush(group->space_id); + fil_flush(group->space_id, FLUSH_FROM_LOG_WRITE_UP_TO); log_sys->flushed_to_disk_lsn = log_sys->write_lsn; + log_sys->n_syncs++; + log_sys->log_sync_syncers[caller]++; } mutex_enter(&(log_sys->mutex)); @@ -1520,7 +1535,8 @@ mutex_exit(&(log_sys->mutex)); - log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE); + log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE, + LOG_WRITE_FROM_BACKGROUND_SYNC); } /******************************************************************** @@ -1574,7 +1590,7 @@ mutex_exit(&(log->mutex)); if (do_flush) { - log_write_up_to(lsn, LOG_NO_WAIT, FALSE); + log_write_up_to(lsn, LOG_NO_WAIT, FALSE, LOG_WRITE_FROM_INTERNAL); } } @@ -1944,11 +1960,11 @@ } if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { - fil_flush_file_spaces(FIL_TABLESPACE); + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_CHECKPOINT); } mutex_enter(&(log_sys->mutex)); - + log_sys->n_checkpoints++; oldest_lsn = log_buf_pool_get_oldest_modification(); mutex_exit(&(log_sys->mutex)); @@ -1961,7 +1977,8 @@ write-ahead-logging algorithm ensures that the log has been flushed up to oldest_lsn. */ - log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE); + log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE, + LOG_WRITE_FROM_CHECKPOINT_SYNC); mutex_enter(&(log_sys->mutex)); @@ -2589,7 +2606,7 @@ mutex_exit(&(log_sys->mutex)); - fil_flush(group->archive_space_id); + fil_flush(group->archive_space_id, FLUSH_FROM_ARCHIVE); mutex_enter(&(log_sys->mutex)); @@ -2670,7 +2687,8 @@ mutex_exit(&(log_sys->mutex)); - log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE); + log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE, + LOG_WRITE_FROM_LOG_ARCHIVE); calc_new_limit = FALSE; @@ -3207,8 +3225,8 @@ } mutex_exit(&kernel_mutex); - fil_flush_file_spaces(FIL_TABLESPACE); - fil_flush_file_spaces(FIL_LOG); + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); + fil_flush_file_spaces(FIL_LOG, FLUSH_FROM_OTHER); /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer pool: therefore it is essential that the buffer pool has been @@ -3241,7 +3259,7 @@ fil_write_flushed_lsn_to_data_files(lsn, arch_log_no); - fil_flush_file_spaces(FIL_TABLESPACE); + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); fil_close_all_files(); @@ -3363,15 +3381,45 @@ time_elapsed = 0.001 + difftime(current_time, log_sys->last_printout_time); fprintf(file, - "%lu pending log writes, %lu pending chkp writes\n" - "%lu log i/o's done, %.2f log i/o's/second\n", - (ulong) log_sys->n_pending_writes, - (ulong) log_sys->n_pending_checkpoint_writes, - (ulong) log_sys->n_log_ios, - ((log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed)); + "%lu pending log writes, %lu pending chkp writes\n" + "%lu log i/o's done, %.2f log i/o's/second, %lu syncs, %lu checkpoints\n", + (ulong) log_sys->n_pending_writes, + (ulong) log_sys->n_pending_checkpoint_writes, + (ulong) log_sys->n_log_ios, + (log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed, + log_sys->n_syncs, + log_sys->n_checkpoints); log_sys->n_log_ios_old = log_sys->n_log_ios; log_sys->last_printout_time = current_time; + + fprintf(file, + "log sync callers: %lu buffer pool, background %lu sync and %lu async, " + "%lu internal, checkpoint %lu sync and %lu async, %lu archive, " + "commit %lu sync and %lu async\n", + log_sys->log_sync_callers[LOG_WRITE_FROM_DIRTY_BUFFER], + log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_SYNC], + log_sys->log_sync_callers[LOG_WRITE_FROM_BACKGROUND_ASYNC], + log_sys->log_sync_callers[LOG_WRITE_FROM_INTERNAL], + log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_SYNC], + log_sys->log_sync_callers[LOG_WRITE_FROM_CHECKPOINT_ASYNC], + log_sys->log_sync_callers[LOG_WRITE_FROM_LOG_ARCHIVE], + log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_SYNC], + log_sys->log_sync_callers[LOG_WRITE_FROM_COMMIT_ASYNC]); + + fprintf(file, + "log sync syncers: %lu buffer pool, background %lu sync and %lu async, " + "%lu internal, checkpoint %lu sync and %lu async, %lu archive, " + "commit %lu sync and %lu async\n", + log_sys->log_sync_syncers[LOG_WRITE_FROM_DIRTY_BUFFER], + log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_SYNC], + log_sys->log_sync_syncers[LOG_WRITE_FROM_BACKGROUND_ASYNC], + log_sys->log_sync_syncers[LOG_WRITE_FROM_INTERNAL], + log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_SYNC], + log_sys->log_sync_syncers[LOG_WRITE_FROM_CHECKPOINT_ASYNC], + log_sys->log_sync_syncers[LOG_WRITE_FROM_LOG_ARCHIVE], + log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_SYNC], + log_sys->log_sync_syncers[LOG_WRITE_FROM_COMMIT_ASYNC]); mutex_exit(&(log_sys->mutex)); } diff -r ef44d8017b6b innobase/srv/srv0srv.c --- a/innobase/srv/srv0srv.c Fri Jul 03 15:41:25 2009 -0700 +++ b/innobase/srv/srv0srv.c Fri Jul 03 15:41:32 2009 -0700 @@ -1698,6 +1698,12 @@ (ulong)time_elapsed); fputs("----------\n" + "BACKGROUND THREAD\n" + "----------\n", file); + fil_print(file); + + + fputs("----------\n" "SEMAPHORES\n" "----------\n", file); sync_print(file); diff -r ef44d8017b6b innobase/trx/trx0sys.c --- a/innobase/trx/trx0sys.c Fri Jul 03 15:41:25 2009 -0700 +++ b/innobase/trx/trx0sys.c Fri Jul 03 15:41:32 2009 -0700 @@ -511,7 +511,7 @@ page += UNIV_PAGE_SIZE; } - fil_flush_file_spaces(FIL_TABLESPACE); + fil_flush_file_spaces(FIL_TABLESPACE, FLUSH_FROM_OTHER); leave_func: ut_free(unaligned_read_buf); diff -r ef44d8017b6b innobase/trx/trx0trx.c --- a/innobase/trx/trx0trx.c Fri Jul 03 15:41:25 2009 -0700 +++ b/innobase/trx/trx0trx.c Fri Jul 03 15:41:32 2009 -0700 @@ -942,19 +942,21 @@ if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, - FALSE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, + LOG_WRITE_FROM_COMMIT_ASYNC); } else { /* Write the log to the log files AND flush them to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, + LOG_WRITE_FROM_COMMIT_SYNC); } } else if (srv_flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, + LOG_WRITE_FROM_COMMIT_ASYNC); } else { ut_error; } @@ -1701,18 +1703,21 @@ if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, + LOG_WRITE_FROM_COMMIT_ASYNC); } else { /* Write the log to the log files AND flush them to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, + LOG_WRITE_FROM_COMMIT_SYNC); } } else if (srv_flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, + LOG_WRITE_FROM_COMMIT_ASYNC); } else { ut_error; } @@ -1948,19 +1953,21 @@ if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, - FALSE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, + LOG_WRITE_FROM_COMMIT_ASYNC); } else { /* Write the log to the log files AND flush them to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE, + LOG_WRITE_FROM_COMMIT_SYNC); } } else if (srv_flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE, + LOG_WRITE_FROM_COMMIT_ASYNC); } else { ut_error; } diff -r ef44d8017b6b patch_info/innodb_fsync_source.info --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patch_info/innodb_fsync_source.info Fri Jul 03 15:41:32 2009 -0700 @@ -0,0 +1,9 @@ +File=innodb_fsync_source.patch +Name=Information of fsync callers in InnoDB +Version=1.0 +Author=Google +License=GPL +Comment= +ChangeLog= +2008-11-01 +VT: Initial porting