# name : innodb_recovery_patches.patch # introduced : 11 or before # maintainer : Yasufumi # #!!! notice !!! # Any small change to this file in the main branch # should be done or reviewed by the maintainer! --- a/storage/innobase/buf/buf0rea.c +++ b/storage/innobase/buf/buf0rea.c @@ -124,6 +124,46 @@ bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip, tablespace_version, offset); if (bpage == NULL) { + /* bugfix: http://bugs.mysql.com/bug.php?id=43948 */ + if (recv_recovery_is_on() && *err == DB_TABLESPACE_DELETED) { + /* hashed log recs must be treated here */ + recv_addr_t* recv_addr; + + mutex_enter(&(recv_sys->mutex)); + + if (recv_sys->apply_log_recs == FALSE) { + mutex_exit(&(recv_sys->mutex)); + goto not_to_recover; + } + + /* recv_get_fil_addr_struct() */ + recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, + hash_calc_hash(ut_fold_ulint_pair(space, offset), + recv_sys->addr_hash)); + while (recv_addr) { + if ((recv_addr->space == space) + && (recv_addr->page_no == offset)) { + break; + } + recv_addr = HASH_GET_NEXT(addr_hash, recv_addr); + } + + if ((recv_addr == NULL) + || (recv_addr->state == RECV_BEING_PROCESSED) + || (recv_addr->state == RECV_PROCESSED)) { + mutex_exit(&(recv_sys->mutex)); + goto not_to_recover; + } + + fprintf(stderr, " (cannot find space: %lu)", space); + recv_addr->state = RECV_PROCESSED; + + ut_a(recv_sys->n_addrs); + recv_sys->n_addrs--; + + mutex_exit(&(recv_sys->mutex)); + } +not_to_recover: return(0); } @@ -777,6 +817,50 @@ /* It is a single table tablespace and the .ibd file is missing: do nothing */ + /* the log records should be treated here same reason + for http://bugs.mysql.com/bug.php?id=43948 */ + + if (recv_recovery_is_on()) { + recv_addr_t* recv_addr; + + mutex_enter(&(recv_sys->mutex)); + + if (recv_sys->apply_log_recs == FALSE) { + mutex_exit(&(recv_sys->mutex)); + goto not_to_recover; + } + + for (i = 0; i < n_stored; i++) { + /* recv_get_fil_addr_struct() */ + recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, + hash_calc_hash(ut_fold_ulint_pair(space, page_nos[i]), + recv_sys->addr_hash)); + while (recv_addr) { + if ((recv_addr->space == space) + && (recv_addr->page_no == page_nos[i])) { + break; + } + recv_addr = HASH_GET_NEXT(addr_hash, recv_addr); + } + + if ((recv_addr == NULL) + || (recv_addr->state == RECV_BEING_PROCESSED) + || (recv_addr->state == RECV_PROCESSED)) { + continue; + } + + recv_addr->state = RECV_PROCESSED; + + ut_a(recv_sys->n_addrs); + recv_sys->n_addrs--; + } + + mutex_exit(&(recv_sys->mutex)); + + fprintf(stderr, " (cannot find space: %lu)", space); + } +not_to_recover: + return; } --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -182,6 +182,7 @@ #endif /* UNIV_LOG_ARCHIVE */ static my_bool innobase_use_doublewrite = TRUE; static my_bool innobase_use_checksums = TRUE; +static my_bool innobase_recovery_stats = TRUE; static my_bool innobase_locks_unsafe_for_binlog = FALSE; static my_bool innobase_overwrite_relay_log_info = FALSE; static my_bool innobase_rollback_on_timeout = FALSE; @@ -2610,6 +2611,8 @@ srv_force_recovery = (ulint) innobase_force_recovery; + srv_recovery_stats = (ibool) innobase_recovery_stats; + srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; srv_use_checksums = (ibool) innobase_use_checksums; @@ -11390,6 +11393,11 @@ "The common part for InnoDB table spaces.", NULL, NULL, NULL); +static MYSQL_SYSVAR_BOOL(recovery_stats, innobase_recovery_stats, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Output statistics of recovery process after it.", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_BOOL(recovery_update_relay_log, innobase_overwrite_relay_log_info, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "During InnoDB crash recovery on slave overwrite relay-log.info " @@ -11918,6 +11926,7 @@ MYSQL_SYSVAR(data_file_path), MYSQL_SYSVAR(data_home_dir), MYSQL_SYSVAR(doublewrite), + MYSQL_SYSVAR(recovery_stats), MYSQL_SYSVAR(fast_shutdown), MYSQL_SYSVAR(file_io_threads), MYSQL_SYSVAR(read_io_threads), --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -438,6 +438,39 @@ hash_table_t* addr_hash;/*!< hash table of file addresses of pages */ ulint n_addrs;/*!< number of not processed hashed file addresses in the hash table */ + +/* If you modified the following defines at original file, + You should also modify them. */ +/* defined in os0file.c */ +#define OS_AIO_MERGE_N_CONSECUTIVE 64 +/* defined in log0recv.c */ +#define RECV_READ_AHEAD_AREA 32 + time_t stats_recv_start_time; + ulint stats_recv_turns; + + ulint stats_read_requested_pages; + ulint stats_read_in_area[RECV_READ_AHEAD_AREA]; + + ulint stats_read_io_pages; + ulint stats_read_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE]; + ulint stats_write_io_pages; + ulint stats_write_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE]; + + ulint stats_doublewrite_check_pages; + ulint stats_doublewrite_overwrite_pages; + + ulint stats_recover_pages_with_read; + ulint stats_recover_pages_without_read; + + ulint stats_log_recs; + ulint stats_log_len_sum; + + ulint stats_applied_log_recs; + ulint stats_applied_log_len_sum; + ulint stats_pages_already_new; + + ib_uint64_t stats_oldest_modified_lsn; + ib_uint64_t stats_newest_modified_lsn; }; /** The recovery system */ --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -126,6 +126,8 @@ extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_is_raw_partition; +extern ibool srv_recovery_stats; + extern ibool srv_auto_extend_last_data_file; extern ulint srv_last_file_size_max; extern char** srv_log_group_home_dirs; --- a/storage/innobase/log/log0recv.c +++ b/storage/innobase/log/log0recv.c @@ -187,6 +187,9 @@ recv_sys->heap = NULL; recv_sys->addr_hash = NULL; + + recv_sys->stats_recv_start_time = time(NULL); + recv_sys->stats_oldest_modified_lsn = IB_ULONGLONG_MAX; } /********************************************************//** @@ -327,6 +330,11 @@ recv_n_pool_free_frames = 512; } + if (buf_pool_get_curr_size() >= (32 * 1024 * 1024)) { + /* Buffer pool of size greater than 32 MB. */ + recv_n_pool_free_frames = 1024; + } + recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE); recv_sys->len = 0; recv_sys->recovered_offset = 0; @@ -1363,6 +1371,11 @@ len = rec_end - body; + if (srv_recovery_stats) { + recv_sys->stats_log_recs++; + recv_sys->stats_log_len_sum += len; + } + recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t)); recv->type = type; recv->len = rec_end - body; @@ -1474,6 +1487,7 @@ ib_uint64_t start_lsn; ib_uint64_t end_lsn; ib_uint64_t page_lsn; + ib_uint64_t page_lsn_orig; ib_uint64_t page_newest_lsn; ibool modification_to_page; #ifndef UNIV_HOTBACKUP @@ -1496,6 +1510,8 @@ buf_block_get_page_no(block)); if ((recv_addr == NULL) + /* bugfix: http://bugs.mysql.com/bug.php?id=44140 */ + || (recv_addr->state == RECV_BEING_READ && !just_read_in) || (recv_addr->state == RECV_BEING_PROCESSED) || (recv_addr->state == RECV_PROCESSED)) { @@ -1511,6 +1527,14 @@ recv_addr->state = RECV_BEING_PROCESSED; + if (srv_recovery_stats) { + if (just_read_in) { + recv_sys->stats_recover_pages_with_read++; + } else { + recv_sys->stats_recover_pages_without_read++; + } + } + mutex_exit(&(recv_sys->mutex)); mtr_start(&mtr); @@ -1540,6 +1564,7 @@ /* Read the newest modification lsn from the page */ page_lsn = mach_read_from_8(page + FIL_PAGE_LSN); + page_lsn_orig = page_lsn; #ifndef UNIV_HOTBACKUP /* It may be that the page has been modified in the buffer @@ -1559,6 +1584,21 @@ modification_to_page = FALSE; start_lsn = end_lsn = 0; + if (srv_recovery_stats) { + mutex_enter(&(recv_sys->mutex)); + if (page_lsn_orig && recv_sys->stats_oldest_modified_lsn > page_lsn_orig) { + recv_sys->stats_oldest_modified_lsn = page_lsn_orig; + } + if (page_lsn_orig && recv_sys->stats_newest_modified_lsn < page_lsn_orig) { + recv_sys->stats_newest_modified_lsn = page_lsn_orig; + } + if (UT_LIST_GET_LAST(recv_addr->rec_list)->start_lsn + < page_lsn_orig) { + recv_sys->stats_pages_already_new++; + } + mutex_exit(&(recv_sys->mutex)); + } + recv = UT_LIST_GET_FIRST(recv_addr->rec_list); while (recv) { @@ -1613,6 +1653,13 @@ buf + recv->len, block, &mtr); + if (srv_recovery_stats) { + mutex_enter(&(recv_sys->mutex)); + recv_sys->stats_applied_log_recs++; + recv_sys->stats_applied_log_len_sum += recv->len; + mutex_exit(&(recv_sys->mutex)); + } + end_lsn = recv->start_lsn + recv->len; mach_write_to_8(FIL_PAGE_LSN + page, end_lsn); mach_write_to_8(UNIV_PAGE_SIZE @@ -1715,6 +1762,13 @@ } } + if (srv_recovery_stats && n) { + mutex_enter(&(recv_sys->mutex)); + recv_sys->stats_read_requested_pages += n; + recv_sys->stats_read_in_area[n - 1]++; + mutex_exit(&(recv_sys->mutex)); + } + buf_read_recv_pages(FALSE, space, zip_size, page_nos, n); /* fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n); @@ -1867,6 +1921,10 @@ if (has_printed) { fprintf(stderr, "InnoDB: Apply batch completed\n"); + + if (srv_recovery_stats) { + recv_sys->stats_recv_turns++; + } } mutex_exit(&(recv_sys->mutex)); @@ -3270,6 +3328,90 @@ } #endif /* UNIV_DEBUG */ + if (recv_needed_recovery && srv_recovery_stats) { + ulint flush_list_len = 0; + ulint i; + + fprintf(stderr, + "InnoDB: Log records have been applied. The statistics that were gathered follow.\n"); + + fprintf(stderr, + "============================================================\n" + "-------------------\n" + "RECOVERY STATISTICS\n" + "-------------------\n"); + fprintf(stderr, + "Recovery time: %g sec. (%lu turns)\n", + difftime(time(NULL), recv_sys->stats_recv_start_time), + recv_sys->stats_recv_turns); + + for (i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool; + + buf_pool = buf_pool_from_array(i); + flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list); + } + fprintf(stderr, + "\n" + "Data page IO statistics\n" + " Requested pages: %lu\n" + " Read pages: %lu\n" + " Written pages: %lu\n" + " (Dirty blocks): %lu\n", + recv_sys->stats_read_requested_pages, + recv_sys->stats_read_io_pages, + recv_sys->stats_write_io_pages, + flush_list_len); + + fprintf(stderr, + " Grouping IO [times]:\n" + "\tnumber of pages,\n" + "\t\tread request neighbors (in %d pages chunk),\n" + "\t\t\tcombined read IO,\n" + "\t\t\t\tcombined write IO\n", + RECV_READ_AHEAD_AREA); + for (i = 0; i < ut_max(RECV_READ_AHEAD_AREA, + OS_AIO_MERGE_N_CONSECUTIVE); i++) { + fprintf(stderr, + "\t%3lu,\t%lu,\t%lu,\t%lu\n", i + 1, + (i < RECV_READ_AHEAD_AREA) ? + recv_sys->stats_read_in_area[i] : 0, + (i < OS_AIO_MERGE_N_CONSECUTIVE) ? + recv_sys->stats_read_io_consecutive[i] : 0, + (i < OS_AIO_MERGE_N_CONSECUTIVE) ? + recv_sys->stats_write_io_consecutive[i] : 0); + } + + fprintf(stderr, + "\n" + "Recovery process statistics\n" + " Checked pages by doublewrite buffer: %lu\n" + " Overwritten pages from doublewrite: %lu\n" + " Recovered pages by io_thread: %lu\n" + " Recovered pages by main thread: %lu\n" + " Parsed log records to apply: %lu\n" + " Sum of the length: %lu\n" + " Applied log records: %lu\n" + " Sum of the length: %lu\n" + " Pages which are already new enough: %lu (It may not be accurate, if turns > 1)\n" + " Oldest page's LSN: %llu\n" + " Newest page's LSN: %llu\n", + recv_sys->stats_doublewrite_check_pages, + recv_sys->stats_doublewrite_overwrite_pages, + recv_sys->stats_recover_pages_with_read, + recv_sys->stats_recover_pages_without_read, + recv_sys->stats_log_recs, + recv_sys->stats_log_len_sum, + recv_sys->stats_applied_log_recs, + recv_sys->stats_applied_log_len_sum, + recv_sys->stats_pages_already_new, + recv_sys->stats_oldest_modified_lsn, + recv_sys->stats_newest_modified_lsn); + + fprintf(stderr, + "============================================================\n"); + } + if (recv_needed_recovery) { trx_sys_print_mysql_master_log_pos(); trx_sys_print_mysql_binlog_offset(); --- a/storage/innobase/os/os0file.c +++ b/storage/innobase/os/os0file.c @@ -43,6 +43,7 @@ #include "srv0start.h" #include "fil0fil.h" #include "buf0buf.h" +#include "log0recv.h" #ifndef UNIV_HOTBACKUP # include "os0sync.h" # include "os0thread.h" @@ -4278,6 +4279,18 @@ INFINITE); } + if (srv_recovery_stats && recv_recovery_is_on() && n_consecutive) { + mutex_enter(&(recv_sys->mutex)); + if (slot->type == OS_FILE_READ) { + recv_sys->stats_read_io_pages += n_consecutive; + recv_sys->stats_read_io_consecutive[n_consecutive - 1]++; + } else if (slot->type == OS_FILE_WRITE) { + recv_sys->stats_write_io_pages += n_consecutive; + recv_sys->stats_write_io_consecutive[n_consecutive - 1]++; + } + mutex_exit(&(recv_sys->mutex)); + } + os_mutex_enter(array->mutex); if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -160,6 +160,8 @@ /* size in database pages */ UNIV_INTERN ulint* srv_data_file_sizes = NULL; +UNIV_INTERN ibool srv_recovery_stats = FALSE; + /* if TRUE, then we auto-extend the last data file */ UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE; /* if != 0, this tells the max size auto-extending may increase the --- a/storage/innobase/trx/trx0sys.c +++ b/storage/innobase/trx/trx0sys.c @@ -567,6 +567,12 @@ zip_size ? zip_size : UNIV_PAGE_SIZE, read_buf, NULL); + if (srv_recovery_stats && recv_recovery_is_on()) { + mutex_enter(&(recv_sys->mutex)); + recv_sys->stats_doublewrite_check_pages++; + mutex_exit(&(recv_sys->mutex)); + } + /* Check if the page is corrupt */ if (UNIV_UNLIKELY @@ -614,6 +620,13 @@ zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, page, NULL); + + if (srv_recovery_stats && recv_recovery_is_on()) { + mutex_enter(&(recv_sys->mutex)); + recv_sys->stats_doublewrite_overwrite_pages++; + mutex_exit(&(recv_sys->mutex)); + } + fprintf(stderr, "InnoDB: Recovered the page from" " the doublewrite buffer.\n");