1 # name : innodb_lru_dump_restore.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 --- a/storage/innobase/buf/buf0lru.c
9 +++ b/storage/innobase/buf/buf0lru.c
10 @@ -2197,6 +2197,289 @@
11 memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
14 +/********************************************************************//**
15 +Dump the (space id, page number) pairs of the LRU page list to the LRU_DUMP_FILE file. */
16 +#define LRU_DUMP_FILE "ib_lru_dump"
20 +buf_LRU_file_dump(void)
21 +/*===================*/
23 + os_file_t dump_file = -1;
25 + byte* buffer_base = NULL;
26 + byte* buffer = NULL;
33 + for (i = 0; i < srv_n_data_files; i++) {
34 + if (strstr(srv_data_file_names[i], LRU_DUMP_FILE) != NULL) {
36 + " InnoDB: The name '%s' seems to be used for"
37 + " innodb_data_file_path. For safety, dumping of the LRU list"
38 + " is not being done.\n", LRU_DUMP_FILE);
43 + buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
44 + buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
47 + " InnoDB: cannot allocate buffer.\n");
51 + dump_file = os_file_create(innodb_file_temp_key, LRU_DUMP_FILE, OS_FILE_OVERWRITE,
52 + OS_FILE_NORMAL, OS_DATA_FILE, &success);
54 + os_file_get_last_error(TRUE);
56 + " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
60 + buffers = offset = 0;
62 + for (i = 0; i < srv_buf_pool_instances; i++) {
63 + buf_pool_t* buf_pool;
65 + buf_pool = buf_pool_from_array(i);
67 + mutex_enter(&buf_pool->LRU_list_mutex);
68 + bpage = UT_LIST_GET_LAST(buf_pool->LRU);
70 + while (bpage != NULL) {
72 + memset(buffer, 0, UNIV_PAGE_SIZE);
75 + mach_write_to_4(buffer + offset * 4, bpage->space);
77 + mach_write_to_4(buffer + offset * 4, bpage->offset);
80 + if (offset == UNIV_PAGE_SIZE/4) {
81 + success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
82 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
83 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
86 + mutex_exit(&buf_pool->LRU_list_mutex);
88 + " InnoDB: cannot write page %lu of %s\n",
89 + buffers, LRU_DUMP_FILE);
96 + bpage = UT_LIST_GET_PREV(LRU, bpage);
98 + mutex_exit(&buf_pool->LRU_list_mutex);
102 + memset(buffer, 0, UNIV_PAGE_SIZE);
105 + mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
107 + mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
110 + success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
111 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
112 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
120 + if (dump_file != -1)
121 + os_file_close(dump_file);
123 + ut_free(buffer_base);
129 + ib_uint32_t space_id;
130 + ib_uint32_t page_no;
133 +static int dump_record_cmp(const void *a, const void *b)
135 + const dump_record_t *rec1 = (dump_record_t *) a;
136 + const dump_record_t *rec2 = (dump_record_t *) b;
138 + if (rec1->space_id < rec2->space_id)
140 + if (rec1->space_id > rec2->space_id)
142 + if (rec1->page_no < rec2->page_no)
144 + return rec1->page_no > rec2->page_no;
147 +/********************************************************************//**
148 +Read into the buffer pool the pages listed in the LRU_DUMP_FILE file. */
151 +buf_LRU_file_restore(void)
152 +/*======================*/
154 + os_file_t dump_file = -1;
156 + byte* buffer_base = NULL;
157 + byte* buffer = NULL;
162 + ibool terminated = FALSE;
164 + dump_record_t* records = NULL;
169 + dump_file = os_file_create_simple_no_error_handling(innodb_file_temp_key,
170 + LRU_DUMP_FILE, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
171 + if (!success || !os_file_get_size(dump_file, &size, &size_high)) {
172 + os_file_get_last_error(TRUE);
174 + " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
178 + ut_print_timestamp(stderr);
179 + fprintf(stderr, " InnoDB: Restoring buffer pool pages from %s\n",
182 + if (size == 0 || size_high > 0 || size % 8) {
183 + fprintf(stderr, " InnoDB: broken LRU dump file\n");
186 + buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
187 + buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
188 + records = ut_malloc(size);
189 + if (!buffer || !records) {
191 + " InnoDB: cannot allocate buffer.\n");
197 + while (!terminated) {
198 + success = os_file_read(dump_file, buffer,
199 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
200 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
204 + " InnoDB: either could not read page %lu of %s,"
205 + " or terminated unexpectedly.\n",
206 + buffers, LRU_DUMP_FILE);
210 + for (offset = 0; offset < UNIV_PAGE_SIZE/4; offset += 2) {
214 + space_id = mach_read_from_4(buffer + offset * 4);
215 + page_no = mach_read_from_4(buffer + (offset + 1) * 4);
216 + if (space_id == 0xFFFFFFFFUL
217 + || page_no == 0xFFFFFFFFUL) {
222 + records[length].space_id = space_id;
223 + records[length].page_no = page_no;
225 + if (length * 8 >= size) {
227 + " InnoDB: could not find the "
228 + "end-of-file marker after reading "
229 + "the expected %lu bytes from the "
231 + " InnoDB: this could be caused by a "
232 + "broken or incomplete file.\n"
233 + " InnoDB: trying to process what has "
234 + "been read so far.\n",
243 + qsort(records, length, sizeof(dump_record_t), dump_record_cmp);
245 + for (offset = 0; offset < length; offset++) {
250 + ib_int64_t tablespace_version;
252 + space_id = records[offset].space_id;
253 + page_no = records[offset].page_no;
255 + if (offset % 16 == 15) {
256 + os_aio_simulated_wake_handler_threads();
257 + buf_flush_free_margins(FALSE);
260 + zip_size = fil_space_get_zip_size(space_id);
261 + if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
265 + if (fil_is_exist(space_id, page_no)) {
267 + tablespace_version = fil_space_get_version(space_id);
270 + reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
271 + | OS_AIO_SIMULATED_WAKE_LATER,
272 + space_id, zip_size, TRUE,
273 + tablespace_version, page_no, NULL);
274 + buf_LRU_stat_inc_io();
278 + os_aio_simulated_wake_handler_threads();
279 + buf_flush_free_margins(FALSE);
281 + ut_print_timestamp(stderr);
283 + " InnoDB: Completed reading buffer pool pages"
284 + " (requested: %lu, read: %lu)\n", req, reads);
287 + if (dump_file != -1)
288 + os_file_close(dump_file);
290 + ut_free(buffer_base);
297 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
298 /**********************************************************************//**
299 Validates the LRU list for one buffer pool instance. */
300 --- a/storage/innobase/buf/buf0rea.c
301 +++ b/storage/innobase/buf/buf0rea.c
303 which case it is never read into the pool, or if the tablespace does
304 not exist or is being dropped
305 @return 1 if read request is issued. 0 if it is not */
311 --- a/storage/innobase/fil/fil0fil.c
312 +++ b/storage/innobase/fil/fil0fil.c
313 @@ -5307,6 +5307,70 @@
317 +/********************************************************************//**
318 +Check whether the given block offset lies within the given, existing tablespace */
323 + ulint space_id, /*!< in: space id */
324 + ulint block_offset) /*!< in: offset in number of blocks */
326 + fil_space_t* space;
329 + /* Reserve the fil_system mutex and make sure that we can open at
330 + least one file while holding it, if the file is not already open */
332 + fil_mutex_enter_and_prepare_for_io(space_id);
334 + space = fil_space_get_by_id(space_id);
337 + mutex_exit(&fil_system->mutex);
341 + node = UT_LIST_GET_FIRST(space->chain);
344 + if (UNIV_UNLIKELY(node == NULL)) {
345 + mutex_exit(&fil_system->mutex);
349 + if (space->id != 0 && node->size == 0) {
350 + /* We do not know the size of a single-table tablespace
351 + before we open the file */
356 + if (node->size > block_offset) {
360 + block_offset -= node->size;
361 + node = UT_LIST_GET_NEXT(chain, node);
365 + /* Open file if closed */
366 + fil_node_prepare_for_io(node, fil_system, space);
367 + fil_node_complete_io(node, fil_system, OS_FILE_READ);
369 + /* Check that at least the start offset is within the bounds of a
370 + single-table tablespace */
371 + if (UNIV_UNLIKELY(node->size <= block_offset)
372 + && space->id != 0 && space->purpose == FIL_TABLESPACE) {
373 + mutex_exit(&fil_system->mutex);
377 + mutex_exit(&fil_system->mutex);
381 #ifndef UNIV_HOTBACKUP
382 /**********************************************************************//**
383 Waits for an aio operation to complete. This function is used to write the
384 --- a/storage/innobase/handler/ha_innodb.cc
385 +++ b/storage/innobase/handler/ha_innodb.cc
388 static char* innodb_version_str = (char*) INNODB_VERSION_STR;
390 +static my_bool innobase_blocking_lru_restore = FALSE;
392 /** Possible values for system variable "innodb_stats_method". The values
393 are defined the same as its corresponding MyISAM system variable
394 "myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
395 @@ -2652,6 +2654,8 @@
396 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
397 srv_use_checksums = (ibool) innobase_use_checksums;
399 + srv_blocking_lru_restore = (ibool) innobase_blocking_lru_restore;
401 #ifdef HAVE_LARGE_PAGES
402 if ((os_use_large_pages = (ibool) my_use_large_pages))
403 os_large_page_size = (ulint) opt_large_page_size;
404 @@ -11964,6 +11968,19 @@
405 "Limit the allocated memory for dictionary cache. (0: unlimited)",
406 NULL, NULL, 0, 0, LONG_MAX, 0);
408 +static MYSQL_SYSVAR_UINT(buffer_pool_restore_at_startup, srv_auto_lru_dump,
409 + PLUGIN_VAR_RQCMDARG,
410 + "Time in seconds between automatic buffer pool dumps. "
411 + "0 (the default) disables automatic dumps.",
412 + NULL, NULL, 0, 0, UINT_MAX32, 0);
414 +static MYSQL_SYSVAR_BOOL(blocking_buffer_pool_restore,
415 + innobase_blocking_lru_restore,
416 + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
417 + "Block XtraDB startup process until buffer pool is full restored from a "
418 + "dump file (if present). Disabled by default.",
419 + NULL, NULL, FALSE);
421 static struct st_mysql_sys_var* innobase_system_variables[]= {
422 MYSQL_SYSVAR(additional_mem_pool_size),
423 MYSQL_SYSVAR(autoextend_increment),
424 @@ -12048,6 +12065,8 @@
425 MYSQL_SYSVAR(random_read_ahead),
426 MYSQL_SYSVAR(read_ahead_threshold),
427 MYSQL_SYSVAR(io_capacity),
428 + MYSQL_SYSVAR(buffer_pool_restore_at_startup),
429 + MYSQL_SYSVAR(blocking_buffer_pool_restore),
430 MYSQL_SYSVAR(purge_threads),
431 MYSQL_SYSVAR(purge_batch_size),
432 MYSQL_SYSVAR(rollback_segments),
433 --- a/storage/innobase/handler/i_s.cc
434 +++ b/storage/innobase/handler/i_s.cc
436 #include "trx0rseg.h" /* for trx_rseg_struct */
437 #include "trx0sys.h" /* for trx_sys */
438 #include "dict0dict.h" /* for dict_sys */
439 +#include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */
443 @@ -4336,6 +4337,36 @@
447 + else if (!strncasecmp("XTRA_LRU_DUMP", ptr, 13)) {
448 + ut_print_timestamp(stderr);
449 + fprintf(stderr, " InnoDB: Administrative command 'XTRA_LRU_DUMP'"
450 + " was detected.\n");
452 + if (buf_LRU_file_dump()) {
453 + field_store_string(i_s_table->field[0],
454 + "XTRA_LRU_DUMP was succeeded.");
456 + field_store_string(i_s_table->field[0],
457 + "XTRA_LRU_DUMP was failed.");
462 + else if (!strncasecmp("XTRA_LRU_RESTORE", ptr, 16)) {
463 + ut_print_timestamp(stderr);
464 + fprintf(stderr, " InnoDB: Administrative command 'XTRA_LRU_RESTORE'"
465 + " was detected.\n");
467 + if (buf_LRU_file_restore()) {
468 + field_store_string(i_s_table->field[0],
469 + "XTRA_LRU_RESTORE was succeeded.");
471 + field_store_string(i_s_table->field[0],
472 + "XTRA_LRU_RESTORE was failed.");
478 field_store_string(i_s_table->field[0],
479 "Undefined XTRA_* command.");
480 --- a/storage/innobase/include/buf0lru.h
481 +++ b/storage/innobase/include/buf0lru.h
484 buf_LRU_stat_update(void);
485 /*=====================*/
486 +/********************************************************************//**
487 +Dump the (space id, page number) pairs of the LRU page list to the dump file. */
490 +buf_LRU_file_dump(void);
491 +/*===================*/
492 +/********************************************************************//**
493 +Read into the buffer pool the pages listed in the dump file. */
496 +buf_LRU_file_restore(void);
497 +/*======================*/
499 /******************************************************************//**
500 Remove one page from LRU list and put it to free list */
501 --- a/storage/innobase/include/buf0rea.h
502 +++ b/storage/innobase/include/buf0rea.h
504 #include "buf0types.h"
506 /********************************************************************//**
507 +Low-level function which reads a page asynchronously from a file to the
508 +buffer buf_pool if it is not already there, in which case does nothing.
509 +Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
510 +flag is cleared and the x-lock released by an i/o-handler thread.
511 +@return 1 if a read request was queued, 0 if the page already resided
512 +in buf_pool, or if the page is in the doublewrite buffer blocks in
513 +which case it is never read into the pool, or if the tablespace does
514 +not exist or is being dropped
515 +@return 1 if read request is issued. 0 if it is not */
520 + ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
521 + trying to read from a non-existent tablespace, or a
522 + tablespace which is just now being dropped */
523 + ibool sync, /*!< in: TRUE if synchronous aio is desired */
524 + ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
525 + ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
526 + at read-ahead functions) */
527 + ulint space, /*!< in: space id */
528 + ulint zip_size,/*!< in: compressed page size, or 0 */
529 + ibool unzip, /*!< in: TRUE=request uncompressed page */
530 + ib_int64_t tablespace_version, /*!< in: if the space memory object has
531 + this timestamp different from what we are giving here,
532 + treat the tablespace as dropped; this is a timestamp we
533 + use to stop dangling page reads from a tablespace
534 + which we have DISCARDed + IMPORTed back */
535 + ulint offset, /*!< in: page number */
537 +/********************************************************************//**
538 High-level function which reads a page asynchronously from a file to the
539 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
540 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
541 --- a/storage/innobase/include/fil0fil.h
542 +++ b/storage/innobase/include/fil0fil.h
544 void* message, /*!< in: message for aio handler if non-sync
545 aio used, else ignored */
547 +/********************************************************************//**
548 +Check whether the given block offset lies within the given, existing tablespace */
553 + ulint space_id, /*!< in: space id */
554 + ulint block_offset); /*!< in: offset in number of blocks */
555 /**********************************************************************//**
556 Waits for an aio operation to complete. This function is used to write the
557 handler for completed requests. The aio array of pending requests is divided
558 --- a/storage/innobase/include/srv0srv.h
559 +++ b/storage/innobase/include/srv0srv.h
561 reading of a disk page */
562 extern ulint srv_buf_pool_reads;
564 +/** Time in seconds between automatic buffer pool dumps */
565 +extern uint srv_auto_lru_dump;
567 +/** Whether startup should be blocked until buffer pool is fully restored */
568 +extern ibool srv_blocking_lru_restore;
570 /** Status variables to be passed to MySQL */
571 typedef struct export_var_struct export_struc;
574 /*=====================*/
575 void* arg); /*!< in: a dummy parameter required by
577 +/*********************************************************************//**
578 +A thread which restores the buffer pool from a dump file on startup and does
579 +periodic buffer pool dumps.
580 +@return a dummy parameter */
583 +srv_LRU_dump_restore_thread(
584 +/*====================*/
585 + void* arg); /*!< in: a dummy parameter required by
586 + os_thread_create */
587 /******************************************************************//**
588 Outputs to a file the output of the InnoDB Monitor.
589 @return FALSE if not all information printed
590 --- a/storage/innobase/srv/srv0srv.c
591 +++ b/storage/innobase/srv/srv0srv.c
593 reading of a disk page */
594 UNIV_INTERN ulint srv_buf_pool_reads = 0;
596 +/** Time in seconds between automatic buffer pool dumps */
597 +UNIV_INTERN uint srv_auto_lru_dump = 0;
599 +/** Whether startup should be blocked until buffer pool is fully restored */
600 +UNIV_INTERN ibool srv_blocking_lru_restore;
602 /* structure to pass status variables to MySQL */
603 UNIV_INTERN export_struc export_vars;
605 @@ -2708,6 +2714,58 @@
606 /* We count the number of threads in os_thread_exit(). A created
607 thread should always use that to exit and not use return() to exit. */
609 + os_thread_exit(NULL);
611 + OS_THREAD_DUMMY_RETURN;
614 +/*********************************************************************//**
615 +A thread which restores the buffer pool from a dump file on startup and does
616 +periodic buffer pool dumps.
617 +@return a dummy parameter */
620 +srv_LRU_dump_restore_thread(
621 +/*====================*/
622 + void* arg __attribute__((unused)))
623 + /*!< in: a dummy parameter required by
624 + os_thread_create */
626 + uint auto_lru_dump;
627 + time_t last_dump_time;
628 + time_t time_elapsed;
630 +#ifdef UNIV_DEBUG_THREAD_CREATION
631 + fprintf(stderr, "The LRU dump/restore thread has started, id %lu\n",
632 + os_thread_pf(os_thread_get_curr_id()));
635 + /* If srv_blocking_lru_restore is TRUE, restore will be done
636 + synchronously on startup. */
637 + if (srv_auto_lru_dump && !srv_blocking_lru_restore)
638 + buf_LRU_file_restore();
640 + last_dump_time = time(NULL);
643 + os_thread_sleep(5000000);
645 + if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
649 + time_elapsed = time(NULL) - last_dump_time;
650 + auto_lru_dump = srv_auto_lru_dump;
651 + if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) {
652 + last_dump_time = time(NULL);
653 + buf_LRU_file_dump();
658 + /* We count the number of threads in os_thread_exit(). A created
659 + thread should always use that to exit and not use return() to exit. */
661 os_thread_exit(NULL);
663 OS_THREAD_DUMMY_RETURN;
664 --- a/storage/innobase/srv/srv0start.c
665 +++ b/storage/innobase/srv/srv0start.c
667 # include "btr0pcur.h"
668 # include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
669 # include "zlib.h" /* for ZLIB_VERSION */
670 +# include "buf0lru.h" /* for buf_LRU_file_restore() */
672 /** Log sequence number immediately after startup */
673 UNIV_INTERN ib_uint64_t srv_start_lsn;
675 static os_file_t files[1000];
677 /** io_handler_thread parameters for thread identification */
678 -static ulint n[SRV_MAX_N_IO_THREADS + 6];
679 +static ulint n[SRV_MAX_N_IO_THREADS + 7];
680 /** io_handler_thread identifiers */
681 -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6];
682 +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7];
684 /** We use this mutex to test the return value of pthread_mutex_trylock
685 on successful locking. HP-UX does NOT return 0, though Linux et al do. */
686 @@ -1841,6 +1842,15 @@
687 os_thread_create(&srv_monitor_thread, NULL,
688 thread_ids + 4 + SRV_MAX_N_IO_THREADS);
690 + /* Create the thread which automatically dumps/restores the buffer pool */
691 + os_thread_create(&srv_LRU_dump_restore_thread, NULL,
692 + thread_ids + 5 + SRV_MAX_N_IO_THREADS);
694 + /* If srv_blocking_lru_restore is TRUE, load buffer pool contents
696 + if (srv_auto_lru_dump && srv_blocking_lru_restore)
697 + buf_LRU_file_restore();
699 srv_is_being_started = FALSE;
701 err = dict_create_or_check_foreign_constraint_tables();
703 +++ b/mysql-test/suite/sys_vars/r/innodb_blocking_buffer_pool_restore_basic.result
705 +SELECT @@global.innodb_blocking_buffer_pool_restore;
706 +@@global.innodb_blocking_buffer_pool_restore
709 +++ b/mysql-test/suite/sys_vars/t/innodb_blocking_buffer_pool_restore_basic.test
711 +SELECT @@global.innodb_blocking_buffer_pool_restore;