1 # name : innodb_lru_dump_restore.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 --- a/storage/innobase/buf/buf0lru.c
9 +++ b/storage/innobase/buf/buf0lru.c
10 @@ -2167,6 +2167,284 @@
11 memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
14 +/********************************************************************//**
15 +Dump the LRU page list to the dump file (LRU_DUMP_FILE). */
16 +#define LRU_DUMP_FILE "ib_lru_dump"
20 +buf_LRU_file_dump(void)
21 +/*===================*/
23 + os_file_t dump_file = -1;
25 + byte* buffer_base = NULL;
26 + byte* buffer = NULL;
33 + for (i = 0; i < srv_n_data_files; i++) {
34 + if (strstr(srv_data_file_names[i], LRU_DUMP_FILE) != NULL) {
36 + " InnoDB: The name '%s' seems to be used for"
37 + " innodb_data_file_path. For safety, dumping of the LRU list"
38 + " is not being done.\n", LRU_DUMP_FILE);
43 + buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
44 + buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
47 + " InnoDB: cannot allocate buffer.\n");
51 + dump_file = os_file_create(innodb_file_temp_key, LRU_DUMP_FILE, OS_FILE_OVERWRITE,
52 + OS_FILE_NORMAL, OS_DATA_FILE, &success);
54 + os_file_get_last_error(TRUE);
56 + " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
60 + buffers = offset = 0;
62 + for (i = 0; i < srv_buf_pool_instances; i++) {
63 + buf_pool_t* buf_pool;
65 + buf_pool = buf_pool_from_array(i);
67 + mutex_enter(&buf_pool->LRU_list_mutex);
68 + bpage = UT_LIST_GET_LAST(buf_pool->LRU);
70 + while (bpage != NULL) {
72 + memset(buffer, 0, UNIV_PAGE_SIZE);
75 + mach_write_to_4(buffer + offset * 4, bpage->space);
77 + mach_write_to_4(buffer + offset * 4, bpage->offset);
80 + if (offset == UNIV_PAGE_SIZE/4) {
81 + success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
82 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
83 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
86 + mutex_exit(&buf_pool->LRU_list_mutex);
88 + " InnoDB: cannot write page %lu of %s\n",
89 + buffers, LRU_DUMP_FILE);
96 + bpage = UT_LIST_GET_PREV(LRU, bpage);
98 + mutex_exit(&buf_pool->LRU_list_mutex);
102 + memset(buffer, 0, UNIV_PAGE_SIZE);
105 + mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
107 + mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
110 + success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
111 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
112 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
120 + if (dump_file != -1)
121 + os_file_close(dump_file);
123 + ut_free(buffer_base);
129 + ib_uint32_t space_id;
130 + ib_uint32_t page_no;
133 +static int dump_record_cmp(const void *a, const void *b)
135 + const dump_record_t *rec1 = (dump_record_t *) a;
136 + const dump_record_t *rec2 = (dump_record_t *) b;
138 + if (rec1->space_id < rec2->space_id)
140 + if (rec1->space_id > rec2->space_id)
142 + if (rec1->page_no < rec2->page_no)
144 + return rec1->page_no > rec2->page_no;
147 +/********************************************************************//**
148 +Read the pages recorded in the dump file (LRU_DUMP_FILE). */
151 +buf_LRU_file_restore(void)
152 +/*======================*/
154 + os_file_t dump_file = -1;
156 + byte* buffer_base = NULL;
157 + byte* buffer = NULL;
162 + ibool terminated = FALSE;
164 + dump_record_t* records = NULL;
169 + dump_file = os_file_create_simple_no_error_handling(innodb_file_temp_key,
170 + LRU_DUMP_FILE, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
171 + if (!success || !os_file_get_size(dump_file, &size, &size_high)) {
172 + os_file_get_last_error(TRUE);
174 + " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
177 + if (size == 0 || size_high > 0 || size % 8) {
178 + fprintf(stderr, " InnoDB: broken LRU dump file\n");
181 + buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
182 + buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
183 + records = ut_malloc(size);
184 + if (!buffer || !records) {
186 + " InnoDB: cannot allocate buffer.\n");
192 + while (!terminated) {
193 + success = os_file_read(dump_file, buffer,
194 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
195 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
199 + " InnoDB: either could not read page %lu of %s,"
200 + " or terminated unexpectedly.\n",
201 + buffers, LRU_DUMP_FILE);
205 + for (offset = 0; offset < UNIV_PAGE_SIZE/4; offset += 2) {
209 + space_id = mach_read_from_4(buffer + offset * 4);
210 + page_no = mach_read_from_4(buffer + (offset + 1) * 4);
211 + if (space_id == 0xFFFFFFFFUL
212 + || page_no == 0xFFFFFFFFUL) {
217 + records[length].space_id = space_id;
218 + records[length].page_no = page_no;
220 + if (length * 8 >= size) {
222 + " InnoDB: could not find the "
223 + "end-of-file marker after reading "
224 + "the expected %lu bytes from the "
226 + " InnoDB: this could be caused by a "
227 + "broken or incomplete file.\n"
228 + " InnoDB: trying to process what has "
229 + "been read so far.\n",
238 + qsort(records, length, sizeof(dump_record_t), dump_record_cmp);
240 + for (offset = 0; offset < length; offset++) {
245 + ib_int64_t tablespace_version;
247 + space_id = records[offset].space_id;
248 + page_no = records[offset].page_no;
250 + if (offset % 16 == 15) {
251 + os_aio_simulated_wake_handler_threads();
252 + buf_flush_free_margins(FALSE);
255 + zip_size = fil_space_get_zip_size(space_id);
256 + if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
260 + if (fil_is_exist(space_id, page_no)) {
262 + tablespace_version = fil_space_get_version(space_id);
265 + reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
266 + | OS_AIO_SIMULATED_WAKE_LATER,
267 + space_id, zip_size, TRUE,
268 + tablespace_version, page_no, NULL);
269 + buf_LRU_stat_inc_io();
273 + os_aio_simulated_wake_handler_threads();
274 + buf_flush_free_margins(FALSE);
276 + ut_print_timestamp(stderr);
278 + " InnoDB: reading pages based on the dumped LRU list was done."
279 + " (requested: %lu, read: %lu)\n", req, reads);
282 + if (dump_file != -1)
283 + os_file_close(dump_file);
285 + ut_free(buffer_base);
292 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
293 /**********************************************************************//**
294 Validates the LRU list for one buffer pool instance. */
295 --- a/storage/innobase/buf/buf0rea.c
296 +++ b/storage/innobase/buf/buf0rea.c
298 which case it is never read into the pool, or if the tablespace does
299 not exist or is being dropped
300 @return 1 if read request is issued. 0 if it is not */
306 --- a/storage/innobase/fil/fil0fil.c
307 +++ b/storage/innobase/fil/fil0fil.c
308 @@ -5290,6 +5290,70 @@
312 +/********************************************************************//**
313 +Confirm whether the parameters are valid or not */
318 + ulint space_id, /*!< in: space id */
319 + ulint block_offset) /*!< in: offset in number of blocks */
321 + fil_space_t* space;
324 + /* Reserve the fil_system mutex and make sure that we can open at
325 + least one file while holding it, if the file is not already open */
327 + fil_mutex_enter_and_prepare_for_io(space_id);
329 + space = fil_space_get_by_id(space_id);
332 + mutex_exit(&fil_system->mutex);
336 + node = UT_LIST_GET_FIRST(space->chain);
339 + if (UNIV_UNLIKELY(node == NULL)) {
340 + mutex_exit(&fil_system->mutex);
344 + if (space->id != 0 && node->size == 0) {
345 + /* We do not know the size of a single-table tablespace
346 + before we open the file */
351 + if (node->size > block_offset) {
355 + block_offset -= node->size;
356 + node = UT_LIST_GET_NEXT(chain, node);
360 + /* Open file if closed */
361 + fil_node_prepare_for_io(node, fil_system, space);
362 + fil_node_complete_io(node, fil_system, OS_FILE_READ);
364 + /* Check that at least the start offset is within the bounds of a
365 + single-table tablespace */
366 + if (UNIV_UNLIKELY(node->size <= block_offset)
367 + && space->id != 0 && space->purpose == FIL_TABLESPACE) {
368 + mutex_exit(&fil_system->mutex);
372 + mutex_exit(&fil_system->mutex);
376 #ifndef UNIV_HOTBACKUP
377 /**********************************************************************//**
378 Waits for an aio operation to complete. This function is used to write the
379 --- a/storage/innobase/handler/ha_innodb.cc
380 +++ b/storage/innobase/handler/ha_innodb.cc
381 @@ -11809,6 +11809,12 @@
382 "Limit the allocated memory for dictionary cache. (0: unlimited)",
383 NULL, NULL, 0, 0, LONG_MAX, 0);
385 +static MYSQL_SYSVAR_UINT(buffer_pool_restore_at_startup, srv_auto_lru_dump,
386 + PLUGIN_VAR_RQCMDARG,
387 + "Time in seconds between automatic buffer pool dumps. "
388 + "0 (the default) disables automatic dumps.",
389 + NULL, NULL, 0, 0, UINT_MAX32, 0);
391 static struct st_mysql_sys_var* innobase_system_variables[]= {
392 MYSQL_SYSVAR(additional_mem_pool_size),
393 MYSQL_SYSVAR(autoextend_increment),
394 @@ -11891,6 +11897,7 @@
395 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
396 MYSQL_SYSVAR(read_ahead_threshold),
397 MYSQL_SYSVAR(io_capacity),
398 + MYSQL_SYSVAR(buffer_pool_restore_at_startup),
399 MYSQL_SYSVAR(purge_threads),
400 MYSQL_SYSVAR(purge_batch_size),
401 MYSQL_SYSVAR(rollback_segments),
402 --- a/storage/innobase/handler/i_s.cc
403 +++ b/storage/innobase/handler/i_s.cc
405 #include "trx0rseg.h" /* for trx_rseg_struct */
406 #include "trx0sys.h" /* for trx_sys */
407 #include "dict0dict.h" /* for dict_sys */
408 +#include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */
412 @@ -4270,6 +4271,36 @@
416 + else if (!strncasecmp("XTRA_LRU_DUMP", ptr, 13)) {
417 + ut_print_timestamp(stderr);
418 + fprintf(stderr, " InnoDB: Administrative command 'XTRA_LRU_DUMP'"
419 + " was detected.\n");
421 + if (buf_LRU_file_dump()) {
422 + field_store_string(i_s_table->field[0],
423 + "XTRA_LRU_DUMP was succeeded.");
425 + field_store_string(i_s_table->field[0],
426 + "XTRA_LRU_DUMP was failed.");
431 + else if (!strncasecmp("XTRA_LRU_RESTORE", ptr, 16)) {
432 + ut_print_timestamp(stderr);
433 + fprintf(stderr, " InnoDB: Administrative command 'XTRA_LRU_RESTORE'"
434 + " was detected.\n");
436 + if (buf_LRU_file_restore()) {
437 + field_store_string(i_s_table->field[0],
438 + "XTRA_LRU_RESTORE was succeeded.");
440 + field_store_string(i_s_table->field[0],
441 + "XTRA_LRU_RESTORE was failed.");
447 field_store_string(i_s_table->field[0],
448 "Undefined XTRA_* command.");
449 --- a/storage/innobase/include/buf0lru.h
450 +++ b/storage/innobase/include/buf0lru.h
453 buf_LRU_stat_update(void);
454 /*=====================*/
455 +/********************************************************************//**
456 +Dump the LRU page list to the LRU dump file. */
459 +buf_LRU_file_dump(void);
460 +/*===================*/
461 +/********************************************************************//**
462 +Read the pages recorded in the LRU dump file. */
465 +buf_LRU_file_restore(void);
466 +/*======================*/
468 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
469 /**********************************************************************//**
470 --- a/storage/innobase/include/buf0rea.h
471 +++ b/storage/innobase/include/buf0rea.h
473 #include "buf0types.h"
475 /********************************************************************//**
476 +Low-level function which reads a page asynchronously from a file to the
477 +buffer buf_pool if it is not already there, in which case does nothing.
478 +Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
479 +flag is cleared and the x-lock released by an i/o-handler thread.
480 +@return 1 if a read request was queued (issued); 0 if no request
481 +is issued: the page already resided in buf_pool, or the page is in
482 +the doublewrite buffer blocks, in which case it is never read into
483 +the pool, or the tablespace does not exist or is being
484 +dropped */
489 + ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
490 + trying to read from a non-existent tablespace, or a
491 + tablespace which is just now being dropped */
492 + ibool sync, /*!< in: TRUE if synchronous aio is desired */
493 + ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
494 + ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
495 + at read-ahead functions) */
496 + ulint space, /*!< in: space id */
497 + ulint zip_size,/*!< in: compressed page size, or 0 */
498 + ibool unzip, /*!< in: TRUE=request uncompressed page */
499 + ib_int64_t tablespace_version, /*!< in: if the space memory object has
500 + this timestamp different from what we are giving here,
501 + treat the tablespace as dropped; this is a timestamp we
502 + use to stop dangling page reads from a tablespace
503 + which we have DISCARDed + IMPORTed back */
504 + ulint offset, /*!< in: page number */
506 +/********************************************************************//**
507 High-level function which reads a page asynchronously from a file to the
508 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
509 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
510 --- a/storage/innobase/include/fil0fil.h
511 +++ b/storage/innobase/include/fil0fil.h
513 void* message, /*!< in: message for aio handler if non-sync
514 aio used, else ignored */
516 +/********************************************************************//**
517 +Confirm whether the parameters are valid or not */
522 + ulint space_id, /*!< in: space id */
523 + ulint block_offset); /*!< in: offset in number of blocks */
524 /**********************************************************************//**
525 Waits for an aio operation to complete. This function is used to write the
526 handler for completed requests. The aio array of pending requests is divided
527 --- a/storage/innobase/include/srv0srv.h
528 +++ b/storage/innobase/include/srv0srv.h
530 reading of a disk page */
531 extern ulint srv_buf_pool_reads;
533 +/** Time in seconds between automatic buffer pool dumps */
534 +extern uint srv_auto_lru_dump;
536 /** Status variables to be passed to MySQL */
537 typedef struct export_var_struct export_struc;
540 /*=====================*/
541 void* arg); /*!< in: a dummy parameter required by
543 +/*********************************************************************//**
544 +A thread which restores the buffer pool from a dump file on startup and does
545 +periodic buffer pool dumps.
546 +@return a dummy parameter */
549 +srv_LRU_dump_restore_thread(
550 +/*====================*/
551 + void* arg); /*!< in: a dummy parameter required by
552 + os_thread_create */
553 /******************************************************************//**
554 Outputs to a file the output of the InnoDB Monitor.
555 @return FALSE if not all information printed
556 --- a/storage/innobase/srv/srv0srv.c
557 +++ b/storage/innobase/srv/srv0srv.c
559 reading of a disk page */
560 UNIV_INTERN ulint srv_buf_pool_reads = 0;
562 +/** Time in seconds between automatic buffer pool dumps */
563 +UNIV_INTERN uint srv_auto_lru_dump = 0;
565 /* structure to pass status variables to MySQL */
566 UNIV_INTERN export_struc export_vars;
568 @@ -2706,6 +2709,56 @@
569 OS_THREAD_DUMMY_RETURN;
572 +/*********************************************************************//**
573 +A thread which restores the buffer pool from a dump file on startup and does
574 +periodic buffer pool dumps.
575 +@return a dummy parameter */
578 +srv_LRU_dump_restore_thread(
579 +/*====================*/
580 + void* arg __attribute__((unused)))
581 + /*!< in: a dummy parameter required by
582 + os_thread_create */
584 + uint auto_lru_dump;
585 + time_t last_dump_time;
586 + time_t time_elapsed;
588 +#ifdef UNIV_DEBUG_THREAD_CREATION
589 + fprintf(stderr, "The LRU dump/restore thread has started, id %lu\n",
590 + os_thread_pf(os_thread_get_curr_id()));
593 + if (srv_auto_lru_dump)
594 + buf_LRU_file_restore();
596 + last_dump_time = time(NULL);
599 + os_thread_sleep(5000000);
601 + if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
605 + time_elapsed = time(NULL) - last_dump_time;
606 + auto_lru_dump = srv_auto_lru_dump;
607 + if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) {
608 + last_dump_time = time(NULL);
609 + buf_LRU_file_dump();
614 + /* We count the number of threads in os_thread_exit(). A created
615 + thread should always use that to exit and not use return() to exit. */
617 + os_thread_exit(NULL);
619 + OS_THREAD_DUMMY_RETURN;
622 /**********************************************************************//**
623 Check whether any background thread is active.
624 @return FALSE if all are are suspended or have exited. */
625 --- a/storage/innobase/srv/srv0start.c
626 +++ b/storage/innobase/srv/srv0start.c
628 static os_file_t files[1000];
630 /** io_handler_thread parameters for thread identification */
631 -static ulint n[SRV_MAX_N_IO_THREADS + 6];
632 +static ulint n[SRV_MAX_N_IO_THREADS + 7];
633 /** io_handler_thread identifiers */
634 -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6];
635 +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7];
637 /** We use this mutex to test the return value of pthread_mutex_trylock
638 on successful locking. HP-UX does NOT return 0, though Linux et al do. */
639 @@ -1821,6 +1821,10 @@
640 os_thread_create(&srv_monitor_thread, NULL,
641 thread_ids + 4 + SRV_MAX_N_IO_THREADS);
643 + /* Create the thread which automatically dumps/restores the buffer pool */
644 + os_thread_create(&srv_LRU_dump_restore_thread, NULL,
645 + thread_ids + 5 + SRV_MAX_N_IO_THREADS);
647 srv_is_being_started = FALSE;
649 err = dict_create_or_check_foreign_constraint_tables();