1 # name : innodb_lru_dump_restore.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
9 --- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:49:59.185023424 +0900
10 +++ b/storage/innobase/buf/buf0lru.c 2010-12-04 15:33:37.626482350 +0900
11 @@ -2258,6 +2258,284 @@
12 memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
15 +/********************************************************************//**
16 +Dump the LRU page list to the specific file. */
17 +#define LRU_DUMP_FILE "ib_lru_dump"
21 +buf_LRU_file_dump(void)
22 +/*===================*/
24 + os_file_t dump_file = -1;
26 + byte* buffer_base = NULL;
27 + byte* buffer = NULL;
34 + for (i = 0; i < srv_n_data_files; i++) {
35 + if (strstr(srv_data_file_names[i], LRU_DUMP_FILE) != NULL) {
37 + " InnoDB: The name '%s' seems to be used for"
38 + " innodb_data_file_path. For safety, dumping of the LRU list"
39 + " is not being done.\n", LRU_DUMP_FILE);
44 + buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
45 + buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
48 + " InnoDB: cannot allocate buffer.\n");
52 + dump_file = os_file_create(innodb_file_temp_key, LRU_DUMP_FILE, OS_FILE_OVERWRITE,
53 + OS_FILE_NORMAL, OS_DATA_FILE, &success);
55 + os_file_get_last_error(TRUE);
57 + " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
61 + buffers = offset = 0;
63 + for (i = 0; i < srv_buf_pool_instances; i++) {
64 + buf_pool_t* buf_pool;
66 + buf_pool = buf_pool_from_array(i);
68 + mutex_enter(&buf_pool->LRU_list_mutex);
69 + bpage = UT_LIST_GET_LAST(buf_pool->LRU);
71 + while (bpage != NULL) {
73 + memset(buffer, 0, UNIV_PAGE_SIZE);
76 + mach_write_to_4(buffer + offset * 4, bpage->space);
78 + mach_write_to_4(buffer + offset * 4, bpage->offset);
81 + if (offset == UNIV_PAGE_SIZE/4) {
82 + success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
83 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
84 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
87 + mutex_exit(&buf_pool->LRU_list_mutex);
89 + " InnoDB: cannot write page %lu of %s\n",
90 + buffers, LRU_DUMP_FILE);
97 + bpage = UT_LIST_GET_PREV(LRU, bpage);
99 + mutex_exit(&buf_pool->LRU_list_mutex);
103 + memset(buffer, 0, UNIV_PAGE_SIZE);
106 + mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
108 + mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
111 + success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
112 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
113 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
121 + if (dump_file != -1)
122 + os_file_close(dump_file);
124 + ut_free(buffer_base);
130 + ib_uint32_t space_id;
131 + ib_uint32_t page_no;
134 +static int dump_record_cmp(const void *a, const void *b)
136 + const dump_record_t *rec1 = (dump_record_t *) a;
137 + const dump_record_t *rec2 = (dump_record_t *) b;
139 + if (rec1->space_id < rec2->space_id)
141 + if (rec1->space_id > rec2->space_id)
143 + if (rec1->page_no < rec2->page_no)
145 + return rec1->page_no > rec2->page_no;
148 +/********************************************************************//**
149 +Read the pages based on the specific file.*/
152 +buf_LRU_file_restore(void)
153 +/*======================*/
155 + os_file_t dump_file = -1;
157 + byte* buffer_base = NULL;
158 + byte* buffer = NULL;
163 + ibool terminated = FALSE;
165 + dump_record_t* records = NULL;
170 + dump_file = os_file_create_simple_no_error_handling(innodb_file_temp_key,
171 + LRU_DUMP_FILE, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
172 + if (!success || !os_file_get_size(dump_file, &size, &size_high)) {
173 + os_file_get_last_error(TRUE);
175 + " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
178 + if (size == 0 || size_high > 0 || size % 8) {
179 + fprintf(stderr, " InnoDB: broken LRU dump file\n");
182 + buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
183 + buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
184 + records = ut_malloc(size);
185 + if (!buffer || !records) {
187 + " InnoDB: cannot allocate buffer.\n");
193 + while (!terminated) {
194 + success = os_file_read(dump_file, buffer,
195 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
196 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
200 + " InnoDB: either could not read page %lu of %s,"
201 + " or terminated unexpectedly.\n",
202 + buffers, LRU_DUMP_FILE);
206 + for (offset = 0; offset < UNIV_PAGE_SIZE/4; offset += 2) {
210 + space_id = mach_read_from_4(buffer + offset * 4);
211 + page_no = mach_read_from_4(buffer + (offset + 1) * 4);
212 + if (space_id == 0xFFFFFFFFUL
213 + || page_no == 0xFFFFFFFFUL) {
218 + records[length].space_id = space_id;
219 + records[length].page_no = page_no;
221 + if (length * 8 >= size) {
223 + " InnoDB: could not find the "
224 + "end-of-file marker after reading "
225 + "the expected %lu bytes from the "
227 + " InnoDB: this could be caused by a "
228 + "broken or incomplete file.\n"
229 + " InnoDB: trying to process what has "
230 + "been read so far.\n",
239 + qsort(records, length, sizeof(dump_record_t), dump_record_cmp);
241 + for (offset = 0; offset < length; offset++) {
246 + ib_int64_t tablespace_version;
248 + space_id = records[offset].space_id;
249 + page_no = records[offset].page_no;
251 + if (offset % 16 == 15) {
252 + os_aio_simulated_wake_handler_threads();
253 + buf_flush_free_margins(FALSE);
256 + zip_size = fil_space_get_zip_size(space_id);
257 + if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
261 + if (fil_is_exist(space_id, page_no)) {
263 + tablespace_version = fil_space_get_version(space_id);
266 + reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
267 + | OS_AIO_SIMULATED_WAKE_LATER,
268 + space_id, zip_size, TRUE,
269 + tablespace_version, page_no, NULL);
270 + buf_LRU_stat_inc_io();
274 + os_aio_simulated_wake_handler_threads();
275 + buf_flush_free_margins(FALSE);
277 + ut_print_timestamp(stderr);
279 + " InnoDB: reading pages based on the dumped LRU list was done."
280 + " (requested: %lu, read: %lu)\n", req, reads);
283 + if (dump_file != -1)
284 + os_file_close(dump_file);
286 + ut_free(buffer_base);
293 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
294 /**********************************************************************//**
295 Validates the LRU list for one buffer pool instance. */
296 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
297 --- a/storage/innobase/buf/buf0rea.c 2010-12-03 17:49:11.576124814 +0900
298 +++ b/storage/innobase/buf/buf0rea.c 2010-12-04 15:33:37.628480605 +0900
300 which case it is never read into the pool, or if the tablespace does
301 not exist or is being dropped
302 @return 1 if read request is issued. 0 if it is not */
308 diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
309 --- a/storage/innobase/fil/fil0fil.c 2010-12-03 17:49:11.581025127 +0900
310 +++ b/storage/innobase/fil/fil0fil.c 2010-12-04 15:33:37.632482885 +0900
311 @@ -4967,6 +4967,70 @@
315 +/********************************************************************//**
316 +Check whether the given block offset lies within the given tablespace */
321 + ulint space_id, /*!< in: space id */
322 + ulint block_offset) /*!< in: offset in number of blocks */
324 + fil_space_t* space;
327 + /* Reserve the fil_system mutex and make sure that we can open at
328 + least one file while holding it, if the file is not already open */
330 + fil_mutex_enter_and_prepare_for_io(space_id);
332 + space = fil_space_get_by_id(space_id);
335 + mutex_exit(&fil_system->mutex);
339 + node = UT_LIST_GET_FIRST(space->chain);
342 + if (UNIV_UNLIKELY(node == NULL)) {
343 + mutex_exit(&fil_system->mutex);
347 + if (space->id != 0 && node->size == 0) {
348 + /* We do not know the size of a single-table tablespace
349 + before we open the file */
354 + if (node->size > block_offset) {
358 + block_offset -= node->size;
359 + node = UT_LIST_GET_NEXT(chain, node);
363 + /* Open file if closed */
364 + fil_node_prepare_for_io(node, fil_system, space);
365 + fil_node_complete_io(node, fil_system, OS_FILE_READ);
367 + /* Check that at least the start offset is within the bounds of a
368 + single-table tablespace */
369 + if (UNIV_UNLIKELY(node->size <= block_offset)
370 + && space->id != 0 && space->purpose == FIL_TABLESPACE) {
371 + mutex_exit(&fil_system->mutex);
375 + mutex_exit(&fil_system->mutex);
379 #ifndef UNIV_HOTBACKUP
380 /**********************************************************************//**
381 Waits for an aio operation to complete. This function is used to write the
382 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
383 --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:49:11.589956135 +0900
384 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:33:37.645555490 +0900
385 @@ -11706,6 +11706,12 @@
386 "Limit the allocated memory for dictionary cache. (0: unlimited)",
387 NULL, NULL, 0, 0, LONG_MAX, 0);
389 +static MYSQL_SYSVAR_UINT(auto_lru_dump, srv_auto_lru_dump,
390 + PLUGIN_VAR_RQCMDARG,
391 + "Time in seconds between automatic buffer pool dumps. "
392 + "0 (the default) disables automatic dumps.",
393 + NULL, NULL, 0, 0, UINT_MAX32, 0);
395 static struct st_mysql_sys_var* innobase_system_variables[]= {
396 MYSQL_SYSVAR(additional_mem_pool_size),
397 MYSQL_SYSVAR(autoextend_increment),
398 @@ -11788,6 +11794,7 @@
399 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
400 MYSQL_SYSVAR(read_ahead_threshold),
401 MYSQL_SYSVAR(io_capacity),
402 + MYSQL_SYSVAR(auto_lru_dump),
403 MYSQL_SYSVAR(purge_threads),
404 MYSQL_SYSVAR(purge_batch_size),
406 diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
407 --- a/storage/innobase/handler/i_s.cc 2010-12-03 17:34:35.286211349 +0900
408 +++ b/storage/innobase/handler/i_s.cc 2010-12-04 15:33:37.677480733 +0900
410 #include "trx0rseg.h" /* for trx_rseg_struct */
411 #include "trx0sys.h" /* for trx_sys */
412 #include "dict0dict.h" /* for dict_sys */
413 +#include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */
416 static const char plugin_author[] = "Innobase Oy";
417 @@ -4254,6 +4255,36 @@
421 + else if (!strncasecmp("XTRA_LRU_DUMP", ptr, 13)) {
422 + ut_print_timestamp(stderr);
423 + fprintf(stderr, " InnoDB: Administrative command 'XTRA_LRU_DUMP'"
424 + " was detected.\n");
426 + if (buf_LRU_file_dump()) {
427 + field_store_string(i_s_table->field[0],
428 + "XTRA_LRU_DUMP was succeeded.");
430 + field_store_string(i_s_table->field[0],
431 + "XTRA_LRU_DUMP was failed.");
436 + else if (!strncasecmp("XTRA_LRU_RESTORE", ptr, 16)) {
437 + ut_print_timestamp(stderr);
438 + fprintf(stderr, " InnoDB: Administrative command 'XTRA_LRU_RESTORE'"
439 + " was detected.\n");
441 + if (buf_LRU_file_restore()) {
442 + field_store_string(i_s_table->field[0],
443 + "XTRA_LRU_RESTORE was succeeded.");
445 + field_store_string(i_s_table->field[0],
446 + "XTRA_LRU_RESTORE was failed.");
452 field_store_string(i_s_table->field[0],
453 "Undefined XTRA_* command.");
454 diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
455 --- a/storage/innobase/include/buf0lru.h 2010-12-03 15:49:59.223956070 +0900
456 +++ b/storage/innobase/include/buf0lru.h 2010-12-04 15:33:37.681481467 +0900
459 buf_LRU_stat_update(void);
460 /*=====================*/
461 +/********************************************************************//**
462 +Dump the LRU page list to the specific file. */
465 +buf_LRU_file_dump(void);
466 +/*===================*/
467 +/********************************************************************//**
468 +Read the pages based on the specific file.*/
471 +buf_LRU_file_restore(void);
472 +/*======================*/
474 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
475 /**********************************************************************//**
476 diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
477 --- a/storage/innobase/include/buf0rea.h 2010-12-03 17:49:11.596953870 +0900
478 +++ b/storage/innobase/include/buf0rea.h 2010-12-04 15:33:37.682563900 +0900
480 #include "buf0types.h"
482 /********************************************************************//**
483 +Low-level function which reads a page asynchronously from a file to the
484 +buffer buf_pool if it is not already there, in which case does nothing.
485 +Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
486 +flag is cleared and the x-lock released by an i/o-handler thread.
487 +@return 1 if a read request was queued, 0 if the page already resided
488 +in buf_pool, or if the page is in the doublewrite buffer blocks in
489 +which case it is never read into the pool, or if the tablespace does
490 +not exist or is being dropped
491 +@return 1 if read request is issued. 0 if it is not */
496 + ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
497 + trying to read from a non-existent tablespace, or a
498 + tablespace which is just now being dropped */
499 + ibool sync, /*!< in: TRUE if synchronous aio is desired */
500 + ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
501 + ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
502 + at read-ahead functions) */
503 + ulint space, /*!< in: space id */
504 + ulint zip_size,/*!< in: compressed page size, or 0 */
505 + ibool unzip, /*!< in: TRUE=request uncompressed page */
506 + ib_int64_t tablespace_version, /*!< in: if the space memory object has
507 + this timestamp different from what we are giving here,
508 + treat the tablespace as dropped; this is a timestamp we
509 + use to stop dangling page reads from a tablespace
510 + which we have DISCARDed + IMPORTed back */
511 + ulint offset, /*!< in: page number */
513 +/********************************************************************//**
514 High-level function which reads a page asynchronously from a file to the
515 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
516 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
517 diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
518 --- a/storage/innobase/include/fil0fil.h 2010-12-03 17:49:11.597953501 +0900
519 +++ b/storage/innobase/include/fil0fil.h 2010-12-04 15:33:37.684551372 +0900
521 void* message, /*!< in: message for aio handler if non-sync
522 aio used, else ignored */
524 +/********************************************************************//**
525 +Check whether the given block offset lies within the given tablespace */
530 + ulint space_id, /*!< in: space id */
531 + ulint block_offset); /*!< in: offset in number of blocks */
532 /**********************************************************************//**
533 Waits for an aio operation to complete. This function is used to write the
534 handler for completed requests. The aio array of pending requests is divided
535 diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
536 --- a/storage/innobase/include/srv0srv.h 2010-12-03 17:49:11.603969747 +0900
537 +++ b/storage/innobase/include/srv0srv.h 2010-12-04 15:33:37.685550816 +0900
539 reading of a disk page */
540 extern ulint srv_buf_pool_reads;
542 +/** Time in seconds between automatic buffer pool dumps */
543 +extern uint srv_auto_lru_dump;
545 /** Status variables to be passed to MySQL */
546 typedef struct export_var_struct export_struc;
549 /*=====================*/
550 void* arg); /*!< in: a dummy parameter required by
552 +/*********************************************************************//**
553 +A thread which restores the buffer pool from a dump file on startup and does
554 +periodic buffer pool dumps.
555 +@return a dummy parameter */
558 +srv_LRU_dump_restore_thread(
559 +/*====================*/
560 + void* arg); /*!< in: a dummy parameter required by
561 + os_thread_create */
562 /******************************************************************//**
563 Outputs to a file the output of the InnoDB Monitor.
564 @return FALSE if not all information printed
565 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
566 --- a/storage/innobase/srv/srv0srv.c 2010-12-03 17:49:11.620986661 +0900
567 +++ b/storage/innobase/srv/srv0srv.c 2010-12-04 15:33:37.708550811 +0900
569 reading of a disk page */
570 UNIV_INTERN ulint srv_buf_pool_reads = 0;
572 +/** Time in seconds between automatic buffer pool dumps */
573 +UNIV_INTERN uint srv_auto_lru_dump = 0;
575 /* structure to pass status variables to MySQL */
576 UNIV_INTERN export_struc export_vars;
578 @@ -2697,6 +2700,56 @@
579 /* We count the number of threads in os_thread_exit(). A created
580 thread should always use that to exit and not use return() to exit. */
582 + os_thread_exit(NULL);
584 + OS_THREAD_DUMMY_RETURN;
587 +/*********************************************************************//**
588 +A thread which restores the buffer pool from a dump file on startup and does
589 +periodic buffer pool dumps.
590 +@return a dummy parameter */
593 +srv_LRU_dump_restore_thread(
594 +/*====================*/
595 + void* arg __attribute__((unused)))
596 + /*!< in: a dummy parameter required by
597 + os_thread_create */
599 + uint auto_lru_dump;
600 + time_t last_dump_time;
601 + time_t time_elapsed;
603 +#ifdef UNIV_DEBUG_THREAD_CREATION
604 + fprintf(stderr, "The LRU dump/restore thread has started, id %lu\n",
605 + os_thread_pf(os_thread_get_curr_id()));
608 + if (srv_auto_lru_dump)
609 + buf_LRU_file_restore();
611 + last_dump_time = time(NULL);
614 + os_thread_sleep(5000000);
616 + if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
620 + time_elapsed = time(NULL) - last_dump_time;
621 + auto_lru_dump = srv_auto_lru_dump;
622 + if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) {
623 + last_dump_time = time(NULL);
624 + buf_LRU_file_dump();
629 + /* We count the number of threads in os_thread_exit(). A created
630 + thread should always use that to exit and not use return() to exit. */
632 os_thread_exit(NULL);
634 OS_THREAD_DUMMY_RETURN;
635 diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
636 --- a/storage/innobase/srv/srv0start.c 2010-12-03 15:18:48.916955609 +0900
637 +++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:33:37.711484798 +0900
639 static os_file_t files[1000];
641 /** io_handler_thread parameters for thread identification */
642 -static ulint n[SRV_MAX_N_IO_THREADS + 6];
643 +static ulint n[SRV_MAX_N_IO_THREADS + 7];
644 /** io_handler_thread identifiers */
645 -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6];
646 +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7];
648 /** We use this mutex to test the return value of pthread_mutex_trylock
649 on successful locking. HP-UX does NOT return 0, though Linux et al do. */
650 @@ -1813,6 +1813,10 @@
651 os_thread_create(&srv_monitor_thread, NULL,
652 thread_ids + 4 + SRV_MAX_N_IO_THREADS);
654 + /* Create the thread which automatically dumps/restores the buffer pool */
655 + os_thread_create(&srv_LRU_dump_restore_thread, NULL,
656 + thread_ids + 5 + SRV_MAX_N_IO_THREADS);
658 srv_is_being_started = FALSE;
660 err = dict_create_or_check_foreign_constraint_tables();