1 # name : innodb_lru_dump_restore.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
9 --- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:49:59.185023424 +0900
10 +++ b/storage/innobase/buf/buf0lru.c 2010-12-04 15:33:37.626482350 +0900
11 @@ -2250,6 +2250,285 @@
12 memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
15 +/********************************************************************//**
16 +Dump the LRU page list to the dump file LRU_DUMP_FILE. */
17 +#define LRU_DUMP_FILE "ib_lru_dump"
21 +buf_LRU_file_dump(void)
22 +/*===================*/
24 + os_file_t dump_file = -1;
26 + byte* buffer_base = NULL;
27 + byte* buffer = NULL;
34 + for (i = 0; i < srv_n_data_files; i++) {
35 + if (strstr(srv_data_file_names[i], LRU_DUMP_FILE) != NULL) {
37 + " InnoDB: The name '%s' seems to be used for"
38 + " innodb_data_file_path. Dumping LRU list is not"
39 + " done for safeness.\n", LRU_DUMP_FILE);
44 + buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
45 + buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
48 + " InnoDB: cannot allocate buffer.\n");
52 + dump_file = os_file_create(innodb_file_temp_key, LRU_DUMP_FILE, OS_FILE_OVERWRITE,
53 + OS_FILE_NORMAL, OS_DATA_FILE, &success);
55 + os_file_get_last_error(TRUE);
57 + " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
61 + buffers = offset = 0;
63 + for (i = 0; i < srv_buf_pool_instances; i++) {
64 + buf_pool_t* buf_pool;
66 + buf_pool = buf_pool_from_array(i);
68 + mutex_enter(&buf_pool->LRU_list_mutex);
69 + bpage = UT_LIST_GET_LAST(buf_pool->LRU);
71 + while (bpage != NULL) {
73 + memset(buffer, 0, UNIV_PAGE_SIZE);
76 + mach_write_to_4(buffer + offset * 4, bpage->space);
78 + mach_write_to_4(buffer + offset * 4, bpage->offset);
81 + if (offset == UNIV_PAGE_SIZE/4) {
82 + success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
83 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
84 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
87 + mutex_exit(&buf_pool->LRU_list_mutex);
89 + " InnoDB: cannot write page %lu of %s\n",
90 + buffers, LRU_DUMP_FILE);
97 + bpage = UT_LIST_GET_PREV(LRU, bpage);
99 + mutex_exit(&buf_pool->LRU_list_mutex);
103 + memset(buffer, 0, UNIV_PAGE_SIZE);
106 + mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
108 + mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
111 + success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
112 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
113 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
121 + if (dump_file != -1)
122 + os_file_close(dump_file);
124 + ut_free(buffer_base);
130 + ib_uint32_t space_id;
131 + ib_uint32_t page_no;
134 +static int dump_record_cmp(const void *a, const void *b)
136 + const dump_record_t *rec1 = (dump_record_t *) a;
137 + const dump_record_t *rec2 = (dump_record_t *) b;
139 + if (rec1->space_id < rec2->space_id)
141 + if (rec1->space_id > rec2->space_id)
143 + if (rec1->page_no < rec2->page_no)
145 + return rec1->page_no > rec2->page_no;
148 +/********************************************************************//**
149 +Read into the buffer pool the pages listed in the LRU dump file. */
152 +buf_LRU_file_restore(void)
153 +/*======================*/
155 + os_file_t dump_file = -1;
157 + byte* buffer_base = NULL;
158 + byte* buffer = NULL;
163 + ibool terminated = FALSE;
165 + dump_record_t* records = NULL;
170 + dump_file = os_file_create_simple_no_error_handling(innodb_file_temp_key,
171 + LRU_DUMP_FILE, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
172 + if (!success || !os_file_get_size(dump_file, &size, &size_high)) {
173 + os_file_get_last_error(TRUE);
175 + " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
178 + if (size == 0 || size_high > 0 || size % 8) {
179 + fprintf(stderr, " InnoDB: broken LRU dump file\n");
182 + buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
183 + buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
184 + records = ut_malloc(size);
185 + if (!buffer || !records) {
187 + " InnoDB: cannot allocate buffer.\n");
193 + while (!terminated) {
194 + success = os_file_read(dump_file, buffer,
195 + (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
196 + (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
200 + " InnoDB: cannot read page %lu of %s,"
201 + " or meet unexpected terminal.\n",
202 + buffers, LRU_DUMP_FILE);
206 + for (offset = 0; offset < UNIV_PAGE_SIZE/4; offset += 2) {
210 + space_id = mach_read_from_4(buffer + offset * 4);
211 + page_no = mach_read_from_4(buffer + (offset + 1) * 4);
212 + if (space_id == 0xFFFFFFFFUL
213 + || page_no == 0xFFFFFFFFUL) {
218 + records[length].space_id = space_id;
219 + records[length].page_no = page_no;
221 + if (length * 8 >= size) {
223 + " InnoDB: could not find the "
224 + "end-of-file marker after reading "
225 + "the expected %lu bytes from the "
227 + " InnoDB: this could be caused by a "
228 + "broken or incomplete file.\n"
229 + " InnoDB: trying to process what has "
230 + "been read so far.\n",
239 + qsort(records, length, sizeof(dump_record_t), dump_record_cmp);
241 + for (offset = 0; offset < length; offset++) {
246 + ib_int64_t tablespace_version;
248 + space_id = records[offset].space_id;
249 + page_no = records[offset].page_no;
251 + if (offset % 16 == 15) {
252 + os_aio_simulated_wake_handler_threads();
253 + buf_flush_free_margins(FALSE);
256 + zip_size = fil_space_get_zip_size(space_id);
257 + if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
261 + if (fil_area_is_exist(space_id, zip_size, page_no, 0,
262 + zip_size ? zip_size : UNIV_PAGE_SIZE)) {
264 + tablespace_version = fil_space_get_version(space_id);
267 + reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
268 + | OS_AIO_SIMULATED_WAKE_LATER,
269 + space_id, zip_size, TRUE,
270 + tablespace_version, page_no, NULL);
271 + buf_LRU_stat_inc_io();
275 + os_aio_simulated_wake_handler_threads();
276 + buf_flush_free_margins(FALSE);
278 + ut_print_timestamp(stderr);
280 + " InnoDB: reading pages based on the dumped LRU list was done."
281 + " (requested: %lu, read: %lu)\n", req, reads);
284 + if (dump_file != -1)
285 + os_file_close(dump_file);
287 + ut_free(buffer_base);
294 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
295 /**********************************************************************//**
296 Validates the LRU list for one buffer pool instance. */
297 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
298 --- a/storage/innobase/buf/buf0rea.c 2010-12-03 17:49:11.576124814 +0900
299 +++ b/storage/innobase/buf/buf0rea.c 2010-12-04 15:33:37.628480605 +0900
301 which case it is never read into the pool, or if the tablespace does
302 not exist or is being dropped
303 @return 1 if read request is issued. 0 if it is not */
309 diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
310 --- a/storage/innobase/fil/fil0fil.c 2010-12-03 17:49:11.581025127 +0900
311 +++ b/storage/innobase/fil/fil0fil.c 2010-12-04 15:33:37.632482885 +0900
312 @@ -4939,6 +4939,78 @@
316 +/********************************************************************//**
317 +Confirm whether the parameters are valid or not */
322 + ulint space_id, /*!< in: space id */
323 + ulint zip_size, /*!< in: compressed page size in bytes;
324 + 0 for uncompressed pages */
325 + ulint block_offset, /*!< in: offset in number of blocks */
326 + ulint byte_offset, /*!< in: remainder of offset in bytes; in
327 + aio this must be divisible by the OS block
329 + ulint len) /*!< in: how many bytes to read or write; this
330 + must not cross a file boundary; in aio this
331 + must be a block size multiple */
333 + fil_space_t* space;
336 + /* Reserve the fil_system mutex and make sure that we can open at
337 + least one file while holding it, if the file is not already open */
339 + fil_mutex_enter_and_prepare_for_io(space_id);
341 + space = fil_space_get_by_id(space_id);
344 + mutex_exit(&fil_system->mutex);
348 + node = UT_LIST_GET_FIRST(space->chain);
351 + if (UNIV_UNLIKELY(node == NULL)) {
352 + mutex_exit(&fil_system->mutex);
356 + if (space->id != 0 && node->size == 0) {
357 + /* We do not know the size of a single-table tablespace
358 + before we open the file */
363 + if (node->size > block_offset) {
367 + block_offset -= node->size;
368 + node = UT_LIST_GET_NEXT(chain, node);
372 + /* Open file if closed */
373 + fil_node_prepare_for_io(node, fil_system, space);
374 + fil_node_complete_io(node, fil_system, OS_FILE_READ);
376 + /* Check that at least the start offset is within the bounds of a
377 + single-table tablespace */
378 + if (UNIV_UNLIKELY(node->size <= block_offset)
379 + && space->id != 0 && space->purpose == FIL_TABLESPACE) {
380 + mutex_exit(&fil_system->mutex);
384 + mutex_exit(&fil_system->mutex);
388 #ifndef UNIV_HOTBACKUP
389 /**********************************************************************//**
390 Waits for an aio operation to complete. This function is used to write the
391 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
392 --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:49:11.589956135 +0900
393 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:33:37.645555490 +0900
394 @@ -11708,6 +11708,12 @@
395 "Limit the allocated memory for dictionary cache. (0: unlimited)",
396 NULL, NULL, 0, 0, LONG_MAX, 0);
398 +static MYSQL_SYSVAR_UINT(auto_lru_dump, srv_auto_lru_dump,
399 + PLUGIN_VAR_RQCMDARG,
400 + "Time in seconds between automatic buffer pool dumps. "
401 + "0 (the default) disables automatic dumps.",
402 + NULL, NULL, 0, 0, UINT_MAX32, 0);
404 static struct st_mysql_sys_var* innobase_system_variables[]= {
405 MYSQL_SYSVAR(additional_mem_pool_size),
406 MYSQL_SYSVAR(autoextend_increment),
407 @@ -11791,6 +11797,7 @@
408 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
409 MYSQL_SYSVAR(read_ahead_threshold),
410 MYSQL_SYSVAR(io_capacity),
411 + MYSQL_SYSVAR(auto_lru_dump),
412 MYSQL_SYSVAR(purge_threads),
413 MYSQL_SYSVAR(purge_batch_size),
415 diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
416 --- a/storage/innobase/handler/i_s.cc 2010-12-03 17:34:35.286211349 +0900
417 +++ b/storage/innobase/handler/i_s.cc 2010-12-04 15:33:37.677480733 +0900
419 #include "trx0rseg.h" /* for trx_rseg_struct */
420 #include "trx0sys.h" /* for trx_sys */
421 #include "dict0dict.h" /* for dict_sys */
422 +#include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */
425 static const char plugin_author[] = "Innobase Oy";
426 @@ -4255,6 +4256,36 @@
430 + else if (!strncasecmp("XTRA_LRU_DUMP", ptr, 13)) {
431 + ut_print_timestamp(stderr);
432 + fprintf(stderr, " InnoDB: administration command 'XTRA_LRU_DUMP'"
433 + " was detected.\n");
435 + if (buf_LRU_file_dump()) {
436 + field_store_string(i_s_table->field[0],
437 + "XTRA_LRU_DUMP was succeeded.");
439 + field_store_string(i_s_table->field[0],
440 + "XTRA_LRU_DUMP was failed.");
445 + else if (!strncasecmp("XTRA_LRU_RESTORE", ptr, 16)) {
446 + ut_print_timestamp(stderr);
447 + fprintf(stderr, " InnoDB: administration command 'XTRA_LRU_RESTORE'"
448 + " was detected.\n");
450 + if (buf_LRU_file_restore()) {
451 + field_store_string(i_s_table->field[0],
452 + "XTRA_LRU_RESTORE was succeeded.");
454 + field_store_string(i_s_table->field[0],
455 + "XTRA_LRU_RESTORE was failed.");
461 field_store_string(i_s_table->field[0],
462 "Undefined XTRA_* command.");
463 diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
464 --- a/storage/innobase/include/buf0lru.h 2010-12-03 15:49:59.223956070 +0900
465 +++ b/storage/innobase/include/buf0lru.h 2010-12-04 15:33:37.681481467 +0900
468 buf_LRU_stat_update(void);
469 /*=====================*/
470 +/********************************************************************//**
471 +Dump the LRU page list to the LRU dump file. */
474 +buf_LRU_file_dump(void);
475 +/*===================*/
476 +/********************************************************************//**
477 +Read into the buffer pool the pages listed in the LRU dump file. */
480 +buf_LRU_file_restore(void);
481 +/*======================*/
483 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
484 /**********************************************************************//**
485 diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
486 --- a/storage/innobase/include/buf0rea.h 2010-12-03 17:49:11.596953870 +0900
487 +++ b/storage/innobase/include/buf0rea.h 2010-12-04 15:33:37.682563900 +0900
489 #include "buf0types.h"
491 /********************************************************************//**
492 +Low-level function which reads a page asynchronously from a file to the
493 +buffer buf_pool if it is not already there, in which case does nothing.
494 +Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
495 +flag is cleared and the x-lock released by an i/o-handler thread.
496 +@return 1 if a read request was queued, 0 if the page already resided
497 +in buf_pool, or if the page is in the doublewrite buffer blocks in
498 +which case it is never read into the pool, or if the tablespace does
499 +not exist or is being dropped
500 +@return 1 if read request is issued. 0 if it is not */
505 + ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
506 + trying to read from a non-existent tablespace, or a
507 + tablespace which is just now being dropped */
508 + ibool sync, /*!< in: TRUE if synchronous aio is desired */
509 + ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
510 + ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
511 + at read-ahead functions) */
512 + ulint space, /*!< in: space id */
513 + ulint zip_size,/*!< in: compressed page size, or 0 */
514 + ibool unzip, /*!< in: TRUE=request uncompressed page */
515 + ib_int64_t tablespace_version, /*!< in: if the space memory object has
516 + this timestamp different from what we are giving here,
517 + treat the tablespace as dropped; this is a timestamp we
518 + use to stop dangling page reads from a tablespace
519 + which we have DISCARDed + IMPORTed back */
520 + ulint offset, /*!< in: page number */
522 +/********************************************************************//**
523 High-level function which reads a page asynchronously from a file to the
524 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
525 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
526 diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
527 --- a/storage/innobase/include/fil0fil.h 2010-12-03 17:49:11.597953501 +0900
528 +++ b/storage/innobase/include/fil0fil.h 2010-12-04 15:33:37.684551372 +0900
530 void* message, /*!< in: message for aio handler if non-sync
531 aio used, else ignored */
533 +/********************************************************************//**
534 +Confirm whether the parameters are valid or not */
539 + ulint space_id, /*!< in: space id */
540 + ulint zip_size, /*!< in: compressed page size in bytes;
541 + 0 for uncompressed pages */
542 + ulint block_offset, /*!< in: offset in number of blocks */
543 + ulint byte_offset, /*!< in: remainder of offset in bytes; in
544 + aio this must be divisible by the OS block
546 + ulint len); /*!< in: how many bytes to read or write; this
547 + must not cross a file boundary; in aio this
548 + must be a block size multiple */
549 /**********************************************************************//**
550 Waits for an aio operation to complete. This function is used to write the
551 handler for completed requests. The aio array of pending requests is divided
552 diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
553 --- a/storage/innobase/include/srv0srv.h 2010-12-03 17:49:11.603969747 +0900
554 +++ b/storage/innobase/include/srv0srv.h 2010-12-04 15:33:37.685550816 +0900
556 reading of a disk page */
557 extern ulint srv_buf_pool_reads;
559 +/** Time in seconds between automatic buffer pool dumps */
560 +extern uint srv_auto_lru_dump;
562 /** Status variables to be passed to MySQL */
563 typedef struct export_var_struct export_struc;
566 /*=====================*/
567 void* arg); /*!< in: a dummy parameter required by
569 +/*********************************************************************//**
570 +A thread which restores the buffer pool from a dump file on startup and does
571 +periodic buffer pool dumps.
572 +@return a dummy parameter */
575 +srv_LRU_dump_restore_thread(
576 +/*====================*/
577 + void* arg); /*!< in: a dummy parameter required by
578 + os_thread_create */
579 /******************************************************************//**
580 Outputs to a file the output of the InnoDB Monitor.
581 @return FALSE if not all information printed
582 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
583 --- a/storage/innobase/srv/srv0srv.c 2010-12-03 17:49:11.620986661 +0900
584 +++ b/storage/innobase/srv/srv0srv.c 2010-12-04 15:33:37.708550811 +0900
586 reading of a disk page */
587 UNIV_INTERN ulint srv_buf_pool_reads = 0;
589 +/** Time in seconds between automatic buffer pool dumps */
590 +UNIV_INTERN uint srv_auto_lru_dump = 0;
592 /* structure to pass status variables to MySQL */
593 UNIV_INTERN export_struc export_vars;
595 @@ -2663,6 +2666,56 @@
596 /* We count the number of threads in os_thread_exit(). A created
597 thread should always use that to exit and not use return() to exit. */
599 + os_thread_exit(NULL);
601 + OS_THREAD_DUMMY_RETURN;
604 +/*********************************************************************//**
605 +A thread which restores the buffer pool from a dump file on startup and does
606 +periodic buffer pool dumps.
607 +@return a dummy parameter */
610 +srv_LRU_dump_restore_thread(
611 +/*====================*/
612 + void* arg __attribute__((unused)))
613 + /*!< in: a dummy parameter required by
614 + os_thread_create */
616 + uint auto_lru_dump;
617 + time_t last_dump_time;
618 + time_t time_elapsed;
620 +#ifdef UNIV_DEBUG_THREAD_CREATION
621 + fprintf(stderr, "LRU dump/restore thread starts, id %lu\n",
622 + os_thread_pf(os_thread_get_curr_id()));
625 + if (srv_auto_lru_dump)
626 + buf_LRU_file_restore();
628 + last_dump_time = time(NULL);
631 + os_thread_sleep(5000000);
633 + if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
637 + time_elapsed = time(NULL) - last_dump_time;
638 + auto_lru_dump = srv_auto_lru_dump;
639 + if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) {
640 + last_dump_time = time(NULL);
641 + buf_LRU_file_dump();
646 + /* We count the number of threads in os_thread_exit(). A created
647 + thread should always use that to exit and not use return() to exit. */
649 os_thread_exit(NULL);
651 OS_THREAD_DUMMY_RETURN;
652 diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
653 --- a/storage/innobase/srv/srv0start.c 2010-12-03 15:18:48.916955609 +0900
654 +++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:33:37.711484798 +0900
656 static os_file_t files[1000];
658 /** io_handler_thread parameters for thread identification */
659 -static ulint n[SRV_MAX_N_IO_THREADS + 6];
660 +static ulint n[SRV_MAX_N_IO_THREADS + 7];
661 /** io_handler_thread identifiers */
662 -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6];
663 +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7];
665 /** We use this mutex to test the return value of pthread_mutex_trylock
666 on successful locking. HP-UX does NOT return 0, though Linux et al do. */
667 @@ -1737,6 +1737,10 @@
668 os_thread_create(&srv_monitor_thread, NULL,
669 thread_ids + 4 + SRV_MAX_N_IO_THREADS);
671 + /* Create the thread which automatically dumps/restores the buffer pool */
672 + os_thread_create(&srv_LRU_dump_restore_thread, NULL,
673 + thread_ids + 5 + SRV_MAX_N_IO_THREADS);
675 srv_is_being_started = FALSE;
677 err = dict_create_or_check_foreign_constraint_tables();