1 # name : innodb_separate_doublewrite.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 --- a/storage/innobase/buf/buf0buf.c
9 +++ b/storage/innobase/buf/buf0buf.c
11 read_space_id = mach_read_from_4(
12 frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
14 - if (bpage->space == TRX_SYS_SPACE
15 + if ((bpage->space == TRX_SYS_SPACE
16 + || (srv_doublewrite_file && bpage->space == TRX_DOUBLEWRITE_SPACE))
17 && trx_doublewrite_page_inside(bpage->offset)) {
19 ut_print_timestamp(stderr);
20 --- a/storage/innobase/buf/buf0flu.c
21 +++ b/storage/innobase/buf/buf0flu.c
23 write_buf = trx_doublewrite->write_buf;
26 - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
27 + fil_io(OS_FILE_WRITE, TRUE,
28 + (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0,
29 trx_doublewrite->block1, 0, len,
30 (void*) write_buf, NULL);
33 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
34 ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
36 - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
37 + fil_io(OS_FILE_WRITE, TRUE,
38 + (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0,
39 trx_doublewrite->block2, 0, len,
40 (void*) write_buf, NULL);
44 /* Now flush the doublewrite buffer data to disk */
46 - fil_flush(TRX_SYS_SPACE, FALSE);
47 + fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE, FALSE);
49 /* We know that the writes have been flushed to disk now
50 and in recovery we will find them in the doublewrite buffer
51 --- a/storage/innobase/buf/buf0rea.c
52 +++ b/storage/innobase/buf/buf0rea.c
54 wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
55 mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
57 - if (trx_doublewrite && space == TRX_SYS_SPACE
59 + && (space == TRX_SYS_SPACE
60 + || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
61 && ( (offset >= trx_doublewrite->block1
62 && offset < trx_doublewrite->block1
63 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
64 --- a/storage/innobase/dict/dict0load.c
65 +++ b/storage/innobase/dict/dict0load.c
67 #include "srv0start.h"
69 #include "ha_prototypes.h" /* innobase_casedn_str() */
73 /** Following are six InnoDB system tables */
78 - if (space_id == 0) {
79 + if (trx_sys_sys_space(space_id)) {
80 /* The system tablespace always exists. */
81 } else if (in_crash_recovery) {
82 /* Check that the tablespace (the .ibd file) really
84 space = mach_read_from_4(field);
86 /* Check if the tablespace exists and has the right name */
88 + if (!trx_sys_sys_space(space)) {
89 flags = dict_sys_tables_get_flags(rec);
91 if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
96 - if (table->space == 0) {
97 + if (trx_sys_sys_space(table->space)) {
98 /* The system tablespace is always available. */
99 } else if (!fil_space_for_table_exists_in_mem(
101 --- a/storage/innobase/fil/fil0fil.c
102 +++ b/storage/innobase/fil/fil0fil.c
105 UT_LIST_ADD_LAST(chain, space->chain, node);
107 - if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
108 + if (id < SRV_EXTRA_SYS_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
110 fil_system->max_assigned_id = id;
112 @@ -721,14 +721,14 @@
113 size_bytes = (((ib_int64_t)size_high) << 32)
114 + (ib_int64_t)size_low;
115 #ifdef UNIV_HOTBACKUP
116 - if (space->id == 0) {
117 + if (trx_sys_sys_space(space->id)) {
118 node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
119 os_file_close(node->handle);
122 #endif /* UNIV_HOTBACKUP */
123 ut_a(space->purpose != FIL_LOG);
124 - ut_a(space->id != 0);
125 + ut_a(!trx_sys_sys_space(space->id));
127 if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
132 if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
133 - || space_id == 0)) {
134 + || trx_sys_sys_space(space_id))) {
136 "InnoDB: Error: tablespace id %lu"
137 " in file %s is not sensible\n",
142 - if (space->purpose == FIL_TABLESPACE && space->id != 0) {
143 + if (space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(space->id)) {
144 /* Put the node to the LRU list */
145 UT_LIST_ADD_FIRST(LRU, system->LRU, node);
148 ut_a(system->n_open > 0);
151 - if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
152 + if (node->space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(node->space->id)) {
153 ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
155 /* The node is in the LRU list, remove it */
158 mutex_enter(&fil_system->mutex);
160 - if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
161 + if (trx_sys_sys_space(space_id) || space_id >= SRV_LOG_SPACE_FIRST_ID) {
162 /* We keep log files and system tablespace files always open;
163 this is important in preventing deadlocks in this module, as
164 a page read completion often performs another read from the
165 @@ -1193,7 +1193,7 @@
166 " tablespace memory cache!\n",
169 - if (id == 0 || purpose != FIL_TABLESPACE) {
170 + if (trx_sys_sys_space(id) || purpose != FIL_TABLESPACE) {
172 mutex_exit(&fil_system->mutex);
174 @@ -1255,6 +1255,7 @@
177 if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on)
178 + && UNIV_UNLIKELY(id < SRV_EXTRA_SYS_SPACE_FIRST_ID)
179 && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) {
180 if (!fil_system->space_id_reuse_warned) {
181 fil_system->space_id_reuse_warned = TRUE;
182 @@ -1338,7 +1339,7 @@
183 (ulong) SRV_LOG_SPACE_FIRST_ID);
186 - success = (id < SRV_LOG_SPACE_FIRST_ID);
187 + success = (id < SRV_EXTRA_SYS_SPACE_FIRST_ID);
190 *space_id = fil_system->max_assigned_id = id;
191 @@ -1601,6 +1602,8 @@
192 UT_LIST_INIT(fil_system->LRU);
194 fil_system->max_n_open = max_n_open;
196 + fil_system->max_assigned_id = TRX_SYS_SPACE_MAX;
199 /*******************************************************************//**
200 @@ -1622,7 +1625,7 @@
201 space = UT_LIST_GET_FIRST(fil_system->space_list);
203 while (space != NULL) {
204 - if (space->purpose != FIL_TABLESPACE || space->id == 0) {
205 + if (space->purpose != FIL_TABLESPACE || trx_sys_sys_space(space->id)) {
206 node = UT_LIST_GET_FIRST(space->chain);
208 while (node != NULL) {
209 @@ -1712,6 +1715,10 @@
213 + if (max_id >= SRV_EXTRA_SYS_SPACE_FIRST_ID) {
217 mutex_enter(&fil_system->mutex);
219 if (fil_system->max_assigned_id < max_id) {
220 @@ -1730,6 +1737,7 @@
222 fil_write_lsn_and_arch_no_to_file(
223 /*==============================*/
225 ulint sum_of_sizes, /*!< in: combined size of previous files
226 in space, in database pages */
227 ib_uint64_t lsn, /*!< in: lsn to write */
228 @@ -1739,14 +1747,16 @@
232 + ut_a(trx_sys_sys_space(space_id));
234 buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
235 buf = ut_align(buf1, UNIV_PAGE_SIZE);
237 - fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
238 + fil_read(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
240 mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
242 - fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
243 + fil_write(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
247 @@ -1782,7 +1792,7 @@
250 if (space->purpose == FIL_TABLESPACE
251 - && space->id == 0) {
252 + && trx_sys_sys_space(space->id)) {
255 node = UT_LIST_GET_FIRST(space->chain);
256 @@ -1790,7 +1800,7 @@
257 mutex_exit(&fil_system->mutex);
259 err = fil_write_lsn_and_arch_no_to_file(
260 - sum_of_sizes, lsn, arch_log_no);
261 + space->id, sum_of_sizes, lsn, arch_log_no);
262 if (err != DB_SUCCESS) {
265 @@ -4176,7 +4186,7 @@
268 #ifndef UNIV_HOTBACKUP
269 - if (space_id == ULINT_UNDEFINED || space_id == 0) {
270 + if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) {
272 "InnoDB: Error: tablespace id %lu in file %s"
273 " is not sensible\n",
274 @@ -4185,7 +4195,7 @@
278 - if (space_id == ULINT_UNDEFINED || space_id == 0) {
279 + if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) {
283 @@ -5006,7 +5016,7 @@
286 if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
287 - && space->id != 0) {
288 + && !trx_sys_sys_space(space->id)) {
289 /* The node is in the LRU list, remove it */
291 ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
292 @@ -5052,7 +5062,7 @@
295 if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
296 - && node->space->id != 0) {
297 + && !trx_sys_sys_space(node->space->id)) {
298 /* The node must be put back to the LRU list */
299 UT_LIST_ADD_FIRST(LRU, system->LRU, node);
301 @@ -5663,7 +5673,7 @@
302 ut_a(fil_node->n_pending == 0);
303 ut_a(fil_node->open);
304 ut_a(fil_node->space->purpose == FIL_TABLESPACE);
305 - ut_a(fil_node->space->id != 0);
306 + ut_a(!trx_sys_sys_space(fil_node->space->id));
308 fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
310 --- a/storage/innobase/fsp/fsp0fsp.c
311 +++ b/storage/innobase/fsp/fsp0fsp.c
313 # include "log0log.h"
314 #endif /* UNIV_HOTBACKUP */
315 #include "dict0mem.h"
317 +#include "trx0sys.h"
319 /* FILE SEGMENT INODE
321 @@ -938,10 +938,10 @@
322 flst_init(header + FSP_SEG_INODES_FREE, mtr);
324 mlog_write_ull(header + FSP_SEG_ID, 1, mtr);
326 + if (space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE) {
327 fsp_fill_free_list(FALSE, space, header, mtr);
328 btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
329 - 0, 0, DICT_IBUF_ID_MIN + space,
330 + space, 0, DICT_IBUF_ID_MIN + space,
331 dict_ind_redundant, mtr);
333 fsp_fill_free_list(TRUE, space, header, mtr);
334 --- a/storage/innobase/handler/ha_innodb.cc
335 +++ b/storage/innobase/handler/ha_innodb.cc
337 static char* innobase_log_group_home_dir = NULL;
338 static char* innobase_file_format_name = NULL;
339 static char* innobase_change_buffering = NULL;
340 +static char* innobase_doublewrite_file = NULL;
342 /* The highest file format being used in the database. The value can be
343 set by user, however, it will be adjusted to the newer file format if
344 @@ -2508,6 +2509,8 @@
348 + srv_doublewrite_file = innobase_doublewrite_file;
350 srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table;
352 /* -------------- Log files ---------------------------*/
353 @@ -11771,6 +11774,11 @@
354 "Path to individual files and their sizes.",
357 +static MYSQL_SYSVAR_STR(doublewrite_file, innobase_doublewrite_file,
358 + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
359 + "Path to special datafile for doublewrite buffer. (default is "": not used) ### ONLY FOR EXPERTS!!! ###",
362 static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
363 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
364 "The AUTOINC lock modes supported by InnoDB: "
365 @@ -11990,6 +11998,7 @@
366 MYSQL_SYSVAR(commit_concurrency),
367 MYSQL_SYSVAR(concurrency_tickets),
368 MYSQL_SYSVAR(data_file_path),
369 + MYSQL_SYSVAR(doublewrite_file),
370 MYSQL_SYSVAR(data_home_dir),
371 MYSQL_SYSVAR(doublewrite),
372 MYSQL_SYSVAR(recovery_stats),
373 --- a/storage/innobase/include/mtr0log.ic
374 +++ b/storage/innobase/include/mtr0log.ic
378 #include "fsp0types.h"
379 +#include "srv0srv.h"
382 /********************************************************//**
383 Opens a buffer to mlog. It must be closed with mlog_close.
384 @return buffer, NULL if log mode MTR_LOG_NONE */
386 the doublewrite buffer is located in pages
387 FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
389 - if (space == TRX_SYS_SPACE
390 + if ((space == TRX_SYS_SPACE
391 + || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
392 && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
393 if (trx_doublewrite_buf_is_being_created) {
394 /* Do nothing: we only come to this branch in an
395 --- a/storage/innobase/include/srv0srv.h
396 +++ b/storage/innobase/include/srv0srv.h
398 extern ulint* srv_data_file_sizes;
399 extern ulint* srv_data_file_is_raw_partition;
401 +extern char* srv_doublewrite_file;
403 extern ibool srv_recovery_stats;
405 extern ibool srv_auto_extend_last_data_file;
406 --- a/storage/innobase/include/srv0start.h
407 +++ b/storage/innobase/include/srv0start.h
409 /** Log 'spaces' have id's >= this */
410 #define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL
412 +/** Space ids starting from this value are reserved for extra system tablespaces */
413 +#define SRV_EXTRA_SYS_SPACE_FIRST_ID 0xFFFFFFE0UL
416 --- a/storage/innobase/include/trx0sys.h
417 +++ b/storage/innobase/include/trx0sys.h
420 ulint space, /*!< in: space */
421 ulint page_no);/*!< in: page number */
422 +/***************************************************************//**
423 +Checks if a space is one of the system tablespaces.
424 +@return TRUE if system tablespace */
429 + ulint space); /*!< in: space */
430 +/***************************************************************//**
431 +Checks if a space is the doublewrite tablespace.
432 +@return TRUE if doublewrite tablespace */
435 +trx_sys_doublewrite_space(
436 +/*======================*/
437 + ulint space); /*!< in: space */
438 /*****************************************************************//**
439 Creates and initializes the central memory structures for the transaction
440 system. This is called when the database is started. */
443 trx_sys_create(void);
445 +/*****************************************************************//**
446 +Creates and initializes the dummy transaction system page for tablespace. */
449 +trx_sys_dummy_create(
450 +/*=================*/
452 /****************************************************************//**
453 Looks for a free slot for a rollback segment in the trx system file copy.
454 @return slot index or ULINT_UNDEFINED if not found */
457 /* Space id and page no where the trx system file copy resides */
458 #define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
459 +#define TRX_DOUBLEWRITE_SPACE 0xFFFFFFE0UL /* the doublewrite buffer tablespace if used */
460 +#define TRX_SYS_SPACE_MAX 9 /* reserved max space id for system tablespaces */
462 #define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
464 --- a/storage/innobase/include/trx0sys.ic
465 +++ b/storage/innobase/include/trx0sys.ic
469 /***************************************************************//**
470 +Checks if a space is one of the system tablespaces.
471 +@return TRUE if system tablespace */
476 + ulint space) /*!< in: space */
478 + if (srv_doublewrite_file) {
479 + /* several spaces are reserved */
480 + return((ibool)(space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE));
482 + return((ibool)(space == TRX_SYS_SPACE));
486 +/***************************************************************//**
487 +Checks if a space is the doublewrite tablespace.
488 +@return TRUE if doublewrite tablespace */
491 +trx_sys_doublewrite_space(
492 +/*======================*/
493 + ulint space) /*!< in: space */
495 + if (srv_doublewrite_file) {
496 + /* doublewrite buffer is separated */
497 + return((ibool)(space == TRX_DOUBLEWRITE_SPACE));
499 + return((ibool)(space == TRX_SYS_SPACE));
503 +/***************************************************************//**
504 Gets the pointer in the nth slot of the rseg array.
505 @return pointer to rseg object, NULL if slot not in use */
507 --- a/storage/innobase/row/row0mysql.c
508 +++ b/storage/innobase/row/row0mysql.c
509 @@ -3436,7 +3436,7 @@
510 /* Do not drop possible .ibd tablespace if something went
511 wrong: we do not want to delete valuable data of the user */
513 - if (err == DB_SUCCESS && space_id > 0) {
514 + if (err == DB_SUCCESS && !trx_sys_sys_space(space_id)) {
515 if (!fil_space_for_table_exists_in_mem(space_id,
518 --- a/storage/innobase/srv/srv0srv.c
519 +++ b/storage/innobase/srv/srv0srv.c
521 /* size in database pages */
522 UNIV_INTERN ulint* srv_data_file_sizes = NULL;
524 +UNIV_INTERN char* srv_doublewrite_file = NULL;
526 UNIV_INTERN ibool srv_recovery_stats = FALSE;
528 /* if TRUE, then we auto-extend the last data file */
529 --- a/storage/innobase/srv/srv0start.c
530 +++ b/storage/innobase/srv/srv0start.c
532 /*======================*/
533 ibool* create_new_db, /*!< out: TRUE if new database should be
535 + ibool* create_new_doublewrite_file,
536 #ifdef UNIV_LOG_ARCHIVE
537 ulint* min_arch_log_no,/*!< out: min of archived log
538 numbers in data files */
540 *sum_of_new_sizes = 0;
542 *create_new_db = FALSE;
543 + *create_new_doublewrite_file = FALSE;
545 srv_normalize_path_for_win(srv_data_home);
547 @@ -1004,6 +1006,142 @@
548 srv_data_file_is_raw_partition[i] != 0);
551 + /* special file for doublewrite buffer */
552 + if (srv_doublewrite_file)
554 + srv_normalize_path_for_win(srv_doublewrite_file);
557 + "InnoDB: Note: The innodb_doublewrite_file option has been specified.\n"
558 + "InnoDB: This option is for experts only. Don't use it unless you understand WELL what it is.\n"
559 + "InnoDB: ### Don't specify a file older than the last checkpoint. ###\n"
560 + "InnoDB: Otherwise, the older doublewrite buffer will break your data during recovery!\n");
562 + strcpy(name, srv_doublewrite_file);
564 + /* First we try to create the file: if it already
565 + exists, ret will get value FALSE */
567 + files[i] = os_file_create(innodb_file_data_key, name, OS_FILE_CREATE,
569 + OS_DATA_FILE, &ret);
571 + if (ret == FALSE && os_file_get_last_error(FALSE)
572 + != OS_FILE_ALREADY_EXISTS
574 + /* AIX 5.1 after security patch ML7 may have
575 + errno set to 0 here, which causes our function
576 + to return 100; work around that AIX problem */
577 + && os_file_get_last_error(FALSE) != 100
581 + "InnoDB: Error in creating"
582 + " or opening %s\n",
588 + if (ret == FALSE) {
589 + /* We open the data file */
591 + files[i] = os_file_create(innodb_file_data_key,
592 + name, OS_FILE_OPEN, OS_FILE_NORMAL,
593 + OS_DATA_FILE, &ret);
597 + "InnoDB: Error in opening %s\n", name);
598 + os_file_get_last_error(TRUE);
603 + ret = os_file_get_size(files[i], &size, &size_high);
605 + /* Round size downward to megabytes */
608 + = (size / (1024 * 1024) + 4096 * size_high)
609 + << (20 - UNIV_PAGE_SIZE_SHIFT);
611 + if (rounded_size_pages != TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9) {
614 + "InnoDB: Warning: doublewrite buffer file %s"
615 + " is of a different size\n"
616 + "InnoDB: %lu pages"
617 + " (rounded down to MB)\n"
618 + "InnoDB: than intended size"
621 + (ulong) rounded_size_pages,
622 + (ulong) TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9);
625 + fil_read_first_page(
626 + files[i], one_opened, &flags,
627 +#ifdef UNIV_LOG_ARCHIVE
628 + min_arch_log_no, max_arch_log_no,
629 +#endif /* UNIV_LOG_ARCHIVE */
630 + min_flushed_lsn, max_flushed_lsn);
633 + /* We created the data file and now write it full of
636 + *create_new_doublewrite_file = TRUE;
638 + ut_print_timestamp(stderr);
640 + " InnoDB: Doublewrite buffer file %s did not"
641 + " exist. It will be be created.\n",
644 + if (*create_new_db == FALSE) {
646 + "InnoDB: Notice: Previous version's ibdata files may cause crash.\n"
647 + " If you use that, please use the ibdata files of this version.\n");
650 + ut_print_timestamp(stderr);
652 + " InnoDB: Setting file %s size to %lu MB\n",
654 + (ulong) ((TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9)
655 + >> (20 - UNIV_PAGE_SIZE_SHIFT)));
658 + "InnoDB: Database physically writes the"
659 + " file full: wait...\n");
661 + ret = os_file_set_size(
663 + srv_calc_low32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9),
664 + srv_calc_high32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9));
668 + "InnoDB: Error in creating %s:"
669 + " probably out of disk space\n", name);
675 + ret = os_file_close(files[i]);
678 + fil_space_create(name, TRX_DOUBLEWRITE_SPACE, 0, FIL_TABLESPACE);
680 + ut_a(fil_validate());
682 + fil_node_create(name, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, TRX_DOUBLEWRITE_SPACE, FALSE);
690 @@ -1017,6 +1155,7 @@
691 /*====================================*/
694 + ibool create_new_doublewrite_file;
695 ibool log_file_created;
696 ibool log_created = FALSE;
697 ibool log_opened = FALSE;
698 @@ -1482,6 +1621,7 @@
701 err = open_or_create_data_files(&create_new_db,
702 + &create_new_doublewrite_file,
703 #ifdef UNIV_LOG_ARCHIVE
704 &min_arch_log_no, &max_arch_log_no,
705 #endif /* UNIV_LOG_ARCHIVE */
706 @@ -1649,6 +1789,14 @@
707 after the double write buffer has been created. */
710 + if (create_new_doublewrite_file) {
712 + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr);
715 + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE);
720 srv_startup_is_before_trx_rollback_phase = FALSE;
721 @@ -1682,6 +1830,13 @@
722 recv_recovery_from_archive_finish();
723 #endif /* UNIV_LOG_ARCHIVE */
725 + char* save_srv_doublewrite_file = NULL;
727 + if (create_new_doublewrite_file) {
728 + /* doublewrite_file cannot be used for recovery yet. */
729 + save_srv_doublewrite_file = srv_doublewrite_file;
730 + srv_doublewrite_file = NULL;
733 /* Check if we support the max format that is stamped
734 on the system tablespace.
735 @@ -1768,6 +1923,17 @@
736 we have finished the recovery process so that the
737 image of TRX_SYS_PAGE_NO is not stale. */
738 trx_sys_file_format_tag_init();
740 + if (create_new_doublewrite_file) {
741 + /* restore the value */
742 + srv_doublewrite_file = save_srv_doublewrite_file;
745 + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr);
748 + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE);
752 if (!create_new_db && sum_of_new_sizes > 0) {
753 --- a/storage/innobase/trx/trx0sys.c
754 +++ b/storage/innobase/trx/trx0sys.c
755 @@ -415,6 +415,152 @@
760 + if (srv_doublewrite_file) {
761 + /* The same doublewrite buffer as in TRX_SYS_SPACE should exist in the
762 + doublewrite file: check for it, and create it if it does not exist. */
765 + trx_doublewrite_buf_is_being_created = TRUE;
767 + block = buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, TRX_SYS_PAGE_NO,
769 + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
771 + doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
773 + if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
774 + == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
775 + /* The doublewrite buffer has already been created:
776 + just read in some numbers */
781 + "InnoDB: Doublewrite buffer not found in the doublewrite file:"
782 + " creating new doublewrite buffer.\n");
784 + if (buf_pool_get_curr_size()
785 + < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
786 + + FSP_EXTENT_SIZE / 2 + 100)
787 + * UNIV_PAGE_SIZE)) {
789 + "InnoDB: Cannot create the doublewrite buffer:"
791 + "InnoDB: increase your buffer pool size.\n"
792 + "InnoDB: Cannot continue processing.\n");
797 + block2 = fseg_create(TRX_DOUBLEWRITE_SPACE, TRX_SYS_PAGE_NO,
798 + TRX_SYS_DOUBLEWRITE
799 + + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
801 + /* fseg_create acquires a second latch on the page,
802 + therefore we must declare it: */
804 + buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
806 + if (block2 == NULL) {
808 + "InnoDB: Cannot create the doublewrite buffer:"
810 + "InnoDB: increase your tablespace size.\n"
811 + "InnoDB: Cannot continue processing.\n");
813 + /* We exit without committing the mtr to prevent
814 + its modifications to the database getting to disk */
819 + fseg_header = buf_block_get_frame(block)
820 + + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
823 + for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
824 + + FSP_EXTENT_SIZE / 2; i++) {
825 + page_no = fseg_alloc_free_page(fseg_header,
828 + if (page_no == FIL_NULL) {
830 + "InnoDB: Cannot create the doublewrite"
831 + " buffer: You must\n"
832 + "InnoDB: increase your"
833 + " tablespace size.\n"
834 + "InnoDB: Cannot continue operation.\n"
840 + /* We read the allocated pages to the buffer pool;
841 + when they are written to disk in a flush, the space
842 + id and page number fields are also written to the
843 + pages. When we at database startup read pages
844 + from the doublewrite buffer, we know that if the
845 + space id and page number in them are the same as
846 + the page position in the tablespace, then the page
847 + has not been written to in doublewrite. */
849 +#ifdef UNIV_SYNC_DEBUG
851 +#endif /* UNIV_SYNC_DEBUG */
852 + buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, page_no,
854 + buf_block_dbg_add_level(new_block,
855 + SYNC_NO_ORDER_CHECK);
857 + if (i == FSP_EXTENT_SIZE / 2) {
858 + ut_a(page_no == FSP_EXTENT_SIZE);
859 + mlog_write_ulint(doublewrite
860 + + TRX_SYS_DOUBLEWRITE_BLOCK1,
861 + page_no, MLOG_4BYTES, &mtr);
862 + mlog_write_ulint(doublewrite
863 + + TRX_SYS_DOUBLEWRITE_REPEAT
864 + + TRX_SYS_DOUBLEWRITE_BLOCK1,
865 + page_no, MLOG_4BYTES, &mtr);
866 + } else if (i == FSP_EXTENT_SIZE / 2
867 + + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
868 + ut_a(page_no == 2 * FSP_EXTENT_SIZE);
869 + mlog_write_ulint(doublewrite
870 + + TRX_SYS_DOUBLEWRITE_BLOCK2,
871 + page_no, MLOG_4BYTES, &mtr);
872 + mlog_write_ulint(doublewrite
873 + + TRX_SYS_DOUBLEWRITE_REPEAT
874 + + TRX_SYS_DOUBLEWRITE_BLOCK2,
875 + page_no, MLOG_4BYTES, &mtr);
876 + } else if (i > FSP_EXTENT_SIZE / 2) {
877 + ut_a(page_no == prev_page_no + 1);
880 + prev_page_no = page_no;
883 + mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
884 + TRX_SYS_DOUBLEWRITE_MAGIC_N,
885 + MLOG_4BYTES, &mtr);
886 + mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
887 + + TRX_SYS_DOUBLEWRITE_REPEAT,
888 + TRX_SYS_DOUBLEWRITE_MAGIC_N,
889 + MLOG_4BYTES, &mtr);
891 + mlog_write_ulint(doublewrite
892 + + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
893 + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
894 + MLOG_4BYTES, &mtr);
897 + /* Flush the modified pages to disk and make a checkpoint */
898 + log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
900 + fprintf(stderr, "InnoDB: Doublewrite buffer created in the doublewrite file\n");
901 + trx_sys_multiple_tablespace_format = TRUE;
903 + trx_doublewrite_buf_is_being_created = FALSE;
907 /****************************************************************//**
908 @@ -438,10 +584,19 @@
909 ulint source_page_no;
912 + ulint doublewrite_space_id;
917 + doublewrite_space_id = (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
919 + if (srv_doublewrite_file) {
921 + "InnoDB: doublewrite file '%s' is used.\n",
922 + srv_doublewrite_file);
925 /* We do the file i/o past the buffer pool */
927 unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
929 /* Read the trx sys header to check if we are using the doublewrite
932 - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
933 + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, TRX_SYS_PAGE_NO, 0,
934 UNIV_PAGE_SIZE, read_buf, NULL);
935 doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
937 @@ -488,10 +643,10 @@
939 /* Read the pages from the doublewrite buffer to memory */
941 - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
942 + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block1, 0,
943 TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
945 - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
946 + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block2, 0,
947 TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
948 buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
951 " doublewrite buf.\n",
952 (ulong) space_id, (ulong) page_no, (ulong) i);
954 - } else if (space_id == TRX_SYS_SPACE
955 + } else if ((space_id == TRX_SYS_SPACE
956 + || (srv_doublewrite_file && space_id == TRX_DOUBLEWRITE_SPACE))
957 && ((page_no >= block1
959 < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
960 @@ -1016,6 +1172,83 @@
963 /*****************************************************************//**
964 +Creates a dummy file page for the transaction system. */
967 +trx_sysf_dummy_create(
968 +/*==================*/
972 + buf_block_t* block;
977 + /* Note that below we first reserve the file space x-latch, and
978 + then enter the kernel: we must do it in this order to conform
979 + to the latching order rules. */
981 + mtr_x_lock(fil_space_get_latch(space, NULL), mtr);
982 + mutex_enter(&kernel_mutex);
984 + /* Create the trx sys file block in a new allocated file segment */
985 + block = fseg_create(space, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
987 + buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
989 + fprintf(stderr, "%lu\n", buf_block_get_page_no(block));
990 + ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
992 + page = buf_block_get_frame(block);
994 + mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
997 + /* Reset the doublewrite buffer magic number to zero so that we
998 + know that the doublewrite buffer has not yet been created (this
999 + suppresses a Valgrind warning) */
1001 + mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
1002 + + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
1005 + /* TODO: REMOVE IT: The code below is not needed, I think */
1006 + sys_header = trx_sysf_get(mtr);
1008 + /* Start counting transaction ids from number 1 up */
1009 + mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
1010 + ut_dulint_create(0, 1), mtr);
1012 + /* Reset the rollback segment slots */
1013 + for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
1015 + trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
1016 + trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
1019 + /* The remaining area (up to the page trailer) is uninitialized.
1020 + Silence Valgrind warnings about it. */
1021 + UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
1022 + + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
1023 + + TRX_SYS_RSEG_SPACE),
1024 + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
1026 + + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
1027 + + TRX_SYS_RSEG_SPACE))
1028 + + page - sys_header);
1030 + /* Create the first rollback segment in the given tablespace */
1031 + page_no = trx_rseg_header_create(space, 0, ULINT_MAX, &slot_no,
1033 + ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
1034 + ut_a(page_no != FIL_NULL);
1037 + mutex_exit(&kernel_mutex);
1040 +/*****************************************************************//**
1041 Creates and initializes the central memory structures for the transaction
1042 system. This is called when the database is started. */
1044 @@ -1387,6 +1620,26 @@
1045 /* Does nothing at the moment */
1048 +/*****************************************************************//**
1049 +Creates and initializes the dummy transaction system page for tablespace. */
1052 +trx_sys_dummy_create(
1053 +/*=================*/
1058 + /* This function is only for the doublewrite file for now */
1059 + ut_a(space == TRX_DOUBLEWRITE_SPACE);
1063 + trx_sysf_dummy_create(space, &mtr);
1068 /*********************************************************************
1069 Creates the rollback segments */
1072 +++ b/mysql-test/r/percona_innodb_doublewrite_file.result
1074 +show variables like 'innodb_doublewrite%';
1075 +Variable_name Value
1076 +innodb_doublewrite ON
1077 +innodb_doublewrite_file ib_doublewrite
1079 +++ b/mysql-test/t/percona_innodb_doublewrite_file-master.opt
1081 +--innodb_doublewrite_file=ib_doublewrite
1083 +++ b/mysql-test/t/percona_innodb_doublewrite_file.test
1085 +--source include/have_innodb.inc
1086 +show variables like 'innodb_doublewrite%';