1 # name : innodb_separate_doublewrite.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
9 --- a/storage/innobase/buf/buf0buf.c 2010-12-03 17:49:11.574962867 +0900
10 +++ b/storage/innobase/buf/buf0buf.c 2010-12-04 15:35:58.624514033 +0900
12 read_space_id = mach_read_from_4(
13 frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
15 - if (bpage->space == TRX_SYS_SPACE
16 + if ((bpage->space == TRX_SYS_SPACE
17 + || (srv_doublewrite_file && bpage->space == TRX_DOUBLEWRITE_SPACE))
18 && trx_doublewrite_page_inside(bpage->offset)) {
20 ut_print_timestamp(stderr);
21 diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
22 --- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:49:59.179956111 +0900
23 +++ b/storage/innobase/buf/buf0flu.c 2010-12-04 15:35:58.624514033 +0900
25 write_buf = trx_doublewrite->write_buf;
28 - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
29 + fil_io(OS_FILE_WRITE, TRUE,
30 + (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0,
31 trx_doublewrite->block1, 0, len,
32 (void*) write_buf, NULL);
35 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
36 ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
38 - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
39 + fil_io(OS_FILE_WRITE, TRUE,
40 + (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0,
41 trx_doublewrite->block2, 0, len,
42 (void*) write_buf, NULL);
46 /* Now flush the doublewrite buffer data to disk */
48 - fil_flush(TRX_SYS_SPACE);
49 + fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
51 /* We know that the writes have been flushed to disk now
52 and in recovery we will find them in the doublewrite buffer
53 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
54 --- a/storage/innobase/buf/buf0rea.c 2010-12-04 15:35:29.138514157 +0900
55 +++ b/storage/innobase/buf/buf0rea.c 2010-12-04 15:35:58.626486771 +0900
57 wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
58 mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
60 - if (trx_doublewrite && space == TRX_SYS_SPACE
62 + && (space == TRX_SYS_SPACE
63 + || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
64 && ( (offset >= trx_doublewrite->block1
65 && offset < trx_doublewrite->block1
66 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
67 diff -ruN a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
68 --- a/storage/innobase/dict/dict0load.c 2010-12-03 17:30:16.252956569 +0900
69 +++ b/storage/innobase/dict/dict0load.c 2010-12-04 15:35:58.627482825 +0900
71 #include "srv0start.h"
73 #include "ha_prototypes.h" /* innobase_casedn_str() */
77 /** Following are six InnoDB system tables */
82 - if (space_id == 0) {
83 + if (trx_sys_sys_space(space_id)) {
84 /* The system tablespace always exists. */
85 } else if (in_crash_recovery) {
86 /* Check that the tablespace (the .ibd file) really
88 space = mach_read_from_4(field);
90 /* Check if the tablespace exists and has the right name */
92 + if (!trx_sys_sys_space(space)) {
93 flags = dict_sys_tables_get_flags(rec);
95 if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
100 - if (table->space == 0) {
101 + if (trx_sys_sys_space(table->space)) {
102 /* The system tablespace is always available. */
103 } else if (!fil_space_for_table_exists_in_mem(
105 diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
106 --- a/storage/innobase/fil/fil0fil.c 2010-12-04 15:35:29.143813775 +0900
107 +++ b/storage/innobase/fil/fil0fil.c 2010-12-04 15:35:58.628498870 +0900
110 UT_LIST_ADD_LAST(chain, space->chain, node);
112 - if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
113 + if (id < SRV_EXTRA_SYS_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
115 fil_system->max_assigned_id = id;
117 @@ -719,14 +719,14 @@
118 size_bytes = (((ib_int64_t)size_high) << 32)
119 + (ib_int64_t)size_low;
120 #ifdef UNIV_HOTBACKUP
121 - if (space->id == 0) {
122 + if (trx_sys_sys_space(space->id)) {
123 node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
124 os_file_close(node->handle);
127 #endif /* UNIV_HOTBACKUP */
128 ut_a(space->purpose != FIL_LOG);
129 - ut_a(space->id != 0);
130 + ut_a(!trx_sys_sys_space(space->id));
132 if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
137 if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
138 - || space_id == 0)) {
139 + || trx_sys_sys_space(space_id))) {
141 "InnoDB: Error: tablespace id %lu"
142 " in file %s is not sensible\n",
147 - if (space->purpose == FIL_TABLESPACE && space->id != 0) {
148 + if (space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(space->id)) {
149 /* Put the node to the LRU list */
150 UT_LIST_ADD_FIRST(LRU, system->LRU, node);
153 ut_a(system->n_open > 0);
156 - if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
157 + if (node->space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(node->space->id)) {
158 ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
160 /* The node is in the LRU list, remove it */
163 mutex_enter(&fil_system->mutex);
165 - if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
166 + if (trx_sys_sys_space(space_id) || space_id >= SRV_LOG_SPACE_FIRST_ID) {
167 /* We keep log files and system tablespace files always open;
168 this is important in preventing deadlocks in this module, as
169 a page read completion often performs another read from the
170 @@ -1190,7 +1190,7 @@
171 " tablespace memory cache!\n",
174 - if (id == 0 || purpose != FIL_TABLESPACE) {
175 + if (trx_sys_sys_space(id) || purpose != FIL_TABLESPACE) {
177 mutex_exit(&fil_system->mutex);
179 @@ -1252,6 +1252,7 @@
182 if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on)
183 + && UNIV_UNLIKELY(id < SRV_EXTRA_SYS_SPACE_FIRST_ID)
184 && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) {
185 if (!fil_system->space_id_reuse_warned) {
186 fil_system->space_id_reuse_warned = TRUE;
187 @@ -1335,7 +1336,7 @@
188 (ulong) SRV_LOG_SPACE_FIRST_ID);
191 - success = (id < SRV_LOG_SPACE_FIRST_ID);
192 + success = (id < SRV_EXTRA_SYS_SPACE_FIRST_ID);
195 *space_id = fil_system->max_assigned_id = id;
196 @@ -1598,6 +1599,8 @@
197 UT_LIST_INIT(fil_system->LRU);
199 fil_system->max_n_open = max_n_open;
201 + fil_system->max_assigned_id = TRX_SYS_SPACE_MAX;
204 /*******************************************************************//**
205 @@ -1619,7 +1622,7 @@
206 space = UT_LIST_GET_FIRST(fil_system->space_list);
208 while (space != NULL) {
209 - if (space->purpose != FIL_TABLESPACE || space->id == 0) {
210 + if (space->purpose != FIL_TABLESPACE || trx_sys_sys_space(space->id)) {
211 node = UT_LIST_GET_FIRST(space->chain);
213 while (node != NULL) {
214 @@ -1709,6 +1712,10 @@
218 + if (max_id >= SRV_EXTRA_SYS_SPACE_FIRST_ID) {
222 mutex_enter(&fil_system->mutex);
224 if (fil_system->max_assigned_id < max_id) {
225 @@ -1727,6 +1734,7 @@
227 fil_write_lsn_and_arch_no_to_file(
228 /*==============================*/
230 ulint sum_of_sizes, /*!< in: combined size of previous files
231 in space, in database pages */
232 ib_uint64_t lsn, /*!< in: lsn to write */
233 @@ -1736,14 +1744,16 @@
237 + ut_a(trx_sys_sys_space(space_id));
239 buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
240 buf = ut_align(buf1, UNIV_PAGE_SIZE);
242 - fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
243 + fil_read(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
245 mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
247 - fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
248 + fil_write(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
252 @@ -1779,7 +1789,7 @@
255 if (space->purpose == FIL_TABLESPACE
256 - && space->id == 0) {
257 + && trx_sys_sys_space(space->id)) {
260 node = UT_LIST_GET_FIRST(space->chain);
261 @@ -1787,7 +1797,7 @@
262 mutex_exit(&fil_system->mutex);
264 err = fil_write_lsn_and_arch_no_to_file(
265 - sum_of_sizes, lsn, arch_log_no);
266 + space->id, sum_of_sizes, lsn, arch_log_no);
267 if (err != DB_SUCCESS) {
270 @@ -3834,7 +3844,7 @@
273 #ifndef UNIV_HOTBACKUP
274 - if (space_id == ULINT_UNDEFINED || space_id == 0) {
275 + if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) {
277 "InnoDB: Error: tablespace id %lu in file %s"
278 " is not sensible\n",
279 @@ -3843,7 +3853,7 @@
283 - if (space_id == ULINT_UNDEFINED || space_id == 0) {
284 + if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) {
288 @@ -4664,7 +4674,7 @@
291 if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
292 - && space->id != 0) {
293 + && !trx_sys_sys_space(space->id)) {
294 /* The node is in the LRU list, remove it */
296 ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
297 @@ -4710,7 +4720,7 @@
300 if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
301 - && node->space->id != 0) {
302 + && !trx_sys_sys_space(node->space->id)) {
303 /* The node must be put back to the LRU list */
304 UT_LIST_ADD_FIRST(LRU, system->LRU, node);
306 @@ -5318,7 +5328,7 @@
307 ut_a(fil_node->n_pending == 0);
308 ut_a(fil_node->open);
309 ut_a(fil_node->space->purpose == FIL_TABLESPACE);
310 - ut_a(fil_node->space->id != 0);
311 + ut_a(!trx_sys_sys_space(fil_node->space->id));
313 fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
315 diff -ruN a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
316 --- a/storage/innobase/fsp/fsp0fsp.c 2010-11-03 07:01:13.000000000 +0900
317 +++ b/storage/innobase/fsp/fsp0fsp.c 2010-12-04 15:35:58.632513243 +0900
319 # include "log0log.h"
320 #endif /* UNIV_HOTBACKUP */
321 #include "dict0mem.h"
323 +#include "trx0sys.h"
325 #define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header
326 within a file page */
327 @@ -999,10 +999,10 @@
328 flst_init(header + FSP_SEG_INODES_FREE, mtr);
330 mlog_write_ull(header + FSP_SEG_ID, 1, mtr);
332 + if (space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE) {
333 fsp_fill_free_list(FALSE, space, header, mtr);
334 btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
335 - 0, 0, DICT_IBUF_ID_MIN + space,
336 + space, 0, DICT_IBUF_ID_MIN + space,
337 dict_ind_redundant, mtr);
339 fsp_fill_free_list(TRUE, space, header, mtr);
340 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
341 --- a/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:35:29.153514047 +0900
342 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:35:58.636549909 +0900
344 static char* innobase_log_group_home_dir = NULL;
345 static char* innobase_file_format_name = NULL;
346 static char* innobase_change_buffering = NULL;
347 +static char* innobase_doublewrite_file = NULL;
349 /* The highest file format being used in the database. The value can be
350 set by user, however, it will be adjusted to the newer file format if
351 @@ -2426,6 +2427,8 @@
355 + srv_doublewrite_file = innobase_doublewrite_file;
357 srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table;
359 /* -------------- Log files ---------------------------*/
360 @@ -11556,6 +11559,11 @@
361 "Path to individual files and their sizes.",
364 +static MYSQL_SYSVAR_STR(doublewrite_file, innobase_doublewrite_file,
365 + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
366 + "Path to special datafile for doublewrite buffer. (default is "": not used) ### ONLY FOR EXPERTS!!! ###",
369 static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
370 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
371 "The AUTOINC lock modes supported by InnoDB: "
372 @@ -11721,6 +11729,7 @@
373 MYSQL_SYSVAR(commit_concurrency),
374 MYSQL_SYSVAR(concurrency_tickets),
375 MYSQL_SYSVAR(data_file_path),
376 + MYSQL_SYSVAR(doublewrite_file),
377 MYSQL_SYSVAR(data_home_dir),
378 MYSQL_SYSVAR(doublewrite),
379 MYSQL_SYSVAR(recovery_stats),
380 diff -ruN a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
381 --- a/storage/innobase/include/mtr0log.ic 2010-11-03 07:01:13.000000000 +0900
382 +++ b/storage/innobase/include/mtr0log.ic 2010-12-04 15:35:58.644607059 +0900
386 #include "fsp0types.h"
387 +#include "srv0srv.h"
390 /********************************************************//**
391 Opens a buffer to mlog. It must be closed with mlog_close.
392 @return buffer, NULL if log mode MTR_LOG_NONE */
394 the doublewrite buffer is located in pages
395 FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
397 - if (space == TRX_SYS_SPACE
398 + if ((space == TRX_SYS_SPACE
399 + || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
400 && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
401 if (trx_doublewrite_buf_is_being_created) {
402 /* Do nothing: we only come to this branch in an
403 diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
404 --- a/storage/innobase/include/srv0srv.h 2010-12-04 15:35:29.177480351 +0900
405 +++ b/storage/innobase/include/srv0srv.h 2010-12-04 15:35:58.646556250 +0900
407 extern ulint* srv_data_file_sizes;
408 extern ulint* srv_data_file_is_raw_partition;
410 +extern char* srv_doublewrite_file;
412 extern ibool srv_recovery_stats;
414 extern ibool srv_auto_extend_last_data_file;
415 diff -ruN a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
416 --- a/storage/innobase/include/srv0start.h 2010-11-03 07:01:13.000000000 +0900
417 +++ b/storage/innobase/include/srv0start.h 2010-12-08 17:15:07.602605797 +0900
419 /** Log 'spaces' have id's >= this */
420 #define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL
422 +/** Space ids >= this are reserved for extra system tablespaces */
423 +#define SRV_EXTRA_SYS_SPACE_FIRST_ID 0xFFFFFFE0UL
426 diff -ruN a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
427 --- a/storage/innobase/include/trx0sys.h 2010-12-03 15:41:52.047049291 +0900
428 +++ b/storage/innobase/include/trx0sys.h 2010-12-04 15:35:58.647551222 +0900
431 ulint space, /*!< in: space */
432 ulint page_no);/*!< in: page number */
433 +/***************************************************************//**
434 +Checks if a space is one of the system tablespaces.
435 +@return TRUE if system tablespace */
440 + ulint space); /*!< in: space */
441 +/***************************************************************//**
442 +Checks if a space is the doublewrite tablespace.
443 +@return TRUE if doublewrite tablespace */
446 +trx_sys_doublewrite_space(
447 +/*======================*/
448 + ulint space); /*!< in: space */
449 /*****************************************************************//**
450 Creates and initializes the central memory structures for the transaction
451 system. This is called when the database is started. */
454 trx_sys_create(void);
456 +/*****************************************************************//**
457 +Creates and initializes a dummy transaction system page for the given tablespace. */
460 +trx_sys_dummy_create(
461 +/*=================*/
463 /****************************************************************//**
464 Looks for a free slot for a rollback segment in the trx system file copy.
465 @return slot index or ULINT_UNDEFINED if not found */
468 /* Space id and page no where the trx system file copy resides */
469 #define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
470 +#define TRX_DOUBLEWRITE_SPACE 0xFFFFFFE0UL /* the doublewrite buffer tablespace if used */
471 +#define TRX_SYS_SPACE_MAX 9 /* reserved max space id for system tablespaces */
473 #define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
475 diff -ruN a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
476 --- a/storage/innobase/include/trx0sys.ic 2010-11-03 07:01:13.000000000 +0900
477 +++ b/storage/innobase/include/trx0sys.ic 2010-12-04 15:35:58.649473284 +0900
481 /***************************************************************//**
482 +Checks if a space is one of the system tablespaces.
483 +@return TRUE if system tablespace */
488 + ulint space) /*!< in: space */
490 + if (srv_doublewrite_file) {
491 + /* several spaces are reserved */
492 + return((ibool)(space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE));
494 + return((ibool)(space == TRX_SYS_SPACE));
498 +/***************************************************************//**
499 +Checks if a space is the doublewrite tablespace.
500 +@return TRUE if doublewrite tablespace */
503 +trx_sys_doublewrite_space(
504 +/*======================*/
505 + ulint space) /*!< in: space */
507 + if (srv_doublewrite_file) {
508 + /* doublewrite buffer is separated */
509 + return((ibool)(space == TRX_DOUBLEWRITE_SPACE));
511 + return((ibool)(space == TRX_SYS_SPACE));
515 +/***************************************************************//**
516 Gets the pointer in the nth slot of the rseg array.
517 @return pointer to rseg object, NULL if slot not in use */
519 diff -ruN a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
520 --- a/storage/innobase/row/row0mysql.c 2010-12-03 17:30:16.334989510 +0900
521 +++ b/storage/innobase/row/row0mysql.c 2010-12-04 15:35:58.652496484 +0900
522 @@ -3421,7 +3421,7 @@
523 /* Do not drop possible .ibd tablespace if something went
524 wrong: we do not want to delete valuable data of the user */
526 - if (err == DB_SUCCESS && space_id > 0) {
527 + if (err == DB_SUCCESS && !trx_sys_sys_space(space_id)) {
528 if (!fil_space_for_table_exists_in_mem(space_id,
531 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
532 --- a/storage/innobase/srv/srv0srv.c 2010-12-04 15:35:29.180483212 +0900
533 +++ b/storage/innobase/srv/srv0srv.c 2010-12-04 15:35:58.656550107 +0900
535 /* size in database pages */
536 UNIV_INTERN ulint* srv_data_file_sizes = NULL;
538 +UNIV_INTERN char* srv_doublewrite_file = NULL;
540 UNIV_INTERN ibool srv_recovery_stats = FALSE;
542 /* if TRUE, then we auto-extend the last data file */
543 diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
544 --- a/storage/innobase/srv/srv0start.c 2010-12-04 15:35:29.183481330 +0900
545 +++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:35:58.661550545 +0900
547 /*======================*/
548 ibool* create_new_db, /*!< out: TRUE if new database should be
550 + ibool* create_new_doublewrite_file,
551 #ifdef UNIV_LOG_ARCHIVE
552 ulint* min_arch_log_no,/*!< out: min of archived log
553 numbers in data files */
555 *sum_of_new_sizes = 0;
557 *create_new_db = FALSE;
558 + *create_new_doublewrite_file = FALSE;
560 srv_normalize_path_for_win(srv_data_home);
562 @@ -984,6 +986,142 @@
563 srv_data_file_is_raw_partition[i] != 0);
566 + /* special file for doublewrite buffer */
567 + if (srv_doublewrite_file)
569 + srv_normalize_path_for_win(srv_doublewrite_file);
572 + "InnoDB: Note: The innodb_doublewrite_file option has been specified.\n"
573 + "InnoDB: This option is for experts only. Don't use it unless you understand WELL what it is.\n"
574 + "InnoDB: ### Don't specify a file older than the last checkpoint. ###\n"
575 + "InnoDB: Otherwise, the older doublewrite buffer will break your data during recovery!\n");
577 + strcpy(name, srv_doublewrite_file);
579 + /* First we try to create the file: if it already
580 + exists, ret will get value FALSE */
582 + files[i] = os_file_create(innodb_file_data_key, name, OS_FILE_CREATE,
584 + OS_DATA_FILE, &ret);
586 + if (ret == FALSE && os_file_get_last_error(FALSE)
587 + != OS_FILE_ALREADY_EXISTS
589 + /* AIX 5.1 after security patch ML7 may have
590 + errno set to 0 here, which causes our function
591 + to return 100; work around that AIX problem */
592 + && os_file_get_last_error(FALSE) != 100
596 + "InnoDB: Error in creating"
597 + " or opening %s\n",
603 + if (ret == FALSE) {
604 + /* We open the data file */
606 + files[i] = os_file_create(innodb_file_data_key,
607 + name, OS_FILE_OPEN, OS_FILE_NORMAL,
608 + OS_DATA_FILE, &ret);
612 + "InnoDB: Error in opening %s\n", name);
613 + os_file_get_last_error(TRUE);
618 + ret = os_file_get_size(files[i], &size, &size_high);
620 + /* Round size downward to megabytes */
623 + = (size / (1024 * 1024) + 4096 * size_high)
624 + << (20 - UNIV_PAGE_SIZE_SHIFT);
626 + if (rounded_size_pages != TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9) {
629 + "InnoDB: Warning: doublewrite buffer file %s"
630 + " is of a different size\n"
631 + "InnoDB: %lu pages"
632 + " (rounded down to MB)\n"
633 + "InnoDB: than intended size"
636 + (ulong) rounded_size_pages,
637 + (ulong) TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9);
640 + fil_read_flushed_lsn_and_arch_log_no(
641 + files[i], one_opened,
642 +#ifdef UNIV_LOG_ARCHIVE
643 + min_arch_log_no, max_arch_log_no,
644 +#endif /* UNIV_LOG_ARCHIVE */
645 + min_flushed_lsn, max_flushed_lsn);
648 + /* We created the data file and now write it full of
651 + *create_new_doublewrite_file = TRUE;
653 + ut_print_timestamp(stderr);
655 + " InnoDB: Doublewrite buffer file %s did not"
656 + " exist. It will be be created.\n",
659 + if (*create_new_db == FALSE) {
661 + "InnoDB: Notice: Previous version's ibdata files may cause crash.\n"
662 + " If you use that, please use the ibdata files of this version.\n");
665 + ut_print_timestamp(stderr);
667 + " InnoDB: Setting file %s size to %lu MB\n",
669 + (ulong) ((TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9)
670 + >> (20 - UNIV_PAGE_SIZE_SHIFT)));
673 + "InnoDB: Database physically writes the"
674 + " file full: wait...\n");
676 + ret = os_file_set_size(
678 + srv_calc_low32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9),
679 + srv_calc_high32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9));
683 + "InnoDB: Error in creating %s:"
684 + " probably out of disk space\n", name);
690 + ret = os_file_close(files[i]);
693 + fil_space_create(name, TRX_DOUBLEWRITE_SPACE, 0, FIL_TABLESPACE);
695 + ut_a(fil_validate());
697 + fil_node_create(name, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, TRX_DOUBLEWRITE_SPACE, FALSE);
706 /*====================================*/
709 + ibool create_new_doublewrite_file;
710 ibool log_file_created;
711 ibool log_created = FALSE;
712 ibool log_opened = FALSE;
713 @@ -1453,6 +1592,7 @@
716 err = open_or_create_data_files(&create_new_db,
717 + &create_new_doublewrite_file,
718 #ifdef UNIV_LOG_ARCHIVE
719 &min_arch_log_no, &max_arch_log_no,
720 #endif /* UNIV_LOG_ARCHIVE */
721 @@ -1620,6 +1760,14 @@
722 after the double write buffer has been created. */
725 + if (create_new_doublewrite_file) {
727 + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr);
730 + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE);
735 srv_startup_is_before_trx_rollback_phase = FALSE;
736 @@ -1653,6 +1801,13 @@
737 recv_recovery_from_archive_finish();
738 #endif /* UNIV_LOG_ARCHIVE */
740 + char* save_srv_doublewrite_file = NULL;
742 + if (create_new_doublewrite_file) {
743 + /* doublewrite_file cannot be used for recovery yet. */
744 + save_srv_doublewrite_file = srv_doublewrite_file;
745 + srv_doublewrite_file = NULL;
748 /* Check if we support the max format that is stamped
749 on the system tablespace.
750 @@ -1739,6 +1894,17 @@
751 we have finished the recovery process so that the
752 image of TRX_SYS_PAGE_NO is not stale. */
753 trx_sys_file_format_tag_init();
755 + if (create_new_doublewrite_file) {
756 + /* restore the value */
757 + srv_doublewrite_file = save_srv_doublewrite_file;
760 + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr);
763 + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE);
767 if (!create_new_db && sum_of_new_sizes > 0) {
768 diff -ruN a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
769 --- a/storage/innobase/trx/trx0sys.c 2010-12-03 17:32:15.651024019 +0900
770 +++ b/storage/innobase/trx/trx0sys.c 2010-12-04 15:35:58.664550291 +0900
771 @@ -414,6 +414,152 @@
776 + if (srv_doublewrite_file) {
777 + /* A doublewrite buffer like the one in TRX_SYS_SPACE should also
778 + exist in the doublewrite file: check for it and create it if missing. */
781 + trx_doublewrite_buf_is_being_created = TRUE;
783 + block = buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, TRX_SYS_PAGE_NO,
785 + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
787 + doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
789 + if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
790 + == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
791 + /* The doublewrite buffer has already been created:
792 + just read in some numbers */
797 + "InnoDB: Doublewrite buffer not found in the doublewrite file:"
798 + " creating new doublewrite buffer.\n");
800 + if (buf_pool_get_curr_size()
801 + < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
802 + + FSP_EXTENT_SIZE / 2 + 100)
803 + * UNIV_PAGE_SIZE)) {
805 + "InnoDB: Cannot create the doublewrite buffer:"
807 + "InnoDB: increase your buffer pool size.\n"
808 + "InnoDB: Cannot continue processing.\n");
813 + block2 = fseg_create(TRX_DOUBLEWRITE_SPACE, TRX_SYS_PAGE_NO,
814 + TRX_SYS_DOUBLEWRITE
815 + + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
817 + /* fseg_create acquires a second latch on the page,
818 + therefore we must declare it: */
820 + buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
822 + if (block2 == NULL) {
824 + "InnoDB: Cannot create the doublewrite buffer:"
826 + "InnoDB: increase your tablespace size.\n"
827 + "InnoDB: Cannot continue processing.\n");
829 + /* We exit without committing the mtr to prevent
830 + its modifications to the database getting to disk */
835 + fseg_header = buf_block_get_frame(block)
836 + + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
839 + for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
840 + + FSP_EXTENT_SIZE / 2; i++) {
841 + page_no = fseg_alloc_free_page(fseg_header,
844 + if (page_no == FIL_NULL) {
846 + "InnoDB: Cannot create the doublewrite"
847 + " buffer: You must\n"
848 + "InnoDB: increase your"
849 + " tablespace size.\n"
850 + "InnoDB: Cannot continue operation.\n"
856 + /* We read the allocated pages to the buffer pool;
857 + when they are written to disk in a flush, the space
858 + id and page number fields are also written to the
859 + pages. When we at database startup read pages
860 + from the doublewrite buffer, we know that if the
861 + space id and page number in them are the same as
862 + the page position in the tablespace, then the page
863 + has not been written to in doublewrite. */
865 +#ifdef UNIV_SYNC_DEBUG
867 +#endif /* UNIV_SYNC_DEBUG */
868 + buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, page_no,
870 + buf_block_dbg_add_level(new_block,
871 + SYNC_NO_ORDER_CHECK);
873 + if (i == FSP_EXTENT_SIZE / 2) {
874 + ut_a(page_no == FSP_EXTENT_SIZE);
875 + mlog_write_ulint(doublewrite
876 + + TRX_SYS_DOUBLEWRITE_BLOCK1,
877 + page_no, MLOG_4BYTES, &mtr);
878 + mlog_write_ulint(doublewrite
879 + + TRX_SYS_DOUBLEWRITE_REPEAT
880 + + TRX_SYS_DOUBLEWRITE_BLOCK1,
881 + page_no, MLOG_4BYTES, &mtr);
882 + } else if (i == FSP_EXTENT_SIZE / 2
883 + + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
884 + ut_a(page_no == 2 * FSP_EXTENT_SIZE);
885 + mlog_write_ulint(doublewrite
886 + + TRX_SYS_DOUBLEWRITE_BLOCK2,
887 + page_no, MLOG_4BYTES, &mtr);
888 + mlog_write_ulint(doublewrite
889 + + TRX_SYS_DOUBLEWRITE_REPEAT
890 + + TRX_SYS_DOUBLEWRITE_BLOCK2,
891 + page_no, MLOG_4BYTES, &mtr);
892 + } else if (i > FSP_EXTENT_SIZE / 2) {
893 + ut_a(page_no == prev_page_no + 1);
896 + prev_page_no = page_no;
899 + mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
900 + TRX_SYS_DOUBLEWRITE_MAGIC_N,
901 + MLOG_4BYTES, &mtr);
902 + mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
903 + + TRX_SYS_DOUBLEWRITE_REPEAT,
904 + TRX_SYS_DOUBLEWRITE_MAGIC_N,
905 + MLOG_4BYTES, &mtr);
907 + mlog_write_ulint(doublewrite
908 + + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
909 + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
910 + MLOG_4BYTES, &mtr);
913 + /* Flush the modified pages to disk and make a checkpoint */
914 + log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
916 + fprintf(stderr, "InnoDB: Doublewrite buffer created in the doublewrite file\n");
917 + trx_sys_multiple_tablespace_format = TRUE;
919 + trx_doublewrite_buf_is_being_created = FALSE;
923 /****************************************************************//**
924 @@ -437,10 +583,19 @@
925 ulint source_page_no;
928 + ulint doublewrite_space_id;
933 + doublewrite_space_id = (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
935 + if (srv_doublewrite_file) {
937 + "InnoDB: doublewrite file '%s' is used.\n",
938 + srv_doublewrite_file);
941 /* We do the file i/o past the buffer pool */
943 unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
945 /* Read the trx sys header to check if we are using the doublewrite
948 - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
949 + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, TRX_SYS_PAGE_NO, 0,
950 UNIV_PAGE_SIZE, read_buf, NULL);
951 doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
953 @@ -487,10 +642,10 @@
955 /* Read the pages from the doublewrite buffer to memory */
957 - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
958 + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block1, 0,
959 TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
961 - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
962 + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block2, 0,
963 TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
964 buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
967 " doublewrite buf.\n",
968 (ulong) space_id, (ulong) page_no, (ulong) i);
970 - } else if (space_id == TRX_SYS_SPACE
971 + } else if ((space_id == TRX_SYS_SPACE
972 + || (srv_doublewrite_file && space_id == TRX_DOUBLEWRITE_SPACE))
973 && ((page_no >= block1
975 < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
976 @@ -990,6 +1146,83 @@
979 /*****************************************************************//**
980 +Creates a dummy transaction system file page in the given tablespace. */
983 +trx_sysf_dummy_create(
984 +/*==================*/
988 + buf_block_t* block;
993 + /* Note that below we first reserve the file space x-latch, and
994 + then enter the kernel: we must do it in this order to conform
995 + to the latching order rules. */
997 + mtr_x_lock(fil_space_get_latch(space, NULL), mtr);
998 + mutex_enter(&kernel_mutex);
1000 + /* Create the trx sys file block in a new allocated file segment */
1001 + block = fseg_create(space, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
1003 + buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
1005 + fprintf(stderr, "%lu\n", buf_block_get_page_no(block));
1006 + ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
1008 + page = buf_block_get_frame(block);
1010 + mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
1011 + MLOG_2BYTES, mtr);
1013 + /* Reset the doublewrite buffer magic number to zero so that we
1014 + know that the doublewrite buffer has not yet been created (this
1015 + suppresses a Valgrind warning) */
1017 + mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
1018 + + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
1021 + /* TODO: REMOVE IT: The code below is probably not needed */
1022 + sys_header = trx_sysf_get(mtr);
1024 + /* Start counting transaction ids from number 1 up */
1025 + mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
1026 + ut_dulint_create(0, 1), mtr);
1028 + /* Reset the rollback segment slots */
1029 + for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
1031 + trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
1032 + trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
1035 + /* The remaining area (up to the page trailer) is uninitialized.
1036 + Silence Valgrind warnings about it. */
1037 + UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
1038 + + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
1039 + + TRX_SYS_RSEG_SPACE),
1040 + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
1042 + + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
1043 + + TRX_SYS_RSEG_SPACE))
1044 + + page - sys_header);
1046 + /* Create the first rollback segment in the given tablespace */
1047 + page_no = trx_rseg_header_create(space, 0, ULINT_MAX, &slot_no,
1049 + ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
1050 + ut_a(page_no != FIL_NULL);
1053 + mutex_exit(&kernel_mutex);
1056 +/*****************************************************************//**
1057 Creates and initializes the central memory structures for the transaction
1058 system. This is called when the database is started. */
1060 @@ -1351,6 +1584,26 @@
1061 /* Does nothing at the moment */
1064 +/*****************************************************************//**
1065 +Creates and initializes a dummy transaction system page for the given tablespace. */
1068 +trx_sys_dummy_create(
1069 +/*=================*/
1074 + /* This function is only used for the doublewrite file for now */
1075 + ut_a(space == TRX_DOUBLEWRITE_SPACE);
1079 + trx_sysf_dummy_create(space, &mtr);
1084 /*********************************************************************
1085 Creates the rollback segments */