]>
Commit | Line | Data |
---|---|---|
b4e1fa2c AM |
1 | # name : innodb_separate_doublewrite.patch |
2 | # introduced : 11 or before | |
3 | # maintainer : Yasufumi | |
4 | # | |
5 | #!!! notice !!! | |
6 | # Any small change to this file in the main branch | |
7 | # should be done or reviewed by the maintainer! | |
8 | diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c | |
9 | --- a/storage/innobase/buf/buf0buf.c 2010-12-03 17:49:11.574962867 +0900 | |
10 | +++ b/storage/innobase/buf/buf0buf.c 2010-12-04 15:35:58.624514033 +0900 | |
11 | @@ -4247,7 +4247,8 @@ | |
12 | read_space_id = mach_read_from_4( | |
13 | frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); | |
14 | ||
15 | - if (bpage->space == TRX_SYS_SPACE | |
16 | + if ((bpage->space == TRX_SYS_SPACE | |
17 | + || (srv_doublewrite_file && bpage->space == TRX_DOUBLEWRITE_SPACE)) | |
18 | && trx_doublewrite_page_inside(bpage->offset)) { | |
19 | ||
20 | ut_print_timestamp(stderr); | |
21 | diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c | |
22 | --- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:49:59.179956111 +0900 | |
23 | +++ b/storage/innobase/buf/buf0flu.c 2010-12-04 15:35:58.624514033 +0900 | |
24 | @@ -763,7 +763,8 @@ | |
25 | write_buf = trx_doublewrite->write_buf; | |
26 | i = 0; | |
27 | ||
28 | - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, | |
29 | + fil_io(OS_FILE_WRITE, TRUE, | |
30 | + (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0, | |
31 | trx_doublewrite->block1, 0, len, | |
32 | (void*) write_buf, NULL); | |
33 | ||
34 | @@ -800,7 +801,8 @@ | |
35 | + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; | |
36 | ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE); | |
37 | ||
38 | - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, | |
39 | + fil_io(OS_FILE_WRITE, TRUE, | |
40 | + (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0, | |
41 | trx_doublewrite->block2, 0, len, | |
42 | (void*) write_buf, NULL); | |
43 | ||
44 | @@ -830,7 +832,7 @@ | |
45 | flush: | |
46 | /* Now flush the doublewrite buffer data to disk */ | |
47 | ||
48 | - fil_flush(TRX_SYS_SPACE); | |
49 | + fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE); | |
50 | ||
51 | /* We know that the writes have been flushed to disk now | |
52 | and in recovery we will find them in the doublewrite buffer | |
53 | diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c | |
54 | --- a/storage/innobase/buf/buf0rea.c 2010-12-04 15:35:29.138514157 +0900 | |
55 | +++ b/storage/innobase/buf/buf0rea.c 2010-12-04 15:35:58.626486771 +0900 | |
56 | @@ -88,7 +88,9 @@ | |
57 | wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; | |
58 | mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER; | |
59 | ||
60 | - if (trx_doublewrite && space == TRX_SYS_SPACE | |
61 | + if (trx_doublewrite | |
62 | + && (space == TRX_SYS_SPACE | |
63 | + || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE)) | |
64 | && ( (offset >= trx_doublewrite->block1 | |
65 | && offset < trx_doublewrite->block1 | |
66 | + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) | |
67 | diff -ruN a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c | |
68 | --- a/storage/innobase/dict/dict0load.c 2010-12-03 17:30:16.252956569 +0900 | |
69 | +++ b/storage/innobase/dict/dict0load.c 2010-12-04 15:35:58.627482825 +0900 | |
70 | @@ -781,7 +781,7 @@ | |
71 | ||
72 | mtr_commit(&mtr); | |
73 | ||
74 | - if (space_id == 0) { | |
75 | + if (trx_sys_sys_space(space_id)) { | |
76 | /* The system tablespace always exists. */ | |
77 | } else if (in_crash_recovery) { | |
78 | /* Check that the tablespace (the .ibd file) really | |
79 | @@ -1578,7 +1578,7 @@ | |
80 | space = mach_read_from_4(field); | |
81 | ||
82 | /* Check if the tablespace exists and has the right name */ | |
83 | - if (space != 0) { | |
84 | + if (!trx_sys_sys_space(space)) { | |
85 | flags = dict_sys_tables_get_flags(rec); | |
86 | ||
87 | if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) { | |
88 | @@ -1728,7 +1728,7 @@ | |
89 | goto err_exit; | |
90 | } | |
91 | ||
92 | - if (table->space == 0) { | |
93 | + if (trx_sys_sys_space(table->space)) { | |
94 | /* The system tablespace is always available. */ | |
95 | } else if (!fil_space_for_table_exists_in_mem( | |
96 | table->space, name, | |
97 | diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c | |
98 | --- a/storage/innobase/fil/fil0fil.c 2010-12-04 15:35:29.143813775 +0900 | |
99 | +++ b/storage/innobase/fil/fil0fil.c 2010-12-04 15:35:58.628498870 +0900 | |
100 | @@ -627,7 +627,7 @@ | |
101 | ||
102 | UT_LIST_ADD_LAST(chain, space->chain, node); | |
103 | ||
104 | - if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) { | |
105 | + if (id < SRV_EXTRA_SYS_SPACE_FIRST_ID && fil_system->max_assigned_id < id) { | |
106 | ||
107 | fil_system->max_assigned_id = id; | |
108 | } | |
109 | @@ -691,14 +691,14 @@ | |
110 | size_bytes = (((ib_int64_t)size_high) << 32) | |
111 | + (ib_int64_t)size_low; | |
112 | #ifdef UNIV_HOTBACKUP | |
113 | - if (space->id == 0) { | |
114 | + if (trx_sys_sys_space(space->id)) { | |
115 | node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); | |
116 | os_file_close(node->handle); | |
117 | goto add_size; | |
118 | } | |
119 | #endif /* UNIV_HOTBACKUP */ | |
120 | ut_a(space->purpose != FIL_LOG); | |
121 | - ut_a(space->id != 0); | |
122 | + ut_a(!trx_sys_sys_space(space->id)); | |
123 | ||
124 | if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { | |
125 | fprintf(stderr, | |
126 | @@ -744,7 +744,7 @@ | |
127 | } | |
128 | ||
129 | if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED | |
130 | - || space_id == 0)) { | |
131 | + || trx_sys_sys_space(space_id))) { | |
132 | fprintf(stderr, | |
133 | "InnoDB: Error: tablespace id %lu" | |
134 | " in file %s is not sensible\n", | |
135 | @@ -812,7 +812,7 @@ | |
136 | ||
137 | system->n_open++; | |
138 | ||
139 | - if (space->purpose == FIL_TABLESPACE && space->id != 0) { | |
140 | + if (space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(space->id)) { | |
141 | /* Put the node to the LRU list */ | |
142 | UT_LIST_ADD_FIRST(LRU, system->LRU, node); | |
143 | } | |
144 | @@ -845,7 +845,7 @@ | |
145 | ut_a(system->n_open > 0); | |
146 | system->n_open--; | |
147 | ||
148 | - if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) { | |
149 | + if (node->space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(node->space->id)) { | |
150 | ut_a(UT_LIST_GET_LEN(system->LRU) > 0); | |
151 | ||
152 | /* The node is in the LRU list, remove it */ | |
153 | @@ -931,7 +931,7 @@ | |
154 | retry: | |
155 | mutex_enter(&fil_system->mutex); | |
156 | ||
157 | - if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { | |
158 | + if (trx_sys_sys_space(space_id) || space_id >= SRV_LOG_SPACE_FIRST_ID) { | |
159 | /* We keep log files and system tablespace files always open; | |
160 | this is important in preventing deadlocks in this module, as | |
161 | a page read completion often performs another read from the | |
162 | @@ -1162,7 +1162,7 @@ | |
163 | " tablespace memory cache!\n", | |
164 | (ulong) space->id); | |
165 | ||
166 | - if (id == 0 || purpose != FIL_TABLESPACE) { | |
167 | + if (trx_sys_sys_space(id) || purpose != FIL_TABLESPACE) { | |
168 | ||
169 | mutex_exit(&fil_system->mutex); | |
170 | ||
171 | @@ -1224,6 +1224,7 @@ | |
172 | space->mark = FALSE; | |
173 | ||
174 | if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on) | |
175 | + && UNIV_UNLIKELY(id < SRV_EXTRA_SYS_SPACE_FIRST_ID) | |
176 | && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) { | |
177 | if (!fil_system->space_id_reuse_warned) { | |
178 | fil_system->space_id_reuse_warned = TRUE; | |
179 | @@ -1307,7 +1308,7 @@ | |
180 | (ulong) SRV_LOG_SPACE_FIRST_ID); | |
181 | } | |
182 | ||
183 | - success = (id < SRV_LOG_SPACE_FIRST_ID); | |
184 | + success = (id < SRV_EXTRA_SYS_SPACE_FIRST_ID); | |
185 | ||
186 | if (success) { | |
187 | *space_id = fil_system->max_assigned_id = id; | |
188 | @@ -1570,6 +1571,8 @@ | |
189 | UT_LIST_INIT(fil_system->LRU); | |
190 | ||
191 | fil_system->max_n_open = max_n_open; | |
192 | + | |
193 | + fil_system->max_assigned_id = TRX_SYS_SPACE_MAX; | |
194 | } | |
195 | ||
196 | /*******************************************************************//** | |
197 | @@ -1591,7 +1594,7 @@ | |
198 | space = UT_LIST_GET_FIRST(fil_system->space_list); | |
199 | ||
200 | while (space != NULL) { | |
201 | - if (space->purpose != FIL_TABLESPACE || space->id == 0) { | |
202 | + if (space->purpose != FIL_TABLESPACE || trx_sys_sys_space(space->id)) { | |
203 | node = UT_LIST_GET_FIRST(space->chain); | |
204 | ||
205 | while (node != NULL) { | |
206 | @@ -1681,6 +1684,10 @@ | |
207 | ut_error; | |
208 | } | |
209 | ||
210 | + if (max_id >= SRV_EXTRA_SYS_SPACE_FIRST_ID) { | |
211 | + return; | |
212 | + } | |
213 | + | |
214 | mutex_enter(&fil_system->mutex); | |
215 | ||
216 | if (fil_system->max_assigned_id < max_id) { | |
217 | @@ -1699,6 +1706,7 @@ | |
218 | ulint | |
219 | fil_write_lsn_and_arch_no_to_file( | |
220 | /*==============================*/ | |
221 | + ulint space_id, | |
222 | ulint sum_of_sizes, /*!< in: combined size of previous files | |
223 | in space, in database pages */ | |
224 | ib_uint64_t lsn, /*!< in: lsn to write */ | |
225 | @@ -1708,14 +1716,16 @@ | |
226 | byte* buf1; | |
227 | byte* buf; | |
228 | ||
229 | + ut_a(trx_sys_sys_space(space_id)); | |
230 | + | |
231 | buf1 = mem_alloc(2 * UNIV_PAGE_SIZE); | |
232 | buf = ut_align(buf1, UNIV_PAGE_SIZE); | |
233 | ||
234 | - fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); | |
235 | + fil_read(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); | |
236 | ||
237 | mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); | |
238 | ||
239 | - fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); | |
240 | + fil_write(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); | |
241 | ||
242 | mem_free(buf1); | |
243 | ||
244 | @@ -1751,7 +1761,7 @@ | |
245 | always open. */ | |
246 | ||
247 | if (space->purpose == FIL_TABLESPACE | |
248 | - && space->id == 0) { | |
249 | + && trx_sys_sys_space(space->id)) { | |
250 | sum_of_sizes = 0; | |
251 | ||
252 | node = UT_LIST_GET_FIRST(space->chain); | |
253 | @@ -1759,7 +1769,7 @@ | |
254 | mutex_exit(&fil_system->mutex); | |
255 | ||
256 | err = fil_write_lsn_and_arch_no_to_file( | |
257 | - sum_of_sizes, lsn, arch_log_no); | |
258 | + space->id, sum_of_sizes, lsn, arch_log_no); | |
259 | if (err != DB_SUCCESS) { | |
260 | ||
261 | return(err); | |
262 | @@ -3806,7 +3816,7 @@ | |
263 | } | |
264 | ||
265 | #ifndef UNIV_HOTBACKUP | |
266 | - if (space_id == ULINT_UNDEFINED || space_id == 0) { | |
267 | + if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) { | |
268 | fprintf(stderr, | |
269 | "InnoDB: Error: tablespace id %lu in file %s" | |
270 | " is not sensible\n", | |
271 | @@ -3815,7 +3825,7 @@ | |
272 | goto func_exit; | |
273 | } | |
274 | #else | |
275 | - if (space_id == ULINT_UNDEFINED || space_id == 0) { | |
276 | + if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) { | |
277 | char* new_path; | |
278 | ||
279 | fprintf(stderr, | |
280 | @@ -4636,7 +4646,7 @@ | |
281 | } | |
282 | ||
283 | if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE | |
284 | - && space->id != 0) { | |
285 | + && !trx_sys_sys_space(space->id)) { | |
286 | /* The node is in the LRU list, remove it */ | |
287 | ||
288 | ut_a(UT_LIST_GET_LEN(system->LRU) > 0); | |
289 | @@ -4682,7 +4692,7 @@ | |
290 | } | |
291 | ||
292 | if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE | |
293 | - && node->space->id != 0) { | |
294 | + && !trx_sys_sys_space(node->space->id)) { | |
295 | /* The node must be put back to the LRU list */ | |
296 | UT_LIST_ADD_FIRST(LRU, system->LRU, node); | |
297 | } | |
298 | @@ -5298,7 +5308,7 @@ | |
299 | ut_a(fil_node->n_pending == 0); | |
300 | ut_a(fil_node->open); | |
301 | ut_a(fil_node->space->purpose == FIL_TABLESPACE); | |
302 | - ut_a(fil_node->space->id != 0); | |
303 | + ut_a(!trx_sys_sys_space(fil_node->space->id)); | |
304 | ||
305 | fil_node = UT_LIST_GET_NEXT(LRU, fil_node); | |
306 | } | |
307 | diff -ruN a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c | |
308 | --- a/storage/innobase/fsp/fsp0fsp.c 2010-11-03 07:01:13.000000000 +0900 | |
309 | +++ b/storage/innobase/fsp/fsp0fsp.c 2010-12-04 15:35:58.632513243 +0900 | |
310 | @@ -48,7 +48,7 @@ | |
311 | # include "log0log.h" | |
312 | #endif /* UNIV_HOTBACKUP */ | |
313 | #include "dict0mem.h" | |
314 | - | |
315 | +#include "trx0sys.h" | |
316 | ||
317 | #define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header | |
318 | within a file page */ | |
319 | @@ -999,10 +999,10 @@ | |
320 | flst_init(header + FSP_SEG_INODES_FREE, mtr); | |
321 | ||
322 | mlog_write_ull(header + FSP_SEG_ID, 1, mtr); | |
323 | - if (space == 0) { | |
324 | + if (space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE) { | |
325 | fsp_fill_free_list(FALSE, space, header, mtr); | |
326 | btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, | |
327 | - 0, 0, DICT_IBUF_ID_MIN + space, | |
328 | + space, 0, DICT_IBUF_ID_MIN + space, | |
329 | dict_ind_redundant, mtr); | |
330 | } else { | |
331 | fsp_fill_free_list(TRUE, space, header, mtr); | |
332 | diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc | |
333 | --- a/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:35:29.153514047 +0900 | |
334 | +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:35:58.636549909 +0900 | |
335 | @@ -163,6 +163,7 @@ | |
336 | static char* innobase_log_group_home_dir = NULL; | |
337 | static char* innobase_file_format_name = NULL; | |
338 | static char* innobase_change_buffering = NULL; | |
339 | +static char* innobase_doublewrite_file = NULL; | |
340 | ||
341 | /* The highest file format being used in the database. The value can be | |
342 | set by user, however, it will be adjusted to the newer file format if | |
343 | @@ -2425,6 +2426,8 @@ | |
344 | goto error; | |
345 | } | |
346 | ||
347 | + srv_doublewrite_file = innobase_doublewrite_file; | |
348 | + | |
349 | srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table; | |
350 | ||
351 | /* -------------- Log files ---------------------------*/ | |
352 | @@ -11553,6 +11556,11 @@ | |
353 | "Path to individual files and their sizes.", | |
354 | NULL, NULL, NULL); | |
355 | ||
356 | +static MYSQL_SYSVAR_STR(doublewrite_file, innobase_doublewrite_file, /* value is later copied into srv_doublewrite_file */ | |
357 | + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, /* quotes in the help text below are escaped so they print literally */ | |
358 | + "Path to special datafile for doublewrite buffer. (default is \"\": not used) ### ONLY FOR EXPERTS!!! ###", | |
359 | + NULL, NULL, NULL); | |
360 | + | |
361 | static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode, | |
362 | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
363 | "The AUTOINC lock modes supported by InnoDB: " | |
364 | @@ -11723,6 +11731,7 @@ | |
365 | MYSQL_SYSVAR(commit_concurrency), | |
366 | MYSQL_SYSVAR(concurrency_tickets), | |
367 | MYSQL_SYSVAR(data_file_path), | |
368 | + MYSQL_SYSVAR(doublewrite_file), | |
369 | MYSQL_SYSVAR(data_home_dir), | |
370 | MYSQL_SYSVAR(doublewrite), | |
371 | MYSQL_SYSVAR(recovery_stats), | |
372 | diff -ruN a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic | |
373 | --- a/storage/innobase/include/mtr0log.ic 2010-11-03 07:01:13.000000000 +0900 | |
374 | +++ b/storage/innobase/include/mtr0log.ic 2010-12-04 15:35:58.644607059 +0900 | |
375 | @@ -27,8 +27,8 @@ | |
376 | #include "ut0lst.h" | |
377 | #include "buf0buf.h" | |
378 | #include "fsp0types.h" | |
379 | +#include "srv0srv.h" | |
380 | #include "trx0sys.h" | |
381 | - | |
382 | /********************************************************//** | |
383 | Opens a buffer to mlog. It must be closed with mlog_close. | |
384 | @return buffer, NULL if log mode MTR_LOG_NONE */ | |
385 | @@ -201,7 +201,8 @@ | |
386 | the doublewrite buffer is located in pages | |
387 | FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the | |
388 | system tablespace */ | |
389 | - if (space == TRX_SYS_SPACE | |
390 | + if ((space == TRX_SYS_SPACE | |
391 | + || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE)) | |
392 | && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) { | |
393 | if (trx_doublewrite_buf_is_being_created) { | |
394 | /* Do nothing: we only come to this branch in an | |
395 | diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h | |
396 | --- a/storage/innobase/include/srv0srv.h 2010-12-04 15:35:29.177480351 +0900 | |
397 | +++ b/storage/innobase/include/srv0srv.h 2010-12-04 15:35:58.646556250 +0900 | |
398 | @@ -132,6 +132,8 @@ | |
399 | extern ulint* srv_data_file_sizes; | |
400 | extern ulint* srv_data_file_is_raw_partition; | |
401 | ||
402 | +extern char* srv_doublewrite_file; | |
403 | + | |
404 | extern ibool srv_recovery_stats; | |
405 | ||
406 | extern ibool srv_auto_extend_last_data_file; | |
407 | diff -ruN a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h | |
408 | --- a/storage/innobase/include/srv0start.h 2010-11-03 07:01:13.000000000 +0900 | |
409 | +++ b/storage/innobase/include/srv0start.h 2010-12-08 17:15:07.602605797 +0900 | |
410 | @@ -127,4 +127,7 @@ | |
411 | /** Log 'spaces' have id's >= this */ | |
412 | #define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL | |
413 | ||
414 | +/** Extra system tablespaces (such as the doublewrite file) have id's >= this */ | |
415 | +#define SRV_EXTRA_SYS_SPACE_FIRST_ID 0xFFFFFFE0UL | |
416 | + | |
417 | #endif | |
418 | diff -ruN a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h | |
419 | --- a/storage/innobase/include/trx0sys.h 2010-12-03 15:41:52.047049291 +0900 | |
420 | +++ b/storage/innobase/include/trx0sys.h 2010-12-04 15:35:58.647551222 +0900 | |
421 | @@ -124,6 +124,22 @@ | |
422 | /*=============*/ | |
423 | ulint space, /*!< in: space */ | |
424 | ulint page_no);/*!< in: page number */ | |
425 | +/***************************************************************//** | |
426 | +Checks if a space is one of the system tablespaces. | |
427 | +@return TRUE if space is TRX_SYS_SPACE, or TRX_DOUBLEWRITE_SPACE while srv_doublewrite_file is set */ | |
428 | +UNIV_INLINE | |
429 | +ibool | |
430 | +trx_sys_sys_space( | |
431 | +/*==============*/ | |
432 | + ulint space); /*!< in: tablespace id to check */ | |
433 | +/***************************************************************//** | |
434 | +Checks if a space is the tablespace that holds the doublewrite buffer. | |
435 | +@return TRUE if space is TRX_DOUBLEWRITE_SPACE while srv_doublewrite_file is set, otherwise TRUE if space is TRX_SYS_SPACE */ | |
436 | +UNIV_INLINE | |
437 | +ibool | |
438 | +trx_sys_doublewrite_space( | |
439 | +/*======================*/ | |
440 | + ulint space); /*!< in: tablespace id to check */ | |
441 | /*****************************************************************//** | |
442 | Creates and initializes the central memory structures for the transaction | |
443 | system. This is called when the database is started. */ | |
444 | @@ -137,6 +153,13 @@ | |
445 | void | |
446 | trx_sys_create(void); | |
447 | /*================*/ | |
448 | +/*****************************************************************//** | |
449 | +Creates and initializes the dummy transaction system page for the given tablespace. */ | |
450 | +UNIV_INTERN | |
451 | +void | |
452 | +trx_sys_dummy_create( | |
453 | +/*=================*/ | |
454 | + ulint space); /*!< in: tablespace id; the callers in this patch pass TRX_DOUBLEWRITE_SPACE */ | |
455 | /****************************************************************//** | |
456 | Looks for a free slot for a rollback segment in the trx system file copy. | |
457 | @return slot index or ULINT_UNDEFINED if not found */ | |
458 | @@ -448,6 +471,8 @@ | |
459 | ||
460 | /* Space id and page no where the trx system file copy resides */ | |
461 | #define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */ | |
462 | +#define TRX_DOUBLEWRITE_SPACE 0xFFFFFFE0UL /* the doublewrite buffer tablespace if used */ | |
463 | +#define TRX_SYS_SPACE_MAX 9 /* reserved max space id for system tablespaces */ | |
464 | #include "fsp0fsp.h" | |
465 | #define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO | |
466 | ||
467 | diff -ruN a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic | |
468 | --- a/storage/innobase/include/trx0sys.ic 2010-11-03 07:01:13.000000000 +0900 | |
469 | +++ b/storage/innobase/include/trx0sys.ic 2010-12-04 15:35:58.649473284 +0900 | |
470 | @@ -71,6 +71,40 @@ | |
471 | } | |
472 | ||
473 | /***************************************************************//** | |
474 | +Checks if a space is one of the system tablespaces. | |
475 | +@return TRUE if space is TRX_SYS_SPACE, or TRX_DOUBLEWRITE_SPACE while srv_doublewrite_file is set */ | |
476 | +UNIV_INLINE | |
477 | +ibool | |
478 | +trx_sys_sys_space( | |
479 | +/*==============*/ | |
480 | + ulint space) /*!< in: tablespace id to check */ | |
481 | +{ | |
482 | + if (srv_doublewrite_file) { | |
483 | + /* a separate doublewrite file is in use: its reserved space id counts as a system tablespace too */ | |
484 | + return((ibool)(space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE)); | |
485 | + } else { | |
486 | + return((ibool)(space == TRX_SYS_SPACE)); | |
487 | + } | |
488 | +} | |
489 | + | |
490 | +/***************************************************************//** | |
491 | +Checks if a space is the tablespace that holds the doublewrite buffer. | |
492 | +@return TRUE if space is TRX_DOUBLEWRITE_SPACE while srv_doublewrite_file is set, otherwise TRUE if space is TRX_SYS_SPACE */ | |
493 | +UNIV_INLINE | |
494 | +ibool | |
495 | +trx_sys_doublewrite_space( | |
496 | +/*======================*/ | |
497 | + ulint space) /*!< in: tablespace id to check */ | |
498 | +{ | |
499 | + if (srv_doublewrite_file) { | |
500 | + /* the doublewrite buffer lives in its own file, under the reserved space id */ | |
501 | + return((ibool)(space == TRX_DOUBLEWRITE_SPACE)); | |
502 | + } else { | |
503 | + return((ibool)(space == TRX_SYS_SPACE)); | |
504 | + } | |
505 | +} | |
506 | + | |
507 | +/***************************************************************//** | |
508 | Gets the pointer in the nth slot of the rseg array. | |
509 | @return pointer to rseg object, NULL if slot not in use */ | |
510 | UNIV_INLINE | |
511 | diff -ruN a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c | |
512 | --- a/storage/innobase/row/row0mysql.c 2010-12-03 17:30:16.334989510 +0900 | |
513 | +++ b/storage/innobase/row/row0mysql.c 2010-12-04 15:35:58.652496484 +0900 | |
514 | @@ -3423,7 +3423,7 @@ | |
515 | /* Do not drop possible .ibd tablespace if something went | |
516 | wrong: we do not want to delete valuable data of the user */ | |
517 | ||
518 | - if (err == DB_SUCCESS && space_id > 0) { | |
519 | + if (err == DB_SUCCESS && !trx_sys_sys_space(space_id)) { | |
520 | if (!fil_space_for_table_exists_in_mem(space_id, | |
521 | name_or_path, | |
522 | is_temp, FALSE, | |
523 | diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c | |
524 | --- a/storage/innobase/srv/srv0srv.c 2010-12-04 15:35:29.180483212 +0900 | |
525 | +++ b/storage/innobase/srv/srv0srv.c 2010-12-04 15:35:58.656550107 +0900 | |
526 | @@ -168,6 +168,8 @@ | |
527 | /* size in database pages */ | |
528 | UNIV_INTERN ulint* srv_data_file_sizes = NULL; | |
529 | ||
530 | +UNIV_INTERN char* srv_doublewrite_file = NULL; | |
531 | + | |
532 | UNIV_INTERN ibool srv_recovery_stats = FALSE; | |
533 | ||
534 | /* if TRUE, then we auto-extend the last data file */ | |
535 | diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c | |
536 | --- a/storage/innobase/srv/srv0start.c 2010-12-04 15:35:29.183481330 +0900 | |
537 | +++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:35:58.661550545 +0900 | |
538 | @@ -715,6 +715,7 @@ | |
539 | /*======================*/ | |
540 | ibool* create_new_db, /*!< out: TRUE if new database should be | |
541 | created */ | |
542 | + ibool* create_new_doublewrite_file, | |
543 | #ifdef UNIV_LOG_ARCHIVE | |
544 | ulint* min_arch_log_no,/*!< out: min of archived log | |
545 | numbers in data files */ | |
546 | @@ -747,6 +748,7 @@ | |
547 | *sum_of_new_sizes = 0; | |
548 | ||
549 | *create_new_db = FALSE; | |
550 | + *create_new_doublewrite_file = FALSE; | |
551 | ||
552 | srv_normalize_path_for_win(srv_data_home); | |
553 | ||
554 | @@ -984,6 +986,142 @@ | |
555 | srv_data_file_is_raw_partition[i] != 0); | |
556 | } | |
557 | ||
558 | + /* special file for doublewrite buffer */ | |
559 | + if (srv_doublewrite_file) | |
560 | + { | |
561 | + srv_normalize_path_for_win(srv_doublewrite_file); | |
562 | + | |
563 | + fprintf(stderr, | |
564 | + "InnoDB: Notice: innodb_doublewrite_file is specified.\n" | |
565 | + "InnoDB: This is for expert only. Don't use if you don't understand what is it 'WELL'.\n" | |
566 | + "InnoDB: ### Don't specify older file than the last checkpoint ###\n" | |
567 | + "InnoDB: otherwise the older doublewrite buffer will break your data during recovery!\n"); | |
568 | + | |
569 | + strcpy(name, srv_doublewrite_file); | |
570 | + | |
571 | + /* First we try to create the file: if it already | |
572 | + exists, ret will get value FALSE */ | |
573 | + | |
574 | + files[i] = os_file_create(innodb_file_data_key, name, OS_FILE_CREATE, | |
575 | + OS_FILE_NORMAL, | |
576 | + OS_DATA_FILE, &ret); | |
577 | + | |
578 | + if (ret == FALSE && os_file_get_last_error(FALSE) | |
579 | + != OS_FILE_ALREADY_EXISTS | |
580 | +#ifdef UNIV_AIX | |
581 | + /* AIX 5.1 after security patch ML7 may have | |
582 | + errno set to 0 here, which causes our function | |
583 | + to return 100; work around that AIX problem */ | |
584 | + && os_file_get_last_error(FALSE) != 100 | |
585 | +#endif | |
586 | + ) { | |
587 | + fprintf(stderr, | |
588 | + "InnoDB: Error in creating" | |
589 | + " or opening %s\n", | |
590 | + name); | |
591 | + | |
592 | + return(DB_ERROR); | |
593 | + } | |
594 | + | |
595 | + if (ret == FALSE) { | |
596 | + /* We open the data file */ | |
597 | + | |
598 | + files[i] = os_file_create(innodb_file_data_key, | |
599 | + name, OS_FILE_OPEN, OS_FILE_NORMAL, | |
600 | + OS_DATA_FILE, &ret); | |
601 | + | |
602 | + if (!ret) { | |
603 | + fprintf(stderr, | |
604 | + "InnoDB: Error in opening %s\n", name); | |
605 | + os_file_get_last_error(TRUE); | |
606 | + | |
607 | + return(DB_ERROR); | |
608 | + } | |
609 | + | |
610 | + ret = os_file_get_size(files[i], &size, &size_high); | |
611 | + ut_a(ret); | |
612 | + /* Round size downward to megabytes */ | |
613 | + | |
614 | + rounded_size_pages | |
615 | + = (size / (1024 * 1024) + 4096 * size_high) | |
616 | + << (20 - UNIV_PAGE_SIZE_SHIFT); | |
617 | + | |
618 | + if (rounded_size_pages != TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9) { | |
619 | + | |
620 | + fprintf(stderr, | |
621 | + "InnoDB: Warning: doublewrite buffer file %s" | |
622 | + " is of a different size\n" | |
623 | + "InnoDB: %lu pages" | |
624 | + " (rounded down to MB)\n" | |
625 | + "InnoDB: than intended size" | |
626 | + " %lu pages...\n", | |
627 | + name, | |
628 | + (ulong) rounded_size_pages, | |
629 | + (ulong) TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9); | |
630 | + } | |
631 | + | |
632 | + fil_read_flushed_lsn_and_arch_log_no( | |
633 | + files[i], one_opened, | |
634 | +#ifdef UNIV_LOG_ARCHIVE | |
635 | + min_arch_log_no, max_arch_log_no, | |
636 | +#endif /* UNIV_LOG_ARCHIVE */ | |
637 | + min_flushed_lsn, max_flushed_lsn); | |
638 | + one_opened = TRUE; | |
639 | + } else { | |
640 | + /* We created the data file and now write it full of | |
641 | + zeros */ | |
642 | + | |
643 | + *create_new_doublewrite_file = TRUE; | |
644 | + | |
645 | + ut_print_timestamp(stderr); | |
646 | + fprintf(stderr, | |
647 | + " InnoDB: Doublewrite buffer file %s did not" | |
648 | + " exist: new to be created\n", | |
649 | + name); | |
650 | + | |
651 | + if (*create_new_db == FALSE) { | |
652 | + fprintf(stderr, | |
653 | + "InnoDB: Warning: Previous version's ibdata files may cause crash.\n" | |
654 | + " If you use that, please use the ibdata files of this version.\n"); | |
655 | + } | |
656 | + | |
657 | + ut_print_timestamp(stderr); | |
658 | + fprintf(stderr, | |
659 | + " InnoDB: Setting file %s size to %lu MB\n", | |
660 | + name, | |
661 | + (ulong) ((TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9) | |
662 | + >> (20 - UNIV_PAGE_SIZE_SHIFT))); | |
663 | + | |
664 | + fprintf(stderr, | |
665 | + "InnoDB: Database physically writes the" | |
666 | + " file full: wait...\n"); | |
667 | + | |
668 | + ret = os_file_set_size( | |
669 | + name, files[i], | |
670 | + srv_calc_low32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9), | |
671 | + srv_calc_high32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9)); | |
672 | + | |
673 | + if (!ret) { | |
674 | + fprintf(stderr, | |
675 | + "InnoDB: Error in creating %s:" | |
676 | + " probably out of disk space\n", name); | |
677 | + | |
678 | + return(DB_ERROR); | |
679 | + } | |
680 | + } | |
681 | + | |
682 | + ret = os_file_close(files[i]); | |
683 | + ut_a(ret); | |
684 | + | |
685 | + fil_space_create(name, TRX_DOUBLEWRITE_SPACE, 0, FIL_TABLESPACE); | |
686 | + | |
687 | + ut_a(fil_validate()); | |
688 | + | |
689 | + fil_node_create(name, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, TRX_DOUBLEWRITE_SPACE, FALSE); | |
690 | + | |
691 | + i++; | |
692 | + } | |
693 | + | |
694 | return(DB_SUCCESS); | |
695 | } | |
696 | ||
697 | @@ -997,6 +1135,7 @@ | |
698 | /*====================================*/ | |
699 | { | |
700 | ibool create_new_db; | |
701 | + ibool create_new_doublewrite_file; | |
702 | ibool log_file_created; | |
703 | ibool log_created = FALSE; | |
704 | ibool log_opened = FALSE; | |
705 | @@ -1416,6 +1555,7 @@ | |
706 | } | |
707 | ||
708 | err = open_or_create_data_files(&create_new_db, | |
709 | + &create_new_doublewrite_file, | |
710 | #ifdef UNIV_LOG_ARCHIVE | |
711 | &min_arch_log_no, &max_arch_log_no, | |
712 | #endif /* UNIV_LOG_ARCHIVE */ | |
713 | @@ -1545,6 +1685,14 @@ | |
714 | after the double write buffer has been created. */ | |
715 | trx_sys_create(); | |
716 | ||
717 | + if (create_new_doublewrite_file) { | |
718 | + mtr_start(&mtr); | |
719 | + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr); | |
720 | + mtr_commit(&mtr); | |
721 | + | |
722 | + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); | |
723 | + } | |
724 | + | |
725 | dict_create(); | |
726 | ||
727 | srv_startup_is_before_trx_rollback_phase = FALSE; | |
728 | @@ -1577,6 +1725,13 @@ | |
729 | recv_recovery_from_archive_finish(); | |
730 | #endif /* UNIV_LOG_ARCHIVE */ | |
731 | } else { | |
732 | + char* save_srv_doublewrite_file = NULL; | |
733 | + | |
734 | + if (create_new_doublewrite_file) { | |
735 | + /* doublewrite_file cannot be used for recovery yet. */ | |
736 | + save_srv_doublewrite_file = srv_doublewrite_file; | |
737 | + srv_doublewrite_file = NULL; | |
738 | + } | |
739 | ||
740 | /* Check if we support the max format that is stamped | |
741 | on the system tablespace. | |
742 | @@ -1663,6 +1818,17 @@ | |
743 | we have finished the recovery process so that the | |
744 | image of TRX_SYS_PAGE_NO is not stale. */ | |
745 | trx_sys_file_format_tag_init(); | |
746 | + | |
747 | + if (create_new_doublewrite_file) { | |
748 | + /* restore the value */ | |
749 | + srv_doublewrite_file = save_srv_doublewrite_file; | |
750 | + | |
751 | + mtr_start(&mtr); | |
752 | + fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr); | |
753 | + mtr_commit(&mtr); | |
754 | + | |
755 | + trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE); | |
756 | + } | |
757 | } | |
758 | ||
759 | if (!create_new_db && sum_of_new_sizes > 0) { | |
760 | diff -ruN a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c | |
761 | --- a/storage/innobase/trx/trx0sys.c 2010-12-03 17:32:15.651024019 +0900 | |
762 | +++ b/storage/innobase/trx/trx0sys.c 2010-12-04 15:35:58.664550291 +0900 | |
763 | @@ -414,6 +414,152 @@ | |
764 | ||
765 | goto start_again; | |
766 | } | |
767 | + | |
768 | + if (srv_doublewrite_file) { | |
769 | + /* the same doublewrite buffer to TRX_SYS_SPACE should exist. | |
770 | + check and create if not exist.*/ | |
771 | + | |
772 | + mtr_start(&mtr); | |
773 | + trx_doublewrite_buf_is_being_created = TRUE; | |
774 | + | |
775 | + block = buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, TRX_SYS_PAGE_NO, | |
776 | + RW_X_LATCH, &mtr); | |
777 | + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); | |
778 | + | |
779 | + doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE; | |
780 | + | |
781 | + if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) | |
782 | + == TRX_SYS_DOUBLEWRITE_MAGIC_N) { | |
783 | + /* The doublewrite buffer has already been created: | |
784 | + just read in some numbers */ | |
785 | + | |
786 | + mtr_commit(&mtr); | |
787 | + } else { | |
788 | + fprintf(stderr, | |
789 | + "InnoDB: Doublewrite buffer not found in the doublewrite file:" | |
790 | + " creating new\n"); | |
791 | + | |
792 | + if (buf_pool_get_curr_size() | |
793 | + < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE | |
794 | + + FSP_EXTENT_SIZE / 2 + 100) | |
795 | + * UNIV_PAGE_SIZE)) { | |
796 | + fprintf(stderr, | |
797 | + "InnoDB: Cannot create doublewrite buffer:" | |
798 | + " you must\n" | |
799 | + "InnoDB: increase your buffer pool size.\n" | |
800 | + "InnoDB: Cannot continue operation.\n"); | |
801 | + | |
802 | + exit(1); | |
803 | + } | |
804 | + | |
805 | + block2 = fseg_create(TRX_DOUBLEWRITE_SPACE, TRX_SYS_PAGE_NO, | |
806 | + TRX_SYS_DOUBLEWRITE | |
807 | + + TRX_SYS_DOUBLEWRITE_FSEG, &mtr); | |
808 | + | |
809 | + /* fseg_create acquires a second latch on the page, | |
810 | + therefore we must declare it: */ | |
811 | + | |
812 | + buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK); | |
813 | + | |
814 | + if (block2 == NULL) { | |
815 | + fprintf(stderr, | |
816 | + "InnoDB: Cannot create doublewrite buffer:" | |
817 | + " you must\n" | |
818 | + "InnoDB: increase your tablespace size.\n" | |
819 | + "InnoDB: Cannot continue operation.\n"); | |
820 | + | |
821 | + /* We exit without committing the mtr to prevent | |
822 | + its modifications to the database getting to disk */ | |
823 | + | |
824 | + exit(1); | |
825 | + } | |
826 | + | |
827 | + fseg_header = buf_block_get_frame(block) | |
828 | + + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG; | |
829 | + prev_page_no = 0; | |
830 | + | |
831 | + for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE | |
832 | + + FSP_EXTENT_SIZE / 2; i++) { | |
833 | + page_no = fseg_alloc_free_page(fseg_header, | |
834 | + prev_page_no + 1, | |
835 | + FSP_UP, &mtr); | |
836 | + if (page_no == FIL_NULL) { | |
837 | + fprintf(stderr, | |
838 | + "InnoDB: Cannot create doublewrite" | |
839 | + " buffer: you must\n" | |
840 | + "InnoDB: increase your" | |
841 | + " tablespace size.\n" | |
842 | + "InnoDB: Cannot continue operation.\n" | |
843 | + ); | |
844 | + | |
845 | + exit(1); | |
846 | + } | |
847 | + | |
848 | + /* We read the allocated pages to the buffer pool; | |
849 | + when they are written to disk in a flush, the space | |
850 | + id and page number fields are also written to the | |
851 | + pages. When we at database startup read pages | |
852 | + from the doublewrite buffer, we know that if the | |
853 | + space id and page number in them are the same as | |
854 | + the page position in the tablespace, then the page | |
855 | + has not been written to in doublewrite. */ | |
856 | + | |
857 | +#ifdef UNIV_SYNC_DEBUG | |
858 | + new_block = | |
859 | +#endif /* UNIV_SYNC_DEBUG */ | |
860 | + buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, page_no, | |
861 | + RW_X_LATCH, &mtr); | |
862 | + buf_block_dbg_add_level(new_block, | |
863 | + SYNC_NO_ORDER_CHECK); | |
864 | + | |
865 | + if (i == FSP_EXTENT_SIZE / 2) { | |
866 | + ut_a(page_no == FSP_EXTENT_SIZE); | |
867 | + mlog_write_ulint(doublewrite | |
868 | + + TRX_SYS_DOUBLEWRITE_BLOCK1, | |
869 | + page_no, MLOG_4BYTES, &mtr); | |
870 | + mlog_write_ulint(doublewrite | |
871 | + + TRX_SYS_DOUBLEWRITE_REPEAT | |
872 | + + TRX_SYS_DOUBLEWRITE_BLOCK1, | |
873 | + page_no, MLOG_4BYTES, &mtr); | |
874 | + } else if (i == FSP_EXTENT_SIZE / 2 | |
875 | + + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { | |
876 | + ut_a(page_no == 2 * FSP_EXTENT_SIZE); | |
877 | + mlog_write_ulint(doublewrite | |
878 | + + TRX_SYS_DOUBLEWRITE_BLOCK2, | |
879 | + page_no, MLOG_4BYTES, &mtr); | |
880 | + mlog_write_ulint(doublewrite | |
881 | + + TRX_SYS_DOUBLEWRITE_REPEAT | |
882 | + + TRX_SYS_DOUBLEWRITE_BLOCK2, | |
883 | + page_no, MLOG_4BYTES, &mtr); | |
884 | + } else if (i > FSP_EXTENT_SIZE / 2) { | |
885 | + ut_a(page_no == prev_page_no + 1); | |
886 | + } | |
887 | + | |
888 | + prev_page_no = page_no; | |
889 | + } | |
890 | + | |
891 | + mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC, | |
892 | + TRX_SYS_DOUBLEWRITE_MAGIC_N, | |
893 | + MLOG_4BYTES, &mtr); | |
894 | + mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC | |
895 | + + TRX_SYS_DOUBLEWRITE_REPEAT, | |
896 | + TRX_SYS_DOUBLEWRITE_MAGIC_N, | |
897 | + MLOG_4BYTES, &mtr); | |
898 | + | |
899 | + mlog_write_ulint(doublewrite | |
900 | + + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED, | |
901 | + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N, | |
902 | + MLOG_4BYTES, &mtr); | |
903 | + mtr_commit(&mtr); | |
904 | + | |
905 | + /* Flush the modified pages to disk and make a checkpoint */ | |
906 | + log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE); | |
907 | + | |
908 | + fprintf(stderr, "InnoDB: Doublewrite buffer created in the doublewrite file\n"); | |
909 | + trx_sys_multiple_tablespace_format = TRUE; | |
910 | + } | |
911 | + trx_doublewrite_buf_is_being_created = FALSE; | |
912 | + } | |
913 | } | |
914 | ||
915 | /****************************************************************//** | |
916 | @@ -437,10 +583,19 @@ | |
917 | ulint source_page_no; | |
918 | byte* page; | |
919 | byte* doublewrite; | |
920 | + ulint doublewrite_space_id; | |
921 | ulint space_id; | |
922 | ulint page_no; | |
923 | ulint i; | |
924 | ||
925 | + doublewrite_space_id = (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE); | |
926 | + | |
927 | + if (srv_doublewrite_file) { | |
928 | + fprintf(stderr, | |
929 | + "InnoDB: doublewrite file '%s' is used.\n", | |
930 | + srv_doublewrite_file); | |
931 | + } | |
932 | + | |
933 | /* We do the file i/o past the buffer pool */ | |
934 | ||
935 | unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE); | |
936 | @@ -449,7 +604,7 @@ | |
937 | /* Read the trx sys header to check if we are using the doublewrite | |
938 | buffer */ | |
939 | ||
940 | - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0, | |
941 | + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, TRX_SYS_PAGE_NO, 0, | |
942 | UNIV_PAGE_SIZE, read_buf, NULL); | |
943 | doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; | |
944 | ||
945 | @@ -487,10 +642,10 @@ | |
946 | ||
947 | /* Read the pages from the doublewrite buffer to memory */ | |
948 | ||
949 | - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0, | |
950 | + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block1, 0, | |
951 | TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, | |
952 | buf, NULL); | |
953 | - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0, | |
954 | + fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block2, 0, | |
955 | TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, | |
956 | buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, | |
957 | NULL); | |
958 | @@ -546,7 +701,8 @@ | |
959 | " doublewrite buf.\n", | |
960 | (ulong) space_id, (ulong) page_no, (ulong) i); | |
961 | ||
962 | - } else if (space_id == TRX_SYS_SPACE | |
963 | + } else if ((space_id == TRX_SYS_SPACE | |
964 | + || (srv_doublewrite_file && space_id == TRX_DOUBLEWRITE_SPACE)) | |
965 | && ((page_no >= block1 | |
966 | && page_no | |
967 | < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) | |
968 | @@ -990,6 +1146,83 @@ | |
969 | } | |
970 | ||
971 | /*****************************************************************//** | |
972 | +Creates a dummy trx sys file page (page type and zeroed doublewrite magic only). */ | |
973 | +static | |
974 | +void | |
975 | +trx_sysf_dummy_create( | |
976 | +/*==================*/ | |
977 | + ulint space, /*!< in: id of the tablespace to create the page in */ | |
978 | + mtr_t* mtr) /*!< in: mini-transaction used for the latch and the logged writes */ | |
979 | +{ | |
980 | + buf_block_t* block; | |
981 | + page_t* page; | |
982 | + | |
983 | + ut_ad(mtr); | |
984 | + | |
985 | + /* Note that below we first reserve the file space x-latch, and | |
986 | + then enter the kernel: we must do it in this order to conform | |
987 | + to the latching order rules. */ | |
988 | + | |
989 | + mtr_x_lock(fil_space_get_latch(space, NULL), mtr); | |
990 | + mutex_enter(&kernel_mutex); | |
991 | + | |
992 | + /* Create the trx sys file block in a new allocated file segment */ | |
993 | + block = fseg_create(space, 0, TRX_SYS + TRX_SYS_FSEG_HEADER, | |
994 | + mtr); | |
995 | + buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER); | |
996 | + | |
997 | + /* The block returned by fseg_create must be page TRX_SYS_PAGE_NO. */ | |
998 | + ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO); | |
999 | + | |
1000 | + page = buf_block_get_frame(block); | |
1001 | + | |
1002 | + mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS, | |
1003 | + MLOG_2BYTES, mtr); | |
1004 | + | |
1005 | + /* Reset the doublewrite buffer magic number to zero so that we | |
1006 | + know that the doublewrite buffer has not yet been created (this | |
1007 | + suppresses a Valgrind warning) */ | |
1008 | + | |
1009 | + mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE | |
1010 | + + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr); | |
1011 | + | |
1012 | +#ifdef UNDEFINED | |
1013 | + /* TODO: REMOVE IT: The below is not needed, I think (it also uses locals this function does not declare) */ | |
1014 | + sys_header = trx_sysf_get(mtr); | |
1015 | + | |
1016 | + /* Start counting transaction ids from number 1 up */ | |
1017 | + mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE, | |
1018 | + ut_dulint_create(0, 1), mtr); | |
1019 | + | |
1020 | + /* Reset the rollback segment slots */ | |
1021 | + for (i = 0; i < TRX_SYS_N_RSEGS; i++) { | |
1022 | + | |
1023 | + trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr); | |
1024 | + trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr); | |
1025 | + } | |
1026 | + | |
1027 | + /* The remaining area (up to the page trailer) is uninitialized. | |
1028 | + Silence Valgrind warnings about it. */ | |
1029 | + UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS | |
1030 | + + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE | |
1031 | + + TRX_SYS_RSEG_SPACE), | |
1032 | + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END | |
1033 | + - (TRX_SYS_RSEGS | |
1034 | + + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE | |
1035 | + + TRX_SYS_RSEG_SPACE)) | |
1036 | + + page - sys_header); | |
1037 | + | |
1038 | + /* Create the first rollback segment in the SYSTEM tablespace */ | |
1039 | + page_no = trx_rseg_header_create(space, 0, ULINT_MAX, &slot_no, | |
1040 | + mtr); | |
1041 | + ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID); | |
1042 | + ut_a(page_no != FIL_NULL); | |
1043 | +#endif | |
1044 | + | |
1045 | + mutex_exit(&kernel_mutex); | |
1046 | +} | |
1047 | + | |
1048 | +/*****************************************************************//** | |
1049 | Creates and initializes the central memory structures for the transaction | |
1050 | system. This is called when the database is started. */ | |
1051 | UNIV_INTERN | |
1052 | @@ -1351,6 +1584,26 @@ | |
1053 | /* Does nothing at the moment */ | |
1054 | } | |
1055 | ||
1056 | +/*****************************************************************//** | |
1057 | +Creates the dummy transaction system page for a tablespace, in its own mini-transaction. */ | |
1058 | +UNIV_INTERN | |
1059 | +void | |
1060 | +trx_sys_dummy_create( | |
1061 | +/*=================*/ | |
1062 | + ulint space) /*!< in: tablespace id; asserted to be TRX_DOUBLEWRITE_SPACE */ | |
1063 | +{ | |
1064 | + mtr_t mtr; | |
1065 | + | |
1066 | + /* This function is only for the doublewrite file for now */ | |
1067 | + ut_a(space == TRX_DOUBLEWRITE_SPACE); | |
1068 | + | |
1069 | + mtr_start(&mtr); | |
1070 | + | |
1071 | + trx_sysf_dummy_create(space, &mtr); | |
1072 | + | |
1073 | + mtr_commit(&mtr); | |
1074 | +} | |
1075 | + | |
1076 | /********************************************************************* | |
1077 | Creates the rollback segments */ | |
1078 | UNIV_INTERN |