1 # name : innodb_expand_import.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
9 --- a/storage/innobase/fil/fil0fil.c 2010-12-03 15:09:51.274957577 +0900
10 +++ b/storage/innobase/fil/fil0fil.c 2010-12-03 15:52:23.553986552 +0900
12 #include "dict0dict.h"
13 #include "page0page.h"
17 +#include "pars0pars.h"
18 +#include "row0mysql.h"
21 #ifndef UNIV_HOTBACKUP
23 # include "ibuf0ibuf.h"
26 file = os_file_create_simple_no_error_handling(
27 innodb_file_data_key, filepath, OS_FILE_OPEN,
28 - OS_FILE_READ_ONLY, &success);
29 + OS_FILE_READ_WRITE, &success);
31 /* The following call prints an error message */
32 os_file_get_last_error(TRUE);
33 @@ -3097,6 +3103,466 @@
34 space_id = fsp_header_get_space_id(page);
35 space_flags = fsp_header_get_flags(page);
37 + if (srv_expand_import
38 + && (space_id != id || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
39 + ibool file_is_corrupt = FALSE;
42 + ibool descr_is_corrupt = FALSE;
43 + index_id_t old_id[31];
44 + index_id_t new_id[31];
45 + ulint root_page[31];
47 + os_file_t info_file = -1;
48 + char* info_file_path;
51 + ib_uint64_t current_lsn;
52 + ulint size_low, size_high, size, free_limit;
53 + ib_int64_t size_bytes, free_limit_bytes;
54 + dict_table_t* table;
55 + dict_index_t* index;
56 + fil_system_t* system;
57 + fil_node_t* node = NULL;
60 + buf3 = ut_malloc(2 * UNIV_PAGE_SIZE);
61 + descr_page = ut_align(buf3, UNIV_PAGE_SIZE);
63 + current_lsn = log_get_lsn();
65 + /* check the header page's consistency */
66 + if (buf_page_is_corrupted(page,
67 + dict_table_flags_to_zip_size(space_flags))) {
68 + fprintf(stderr, "InnoDB: page 0 of %s seems corrupt.\n", filepath);
69 + file_is_corrupt = TRUE;
70 + descr_is_corrupt = TRUE;
73 + /* store as first descr page */
74 + memcpy(descr_page, page, UNIV_PAGE_SIZE);
76 + /* get free limit (page number) of the table space */
77 +/* these must stay identical to the definitions in fsp0fsp.c */
78 +#define FSP_HEADER_OFFSET FIL_PAGE_DATA
79 +#define FSP_FREE_LIMIT 12
80 + free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page);
81 + free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)UNIV_PAGE_SIZE;
83 + /* overwrite fsp header */
84 + fsp_header_init_fields(page, id, flags);
85 + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
87 + space_flags = flags;
88 + if (mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN) > current_lsn)
89 + mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
90 + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
92 + ? buf_calc_page_new_checksum(page)
93 + : BUF_NO_CHECKSUM_MAGIC);
94 + mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
96 + ? buf_calc_page_old_checksum(page)
97 + : BUF_NO_CHECKSUM_MAGIC);
98 + success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
100 + /* get file size */
101 + os_file_get_size(file, &size_low, &size_high);
102 + size_bytes = (((ib_int64_t)size_high) << 32)
103 + + (ib_int64_t)size_low;
105 + if (size_bytes < free_limit_bytes) {
106 + free_limit_bytes = size_bytes;
107 + if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) {
108 + fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath);
109 + file_is_corrupt = TRUE;
113 + /* get clustered index information */
114 + table = dict_table_get_low(name);
115 + index = dict_table_get_first_index(table);
116 + ut_a(index->page==3);
118 + /* read metadata from .exp file */
120 + memset(old_id, 0, sizeof(old_id));
121 + memset(new_id, 0, sizeof(new_id));
122 + memset(root_page, 0, sizeof(root_page));
124 + info_file_path = fil_make_ibd_name(name, FALSE);
125 + len = strlen(info_file_path);
126 + info_file_path[len - 3] = 'e';
127 + info_file_path[len - 2] = 'x';
128 + info_file_path[len - 1] = 'p';
130 + info_file = os_file_create_simple_no_error_handling(innodb_file_data_key,
131 + info_file_path, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
133 + fprintf(stderr, "InnoDB: cannot open %s\n", info_file_path);
134 + file_is_corrupt = TRUE;
137 + success = os_file_read(info_file, page, 0, 0, UNIV_PAGE_SIZE);
139 + fprintf(stderr, "InnoDB: cannot read %s\n", info_file_path);
140 + file_is_corrupt = TRUE;
143 + if (mach_read_from_4(page) != 0x78706f72UL
144 + || mach_read_from_4(page + 4) != 0x74696e66UL) {
145 + fprintf(stderr, "InnoDB: %s seems not to be a correct .exp file\n", info_file_path);
146 + file_is_corrupt = TRUE;
150 + fprintf(stderr, "InnoDB: import: extended import of %s is started.\n", name);
152 + n_index = mach_read_from_4(page + 8);
153 + fprintf(stderr, "InnoDB: import: %lu indexes are detected.\n", (ulong)n_index);
154 + for (i = 0; i < n_index; i++) {
156 + dict_table_get_index_on_name(table,
157 + (char*)(page + (i + 1) * 512 + 12))->id;
158 + old_id[i] = mach_read_from_8(page + (i + 1) * 512);
159 + root_page[i] = mach_read_from_4(page + (i + 1) * 512 + 8);
163 + if (info_file != -1)
164 + os_file_close(info_file);
167 + if (size_bytes >= 1024 * 1024) {
168 + size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
171 + if (!(flags & DICT_TF_ZSSIZE_MASK)) {
172 + mem_heap_t* heap = NULL;
173 + ulint offsets_[REC_OFFS_NORMAL_SIZE];
174 + ulint* offsets = offsets_;
177 + size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
178 + /* overwrite the space id of all pages */
179 + rec_offs_init(offsets_);
181 + fprintf(stderr, "InnoDB: Progress in %%:");
183 + for (offset = 0; offset < free_limit_bytes; offset += UNIV_PAGE_SIZE) {
184 + ulint checksum_field;
185 + ulint old_checksum_field;
186 + ibool page_is_corrupt;
188 + success = os_file_read(file, page,
189 + (ulint)(offset & 0xFFFFFFFFUL),
190 + (ulint)(offset >> 32), UNIV_PAGE_SIZE);
192 + page_is_corrupt = FALSE;
194 + /* check consistency */
195 + if (memcmp(page + FIL_PAGE_LSN + 4,
196 + page + UNIV_PAGE_SIZE
197 + - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
199 + page_is_corrupt = TRUE;
202 + if (mach_read_from_4(page + FIL_PAGE_OFFSET)
203 + != offset / UNIV_PAGE_SIZE) {
205 + page_is_corrupt = TRUE;
208 + checksum_field = mach_read_from_4(page
209 + + FIL_PAGE_SPACE_OR_CHKSUM);
211 + old_checksum_field = mach_read_from_4(
212 + page + UNIV_PAGE_SIZE
213 + - FIL_PAGE_END_LSN_OLD_CHKSUM);
215 + if (old_checksum_field != mach_read_from_4(page
217 + && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
218 + && old_checksum_field
219 + != buf_calc_page_old_checksum(page)) {
221 + page_is_corrupt = TRUE;
224 + if (checksum_field != 0
225 + && checksum_field != BUF_NO_CHECKSUM_MAGIC
227 + != buf_calc_page_new_checksum(page)) {
229 + page_is_corrupt = TRUE;
232 + /* if it is a free page, inconsistency is acceptable */
235 + /* it should be overwritten already */
236 + ut_a(!page_is_corrupt);
238 + } else if (!((offset / UNIV_PAGE_SIZE) % UNIV_PAGE_SIZE)) {
239 + /* descr page (not header) */
240 + if (page_is_corrupt) {
241 + file_is_corrupt = TRUE;
242 + descr_is_corrupt = TRUE;
244 + ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES);
245 + descr_is_corrupt = FALSE;
248 + /* store as descr page */
249 + memcpy(descr_page, page, UNIV_PAGE_SIZE);
251 + } else if (descr_is_corrupt) {
252 + /* unknown state of the page */
253 + if (page_is_corrupt) {
254 + file_is_corrupt = TRUE;
258 + /* check free page or not */
259 + /* These definitions must stay identical to those in fsp0fsp.c */
260 +#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE)
262 +#define XDES_BITMAP (FLST_NODE_SIZE + 12)
263 +#define XDES_BITS_PER_PAGE 2
264 +#define XDES_FREE_BIT 0
266 + (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
267 +#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
269 + /*descr = descr_page + XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)*/
270 + /*xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)*/
276 + descr = descr_page + XDES_ARR_OFFSET
277 + + XDES_SIZE * (ut_2pow_remainder((offset / UNIV_PAGE_SIZE), UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE);
279 + index = XDES_FREE_BIT + XDES_BITS_PER_PAGE * ((offset / UNIV_PAGE_SIZE) % FSP_EXTENT_SIZE);
280 + byte_index = index / 8;
281 + bit_index = index % 8;
283 + if (ut_bit_get_nth(mach_read_from_1(descr + XDES_BITMAP + byte_index), bit_index)) {
285 + if (page_is_corrupt) {
290 + if (page_is_corrupt) {
291 + file_is_corrupt = TRUE;
296 + if (page_is_corrupt) {
297 + fprintf(stderr, " [errp:%lld]", offset / UNIV_PAGE_SIZE);
299 + /* cannot treat corrupt page */
303 + if (mach_read_from_4(page + FIL_PAGE_OFFSET) || !offset) {
304 + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
306 + for (i = 0; i < n_index; i++) {
307 + if (offset / UNIV_PAGE_SIZE == root_page[i]) {
308 + /* this is index root page */
309 + mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
310 + + FSEG_HDR_SPACE, id);
311 + mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
312 + + FSEG_HDR_SPACE, id);
317 + if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
318 + index_id_t tmp = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID));
320 + if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
321 + && old_id[0] == tmp) {
322 + /* leaf page of clustered index, reset trx_id of records */
327 + supremum = page_get_supremum_rec(page);
328 + rec = page_rec_get_next(page_get_infimum_rec(page));
329 + n_recs = page_get_n_recs(page);
331 + while (rec && rec != supremum && n_recs > 0) {
334 + ulint offset = index->trx_id_offset;
335 + offsets = rec_get_offsets(rec, index, offsets,
336 + ULINT_UNDEFINED, &heap);
337 + n_fields = rec_offs_n_fields(offsets);
339 + offset = row_get_trx_id_offset(rec, index, offsets);
341 + trx_write_trx_id(rec + offset, 1);
343 + for (i = 0; i < n_fields; i++) {
344 + if (rec_offs_nth_extern(offsets, i)) {
348 + data = rec_get_nth_field(rec, offsets, i, &local_len);
350 + local_len -= BTR_EXTERN_FIELD_REF_SIZE;
352 + mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
356 + rec = page_rec_get_next(rec);
361 + for (i = 0; i < n_index; i++) {
362 + if (old_id[i] == tmp) {
363 + mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
369 + if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
370 + mach_write_to_8(page + FIL_PAGE_LSN, current_lsn);
371 + mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
375 + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
377 + ? buf_calc_page_new_checksum(page)
378 + : BUF_NO_CHECKSUM_MAGIC);
379 + mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
381 + ? buf_calc_page_old_checksum(page)
382 + : BUF_NO_CHECKSUM_MAGIC);
384 + success = os_file_write(filepath, file, page,
385 + (ulint)(offset & 0xFFFFFFFFUL),
386 + (ulint)(offset >> 32), UNIV_PAGE_SIZE);
390 + if (free_limit_bytes
391 + && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes)
392 + != ((offset * 100) / free_limit_bytes)) {
393 + fprintf(stderr, " %lu",
394 + (ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes));
398 + fprintf(stderr, " done.\n");
400 + /* update SYS_INDEXES set root page */
401 + index = dict_table_get_first_index(table);
403 + for (i = 0; i < n_index; i++) {
404 + if (new_id[i] == index->id) {
410 + && root_page[i] != index->page) {
414 + pars_info_t* info = NULL;
416 + trx = trx_allocate_for_mysql();
417 + trx->op_info = "extended import";
419 + info = pars_info_create();
421 + pars_info_add_ull_literal(info, "indexid", new_id[i]);
422 + pars_info_add_int4_literal(info, "new_page", (lint) root_page[i]);
424 + error = que_eval_sql(info,
425 + "PROCEDURE UPDATE_INDEX_PAGE () IS\n"
427 + "UPDATE SYS_INDEXES"
428 + " SET PAGE_NO = :new_page"
429 + " WHERE ID = :indexid;\n"
434 + if (error != DB_SUCCESS) {
435 + fprintf(stderr, "InnoDB: failed to update SYS_INDEXES\n");
438 + trx_commit_for_mysql(trx);
440 + trx_free_for_mysql(trx);
442 + index->page = root_page[i];
445 + index = dict_table_get_next_index(index);
447 + if (UNIV_LIKELY_NULL(heap)) {
448 + mem_heap_free(heap);
454 + / dict_table_flags_to_zip_size(flags));
455 + fprintf(stderr, "InnoDB: import: table %s seems to be in newer format."
456 + " It may not be able to treated for now.\n", name);
458 + /* .exp file should be removed */
459 + success = os_file_delete(info_file_path);
461 + success = os_file_delete_if_exists(info_file_path);
463 + mem_free(info_file_path);
465 + system = fil_system;
466 + mutex_enter(&(system->mutex));
467 + space = fil_space_get_by_id(id);
469 + node = UT_LIST_GET_FIRST(space->chain);
470 + if (node && node->size < size) {
471 + space->size += (size - node->size);
474 + mutex_exit(&(system->mutex));
478 + if (file_is_corrupt) {
479 + ut_print_timestamp(stderr);
480 + fputs(" InnoDB: Error: file ",
482 + ut_print_filename(stderr, filepath);
483 + fprintf(stderr, " seems to be corrupt.\n"
484 + "InnoDB: anyway, all not corrupt pages were tried to be converted to salvage.\n"
485 + "InnoDB: ##### CAUTION #####\n"
486 + "InnoDB: ## The .ibd must cause to crash InnoDB, though re-import would seem to be succeeded.\n"
487 + "InnoDB: ## If you don't have knowledge about salvaging data from .ibd, you should not use the file.\n"
488 + "InnoDB: ###################\n");
499 if (UNIV_UNLIKELY(space_id != id
500 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
501 --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:49:59.195023983 +0900
502 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:52:23.555957062 +0900
503 @@ -7330,6 +7330,14 @@
504 err = row_discard_tablespace_for_mysql(dict_table->name, trx);
506 err = row_import_tablespace_for_mysql(dict_table->name, trx);
508 + /* in expanded import mode re-initialize auto_increment again */
509 + if ((err == DB_SUCCESS) && srv_expand_import &&
510 + (table->found_next_number_field != NULL)) {
511 + dict_table_autoinc_lock(dict_table);
512 + innobase_initialize_autoinc();
513 + dict_table_autoinc_unlock(dict_table);
517 err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
518 @@ -11545,6 +11553,11 @@
519 "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
520 NULL, NULL, 0, 0, 1, 0);
522 +static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
523 + PLUGIN_VAR_RQCMDARG,
524 + "Enable/Disable converting automatically *.ibd files when import tablespace.",
525 + NULL, NULL, 0, 0, 1, 0);
527 static MYSQL_SYSVAR_ULONG(extra_rsegments, srv_extra_rsegments,
529 "Number of extra user rollback segments which are used in a round-robin fashion.",
530 @@ -11622,6 +11635,7 @@
531 MYSQL_SYSVAR(read_ahead),
532 MYSQL_SYSVAR(adaptive_flushing_method),
533 MYSQL_SYSVAR(enable_unsafe_group_commit),
534 + MYSQL_SYSVAR(expand_import),
535 MYSQL_SYSVAR(extra_rsegments),
536 MYSQL_SYSVAR(dict_size_limit),
537 MYSQL_SYSVAR(use_sys_malloc),
538 diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
539 --- a/storage/innobase/include/srv0srv.h 2010-12-03 15:48:03.077954270 +0900
540 +++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:52:23.561986996 +0900
542 extern ulint srv_read_ahead;
543 extern ulint srv_adaptive_flushing_method;
545 +extern ulint srv_expand_import;
547 extern ulint srv_extra_rsegments;
548 extern ulint srv_dict_size_limit;
549 /*-------------------------------------------*/
550 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
551 --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:49:59.230956118 +0900
552 +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:52:23.562954411 +0900
554 UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
555 UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
557 +UNIV_INTERN ulint srv_expand_import = 0; /* 0:disable 1:enable */
559 UNIV_INTERN ulint srv_extra_rsegments = 127; /* extra rseg for users */
560 UNIV_INTERN ulint srv_dict_size_limit = 0;
561 /*-------------------------------------------*/