1 # name : innodb_expand_import.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
9 --- a/storage/innobase/btr/btr0btr.c 2011-04-11 19:44:03.000000000 +0900
10 +++ b/storage/innobase/btr/btr0btr.c 2011-05-24 20:30:12.455852287 +0900
12 /**************************************************************//**
13 Creates a new index page (not the root, and also not
14 used in page reorganization). @see btr_page_empty(). */
21 #ifndef UNIV_HOTBACKUP
22 /*************************************************************//**
23 Empties an index page. @see btr_page_create(). */
30 /**************************************************************//**
31 Attaches the halves of an index page on the appropriate level in an
36 btr_attach_half_pages(
37 /*==================*/
38 diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
39 --- a/storage/innobase/fil/fil0fil.c 2010-12-03 15:09:51.274957577 +0900
40 +++ b/storage/innobase/fil/fil0fil.c 2010-12-03 15:52:23.553986552 +0900
42 #include "dict0dict.h"
43 #include "page0page.h"
47 +#include "pars0pars.h"
48 +#include "row0mysql.h"
53 #ifndef UNIV_HOTBACKUP
55 # include "ibuf0ibuf.h"
56 @@ -3032,6 +3040,84 @@
59 /********************************************************************//**
60 +Checks if a page is corrupt. (for offline page)
64 +fil_page_buf_page_is_corrupted_offline(
65 +/*===================================*/
66 + const byte* page, /*!< in: a database page */
67 + ulint zip_size) /*!< in: size of compressed page;
68 + 0 for uncompressed pages */
70 + ulint checksum_field;
71 + ulint old_checksum_field;
74 + && memcmp(page + FIL_PAGE_LSN + 4,
75 + page + UNIV_PAGE_SIZE
76 + - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
80 + checksum_field = mach_read_from_4(page
81 + + FIL_PAGE_SPACE_OR_CHKSUM);
84 + return(checksum_field != BUF_NO_CHECKSUM_MAGIC
86 + != page_zip_calc_checksum(page, zip_size));
89 + old_checksum_field = mach_read_from_4(
90 + page + UNIV_PAGE_SIZE
91 + - FIL_PAGE_END_LSN_OLD_CHKSUM);
93 + if (old_checksum_field != mach_read_from_4(page
95 + && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
96 + && old_checksum_field
97 + != buf_calc_page_old_checksum(page)) {
101 + if (checksum_field != 0
102 + && checksum_field != BUF_NO_CHECKSUM_MAGIC
104 + != buf_calc_page_new_checksum(page)) {
111 +/********************************************************************//**
115 +fil_page_buf_page_store_checksum(
116 +/*=============================*/
121 + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
123 + ? buf_calc_page_new_checksum(page)
124 + : BUF_NO_CHECKSUM_MAGIC);
125 + mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
127 + ? buf_calc_page_old_checksum(page)
128 + : BUF_NO_CHECKSUM_MAGIC);
130 + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
132 + ? page_zip_calc_checksum(page, zip_size)
133 + : BUF_NO_CHECKSUM_MAGIC);
137 +/********************************************************************//**
138 Tries to open a single-table tablespace and optionally checks the space id is
139 right in it. If does not succeed, prints an error message to the .err log. This
140 function is used to open a tablespace when we start up mysqld, and also in
141 @@ -3078,7 +3164,7 @@
143 file = os_file_create_simple_no_error_handling(
144 innodb_file_data_key, filepath, OS_FILE_OPEN,
145 - OS_FILE_READ_ONLY, &success);
146 + OS_FILE_READ_WRITE, &success);
148 /* The following call prints an error message */
149 os_file_get_last_error(TRUE);
150 @@ -3125,6 +3211,445 @@
151 space_id = fsp_header_get_space_id(page);
152 space_flags = fsp_header_get_flags(page);
154 + if (srv_expand_import) {
156 + ibool file_is_corrupt = FALSE;
159 + ibool descr_is_corrupt = FALSE;
160 + index_id_t old_id[31];
161 + index_id_t new_id[31];
162 + ulint root_page[31];
164 + os_file_t info_file = -1;
165 + char* info_file_path;
168 + ib_uint64_t current_lsn;
169 + ulint size_low, size_high, size, free_limit;
170 + ib_int64_t size_bytes, free_limit_bytes;
171 + dict_table_t* table;
172 + dict_index_t* index;
173 + fil_system_t* system;
174 + fil_node_t* node = NULL;
175 + fil_space_t* space;
178 + buf3 = ut_malloc(2 * UNIV_PAGE_SIZE);
179 + descr_page = ut_align(buf3, UNIV_PAGE_SIZE);
181 + current_lsn = log_get_lsn();
183 + /* check the header page's consistency */
184 + if (buf_page_is_corrupted(page,
185 + dict_table_flags_to_zip_size(space_flags))) {
186 + fprintf(stderr, "InnoDB: page 0 of %s seems corrupt.\n", filepath);
187 + file_is_corrupt = TRUE;
188 + descr_is_corrupt = TRUE;
191 + /* store as first descr page */
192 + memcpy(descr_page, page, UNIV_PAGE_SIZE);
194 + zip_size = dict_table_flags_to_zip_size(flags);
195 + ut_a(zip_size == dict_table_flags_to_zip_size(space_flags));
197 + /* get free limit (page number) of the table space */
198 +/* these must stay the same as the definitions in fsp0fsp.c */
199 +#define FSP_HEADER_OFFSET FIL_PAGE_DATA
200 +#define FSP_FREE_LIMIT 12
201 + free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page);
202 + free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)(zip_size ? zip_size : UNIV_PAGE_SIZE);
204 + /* overwrite fsp header */
205 + fsp_header_init_fields(page, id, flags);
206 + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
208 + space_flags = flags;
209 + if (mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN) > current_lsn)
210 + mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
212 + fil_page_buf_page_store_checksum(page, zip_size);
214 + success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
216 + /* get file size */
217 + os_file_get_size(file, &size_low, &size_high);
218 + size_bytes = (((ib_int64_t)size_high) << 32)
219 + + (ib_int64_t)size_low;
221 + if (size_bytes < free_limit_bytes) {
222 + free_limit_bytes = size_bytes;
223 + if (size_bytes >= (lint)FSP_EXTENT_SIZE * (lint)(zip_size ? zip_size : UNIV_PAGE_SIZE)) {
224 + fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath);
225 + file_is_corrupt = TRUE;
229 + /* get clustered index information */
230 + table = dict_table_get_low(name);
231 + index = dict_table_get_first_index(table);
232 + ut_a(index->page==3);
234 + /* read metadata from .exp file */
236 + memset(old_id, 0, sizeof(old_id));
237 + memset(new_id, 0, sizeof(new_id));
238 + memset(root_page, 0, sizeof(root_page));
240 + info_file_path = fil_make_ibd_name(name, FALSE);
241 + len = strlen(info_file_path);
242 + info_file_path[len - 3] = 'e';
243 + info_file_path[len - 2] = 'x';
244 + info_file_path[len - 1] = 'p';
246 + info_file = os_file_create_simple_no_error_handling(innodb_file_data_key,
247 + info_file_path, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
249 + fprintf(stderr, "InnoDB: Cannot open the file: %s\n", info_file_path);
250 + file_is_corrupt = TRUE;
253 + success = os_file_read(info_file, page, 0, 0, UNIV_PAGE_SIZE);
255 + fprintf(stderr, "InnoDB: Cannot read the file: %s\n", info_file_path);
256 + file_is_corrupt = TRUE;
259 + if (mach_read_from_4(page) != 0x78706f72UL
260 + || mach_read_from_4(page + 4) != 0x74696e66UL) {
261 + fprintf(stderr, "InnoDB: %s seems to be an incorrect .exp file.\n", info_file_path);
262 + file_is_corrupt = TRUE;
266 + fprintf(stderr, "InnoDB: Import: The extended import of %s is being started.\n", name);
268 + n_index = mach_read_from_4(page + 8);
269 + fprintf(stderr, "InnoDB: Import: %lu indexes have been detected.\n", (ulong)n_index);
270 + for (i = 0; i < n_index; i++) {
272 + dict_table_get_index_on_name(table,
273 + (char*)(page + (i + 1) * 512 + 12))->id;
274 + old_id[i] = mach_read_from_8(page + (i + 1) * 512);
275 + root_page[i] = mach_read_from_4(page + (i + 1) * 512 + 8);
279 + if (info_file != -1)
280 + os_file_close(info_file);
283 + if (size_bytes >= 1024 * 1024) {
284 + size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
289 + fprintf(stderr, "InnoDB: Warning: importing compressed table is still EXPERIMENTAL, currently.\n");
293 + mem_heap_t* heap = NULL;
294 + ulint offsets_[REC_OFFS_NORMAL_SIZE];
295 + ulint* offsets = offsets_;
298 + size = (ulint) (size_bytes / (zip_size ? zip_size : UNIV_PAGE_SIZE));
299 + /* overwrite the space id of all pages */
300 + rec_offs_init(offsets_);
302 + fprintf(stderr, "InnoDB: Progress in %%:");
304 + for (offset = 0; offset < free_limit_bytes;
305 + offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
306 + ibool page_is_corrupt;
308 + success = os_file_read(file, page,
309 + (ulint)(offset & 0xFFFFFFFFUL),
310 + (ulint)(offset >> 32),
311 + zip_size ? zip_size : UNIV_PAGE_SIZE);
313 + page_is_corrupt = FALSE;
315 + /* check consistency */
316 + if (fil_page_buf_page_is_corrupted_offline(page, zip_size)) {
317 + page_is_corrupt = TRUE;
320 + if (mach_read_from_4(page + FIL_PAGE_OFFSET)
321 + != offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) {
323 + page_is_corrupt = TRUE;
326 + /* if it is free page, inconsistency is acceptable */
329 + /* it should be overwritten already */
330 + ut_a(!page_is_corrupt);
332 + } else if (!((offset / (zip_size ? zip_size : UNIV_PAGE_SIZE))
333 + % (zip_size ? zip_size : UNIV_PAGE_SIZE))) {
334 + /* descr page (not header) */
335 + if (page_is_corrupt) {
336 + file_is_corrupt = TRUE;
337 + descr_is_corrupt = TRUE;
339 + ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES);
340 + descr_is_corrupt = FALSE;
343 + /* store as descr page */
344 + memcpy(descr_page, page, (zip_size ? zip_size : UNIV_PAGE_SIZE));
346 + } else if (descr_is_corrupt) {
347 + /* unknown state of the page */
348 + if (page_is_corrupt) {
349 + file_is_corrupt = TRUE;
353 + /* check free page or not */
354 + /* These definitions must stay the same as those in fsp0fsp.c */
355 +#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE)
357 +#define XDES_BITMAP (FLST_NODE_SIZE + 12)
358 +#define XDES_BITS_PER_PAGE 2
359 +#define XDES_FREE_BIT 0
361 + (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
362 +#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
364 + /*descr = descr_page + XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)*/
365 + /*xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)*/
371 + descr = descr_page + XDES_ARR_OFFSET
372 + + XDES_SIZE * (ut_2pow_remainder(
373 + (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)),
374 + (zip_size ? zip_size : UNIV_PAGE_SIZE)) / FSP_EXTENT_SIZE);
376 + index = XDES_FREE_BIT
377 + + XDES_BITS_PER_PAGE * ((offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) % FSP_EXTENT_SIZE);
378 + byte_index = index / 8;
379 + bit_index = index % 8;
381 + if (ut_bit_get_nth(mach_read_from_1(descr + XDES_BITMAP + byte_index), bit_index)) {
383 + if (page_is_corrupt) {
388 + if (page_is_corrupt) {
389 + file_is_corrupt = TRUE;
394 + if (page_is_corrupt) {
395 + fprintf(stderr, " [errp:%lld]", offset / (zip_size ? zip_size : UNIV_PAGE_SIZE));
397 + /* cannot treat corrupt page */
401 + if (mach_read_from_4(page + FIL_PAGE_OFFSET) || !offset) {
402 + mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
404 + for (i = 0; i < n_index; i++) {
405 + if (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE) == root_page[i]) {
406 + if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
407 + file_is_corrupt = TRUE;
408 + fprintf(stderr, " [etyp:%lld]",
409 + offset / (zip_size ? zip_size : UNIV_PAGE_SIZE));
412 + /* this is index root page */
413 + mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
414 + + FSEG_HDR_SPACE, id);
415 + mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
416 + + FSEG_HDR_SPACE, id);
421 + if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
422 + index_id_t tmp = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID));
424 + for (i = 0; i < n_index; i++) {
425 + if (old_id[i] == tmp) {
426 + mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
431 + if (!zip_size && mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
432 + && old_id[0] == tmp) {
433 + /* leaf page of cluster index, reset trx_id of records */
438 + supremum = page_get_supremum_rec(page);
439 + rec = page_rec_get_next(page_get_infimum_rec(page));
440 + n_recs = page_get_n_recs(page);
442 + while (rec && rec != supremum && n_recs > 0) {
445 + ulint offset = index->trx_id_offset;
446 + offsets = rec_get_offsets(rec, index, offsets,
447 + ULINT_UNDEFINED, &heap);
448 + n_fields = rec_offs_n_fields(offsets);
450 + offset = row_get_trx_id_offset(rec, index, offsets);
452 + trx_write_trx_id(rec + offset, 1);
454 + for (i = 0; i < n_fields; i++) {
455 + if (rec_offs_nth_extern(offsets, i)) {
459 + data = rec_get_nth_field(rec, offsets, i, &local_len);
461 + local_len -= BTR_EXTERN_FIELD_REF_SIZE;
463 + mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
467 + rec = page_rec_get_next(rec);
470 + } else if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
471 + && old_id[0] != tmp) {
472 + mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), 1);
476 + if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
477 + mach_write_to_8(page + FIL_PAGE_LSN, current_lsn);
479 + mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
484 + fil_page_buf_page_store_checksum(page, zip_size);
486 + success = os_file_write(filepath, file, page,
487 + (ulint)(offset & 0xFFFFFFFFUL),
488 + (ulint)(offset >> 32),
489 + zip_size ? zip_size : UNIV_PAGE_SIZE);
493 + if (free_limit_bytes
494 + && ((ib_int64_t)((offset + (zip_size ? zip_size : UNIV_PAGE_SIZE)) * 100) / free_limit_bytes)
495 + != ((offset * 100) / free_limit_bytes)) {
496 + fprintf(stderr, " %lu",
497 + (ulong)((ib_int64_t)((offset + (zip_size ? zip_size : UNIV_PAGE_SIZE)) * 100) / free_limit_bytes));
501 + fprintf(stderr, " done.\n");
503 + /* update SYS_INDEXES set root page */
504 + index = dict_table_get_first_index(table);
506 + for (i = 0; i < n_index; i++) {
507 + if (new_id[i] == index->id) {
513 + && root_page[i] != index->page) {
517 + pars_info_t* info = NULL;
519 + trx = trx_allocate_for_mysql();
520 + trx->op_info = "extended import";
522 + info = pars_info_create();
524 + pars_info_add_ull_literal(info, "indexid", new_id[i]);
525 + pars_info_add_int4_literal(info, "new_page", (lint) root_page[i]);
527 + error = que_eval_sql(info,
528 + "PROCEDURE UPDATE_INDEX_PAGE () IS\n"
530 + "UPDATE SYS_INDEXES"
531 + " SET PAGE_NO = :new_page"
532 + " WHERE ID = :indexid;\n"
537 + if (error != DB_SUCCESS) {
538 + fprintf(stderr, "InnoDB: failed to update SYS_INDEXES\n");
541 + trx_commit_for_mysql(trx);
543 + trx_free_for_mysql(trx);
545 + index->page = root_page[i];
548 + index = dict_table_get_next_index(index);
550 + if (UNIV_LIKELY_NULL(heap)) {
551 + mem_heap_free(heap);
554 + /* .exp file should be removed */
555 + success = os_file_delete(info_file_path);
557 + success = os_file_delete_if_exists(info_file_path);
559 + mem_free(info_file_path);
561 + system = fil_system;
562 + mutex_enter(&(system->mutex));
563 + space = fil_space_get_by_id(id);
565 + node = UT_LIST_GET_FIRST(space->chain);
566 + if (node && node->size < size) {
567 + space->size += (size - node->size);
570 + mutex_exit(&(system->mutex));
574 + if (file_is_corrupt) {
575 + ut_print_timestamp(stderr);
576 + fputs(" InnoDB: Error: file ",
578 + ut_print_filename(stderr, filepath);
579 + fprintf(stderr, " seems to be corrupt.\n"
580 + "InnoDB: An attempt to convert and salvage all corrupt pages was not made.\n"
581 + "InnoDB: ##### CAUTION #####\n"
582 + "InnoDB: ## The .ibd file may cause InnoDB to crash, even though its re-import seems to have succeeded.\n"
583 + "InnoDB: ## If you don't know how to salvage data from a .ibd, you should not use the file.\n"
584 + "InnoDB: ###################\n");
595 if (UNIV_UNLIKELY(space_id != id
596 @@ -3166,6 +3691,271 @@
600 + if (srv_expand_import && dict_table_flags_to_zip_size(flags)) {
604 + ulint root_height = 0;
606 + dict_table_t* table;
607 + dict_index_t* index;
608 + buf_block_t* block;
610 + page_zip_des_t* page_zip;
613 + mem_heap_t* heap = NULL;
614 + ulint offsets_[REC_OFFS_NORMAL_SIZE];
615 + ulint* offsets = offsets_;
617 + rec_offs_init(offsets_);
619 + zip_size = dict_table_flags_to_zip_size(flags);
621 + table = dict_table_get_low(name);
622 + index = dict_table_get_first_index(table);
623 + page_no = dict_index_get_page(index);
624 + ut_a(page_no == 3);
626 + fprintf(stderr, "InnoDB: It is compressed .ibd file. need to convert additionaly on buffer pool.\n");
630 + mtr_set_log_mode(&mtr, MTR_LOG_NONE);
632 + height = ULINT_UNDEFINED;
635 + block = buf_page_get(space_id, zip_size, page_no,
636 + RW_NO_LATCH, &mtr);
637 + page = buf_block_get_frame(block);
639 + block->check_index_page_at_flush = TRUE;
641 + if (height == ULINT_UNDEFINED) {
642 + height = btr_page_get_level(page, &mtr);
643 + root_height = height;
650 + node_ptr = page_rec_get_next(page_get_infimum_rec(page));
654 + offsets = rec_get_offsets(node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
655 + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
660 + fprintf(stderr, "InnoDB: pages needs split are ...");
662 + /* scan leaf pages */
663 + while (page_no != FIL_NULL) {
670 + block = buf_page_get(space_id, zip_size, page_no,
672 + page = buf_block_get_frame(block);
673 + page_zip = buf_block_get_page_zip(block);
676 + /*something wrong*/
677 + fprintf(stderr, "InnoDB: Something wrong with reading page %lu.\n", page_no);
680 + mutex_enter(&fil_system->mutex);
681 + fil_space_free(space_id, FALSE);
682 + mutex_exit(&fil_system->mutex);
687 + supremum = page_get_supremum_rec(page);
688 + rec = page_rec_get_next(page_get_infimum_rec(page));
689 + n_recs = page_get_n_recs(page);
691 + /* this is an illegal operation for an online InnoDB system, so it is not logged */
692 + while (rec && rec != supremum && n_recs > 0) {
695 + ulint offset = index->trx_id_offset;
697 + offsets = rec_get_offsets(rec, index, offsets,
698 + ULINT_UNDEFINED, &heap);
699 + n_fields = rec_offs_n_fields(offsets);
701 + offset = row_get_trx_id_offset(rec, index, offsets);
703 + trx_write_trx_id(rec + offset, 1);
705 + for (i = 0; i < n_fields; i++) {
706 + if (rec_offs_nth_extern(offsets, i)) {
710 + data = rec_get_nth_field(rec, offsets, i, &local_len);
712 + local_len -= BTR_EXTERN_FIELD_REF_SIZE;
714 + mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
718 + rec = page_rec_get_next(rec);
722 + /* dummy logged update for along with modified page path */
723 + if (index->id != btr_page_get_index_id(page)) {
724 + /* this should be adjusted already */
725 + fprintf(stderr, "InnoDB: The page %lu seems to be converted wrong.\n", page_no);
726 + goto convert_err_exit;
728 + btr_page_set_index_id(page, page_zip, index->id, &mtr);
730 + /* confirm whether fits to the page size or not */
731 + if (!page_zip_compress(page_zip, page, index, &mtr)
732 + && !btr_page_reorganize(block, index, &mtr)) {
733 + buf_block_t* new_block;
735 + page_zip_des_t* new_page_zip;
739 + /* split page is needed */
740 + fprintf(stderr, " %lu", page_no);
742 + mtr_x_lock(dict_index_get_lock(index), &mtr);
744 + n_uniq = dict_index_get_n_unique_in_tree(index);
746 + if(page_get_n_recs(page) < 2) {
747 + /* no way to make smaller */
748 + fprintf(stderr, "InnoDB: The page %lu cannot be store to the page size.\n", page_no);
749 + goto convert_err_exit;
752 + if (UNIV_UNLIKELY(page_no == dict_index_get_page(index))) {
754 + dtuple_t* node_ptr;
756 + rec_t* node_ptr_rec;
757 + page_cur_t page_cursor;
759 + /* it is root page, need to raise before split */
761 + level = btr_page_get_level(page, &mtr);
763 + new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, &mtr);
764 + new_page = buf_block_get_frame(new_block);
765 + new_page_zip = buf_block_get_page_zip(new_block);
766 + btr_page_create(new_block, new_page_zip, index, level, &mtr);
768 + btr_page_set_next(new_page, new_page_zip, FIL_NULL, &mtr);
769 + btr_page_set_prev(new_page, new_page_zip, FIL_NULL, &mtr);
771 + page_zip_copy_recs(new_page_zip, new_page,
772 + page_zip, page, index, &mtr);
773 + btr_search_move_or_delete_hash_entries(new_block, block, index);
775 + rec = page_rec_get_next(page_get_infimum_rec(new_page));
776 + new_page_no = buf_block_get_page_no(new_block);
778 + node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
780 + dtuple_set_info_bits(node_ptr,
781 + dtuple_get_info_bits(node_ptr)
782 + | REC_INFO_MIN_REC_FLAG);
783 + btr_page_empty(block, page_zip, index, level + 1, &mtr);
785 + btr_page_set_next(page, page_zip, FIL_NULL, &mtr);
786 + btr_page_set_prev(page, page_zip, FIL_NULL, &mtr);
788 + page_cur_set_before_first(block, &page_cursor);
790 + node_ptr_rec = page_cur_tuple_insert(&page_cursor, node_ptr,
792 + ut_a(node_ptr_rec);
794 + if (!btr_page_reorganize(block, index, &mtr)) {
795 + fprintf(stderr, "InnoDB: failed to store the page %lu.\n", page_no);
796 + goto convert_err_exit;
799 + /* move to the raised page */
800 + page_no = new_page_no;
803 + page_zip = new_page_zip;
805 + fprintf(stderr, "(raise_to:%lu)", page_no);
808 + split_rec = page_get_middle_rec(page);
810 + new_block = btr_page_alloc(index, page_no + 1, FSP_UP,
811 + btr_page_get_level(page, &mtr), &mtr);
812 + new_page = buf_block_get_frame(new_block);
813 + new_page_zip = buf_block_get_page_zip(new_block);
814 + btr_page_create(new_block, new_page_zip, index,
815 + btr_page_get_level(page, &mtr), &mtr);
817 + offsets = rec_get_offsets(split_rec, index, offsets, n_uniq, &heap);
819 + btr_attach_half_pages(index, block,
820 + split_rec, new_block, FSP_UP, &mtr);
822 + page_zip_copy_recs(new_page_zip, new_page,
823 + page_zip, page, index, &mtr);
824 + page_delete_rec_list_start(split_rec - page + new_page,
825 + new_block, index, &mtr);
826 + btr_search_move_or_delete_hash_entries(new_block, block, index);
827 + page_delete_rec_list_end(split_rec, block, index,
828 + ULINT_UNDEFINED, ULINT_UNDEFINED, &mtr);
830 + fprintf(stderr, "(new:%lu)", buf_block_get_page_no(new_block));
832 + /* Are they needed? */
833 + if (!btr_page_reorganize(block, index, &mtr)) {
834 + fprintf(stderr, "InnoDB: failed to store the page %lu.\n", page_no);
835 + goto convert_err_exit;
837 + if (!btr_page_reorganize(new_block, index, &mtr)) {
838 + fprintf(stderr, "InnoDB: failed to store the page %lu.\n", buf_block_get_page_no(new_block));
839 + goto convert_err_exit;
843 + page_no = btr_page_get_next(page, &mtr);
848 + mem_heap_empty(heap);
852 + fprintf(stderr, "...done.\nInnoDB: waiting the flush batch of the additional conversion.\n");
854 + /* should wait until the not-logged changes are all flushed */
855 + buf_flush_list(ULINT_MAX, mtr.end_lsn + 1);
856 + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
858 + fprintf(stderr, "InnoDB: done.\n");
860 + if (UNIV_LIKELY_NULL(heap)) {
861 + mem_heap_free(heap);
867 #endif /* !UNIV_HOTBACKUP */
868 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
869 --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:49:59.195023983 +0900
870 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:52:23.555957062 +0900
871 @@ -7377,6 +7377,14 @@
872 err = row_discard_tablespace_for_mysql(dict_table->name, trx);
874 err = row_import_tablespace_for_mysql(dict_table->name, trx);
876 + /* in expanded import mode re-initialize auto_increment again */
877 + if ((err == DB_SUCCESS) && srv_expand_import &&
878 + (table->found_next_number_field != NULL)) {
879 + dict_table_autoinc_lock(dict_table);
880 + innobase_initialize_autoinc();
881 + dict_table_autoinc_unlock(dict_table);
885 err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
886 @@ -11649,6 +11657,11 @@
887 "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
888 NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
890 +static MYSQL_SYSVAR_ULONG(import_table_from_xtrabackup, srv_expand_import,
891 + PLUGIN_VAR_RQCMDARG,
892 + "Enable/Disable converting automatically *.ibd files when import tablespace.",
893 + NULL, NULL, 0, 0, 1, 0);
895 static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit,
897 "Limit the allocated memory for dictionary cache. (0: unlimited)",
898 @@ -11721,6 +11734,7 @@
899 MYSQL_SYSVAR(flush_neighbor_pages),
900 MYSQL_SYSVAR(read_ahead),
901 MYSQL_SYSVAR(adaptive_flushing_method),
902 + MYSQL_SYSVAR(import_table_from_xtrabackup),
903 MYSQL_SYSVAR(dict_size_limit),
904 MYSQL_SYSVAR(use_sys_malloc),
905 MYSQL_SYSVAR(use_native_aio),
906 diff -ruN a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
907 --- a/storage/innobase/include/btr0btr.h 2011-04-11 19:44:03.000000000 +0900
908 +++ b/storage/innobase/include/btr0btr.h 2011-05-24 20:30:12.459853343 +0900
910 @return the uncompressed page frame */
911 # define btr_page_get(space,zip_size,page_no,mode,mtr) \
912 buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,mtr))
913 +/**************************************************************//**
914 +Sets the index id field of a page. */
917 +btr_page_set_index_id(
918 +/*==================*/
919 + page_t* page, /*!< in: page to be created */
920 + page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
921 + part will be updated, or NULL */
922 + index_id_t id, /*!< in: index id */
923 + mtr_t* mtr); /*!< in: mtr */
924 #endif /* !UNIV_HOTBACKUP */
925 /**************************************************************//**
926 Gets the index id field of a page.
928 const page_t* page, /*!< in: index page */
929 mtr_t* mtr); /*!< in: mini-transaction handle */
930 /********************************************************//**
931 +Sets the next index page field. */
936 + page_t* page, /*!< in: index page */
937 + page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
938 + part will be updated, or NULL */
939 + ulint next, /*!< in: next page number */
940 + mtr_t* mtr); /*!< in: mini-transaction handle */
941 +/********************************************************//**
942 Gets the previous index page number.
943 @return prev page number */
947 const page_t* page, /*!< in: index page */
948 mtr_t* mtr); /*!< in: mini-transaction handle */
949 +/********************************************************//**
950 +Sets the previous index page field. */
955 + page_t* page, /*!< in: index page */
956 + page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
957 + part will be updated, or NULL */
958 + ulint prev, /*!< in: previous page number */
959 + mtr_t* mtr); /*!< in: mini-transaction handle */
960 /*************************************************************//**
961 Gets pointer to the previous user record in the tree. It is assumed
962 that the caller has appropriate latches on the page and its neighbor.
964 /*===========================*/
965 const rec_t* rec, /*!< in: node pointer record */
966 const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
967 +/**************************************************************//**
968 +Creates a new index page (not the root, and also not
969 +used in page reorganization). @see btr_page_empty(). */
974 + buf_block_t* block, /*!< in/out: page to be created */
975 + page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
976 + dict_index_t* index, /*!< in: index */
977 + ulint level, /*!< in: the B-tree level of the page */
978 + mtr_t* mtr); /*!< in: mtr */
979 /************************************************************//**
980 Creates the root node for a new index tree.
981 @return page number of the created root, FIL_NULL if did not succeed */
983 dict_index_t* index, /*!< in: record descriptor */
984 mtr_t* mtr); /*!< in: mtr */
985 /*************************************************************//**
986 +Empties an index page. @see btr_page_create(). */
991 + buf_block_t* block, /*!< in: page to be emptied */
992 + page_zip_des_t* page_zip,/*!< out: compressed page, or NULL */
993 + dict_index_t* index, /*!< in: index of the page */
994 + ulint level, /*!< in: the B-tree level of the page */
995 + mtr_t* mtr); /*!< in: mtr */
996 +/*************************************************************//**
997 Decides if the page should be split at the convergence point of
998 inserts converging to left.
999 @return TRUE if split recommended */
1000 @@ -437,6 +493,20 @@
1001 # define btr_insert_on_non_leaf_level(i,l,t,m) \
1002 btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
1003 #endif /* !UNIV_HOTBACKUP */
1004 +/**************************************************************//**
1005 +Attaches the halves of an index page on the appropriate level in an
1009 +btr_attach_half_pages(
1010 +/*==================*/
1011 + dict_index_t* index, /*!< in: the index tree */
1012 + buf_block_t* block, /*!< in/out: page to be split */
1013 + rec_t* split_rec, /*!< in: first record on upper
1015 + buf_block_t* new_block, /*!< in/out: the new half page */
1016 + ulint direction, /*!< in: FSP_UP or FSP_DOWN */
1017 + mtr_t* mtr); /*!< in: mtr */
1018 /****************************************************************//**
1019 Sets a record as the predefined minimum record. */
1021 diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
1022 --- a/storage/innobase/include/srv0srv.h 2010-12-03 15:48:03.077954270 +0900
1023 +++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:52:23.561986996 +0900
1025 extern ulint srv_read_ahead;
1026 extern ulint srv_adaptive_flushing_method;
1028 +extern ulint srv_expand_import;
1030 extern ulint srv_dict_size_limit;
1031 /*-------------------------------------------*/
1033 diff -ruN a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
1034 --- a/storage/innobase/row/row0mysql.c 2011-04-11 19:44:03.000000000 +0900
1035 +++ b/storage/innobase/row/row0mysql.c 2011-06-06 11:53:18.395764565 +0900
1036 @@ -2568,6 +2568,11 @@
1038 current_lsn = log_get_lsn();
1040 + /* Enlarge the fatal lock wait timeout during import. */
1041 + mutex_enter(&kernel_mutex);
1042 + srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
1043 + mutex_exit(&kernel_mutex);
1045 /* It is possible, though very improbable, that the lsn's in the
1046 tablespace to be imported have risen above the current system lsn, if
1047 a lengthy purge, ibuf merge, or rollback was performed on a backup
1048 @@ -2679,6 +2684,11 @@
1052 + /* Restore the fatal semaphore wait timeout */
1053 + mutex_enter(&kernel_mutex);
1054 + srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
1055 + mutex_exit(&kernel_mutex);
1060 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
1061 --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:49:59.230956118 +0900
1062 +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:52:23.562954411 +0900
1064 UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
1065 UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
1067 +UNIV_INTERN ulint srv_expand_import = 0; /* 0:disable 1:enable */
1069 UNIV_INTERN ulint srv_dict_size_limit = 0;
1070 /*-------------------------------------------*/
1071 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;