#!!! notice !!!
# Any small change to this file in the main branch
# should be done or reviewed by the maintainer!
-diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
---- a/storage/innobase/fil/fil0fil.c 2010-12-03 15:09:51.274957577 +0900
-+++ b/storage/innobase/fil/fil0fil.c 2010-12-03 15:52:23.553986552 +0900
-@@ -40,6 +40,12 @@
+--- a/storage/innobase/btr/btr0btr.c
++++ b/storage/innobase/btr/btr0btr.c
+@@ -838,7 +838,7 @@
+ /**************************************************************//**
+ Creates a new index page (not the root, and also not
+ used in page reorganization). @see btr_page_empty(). */
+-static
++UNIV_INTERN
+ void
+ btr_page_create(
+ /*============*/
+@@ -1712,7 +1712,7 @@
+ #ifndef UNIV_HOTBACKUP
+ /*************************************************************//**
+ Empties an index page. @see btr_page_create(). */
+-static
++UNIV_INTERN
+ void
+ btr_page_empty(
+ /*===========*/
+@@ -2274,7 +2274,7 @@
+ /**************************************************************//**
+ Attaches the halves of an index page on the appropriate level in an
+ index tree. */
+-static
++UNIV_INTERN
+ void
+ btr_attach_half_pages(
+ /*==================*/
+--- a/storage/innobase/fil/fil0fil.c
++++ b/storage/innobase/fil/fil0fil.c
+@@ -40,6 +40,14 @@
#include "dict0dict.h"
#include "page0page.h"
#include "page0zip.h"
+#include "row0mysql.h"
+#include "row0row.h"
+#include "que0que.h"
++#include "btr0btr.h"
++#include "btr0sea.h"
#ifndef UNIV_HOTBACKUP
# include "buf0lru.h"
# include "ibuf0ibuf.h"
-@@ -3078,7 +3084,7 @@
+@@ -3041,6 +3049,84 @@
+ }
+
+ /********************************************************************//**
++Checks if a page is corrupt. (for offline page)
++*/
++static
++ibool
++fil_page_buf_page_is_corrupted_offline(
++/*===================================*/
++ const byte* page, /*!< in: a database page */
++ ulint zip_size) /*!< in: size of compressed page;
++ 0 for uncompressed pages */
++{
++ ulint checksum_field;
++ ulint old_checksum_field;
++
++ if (!zip_size
++ && memcmp(page + FIL_PAGE_LSN + 4,
++ page + UNIV_PAGE_SIZE
++ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
++ return(TRUE);
++ }
++
++ checksum_field = mach_read_from_4(page
++ + FIL_PAGE_SPACE_OR_CHKSUM);
++
++ if (zip_size) {
++ return(checksum_field != BUF_NO_CHECKSUM_MAGIC
++ && checksum_field
++ != page_zip_calc_checksum(page, zip_size));
++ }
++
++ old_checksum_field = mach_read_from_4(
++ page + UNIV_PAGE_SIZE
++ - FIL_PAGE_END_LSN_OLD_CHKSUM);
++
++ if (old_checksum_field != mach_read_from_4(page
++ + FIL_PAGE_LSN)
++ && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
++ && old_checksum_field
++ != buf_calc_page_old_checksum(page)) {
++ return(TRUE);
++ }
++
++ if (checksum_field != 0
++ && checksum_field != BUF_NO_CHECKSUM_MAGIC
++ && checksum_field
++ != buf_calc_page_new_checksum(page)) {
++ return(TRUE);
++ }
++
++ return(FALSE);
++}
++
++/********************************************************************//**
++*/
++static
++void
++fil_page_buf_page_store_checksum(
++/*=============================*/
++ byte* page,
++ ulint zip_size)
++{
++ if (!zip_size) {
++ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
++ srv_use_checksums
++ ? buf_calc_page_new_checksum(page)
++ : BUF_NO_CHECKSUM_MAGIC);
++ mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
++ srv_use_checksums
++ ? buf_calc_page_old_checksum(page)
++ : BUF_NO_CHECKSUM_MAGIC);
++ } else {
++ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
++ srv_use_checksums
++ ? page_zip_calc_checksum(page, zip_size)
++ : BUF_NO_CHECKSUM_MAGIC);
++ }
++}
++
++/********************************************************************//**
+ Tries to open a single-table tablespace and optionally checks the space id is
+ right in it. If does not succeed, prints an error message to the .err log. This
+ function is used to open a tablespace when we start up mysqld, and also in
+@@ -3063,8 +3149,11 @@
+ accessing the first page of the file */
+ ulint id, /*!< in: space id */
+ ulint flags, /*!< in: tablespace flags */
+- const char* name) /*!< in: table name in the
++ const char* name, /*!< in: table name in the
+ databasename/tablename format */
++ trx_t* trx) /*!< in: transaction. This is only used
++ for IMPORT TABLESPACE, must be NULL
++ otherwise */
+ {
+ os_file_t file;
+ char* filepath;
+@@ -3087,7 +3176,7 @@
file = os_file_create_simple_no_error_handling(
innodb_file_data_key, filepath, OS_FILE_OPEN,
if (!success) {
/* The following call prints an error message */
os_file_get_last_error(TRUE);
-@@ -3125,6 +3131,466 @@
+@@ -3134,6 +3223,453 @@
space_id = fsp_header_get_space_id(page);
space_flags = fsp_header_get_flags(page);
+ fil_system_t* system;
+ fil_node_t* node = NULL;
+ fil_space_t* space;
++ ulint zip_size;
+
+ buf3 = ut_malloc(2 * UNIV_PAGE_SIZE);
+ descr_page = ut_align(buf3, UNIV_PAGE_SIZE);
+ /* store as first descr page */
+ memcpy(descr_page, page, UNIV_PAGE_SIZE);
+
++ zip_size = dict_table_flags_to_zip_size(flags);
++ ut_a(zip_size == dict_table_flags_to_zip_size(space_flags));
++
+ /* get free limit (page number) of the table space */
+/* these should be same to the definition in fsp0fsp.c */
+#define FSP_HEADER_OFFSET FIL_PAGE_DATA
+#define FSP_FREE_LIMIT 12
+ free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page);
-+ free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)UNIV_PAGE_SIZE;
++ free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)(zip_size ? zip_size : UNIV_PAGE_SIZE);
+
+ /* overwrite fsp header */
+ fsp_header_init_fields(page, id, flags);
+ space_flags = flags;
+ if (mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN) > current_lsn)
+ mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
-+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
-+ srv_use_checksums
-+ ? buf_calc_page_new_checksum(page)
-+ : BUF_NO_CHECKSUM_MAGIC);
-+ mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-+ srv_use_checksums
-+ ? buf_calc_page_old_checksum(page)
-+ : BUF_NO_CHECKSUM_MAGIC);
++
++ fil_page_buf_page_store_checksum(page, zip_size);
++
+ success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
+
+ /* get file size */
+
+ if (size_bytes < free_limit_bytes) {
+ free_limit_bytes = size_bytes;
-+ if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) {
++ if (size_bytes >= (lint)FSP_EXTENT_SIZE * (lint)(zip_size ? zip_size : UNIV_PAGE_SIZE)) {
+ fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath);
+ file_is_corrupt = TRUE;
+ }
+ size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
+ }
+ */
-+ if (!(flags & DICT_TF_ZSSIZE_MASK)) {
++
++ if (zip_size) {
++ fprintf(stderr, "InnoDB: Warning: importing compressed table is still EXPERIMENTAL, currently.\n");
++ }
++
++ {
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ ib_int64_t offset;
+
-+ size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
++ size = (ulint) (size_bytes / (zip_size ? zip_size : UNIV_PAGE_SIZE));
+ /* over write space id of all pages */
+ rec_offs_init(offsets_);
+
++ /* Unlock the data dictionary to not block queries
++ accessing other tables */
++ ut_a(trx);
++ row_mysql_unlock_data_dictionary(trx);
++
+ fprintf(stderr, "InnoDB: Progress in %%:");
+
-+ for (offset = 0; offset < free_limit_bytes; offset += UNIV_PAGE_SIZE) {
-+ ulint checksum_field;
-+ ulint old_checksum_field;
++ for (offset = 0; offset < free_limit_bytes;
++ offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
+ ibool page_is_corrupt;
+
+ success = os_file_read(file, page,
+ (ulint)(offset & 0xFFFFFFFFUL),
-+ (ulint)(offset >> 32), UNIV_PAGE_SIZE);
++ (ulint)(offset >> 32),
++ zip_size ? zip_size : UNIV_PAGE_SIZE);
+
+ page_is_corrupt = FALSE;
+
+ /* check consistency */
-+ if (memcmp(page + FIL_PAGE_LSN + 4,
-+ page + UNIV_PAGE_SIZE
-+ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
-+
++ if (fil_page_buf_page_is_corrupted_offline(page, zip_size)) {
+ page_is_corrupt = TRUE;
+ }
+
+ if (mach_read_from_4(page + FIL_PAGE_OFFSET)
-+ != offset / UNIV_PAGE_SIZE) {
-+
-+ page_is_corrupt = TRUE;
-+ }
-+
-+ checksum_field = mach_read_from_4(page
-+ + FIL_PAGE_SPACE_OR_CHKSUM);
-+
-+ old_checksum_field = mach_read_from_4(
-+ page + UNIV_PAGE_SIZE
-+ - FIL_PAGE_END_LSN_OLD_CHKSUM);
-+
-+ if (old_checksum_field != mach_read_from_4(page
-+ + FIL_PAGE_LSN)
-+ && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
-+ && old_checksum_field
-+ != buf_calc_page_old_checksum(page)) {
-+
-+ page_is_corrupt = TRUE;
-+ }
-+
-+ if (checksum_field != 0
-+ && checksum_field != BUF_NO_CHECKSUM_MAGIC
-+ && checksum_field
-+ != buf_calc_page_new_checksum(page)) {
++ != offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) {
+
+ page_is_corrupt = TRUE;
+ }
+ /* it should be overwritten already */
+ ut_a(!page_is_corrupt);
+
-+ } else if (!((offset / UNIV_PAGE_SIZE) % UNIV_PAGE_SIZE)) {
++ } else if (!((offset / (zip_size ? zip_size : UNIV_PAGE_SIZE))
++ % (zip_size ? zip_size : UNIV_PAGE_SIZE))) {
+ /* descr page (not header) */
+ if (page_is_corrupt) {
+ file_is_corrupt = TRUE;
+ }
+
+ /* store as descr page */
-+ memcpy(descr_page, page, UNIV_PAGE_SIZE);
++ memcpy(descr_page, page, (zip_size ? zip_size : UNIV_PAGE_SIZE));
+
+ } else if (descr_is_corrupt) {
+ /* unknown state of the page */
+ ulint bit_index;
+
+ descr = descr_page + XDES_ARR_OFFSET
-+ + XDES_SIZE * (ut_2pow_remainder((offset / UNIV_PAGE_SIZE), UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE);
++ + XDES_SIZE * (ut_2pow_remainder(
++ (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)),
++ (zip_size ? zip_size : UNIV_PAGE_SIZE)) / FSP_EXTENT_SIZE);
+
-+ index = XDES_FREE_BIT + XDES_BITS_PER_PAGE * ((offset / UNIV_PAGE_SIZE) % FSP_EXTENT_SIZE);
++ index = XDES_FREE_BIT
++ + XDES_BITS_PER_PAGE * ((offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) % FSP_EXTENT_SIZE);
+ byte_index = index / 8;
+ bit_index = index % 8;
+
+ }
+
+ if (page_is_corrupt) {
-+ fprintf(stderr, " [errp:%lld]", offset / UNIV_PAGE_SIZE);
++ fprintf(stderr, " [errp:%lld]", offset / (zip_size ? zip_size : UNIV_PAGE_SIZE));
+
+ /* cannot treat corrupt page */
+ goto skip_write;
+ mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
+
+ for (i = 0; i < n_index; i++) {
-+ if (offset / UNIV_PAGE_SIZE == root_page[i]) {
++ if (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE) == root_page[i]) {
++ if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
++ file_is_corrupt = TRUE;
++ fprintf(stderr, " [etyp:%lld]",
++ offset / (zip_size ? zip_size : UNIV_PAGE_SIZE));
++ goto skip_write;
++ }
+ /* this is index root page */
+ mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ + FSEG_HDR_SPACE, id);
+ if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
+ index_id_t tmp = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID));
+
-+ if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
++ for (i = 0; i < n_index; i++) {
++ if (old_id[i] == tmp) {
++ mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
++ break;
++ }
++ }
++
++ if (!zip_size && mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
+ && old_id[0] == tmp) {
+ /* leaf page of cluster index, reset trx_id of records */
+ rec_t* rec;
+ ULINT_UNDEFINED, &heap);
+ n_fields = rec_offs_n_fields(offsets);
+ if (!offset) {
-+ offset = row_get_trx_id_offset(rec, index, offsets);
++ offset = row_get_trx_id_offset(index, offsets);
+ }
+ trx_write_trx_id(rec + offset, 1);
+
+ rec = page_rec_get_next(rec);
+ n_recs--;
+ }
-+ }
-+
-+ for (i = 0; i < n_index; i++) {
-+ if (old_id[i] == tmp) {
-+ mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
-+ break;
-+ }
++ } else if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
++ && old_id[0] != tmp) {
++ mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), 1);
+ }
+ }
+
+ if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
+ mach_write_to_8(page + FIL_PAGE_LSN, current_lsn);
-+ mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-+ current_lsn);
++ if (!zip_size) {
++ mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
++ current_lsn);
++ }
+ }
+
-+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
-+ srv_use_checksums
-+ ? buf_calc_page_new_checksum(page)
-+ : BUF_NO_CHECKSUM_MAGIC);
-+ mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-+ srv_use_checksums
-+ ? buf_calc_page_old_checksum(page)
-+ : BUF_NO_CHECKSUM_MAGIC);
++ fil_page_buf_page_store_checksum(page, zip_size);
+
+ success = os_file_write(filepath, file, page,
+ (ulint)(offset & 0xFFFFFFFFUL),
-+ (ulint)(offset >> 32), UNIV_PAGE_SIZE);
++ (ulint)(offset >> 32),
++ zip_size ? zip_size : UNIV_PAGE_SIZE);
+ }
+
+skip_write:
+ if (free_limit_bytes
-+ && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes)
++ && ((ib_int64_t)((offset + (zip_size ? zip_size : UNIV_PAGE_SIZE)) * 100) / free_limit_bytes)
+ != ((offset * 100) / free_limit_bytes)) {
+ fprintf(stderr, " %lu",
-+ (ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes));
++ (ulong)((ib_int64_t)((offset + (zip_size ? zip_size : UNIV_PAGE_SIZE)) * 100) / free_limit_bytes));
+ }
+ }
+
+ fprintf(stderr, " done.\n");
+
++ /* Reacquire the data dictionary lock */
++ row_mysql_lock_data_dictionary(trx);
++
+ /* update SYS_INDEXES set root page */
+ index = dict_table_get_first_index(table);
+ while (index) {
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
-+ } else {
-+ /* zip page? */
-+ size = (ulint)
-+ (size_bytes
-+ / dict_table_flags_to_zip_size(flags));
-+ fprintf(stderr, "InnoDB: Import: The table %s seems to be in a newer format."
-+ " It may not be possible to process it.\n", name);
+ }
+ /* .exp file should be removed */
+ success = os_file_delete(info_file_path);
ut_free(buf2);
if (UNIV_UNLIKELY(space_id != id
-diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
---- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:49:59.195023983 +0900
-+++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:52:23.555957062 +0900
-@@ -7356,6 +7356,14 @@
+@@ -3175,6 +3711,269 @@
+ os_file_close(file);
+ mem_free(filepath);
+
++ if (srv_expand_import && dict_table_flags_to_zip_size(flags)) {
++ ulint page_no;
++ ulint zip_size;
++ ulint height;
++ rec_t* node_ptr;
++ dict_table_t* table;
++ dict_index_t* index;
++ buf_block_t* block;
++ page_t* page;
++ page_zip_des_t* page_zip;
++ mtr_t mtr;
++
++ mem_heap_t* heap = NULL;
++ ulint offsets_[REC_OFFS_NORMAL_SIZE];
++ ulint* offsets = offsets_;
++
++ rec_offs_init(offsets_);
++
++ zip_size = dict_table_flags_to_zip_size(flags);
++
++ table = dict_table_get_low(name);
++ index = dict_table_get_first_index(table);
++ page_no = dict_index_get_page(index);
++ ut_a(page_no == 3);
++
++ fprintf(stderr, "InnoDB: It is compressed .ibd file. need to convert additionaly on buffer pool.\n");
++
++ /* down to leaf */
++ mtr_start(&mtr);
++ mtr_set_log_mode(&mtr, MTR_LOG_NONE);
++
++ height = ULINT_UNDEFINED;
++
++ for (;;) {
++ block = buf_page_get(space_id, zip_size, page_no,
++ RW_NO_LATCH, &mtr);
++ page = buf_block_get_frame(block);
++
++ block->check_index_page_at_flush = TRUE;
++
++ if (height == ULINT_UNDEFINED) {
++ height = btr_page_get_level(page, &mtr);
++ }
++
++ if (height == 0) {
++ break;
++ }
++
++ node_ptr = page_rec_get_next(page_get_infimum_rec(page));
++
++ height--;
++
++ offsets = rec_get_offsets(node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
++ page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
++ }
++
++ mtr_commit(&mtr);
++
++ fprintf(stderr, "InnoDB: pages needs split are ...");
++
++ /* scan reaf pages */
++ while (page_no != FIL_NULL) {
++ rec_t* rec;
++ rec_t* supremum;
++ ulint n_recs;
++
++ mtr_start(&mtr);
++
++ block = buf_page_get(space_id, zip_size, page_no,
++ RW_X_LATCH, &mtr);
++ page = buf_block_get_frame(block);
++ page_zip = buf_block_get_page_zip(block);
++
++ if (!page_zip) {
++ /*something wrong*/
++ fprintf(stderr, "InnoDB: Something wrong with reading page %lu.\n", page_no);
++convert_err_exit:
++ mtr_commit(&mtr);
++ mutex_enter(&fil_system->mutex);
++ fil_space_free(space_id, FALSE);
++ mutex_exit(&fil_system->mutex);
++ success = FALSE;
++ goto convert_exit;
++ }
++
++ supremum = page_get_supremum_rec(page);
++ rec = page_rec_get_next(page_get_infimum_rec(page));
++ n_recs = page_get_n_recs(page);
++
++ /* illegal operation as InnoDB online system. so not logged */
++ while (rec && rec != supremum && n_recs > 0) {
++ ulint n_fields;
++ ulint i;
++ ulint offset = index->trx_id_offset;
++
++ offsets = rec_get_offsets(rec, index, offsets,
++ ULINT_UNDEFINED, &heap);
++ n_fields = rec_offs_n_fields(offsets);
++ if (!offset) {
++ offset = row_get_trx_id_offset(index, offsets);
++ }
++ trx_write_trx_id(rec + offset, 1);
++
++ for (i = 0; i < n_fields; i++) {
++ if (rec_offs_nth_extern(offsets, i)) {
++ ulint local_len;
++ byte* data;
++
++ data = rec_get_nth_field(rec, offsets, i, &local_len);
++
++ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
++
++ mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
++ }
++ }
++
++ rec = page_rec_get_next(rec);
++ n_recs--;
++ }
++
++ /* dummy logged update for along with modified page path */
++ if (index->id != btr_page_get_index_id(page)) {
++ /* this should be adjusted already */
++ fprintf(stderr, "InnoDB: The page %lu seems to be converted wrong.\n", page_no);
++ goto convert_err_exit;
++ }
++ btr_page_set_index_id(page, page_zip, index->id, &mtr);
++
++ /* confirm whether fits to the page size or not */
++ if (!page_zip_compress(page_zip, page, index, &mtr)
++ && !btr_page_reorganize(block, index, &mtr)) {
++ buf_block_t* new_block;
++ page_t* new_page;
++ page_zip_des_t* new_page_zip;
++ rec_t* split_rec;
++ ulint n_uniq;
++
++ /* split page is needed */
++ fprintf(stderr, " %lu", page_no);
++
++ mtr_x_lock(dict_index_get_lock(index), &mtr);
++
++ n_uniq = dict_index_get_n_unique_in_tree(index);
++
++ if(page_get_n_recs(page) < 2) {
++ /* no way to make smaller */
++ fprintf(stderr, "InnoDB: The page %lu cannot be store to the page size.\n", page_no);
++ goto convert_err_exit;
++ }
++
++ if (UNIV_UNLIKELY(page_no == dict_index_get_page(index))) {
++ ulint new_page_no;
++ dtuple_t* node_ptr;
++ ulint level;
++ rec_t* node_ptr_rec;
++ page_cur_t page_cursor;
++
++ /* it is root page, need to raise before split */
++
++ level = btr_page_get_level(page, &mtr);
++
++ new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, &mtr);
++ new_page = buf_block_get_frame(new_block);
++ new_page_zip = buf_block_get_page_zip(new_block);
++ btr_page_create(new_block, new_page_zip, index, level, &mtr);
++
++ btr_page_set_next(new_page, new_page_zip, FIL_NULL, &mtr);
++ btr_page_set_prev(new_page, new_page_zip, FIL_NULL, &mtr);
++
++ page_zip_copy_recs(new_page_zip, new_page,
++ page_zip, page, index, &mtr);
++ btr_search_move_or_delete_hash_entries(new_block, block, index);
++
++ rec = page_rec_get_next(page_get_infimum_rec(new_page));
++ new_page_no = buf_block_get_page_no(new_block);
++
++ node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
++ level);
++ dtuple_set_info_bits(node_ptr,
++ dtuple_get_info_bits(node_ptr)
++ | REC_INFO_MIN_REC_FLAG);
++ btr_page_empty(block, page_zip, index, level + 1, &mtr);
++
++ btr_page_set_next(page, page_zip, FIL_NULL, &mtr);
++ btr_page_set_prev(page, page_zip, FIL_NULL, &mtr);
++
++ page_cur_set_before_first(block, &page_cursor);
++
++ node_ptr_rec = page_cur_tuple_insert(&page_cursor, node_ptr,
++ index, 0, &mtr);
++ ut_a(node_ptr_rec);
++
++ if (!btr_page_reorganize(block, index, &mtr)) {
++ fprintf(stderr, "InnoDB: failed to store the page %lu.\n", page_no);
++ goto convert_err_exit;
++ }
++
++ /* move to the raised page */
++ page_no = new_page_no;
++ block = new_block;
++ page = new_page;
++ page_zip = new_page_zip;
++
++ fprintf(stderr, "(raise_to:%lu)", page_no);
++ }
++
++ split_rec = page_get_middle_rec(page);
++
++ new_block = btr_page_alloc(index, page_no + 1, FSP_UP,
++ btr_page_get_level(page, &mtr), &mtr);
++ new_page = buf_block_get_frame(new_block);
++ new_page_zip = buf_block_get_page_zip(new_block);
++ btr_page_create(new_block, new_page_zip, index,
++ btr_page_get_level(page, &mtr), &mtr);
++
++ offsets = rec_get_offsets(split_rec, index, offsets, n_uniq, &heap);
++
++ btr_attach_half_pages(index, block,
++ split_rec, new_block, FSP_UP, &mtr);
++
++ page_zip_copy_recs(new_page_zip, new_page,
++ page_zip, page, index, &mtr);
++ page_delete_rec_list_start(split_rec - page + new_page,
++ new_block, index, &mtr);
++ btr_search_move_or_delete_hash_entries(new_block, block, index);
++ page_delete_rec_list_end(split_rec, block, index,
++ ULINT_UNDEFINED, ULINT_UNDEFINED, &mtr);
++
++ fprintf(stderr, "(new:%lu)", buf_block_get_page_no(new_block));
++
++ /* Are they needed? */
++ if (!btr_page_reorganize(block, index, &mtr)) {
++ fprintf(stderr, "InnoDB: failed to store the page %lu.\n", page_no);
++ goto convert_err_exit;
++ }
++ if (!btr_page_reorganize(new_block, index, &mtr)) {
++ fprintf(stderr, "InnoDB: failed to store the page %lu.\n", buf_block_get_page_no(new_block));
++ goto convert_err_exit;
++ }
++ }
++
++ page_no = btr_page_get_next(page, &mtr);
++
++ mtr_commit(&mtr);
++
++ if (heap) {
++ mem_heap_empty(heap);
++ }
++ }
++
++ fprintf(stderr, "...done.\nInnoDB: waiting the flush batch of the additional conversion.\n");
++
++ /* should wait for the not-logged changes are all flushed */
++ buf_flush_list(ULINT_MAX, mtr.end_lsn + 1);
++ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
++
++ fprintf(stderr, "InnoDB: done.\n");
++convert_exit:
++ if (UNIV_LIKELY_NULL(heap)) {
++ mem_heap_free(heap);
++ }
++ }
++
+ return(success);
+ }
+ #endif /* !UNIV_HOTBACKUP */
+--- a/storage/innobase/handler/ha_innodb.cc
++++ b/storage/innobase/handler/ha_innodb.cc
+@@ -7421,6 +7421,14 @@
err = row_discard_tablespace_for_mysql(dict_table->name, trx);
} else {
err = row_import_tablespace_for_mysql(dict_table->name, trx);
}
err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
-@@ -11625,6 +11633,11 @@
- "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
- NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
+@@ -11820,6 +11828,11 @@
+ NULL, NULL, 0, 0, 1, 0);
+ #endif
+static MYSQL_SYSVAR_ULONG(import_table_from_xtrabackup, srv_expand_import,
+ PLUGIN_VAR_RQCMDARG,
static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit,
PLUGIN_VAR_RQCMDARG,
"Limit the allocated memory for dictionary cache. (0: unlimited)",
-@@ -11697,6 +11710,7 @@
+@@ -11894,6 +11907,7 @@
MYSQL_SYSVAR(flush_neighbor_pages),
MYSQL_SYSVAR(read_ahead),
MYSQL_SYSVAR(adaptive_flushing_method),
MYSQL_SYSVAR(dict_size_limit),
MYSQL_SYSVAR(use_sys_malloc),
MYSQL_SYSVAR(use_native_aio),
-diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
---- a/storage/innobase/include/srv0srv.h 2010-12-03 15:48:03.077954270 +0900
-+++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:52:23.561986996 +0900
-@@ -232,6 +232,8 @@
+--- a/storage/innobase/include/btr0btr.h
++++ b/storage/innobase/include/btr0btr.h
+@@ -238,6 +238,17 @@
+ @return the uncompressed page frame */
+ # define btr_page_get(space,zip_size,page_no,mode,idx,mtr) \
+ buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,idx,mtr))
++/**************************************************************//**
++Sets the index id field of a page. */
++UNIV_INLINE
++void
++btr_page_set_index_id(
++/*==================*/
++ page_t* page, /*!< in: page to be created */
++ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
++ part will be updated, or NULL */
++ index_id_t id, /*!< in: index id */
++ mtr_t* mtr); /*!< in: mtr */
+ #endif /* !UNIV_HOTBACKUP */
+ /**************************************************************//**
+ Gets the index id field of a page.
+@@ -275,6 +286,17 @@
+ const page_t* page, /*!< in: index page */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+ /********************************************************//**
++Sets the next index page field. */
++UNIV_INLINE
++void
++btr_page_set_next(
++/*==============*/
++ page_t* page, /*!< in: index page */
++ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
++ part will be updated, or NULL */
++ ulint next, /*!< in: next page number */
++ mtr_t* mtr); /*!< in: mini-transaction handle */
++/********************************************************//**
+ Gets the previous index page number.
+ @return prev page number */
+ UNIV_INLINE
+@@ -283,6 +305,17 @@
+ /*==============*/
+ const page_t* page, /*!< in: index page */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
++/********************************************************//**
++Sets the previous index page field. */
++UNIV_INLINE
++void
++btr_page_set_prev(
++/*==============*/
++ page_t* page, /*!< in: index page */
++ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
++ part will be updated, or NULL */
++ ulint prev, /*!< in: previous page number */
++ mtr_t* mtr); /*!< in: mini-transaction handle */
+ /*************************************************************//**
+ Gets pointer to the previous user record in the tree. It is assumed
+ that the caller has appropriate latches on the page and its neighbor.
+@@ -328,6 +361,18 @@
+ /*===========================*/
+ const rec_t* rec, /*!< in: node pointer record */
+ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
++/**************************************************************//**
++Creates a new index page (not the root, and also not
++used in page reorganization). @see btr_page_empty(). */
++UNIV_INTERN
++void
++btr_page_create(
++/*============*/
++ buf_block_t* block, /*!< in/out: page to be created */
++ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
++ dict_index_t* index, /*!< in: index */
++ ulint level, /*!< in: the B-tree level of the page */
++ mtr_t* mtr); /*!< in: mtr */
+ /************************************************************//**
+ Creates the root node for a new index tree.
+ @return page number of the created root, FIL_NULL if did not succeed */
+@@ -397,6 +442,17 @@
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr); /*!< in: mtr */
+ /*************************************************************//**
++Empties an index page. @see btr_page_create(). */
++UNIV_INTERN
++void
++btr_page_empty(
++/*===========*/
++ buf_block_t* block, /*!< in: page to be emptied */
++ page_zip_des_t* page_zip,/*!< out: compressed page, or NULL */
++ dict_index_t* index, /*!< in: index of the page */
++ ulint level, /*!< in: the B-tree level of the page */
++ mtr_t* mtr); /*!< in: mtr */
++/*************************************************************//**
+ Decides if the page should be split at the convergence point of
+ inserts converging to left.
+ @return TRUE if split recommended */
+@@ -455,6 +511,20 @@
+ # define btr_insert_on_non_leaf_level(i,l,t,m) \
+ btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
+ #endif /* !UNIV_HOTBACKUP */
++/**************************************************************//**
++Attaches the halves of an index page on the appropriate level in an
++index tree. */
++UNIV_INTERN
++void
++btr_attach_half_pages(
++/*==================*/
++ dict_index_t* index, /*!< in: the index tree */
++ buf_block_t* block, /*!< in/out: page to be split */
++ rec_t* split_rec, /*!< in: first record on upper
++ half page */
++ buf_block_t* new_block, /*!< in/out: the new half page */
++ ulint direction, /*!< in: FSP_UP or FSP_DOWN */
++ mtr_t* mtr); /*!< in: mtr */
+ /****************************************************************//**
+ Sets a record as the predefined minimum record. */
+ UNIV_INTERN
+--- a/storage/innobase/include/srv0srv.h
++++ b/storage/innobase/include/srv0srv.h
+@@ -234,6 +234,8 @@
extern ulint srv_read_ahead;
extern ulint srv_adaptive_flushing_method;
extern ulint srv_dict_size_limit;
/*-------------------------------------------*/
-diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
---- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:49:59.230956118 +0900
-+++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:52:23.562954411 +0900
-@@ -423,6 +423,8 @@
+--- a/storage/innobase/row/row0mysql.c
++++ b/storage/innobase/row/row0mysql.c
+@@ -2547,6 +2547,11 @@
+
+ current_lsn = log_get_lsn();
+
++ /* Enlarge the fatal lock wait timeout during import. */
++ mutex_enter(&kernel_mutex);
++ srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
++ mutex_exit(&kernel_mutex);
++
+ /* It is possible, though very improbable, that the lsn's in the
+ tablespace to be imported have risen above the current system lsn, if
+ a lengthy purge, ibuf merge, or rollback was performed on a backup
+@@ -2632,7 +2637,7 @@
+ success = fil_open_single_table_tablespace(
+ TRUE, table->space,
+ table->flags == DICT_TF_COMPACT ? 0 : table->flags,
+- table->name);
++ table->name, trx);
+ if (success) {
+ table->ibd_file_missing = FALSE;
+ table->tablespace_discarded = FALSE;
+@@ -2658,6 +2663,11 @@
+
+ trx->op_info = "";
+
++ /* Restore the fatal semaphore wait timeout */
++ mutex_enter(&kernel_mutex);
++ srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
++ mutex_exit(&kernel_mutex);
++
+ return((int) err);
+ }
+
+--- a/storage/innobase/srv/srv0srv.c
++++ b/storage/innobase/srv/srv0srv.c
+@@ -418,6 +418,8 @@
UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
UNIV_INTERN ulint srv_dict_size_limit = 0;
/*-------------------------------------------*/
UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
+--- a/storage/innobase/dict/dict0load.c
++++ b/storage/innobase/dict/dict0load.c
+@@ -778,7 +778,7 @@
+ object and check that the .ibd file exists. */
+
+ fil_open_single_table_tablespace(FALSE, space_id,
+- flags, name);
++ flags, name, NULL);
+ }
+
+ mem_free(name);
+@@ -1833,7 +1833,7 @@
+ if (!fil_open_single_table_tablespace(
+ TRUE, table->space,
+ table->flags == DICT_TF_COMPACT ? 0 :
+- table->flags & ~(~0 << DICT_TF_BITS), name)) {
++ table->flags & ~(~0 << DICT_TF_BITS), name, NULL)) {
+ /* We failed to find a sensible
+ tablespace file */
+
+--- a/storage/innobase/include/fil0fil.h
++++ b/storage/innobase/include/fil0fil.h
+@@ -34,6 +34,7 @@
+ #include "sync0rw.h"
+ #include "ibuf0types.h"
+ #endif /* !UNIV_HOTBACKUP */
++#include "trx0types.h"
+
+ /** When mysqld is run, the default directory "." is the mysqld datadir,
+ but in the MySQL Embedded Server Library and ibbackup it is not the default
+@@ -478,8 +479,11 @@
+ accessing the first page of the file */
+ ulint id, /*!< in: space id */
+ ulint flags, /*!< in: tablespace flags */
+- const char* name); /*!< in: table name in the
++ const char* name, /*!< in: table name in the
+ databasename/tablename format */
++ trx_t* trx); /*!< in: transaction. This is only used
++ for IMPORT TABLESPACE, must be NULL
++ otherwise */
+ /********************************************************************//**
+ It is possible, though very improbable, that the lsn's in the tablespace to be
+ imported have risen above the current system lsn, if a lengthy purge, ibuf