]> git.pld-linux.org Git - packages/mysql.git/blame_incremental - innodb_expand_import.patch
- updated to 5.5.32
[packages/mysql.git] / innodb_expand_import.patch
... / ...
CommitLineData
1# name : innodb_expand_import.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
8--- a/storage/innobase/btr/btr0btr.c
9+++ b/storage/innobase/btr/btr0btr.c
10@@ -838,7 +838,7 @@
11 /**************************************************************//**
12 Creates a new index page (not the root, and also not
13 used in page reorganization). @see btr_page_empty(). */
14-static
15+UNIV_INTERN
16 void
17 btr_page_create(
18 /*============*/
19@@ -1712,7 +1712,7 @@
20 #ifndef UNIV_HOTBACKUP
21 /*************************************************************//**
22 Empties an index page. @see btr_page_create(). */
23-static
24+UNIV_INTERN
25 void
26 btr_page_empty(
27 /*===========*/
28@@ -2274,7 +2274,7 @@
29 /**************************************************************//**
30 Attaches the halves of an index page on the appropriate level in an
31 index tree. */
32-static
33+UNIV_INTERN
34 void
35 btr_attach_half_pages(
36 /*==================*/
37--- a/storage/innobase/fil/fil0fil.c
38+++ b/storage/innobase/fil/fil0fil.c
39@@ -40,6 +40,14 @@
40 #include "dict0dict.h"
41 #include "page0page.h"
42 #include "page0zip.h"
43+#include "trx0trx.h"
44+#include "trx0sys.h"
45+#include "pars0pars.h"
46+#include "row0mysql.h"
47+#include "row0row.h"
48+#include "que0que.h"
49+#include "btr0btr.h"
50+#include "btr0sea.h"
51 #ifndef UNIV_HOTBACKUP
52 # include "buf0lru.h"
53 # include "ibuf0ibuf.h"
54@@ -3041,6 +3049,84 @@
55 }
56
57 /********************************************************************//**
58+Checks if a page is corrupt (for an offline page): verifies the stored
59+checksum field(s) and, if uncompressed, the two LSN copies. @return TRUE if corrupt */
60+static
61+ibool
62+fil_page_buf_page_is_corrupted_offline(
63+/*===================================*/
64+ const byte* page, /*!< in: a database page */
65+ ulint zip_size) /*!< in: size of compressed page;
66+ 0 for uncompressed pages */
67+{
68+ ulint checksum_field;
69+ ulint old_checksum_field;
70+ /* uncompressed only: the 4 bytes at FIL_PAGE_LSN + 4 must equal their copy near the end of the page */
71+ if (!zip_size
72+ && memcmp(page + FIL_PAGE_LSN + 4,
73+ page + UNIV_PAGE_SIZE
74+ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
75+ return(TRUE);
76+ }
77+ /* checksum field stored at FIL_PAGE_SPACE_OR_CHKSUM; read for both page formats */
78+ checksum_field = mach_read_from_4(page
79+ + FIL_PAGE_SPACE_OR_CHKSUM);
80+ /* compressed: corrupt unless the field is BUF_NO_CHECKSUM_MAGIC or the computed zip checksum */
81+ if (zip_size) {
82+ return(checksum_field != BUF_NO_CHECKSUM_MAGIC
83+ && checksum_field
84+ != page_zip_calc_checksum(page, zip_size));
85+ }
86+ /* uncompressed from here on: read the old-style checksum field near the end of the page */
87+ old_checksum_field = mach_read_from_4(
88+ page + UNIV_PAGE_SIZE
89+ - FIL_PAGE_END_LSN_OLD_CHKSUM);
90+ /* accepted values: the 4 bytes at FIL_PAGE_LSN, BUF_NO_CHECKSUM_MAGIC, or the computed old checksum */
91+ if (old_checksum_field != mach_read_from_4(page
92+ + FIL_PAGE_LSN)
93+ && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
94+ && old_checksum_field
95+ != buf_calc_page_old_checksum(page)) {
96+ return(TRUE);
97+ }
98+ /* accepted values: 0, BUF_NO_CHECKSUM_MAGIC, or the computed new checksum */
99+ if (checksum_field != 0
100+ && checksum_field != BUF_NO_CHECKSUM_MAGIC
101+ && checksum_field
102+ != buf_calc_page_new_checksum(page)) {
103+ return(TRUE);
104+ }
105+ /* every check passed */
106+ return(FALSE);
107+}
108+
109+/********************************************************************//**
110+Writes the checksum field(s) into a page buffer; BUF_NO_CHECKSUM_MAGIC is stored instead when srv_use_checksums is off. */
111+static
112+void
113+fil_page_buf_page_store_checksum(
114+/*=============================*/
115+ byte* page, /*!< in/out: page buffer whose checksum field(s) are overwritten */
116+ ulint zip_size) /*!< in: 0 selects the uncompressed layout; otherwise passed to page_zip_calc_checksum() */
117+{
118+ if (!zip_size) { /* uncompressed: field at FIL_PAGE_SPACE_OR_CHKSUM first, then the old-style field near the end of the page */
119+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
120+ srv_use_checksums
121+ ? buf_calc_page_new_checksum(page)
122+ : BUF_NO_CHECKSUM_MAGIC);
123+ mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
124+ srv_use_checksums
125+ ? buf_calc_page_old_checksum(page)
126+ : BUF_NO_CHECKSUM_MAGIC);
127+ } else { /* compressed: only the field at FIL_PAGE_SPACE_OR_CHKSUM is written */
128+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
129+ srv_use_checksums
130+ ? page_zip_calc_checksum(page, zip_size)
131+ : BUF_NO_CHECKSUM_MAGIC);
132+ }
133+}
134+
135+/********************************************************************//**
136 Tries to open a single-table tablespace and optionally checks the space id is
137 right in it. If does not succeed, prints an error message to the .err log. This
138 function is used to open a tablespace when we start up mysqld, and also in
139@@ -3063,8 +3149,11 @@
140 accessing the first page of the file */
141 ulint id, /*!< in: space id */
142 ulint flags, /*!< in: tablespace flags */
143- const char* name) /*!< in: table name in the
144+ const char* name, /*!< in: table name in the
145 databasename/tablename format */
146+ trx_t* trx) /*!< in: transaction. This is only used
147+ for IMPORT TABLESPACE, must be NULL
148+ otherwise */
149 {
150 os_file_t file;
151 char* filepath;
152@@ -3087,7 +3176,7 @@
153
154 file = os_file_create_simple_no_error_handling(
155 innodb_file_data_key, filepath, OS_FILE_OPEN,
156- OS_FILE_READ_ONLY, &success);
157+ OS_FILE_READ_WRITE, &success);
158 if (!success) {
159 /* The following call prints an error message */
160 os_file_get_last_error(TRUE);
161@@ -3134,6 +3223,453 @@
162 space_id = fsp_header_get_space_id(page);
163 space_flags = fsp_header_get_flags(page);
164
165+ if (srv_expand_import) {
166+
167+ ibool file_is_corrupt = FALSE;
168+ byte* buf3;
169+ byte* descr_page;
170+ ibool descr_is_corrupt = FALSE;
171+ index_id_t old_id[31];
172+ index_id_t new_id[31];
173+ ulint root_page[31];
174+ ulint n_index;
175+ os_file_t info_file = -1;
176+ char* info_file_path;
177+ ulint i;
178+ int len;
179+ ib_uint64_t current_lsn;
180+ ulint size_low, size_high, size, free_limit;
181+ ib_int64_t size_bytes, free_limit_bytes;
182+ dict_table_t* table;
183+ dict_index_t* index;
184+ fil_system_t* system;
185+ fil_node_t* node = NULL;
186+ fil_space_t* space;
187+ ulint zip_size;
188+
189+ buf3 = ut_malloc(2 * UNIV_PAGE_SIZE);
190+ descr_page = ut_align(buf3, UNIV_PAGE_SIZE);
191+
192+ current_lsn = log_get_lsn();
193+
194+ /* check the header page's consistency */
195+ if (buf_page_is_corrupted(page,
196+ dict_table_flags_to_zip_size(space_flags))) {
197+ fprintf(stderr, "InnoDB: page 0 of %s seems corrupt.\n", filepath);
198+ file_is_corrupt = TRUE;
199+ descr_is_corrupt = TRUE;
200+ }
201+
202+ /* store as first descr page */
203+ memcpy(descr_page, page, UNIV_PAGE_SIZE);
204+
205+ zip_size = dict_table_flags_to_zip_size(flags);
206+ ut_a(zip_size == dict_table_flags_to_zip_size(space_flags));
207+
208+ /* get free limit (page number) of the table space */
209+/* these should be same to the definition in fsp0fsp.c */
210+#define FSP_HEADER_OFFSET FIL_PAGE_DATA
211+#define FSP_FREE_LIMIT 12
212+ free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page);
213+ free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)(zip_size ? zip_size : UNIV_PAGE_SIZE);
214+
215+ /* overwrite fsp header */
216+ fsp_header_init_fields(page, id, flags);
217+ mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
218+ space_id = id;
219+ space_flags = flags;
220+ if (mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN) > current_lsn)
221+ mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
222+
223+ fil_page_buf_page_store_checksum(page, zip_size);
224+
225+ success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
226+
227+ /* get file size */
228+ os_file_get_size(file, &size_low, &size_high);
229+ size_bytes = (((ib_int64_t)size_high) << 32)
230+ + (ib_int64_t)size_low;
231+
232+ if (size_bytes < free_limit_bytes) {
233+ free_limit_bytes = size_bytes;
234+ if (size_bytes >= (lint)FSP_EXTENT_SIZE * (lint)(zip_size ? zip_size : UNIV_PAGE_SIZE)) {
235+ fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath);
236+ file_is_corrupt = TRUE;
237+ }
238+ }
239+
240+ /* get clustered index information */
241+ table = dict_table_get_low(name);
242+ index = dict_table_get_first_index(table);
243+ ut_a(index->page==3);
244+
245+ /* read metadata from .exp file */
246+ n_index = 0;
247+ memset(old_id, 0, sizeof(old_id));
248+ memset(new_id, 0, sizeof(new_id));
249+ memset(root_page, 0, sizeof(root_page));
250+
251+ info_file_path = fil_make_ibd_name(name, FALSE);
252+ len = strlen(info_file_path);
253+ info_file_path[len - 3] = 'e';
254+ info_file_path[len - 2] = 'x';
255+ info_file_path[len - 1] = 'p';
256+
257+ info_file = os_file_create_simple_no_error_handling(innodb_file_data_key,
258+ info_file_path, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
259+ if (!success) {
260+ fprintf(stderr, "InnoDB: Cannot open the file: %s\n", info_file_path);
261+ file_is_corrupt = TRUE;
262+ goto skip_info;
263+ }
264+ success = os_file_read(info_file, page, 0, 0, UNIV_PAGE_SIZE);
265+ if (!success) {
266+ fprintf(stderr, "InnoDB: Cannot read the file: %s\n", info_file_path);
267+ file_is_corrupt = TRUE;
268+ goto skip_info;
269+ }
270+ if (mach_read_from_4(page) != 0x78706f72UL
271+ || mach_read_from_4(page + 4) != 0x74696e66UL) {
272+ fprintf(stderr, "InnoDB: %s seems to be an incorrect .exp file.\n", info_file_path);
273+ file_is_corrupt = TRUE;
274+ goto skip_info;
275+ }
276+
277+ fprintf(stderr, "InnoDB: Import: The extended import of %s is being started.\n", name);
278+
279+ n_index = mach_read_from_4(page + 8);
280+ fprintf(stderr, "InnoDB: Import: %lu indexes have been detected.\n", (ulong)n_index);
281+ for (i = 0; i < n_index; i++) {
282+ new_id[i] =
283+ dict_table_get_index_on_name(table,
284+ (char*)(page + (i + 1) * 512 + 12))->id;
285+ old_id[i] = mach_read_from_8(page + (i + 1) * 512);
286+ root_page[i] = mach_read_from_4(page + (i + 1) * 512 + 8);
287+ }
288+
289+skip_info:
290+ if (info_file != -1)
291+ os_file_close(info_file);
292+
293+ /*
294+ if (size_bytes >= 1024 * 1024) {
295+ size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
296+ }
297+ */
298+
299+ if (zip_size) {
300+ fprintf(stderr, "InnoDB: Warning: importing compressed table is still EXPERIMENTAL, currently.\n");
301+ }
302+
303+ {
304+ mem_heap_t* heap = NULL;
305+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
306+ ulint* offsets = offsets_;
307+ ib_int64_t offset;
308+
309+ size = (ulint) (size_bytes / (zip_size ? zip_size : UNIV_PAGE_SIZE));
310+ /* overwrite the space id of all pages */
311+ rec_offs_init(offsets_);
312+
313+ /* Unlock the data dictionary to not block queries
314+ accessing other tables */
315+ ut_a(trx);
316+ row_mysql_unlock_data_dictionary(trx);
317+
318+ fprintf(stderr, "InnoDB: Progress in %%:");
319+
320+ for (offset = 0; offset < free_limit_bytes;
321+ offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
322+ ibool page_is_corrupt;
323+
324+ success = os_file_read(file, page,
325+ (ulint)(offset & 0xFFFFFFFFUL),
326+ (ulint)(offset >> 32),
327+ zip_size ? zip_size : UNIV_PAGE_SIZE);
328+
329+ page_is_corrupt = FALSE;
330+
331+ /* check consistency */
332+ if (fil_page_buf_page_is_corrupted_offline(page, zip_size)) {
333+ page_is_corrupt = TRUE;
334+ }
335+
336+ if (mach_read_from_4(page + FIL_PAGE_OFFSET)
337+ != offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) {
338+
339+ page_is_corrupt = TRUE;
340+ }
341+
342+ /* if it is free page, inconsistency is acceptable */
343+ if (!offset) {
344+ /* header page*/
345+ /* it should be overwritten already */
346+ ut_a(!page_is_corrupt);
347+
348+ } else if (!((offset / (zip_size ? zip_size : UNIV_PAGE_SIZE))
349+ % (zip_size ? zip_size : UNIV_PAGE_SIZE))) {
350+ /* descr page (not header) */
351+ if (page_is_corrupt) {
352+ file_is_corrupt = TRUE;
353+ descr_is_corrupt = TRUE;
354+ } else {
355+ ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES);
356+ descr_is_corrupt = FALSE;
357+ }
358+
359+ /* store as descr page */
360+ memcpy(descr_page, page, (zip_size ? zip_size : UNIV_PAGE_SIZE));
361+
362+ } else if (descr_is_corrupt) {
363+ /* unknown state of the page */
364+ if (page_is_corrupt) {
365+ file_is_corrupt = TRUE;
366+ }
367+
368+ } else {
369+ /* check free page or not */
370+ /* These definitions should be same to fsp0fsp.c */
371+#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE)
372+
373+#define XDES_BITMAP (FLST_NODE_SIZE + 12)
374+#define XDES_BITS_PER_PAGE 2
375+#define XDES_FREE_BIT 0
376+#define XDES_SIZE \
377+ (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
378+#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
379+
380+ /*descr = descr_page + XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)*/
381+ /*xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)*/
382+ byte* descr;
383+ ulint index;
384+ ulint byte_index;
385+ ulint bit_index;
386+
387+ descr = descr_page + XDES_ARR_OFFSET
388+ + XDES_SIZE * (ut_2pow_remainder(
389+ (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)),
390+ (zip_size ? zip_size : UNIV_PAGE_SIZE)) / FSP_EXTENT_SIZE);
391+
392+ index = XDES_FREE_BIT
393+ + XDES_BITS_PER_PAGE * ((offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) % FSP_EXTENT_SIZE);
394+ byte_index = index / 8;
395+ bit_index = index % 8;
396+
397+ if (ut_bit_get_nth(mach_read_from_1(descr + XDES_BITMAP + byte_index), bit_index)) {
398+ /* free page */
399+ if (page_is_corrupt) {
400+ goto skip_write;
401+ }
402+ } else {
403+ /* not free */
404+ if (page_is_corrupt) {
405+ file_is_corrupt = TRUE;
406+ }
407+ }
408+ }
409+
410+ if (page_is_corrupt) {
411+ fprintf(stderr, " [errp:%lld]", offset / (zip_size ? zip_size : UNIV_PAGE_SIZE));
412+
413+ /* cannot treat corrupt page */
414+ goto skip_write;
415+ }
416+
417+ if (mach_read_from_4(page + FIL_PAGE_OFFSET) || !offset) {
418+ mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
419+
420+ for (i = 0; i < n_index; i++) {
421+ if (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE) == root_page[i]) {
422+ if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
423+ file_is_corrupt = TRUE;
424+ fprintf(stderr, " [etyp:%lld]",
425+ offset / (zip_size ? zip_size : UNIV_PAGE_SIZE));
426+ goto skip_write;
427+ }
428+ /* this is index root page */
429+ mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
430+ + FSEG_HDR_SPACE, id);
431+ mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
432+ + FSEG_HDR_SPACE, id);
433+ break;
434+ }
435+ }
436+
437+ if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
438+ index_id_t tmp = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID));
439+
440+ for (i = 0; i < n_index; i++) {
441+ if (old_id[i] == tmp) {
442+ mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
443+ break;
444+ }
445+ }
446+
447+ if (!zip_size && mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
448+ && old_id[0] == tmp) {
449+ /* leaf page of cluster index, reset trx_id of records */
450+ rec_t* rec;
451+ rec_t* supremum;
452+ ulint n_recs;
453+
454+ supremum = page_get_supremum_rec(page);
455+ rec = page_rec_get_next(page_get_infimum_rec(page));
456+ n_recs = page_get_n_recs(page);
457+
458+ while (rec && rec != supremum && n_recs > 0) {
459+ ulint n_fields;
460+ ulint i;
461+ ulint offset = index->trx_id_offset;
462+ offsets = rec_get_offsets(rec, index, offsets,
463+ ULINT_UNDEFINED, &heap);
464+ n_fields = rec_offs_n_fields(offsets);
465+ if (!offset) {
466+ offset = row_get_trx_id_offset(index, offsets);
467+ }
468+ trx_write_trx_id(rec + offset, 1);
469+
470+ for (i = 0; i < n_fields; i++) {
471+ if (rec_offs_nth_extern(offsets, i)) {
472+ ulint local_len;
473+ byte* data;
474+
475+ data = rec_get_nth_field(rec, offsets, i, &local_len);
476+
477+ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
478+
479+ mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
480+ }
481+ }
482+
483+ rec = page_rec_get_next(rec);
484+ n_recs--;
485+ }
486+ } else if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
487+ && old_id[0] != tmp) {
488+ mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), 1);
489+ }
490+ }
491+
492+ if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
493+ mach_write_to_8(page + FIL_PAGE_LSN, current_lsn);
494+ if (!zip_size) {
495+ mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
496+ current_lsn);
497+ }
498+ }
499+
500+ fil_page_buf_page_store_checksum(page, zip_size);
501+
502+ success = os_file_write(filepath, file, page,
503+ (ulint)(offset & 0xFFFFFFFFUL),
504+ (ulint)(offset >> 32),
505+ zip_size ? zip_size : UNIV_PAGE_SIZE);
506+ }
507+
508+skip_write:
509+ if (free_limit_bytes
510+ && ((ib_int64_t)((offset + (zip_size ? zip_size : UNIV_PAGE_SIZE)) * 100) / free_limit_bytes)
511+ != ((offset * 100) / free_limit_bytes)) {
512+ fprintf(stderr, " %lu",
513+ (ulong)((ib_int64_t)((offset + (zip_size ? zip_size : UNIV_PAGE_SIZE)) * 100) / free_limit_bytes));
514+ }
515+ }
516+
517+ fprintf(stderr, " done.\n");
518+
519+ /* Reacquire the data dictionary lock */
520+ row_mysql_lock_data_dictionary(trx);
521+
522+ /* update SYS_INDEXES set root page */
523+ index = dict_table_get_first_index(table);
524+ while (index) {
525+ for (i = 0; i < n_index; i++) {
526+ if (new_id[i] == index->id) {
527+ break;
528+ }
529+ }
530+
531+ if (i != n_index
532+ && root_page[i] != index->page) {
533+ /* must update */
534+ ulint error;
535+ trx_t* trx;
536+ pars_info_t* info = NULL;
537+
538+ trx = trx_allocate_for_mysql();
539+ trx->op_info = "extended import";
540+
541+ info = pars_info_create();
542+
543+ pars_info_add_ull_literal(info, "indexid", new_id[i]);
544+ pars_info_add_int4_literal(info, "new_page", (lint) root_page[i]);
545+
546+ error = que_eval_sql(info,
547+ "PROCEDURE UPDATE_INDEX_PAGE () IS\n"
548+ "BEGIN\n"
549+ "UPDATE SYS_INDEXES"
550+ " SET PAGE_NO = :new_page"
551+ " WHERE ID = :indexid;\n"
552+ "COMMIT WORK;\n"
553+ "END;\n",
554+ FALSE, trx);
555+
556+ if (error != DB_SUCCESS) {
557+ fprintf(stderr, "InnoDB: failed to update SYS_INDEXES\n");
558+ }
559+
560+ trx_commit_for_mysql(trx);
561+
562+ trx_free_for_mysql(trx);
563+
564+ index->page = root_page[i];
565+ }
566+
567+ index = dict_table_get_next_index(index);
568+ }
569+ if (UNIV_LIKELY_NULL(heap)) {
570+ mem_heap_free(heap);
571+ }
572+ }
573+ /* .exp file should be removed */
574+ success = os_file_delete(info_file_path);
575+ if (!success) {
576+ success = os_file_delete_if_exists(info_file_path);
577+ }
578+ mem_free(info_file_path);
579+
580+ system = fil_system;
581+ mutex_enter(&(system->mutex));
582+ space = fil_space_get_by_id(id);
583+ if (space)
584+ node = UT_LIST_GET_FIRST(space->chain);
585+ if (node && node->size < size) {
586+ space->size += (size - node->size);
587+ node->size = size;
588+ }
589+ mutex_exit(&(system->mutex));
590+
591+ ut_free(buf3);
592+
593+ if (file_is_corrupt) {
594+ ut_print_timestamp(stderr);
595+ fputs(" InnoDB: Error: file ",
596+ stderr);
597+ ut_print_filename(stderr, filepath);
598+ fprintf(stderr, " seems to be corrupt.\n"
599+ "InnoDB: An attempt to convert and salvage all corrupt pages was not made.\n"
600+ "InnoDB: ##### CAUTION #####\n"
601+ "InnoDB: ## The .ibd file may cause InnoDB to crash, even though its re-import seems to have succeeded.\n"
602+ "InnoDB: ## If you don't know how to salvage data from a .ibd, you should not use the file.\n"
603+ "InnoDB: ###################\n");
604+ success = FALSE;
605+
606+ ut_free(buf2);
607+
608+ goto func_exit;
609+ }
610+ }
611+
612 ut_free(buf2);
613
614 if (UNIV_UNLIKELY(space_id != id
615@@ -3175,6 +3711,269 @@
616 os_file_close(file);
617 mem_free(filepath);
618
619+ if (srv_expand_import && dict_table_flags_to_zip_size(flags)) {
620+ ulint page_no;
621+ ulint zip_size;
622+ ulint height;
623+ rec_t* node_ptr;
624+ dict_table_t* table;
625+ dict_index_t* index;
626+ buf_block_t* block;
627+ page_t* page;
628+ page_zip_des_t* page_zip;
629+ mtr_t mtr;
630+
631+ mem_heap_t* heap = NULL;
632+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
633+ ulint* offsets = offsets_;
634+
635+ rec_offs_init(offsets_);
636+
637+ zip_size = dict_table_flags_to_zip_size(flags);
638+
639+ table = dict_table_get_low(name);
640+ index = dict_table_get_first_index(table);
641+ page_no = dict_index_get_page(index);
642+ ut_a(page_no == 3);
643+
644+ fprintf(stderr, "InnoDB: It is compressed .ibd file. need to convert additionaly on buffer pool.\n");
645+
646+ /* down to leaf */
647+ mtr_start(&mtr);
648+ mtr_set_log_mode(&mtr, MTR_LOG_NONE);
649+
650+ height = ULINT_UNDEFINED;
651+
652+ for (;;) {
653+ block = buf_page_get(space_id, zip_size, page_no,
654+ RW_NO_LATCH, &mtr);
655+ page = buf_block_get_frame(block);
656+
657+ block->check_index_page_at_flush = TRUE;
658+
659+ if (height == ULINT_UNDEFINED) {
660+ height = btr_page_get_level(page, &mtr);
661+ }
662+
663+ if (height == 0) {
664+ break;
665+ }
666+
667+ node_ptr = page_rec_get_next(page_get_infimum_rec(page));
668+
669+ height--;
670+
671+ offsets = rec_get_offsets(node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
672+ page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
673+ }
674+
675+ mtr_commit(&mtr);
676+
677+ fprintf(stderr, "InnoDB: pages needs split are ...");
678+
679+ /* scan leaf pages */
680+ while (page_no != FIL_NULL) {
681+ rec_t* rec;
682+ rec_t* supremum;
683+ ulint n_recs;
684+
685+ mtr_start(&mtr);
686+
687+ block = buf_page_get(space_id, zip_size, page_no,
688+ RW_X_LATCH, &mtr);
689+ page = buf_block_get_frame(block);
690+ page_zip = buf_block_get_page_zip(block);
691+
692+ if (!page_zip) {
693+ /*something wrong*/
694+ fprintf(stderr, "InnoDB: Something wrong with reading page %lu.\n", page_no);
695+convert_err_exit:
696+ mtr_commit(&mtr);
697+ mutex_enter(&fil_system->mutex);
698+ fil_space_free(space_id, FALSE);
699+ mutex_exit(&fil_system->mutex);
700+ success = FALSE;
701+ goto convert_exit;
702+ }
703+
704+ supremum = page_get_supremum_rec(page);
705+ rec = page_rec_get_next(page_get_infimum_rec(page));
706+ n_recs = page_get_n_recs(page);
707+
708+ /* illegal operation as InnoDB online system. so not logged */
709+ while (rec && rec != supremum && n_recs > 0) {
710+ ulint n_fields;
711+ ulint i;
712+ ulint offset = index->trx_id_offset;
713+
714+ offsets = rec_get_offsets(rec, index, offsets,
715+ ULINT_UNDEFINED, &heap);
716+ n_fields = rec_offs_n_fields(offsets);
717+ if (!offset) {
718+ offset = row_get_trx_id_offset(index, offsets);
719+ }
720+ trx_write_trx_id(rec + offset, 1);
721+
722+ for (i = 0; i < n_fields; i++) {
723+ if (rec_offs_nth_extern(offsets, i)) {
724+ ulint local_len;
725+ byte* data;
726+
727+ data = rec_get_nth_field(rec, offsets, i, &local_len);
728+
729+ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
730+
731+ mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
732+ }
733+ }
734+
735+ rec = page_rec_get_next(rec);
736+ n_recs--;
737+ }
738+
739+ /* dummy logged update for along with modified page path */
740+ if (index->id != btr_page_get_index_id(page)) {
741+ /* this should be adjusted already */
742+ fprintf(stderr, "InnoDB: The page %lu seems to be converted wrong.\n", page_no);
743+ goto convert_err_exit;
744+ }
745+ btr_page_set_index_id(page, page_zip, index->id, &mtr);
746+
747+ /* confirm whether fits to the page size or not */
748+ if (!page_zip_compress(page_zip, page, index, &mtr)
749+ && !btr_page_reorganize(block, index, &mtr)) {
750+ buf_block_t* new_block;
751+ page_t* new_page;
752+ page_zip_des_t* new_page_zip;
753+ rec_t* split_rec;
754+ ulint n_uniq;
755+
756+ /* split page is needed */
757+ fprintf(stderr, " %lu", page_no);
758+
759+ mtr_x_lock(dict_index_get_lock(index), &mtr);
760+
761+ n_uniq = dict_index_get_n_unique_in_tree(index);
762+
763+ if(page_get_n_recs(page) < 2) {
764+ /* no way to make smaller */
765+ fprintf(stderr, "InnoDB: The page %lu cannot be store to the page size.\n", page_no);
766+ goto convert_err_exit;
767+ }
768+
769+ if (UNIV_UNLIKELY(page_no == dict_index_get_page(index))) {
770+ ulint new_page_no;
771+ dtuple_t* node_ptr;
772+ ulint level;
773+ rec_t* node_ptr_rec;
774+ page_cur_t page_cursor;
775+
776+ /* it is root page, need to raise before split */
777+
778+ level = btr_page_get_level(page, &mtr);
779+
780+ new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, &mtr);
781+ new_page = buf_block_get_frame(new_block);
782+ new_page_zip = buf_block_get_page_zip(new_block);
783+ btr_page_create(new_block, new_page_zip, index, level, &mtr);
784+
785+ btr_page_set_next(new_page, new_page_zip, FIL_NULL, &mtr);
786+ btr_page_set_prev(new_page, new_page_zip, FIL_NULL, &mtr);
787+
788+ page_zip_copy_recs(new_page_zip, new_page,
789+ page_zip, page, index, &mtr);
790+ btr_search_move_or_delete_hash_entries(new_block, block, index);
791+
792+ rec = page_rec_get_next(page_get_infimum_rec(new_page));
793+ new_page_no = buf_block_get_page_no(new_block);
794+
795+ node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
796+ level);
797+ dtuple_set_info_bits(node_ptr,
798+ dtuple_get_info_bits(node_ptr)
799+ | REC_INFO_MIN_REC_FLAG);
800+ btr_page_empty(block, page_zip, index, level + 1, &mtr);
801+
802+ btr_page_set_next(page, page_zip, FIL_NULL, &mtr);
803+ btr_page_set_prev(page, page_zip, FIL_NULL, &mtr);
804+
805+ page_cur_set_before_first(block, &page_cursor);
806+
807+ node_ptr_rec = page_cur_tuple_insert(&page_cursor, node_ptr,
808+ index, 0, &mtr);
809+ ut_a(node_ptr_rec);
810+
811+ if (!btr_page_reorganize(block, index, &mtr)) {
812+ fprintf(stderr, "InnoDB: failed to store the page %lu.\n", page_no);
813+ goto convert_err_exit;
814+ }
815+
816+ /* move to the raised page */
817+ page_no = new_page_no;
818+ block = new_block;
819+ page = new_page;
820+ page_zip = new_page_zip;
821+
822+ fprintf(stderr, "(raise_to:%lu)", page_no);
823+ }
824+
825+ split_rec = page_get_middle_rec(page);
826+
827+ new_block = btr_page_alloc(index, page_no + 1, FSP_UP,
828+ btr_page_get_level(page, &mtr), &mtr);
829+ new_page = buf_block_get_frame(new_block);
830+ new_page_zip = buf_block_get_page_zip(new_block);
831+ btr_page_create(new_block, new_page_zip, index,
832+ btr_page_get_level(page, &mtr), &mtr);
833+
834+ offsets = rec_get_offsets(split_rec, index, offsets, n_uniq, &heap);
835+
836+ btr_attach_half_pages(index, block,
837+ split_rec, new_block, FSP_UP, &mtr);
838+
839+ page_zip_copy_recs(new_page_zip, new_page,
840+ page_zip, page, index, &mtr);
841+ page_delete_rec_list_start(split_rec - page + new_page,
842+ new_block, index, &mtr);
843+ btr_search_move_or_delete_hash_entries(new_block, block, index);
844+ page_delete_rec_list_end(split_rec, block, index,
845+ ULINT_UNDEFINED, ULINT_UNDEFINED, &mtr);
846+
847+ fprintf(stderr, "(new:%lu)", buf_block_get_page_no(new_block));
848+
849+ /* Are they needed? */
850+ if (!btr_page_reorganize(block, index, &mtr)) {
851+ fprintf(stderr, "InnoDB: failed to store the page %lu.\n", page_no);
852+ goto convert_err_exit;
853+ }
854+ if (!btr_page_reorganize(new_block, index, &mtr)) {
855+ fprintf(stderr, "InnoDB: failed to store the page %lu.\n", buf_block_get_page_no(new_block));
856+ goto convert_err_exit;
857+ }
858+ }
859+
860+ page_no = btr_page_get_next(page, &mtr);
861+
862+ mtr_commit(&mtr);
863+
864+ if (heap) {
865+ mem_heap_empty(heap);
866+ }
867+ }
868+
869+ fprintf(stderr, "...done.\nInnoDB: waiting the flush batch of the additional conversion.\n");
870+
871+ /* should wait until the not-logged changes are all flushed */
872+ buf_flush_list(ULINT_MAX, mtr.end_lsn + 1);
873+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
874+
875+ fprintf(stderr, "InnoDB: done.\n");
876+convert_exit:
877+ if (UNIV_LIKELY_NULL(heap)) {
878+ mem_heap_free(heap);
879+ }
880+ }
881+
882 return(success);
883 }
884 #endif /* !UNIV_HOTBACKUP */
885--- a/storage/innobase/handler/ha_innodb.cc
886+++ b/storage/innobase/handler/ha_innodb.cc
887@@ -7421,6 +7421,14 @@
888 err = row_discard_tablespace_for_mysql(dict_table->name, trx);
889 } else {
890 err = row_import_tablespace_for_mysql(dict_table->name, trx);
891+
892+ /* in expanded import mode re-initialize auto_increment again */
893+ if ((err == DB_SUCCESS) && srv_expand_import &&
894+ (table->found_next_number_field != NULL)) {
895+ dict_table_autoinc_lock(dict_table);
896+ innobase_initialize_autoinc();
897+ dict_table_autoinc_unlock(dict_table);
898+ }
899 }
900
901 err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
902@@ -11820,6 +11828,11 @@
903 NULL, NULL, 0, 0, 1, 0);
904 #endif
905
906+static MYSQL_SYSVAR_ULONG(import_table_from_xtrabackup, srv_expand_import,
907+ PLUGIN_VAR_RQCMDARG,
908+ "Enable/Disable converting automatically *.ibd files when import tablespace.",
909+ NULL, NULL, 0, 0, 1, 0);
910+
911 static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit,
912 PLUGIN_VAR_RQCMDARG,
913 "Limit the allocated memory for dictionary cache. (0: unlimited)",
914@@ -11894,6 +11907,7 @@
915 MYSQL_SYSVAR(flush_neighbor_pages),
916 MYSQL_SYSVAR(read_ahead),
917 MYSQL_SYSVAR(adaptive_flushing_method),
918+ MYSQL_SYSVAR(import_table_from_xtrabackup),
919 MYSQL_SYSVAR(dict_size_limit),
920 MYSQL_SYSVAR(use_sys_malloc),
921 MYSQL_SYSVAR(use_native_aio),
922--- a/storage/innobase/include/btr0btr.h
923+++ b/storage/innobase/include/btr0btr.h
924@@ -238,6 +238,17 @@
925 @return the uncompressed page frame */
926 # define btr_page_get(space,zip_size,page_no,mode,idx,mtr) \
927 buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,idx,mtr))
928+/**************************************************************//**
929+Sets the index id field of a page. */
930+UNIV_INLINE
931+void
932+btr_page_set_index_id(
933+/*==================*/
934+ page_t* page, /*!< in: page to be created */
935+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
936+ part will be updated, or NULL */
937+ index_id_t id, /*!< in: index id */
938+ mtr_t* mtr); /*!< in: mtr */
939 #endif /* !UNIV_HOTBACKUP */
940 /**************************************************************//**
941 Gets the index id field of a page.
942@@ -275,6 +286,17 @@
943 const page_t* page, /*!< in: index page */
944 mtr_t* mtr); /*!< in: mini-transaction handle */
945 /********************************************************//**
946+Sets the next index page field. */
947+UNIV_INLINE
948+void
949+btr_page_set_next(
950+/*==============*/
951+ page_t* page, /*!< in: index page */
952+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
953+ part will be updated, or NULL */
954+ ulint next, /*!< in: next page number */
955+ mtr_t* mtr); /*!< in: mini-transaction handle */
956+/********************************************************//**
957 Gets the previous index page number.
958 @return prev page number */
959 UNIV_INLINE
960@@ -283,6 +305,17 @@
961 /*==============*/
962 const page_t* page, /*!< in: index page */
963 mtr_t* mtr); /*!< in: mini-transaction handle */
964+/********************************************************//**
965+Sets the previous index page field. */
966+UNIV_INLINE
967+void
968+btr_page_set_prev(
969+/*==============*/
970+ page_t* page, /*!< in: index page */
971+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
972+ part will be updated, or NULL */
973+ ulint prev, /*!< in: previous page number */
974+ mtr_t* mtr); /*!< in: mini-transaction handle */
975 /*************************************************************//**
976 Gets pointer to the previous user record in the tree. It is assumed
977 that the caller has appropriate latches on the page and its neighbor.
978@@ -328,6 +361,18 @@
979 /*===========================*/
980 const rec_t* rec, /*!< in: node pointer record */
981 const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
982+/**************************************************************//**
983+Creates a new index page (not the root, and also not
984+used in page reorganization). @see btr_page_empty(). */
985+UNIV_INTERN
986+void
987+btr_page_create(
988+/*============*/
989+ buf_block_t* block, /*!< in/out: page to be created */
990+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
991+ dict_index_t* index, /*!< in: index */
992+ ulint level, /*!< in: the B-tree level of the page */
993+ mtr_t* mtr); /*!< in: mtr */
994 /************************************************************//**
995 Creates the root node for a new index tree.
996 @return page number of the created root, FIL_NULL if did not succeed */
997@@ -397,6 +442,17 @@
998 dict_index_t* index, /*!< in: record descriptor */
999 mtr_t* mtr); /*!< in: mtr */
1000 /*************************************************************//**
1001+Empties an index page. @see btr_page_create(). */
1002+UNIV_INTERN
1003+void
1004+btr_page_empty(
1005+/*===========*/
1006+ buf_block_t* block, /*!< in: page to be emptied */
1007+ page_zip_des_t* page_zip,/*!< out: compressed page, or NULL */
1008+ dict_index_t* index, /*!< in: index of the page */
1009+ ulint level, /*!< in: the B-tree level of the page */
1010+ mtr_t* mtr); /*!< in: mtr */
1011+/*************************************************************//**
1012 Decides if the page should be split at the convergence point of
1013 inserts converging to left.
1014 @return TRUE if split recommended */
1015@@ -455,6 +511,20 @@
1016 # define btr_insert_on_non_leaf_level(i,l,t,m) \
1017 btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
1018 #endif /* !UNIV_HOTBACKUP */
1019+/**************************************************************//**
1020+Attaches the halves of an index page on the appropriate level in an
1021+index tree. */
1022+UNIV_INTERN
1023+void
1024+btr_attach_half_pages(
1025+/*==================*/
1026+ dict_index_t* index, /*!< in: the index tree */
1027+ buf_block_t* block, /*!< in/out: page to be split */
1028+ rec_t* split_rec, /*!< in: first record on upper
1029+ half page */
1030+ buf_block_t* new_block, /*!< in/out: the new half page */
1031+ ulint direction, /*!< in: FSP_UP or FSP_DOWN */
1032+ mtr_t* mtr); /*!< in: mtr */
1033 /****************************************************************//**
1034 Sets a record as the predefined minimum record. */
1035 UNIV_INTERN
1036--- a/storage/innobase/include/srv0srv.h
1037+++ b/storage/innobase/include/srv0srv.h
1038@@ -234,6 +234,8 @@
1039 extern ulint srv_read_ahead;
1040 extern ulint srv_adaptive_flushing_method;
1041
1042+extern ulint srv_expand_import;
1043+
1044 extern ulint srv_dict_size_limit;
1045 /*-------------------------------------------*/
1046
1047--- a/storage/innobase/row/row0mysql.c
1048+++ b/storage/innobase/row/row0mysql.c
1049@@ -2547,6 +2547,11 @@
1050
1051 current_lsn = log_get_lsn();
1052
1053+ /* Enlarge the fatal semaphore wait timeout during import. */
1054+ mutex_enter(&kernel_mutex);
1055+ srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
1056+ mutex_exit(&kernel_mutex);
1057+
1058 /* It is possible, though very improbable, that the lsn's in the
1059 tablespace to be imported have risen above the current system lsn, if
1060 a lengthy purge, ibuf merge, or rollback was performed on a backup
1061@@ -2632,7 +2637,7 @@
1062 success = fil_open_single_table_tablespace(
1063 TRUE, table->space,
1064 table->flags == DICT_TF_COMPACT ? 0 : table->flags,
1065- table->name);
1066+ table->name, trx);
1067 if (success) {
1068 table->ibd_file_missing = FALSE;
1069 table->tablespace_discarded = FALSE;
1070@@ -2658,6 +2663,11 @@
1071
1072 trx->op_info = "";
1073
1074+ /* Restore the fatal semaphore wait timeout */
1075+ mutex_enter(&kernel_mutex);
1076+ srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
1077+ mutex_exit(&kernel_mutex);
1078+
1079 return((int) err);
1080 }
1081
1082--- a/storage/innobase/srv/srv0srv.c
1083+++ b/storage/innobase/srv/srv0srv.c
1084@@ -418,6 +418,8 @@
1085 UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
1086 UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
1087
1088+UNIV_INTERN ulint srv_expand_import = 0; /* 0:disable 1:enable */
1089+
1090 UNIV_INTERN ulint srv_dict_size_limit = 0;
1091 /*-------------------------------------------*/
1092 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
1093--- a/storage/innobase/dict/dict0load.c
1094+++ b/storage/innobase/dict/dict0load.c
1095@@ -778,7 +778,7 @@
1096 object and check that the .ibd file exists. */
1097
1098 fil_open_single_table_tablespace(FALSE, space_id,
1099- flags, name);
1100+ flags, name, NULL);
1101 }
1102
1103 mem_free(name);
1104@@ -1833,7 +1833,7 @@
1105 if (!fil_open_single_table_tablespace(
1106 TRUE, table->space,
1107 table->flags == DICT_TF_COMPACT ? 0 :
1108- table->flags & ~(~0 << DICT_TF_BITS), name)) {
1109+ table->flags & ~(~0 << DICT_TF_BITS), name, NULL)) {
1110 /* We failed to find a sensible
1111 tablespace file */
1112
1113--- a/storage/innobase/include/fil0fil.h
1114+++ b/storage/innobase/include/fil0fil.h
1115@@ -34,6 +34,7 @@
1116 #include "sync0rw.h"
1117 #include "ibuf0types.h"
1118 #endif /* !UNIV_HOTBACKUP */
1119+#include "trx0types.h"
1120
1121 /** When mysqld is run, the default directory "." is the mysqld datadir,
1122 but in the MySQL Embedded Server Library and ibbackup it is not the default
1123@@ -478,8 +479,11 @@
1124 accessing the first page of the file */
1125 ulint id, /*!< in: space id */
1126 ulint flags, /*!< in: tablespace flags */
1127- const char* name); /*!< in: table name in the
1128+ const char* name, /*!< in: table name in the
1129 databasename/tablename format */
1130+ trx_t* trx); /*!< in: transaction. This is only used
1131+ for IMPORT TABLESPACE, must be NULL
1132+ otherwise */
1133 /********************************************************************//**
1134 It is possible, though very improbable, that the lsn's in the tablespace to be
1135 imported have risen above the current system lsn, if a lengthy purge, ibuf
This page took 0.047505 seconds and 4 git commands to generate.