]> git.pld-linux.org Git - packages/mysql.git/blob - innodb_expand_import.patch
- up to 5.5.17
[packages/mysql.git] / innodb_expand_import.patch
1 # name       : innodb_expand_import.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
4 #
5 #!!! notice !!!
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 --- a/storage/innobase/btr/btr0btr.c
9 +++ b/storage/innobase/btr/btr0btr.c
10 @@ -838,7 +838,7 @@
11  /**************************************************************//**
12  Creates a new index page (not the root, and also not
13  used in page reorganization).  @see btr_page_empty(). */
14 -static
15 +UNIV_INTERN
16  void
17  btr_page_create(
18  /*============*/
19 @@ -1712,7 +1712,7 @@
20  #ifndef UNIV_HOTBACKUP
21  /*************************************************************//**
22  Empties an index page.  @see btr_page_create(). */
23 -static
24 +UNIV_INTERN
25  void
26  btr_page_empty(
27  /*===========*/
28 @@ -2274,7 +2274,7 @@
29  /**************************************************************//**
30  Attaches the halves of an index page on the appropriate level in an
31  index tree. */
32 -static
33 +UNIV_INTERN
34  void
35  btr_attach_half_pages(
36  /*==================*/
37 --- a/storage/innobase/fil/fil0fil.c
38 +++ b/storage/innobase/fil/fil0fil.c
39 @@ -40,6 +40,14 @@
40  #include "dict0dict.h"
41  #include "page0page.h"
42  #include "page0zip.h"
43 +#include "trx0trx.h"
44 +#include "trx0sys.h"
45 +#include "pars0pars.h"
46 +#include "row0mysql.h"
47 +#include "row0row.h"
48 +#include "que0que.h"
49 +#include "btr0btr.h"
50 +#include "btr0sea.h"
51  #ifndef UNIV_HOTBACKUP
52  # include "buf0lru.h"
53  # include "ibuf0ibuf.h"
54 @@ -3033,6 +3041,84 @@
55  }
56  
57  /********************************************************************//**
58 +Checks if a page image read straight from a file looks corrupt (offline page; only checksum and LSN-byte consistency are examined).
59 +@return TRUE if any consistency check fails, FALSE otherwise */
60 +static
61 +ibool
62 +fil_page_buf_page_is_corrupted_offline(
63 +/*===================================*/
64 +       const byte*     page,           /*!< in: a database page */
65 +       ulint           zip_size)       /*!< in: size of compressed page;
66 +                                       0 for uncompressed pages */
67 +{
68 +       ulint           checksum_field;
69 +       ulint           old_checksum_field;
70 +
71 +       if (!zip_size
72 +           && memcmp(page + FIL_PAGE_LSN + 4,
73 +                     page + UNIV_PAGE_SIZE
74 +                     - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
75 +               return(TRUE); /* uncompressed page: bytes 4..7 of the header LSN field differ from the 4 bytes at offset 4 of the page trailer */
76 +       }
77 +
78 +       checksum_field = mach_read_from_4(page
79 +                                         + FIL_PAGE_SPACE_OR_CHKSUM);
80 +
81 +       if (zip_size) { /* compressed page: only the header checksum field is verified */
82 +               return(checksum_field != BUF_NO_CHECKSUM_MAGIC
83 +                      && checksum_field
84 +                      != page_zip_calc_checksum(page, zip_size));
85 +       }
86 +
87 +       old_checksum_field = mach_read_from_4(
88 +               page + UNIV_PAGE_SIZE
89 +               - FIL_PAGE_END_LSN_OLD_CHKSUM);
90 +
91 +       if (old_checksum_field != mach_read_from_4(page
92 +                                                  + FIL_PAGE_LSN)
93 +           && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
94 +           && old_checksum_field
95 +           != buf_calc_page_old_checksum(page)) {
96 +               return(TRUE); /* trailer field matches none of: first 4 bytes at FIL_PAGE_LSN, BUF_NO_CHECKSUM_MAGIC, computed old checksum */
97 +       }
98 +
99 +       if (checksum_field != 0
100 +           && checksum_field != BUF_NO_CHECKSUM_MAGIC
101 +           && checksum_field
102 +           != buf_calc_page_new_checksum(page)) {
103 +               return(TRUE); /* header field matches none of: 0, BUF_NO_CHECKSUM_MAGIC, computed new checksum */
104 +       }
105 +
106 +       return(FALSE);
107 +}
108 +
109 +/********************************************************************//**
110 +Writes the checksum field(s) into a page image; BUF_NO_CHECKSUM_MAGIC is stored instead when srv_use_checksums is off. */
111 +static
112 +void
113 +fil_page_buf_page_store_checksum(
114 +/*=============================*/
115 +       byte*   page,           /*!< in/out: page image whose checksum field(s) are overwritten */
116 +       ulint   zip_size)       /*!< in: compressed page size; 0 selects the uncompressed layout */
117 +{
118 +       if (!zip_size) { /* uncompressed: header field first, then the 4-byte field at the start of the page trailer */
119 +               mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
120 +                               srv_use_checksums
121 +                               ? buf_calc_page_new_checksum(page)
122 +                                               : BUF_NO_CHECKSUM_MAGIC);
123 +               mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, /* NOTE(review): presumably must follow the header write because the old checksum covers that field - confirm */
124 +                               srv_use_checksums
125 +                               ? buf_calc_page_old_checksum(page)
126 +                                               : BUF_NO_CHECKSUM_MAGIC);
127 +       } else { /* compressed: only the header checksum field is written */
128 +               mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
129 +                               srv_use_checksums
130 +                               ? page_zip_calc_checksum(page, zip_size)
131 +                               : BUF_NO_CHECKSUM_MAGIC);
132 +       }
133 +}
134 +
135 +/********************************************************************//**
136  Tries to open a single-table tablespace and optionally checks the space id is
137  right in it. If does not succeed, prints an error message to the .err log. This
138  function is used to open a tablespace when we start up mysqld, and also in
139 @@ -3079,7 +3165,7 @@
140  
141         file = os_file_create_simple_no_error_handling(
142                 innodb_file_data_key, filepath, OS_FILE_OPEN,
143 -               OS_FILE_READ_ONLY, &success);
144 +               OS_FILE_READ_WRITE, &success);
145         if (!success) {
146                 /* The following call prints an error message */
147                 os_file_get_last_error(TRUE);
148 @@ -3126,6 +3212,445 @@
149         space_id = fsp_header_get_space_id(page);
150         space_flags = fsp_header_get_flags(page);
151  
152 +       if (srv_expand_import) {
153 +
154 +               ibool           file_is_corrupt = FALSE;
155 +               byte*           buf3;
156 +               byte*           descr_page;
157 +               ibool           descr_is_corrupt = FALSE;
158 +               index_id_t      old_id[31];
159 +               index_id_t      new_id[31];
160 +               ulint           root_page[31];
161 +               ulint           n_index;
162 +               os_file_t       info_file = -1;
163 +               char*           info_file_path;
164 +               ulint   i;
165 +               int             len;
166 +               ib_uint64_t     current_lsn;
167 +               ulint           size_low, size_high, size, free_limit;
168 +               ib_int64_t      size_bytes, free_limit_bytes;
169 +               dict_table_t*   table;
170 +               dict_index_t*   index;
171 +               fil_system_t*   system;
172 +               fil_node_t*     node = NULL;
173 +               fil_space_t*    space;
174 +               ulint           zip_size;
175 +
176 +               buf3 = ut_malloc(2 * UNIV_PAGE_SIZE);
177 +               descr_page = ut_align(buf3, UNIV_PAGE_SIZE);
178 +
179 +               current_lsn = log_get_lsn();
180 +
181 +               /* check the header page's consistency */
182 +               if (buf_page_is_corrupted(page,
183 +                                         dict_table_flags_to_zip_size(space_flags))) {
184 +                       fprintf(stderr, "InnoDB: page 0 of %s seems corrupt.\n", filepath);
185 +                       file_is_corrupt = TRUE;
186 +                       descr_is_corrupt = TRUE;
187 +               }
188 +
189 +               /* store as first descr page */
190 +               memcpy(descr_page, page, UNIV_PAGE_SIZE);
191 +
192 +               zip_size = dict_table_flags_to_zip_size(flags);
193 +               ut_a(zip_size == dict_table_flags_to_zip_size(space_flags));
194 +
195 +               /* get free limit (page number) of the table space */
196 +/* these must match the definitions in fsp0fsp.c */
197 +#define FSP_HEADER_OFFSET      FIL_PAGE_DATA
198 +#define        FSP_FREE_LIMIT          12
199 +               free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page);
200 +               free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)(zip_size ? zip_size : UNIV_PAGE_SIZE);
201 +
202 +               /* overwrite fsp header */
203 +               fsp_header_init_fields(page, id, flags);
204 +               mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
205 +               space_id = id;
206 +               space_flags = flags;
207 +               if (mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN) > current_lsn)
208 +                       mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
209 +
210 +               fil_page_buf_page_store_checksum(page, zip_size);
211 +
212 +               success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
213 +
214 +               /* get file size */
215 +               os_file_get_size(file, &size_low, &size_high);
216 +               size_bytes = (((ib_int64_t)size_high) << 32)
217 +                               + (ib_int64_t)size_low;
218 +
219 +               if (size_bytes < free_limit_bytes) {
220 +                       free_limit_bytes = size_bytes;
221 +                       if (size_bytes >= (lint)FSP_EXTENT_SIZE * (lint)(zip_size ? zip_size : UNIV_PAGE_SIZE)) {
222 +                               fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath);
223 +                               file_is_corrupt = TRUE;
224 +                       }
225 +               }
226 +
227 +               /* get clustered index information */
228 +               table = dict_table_get_low(name);
229 +               index = dict_table_get_first_index(table);
230 +               ut_a(index->page==3);
231 +
232 +               /* read metadata from .exp file */
233 +               n_index = 0;
234 +               memset(old_id, 0, sizeof(old_id));
235 +               memset(new_id, 0, sizeof(new_id));
236 +               memset(root_page, 0, sizeof(root_page));
237 +
238 +               info_file_path = fil_make_ibd_name(name, FALSE);
239 +               len = strlen(info_file_path);
240 +               info_file_path[len - 3] = 'e';
241 +               info_file_path[len - 2] = 'x';
242 +               info_file_path[len - 1] = 'p';
243 +
244 +               info_file = os_file_create_simple_no_error_handling(innodb_file_data_key,
245 +                               info_file_path, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
246 +               if (!success) {
247 +                       fprintf(stderr, "InnoDB: Cannot open the file: %s\n", info_file_path);
248 +                       file_is_corrupt = TRUE;
249 +                       goto skip_info;
250 +               }
251 +               success = os_file_read(info_file, page, 0, 0, UNIV_PAGE_SIZE);
252 +               if (!success) {
253 +                       fprintf(stderr, "InnoDB: Cannot read the file: %s\n", info_file_path);
254 +                       file_is_corrupt = TRUE;
255 +                       goto skip_info;
256 +               }
257 +               if (mach_read_from_4(page) != 0x78706f72UL
258 +                   || mach_read_from_4(page + 4) != 0x74696e66UL) {
259 +                       fprintf(stderr, "InnoDB: %s seems to be an incorrect .exp file.\n", info_file_path);
260 +                       file_is_corrupt = TRUE;
261 +                       goto skip_info;
262 +               }
263 +
264 +               fprintf(stderr, "InnoDB: Import: The extended import of %s is being started.\n", name);
265 +
266 +               n_index = mach_read_from_4(page + 8);
267 +               fprintf(stderr, "InnoDB: Import: %lu indexes have been detected.\n", (ulong)n_index);
268 +               for (i = 0; i < n_index; i++) {
269 +                       new_id[i] =
270 +                               dict_table_get_index_on_name(table,
271 +                                               (char*)(page + (i + 1) * 512 + 12))->id;
272 +                       old_id[i] = mach_read_from_8(page + (i + 1) * 512);
273 +                       root_page[i] = mach_read_from_4(page + (i + 1) * 512 + 8);
274 +               }
275 +
276 +skip_info:
277 +               if (info_file != -1)
278 +                       os_file_close(info_file);
279 +
280 +               /*
281 +               if (size_bytes >= 1024 * 1024) {
282 +                       size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
283 +               }
284 +               */
285 +
286 +               if (zip_size) {
287 +                       fprintf(stderr, "InnoDB: Warning: importing compressed table is still EXPERIMENTAL, currently.\n");
288 +               }
289 +
290 +               {
291 +                       mem_heap_t*     heap = NULL;
292 +                       ulint           offsets_[REC_OFFS_NORMAL_SIZE];
293 +                       ulint*          offsets = offsets_;
294 +                       ib_int64_t      offset;
295 +
296 +                       size = (ulint) (size_bytes / (zip_size ? zip_size : UNIV_PAGE_SIZE));
297 +                       /* overwrite the space id of all pages */
298 +                       rec_offs_init(offsets_);
299 +
300 +                       fprintf(stderr, "InnoDB: Progress in %%:");
301 +
302 +                       for (offset = 0; offset < free_limit_bytes;
303 +                            offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
304 +                               ibool           page_is_corrupt;
305 +
306 +                               success = os_file_read(file, page,
307 +                                                       (ulint)(offset & 0xFFFFFFFFUL),
308 +                                                       (ulint)(offset >> 32),
309 +                                                       zip_size ? zip_size : UNIV_PAGE_SIZE);
310 +
311 +                               page_is_corrupt = FALSE;
312 +
313 +                               /* check consistency */
314 +                               if (fil_page_buf_page_is_corrupted_offline(page, zip_size)) {
315 +                                       page_is_corrupt = TRUE;
316 +                               }
317 +
318 +                               if (mach_read_from_4(page + FIL_PAGE_OFFSET)
319 +                                   != offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) {
320 +
321 +                                       page_is_corrupt = TRUE;
322 +                               }
323 +
324 +                               /* if it is a free page, inconsistency is acceptable */
325 +                               if (!offset) {
326 +                                       /* header page*/
327 +                                       /* it should be overwritten already */
328 +                                       ut_a(!page_is_corrupt);
329 +
330 +                               } else if (!((offset / (zip_size ? zip_size : UNIV_PAGE_SIZE))
331 +                                            % (zip_size ? zip_size : UNIV_PAGE_SIZE))) {
332 +                                       /* descr page (not header) */
333 +                                       if (page_is_corrupt) {
334 +                                               file_is_corrupt = TRUE;
335 +                                               descr_is_corrupt = TRUE;
336 +                                       } else {
337 +                                               ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES);
338 +                                               descr_is_corrupt = FALSE;
339 +                                       }
340 +
341 +                                       /* store as descr page */
342 +                                       memcpy(descr_page, page, (zip_size ? zip_size : UNIV_PAGE_SIZE));
343 +
344 +                               } else if (descr_is_corrupt) {
345 +                                       /* unknown state of the page */
346 +                                       if (page_is_corrupt) {
347 +                                               file_is_corrupt = TRUE;
348 +                                       }
349 +
350 +                               } else {
351 +                                       /* check free page or not */
352 +                                       /* These definitions must match those in fsp0fsp.c */
353 +#define        FSP_HEADER_SIZE         (32 + 5 * FLST_BASE_NODE_SIZE)
354 +
355 +#define        XDES_BITMAP             (FLST_NODE_SIZE + 12)
356 +#define        XDES_BITS_PER_PAGE      2
357 +#define        XDES_FREE_BIT           0
358 +#define        XDES_SIZE                                                       \
359 +       (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
360 +#define        XDES_ARR_OFFSET         (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
361 +
362 +                                       /*descr = descr_page + XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)*/
363 +                                       /*xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)*/
364 +                                       byte*   descr;
365 +                                       ulint   index;
366 +                                       ulint   byte_index;
367 +                                       ulint   bit_index;
368 +
369 +                                       descr = descr_page + XDES_ARR_OFFSET
370 +                                               + XDES_SIZE * (ut_2pow_remainder(
371 +                                                       (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)),
372 +                                                       (zip_size ? zip_size : UNIV_PAGE_SIZE)) / FSP_EXTENT_SIZE);
373 +
374 +                                       index = XDES_FREE_BIT
375 +                                               + XDES_BITS_PER_PAGE * ((offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) % FSP_EXTENT_SIZE);
376 +                                       byte_index = index / 8;
377 +                                       bit_index = index % 8;
378 +
379 +                                       if (ut_bit_get_nth(mach_read_from_1(descr + XDES_BITMAP + byte_index), bit_index)) {
380 +                                               /* free page */
381 +                                               if (page_is_corrupt) {
382 +                                                       goto skip_write;
383 +                                               }
384 +                                       } else {
385 +                                               /* not free */
386 +                                               if (page_is_corrupt) {
387 +                                                       file_is_corrupt = TRUE;
388 +                                               }
389 +                                       }
390 +                               }
391 +
392 +                               if (page_is_corrupt) {
393 +                                       fprintf(stderr, " [errp:%lld]", offset / (zip_size ? zip_size : UNIV_PAGE_SIZE));
394 +
395 +                                       /* cannot treat corrupt page */
396 +                                       goto skip_write;
397 +                               }
398 +
399 +                               if (mach_read_from_4(page + FIL_PAGE_OFFSET) || !offset) {
400 +                                       mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
401 +
402 +                                       for (i = 0; i < n_index; i++) {
403 +                                               if (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE) == root_page[i]) {
404 +                                                       if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
405 +                                                               file_is_corrupt = TRUE;
406 +                                                               fprintf(stderr, " [etyp:%lld]",
407 +                                                                       offset / (zip_size ? zip_size : UNIV_PAGE_SIZE));
408 +                                                               goto skip_write;
409 +                                                       }
410 +                                                       /* this is index root page */
411 +                                                       mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
412 +                                                                                       + FSEG_HDR_SPACE, id);
413 +                                                       mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
414 +                                                                                       + FSEG_HDR_SPACE, id);
415 +                                                       break;
416 +                                               }
417 +                                       }
418 +
419 +                                       if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
420 +                                               index_id_t tmp = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID));
421 +
422 +                                               for (i = 0; i < n_index; i++) {
423 +                                                       if (old_id[i] == tmp) {
424 +                                                               mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
425 +                                                               break;
426 +                                                       }
427 +                                               }
428 +
429 +                                               if (!zip_size && mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
430 +                                                   && old_id[0] == tmp) {
431 +                                                       /* leaf page of cluster index, reset trx_id of records */
432 +                                                       rec_t*  rec;
433 +                                                       rec_t*  supremum;
434 +                                                       ulint   n_recs;
435 +
436 +                                                       supremum = page_get_supremum_rec(page);
437 +                                                       rec = page_rec_get_next(page_get_infimum_rec(page));
438 +                                                       n_recs = page_get_n_recs(page);
439 +
440 +                                                       while (rec && rec != supremum && n_recs > 0) {
441 +                                                               ulint   n_fields;
442 +                                                               ulint   i;
443 +                                                               ulint   offset = index->trx_id_offset;
444 +                                                               offsets = rec_get_offsets(rec, index, offsets,
445 +                                                                               ULINT_UNDEFINED, &heap);
446 +                                                               n_fields = rec_offs_n_fields(offsets);
447 +                                                               if (!offset) {
448 +                                                                       offset = row_get_trx_id_offset(index, offsets);
449 +                                                               }
450 +                                                               trx_write_trx_id(rec + offset, 1);
451 +
452 +                                                               for (i = 0; i < n_fields; i++) {
453 +                                                                       if (rec_offs_nth_extern(offsets, i)) {
454 +                                                                               ulint   local_len;
455 +                                                                               byte*   data;
456 +
457 +                                                                               data = rec_get_nth_field(rec, offsets, i, &local_len);
458 +
459 +                                                                               local_len -= BTR_EXTERN_FIELD_REF_SIZE;
460 +
461 +                                                                               mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
462 +                                                                       }
463 +                                                               }
464 +
465 +                                                               rec = page_rec_get_next(rec);
466 +                                                               n_recs--;
467 +                                                       }
468 +                                               } else if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
469 +                                                          && old_id[0] != tmp) {
470 +                                                       mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), 1);
471 +                                               }
472 +                                       }
473 +
474 +                                       if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
475 +                                               mach_write_to_8(page + FIL_PAGE_LSN, current_lsn);
476 +                                               if (!zip_size) {
477 +                                                       mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
478 +                                                                       current_lsn);
479 +                                               }
480 +                                       }
481 +
482 +                                       fil_page_buf_page_store_checksum(page, zip_size);
483 +
484 +                                       success = os_file_write(filepath, file, page,
485 +                                                               (ulint)(offset & 0xFFFFFFFFUL),
486 +                                                               (ulint)(offset >> 32),
487 +                                                               zip_size ? zip_size : UNIV_PAGE_SIZE);
488 +                               }
489 +
490 +skip_write:
491 +                               if (free_limit_bytes
492 +                                   && ((ib_int64_t)((offset + (zip_size ? zip_size : UNIV_PAGE_SIZE)) * 100) / free_limit_bytes)
493 +                                       != ((offset * 100) / free_limit_bytes)) {
494 +                                       fprintf(stderr, " %lu",
495 +                                               (ulong)((ib_int64_t)((offset + (zip_size ? zip_size : UNIV_PAGE_SIZE)) * 100) / free_limit_bytes));
496 +                               }
497 +                       }
498 +
499 +                       fprintf(stderr, " done.\n");
500 +
501 +                       /* update SYS_INDEXES set root page */
502 +                       index = dict_table_get_first_index(table);
503 +                       while (index) {
504 +                               for (i = 0; i < n_index; i++) {
505 +                                       if (new_id[i] == index->id) {
506 +                                               break;
507 +                                       }
508 +                               }
509 +
510 +                               if (i != n_index
511 +                                   && root_page[i] != index->page) {
512 +                                       /* must update */
513 +                                       ulint   error;
514 +                                       trx_t*  trx;
515 +                                       pars_info_t*    info = NULL;
516 +
517 +                                       trx = trx_allocate_for_mysql();
518 +                                       trx->op_info = "extended import";
519 +
520 +                                       info = pars_info_create();
521 +
522 +                                       pars_info_add_ull_literal(info, "indexid", new_id[i]);
523 +                                       pars_info_add_int4_literal(info, "new_page", (lint) root_page[i]);
524 +
525 +                                       error = que_eval_sql(info,
526 +                                               "PROCEDURE UPDATE_INDEX_PAGE () IS\n"
527 +                                               "BEGIN\n"
528 +                                               "UPDATE SYS_INDEXES"
529 +                                               " SET PAGE_NO = :new_page"
530 +                                               " WHERE ID = :indexid;\n"
531 +                                               "COMMIT WORK;\n"
532 +                                               "END;\n",
533 +                                               FALSE, trx);
534 +
535 +                                       if (error != DB_SUCCESS) {
536 +                                               fprintf(stderr, "InnoDB: failed to update SYS_INDEXES\n");
537 +                                       }
538 +
539 +                                       trx_commit_for_mysql(trx);
540 +
541 +                                       trx_free_for_mysql(trx);
542 +
543 +                                       index->page = root_page[i];
544 +                               }
545 +
546 +                               index = dict_table_get_next_index(index);
547 +                       }
548 +                       if (UNIV_LIKELY_NULL(heap)) {
549 +                               mem_heap_free(heap);
550 +                       }
551 +               }
552 +               /* .exp file should be removed */
553 +               success = os_file_delete(info_file_path);
554 +               if (!success) {
555 +                       success = os_file_delete_if_exists(info_file_path);
556 +               }
557 +               mem_free(info_file_path);
558 +
559 +               system  = fil_system;
560 +               mutex_enter(&(system->mutex));
561 +               space = fil_space_get_by_id(id);
562 +               if (space)
563 +                       node = UT_LIST_GET_FIRST(space->chain);
564 +               if (node && node->size < size) {
565 +                       space->size += (size - node->size);
566 +                       node->size = size;
567 +               }
568 +               mutex_exit(&(system->mutex));
569 +
570 +               ut_free(buf3);
571 +
572 +               if (file_is_corrupt) {
573 +                       ut_print_timestamp(stderr);
574 +                       fputs("  InnoDB: Error: file ",
575 +                             stderr);
576 +                       ut_print_filename(stderr, filepath);
577 +                       fprintf(stderr, " seems to be corrupt.\n"
578 +                               "InnoDB: An attempt to convert and salvage all corrupt pages was not made.\n"
579 +                               "InnoDB: ##### CAUTION #####\n"
580 +                               "InnoDB: ## The .ibd file may cause InnoDB to crash, even though its re-import seems to have succeeded.\n"
581 +                               "InnoDB: ## If you don't know how to salvage data from a .ibd, you should not use the file.\n"
582 +                               "InnoDB: ###################\n");
583 +                       success = FALSE;
584 +
585 +                       ut_free(buf2);
586 +
587 +                       goto func_exit;
588 +               }
589 +       }
590 +
591         ut_free(buf2);
592  
593         if (UNIV_UNLIKELY(space_id != id
594 @@ -3167,6 +3692,271 @@
595         os_file_close(file);
596         mem_free(filepath);
597  
598 +       if (srv_expand_import && dict_table_flags_to_zip_size(flags)) {
599 +               ulint           page_no;
600 +               ulint           zip_size;
601 +               ulint           height;
602 +               ulint           root_height = 0;
603 +               rec_t*          node_ptr;
604 +               dict_table_t*   table;
605 +               dict_index_t*   index;
606 +               buf_block_t*    block;
607 +               page_t*         page;
608 +               page_zip_des_t* page_zip;
609 +               mtr_t           mtr;
610 +
611 +               mem_heap_t*     heap            = NULL;
612 +               ulint           offsets_[REC_OFFS_NORMAL_SIZE];
613 +               ulint*          offsets         = offsets_;
614 +
615 +               rec_offs_init(offsets_);
616 +
617 +               zip_size = dict_table_flags_to_zip_size(flags);
618 +
619 +               table = dict_table_get_low(name);
620 +               index = dict_table_get_first_index(table);
621 +               page_no = dict_index_get_page(index);
622 +               ut_a(page_no == 3);
623 +
624 +               fprintf(stderr, "InnoDB: It is compressed .ibd file. need to convert additionaly on buffer pool.\n");
625 +
626 +               /* down to leaf */
627 +               mtr_start(&mtr);
628 +               mtr_set_log_mode(&mtr, MTR_LOG_NONE);
629 +
630 +               height = ULINT_UNDEFINED;
631 +
632 +               for (;;) {
633 +                       block = buf_page_get(space_id, zip_size, page_no,
634 +                                            RW_NO_LATCH, &mtr);
635 +                       page = buf_block_get_frame(block);
636 +
637 +                       block->check_index_page_at_flush = TRUE;
638 +
639 +                       if (height == ULINT_UNDEFINED) {
640 +                               height = btr_page_get_level(page, &mtr);
641 +                               root_height = height;
642 +                       }
643 +
644 +                       if (height == 0) {
645 +                               break;
646 +                       }
647 +
648 +                       node_ptr = page_rec_get_next(page_get_infimum_rec(page));
649 +
650 +                       height--;
651 +
652 +                       offsets = rec_get_offsets(node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
653 +                       page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
654 +               }
655 +
656 +               mtr_commit(&mtr);
657 +
658 +               fprintf(stderr, "InnoDB: pages needs split are ...");
659 +
660 +               /* scan leaf pages */
661 +               while (page_no != FIL_NULL) {
662 +                       rec_t*  rec;
663 +                       rec_t*  supremum;
664 +                       ulint   n_recs;
665 +
666 +                       mtr_start(&mtr);
667 +
668 +                       block = buf_page_get(space_id, zip_size, page_no,
669 +                                            RW_X_LATCH, &mtr);
670 +                       page = buf_block_get_frame(block);
671 +                       page_zip = buf_block_get_page_zip(block);
672 +
673 +                       if (!page_zip) {
674 +                               /*something wrong*/
675 +                               fprintf(stderr, "InnoDB: Something wrong with reading page %lu.\n", page_no);
676 +convert_err_exit:
677 +                               mtr_commit(&mtr);
678 +                               mutex_enter(&fil_system->mutex);
679 +                               fil_space_free(space_id, FALSE);
680 +                               mutex_exit(&fil_system->mutex);
681 +                               success = FALSE;
682 +                               goto convert_exit;
683 +                       }
684 +
685 +                       supremum = page_get_supremum_rec(page);
686 +                       rec = page_rec_get_next(page_get_infimum_rec(page));
687 +                       n_recs = page_get_n_recs(page);
688 +
689 +                       /* not a legal operation for an online InnoDB system, so it is not logged */
690 +                       while (rec && rec != supremum && n_recs > 0) {
691 +                               ulint   n_fields;
692 +                               ulint   i;
693 +                               ulint   offset = index->trx_id_offset;
694 +
695 +                               offsets = rec_get_offsets(rec, index, offsets,
696 +                                               ULINT_UNDEFINED, &heap);
697 +                               n_fields = rec_offs_n_fields(offsets);
698 +                               if (!offset) {
699 +                                       offset = row_get_trx_id_offset(index, offsets);
700 +                               }
701 +                               trx_write_trx_id(rec + offset, 1);
702 +
703 +                               for (i = 0; i < n_fields; i++) {
704 +                                       if (rec_offs_nth_extern(offsets, i)) {
705 +                                               ulint   local_len;
706 +                                               byte*   data;
707 +
708 +                                               data = rec_get_nth_field(rec, offsets, i, &local_len);
709 +
710 +                                               local_len -= BTR_EXTERN_FIELD_REF_SIZE;
711 +
712 +                                               mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
713 +                                       }
714 +                               }
715 +
716 +                               rec = page_rec_get_next(rec);
717 +                               n_recs--;
718 +                       }
719 +
720 +                       /* dummy logged update, so that the page goes through the modified page path */
721 +                       if (index->id != btr_page_get_index_id(page)) {
722 +                               /* this should be adjusted already */
723 +                               fprintf(stderr, "InnoDB: The page %lu seems to be converted wrong.\n", page_no);
724 +                               goto convert_err_exit;
725 +                       }
726 +                       btr_page_set_index_id(page, page_zip, index->id, &mtr);
727 +
728 +                       /* confirm whether it fits in the page size or not */
729 +                       if (!page_zip_compress(page_zip, page, index, &mtr)
730 +                           && !btr_page_reorganize(block, index, &mtr)) {
731 +                               buf_block_t*    new_block;
732 +                               page_t*         new_page;
733 +                               page_zip_des_t* new_page_zip;
734 +                               rec_t*          split_rec;
735 +                               ulint           n_uniq;
736 +
737 +                               /* split page is needed */
738 +                               fprintf(stderr, " %lu", page_no);
739 +
740 +                               mtr_x_lock(dict_index_get_lock(index), &mtr);
741 +
742 +                               n_uniq = dict_index_get_n_unique_in_tree(index);
743 +
744 +                               if(page_get_n_recs(page) < 2) {
745 +                                       /* no way to make smaller */
746 +                                       fprintf(stderr, "InnoDB: The page %lu cannot be store to the page size.\n", page_no);
747 +                                       goto convert_err_exit;
748 +                               }
749 +
750 +                               if (UNIV_UNLIKELY(page_no == dict_index_get_page(index))) {
751 +                                       ulint           new_page_no;
752 +                                       dtuple_t*       node_ptr;
753 +                                       ulint           level;
754 +                                       rec_t*          node_ptr_rec;
755 +                                       page_cur_t      page_cursor;
756 +
757 +                                       /* it is the root page; it needs to be raised before the split */
758 +
759 +                                       level = btr_page_get_level(page, &mtr);
760 +
761 +                                       new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, &mtr);
762 +                                       new_page = buf_block_get_frame(new_block);
763 +                                       new_page_zip = buf_block_get_page_zip(new_block);
764 +                                       btr_page_create(new_block, new_page_zip, index, level, &mtr);
765 +
766 +                                       btr_page_set_next(new_page, new_page_zip, FIL_NULL, &mtr);
767 +                                       btr_page_set_prev(new_page, new_page_zip, FIL_NULL, &mtr);
768 +
769 +                                       page_zip_copy_recs(new_page_zip, new_page,
770 +                                                          page_zip, page, index, &mtr);
771 +                                       btr_search_move_or_delete_hash_entries(new_block, block, index);
772 +
773 +                                       rec = page_rec_get_next(page_get_infimum_rec(new_page));
774 +                                       new_page_no = buf_block_get_page_no(new_block);
775 +
776 +                                       node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
777 +                                                                            level);
778 +                                       dtuple_set_info_bits(node_ptr,
779 +                                                            dtuple_get_info_bits(node_ptr)
780 +                                                            | REC_INFO_MIN_REC_FLAG);
781 +                                       btr_page_empty(block, page_zip, index, level + 1, &mtr);
782 +
783 +                                       btr_page_set_next(page, page_zip, FIL_NULL, &mtr);
784 +                                       btr_page_set_prev(page, page_zip, FIL_NULL, &mtr);
785 +
786 +                                       page_cur_set_before_first(block, &page_cursor);
787 +
788 +                                       node_ptr_rec = page_cur_tuple_insert(&page_cursor, node_ptr,
789 +                                                                            index, 0, &mtr);
790 +                                       ut_a(node_ptr_rec);
791 +
792 +                                       if (!btr_page_reorganize(block, index, &mtr)) {
793 +                                               fprintf(stderr, "InnoDB: failed to store the page %lu.\n", page_no);
794 +                                               goto convert_err_exit;
795 +                                       }
796 +
797 +                                       /* move to the raised page */
798 +                                       page_no = new_page_no;
799 +                                       block = new_block;
800 +                                       page = new_page;
801 +                                       page_zip = new_page_zip;
802 +
803 +                                       fprintf(stderr, "(raise_to:%lu)", page_no);
804 +                               }
805 +
806 +                               split_rec = page_get_middle_rec(page);
807 +
808 +                               new_block = btr_page_alloc(index, page_no + 1, FSP_UP,
809 +                                                          btr_page_get_level(page, &mtr), &mtr);
810 +                               new_page = buf_block_get_frame(new_block);
811 +                               new_page_zip = buf_block_get_page_zip(new_block);
812 +                               btr_page_create(new_block, new_page_zip, index,
813 +                                               btr_page_get_level(page, &mtr), &mtr);
814 +
815 +                               offsets = rec_get_offsets(split_rec, index, offsets, n_uniq, &heap);
816 +
817 +                               btr_attach_half_pages(index, block,
818 +                                                     split_rec, new_block, FSP_UP, &mtr);
819 +
820 +                               page_zip_copy_recs(new_page_zip, new_page,
821 +                                                  page_zip, page, index, &mtr);
822 +                               page_delete_rec_list_start(split_rec - page + new_page,
823 +                                                          new_block, index, &mtr);
824 +                               btr_search_move_or_delete_hash_entries(new_block, block, index);
825 +                               page_delete_rec_list_end(split_rec, block, index,
826 +                                                        ULINT_UNDEFINED, ULINT_UNDEFINED, &mtr);
827 +
828 +                               fprintf(stderr, "(new:%lu)", buf_block_get_page_no(new_block));
829 +
830 +                               /* Are they needed? */
831 +                               if (!btr_page_reorganize(block, index, &mtr)) {
832 +                                       fprintf(stderr, "InnoDB: failed to store the page %lu.\n", page_no);
833 +                                       goto convert_err_exit;
834 +                               }
835 +                               if (!btr_page_reorganize(new_block, index, &mtr)) {
836 +                                       fprintf(stderr, "InnoDB: failed to store the page %lu.\n", buf_block_get_page_no(new_block));
837 +                                       goto convert_err_exit;
838 +                               }
839 +                       }
840 +
841 +                       page_no = btr_page_get_next(page, &mtr);
842 +
843 +                       mtr_commit(&mtr);
844 +
845 +                       if (heap) {
846 +                               mem_heap_empty(heap);
847 +                       }
848 +               }
849 +
850 +               fprintf(stderr, "...done.\nInnoDB: waiting the flush batch of the additional conversion.\n");
851 +
852 +               /* should wait until the not-logged changes are all flushed */
853 +               buf_flush_list(ULINT_MAX, mtr.end_lsn + 1);
854 +               buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
855 +
856 +               fprintf(stderr, "InnoDB: done.\n");
857 +convert_exit:
858 +               if (UNIV_LIKELY_NULL(heap)) {
859 +                       mem_heap_free(heap);
860 +               }
861 +       }
862 +
863         return(success);
864  }
865  #endif /* !UNIV_HOTBACKUP */
866 --- a/storage/innobase/handler/ha_innodb.cc
867 +++ b/storage/innobase/handler/ha_innodb.cc
868 @@ -7423,6 +7423,14 @@
869                 err = row_discard_tablespace_for_mysql(dict_table->name, trx);
870         } else {
871                 err = row_import_tablespace_for_mysql(dict_table->name, trx);
872 +
873 +               /* in expanded import mode re-initialize auto_increment again */
874 +               if ((err == DB_SUCCESS) && srv_expand_import &&
875 +                   (table->found_next_number_field != NULL)) {
876 +                       dict_table_autoinc_lock(dict_table);
877 +                       innobase_initialize_autoinc();
878 +                       dict_table_autoinc_unlock(dict_table);
879 +               }
880         }
881  
882         err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
883 @@ -11772,6 +11780,11 @@
884    "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
885    NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
886  
887 +static MYSQL_SYSVAR_ULONG(import_table_from_xtrabackup, srv_expand_import,
888 +  PLUGIN_VAR_RQCMDARG,
889 +  "Enable/Disable converting automatically *.ibd files when import tablespace.",
890 +  NULL, NULL, 0, 0, 1, 0);
891 +
892  static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit,
893    PLUGIN_VAR_RQCMDARG,
894    "Limit the allocated memory for dictionary cache. (0: unlimited)",
895 @@ -11846,6 +11859,7 @@
896    MYSQL_SYSVAR(flush_neighbor_pages),
897    MYSQL_SYSVAR(read_ahead),
898    MYSQL_SYSVAR(adaptive_flushing_method),
899 +  MYSQL_SYSVAR(import_table_from_xtrabackup),
900    MYSQL_SYSVAR(dict_size_limit),
901    MYSQL_SYSVAR(use_sys_malloc),
902    MYSQL_SYSVAR(use_native_aio),
903 --- a/storage/innobase/include/btr0btr.h
904 +++ b/storage/innobase/include/btr0btr.h
905 @@ -238,6 +238,17 @@
906  @return the uncompressed page frame */
907  # define btr_page_get(space,zip_size,page_no,mode,idx,mtr)             \
908         buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,idx,mtr))
909 +/**************************************************************//**
910 +Sets the index id field of a page. */
911 +UNIV_INLINE
912 +void
913 +btr_page_set_index_id(
914 +/*==================*/
915 +       page_t*         page,   /*!< in/out: index page */
916 +       page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
917 +                               part will be updated, or NULL */
918 +       index_id_t      id,     /*!< in: index id */
919 +       mtr_t*          mtr);   /*!< in: mini-transaction handle */
920  #endif /* !UNIV_HOTBACKUP */
921  /**************************************************************//**
922  Gets the index id field of a page.
923 @@ -275,6 +286,17 @@
924         const page_t*   page,   /*!< in: index page */
925         mtr_t*          mtr);   /*!< in: mini-transaction handle */
926  /********************************************************//**
927 +Sets the next index page field. */
928 +UNIV_INLINE
929 +void
930 +btr_page_set_next(
931 +/*==============*/
932 +       page_t*         page,   /*!< in: index page */
933 +       page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
934 +                               part will be updated, or NULL */
935 +       ulint           next,   /*!< in: next page number */
936 +       mtr_t*          mtr);   /*!< in: mini-transaction handle */
937 +/********************************************************//**
938  Gets the previous index page number.
939  @return        prev page number */
940  UNIV_INLINE
941 @@ -283,6 +305,17 @@
942  /*==============*/
943         const page_t*   page,   /*!< in: index page */
944         mtr_t*          mtr);   /*!< in: mini-transaction handle */
945 +/********************************************************//**
946 +Sets the previous index page field. */
947 +UNIV_INLINE
948 +void
949 +btr_page_set_prev(
950 +/*==============*/
951 +       page_t*         page,   /*!< in: index page */
952 +       page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
953 +                               part will be updated, or NULL */
954 +       ulint           prev,   /*!< in: previous page number */
955 +       mtr_t*          mtr);   /*!< in: mini-transaction handle */
956  /*************************************************************//**
957  Gets pointer to the previous user record in the tree. It is assumed
958  that the caller has appropriate latches on the page and its neighbor.
959 @@ -328,6 +361,18 @@
960  /*===========================*/
961         const rec_t*    rec,    /*!< in: node pointer record */
962         const ulint*    offsets);/*!< in: array returned by rec_get_offsets() */
963 +/**************************************************************//**
964 +Creates a new index page (not the root, and also not
965 +used in page reorganization).  @see btr_page_empty(). */
966 +UNIV_INTERN
967 +void
968 +btr_page_create(
969 +/*============*/
970 +       buf_block_t*    block,  /*!< in/out: page to be created */
971 +       page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
972 +       dict_index_t*   index,  /*!< in: index */
973 +       ulint           level,  /*!< in: the B-tree level of the page */
974 +       mtr_t*          mtr);   /*!< in: mtr */
975  /************************************************************//**
976  Creates the root node for a new index tree.
977  @return        page number of the created root, FIL_NULL if did not succeed */
978 @@ -397,6 +442,17 @@
979         dict_index_t*   index,  /*!< in: record descriptor */
980         mtr_t*          mtr);   /*!< in: mtr */
981  /*************************************************************//**
982 +Empties an index page.  @see btr_page_create(). */
983 +UNIV_INTERN
984 +void
985 +btr_page_empty(
986 +/*===========*/
987 +       buf_block_t*    block,  /*!< in: page to be emptied */
988 +       page_zip_des_t* page_zip,/*!< out: compressed page, or NULL */
989 +       dict_index_t*   index,  /*!< in: index of the page */
990 +       ulint           level,  /*!< in: the B-tree level of the page */
991 +       mtr_t*          mtr);   /*!< in: mtr */
992 +/*************************************************************//**
993  Decides if the page should be split at the convergence point of
994  inserts converging to left.
995  @return        TRUE if split recommended */
996 @@ -455,6 +511,20 @@
997  # define btr_insert_on_non_leaf_level(i,l,t,m)                         \
998         btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
999  #endif /* !UNIV_HOTBACKUP */
1000 +/**************************************************************//**
1001 +Attaches the halves of an index page on the appropriate level in an
1002 +index tree. */
1003 +UNIV_INTERN
1004 +void
1005 +btr_attach_half_pages(
1006 +/*==================*/
1007 +       dict_index_t*   index,          /*!< in: the index tree */
1008 +       buf_block_t*    block,          /*!< in/out: page to be split */
1009 +       rec_t*          split_rec,      /*!< in: first record on upper
1010 +                                       half page */
1011 +       buf_block_t*    new_block,      /*!< in/out: the new half page */
1012 +       ulint           direction,      /*!< in: FSP_UP or FSP_DOWN */
1013 +       mtr_t*          mtr);           /*!< in: mtr */
1014  /****************************************************************//**
1015  Sets a record as the predefined minimum record. */
1016  UNIV_INTERN
1017 --- a/storage/innobase/include/srv0srv.h
1018 +++ b/storage/innobase/include/srv0srv.h
1019 @@ -234,6 +234,8 @@
1020  extern ulint   srv_read_ahead;
1021  extern ulint   srv_adaptive_flushing_method;
1022  
1023 +extern ulint   srv_expand_import;
1024 +
1025  extern ulint   srv_dict_size_limit;
1026  /*-------------------------------------------*/
1027  
1028 --- a/storage/innobase/row/row0mysql.c
1029 +++ b/storage/innobase/row/row0mysql.c
1030 @@ -2547,6 +2547,11 @@
1031  
1032         current_lsn = log_get_lsn();
1033  
1034 +       /* Enlarge the fatal semaphore wait timeout during import. */
1035 +       mutex_enter(&kernel_mutex);
1036 +       srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
1037 +       mutex_exit(&kernel_mutex);
1038 +
1039         /* It is possible, though very improbable, that the lsn's in the
1040         tablespace to be imported have risen above the current system lsn, if
1041         a lengthy purge, ibuf merge, or rollback was performed on a backup
1042 @@ -2658,6 +2663,11 @@
1043  
1044         trx->op_info = "";
1045  
1046 +       /* Restore the fatal semaphore wait timeout */
1047 +       mutex_enter(&kernel_mutex);
1048 +       srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
1049 +       mutex_exit(&kernel_mutex);
1050 +
1051         return((int) err);
1052  }
1053  
1054 --- a/storage/innobase/srv/srv0srv.c
1055 +++ b/storage/innobase/srv/srv0srv.c
1056 @@ -418,6 +418,8 @@
1057  UNIV_INTERN ulint      srv_read_ahead = 3; /* 1: random  2: linear  3: Both */
1058  UNIV_INTERN ulint      srv_adaptive_flushing_method = 0; /* 0: native  1: estimate  2: keep_average */
1059  
1060 +UNIV_INTERN ulint      srv_expand_import = 0; /* 0:disable 1:enable */
1061 +
1062  UNIV_INTERN ulint      srv_dict_size_limit = 0;
1063  /*-------------------------------------------*/
1064  UNIV_INTERN ulong      srv_n_spin_wait_rounds  = 30;
This page took 0.186827 seconds and 3 git commands to generate.