1 # name       : innodb_buffer_pool_shm.patch
2 # introduced : 12
3 # maintainer : Yasufumi
4 #
5 #!!! notice !!!
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
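#
# Usage sketch: the patch adds two read-only system variables,
# innodb_buffer_pool_shm_key and innodb_buffer_pool_shm_checksum.
# A minimal my.cnf fragment that would enable the feature (the key
# value below is only an example; 0, the default, disables it):
#
#   [mysqld]
#   innodb_buffer_pool_shm_key      = 5678   # SysV shm key, example value
#   innodb_buffer_pool_shm_checksum = ON     # validate segment contents on reuse
#
# When a key is set the patch forces innodb_buffer_pool_instances to 1, and
# the mysqld binary, innodb_buffer_pool_size, innodb_page_size and the
# datafiles must not change between restarts for the segment to be reused.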
8 diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
9 --- a/storage/innobase/buf/buf0buddy.c  2010-12-04 19:46:39.372513543 +0900
10 +++ b/storage/innobase/buf/buf0buddy.c  2010-12-07 17:56:28.302087851 +0900
11 @@ -183,7 +183,7 @@
12         void*           buf,            /*!< in: buffer frame to deallocate */
13         ibool           have_page_hash_mutex)
14  {
15 -       const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf);
16 +       const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf_pool, buf);
17         buf_page_t*     bpage;
18         buf_block_t*    block;
19  
20 @@ -227,7 +227,7 @@
21         buf_block_t*    block)  /*!< in: buffer frame to allocate */
22  {
23         buf_pool_t*     buf_pool = buf_pool_from_block(block);
24 -       const ulint     fold = BUF_POOL_ZIP_FOLD(block);
25 +       const ulint     fold = BUF_POOL_ZIP_FOLD(buf_pool, block);
26         //ut_ad(buf_pool_mutex_own(buf_pool));
27         ut_ad(!mutex_own(&buf_pool->zip_mutex));
28         ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
29 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
30 --- a/storage/innobase/buf/buf0buf.c    2010-12-06 20:16:21.726195340 +0900
31 +++ b/storage/innobase/buf/buf0buf.c    2010-12-07 20:40:30.824749814 +0900
32 @@ -53,6 +53,10 @@
33  #include "page0zip.h"
34  #include "trx0trx.h"
35  #include "srv0start.h"
36 +#include "que0que.h"
37 +#include "read0read.h"
38 +#include "row0row.h"
39 +#include "ha_prototypes.h"
40  
41  /* prototypes for new functions added to ha_innodb.cc */
42  trx_t* innobase_get_trx();
43 @@ -342,6 +346,31 @@
44  //                                     was allocated for the frames */
45  //     buf_block_t*    blocks;         /*!< array of buffer control blocks */
46  //};
47 +
48 +/* Buffer pool shared memory segment information */
49 +typedef        struct buf_shm_info_struct      buf_shm_info_t;
50 +
51 +struct buf_shm_info_struct {
52 +       char    head_str[8];
53 +       ulint   binary_id;
54 +       ibool   is_new;         /* during initializing */
55 +       ibool   clean;          /* cleanly shut down and freed */
56 +       ibool   reusable;       /* reusable */
57 +       ulint   buf_pool_size;  /* backup value */
58 +       ulint   page_size;      /* backup value */
59 +       ulint   frame_offset;   /* offset of the first frame based on chunk->mem */
60 +       ulint   zip_hash_offset;
61 +       ulint   zip_hash_n;
62 +
63 +       ulint   checksum;
64 +
65 +       buf_pool_t      buf_pool_backup;
66 +       buf_chunk_t     chunk_backup;
67 +
68 +       ib_uint64_t     dummy;
69 +};
70 +
71 +#define BUF_SHM_INFO_HEAD "XTRA_SHM"
72  #endif /* !UNIV_HOTBACKUP */
73  
74  /********************************************************************//**
75 @@ -988,6 +1017,58 @@
76  #endif /* UNIV_SYNC_DEBUG */
77  }
78  
79 +static
80 +void
81 +buf_block_reuse(
82 +/*============*/
83 +       buf_block_t*    block,
84 +       ptrdiff_t       frame_offset)
85 +{
86 +       /* block_init */
87 +       block->frame += frame_offset;
88 +
89 +       UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);
90 +
91 +       block->index = NULL;
92 +       block->btr_search_latch = NULL;
93 +
94 +#ifdef UNIV_DEBUG
95 +       /* recreate later */
96 +       block->page.in_page_hash = FALSE;
97 +       block->page.in_zip_hash = FALSE;
98 +#endif /* UNIV_DEBUG */
99 +
100 +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
101 +       block->n_pointers = 0;
102 +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
103 +
104 +       if (block->page.zip.data)
105 +               block->page.zip.data += frame_offset;
106 +
107 +       block->is_hashed = FALSE;
108 +
109 +#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
110 +       /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
111 +       of buffer block mutex/rwlock with performance schema. If
112 +       PFS_GROUP_BUFFER_SYNC is defined, skip the registration
113 +       since buffer block mutex/rwlock will be registered later in
114 +       pfs_register_buffer_block() */
115 +
116 +       mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
117 +       rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
118 +#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
119 +       mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
120 +       rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
121 +#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
122 +
123 +       ut_ad(rw_lock_validate(&(block->lock)));
124 +
125 +#ifdef UNIV_SYNC_DEBUG
126 +       rw_lock_create(buf_block_debug_latch_key,
127 +                      &block->debug_latch, SYNC_NO_ORDER_CHECK);
128 +#endif /* UNIV_SYNC_DEBUG */
129 +}
130 +
131  /********************************************************************//**
132  Allocates a chunk of buffer frames.
133  @return        chunk, or NULL on failure */
134 @@ -1001,26 +1082,190 @@
135  {
136         buf_block_t*    block;
137         byte*           frame;
138 +       ulint           zip_hash_n = 0;
139 +       ulint           zip_hash_mem_size = 0;
140 +       hash_table_t*   zip_hash_tmp = NULL;
141         ulint           i;
142 +       ulint           size_target;
143 +       buf_shm_info_t* shm_info = NULL;
144  
145         /* Round down to a multiple of page size,
146         although it already should be. */
147         mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
148 +       size_target = (mem_size / UNIV_PAGE_SIZE) - 1;
149 +
150 +       srv_buffer_pool_shm_is_reused = FALSE;
151 +
152 +       if (srv_buffer_pool_shm_key) {
153 +               /* zip_hash size */
154 +               zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
155 +               zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n)
156 +                                                 + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
157 +       }
158 +
159         /* Reserve space for the block descriptors. */
160         mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
161                                   + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
162 +       if (srv_buffer_pool_shm_key) {
163 +                mem_size += ut_2pow_round(sizeof(buf_shm_info_t)
164 +                                          + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
165 +                mem_size += zip_hash_mem_size;
166 +       }
167  
168         chunk->mem_size = mem_size;
169 +
170 +       if (srv_buffer_pool_shm_key) {
171 +               ulint   binary_id;
172 +               ibool   is_new;
173 +
174 +               ut_a(buf_pool->n_chunks == 1);
175 +
176 +               fprintf(stderr,
177 +               "InnoDB: Notice: The innodb_buffer_pool_shm_key option has been specified.\n"
178 +               "InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
179 +               "InnoDB:   * the mysqld executable between restarts of the server.\n"
180 +               "InnoDB:   * the value of innodb_buffer_pool_size.\n"
181 +               "InnoDB:   * the value of innodb_page_size.\n"
182 +               "InnoDB:   * datafiles created by InnoDB during this session.\n"
183 +               "InnoDB: Otherwise, data corruption in datafiles may result.\n");
184 +
185 +               /* FIXME: This is vague id still */
186 +               binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
187 +                         + (ulint) ((byte*)os_file_get_last_error - (byte*)buf_calc_page_new_checksum)
188 +                         + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
189 +                         + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
190 +                         + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
191 +                         + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
192 +                         + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
193 +                         + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
194 +                         + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
195 +                         + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
196 +                         + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);
197 +
198 +               chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);
199 +
200 +               if (UNIV_UNLIKELY(chunk->mem == NULL)) {
201 +                       return(NULL);
202 +               }
203 +init_again:
204 +#ifdef UNIV_SET_MEM_TO_ZERO
205 +               if (is_new) {
206 +                       memset(chunk->mem, '\0', chunk->mem_size);
207 +               }
208 +#endif
209 +               /* for ut_fold_binary_32(), these values should be 32-bit aligned */
210 +               ut_a(sizeof(buf_shm_info_t) % 4 == 0);
211 +               ut_a((ulint)chunk->mem % 4 == 0);
212 +               ut_a(chunk->mem_size % 4 == 0);
213 +
214 +               shm_info = chunk->mem;
215 +
216 +               zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);
217 +
218 +               if (is_new) {
219 +                       strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
220 +                       shm_info->binary_id = binary_id;
221 +                       shm_info->is_new = TRUE;        /* changed to FALSE when the initialization is finished */
222 +                       shm_info->clean = FALSE;        /* changed to TRUE when free the segment. */
223 +                       shm_info->reusable = FALSE;     /* changed to TRUE when validation is finished. */
224 +                       shm_info->buf_pool_size = srv_buf_pool_size;
225 +                       shm_info->page_size = srv_page_size;
226 +                       shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size;
227 +                       shm_info->zip_hash_n = zip_hash_n;
228 +               } else {
229 +                       ulint   checksum;
230 +
231 +                       if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) {
232 +                               fprintf(stderr,
233 +                               "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n");
234 +                               return(NULL);
235 +                       }
236 +                       if (shm_info->binary_id != binary_id) {
237 +                               fprintf(stderr,
238 +                               "InnoDB: Error: The shared memory segment seems not to be for this binary.\n");
239 +                               return(NULL);
240 +                       }
241 +                       if (shm_info->is_new) {
242 +                               fprintf(stderr,
243 +                               "InnoDB: Error: The shared memory was not initialized yet.\n");
244 +                               return(NULL);
245 +                       }
246 +                       if (shm_info->buf_pool_size != srv_buf_pool_size) {
247 +                               fprintf(stderr,
248 +                               "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
249 +                               shm_info->buf_pool_size, srv_buf_pool_size);
250 +                               return(NULL);
251 +                       }
252 +                       if (shm_info->page_size != srv_page_size) {
253 +                               fprintf(stderr,
254 +                               "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n",
255 +                               shm_info->page_size, srv_page_size);
256 +                               return(NULL);
257 +                       }
258 +                       if (!shm_info->reusable) {
259 +                               fprintf(stderr,
260 +                               "InnoDB: Warning: The shared memory has unrecoverable contents.\n"
261 +                               "InnoDB: The shared memory segment is initialized.\n");
262 +                               is_new = TRUE;
263 +                               goto init_again;
264 +                       }
265 +                       if (!shm_info->clean) {
266 +                               fprintf(stderr,
267 +                               "InnoDB: Warning: The shared memory was not shut down cleanly.\n"
268 +                               "InnoDB: The shared memory segment is initialized.\n");
269 +                               is_new = TRUE;
270 +                               goto init_again;
271 +                       }
272 +
273 +                       ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
274 +                       ut_a(shm_info->zip_hash_n == zip_hash_n);
275 +
276 +                       /* check checksum */
277 +                       if (srv_buffer_pool_shm_checksum) {
278 +                               checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
279 +                                                            chunk->mem_size - sizeof(buf_shm_info_t));
280 +                       } else {
281 +                               checksum = BUF_NO_CHECKSUM_MAGIC;
282 +                       }
283 +
284 +                       if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
285 +                           && shm_info->checksum != checksum) {
286 +                               fprintf(stderr,
287 +                               "InnoDB: Error: checksum of the shared memory does not match. "
288 +                               "(stored=%lu calculated=%lu)\n",
289 +                               shm_info->checksum, checksum);
290 +                               return(NULL);
291 +                       }
292 +
293 +                       /* flag to use the segment. */
294 +                       shm_info->clean = FALSE;        /* changed to TRUE when free the segment. */
295 +               }
296 +
297 +               /* init zip_hash contents */
298 +               if (is_new) {
299 +                       hash_create_init(zip_hash_tmp, zip_hash_n);
300 +               } else {
301 +                       /* adjust offset is done later */
302 +                       hash_create_reuse(zip_hash_tmp);
303 +
304 +                       srv_buffer_pool_shm_is_reused = TRUE;
305 +               }
306 +       } else {
307         chunk->mem = os_mem_alloc_large(&chunk->mem_size);
308  
309         if (UNIV_UNLIKELY(chunk->mem == NULL)) {
310  
311                 return(NULL);
312         }
313 +       }
314  
315         /* Allocate the block descriptors from
316         the start of the memory block. */
317 +       if (srv_buffer_pool_shm_key) {
318 +               chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
319 +       } else {
320         chunk->blocks = chunk->mem;
321 +       }
322  
323         /* Align a pointer to the first frame.  Note that when
324         os_large_page_size is smaller than UNIV_PAGE_SIZE,
325 @@ -1028,8 +1273,13 @@
326         it is bigger, we may allocate more blocks than requested. */
327  
328         frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
329 +       if (srv_buffer_pool_shm_key) {
330 +               /* reserve zip_hash space and always -1 for reproducibility */
331 +               chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1;
332 +       } else {
333         chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
334                 - (frame != chunk->mem);
335 +       }
336  
337         /* Subtract the space needed for block descriptors. */
338         {
339 @@ -1043,6 +1293,102 @@
340                 chunk->size = size;
341         }
342  
343 +       if (chunk->size > size_target) {
344 +               chunk->size = size_target;
345 +       }
346 +
347 +       if (shm_info && !(shm_info->is_new)) {
348 +               /* convert the shared memory segment for reuse */
349 +               ptrdiff_t       phys_offset;
350 +               ptrdiff_t       logi_offset;
351 +               ptrdiff_t       blocks_offset;
352 +               void*           previous_frame_address;
353 +
354 +               if (chunk->size < shm_info->chunk_backup.size) {
355 +                       fprintf(stderr,
356 +                       "InnoDB: Error: The buffer pool became smaller because of allocated address.\n"
357 +                       "InnoDB: Retrying may avoid this situation.\n");
358 +                       shm_info->clean = TRUE; /* release the flag for retrying */
359 +                       return(NULL);
360 +               }
361 +
362 +               chunk->size = shm_info->chunk_backup.size;
363 +               phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
364 +               logi_offset = frame - chunk->blocks[0].frame;
365 +               previous_frame_address = chunk->blocks[0].frame;
366 +               blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;
367 +
368 +               if (phys_offset || logi_offset || blocks_offset) {
369 +                       fprintf(stderr,
370 +                       "InnoDB: Buffer pool in the shared memory segment should be converted.\n"
371 +                       "InnoDB: Previous frames in address      : %p\n"
372 +                       "InnoDB: Previous frames were located    : %p\n"
373 +                       "InnoDB: Current frames should be located: %p\n"
374 +                       "InnoDB: Pysical offset                  : %ld (%#lx)\n"
375 +                       "InnoDB: Logical offset (frames)         : %ld (%#lx)\n"
376 +                       "InnoDB: Logical offset (blocks)         : %ld (%#lx)\n",
377 +                               (byte*)chunk->mem + shm_info->frame_offset,
378 +                               chunk->blocks[0].frame, frame,
379 +                               phys_offset, phys_offset, logi_offset, logi_offset,
380 +                               blocks_offset, blocks_offset);
381 +               } else {
382 +                       fprintf(stderr,
383 +                       "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
384 +               }
385 +
386 +               if (phys_offset) {
387 +                       fprintf(stderr,
388 +                       "InnoDB: Aligning physical offset...");
389 +
390 +                       memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
391 +                               chunk->size * UNIV_PAGE_SIZE);
392 +
393 +                       fprintf(stderr,
394 +                       " Done.\n");
395 +               }
396 +
397 +               /* buf_block_t */
398 +               block = chunk->blocks;
399 +               for (i = chunk->size; i--; ) {
400 +                       buf_block_reuse(block, logi_offset);
401 +                       block++;
402 +               }
403 +
404 +               if (logi_offset || blocks_offset) {
405 +                       fprintf(stderr,
406 +                       "InnoDB: Aligning logical offset...");
407 +
408 +
409 +                       /* buf_pool_t buf_pool_backup */
410 +                       UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
411 +                                       previous_frame_address, logi_offset, blocks_offset);
412 +                       UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free,
413 +                                       previous_frame_address, logi_offset, blocks_offset);
414 +                       UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU,
415 +                                       previous_frame_address, logi_offset, blocks_offset);
416 +                       if (shm_info->buf_pool_backup.LRU_old)
417 +                               shm_info->buf_pool_backup.LRU_old =
418 +                                       (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
419 +                                               + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
420 +                                                 ? logi_offset : blocks_offset));
421 +
422 +                       UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
423 +                                       previous_frame_address, logi_offset, blocks_offset);
424 +
425 +                       UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean,
426 +                                       previous_frame_address, logi_offset, blocks_offset);
427 +                       for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
428 +                               UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i],
429 +                                       previous_frame_address, logi_offset, blocks_offset);
430 +                       }
431 +
432 +                       HASH_OFFSET(zip_hash_tmp, buf_page_t, hash,
433 +                                       previous_frame_address, logi_offset, blocks_offset);
434 +
435 +                       fprintf(stderr,
436 +                       " Done.\n");
437 +               }
438 +       } else {
439         /* Init block structs and assign frames for them. Then we
440         assign the frames to the first blocks (we already mapped the
441         memory above). */
442 @@ -1068,6 +1414,11 @@
443                 block++;
444                 frame += UNIV_PAGE_SIZE;
445         }
446 +       }
447 +
448 +       if (shm_info) {
449 +               shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
450 +       }
451  
452  #ifdef PFS_GROUP_BUFFER_SYNC
453         pfs_register_buffer_block(chunk);
454 @@ -1249,6 +1600,8 @@
455                 UNIV_MEM_UNDESC(block);
456         }
457  
458 +       ut_a(!srv_buffer_pool_shm_key);
459 +
460         os_mem_free_large(chunk->mem, chunk->mem_size);
461  }
462  
463 @@ -1289,7 +1642,7 @@
464         ulint           instance_no)    /*!< in: id of the instance */
465  {
466         ulint           i;
467 -       buf_chunk_t*    chunk;
468 +       buf_chunk_t*    chunk = NULL;
469  
470         /* 1. Initialize general fields
471         ------------------------------- */
472 @@ -1335,7 +1688,10 @@
473                 buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
474  
475                 buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
476 +               /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */
477 +               if (!srv_buffer_pool_shm_key) {
478                 buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
479 +               }
480                 
481                 buf_pool->last_printout_time = ut_time();
482         }
483 @@ -1354,6 +1710,86 @@
484  
485         /* All fields are initialized by mem_zalloc(). */
486  
487 +       if (chunk && srv_buffer_pool_shm_key) {
488 +               buf_shm_info_t* shm_info;
489 +
490 +               ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
491 +               shm_info = chunk->mem;
492 +
493 +               buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);
494 +
495 +               if(shm_info->is_new) {
496 +                       shm_info->is_new = FALSE; /* initialization was finished */
497 +               } else {
498 +                       buf_block_t*    block = chunk->blocks;
499 +                       buf_page_t*     b;
500 +
501 +                       /* shm_info->buf_pool_backup should be converted */
502 +                       /* at buf_chunk_init(). So copy simply. */
503 +                       buf_pool->flush_list            = shm_info->buf_pool_backup.flush_list;
504 +                       buf_pool->freed_page_clock      = shm_info->buf_pool_backup.freed_page_clock;
505 +                       buf_pool->free                  = shm_info->buf_pool_backup.free;
506 +                       buf_pool->LRU                   = shm_info->buf_pool_backup.LRU;
507 +                       buf_pool->LRU_old               = shm_info->buf_pool_backup.LRU_old;
508 +                       buf_pool->LRU_old_len           = shm_info->buf_pool_backup.LRU_old_len;
509 +                       buf_pool->unzip_LRU             = shm_info->buf_pool_backup.unzip_LRU;
510 +                       buf_pool->zip_clean             = shm_info->buf_pool_backup.zip_clean;
511 +                       for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
512 +                               buf_pool->zip_free[i]   = shm_info->buf_pool_backup.zip_free[i];
513 +                       }
514 +
515 +                       for (i = 0; i < chunk->size; i++, block++) {
516 +                               if (buf_block_get_state(block)
517 +                                   == BUF_BLOCK_FILE_PAGE) {
518 +                                       ut_d(block->page.in_page_hash = TRUE);
519 +                                       HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
520 +                                                   buf_page_address_fold(
521 +                                                           block->page.space,
522 +                                                           block->page.offset),
523 +                                                   &block->page);
524 +                               }
525 +                       }
526 +
527 +                       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
528 +                            b = UT_LIST_GET_NEXT(zip_list, b)) {
529 +                               ut_ad(!b->in_flush_list);
530 +                               ut_ad(b->in_LRU_list);
531 +
532 +                               ut_d(b->in_page_hash = TRUE);
533 +                               HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
534 +                                           buf_page_address_fold(b->space, b->offset), b);
535 +                       }
536 +
537 +                       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
538 +                            b = UT_LIST_GET_NEXT(flush_list, b)) {
539 +                               ut_ad(b->in_flush_list);
540 +                               ut_ad(b->in_LRU_list);
541 +
542 +                               switch (buf_page_get_state(b)) {
543 +                               case BUF_BLOCK_ZIP_DIRTY:
544 +                                       ut_d(b->in_page_hash = TRUE);
545 +                                       HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
546 +                                                   buf_page_address_fold(b->space,
547 +                                                                         b->offset), b);
548 +                                       break;
549 +                               case BUF_BLOCK_FILE_PAGE:
550 +                                       /* uncompressed page */
551 +                                       break;
552 +                               case BUF_BLOCK_ZIP_FREE:
553 +                               case BUF_BLOCK_ZIP_PAGE:
554 +                               case BUF_BLOCK_NOT_USED:
555 +                               case BUF_BLOCK_READY_FOR_USE:
556 +                               case BUF_BLOCK_MEMORY:
557 +                               case BUF_BLOCK_REMOVE_HASH:
558 +                                       ut_error;
559 +                                       break;
560 +                               }
561 +                       }
562 +
563 +
564 +               }
565 +       }
566 +
567         mutex_exit(&buf_pool->LRU_list_mutex);
568         rw_lock_x_unlock(&buf_pool->page_hash_latch);
569         buf_pool_mutex_exit(buf_pool);
570 @@ -1373,6 +1809,42 @@
571         buf_chunk_t*    chunk;
572         buf_chunk_t*    chunks;
573  
574 +       if (srv_buffer_pool_shm_key) {
575 +               buf_shm_info_t* shm_info;
576 +
577 +               ut_a(buf_pool->n_chunks == 1);
578 +
579 +               chunk = buf_pool->chunks;
580 +               shm_info = chunk->mem;
581 +               ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
582 +
583 +               /* if opened, close shm. */
584 +               if (!shm_info->clean) {
585 +                       /* validate that the shared memory segment doesn't have unrecoverable contents. */
586 +                       /* Currently, this validation is no longer needed. */
587 +                       shm_info->reusable = TRUE;
588 +
589 +                       memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t));
590 +                       memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));
591 +
592 +                       if (srv_fast_shutdown < 2) {
593 +                               if (srv_buffer_pool_shm_checksum) {
594 +                                       shm_info->checksum =
595 +                                               ut_fold_binary_32(
596 +                                                       (byte*)chunk->mem + sizeof(buf_shm_info_t),
597 +                                                       chunk->mem_size - sizeof(buf_shm_info_t));
598 +                               } else {
599 +                                       shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
600 +                               }
601 +                               shm_info->clean = TRUE;
602 +                       }
603 +
604 +                       fprintf(stderr,
605 +                               "InnoDB: The shared memory was closed.\n");
606 +               }
607 +
608 +               os_shm_free(chunk->mem, chunk->mem_size);
609 +       } else {
610         chunks = buf_pool->chunks;
611         chunk = chunks + buf_pool->n_chunks;
612  
613 @@ -1381,10 +1853,13 @@
614                 would fail at shutdown. */
615                 os_mem_free_large(chunk->mem, chunk->mem_size);
616         }
617 +       }
618  
619         mem_free(buf_pool->chunks);
620         hash_table_free(buf_pool->page_hash);
621 +       if (!srv_buffer_pool_shm_key) {
622         hash_table_free(buf_pool->zip_hash);
623 +       }
624  }
625  
626  /********************************************************************//**
627 @@ -1668,6 +2143,11 @@
628         //buf_pool_mutex_enter(buf_pool);
629         mutex_enter(&buf_pool->LRU_list_mutex);
630  
631 +       if (srv_buffer_pool_shm_key) {
632 +               /* Cannot support shrink */
633 +               goto func_done;
634 +       }
635 +
636  shrink_again:
637         if (buf_pool->n_chunks <= 1) {
638  
639 @@ -1848,7 +2328,7 @@
640         zip_hash = hash_create(2 * buf_pool->curr_size);
641  
642         HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
643 -                    BUF_POOL_ZIP_FOLD_BPAGE);
644 +                    buf_pool, BUF_POOL_ZIP_FOLD_BPAGE);
645  
646         hash_table_free(buf_pool->zip_hash);
647         buf_pool->zip_hash = zip_hash;
648 @@ -2130,6 +2610,11 @@
649         ulint   change_size;
650         ulint   min_change_size = 1048576 * srv_buf_pool_instances;
651  
652 +       if (srv_buffer_pool_shm_key) {
653 +               /* Cannot support resize */
654 +               return;
655 +       }
656 +
657         buf_pool_mutex_enter_all();
658    
659         if (srv_buf_pool_old_size == srv_buf_pool_size) {
660 diff -ruN a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c
661 --- a/storage/innobase/ha/hash0hash.c   2010-11-03 07:01:13.000000000 +0900
662 +++ b/storage/innobase/ha/hash0hash.c   2010-12-07 16:10:14.937749140 +0900
663 @@ -133,6 +133,70 @@
664  }
665  
666  /*************************************************************//**
667 +*/
668 +UNIV_INTERN
669 +ulint
670 +hash_create_needed(
671 +/*===============*/
672 +       ulint   n)
673 +{
674 +       ulint   prime;
675 +       ulint   offset;
676 +
677 +       prime = ut_find_prime(n);
678 +
679 +       offset = (sizeof(hash_table_t) + 7) / 8;
680 +       offset *= 8;
681 +
682 +       return(offset + sizeof(hash_cell_t) * prime);
683 +}
684 +
685 +UNIV_INTERN
686 +void
687 +hash_create_init(
688 +/*=============*/
689 +       hash_table_t*   table,
690 +       ulint           n)
691 +{
692 +       ulint   prime;
693 +       ulint   offset;
694 +
695 +       prime = ut_find_prime(n);
696 +
697 +       offset = (sizeof(hash_table_t) + 7) / 8;
698 +       offset *= 8;
699 +
700 +       table->array = (hash_cell_t*)(((byte*)table) + offset);
701 +       table->n_cells = prime;
702 +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
703 +       table->adaptive = FALSE;
704 +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
705 +       table->n_mutexes = 0;
706 +       table->mutexes = NULL;
707 +       table->heaps = NULL;
708 +       table->heap = NULL;
709 +       ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
710 +
711 +       /* Initialize the cell array */
712 +       hash_table_clear(table);
713 +}
714 +
715 +UNIV_INTERN
716 +void
717 +hash_create_reuse(
718 +/*==============*/
719 +       hash_table_t*   table)
720 +{
721 +       ulint   offset;
722 +
723 +       offset = (sizeof(hash_table_t) + 7) / 8;
724 +       offset *= 8;
725 +
726 +       table->array = (hash_cell_t*)(((byte*)table) + offset);
727 +       ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
728 +}
729 +
730 +/*************************************************************//**
731  Frees a hash table. */
732  UNIV_INTERN
733  void
734 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
735 --- a/storage/innobase/handler/ha_innodb.cc     2010-12-06 20:16:21.733263627 +0900
736 +++ b/storage/innobase/handler/ha_innodb.cc     2010-12-07 17:56:28.316139830 +0900
737 @@ -194,6 +194,7 @@
738  static my_bool innobase_create_status_file             = FALSE;
739  static my_bool innobase_stats_on_metadata              = TRUE;
740  static my_bool innobase_use_sys_stats_table            = FALSE;
741 +static my_bool innobase_buffer_pool_shm_checksum       = TRUE;
742  
743  
744  static char*   internal_innobase_data_file_path        = NULL;
745 @@ -2624,6 +2625,14 @@
746         srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
747         srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
748  
749 +       if (srv_buffer_pool_shm_key && srv_buf_pool_instances > 1) {
750 +               fprintf(stderr,
751 +                       "InnoDB: Warning: innodb_buffer_pool_shm_key cannot be used with several innodb_buffer_pool_instances.\n"
752 +                       "InnoDB:          innodb_buffer_pool_instances was set to 1.\n");
753 +               srv_buf_pool_instances = 1;
754 +               innobase_buffer_pool_instances = 1;
755 +       }
756 +
757         srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
758  
759         srv_n_file_io_threads = (ulint) innobase_file_io_threads;
760 @@ -2640,6 +2649,7 @@
761         srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
762         srv_use_checksums = (ibool) innobase_use_checksums;
763         srv_fast_checksum = (ibool) innobase_fast_checksum;
764 +       srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;
765  
766  #ifdef HAVE_LARGE_PAGES
767          if ((os_use_large_pages = (ibool) my_use_large_pages))
768 @@ -11648,6 +11658,16 @@
769    "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
770    NULL, NULL, 1L, 1L, MAX_BUFFER_POOLS, 1L);
771  
772 +static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,
773 +  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
774 +  "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",
775 +  NULL, NULL, 0, 0, INT_MAX32, 0);
776 +
777 +static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
778 +  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
779 +  "Enable buffer_pool_shm checksum validation (enabled by default).",
780 +  NULL, NULL, TRUE);
781 +
782  static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
783    PLUGIN_VAR_RQCMDARG,
784    "Helps in performance tuning in heavily concurrent environments.",
785 @@ -11939,6 +11959,8 @@
786    MYSQL_SYSVAR(autoextend_increment),
787    MYSQL_SYSVAR(buffer_pool_size),
788    MYSQL_SYSVAR(buffer_pool_instances),
789 +  MYSQL_SYSVAR(buffer_pool_shm_key),
790 +  MYSQL_SYSVAR(buffer_pool_shm_checksum),
791    MYSQL_SYSVAR(checksums),
792    MYSQL_SYSVAR(fast_checksum),
793    MYSQL_SYSVAR(commit_concurrency),
794 diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
795 --- a/storage/innobase/include/buf0buf.h        2010-12-06 20:16:21.778264552 +0900
796 +++ b/storage/innobase/include/buf0buf.h        2010-12-07 17:56:28.322749380 +0900
797 @@ -36,6 +36,7 @@
798  #ifndef UNIV_HOTBACKUP
799  #include "ut0rbt.h"
800  #include "os0proc.h"
801 +#include "srv0srv.h"
802  
803  /** @name Modes for buf_page_get_gen */
804  /* @{ */
805 @@ -1592,9 +1593,12 @@
806  /**********************************************************************//**
807  Compute the hash fold value for blocks in buf_pool->zip_hash. */
808  /* @{ */
809 -#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
810 -#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
811 -#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
812 +/* the fold should be relative when srv_buffer_pool_shm_key is enabled */
813 +#define BUF_POOL_ZIP_FOLD_PTR(bpool, ptr) (!srv_buffer_pool_shm_key\
814 +                                       ?((ulint) (ptr) / UNIV_PAGE_SIZE)\
815 +                                       :((ulint) ((byte*)ptr - (byte*)(buf_page_from_array(bpool, 0)->frame)) / UNIV_PAGE_SIZE))
816 +#define BUF_POOL_ZIP_FOLD(bpool, b) BUF_POOL_ZIP_FOLD_PTR(bpool, (b)->frame)
817 +#define BUF_POOL_ZIP_FOLD_BPAGE(bpool, b) BUF_POOL_ZIP_FOLD(bpool, (buf_block_t*) (b))
818  /* @} */
819  
820  /** A chunk of buffers.  The buffer pool is allocated in chunks. */
821 diff -ruN a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
822 --- a/storage/innobase/include/hash0hash.h      2010-11-03 07:01:13.000000000 +0900
823 +++ b/storage/innobase/include/hash0hash.h      2010-12-07 17:56:28.324726446 +0900
824 @@ -49,6 +49,28 @@
825  hash_create(
826  /*========*/
827         ulint   n);     /*!< in: number of array cells */
828 +
829 +/*************************************************************//**
830 +*/
831 +UNIV_INTERN
832 +ulint
833 +hash_create_needed(
834 +/*===============*/
835 +       ulint   n);
836 +
837 +UNIV_INTERN
838 +void
839 +hash_create_init(
840 +/*=============*/
841 +       hash_table_t*   table,
842 +       ulint           n);
843 +
844 +UNIV_INTERN
845 +void
846 +hash_create_reuse(
847 +/*==============*/
848 +       hash_table_t*   table);
849 +
850  #ifndef UNIV_HOTBACKUP
851  /*************************************************************//**
852  Creates a mutex array to protect a hash table. */
853 @@ -306,7 +328,7 @@
854  /****************************************************************//**
855  Move all hash table entries from OLD_TABLE to NEW_TABLE. */
856  
857 -#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
858 +#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, BPOOL, FOLD_FUNC) \
859  do {\
860         ulint           i2222;\
861         ulint           cell_count2222;\
862 @@ -318,7 +340,7 @@
863  \
864                 while (node2222) {\
865                         NODE_TYPE*      next2222 = node2222->PTR_NAME;\
866 -                       ulint           fold2222 = FOLD_FUNC(node2222);\
867 +                       ulint           fold2222 = FOLD_FUNC(BPOOL, node2222);\
868  \
869                         HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
870                                 fold2222, node2222);\
871 @@ -327,6 +349,33 @@
872                 }\
873         }\
874  } while (0)
875 +
876 +/********************************************************************//**
877 +Align nodes with moving location.*/
878 +#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \
879 +do {\
880 +       ulint           i2222;\
881 +       ulint           cell_count2222;\
882 +\
883 +       cell_count2222 = hash_get_n_cells(TABLE);\
884 +\
885 +       for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
886 +               NODE_TYPE*      node2222;\
887 +\
888 +               if ((TABLE)->array[i2222].node) \
889 +                       (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
890 +                       + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\
891 +               node2222 = HASH_GET_FIRST((TABLE), i2222);\
892 +\
893 +               while (node2222) {\
894 +                       if (node2222->PTR_NAME) \
895 +                               node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
896 +                               + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\
897 +\
898 +                       node2222 = node2222->PTR_NAME;\
899 +               }\
900 +       }\
901 +} while (0)
902  
903  /************************************************************//**
904  Gets the mutex index for a fold value in a hash table.
905 diff -ruN a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
906 --- a/storage/innobase/include/os0proc.h        2010-11-03 07:01:13.000000000 +0900
907 +++ b/storage/innobase/include/os0proc.h        2010-12-07 16:10:14.955718750 +0900
908 @@ -32,6 +32,11 @@
909  #ifdef UNIV_LINUX
910  #include <sys/ipc.h>
911  #include <sys/shm.h>
912 +#else
913 +# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
914 +#include <sys/ipc.h>
915 +#include <sys/shm.h>
916 +# endif
917  #endif
918  
919  typedef void*                  os_process_t;
920 @@ -70,6 +75,29 @@
921         ulint   size);                  /*!< in: size returned by
922                                         os_mem_alloc_large() */
923  
924 +
925 +/****************************************************************//**
926 +Allocates or attaches and reuses shared memory segment.
927 +The content is not cleared automatically.
928 +@return        allocated memory */
929 +UNIV_INTERN
930 +void*
931 +os_shm_alloc(
932 +/*=========*/
933 +       ulint*  n,                      /*!< in/out: number of bytes */
934 +       uint    key,
935 +       ibool*  is_new);
936 +
937 +/****************************************************************//**
938 +Detach shared memory segment. */
939 +UNIV_INTERN
940 +void
941 +os_shm_free(
942 +/*========*/
943 +       void    *ptr,                   /*!< in: pointer returned by
944 +                                       os_shm_alloc() */
945 +       ulint   size);                  /*!< in: size returned by
946 +                                       os_shm_alloc() */
947  #ifndef UNIV_NONINL
948  #include "os0proc.ic"
949  #endif
950 diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
951 --- a/storage/innobase/include/srv0srv.h        2010-12-04 20:20:28.016566697 +0900
952 +++ b/storage/innobase/include/srv0srv.h        2010-12-07 16:10:14.956717659 +0900
953 @@ -171,6 +171,10 @@
954  extern ulint   srv_mem_pool_size;
955  extern ulint   srv_lock_table_size;
956  
957 +extern uint    srv_buffer_pool_shm_key;
958 +extern ibool   srv_buffer_pool_shm_is_reused;
959 +extern ibool   srv_buffer_pool_shm_checksum;
960 +
961  extern ibool   srv_thread_concurrency_timer_based;
962  
963  extern ulint   srv_n_file_io_threads;
964 diff -ruN a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
965 --- a/storage/innobase/include/ut0lst.h 2010-11-03 07:01:13.000000000 +0900
966 +++ b/storage/innobase/include/ut0lst.h 2010-12-07 16:10:14.957785525 +0900
967 @@ -257,5 +257,48 @@
968         ut_a(ut_list_node_313 == NULL);                                 \
969  } while (0)
970  
971 +/********************************************************************//**
972 +Align nodes with moving location.
973 +@param NAME            the name of the list
974 +@param TYPE            node type
975 +@param BASE            base node (not a pointer to it)
976 +@param OFFSET          offset moved */
977 +#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET)      \
978 +do {                                                                   \
979 +       ulint   ut_list_i_313;                                          \
980 +       TYPE*   ut_list_node_313;                                       \
981 +                                                                       \
982 +       if ((BASE).start)                                               \
983 +               (BASE).start = (void*)((byte*)((BASE).start)                    \
984 +                       + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\
985 +       if ((BASE).end)                                                 \
986 +               (BASE).end   = (void*)((byte*)((BASE).end)                      \
987 +                       + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\
988 +                                                                       \
989 +       ut_list_node_313 = (BASE).start;                                \
990 +                                                                       \
991 +       for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {         \
992 +               ut_a(ut_list_node_313);                                 \
993 +               if ((ut_list_node_313->NAME).prev)                      \
994 +                       (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
995 +                               + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\
996 +               if ((ut_list_node_313->NAME).next)                      \
997 +                       (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\
998 +                               + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\
999 +               ut_list_node_313 = (ut_list_node_313->NAME).next;       \
1000 +       }                                                               \
1001 +                                                                       \
1002 +       ut_a(ut_list_node_313 == NULL);                                 \
1003 +                                                                       \
1004 +       ut_list_node_313 = (BASE).end;                                  \
1005 +                                                                       \
1006 +       for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {         \
1007 +               ut_a(ut_list_node_313);                                 \
1008 +               ut_list_node_313 = (ut_list_node_313->NAME).prev;       \
1009 +       }                                                               \
1010 +                                                                       \
1011 +       ut_a(ut_list_node_313 == NULL);                                 \
1012 +} while (0)
1013 +
1014  #endif
1015  
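#
# The UT_LIST_OFFSET and HASH_OFFSET macros above rebase stored pointers after
# the buffer pool segment is re-attached at a (possibly) different address:
# every pointer is shifted by the distance the data it points at has moved.
# A minimal standalone C sketch of that idea (illustration only, using
# stand-in types rather than the InnoDB ones):
#
#   #include <stddef.h>
#   #include <stdio.h>
#   #include <string.h>
#
#   struct node { struct node *next; int value; };
#
#   int main(void)
#   {
#           struct node old_buf[3], new_buf[3];
#           ptrdiff_t   offset;
#           int         i;
#
#           /* build a list in old_buf, as if it lived in the old mapping */
#           for (i = 0; i < 3; i++) {
#                   old_buf[i].value = i;
#                   old_buf[i].next  = (i < 2) ? &old_buf[i + 1] : NULL;
#           }
#
#           /* "re-attach": the same bytes now live at a new base address */
#           memcpy(new_buf, old_buf, sizeof(old_buf));
#           offset = (char *) new_buf - (char *) old_buf;
#
#           /* rebase every stored pointer by that offset, which is what
#           UT_LIST_OFFSET does for list links and HASH_OFFSET does for
#           hash chains (with two offsets, chosen by comparing against
#           the previous frame address) */
#           for (i = 0; i < 3; i++) {
#                   if (new_buf[i].next != NULL) {
#                           new_buf[i].next = (struct node *)
#                                   ((char *) new_buf[i].next + offset);
#                   }
#           }
#
#           for (i = 0; i < 3; i++) {
#                   printf("%d -> %s\n", new_buf[i].value,
#                          new_buf[i].next ? "next" : "end");
#           }
#           return(0);
#   }
#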
1016 diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
1017 --- a/storage/innobase/log/log0recv.c   2010-12-04 19:46:40.212513377 +0900
1018 +++ b/storage/innobase/log/log0recv.c   2010-12-07 16:10:14.959785817 +0900
1019 @@ -2912,6 +2912,7 @@
1020  /*==========================*/
1021  {
1022         ut_a(!recv_needed_recovery);
1023 +       ut_a(!srv_buffer_pool_shm_is_reused);
1024  
1025         recv_needed_recovery = TRUE;
1026  
1027 diff -ruN a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c
1028 --- a/storage/innobase/os/os0proc.c     2010-11-03 07:01:13.000000000 +0900
1029 +++ b/storage/innobase/os/os0proc.c     2010-12-07 16:10:14.960800123 +0900
1030 @@ -229,3 +229,173 @@
1031         }
1032  #endif
1033  }
1034 +
1035 +/****************************************************************//**
1036 +Allocates or attaches and reuses shared memory segment.
1037 +The content is not cleared automatically.
1038 +@return        allocated memory */
1039 +UNIV_INTERN
1040 +void*
1041 +os_shm_alloc(
1042 +/*=========*/
1043 +       ulint*  n,                      /*!< in/out: number of bytes */
1044 +       uint    key,
1045 +       ibool*  is_new)
1046 +{
1047 +       void*   ptr;
1048 +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
1049 +       ulint   size;
1050 +       int     shmid;
1051 +
1052 +       *is_new = FALSE;
1053 +       fprintf(stderr,
1054 +               "InnoDB: The shared memory segment containing the buffer pool is: key  %#x (%d).\n",
1055 +               key, key);
1056 +# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
1057 +       if (!os_use_large_pages || !os_large_page_size) {
1058 +               goto skip;
1059 +       }
1060 +
1061 +       /* Align block size to os_large_page_size */
1062 +       ut_ad(ut_is_2pow(os_large_page_size));
1063 +       size = ut_2pow_round(*n + (os_large_page_size - 1),
1064 +                            os_large_page_size);
1065 +
1066 +       shmid = shmget((key_t)key, (size_t)size,
1067 +                       IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W);
1068 +       if (shmid < 0) {
1069 +               if (errno == EEXIST) {
1070 +                       fprintf(stderr,
1071 +                               "InnoDB: HugeTLB: The shared memory segment exists.\n");
1072 +                       shmid = shmget((key_t)key, (size_t)size,
1073 +                                       SHM_HUGETLB | SHM_R | SHM_W);
1074 +                       if (shmid < 0) {
1075 +                               fprintf(stderr,
1076 +                                       "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
1077 +                                       size, errno);
1078 +                               goto skip;
1079 +                       } else {
1080 +                               fprintf(stderr,
1081 +                                       "InnoDB: HugeTLB: The existent shared memory segment is used.\n");
1082 +                       }
1083 +               } else {
1084 +                       fprintf(stderr,
1085 +                               "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
1086 +                               size, errno);
1087 +                       goto skip;
1088 +               }
1089 +       } else {
1090 +               *is_new = TRUE;
1091 +               fprintf(stderr,
1092 +                       "InnoDB: HugeTLB: A new shared memory segment has been created.\n");
1093 +       }
1094 +
1095 +       ptr = shmat(shmid, NULL, 0);
1096 +       if (ptr == (void *)-1) {
1097 +               fprintf(stderr,
1098 +                       "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n",
1099 +                       errno);
1100 +               ptr = NULL;
1101 +       }
1102 +
1103 +       if (ptr) {
1104 +               *n = size;
1105 +               os_fast_mutex_lock(&ut_list_mutex);
1106 +               ut_total_allocated_memory += size;
1107 +               os_fast_mutex_unlock(&ut_list_mutex);
1108 +               UNIV_MEM_ALLOC(ptr, size);
1109 +               return(ptr);
1110 +       }
1111 +skip:
1112 +       *is_new = FALSE;
1113 +# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */
1114 +# ifdef HAVE_GETPAGESIZE
1115 +       size = getpagesize();
1116 +# else
1117 +       size = UNIV_PAGE_SIZE;
1118 +# endif
1119 +       /* Align block size to system page size */
1120 +       ut_ad(ut_is_2pow(size));
1121 +       size = *n = ut_2pow_round(*n + (size - 1), size);
1122 +
1123 +       shmid = shmget((key_t)key, (size_t)size,
1124 +                       IPC_CREAT | IPC_EXCL | SHM_R | SHM_W);
1125 +       if (shmid < 0) {
1126 +               if (errno == EEXIST) {
1127 +                       fprintf(stderr,
1128 +                               "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
1129 +                       shmid = shmget((key_t)key, (size_t)size,
1130 +                                       SHM_R | SHM_W);
1131 +                       if (shmid < 0) {
1132 +                               fprintf(stderr,
1133 +                                       "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
1134 +                                       size, errno);
1135 +                               ptr = NULL;
1136 +                               goto end;
1137 +                       } else {
1138 +                               fprintf(stderr,
1139 +                                       "InnoDB: The existent shared memory segment is used.\n");
1140 +                       }
1141 +               } else {
1142 +                       fprintf(stderr,
1143 +                               "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
1144 +                               size, errno);
1145 +                       ptr = NULL;
1146 +                       goto end;
1147 +               }
1148 +       } else {
1149 +               *is_new = TRUE;
1150 +               fprintf(stderr,
1151 +                       "InnoDB: A new shared memory segment has been created.\n");
1152 +       }
1153 +
1154 +       ptr = shmat(shmid, NULL, 0);
1155 +       if (ptr == (void *)-1) {
1156 +               fprintf(stderr,
1157 +                       "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n",
1158 +                       errno);
1159 +               ptr = NULL;
1160 +       }
1161 +
1162 +       if (ptr) {
1163 +               *n = size;
1164 +               os_fast_mutex_lock(&ut_list_mutex);
1165 +               ut_total_allocated_memory += size;
1166 +               os_fast_mutex_unlock(&ut_list_mutex);
1167 +               UNIV_MEM_ALLOC(ptr, size);
1168 +       }
1169 +end:
1170 +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1171 +       fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1172 +       ptr = NULL;
1173 +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1174 +       return(ptr);
1175 +}
1176 +
1177 +/****************************************************************//**
1178 +Detach shared memory segment. */
1179 +UNIV_INTERN
1180 +void
1181 +os_shm_free(
1182 +/*========*/
1183 +       void    *ptr,                   /*!< in: pointer returned by
1184 +                                       os_shm_alloc() */
1185 +       ulint   size)                   /*!< in: size returned by
1186 +                                       os_shm_alloc() */
1187 +{
1188 +       os_fast_mutex_lock(&ut_list_mutex);
1189 +       ut_a(ut_total_allocated_memory >= size);
1190 +       os_fast_mutex_unlock(&ut_list_mutex);
1191 +
1192 +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
1193 +       if (!shmdt(ptr)) {
1194 +               os_fast_mutex_lock(&ut_list_mutex);
1195 +               ut_a(ut_total_allocated_memory >= size);
1196 +               ut_total_allocated_memory -= size;
1197 +               os_fast_mutex_unlock(&ut_list_mutex);
1198 +               UNIV_MEM_FREE(ptr, size);
1199 +       }
1200 +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1201 +       fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1202 +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1203 +}
1204 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
1205 --- a/storage/innobase/srv/srv0srv.c    2010-12-04 20:20:44.687550693 +0900
1206 +++ b/storage/innobase/srv/srv0srv.c    2010-12-07 16:10:14.962785720 +0900
1207 @@ -235,6 +235,11 @@
1208  UNIV_INTERN ulint      srv_mem_pool_size       = ULINT_MAX;
1209  UNIV_INTERN ulint      srv_lock_table_size     = ULINT_MAX;
1210  
1211 +/* key value for shm */
1212 +UNIV_INTERN uint       srv_buffer_pool_shm_key = 0;
1213 +UNIV_INTERN ibool      srv_buffer_pool_shm_is_reused = FALSE;
1214 +UNIV_INTERN ibool      srv_buffer_pool_shm_checksum = TRUE;
1215 +
1216  /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
1217  instead. */
1218  UNIV_INTERN ulint      srv_n_file_io_threads   = ULINT_MAX;
1219 diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
1220 --- a/storage/innobase/srv/srv0start.c  2010-12-04 20:19:29.806482628 +0900
1221 +++ b/storage/innobase/srv/srv0start.c  2010-12-07 16:10:14.964785346 +0900
1222 @@ -1835,6 +1835,8 @@
1223                 Note that this is not as heavy weight as it seems. At
1224                 this point there will be only ONE page in the buf_LRU
1225                 and there must be no page in the buf_flush list. */
1226 +               /* buffer_pool_shm should not be reused when recovery was needed. */
1227 +               if (!srv_buffer_pool_shm_is_reused)
1228                 buf_pool_invalidate();
1229  
1230                 /* We always try to do a recovery, even if the database had