1 # name       : innodb_buffer_pool_shm.patch
2 # introduced : 12
3 # maintainer : Yasufumi
4 #
5 #!!! notice !!!
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
9 --- a/storage/innobase/buf/buf0buddy.c  2010-12-04 19:46:39.372513543 +0900
10 +++ b/storage/innobase/buf/buf0buddy.c  2010-12-07 17:56:28.302087851 +0900
11 @@ -183,7 +183,7 @@
12         void*           buf,            /*!< in: buffer frame to deallocate */
13         ibool           have_page_hash_mutex)
14  {
15 -       const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf);
16 +       const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf_pool, buf);
17         buf_page_t*     bpage;
18         buf_block_t*    block;
19  
20 @@ -227,7 +227,7 @@
21         buf_block_t*    block)  /*!< in: buffer frame to allocate */
22  {
23         buf_pool_t*     buf_pool = buf_pool_from_block(block);
24 -       const ulint     fold = BUF_POOL_ZIP_FOLD(block);
25 +       const ulint     fold = BUF_POOL_ZIP_FOLD(buf_pool, block);
26         //ut_ad(buf_pool_mutex_own(buf_pool));
27         ut_ad(!mutex_own(&buf_pool->zip_mutex));
28         ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
29 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
30 --- a/storage/innobase/buf/buf0buf.c    2010-12-06 20:16:21.726195340 +0900
31 +++ b/storage/innobase/buf/buf0buf.c    2010-12-07 20:40:30.824749814 +0900
32 @@ -53,6 +53,10 @@
33  #include "page0zip.h"
34  #include "trx0trx.h"
35  #include "srv0start.h"
36 +#include "que0que.h"
37 +#include "read0read.h"
38 +#include "row0row.h"
39 +#include "ha_prototypes.h"
40  
41  /* prototypes for new functions added to ha_innodb.cc */
42  trx_t* innobase_get_trx();
43 @@ -342,6 +346,31 @@
44                                         was allocated for the frames */
45         buf_block_t*    blocks;         /*!< array of buffer control blocks */
46  };
47 +
48 +/* Buffer pool shared memory segment information */
49 +typedef        struct buf_shm_info_struct      buf_shm_info_t;
50 +
51 +struct buf_shm_info_struct {
52 +       char    head_str[8];
53 +       ulint   binary_id;
54 +       ibool   is_new;         /* during initialization */
55 +       ibool   clean;          /* cleanly shut down and freed */
56 +       ibool   reusable;       /* reusable */
57 +       ulint   buf_pool_size;  /* backup value */
58 +       ulint   page_size;      /* backup value */
59 +       ulint   frame_offset;   /* offset of the first frame based on chunk->mem */
60 +       ulint   zip_hash_offset;
61 +       ulint   zip_hash_n;
62 +
63 +       ulint   checksum;
64 +
65 +       buf_pool_t      buf_pool_backup;
66 +       buf_chunk_t     chunk_backup;
67 +
68 +       ib_uint64_t     dummy;
69 +};
70 +
71 +#define BUF_SHM_INFO_HEAD "XTRA_SHM"
72  #endif /* !UNIV_HOTBACKUP */
73  
74  /********************************************************************//**
75 @@ -988,6 +1017,58 @@
76  #endif /* UNIV_SYNC_DEBUG */
77  }
78  
79 +static
80 +void
81 +buf_block_reuse(
82 +/*============*/
83 +       buf_block_t*    block,
84 +       ptrdiff_t       frame_offset)
85 +{
86 +       /* block_init */
87 +       block->frame += frame_offset;
88 +
89 +       UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);
90 +
91 +       block->index = NULL;
92 +       block->btr_search_latch = NULL;
93 +
94 +#ifdef UNIV_DEBUG
95 +       /* recreate later */
96 +       block->page.in_page_hash = FALSE;
97 +       block->page.in_zip_hash = FALSE;
98 +#endif /* UNIV_DEBUG */
99 +
100 +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
101 +       block->n_pointers = 0;
102 +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
103 +
104 +       if (block->page.zip.data)
105 +               block->page.zip.data += frame_offset;
106 +
107 +       block->is_hashed = FALSE;
108 +
109 +#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
110 +       /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
111 +       of buffer block mutex/rwlock with performance schema. If
112 +       PFS_GROUP_BUFFER_SYNC is defined, skip the registration
113 +       since buffer block mutex/rwlock will be registered later in
114 +       pfs_register_buffer_block() */
115 +
116 +       mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
117 +       rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
118 +#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
119 +       mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
120 +       rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
121 +#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
122 +
123 +       ut_ad(rw_lock_validate(&(block->lock)));
124 +
125 +#ifdef UNIV_SYNC_DEBUG
126 +       rw_lock_create(buf_block_debug_latch_key,
127 +                      &block->debug_latch, SYNC_NO_ORDER_CHECK);
128 +#endif /* UNIV_SYNC_DEBUG */
129 +}
130 +
131  /********************************************************************//**
132  Allocates a chunk of buffer frames.
133  @return        chunk, or NULL on failure */
134 @@ -1001,26 +1082,188 @@
135  {
136         buf_block_t*    block;
137         byte*           frame;
138 +       ulint           zip_hash_n = 0;
139 +       ulint           zip_hash_mem_size = 0;
140 +       hash_table_t*   zip_hash_tmp = NULL;
141         ulint           i;
142 +       buf_shm_info_t* shm_info = NULL;
143  
144         /* Round down to a multiple of page size,
145         although it already should be. */
146         mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
147 +
148 +       srv_buffer_pool_shm_is_reused = FALSE;
149 +
150 +       if (srv_buffer_pool_shm_key) {
151 +               /* zip_hash size */
152 +               zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
153 +               zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n)
154 +                                                 + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
155 +       }
156 +
157         /* Reserve space for the block descriptors. */
158         mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
159                                   + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
160 +       if (srv_buffer_pool_shm_key) {
161 +                mem_size += ut_2pow_round(sizeof(buf_shm_info_t)
162 +                                          + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
163 +                mem_size += zip_hash_mem_size;
164 +       }
165  
166         chunk->mem_size = mem_size;
167 +
168 +       if (srv_buffer_pool_shm_key) {
169 +               ulint   binary_id;
170 +               ibool   is_new;
171 +
172 +               ut_a(buf_pool->n_chunks == 1);
173 +
174 +               fprintf(stderr,
175 +               "InnoDB: Notice: The innodb_buffer_pool_shm_key option has been specified.\n"
176 +               "InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
177 +               "InnoDB:   * the mysqld executable between restarts of the server.\n"
178 +               "InnoDB:   * the value of innodb_buffer_pool_size.\n"
179 +               "InnoDB:   * the value of innodb_page_size.\n"
180 +               "InnoDB:   * datafiles created by InnoDB during this session.\n"
181 +               "InnoDB: Otherwise, data corruption in datafiles may result.\n");
182 +
183 +               /* FIXME: This is still a vague id */
184 +               binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
185 +                         + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum)
186 +                         + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
187 +                         + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
188 +                         + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
189 +                         + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
190 +                         + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
191 +                         + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
192 +                         + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
193 +                         + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
194 +                         + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);
195 +
196 +               chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);
197 +
198 +               if (UNIV_UNLIKELY(chunk->mem == NULL)) {
199 +                       return(NULL);
200 +               }
201 +init_again:
202 +#ifdef UNIV_SET_MEM_TO_ZERO
203 +               if (is_new) {
204 +                       memset(chunk->mem, '\0', chunk->mem_size);
205 +               }
206 +#endif
207 +               /* for ut_fold_binary_32(), these values should be 32-bit aligned */
208 +               ut_a(sizeof(buf_shm_info_t) % 4 == 0);
209 +               ut_a((ulint)chunk->mem % 4 == 0);
210 +               ut_a(chunk->mem_size % 4 == 0);
211 +
212 +               shm_info = chunk->mem;
213 +
214 +               zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);
215 +
216 +               if (is_new) {
217 +                       strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
218 +                       shm_info->binary_id = binary_id;
219 +                       shm_info->is_new = TRUE;        /* changed to FALSE when the initialization is finished */
220 +                       shm_info->clean = FALSE;        /* changed to TRUE when the segment is freed. */
221 +                       shm_info->reusable = FALSE;     /* changed to TRUE when validation is finished. */
222 +                       shm_info->buf_pool_size = srv_buf_pool_size;
223 +                       shm_info->page_size = srv_page_size;
224 +                       shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size;
225 +                       shm_info->zip_hash_n = zip_hash_n;
226 +               } else {
227 +                       ulint   checksum;
228 +
229 +                       if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) {
230 +                               fprintf(stderr,
231 +                               "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n");
232 +                               return(NULL);
233 +                       }
234 +                       if (shm_info->binary_id != binary_id) {
235 +                               fprintf(stderr,
236 +                               "InnoDB: Error: The shared memory segment seems not to be for this binary.\n");
237 +                               return(NULL);
238 +                       }
239 +                       if (shm_info->is_new) {
240 +                               fprintf(stderr,
241 +                               "InnoDB: Error: The shared memory was not initialized yet.\n");
242 +                               return(NULL);
243 +                       }
244 +                       if (shm_info->buf_pool_size != srv_buf_pool_size) {
245 +                               fprintf(stderr,
246 +                               "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
247 +                               shm_info->buf_pool_size, srv_buf_pool_size);
248 +                               return(NULL);
249 +                       }
250 +                       if (shm_info->page_size != srv_page_size) {
251 +                               fprintf(stderr,
252 +                               "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n",
253 +                               shm_info->page_size, srv_page_size);
254 +                               return(NULL);
255 +                       }
256 +                       if (!shm_info->reusable) {
257 +                               fprintf(stderr,
258 +                               "InnoDB: Warning: The shared memory has unrecoverable contents.\n"
259 +                               "InnoDB: The shared memory segment is initialized.\n");
260 +                               is_new = TRUE;
261 +                               goto init_again;
262 +                       }
263 +                       if (!shm_info->clean) {
264 +                               fprintf(stderr,
265 +                               "InnoDB: Warning: The shared memory was not shut down cleanly.\n"
266 +                               "InnoDB: The shared memory segment is initialized.\n");
267 +                               is_new = TRUE;
268 +                               goto init_again;
269 +                       }
270 +
271 +                       ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
272 +                       ut_a(shm_info->zip_hash_n == zip_hash_n);
273 +
274 +                       /* check checksum */
275 +                       if (srv_buffer_pool_shm_checksum) {
276 +                               checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
277 +                                                            chunk->mem_size - sizeof(buf_shm_info_t));
278 +                       } else {
279 +                               checksum = BUF_NO_CHECKSUM_MAGIC;
280 +                       }
281 +
282 +                       if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
283 +                           && shm_info->checksum != checksum) {
284 +                               fprintf(stderr,
285 +                               "InnoDB: Error: checksum of the shared memory is not match. "
286 +                               "(stored=%lu calculated=%lu)\n",
287 +                               shm_info->checksum, checksum);
288 +                               return(NULL);
289 +                       }
290 +
291 +                       /* mark the segment as in use. */
292 +                       shm_info->clean = FALSE;        /* changed to TRUE when the segment is freed. */
293 +               }
294 +
295 +               /* init zip_hash contents */
296 +               if (is_new) {
297 +                       hash_create_init(zip_hash_tmp, zip_hash_n);
298 +               } else {
299 +                       /* offset adjustment is done later */
300 +                       hash_create_reuse(zip_hash_tmp);
301 +
302 +                       srv_buffer_pool_shm_is_reused = TRUE;
303 +               }
304 +       } else {
305         chunk->mem = os_mem_alloc_large(&chunk->mem_size);
306  
307         if (UNIV_UNLIKELY(chunk->mem == NULL)) {
308  
309                 return(NULL);
310         }
311 +       }
312  
313         /* Allocate the block descriptors from
314         the start of the memory block. */
315 +       if (srv_buffer_pool_shm_key) {
316 +               chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
317 +       } else {
318         chunk->blocks = chunk->mem;
319 +       }
320  
321         /* Align a pointer to the first frame.  Note that when
322         os_large_page_size is smaller than UNIV_PAGE_SIZE,
323 @@ -1028,8 +1271,13 @@
324         it is bigger, we may allocate more blocks than requested. */
325  
326         frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
327 +       if (srv_buffer_pool_shm_key) {
328 +               /* reserve zip_hash space and always subtract 1 for reproducibility */
329 +               chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1;
330 +       } else {
331         chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
332                 - (frame != chunk->mem);
333 +       }
334  
335         /* Subtract the space needed for block descriptors. */
336         {
337 @@ -1043,6 +1291,98 @@
338                 chunk->size = size;
339         }
340  
341 +       if (shm_info && !(shm_info->is_new)) {
342 +               /* convert the shared memory segment for reuse */
343 +               ptrdiff_t       phys_offset;
344 +               ptrdiff_t       logi_offset;
345 +               ptrdiff_t       blocks_offset;
346 +               void*           previous_frame_address;
347 +
348 +               if (chunk->size < shm_info->chunk_backup.size) {
349 +                       fprintf(stderr,
350 +                       "InnoDB: Error: The buffer pool became smaller because of allocated address.\n"
351 +                       "InnoDB: Retrying may avoid this situation.\n");
352 +                       shm_info->clean = TRUE; /* release the flag for retrying */
353 +                       return(NULL);
354 +               }
355 +
356 +               chunk->size = shm_info->chunk_backup.size;
357 +               phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
358 +               logi_offset = frame - chunk->blocks[0].frame;
359 +               previous_frame_address = chunk->blocks[0].frame;
360 +               blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;
361 +
362 +               if (phys_offset || logi_offset || blocks_offset) {
363 +                       fprintf(stderr,
364 +                       "InnoDB: Buffer pool in the shared memory segment should be converted.\n"
365 +                       "InnoDB: Previous frames in address      : %p\n"
366 +                       "InnoDB: Previous frames were located    : %p\n"
367 +                       "InnoDB: Current frames should be located: %p\n"
368 +                       "InnoDB: Pysical offset                  : %ld (%#lx)\n"
369 +                       "InnoDB: Logical offset (frames)         : %ld (%#lx)\n"
370 +                       "InnoDB: Logical offset (blocks)         : %ld (%#lx)\n",
371 +                               (byte*)chunk->mem + shm_info->frame_offset,
372 +                               chunk->blocks[0].frame, frame,
373 +                               phys_offset, phys_offset, logi_offset, logi_offset,
374 +                               blocks_offset, blocks_offset);
375 +               } else {
376 +                       fprintf(stderr,
377 +                       "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
378 +               }
379 +
380 +               if (phys_offset) {
381 +                       fprintf(stderr,
382 +                       "InnoDB: Aligning physical offset...");
383 +
384 +                       memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
385 +                               chunk->size * UNIV_PAGE_SIZE);
386 +
387 +                       fprintf(stderr,
388 +                       " Done.\n");
389 +               }
390 +
391 +               /* buf_block_t */
392 +               block = chunk->blocks;
393 +               for (i = chunk->size; i--; ) {
394 +                       buf_block_reuse(block, logi_offset);
395 +                       block++;
396 +               }
397 +
398 +               if (logi_offset || blocks_offset) {
399 +                       fprintf(stderr,
400 +                       "InnoDB: Aligning logical offset...");
401 +
402 +
403 +                       /* buf_pool_t buf_pool_backup */
404 +                       UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
405 +                                       previous_frame_address, logi_offset, blocks_offset);
406 +                       UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free,
407 +                                       previous_frame_address, logi_offset, blocks_offset);
408 +                       UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU,
409 +                                       previous_frame_address, logi_offset, blocks_offset);
410 +                       if (shm_info->buf_pool_backup.LRU_old)
411 +                               shm_info->buf_pool_backup.LRU_old =
412 +                                       (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
413 +                                               + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
414 +                                                 ? logi_offset : blocks_offset));
415 +
416 +                       UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
417 +                                       previous_frame_address, logi_offset, blocks_offset);
418 +
419 +                       UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean,
420 +                                       previous_frame_address, logi_offset, blocks_offset);
421 +                       for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
422 +                               UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i],
423 +                                       previous_frame_address, logi_offset, blocks_offset);
424 +                       }
425 +
426 +                       HASH_OFFSET(zip_hash_tmp, buf_page_t, hash,
427 +                                       previous_frame_address, logi_offset, blocks_offset);
428 +
429 +                       fprintf(stderr,
430 +                       " Done.\n");
431 +               }
432 +       } else {
433         /* Init block structs and assign frames for them. Then we
434         assign the frames to the first blocks (we already mapped the
435         memory above). */
436 @@ -1068,6 +1408,11 @@
437                 block++;
438                 frame += UNIV_PAGE_SIZE;
439         }
440 +       }
441 +
442 +       if (shm_info) {
443 +               shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
444 +       }
445  
446  #ifdef PFS_GROUP_BUFFER_SYNC
447         pfs_register_buffer_block(chunk);
448 @@ -1249,6 +1594,8 @@
449                 UNIV_MEM_UNDESC(block);
450         }
451  
452 +       ut_a(!srv_buffer_pool_shm_key);
453 +
454         os_mem_free_large(chunk->mem, chunk->mem_size);
455  }
456  
457 @@ -1289,7 +1636,7 @@
458         ulint           instance_no)    /*!< in: id of the instance */
459  {
460         ulint           i;
461 -       buf_chunk_t*    chunk;
462 +       buf_chunk_t*    chunk = NULL;
463  
464         /* 1. Initialize general fields
465         ------------------------------- */
466 @@ -1335,7 +1682,10 @@
467                 buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
468  
469                 buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
470 +               /* zip_hash is allocated in the shm segment when srv_buffer_pool_shm_key is enabled */
471 +               if (!srv_buffer_pool_shm_key) {
472                 buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
473 +               }
474                 
475                 buf_pool->last_printout_time = ut_time();
476         }
477 @@ -1354,6 +1704,86 @@
478  
479         /* All fields are initialized by mem_zalloc(). */
480  
481 +       if (chunk && srv_buffer_pool_shm_key) {
482 +               buf_shm_info_t* shm_info;
483 +
484 +               ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
485 +               shm_info = chunk->mem;
486 +
487 +               buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);
488 +
489 +               if(shm_info->is_new) {
490 +                       shm_info->is_new = FALSE; /* initialization was finished */
491 +               } else {
492 +                       buf_block_t*    block = chunk->blocks;
493 +                       buf_page_t*     b;
494 +
495 +                       /* shm_info->buf_pool_backup should already have been converted */
496 +                       /* at buf_chunk_init(), so just copy it. */
497 +                       buf_pool->flush_list            = shm_info->buf_pool_backup.flush_list;
498 +                       buf_pool->freed_page_clock      = shm_info->buf_pool_backup.freed_page_clock;
499 +                       buf_pool->free                  = shm_info->buf_pool_backup.free;
500 +                       buf_pool->LRU                   = shm_info->buf_pool_backup.LRU;
501 +                       buf_pool->LRU_old               = shm_info->buf_pool_backup.LRU_old;
502 +                       buf_pool->LRU_old_len           = shm_info->buf_pool_backup.LRU_old_len;
503 +                       buf_pool->unzip_LRU             = shm_info->buf_pool_backup.unzip_LRU;
504 +                       buf_pool->zip_clean             = shm_info->buf_pool_backup.zip_clean;
505 +                       for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
506 +                               buf_pool->zip_free[i]   = shm_info->buf_pool_backup.zip_free[i];
507 +                       }
508 +
509 +                       for (i = 0; i < chunk->size; i++, block++) {
510 +                               if (buf_block_get_state(block)
511 +                                   == BUF_BLOCK_FILE_PAGE) {
512 +                                       ut_d(block->page.in_page_hash = TRUE);
513 +                                       HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
514 +                                                   buf_page_address_fold(
515 +                                                           block->page.space,
516 +                                                           block->page.offset),
517 +                                                   &block->page);
518 +                               }
519 +                       }
520 +
521 +                       for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
522 +                            b = UT_LIST_GET_NEXT(zip_list, b)) {
523 +                               ut_ad(!b->in_flush_list);
524 +                               ut_ad(b->in_LRU_list);
525 +
526 +                               ut_d(b->in_page_hash = TRUE);
527 +                               HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
528 +                                           buf_page_address_fold(b->space, b->offset), b);
529 +                       }
530 +
531 +                       for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
532 +                            b = UT_LIST_GET_NEXT(flush_list, b)) {
533 +                               ut_ad(b->in_flush_list);
534 +                               ut_ad(b->in_LRU_list);
535 +
536 +                               switch (buf_page_get_state(b)) {
537 +                               case BUF_BLOCK_ZIP_DIRTY:
538 +                                       ut_d(b->in_page_hash = TRUE);
539 +                                       HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
540 +                                                   buf_page_address_fold(b->space,
541 +                                                                         b->offset), b);
542 +                                       break;
543 +                               case BUF_BLOCK_FILE_PAGE:
544 +                                       /* uncompressed page */
545 +                                       break;
546 +                               case BUF_BLOCK_ZIP_FREE:
547 +                               case BUF_BLOCK_ZIP_PAGE:
548 +                               case BUF_BLOCK_NOT_USED:
549 +                               case BUF_BLOCK_READY_FOR_USE:
550 +                               case BUF_BLOCK_MEMORY:
551 +                               case BUF_BLOCK_REMOVE_HASH:
552 +                                       ut_error;
553 +                                       break;
554 +                               }
555 +                       }
556 +
557 +
558 +               }
559 +       }
560 +
561         mutex_exit(&buf_pool->LRU_list_mutex);
562         rw_lock_x_unlock(&buf_pool->page_hash_latch);
563         buf_pool_mutex_exit(buf_pool);
564 @@ -1373,6 +1803,42 @@
565         buf_chunk_t*    chunk;
566         buf_chunk_t*    chunks;
567  
568 +       if (srv_buffer_pool_shm_key) {
569 +               buf_shm_info_t* shm_info;
570 +
571 +               ut_a(buf_pool->n_chunks == 1);
572 +
573 +               chunk = buf_pool->chunks;
574 +               shm_info = chunk->mem;
575 +               ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
576 +
577 +               /* if opened, close shm. */
578 +               if (!shm_info->clean) {
579 +                       /* validate that the shared memory segment has no unrecoverable contents. */
580 +                       /* Currently, this validation is not needed. */
581 +                       shm_info->reusable = TRUE;
582 +
583 +                       memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t));
584 +                       memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));
585 +
586 +                       if (srv_fast_shutdown < 2) {
587 +                               if (srv_buffer_pool_shm_checksum) {
588 +                                       shm_info->checksum =
589 +                                               ut_fold_binary_32(
590 +                                                       (byte*)chunk->mem + sizeof(buf_shm_info_t),
591 +                                                       chunk->mem_size - sizeof(buf_shm_info_t));
592 +                               } else {
593 +                                       shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
594 +                               }
595 +                               shm_info->clean = TRUE;
596 +                       }
597 +
598 +                       fprintf(stderr,
599 +                               "InnoDB: The shared memory was closed.\n");
600 +               }
601 +
602 +               os_shm_free(chunk->mem, chunk->mem_size);
603 +       } else {
604         chunks = buf_pool->chunks;
605         chunk = chunks + buf_pool->n_chunks;
606  
607 @@ -1381,10 +1847,13 @@
608                 would fail at shutdown. */
609                 os_mem_free_large(chunk->mem, chunk->mem_size);
610         }
611 +       }
612  
613         mem_free(buf_pool->chunks);
614         hash_table_free(buf_pool->page_hash);
615 +       if (!srv_buffer_pool_shm_key) {
616         hash_table_free(buf_pool->zip_hash);
617 +       }
618  }
619  
620  /********************************************************************//**
621 @@ -1668,6 +2137,11 @@
622         //buf_pool_mutex_enter(buf_pool);
623         mutex_enter(&buf_pool->LRU_list_mutex);
624  
625 +       if (srv_buffer_pool_shm_key) {
626 +               /* Cannot support shrink */
627 +               goto func_done;
628 +       }
629 +
630  shrink_again:
631         if (buf_pool->n_chunks <= 1) {
632  
633 @@ -1848,7 +2322,7 @@
634         zip_hash = hash_create(2 * buf_pool->curr_size);
635  
636         HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
637 -                    BUF_POOL_ZIP_FOLD_BPAGE);
638 +                    buf_pool, BUF_POOL_ZIP_FOLD_BPAGE);
639  
640         hash_table_free(buf_pool->zip_hash);
641         buf_pool->zip_hash = zip_hash;
642 @@ -2130,6 +2604,11 @@
643         ulint   change_size;
644         ulint   min_change_size = 1048576 * srv_buf_pool_instances;
645  
646 +       if (srv_buffer_pool_shm_key) {
647 +               /* Cannot support resize */
648 +               return;
649 +       }
650 +
651         buf_pool_mutex_enter_all();
652    
653         if (srv_buf_pool_old_size == srv_buf_pool_size) {
654 diff -ruN a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c
655 --- a/storage/innobase/ha/hash0hash.c   2010-11-03 07:01:13.000000000 +0900
656 +++ b/storage/innobase/ha/hash0hash.c   2010-12-07 16:10:14.937749140 +0900
657 @@ -133,6 +133,70 @@
658  }
659  
660  /*************************************************************//**
661 +*/
662 +UNIV_INTERN
663 +ulint
664 +hash_create_needed(
665 +/*===============*/
666 +       ulint   n)
667 +{
668 +       ulint   prime;
669 +       ulint   offset;
670 +
671 +       prime = ut_find_prime(n);
672 +
673 +       offset = (sizeof(hash_table_t) + 7) / 8;
674 +       offset *= 8;
675 +
676 +       return(offset + sizeof(hash_cell_t) * prime);
677 +}
678 +
679 +UNIV_INTERN
680 +void
681 +hash_create_init(
682 +/*=============*/
683 +       hash_table_t*   table,
684 +       ulint           n)
685 +{
686 +       ulint   prime;
687 +       ulint   offset;
688 +
689 +       prime = ut_find_prime(n);
690 +
691 +       offset = (sizeof(hash_table_t) + 7) / 8;
692 +       offset *= 8;
693 +
694 +       table->array = (hash_cell_t*)(((byte*)table) + offset);
695 +       table->n_cells = prime;
696 +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
697 +       table->adaptive = FALSE;
698 +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
699 +       table->n_mutexes = 0;
700 +       table->mutexes = NULL;
701 +       table->heaps = NULL;
702 +       table->heap = NULL;
703 +       ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
704 +
705 +       /* Initialize the cell array */
706 +       hash_table_clear(table);
707 +}
708 +
709 +UNIV_INTERN
710 +void
711 +hash_create_reuse(
712 +/*==============*/
713 +       hash_table_t*   table)
714 +{
715 +       ulint   offset;
716 +
717 +       offset = (sizeof(hash_table_t) + 7) / 8;
718 +       offset *= 8;
719 +
720 +       table->array = (hash_cell_t*)(((byte*)table) + offset);
721 +       ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
722 +}
723 +
724 +/*************************************************************//**
725  Frees a hash table. */
726  UNIV_INTERN
727  void
728 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
729 --- a/storage/innobase/handler/ha_innodb.cc     2010-12-06 20:16:21.733263627 +0900
730 +++ b/storage/innobase/handler/ha_innodb.cc     2010-12-07 17:56:28.316139830 +0900
731 @@ -194,6 +194,7 @@
732  static my_bool innobase_create_status_file             = FALSE;
733  static my_bool innobase_stats_on_metadata              = TRUE;
734  static my_bool innobase_use_sys_stats_table            = FALSE;
735 +static my_bool innobase_buffer_pool_shm_checksum       = TRUE;
736  
737  
738  static char*   internal_innobase_data_file_path        = NULL;
739 @@ -2620,6 +2621,14 @@
740         srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
741         srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
742  
743 +       if (srv_buffer_pool_shm_key && srv_buf_pool_instances > 1) {
744 +               fprintf(stderr,
745 +                       "InnoDB: Warning: innodb_buffer_pool_shm_key cannot be used with several innodb_buffer_pool_instances.\n"
746 +                       "InnoDB:          innodb_buffer_pool_instances was set to 1.\n");
747 +               srv_buf_pool_instances = 1;
748 +               innobase_buffer_pool_instances = 1;
749 +       }
750 +
751         srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
752  
753         srv_n_file_io_threads = (ulint) innobase_file_io_threads;
754 @@ -2636,6 +2645,7 @@
755         srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
756         srv_use_checksums = (ibool) innobase_use_checksums;
757         srv_fast_checksum = (ibool) innobase_fast_checksum;
758 +       srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;
759  
760  #ifdef HAVE_LARGE_PAGES
761          if ((os_use_large_pages = (ibool) my_use_large_pages))
762 @@ -11642,6 +11652,16 @@
763    "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
764    NULL, NULL, 1L, 1L, MAX_BUFFER_POOLS, 1L);
765  
766 +static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,
767 +  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
768 +  "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",
769 +  NULL, NULL, 0, 0, INT_MAX32, 0);
770 +
771 +static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
772 +  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
773 +  "Enable buffer_pool_shm checksum validation (enabled by default).",
774 +  NULL, NULL, TRUE);
775 +
776  static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
777    PLUGIN_VAR_RQCMDARG,
778    "Helps in performance tuning in heavily concurrent environments.",
779 @@ -11921,6 +11941,8 @@
780    MYSQL_SYSVAR(autoextend_increment),
781    MYSQL_SYSVAR(buffer_pool_size),
782    MYSQL_SYSVAR(buffer_pool_instances),
783 +  MYSQL_SYSVAR(buffer_pool_shm_key),
784 +  MYSQL_SYSVAR(buffer_pool_shm_checksum),
785    MYSQL_SYSVAR(checksums),
786    MYSQL_SYSVAR(fast_checksum),
787    MYSQL_SYSVAR(commit_concurrency),
788 diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
789 --- a/storage/innobase/include/buf0buf.h        2010-12-06 20:16:21.778264552 +0900
790 +++ b/storage/innobase/include/buf0buf.h        2010-12-07 17:56:28.322749380 +0900
791 @@ -36,6 +36,7 @@
792  #ifndef UNIV_HOTBACKUP
793  #include "ut0rbt.h"
794  #include "os0proc.h"
795 +#include "srv0srv.h"
796  
797  /** @name Modes for buf_page_get_gen */
798  /* @{ */
799 @@ -1520,9 +1521,12 @@
800  /**********************************************************************//**
801  Compute the hash fold value for blocks in buf_pool->zip_hash. */
802  /* @{ */
803 -#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
804 -#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
805 -#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
806 +/* the fold should be relative when srv_buffer_pool_shm_key is enabled */
807 +#define BUF_POOL_ZIP_FOLD_PTR(bpool, ptr) (!srv_buffer_pool_shm_key\
808 +                                       ?((ulint) (ptr) / UNIV_PAGE_SIZE)\
809 +                                       :((ulint) ((byte*)ptr - (byte*)(buf_page_from_array(bpool, 0)->frame)) / UNIV_PAGE_SIZE))
810 +#define BUF_POOL_ZIP_FOLD(bpool, b) BUF_POOL_ZIP_FOLD_PTR(bpool, (b)->frame)
811 +#define BUF_POOL_ZIP_FOLD_BPAGE(bpool, b) BUF_POOL_ZIP_FOLD(bpool, (buf_block_t*) (b))
812  /* @} */
813  
814  /** @brief The buffer pool statistics structure. */
815 diff -ruN a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
816 --- a/storage/innobase/include/hash0hash.h      2010-11-03 07:01:13.000000000 +0900
817 +++ b/storage/innobase/include/hash0hash.h      2010-12-07 17:56:28.324726446 +0900
818 @@ -49,6 +49,28 @@
819  hash_create(
820  /*========*/
821         ulint   n);     /*!< in: number of array cells */
822 +
823 +/*************************************************************//**
824 +*/
825 +UNIV_INTERN
826 +ulint
827 +hash_create_needed(
828 +/*===============*/
829 +       ulint   n);
830 +
831 +UNIV_INTERN
832 +void
833 +hash_create_init(
834 +/*=============*/
835 +       hash_table_t*   table,
836 +       ulint           n);
837 +
838 +UNIV_INTERN
839 +void
840 +hash_create_reuse(
841 +/*==============*/
842 +       hash_table_t*   table);
843 +
844  #ifndef UNIV_HOTBACKUP
845  /*************************************************************//**
846  Creates a mutex array to protect a hash table. */
847 @@ -306,7 +328,7 @@
848  /****************************************************************//**
849  Move all hash table entries from OLD_TABLE to NEW_TABLE. */
850  
851 -#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
852 +#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, BPOOL, FOLD_FUNC) \
853  do {\
854         ulint           i2222;\
855         ulint           cell_count2222;\
856 @@ -318,7 +340,7 @@
857  \
858                 while (node2222) {\
859                         NODE_TYPE*      next2222 = node2222->PTR_NAME;\
860 -                       ulint           fold2222 = FOLD_FUNC(node2222);\
861 +                       ulint           fold2222 = FOLD_FUNC(BPOOL, node2222);\
862  \
863                         HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
864                                 fold2222, node2222);\
865 @@ -327,6 +349,33 @@
866                 }\
867         }\
868  } while (0)
869 +
870 +/********************************************************************//**
871 +Adjust hash node pointers after the table contents have been moved. */
872 +#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \
873 +do {\
874 +       ulint           i2222;\
875 +       ulint           cell_count2222;\
876 +\
877 +       cell_count2222 = hash_get_n_cells(TABLE);\
878 +\
879 +       for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
880 +               NODE_TYPE*      node2222;\
881 +\
882 +               if ((TABLE)->array[i2222].node) \
883 +                       (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
884 +                       + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\
885 +               node2222 = HASH_GET_FIRST((TABLE), i2222);\
886 +\
887 +               while (node2222) {\
888 +                       if (node2222->PTR_NAME) \
889 +                               node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
890 +                               + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\
891 +\
892 +                       node2222 = node2222->PTR_NAME;\
893 +               }\
894 +       }\
895 +} while (0)
896  
897  /************************************************************//**
898  Gets the mutex index for a fold value in a hash table.
899 diff -ruN a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
900 --- a/storage/innobase/include/os0proc.h        2010-11-03 07:01:13.000000000 +0900
901 +++ b/storage/innobase/include/os0proc.h        2010-12-07 16:10:14.955718750 +0900
902 @@ -32,6 +32,11 @@
903  #ifdef UNIV_LINUX
904  #include <sys/ipc.h>
905  #include <sys/shm.h>
906 +#else
907 +# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
908 +#include <sys/ipc.h>
909 +#include <sys/shm.h>
910 +# endif
911  #endif
912  
913  typedef void*                  os_process_t;
914 @@ -70,6 +75,29 @@
915         ulint   size);                  /*!< in: size returned by
916                                         os_mem_alloc_large() */
917  
918 +
919 +/****************************************************************//**
920 +Allocates a shared memory segment, or attaches to and reuses an existing one.
921 +The content is not cleared automatically.
922 +@return        allocated memory */
923 +UNIV_INTERN
924 +void*
925 +os_shm_alloc(
926 +/*=========*/
927 +       ulint*  n,                      /*!< in/out: number of bytes */
928 +       uint    key,
929 +       ibool*  is_new);
930 +
931 +/****************************************************************//**
932 +Detaches the shared memory segment. */
933 +UNIV_INTERN
934 +void
935 +os_shm_free(
936 +/*========*/
937 +       void    *ptr,                   /*!< in: pointer returned by
938 +                                       os_shm_alloc() */
939 +       ulint   size);                  /*!< in: size returned by
940 +                                       os_shm_alloc() */
941  #ifndef UNIV_NONINL
942  #include "os0proc.ic"
943  #endif
944 diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
945 --- a/storage/innobase/include/srv0srv.h        2010-12-04 20:20:28.016566697 +0900
946 +++ b/storage/innobase/include/srv0srv.h        2010-12-07 16:10:14.956717659 +0900
947 @@ -171,6 +171,10 @@
948  extern ulint   srv_mem_pool_size;
949  extern ulint   srv_lock_table_size;
950  
951 +extern uint    srv_buffer_pool_shm_key;
952 +extern ibool   srv_buffer_pool_shm_is_reused;
953 +extern ibool   srv_buffer_pool_shm_checksum;
954 +
955  extern ibool   srv_thread_concurrency_timer_based;
956  
957  extern ulint   srv_n_file_io_threads;
958 diff -ruN a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
959 --- a/storage/innobase/include/ut0lst.h 2010-11-03 07:01:13.000000000 +0900
960 +++ b/storage/innobase/include/ut0lst.h 2010-12-07 16:10:14.957785525 +0900
961 @@ -257,5 +257,48 @@
962         ut_a(ut_list_node_313 == NULL);                                 \
963  } while (0)
964  
965 +/********************************************************************//**
966 +Adjust the node pointers of a list after its contents have been moved.
967 +@param NAME            the name of the list
968 +@param TYPE            node type
969 +@param BASE            base node (not a pointer to it)
970 +@param FADDR           boundary address; FOFFSET/BOFFSET are the offsets applied above/below it */
971 +#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET)      \
972 +do {                                                                   \
973 +       ulint   ut_list_i_313;                                          \
974 +       TYPE*   ut_list_node_313;                                       \
975 +                                                                       \
976 +       if ((BASE).start)                                               \
977 +               (BASE).start = (void*)((byte*)((BASE).start)                    \
978 +                       + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\
979 +       if ((BASE).end)                                                 \
980 +               (BASE).end   = (void*)((byte*)((BASE).end)                      \
981 +                       + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\
982 +                                                                       \
983 +       ut_list_node_313 = (BASE).start;                                \
984 +                                                                       \
985 +       for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {         \
986 +               ut_a(ut_list_node_313);                                 \
987 +               if ((ut_list_node_313->NAME).prev)                      \
988 +                       (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
989 +                               + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\
990 +               if ((ut_list_node_313->NAME).next)                      \
991 +                       (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\
992 +                               + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\
993 +               ut_list_node_313 = (ut_list_node_313->NAME).next;       \
994 +       }                                                               \
995 +                                                                       \
996 +       ut_a(ut_list_node_313 == NULL);                                 \
997 +                                                                       \
998 +       ut_list_node_313 = (BASE).end;                                  \
999 +                                                                       \
1000 +       for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {         \
1001 +               ut_a(ut_list_node_313);                                 \
1002 +               ut_list_node_313 = (ut_list_node_313->NAME).prev;       \
1003 +       }                                                               \
1004 +                                                                       \
1005 +       ut_a(ut_list_node_313 == NULL);                                 \
1006 +} while (0)
1007 +
1008  #endif
1009  
1010 diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
1011 --- a/storage/innobase/log/log0recv.c   2010-12-04 19:46:40.212513377 +0900
1012 +++ b/storage/innobase/log/log0recv.c   2010-12-07 16:10:14.959785817 +0900
1013 @@ -2912,6 +2912,7 @@
1014  /*==========================*/
1015  {
1016         ut_a(!recv_needed_recovery);
1017 +       ut_a(!srv_buffer_pool_shm_is_reused);
1018  
1019         recv_needed_recovery = TRUE;
1020  
1021 diff -ruN a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c
1022 --- a/storage/innobase/os/os0proc.c     2010-11-03 07:01:13.000000000 +0900
1023 +++ b/storage/innobase/os/os0proc.c     2010-12-07 16:10:14.960800123 +0900
1024 @@ -229,3 +229,173 @@
1025         }
1026  #endif
1027  }
1028 +
1029 +/****************************************************************//**
1030 +Allocates a shared memory segment, or attaches to and reuses an existing one.
1031 +The content is not cleared automatically.
1032 +@return        allocated memory */
1033 +UNIV_INTERN
1034 +void*
1035 +os_shm_alloc(
1036 +/*=========*/
1037 +       ulint*  n,                      /*!< in/out: number of bytes */
1038 +       uint    key,
1039 +       ibool*  is_new)
1040 +{
1041 +       void*   ptr;
1042 +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
1043 +       ulint   size;
1044 +       int     shmid;
1045 +
1046 +       *is_new = FALSE;
1047 +       fprintf(stderr,
1048 +               "InnoDB: The shared memory segment containing the buffer pool is: key  %#x (%d).\n",
1049 +               key, key);
1050 +# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
1051 +       if (!os_use_large_pages || !os_large_page_size) {
1052 +               goto skip;
1053 +       }
1054 +
1055 +       /* Align block size to os_large_page_size */
1056 +       ut_ad(ut_is_2pow(os_large_page_size));
1057 +       size = ut_2pow_round(*n + (os_large_page_size - 1),
1058 +                            os_large_page_size);
1059 +
1060 +       shmid = shmget((key_t)key, (size_t)size,
1061 +                       IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W);
1062 +       if (shmid < 0) {
1063 +               if (errno == EEXIST) {
1064 +                       fprintf(stderr,
1065 +                               "InnoDB: HugeTLB: The shared memory segment exists.\n");
1066 +                       shmid = shmget((key_t)key, (size_t)size,
1067 +                                       SHM_HUGETLB | SHM_R | SHM_W);
1068 +                       if (shmid < 0) {
1069 +                               fprintf(stderr,
1070 +                                       "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
1071 +                                       size, errno);
1072 +                               goto skip;
1073 +                       } else {
1074 +                               fprintf(stderr,
1075 +                                       "InnoDB: HugeTLB: The existent shared memory segment is used.\n");
1076 +                       }
1077 +               } else {
1078 +                       fprintf(stderr,
1079 +                               "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
1080 +                               size, errno);
1081 +                       goto skip;
1082 +               }
1083 +       } else {
1084 +               *is_new = TRUE;
1085 +               fprintf(stderr,
1086 +                       "InnoDB: HugeTLB: A new shared memory segment has been created .\n");
1087 +       }
1088 +
1089 +       ptr = shmat(shmid, NULL, 0);
1090 +       if (ptr == (void *)-1) {
1091 +               fprintf(stderr,
1092 +                       "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n",
1093 +                       errno);
1094 +               ptr = NULL;
1095 +       }
1096 +
1097 +       if (ptr) {
1098 +               *n = size;
1099 +               os_fast_mutex_lock(&ut_list_mutex);
1100 +               ut_total_allocated_memory += size;
1101 +               os_fast_mutex_unlock(&ut_list_mutex);
1102 +               UNIV_MEM_ALLOC(ptr, size);
1103 +               return(ptr);
1104 +       }
1105 +skip:
1106 +       *is_new = FALSE;
1107 +# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */
1108 +# ifdef HAVE_GETPAGESIZE
1109 +       size = getpagesize();
1110 +# else
1111 +       size = UNIV_PAGE_SIZE;
1112 +# endif
1113 +       /* Align block size to system page size */
1114 +       ut_ad(ut_is_2pow(size));
1115 +       size = *n = ut_2pow_round(*n + (size - 1), size);
1116 +
1117 +       shmid = shmget((key_t)key, (size_t)size,
1118 +                       IPC_CREAT | IPC_EXCL | SHM_R | SHM_W);
1119 +       if (shmid < 0) {
1120 +               if (errno == EEXIST) {
1121 +                       fprintf(stderr,
1122 +                               "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
1123 +                       shmid = shmget((key_t)key, (size_t)size,
1124 +                                       SHM_R | SHM_W);
1125 +                       if (shmid < 0) {
1126 +                               fprintf(stderr,
1127 +                                       "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
1128 +                                       size, errno);
1129 +                               ptr = NULL;
1130 +                               goto end;
1131 +                       } else {
1132 +                               fprintf(stderr,
1133 +                                       "InnoDB: The existent shared memory segment is used.\n");
1134 +                       }
1135 +               } else {
1136 +                       fprintf(stderr,
1137 +                               "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
1138 +                               size, errno);
1139 +                       ptr = NULL;
1140 +                       goto end;
1141 +               }
1142 +       } else {
1143 +               *is_new = TRUE;
1144 +               fprintf(stderr,
1145 +                       "InnoDB: A new shared memory segment has been created.\n");
1146 +       }
1147 +
1148 +       ptr = shmat(shmid, NULL, 0);
1149 +       if (ptr == (void *)-1) {
1150 +               fprintf(stderr,
1151 +                       "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n",
1152 +                       errno);
1153 +               ptr = NULL;
1154 +       }
1155 +
1156 +       if (ptr) {
1157 +               *n = size;
1158 +               os_fast_mutex_lock(&ut_list_mutex);
1159 +               ut_total_allocated_memory += size;
1160 +               os_fast_mutex_unlock(&ut_list_mutex);
1161 +               UNIV_MEM_ALLOC(ptr, size);
1162 +       }
1163 +end:
1164 +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1165 +       fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1166 +       ptr = NULL;
1167 +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1168 +       return(ptr);
1169 +}
1170 +
1171 +/****************************************************************//**
1172 +Detaches the shared memory segment. */
1173 +UNIV_INTERN
1174 +void
1175 +os_shm_free(
1176 +/*========*/
1177 +       void    *ptr,                   /*!< in: pointer returned by
1178 +                                       os_shm_alloc() */
1179 +       ulint   size)                   /*!< in: size returned by
1180 +                                       os_shm_alloc() */
1181 +{
1182 +       os_fast_mutex_lock(&ut_list_mutex);
1183 +       ut_a(ut_total_allocated_memory >= size);
1184 +       os_fast_mutex_unlock(&ut_list_mutex);
1185 +
1186 +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
1187 +       if (!shmdt(ptr)) {
1188 +               os_fast_mutex_lock(&ut_list_mutex);
1189 +               ut_a(ut_total_allocated_memory >= size);
1190 +               ut_total_allocated_memory -= size;
1191 +               os_fast_mutex_unlock(&ut_list_mutex);
1192 +               UNIV_MEM_FREE(ptr, size);
1193 +       }
1194 +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1195 +       fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1196 +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1197 +}
1198 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
1199 --- a/storage/innobase/srv/srv0srv.c    2010-12-04 20:20:44.687550693 +0900
1200 +++ b/storage/innobase/srv/srv0srv.c    2010-12-07 16:10:14.962785720 +0900
1201 @@ -233,6 +233,11 @@
1202  UNIV_INTERN ulint      srv_mem_pool_size       = ULINT_MAX;
1203  UNIV_INTERN ulint      srv_lock_table_size     = ULINT_MAX;
1204  
1205 +/* key value for shm */
1206 +UNIV_INTERN uint       srv_buffer_pool_shm_key = 0;
1207 +UNIV_INTERN ibool      srv_buffer_pool_shm_is_reused = FALSE;
1208 +UNIV_INTERN ibool      srv_buffer_pool_shm_checksum = TRUE;
1209 +
1210  /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
1211  instead. */
1212  UNIV_INTERN ulint      srv_n_file_io_threads   = ULINT_MAX;
1213 diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
1214 --- a/storage/innobase/srv/srv0start.c  2010-12-04 20:19:29.806482628 +0900
1215 +++ b/storage/innobase/srv/srv0start.c  2010-12-07 16:10:14.964785346 +0900
1216 @@ -1759,6 +1759,8 @@
1217                 Note that this is not as heavy weight as it seems. At
1218                 this point there will be only ONE page in the buf_LRU
1219                 and there must be no page in the buf_flush list. */
1220 +               /* buffer_pool_shm should not be reused when recovery is needed. */
1221 +               if (!srv_buffer_pool_shm_is_reused)
1222                 buf_pool_invalidate();
1223  
1224                 /* We always try to do a recovery, even if the database had