git.pld-linux.org Git - packages/mysql.git/blame - innodb_buffer_pool_shm.patch
- up to 5.5.10
[packages/mysql.git] / innodb_buffer_pool_shm.patch
CommitLineData
b4e1fa2c
AM
1# name : innodb_buffer_pool_shm.patch
2# introduced : 12
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
8diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
9--- a/storage/innobase/buf/buf0buddy.c 2010-12-04 19:46:39.372513543 +0900
10+++ b/storage/innobase/buf/buf0buddy.c 2010-12-07 17:56:28.302087851 +0900
11@@ -183,7 +183,7 @@
12 void* buf, /*!< in: buffer frame to deallocate */
13 ibool have_page_hash_mutex)
14 {
15- const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
16+ const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf_pool, buf);
17 buf_page_t* bpage;
18 buf_block_t* block;
19
20@@ -227,7 +227,7 @@
21 buf_block_t* block) /*!< in: buffer frame to allocate */
22 {
23 buf_pool_t* buf_pool = buf_pool_from_block(block);
24- const ulint fold = BUF_POOL_ZIP_FOLD(block);
25+ const ulint fold = BUF_POOL_ZIP_FOLD(buf_pool, block);
26 //ut_ad(buf_pool_mutex_own(buf_pool));
27 ut_ad(!mutex_own(&buf_pool->zip_mutex));
28 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
29diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
30--- a/storage/innobase/buf/buf0buf.c 2010-12-06 20:16:21.726195340 +0900
31+++ b/storage/innobase/buf/buf0buf.c 2010-12-07 20:40:30.824749814 +0900
32@@ -53,6 +53,10 @@
33 #include "page0zip.h"
34 #include "trx0trx.h"
35 #include "srv0start.h"
36+#include "que0que.h"
37+#include "read0read.h"
38+#include "row0row.h"
39+#include "ha_prototypes.h"
40
41 /* prototypes for new functions added to ha_innodb.cc */
42 trx_t* innobase_get_trx();
43@@ -342,6 +346,31 @@
d8778560
AM
44 // was allocated for the frames */
45 // buf_block_t* blocks; /*!< array of buffer control blocks */
46 //};
b4e1fa2c
AM
47+
48+/* Buffer pool shared memory segment information */
49+typedef struct buf_shm_info_struct buf_shm_info_t;
50+
51+struct buf_shm_info_struct {
52+ char head_str[8];
53+ ulint binary_id;
54+ ibool is_new; /* during initializing */
55+ ibool clean; /* clean shutdowned and free */
56+ ibool reusable; /* reusable */
57+ ulint buf_pool_size; /* backup value */
58+ ulint page_size; /* backup value */
59+ ulint frame_offset; /* offset of the first frame based on chunk->mem */
60+ ulint zip_hash_offset;
61+ ulint zip_hash_n;
62+
63+ ulint checksum;
64+
65+ buf_pool_t buf_pool_backup;
66+ buf_chunk_t chunk_backup;
67+
68+ ib_uint64_t dummy;
69+};
70+
71+#define BUF_SHM_INFO_HEAD "XTRA_SHM"
72 #endif /* !UNIV_HOTBACKUP */
73
74 /********************************************************************//**
75@@ -988,6 +1017,58 @@
76 #endif /* UNIV_SYNC_DEBUG */
77 }
78
79+static
80+void
81+buf_block_reuse(
82+/*============*/
83+ buf_block_t* block,
84+ ptrdiff_t frame_offset)
85+{
86+ /* block_init */
87+ block->frame += frame_offset;
88+
89+ UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);
90+
91+ block->index = NULL;
92+ block->btr_search_latch = NULL;
93+
94+#ifdef UNIV_DEBUG
95+ /* recreate later */
96+ block->page.in_page_hash = FALSE;
97+ block->page.in_zip_hash = FALSE;
98+#endif /* UNIV_DEBUG */
99+
100+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
101+ block->n_pointers = 0;
102+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
103+
104+ if (block->page.zip.data)
105+ block->page.zip.data += frame_offset;
106+
107+ block->is_hashed = FALSE;
108+
109+#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
110+ /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
111+ of buffer block mutex/rwlock with performance schema. If
112+ PFS_GROUP_BUFFER_SYNC is defined, skip the registration
113+ since buffer block mutex/rwlock will be registered later in
114+ pfs_register_buffer_block() */
115+
116+ mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
117+ rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
118+#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
119+ mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
120+ rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
121+#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
122+
123+ ut_ad(rw_lock_validate(&(block->lock)));
124+
125+#ifdef UNIV_SYNC_DEBUG
126+ rw_lock_create(buf_block_debug_latch_key,
127+ &block->debug_latch, SYNC_NO_ORDER_CHECK);
128+#endif /* UNIV_SYNC_DEBUG */
129+}
130+
131 /********************************************************************//**
132 Allocates a chunk of buffer frames.
133 @return chunk, or NULL on failure */
a9ee80b9 134@@ -1001,26 +1082,190 @@
b4e1fa2c
AM
135 {
136 buf_block_t* block;
137 byte* frame;
138+ ulint zip_hash_n = 0;
139+ ulint zip_hash_mem_size = 0;
140+ hash_table_t* zip_hash_tmp = NULL;
141 ulint i;
a9ee80b9 142+ ulint size_target;
b4e1fa2c
AM
143+ buf_shm_info_t* shm_info = NULL;
144
145 /* Round down to a multiple of page size,
146 although it already should be. */
147 mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
a9ee80b9 148+ size_target = (mem_size / UNIV_PAGE_SIZE) - 1;
b4e1fa2c
AM
149+
150+ srv_buffer_pool_shm_is_reused = FALSE;
151+
152+ if (srv_buffer_pool_shm_key) {
153+ /* zip_hash size */
154+ zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
155+ zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n)
156+ + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
157+ }
158+
159 /* Reserve space for the block descriptors. */
160 mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
161 + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
162+ if (srv_buffer_pool_shm_key) {
163+ mem_size += ut_2pow_round(sizeof(buf_shm_info_t)
164+ + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
165+ mem_size += zip_hash_mem_size;
166+ }
167
168 chunk->mem_size = mem_size;
169+
170+ if (srv_buffer_pool_shm_key) {
171+ ulint binary_id;
172+ ibool is_new;
173+
174+ ut_a(buf_pool->n_chunks == 1);
175+
176+ fprintf(stderr,
177+ "InnoDB: Notice: The innodb_buffer_pool_shm_key option has been specified.\n"
178+ "InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
179+ "InnoDB: * the mysqld executable between restarts of the server.\n"
180+ "InnoDB: * the value of innodb_buffer_pool_size.\n"
181+ "InnoDB: * the value of innodb_page_size.\n"
182+ "InnoDB: * datafiles created by InnoDB during this session.\n"
183+ "InnoDB: Otherwise, data corruption in datafiles may result.\n");
184+
185+ /* FIXME: this is still only a vague identifier for the binary */
186+ binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
d8778560 187+ + (ulint) ((byte*)os_file_get_last_error - (byte*)buf_calc_page_new_checksum)
b4e1fa2c
AM
188+ + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
189+ + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
190+ + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
191+ + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
192+ + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
193+ + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
194+ + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
195+ + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
196+ + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);
197+
198+ chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);
199+
200+ if (UNIV_UNLIKELY(chunk->mem == NULL)) {
201+ return(NULL);
202+ }
203+init_again:
204+#ifdef UNIV_SET_MEM_TO_ZERO
205+ if (is_new) {
206+ memset(chunk->mem, '\0', chunk->mem_size);
207+ }
208+#endif
209+ /* for ut_fold_binary_32(), these values should be 32-bit aligned */
210+ ut_a(sizeof(buf_shm_info_t) % 4 == 0);
211+ ut_a((ulint)chunk->mem % 4 == 0);
212+ ut_a(chunk->mem_size % 4 == 0);
213+
214+ shm_info = chunk->mem;
215+
216+ zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);
217+
218+ if (is_new) {
219+ strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
220+ shm_info->binary_id = binary_id;
221+ shm_info->is_new = TRUE; /* changed to FALSE when the initialization is finished */
222+ shm_info->clean = FALSE; /* changed to TRUE when free the segment. */
223+ shm_info->reusable = FALSE; /* changed to TRUE when validation is finished. */
224+ shm_info->buf_pool_size = srv_buf_pool_size;
225+ shm_info->page_size = srv_page_size;
226+ shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size;
227+ shm_info->zip_hash_n = zip_hash_n;
228+ } else {
229+ ulint checksum;
230+
231+ if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) {
232+ fprintf(stderr,
233+ "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n");
234+ return(NULL);
235+ }
236+ if (shm_info->binary_id != binary_id) {
237+ fprintf(stderr,
238+ "InnoDB: Error: The shared memory segment seems not to be for this binary.\n");
239+ return(NULL);
240+ }
241+ if (shm_info->is_new) {
242+ fprintf(stderr,
243+ "InnoDB: Error: The shared memory was not initialized yet.\n");
244+ return(NULL);
245+ }
246+ if (shm_info->buf_pool_size != srv_buf_pool_size) {
247+ fprintf(stderr,
248+ "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
249+ shm_info->buf_pool_size, srv_buf_pool_size);
250+ return(NULL);
251+ }
252+ if (shm_info->page_size != srv_page_size) {
253+ fprintf(stderr,
254+ "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n",
255+ shm_info->page_size, srv_page_size);
256+ return(NULL);
257+ }
258+ if (!shm_info->reusable) {
259+ fprintf(stderr,
260+ "InnoDB: Warning: The shared memory has unrecoverable contents.\n"
261+ "InnoDB: The shared memory segment is initialized.\n");
262+ is_new = TRUE;
263+ goto init_again;
264+ }
265+ if (!shm_info->clean) {
266+ fprintf(stderr,
267+ "InnoDB: Warning: The shared memory was not shut down cleanly.\n"
268+ "InnoDB: The shared memory segment is initialized.\n");
269+ is_new = TRUE;
270+ goto init_again;
271+ }
272+
273+ ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
274+ ut_a(shm_info->zip_hash_n == zip_hash_n);
275+
276+ /* check checksum */
277+ if (srv_buffer_pool_shm_checksum) {
278+ checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
279+ chunk->mem_size - sizeof(buf_shm_info_t));
280+ } else {
281+ checksum = BUF_NO_CHECKSUM_MAGIC;
282+ }
283+
284+ if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
285+ && shm_info->checksum != checksum) {
286+ fprintf(stderr,
287+ "InnoDB: Error: checksum of the shared memory is not match. "
288+ "(stored=%lu calculated=%lu)\n",
289+ shm_info->checksum, checksum);
290+ return(NULL);
291+ }
292+
293+ /* flag to use the segment. */
294+ shm_info->clean = FALSE; /* changed to TRUE when free the segment. */
295+ }
296+
297+ /* init zip_hash contents */
298+ if (is_new) {
299+ hash_create_init(zip_hash_tmp, zip_hash_n);
300+ } else {
301+ /* the offset adjustment is done later */
302+ hash_create_reuse(zip_hash_tmp);
303+
304+ srv_buffer_pool_shm_is_reused = TRUE;
305+ }
306+ } else {
307 chunk->mem = os_mem_alloc_large(&chunk->mem_size);
308
309 if (UNIV_UNLIKELY(chunk->mem == NULL)) {
310
311 return(NULL);
312 }
313+ }
314
315 /* Allocate the block descriptors from
316 the start of the memory block. */
317+ if (srv_buffer_pool_shm_key) {
318+ chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
319+ } else {
320 chunk->blocks = chunk->mem;
321+ }
322
323 /* Align a pointer to the first frame. Note that when
324 os_large_page_size is smaller than UNIV_PAGE_SIZE,
a9ee80b9 325@@ -1028,8 +1273,13 @@
b4e1fa2c
AM
326 it is bigger, we may allocate more blocks than requested. */
327
328 frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
329+ if (srv_buffer_pool_shm_key) {
330+ /* reserve zip_hash space and always subtract 1 for reproducibility */
331+ chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1;
332+ } else {
333 chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
334 - (frame != chunk->mem);
335+ }
336
337 /* Subtract the space needed for block descriptors. */
338 {
a9ee80b9 339@@ -1043,6 +1293,102 @@
b4e1fa2c
AM
340 chunk->size = size;
341 }
342
a9ee80b9
ER
343+ if (chunk->size > size_target) {
344+ chunk->size = size_target;
345+ }
346+
b4e1fa2c
AM
347+ if (shm_info && !(shm_info->is_new)) {
348+ /* convert the shared memory segment for reuse */
349+ ptrdiff_t phys_offset;
350+ ptrdiff_t logi_offset;
351+ ptrdiff_t blocks_offset;
352+ void* previous_frame_address;
353+
354+ if (chunk->size < shm_info->chunk_backup.size) {
355+ fprintf(stderr,
356+ "InnoDB: Error: The buffer pool became smaller because of allocated address.\n"
357+ "InnoDB: Retrying may avoid this situation.\n");
358+ shm_info->clean = TRUE; /* release the flag for retrying */
359+ return(NULL);
360+ }
361+
362+ chunk->size = shm_info->chunk_backup.size;
363+ phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
364+ logi_offset = frame - chunk->blocks[0].frame;
365+ previous_frame_address = chunk->blocks[0].frame;
366+ blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;
367+
368+ if (phys_offset || logi_offset || blocks_offset) {
369+ fprintf(stderr,
370+ "InnoDB: Buffer pool in the shared memory segment should be converted.\n"
371+ "InnoDB: Previous frames in address : %p\n"
372+ "InnoDB: Previous frames were located : %p\n"
373+ "InnoDB: Current frames should be located: %p\n"
374+ "InnoDB: Pysical offset : %ld (%#lx)\n"
375+ "InnoDB: Logical offset (frames) : %ld (%#lx)\n"
376+ "InnoDB: Logical offset (blocks) : %ld (%#lx)\n",
377+ (byte*)chunk->mem + shm_info->frame_offset,
378+ chunk->blocks[0].frame, frame,
379+ phys_offset, phys_offset, logi_offset, logi_offset,
380+ blocks_offset, blocks_offset);
381+ } else {
382+ fprintf(stderr,
383+ "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
384+ }
385+
386+ if (phys_offset) {
387+ fprintf(stderr,
388+ "InnoDB: Aligning physical offset...");
389+
390+ memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
391+ chunk->size * UNIV_PAGE_SIZE);
392+
393+ fprintf(stderr,
394+ " Done.\n");
395+ }
396+
397+ /* buf_block_t */
398+ block = chunk->blocks;
399+ for (i = chunk->size; i--; ) {
400+ buf_block_reuse(block, logi_offset);
401+ block++;
402+ }
403+
404+ if (logi_offset || blocks_offset) {
405+ fprintf(stderr,
406+ "InnoDB: Aligning logical offset...");
407+
408+
409+ /* buf_pool_t buf_pool_backup */
410+ UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
411+ previous_frame_address, logi_offset, blocks_offset);
412+ UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free,
413+ previous_frame_address, logi_offset, blocks_offset);
414+ UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU,
415+ previous_frame_address, logi_offset, blocks_offset);
416+ if (shm_info->buf_pool_backup.LRU_old)
417+ shm_info->buf_pool_backup.LRU_old =
418+ (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
419+ + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
420+ ? logi_offset : blocks_offset));
421+
422+ UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
423+ previous_frame_address, logi_offset, blocks_offset);
424+
425+ UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean,
426+ previous_frame_address, logi_offset, blocks_offset);
427+ for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
428+ UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i],
429+ previous_frame_address, logi_offset, blocks_offset);
430+ }
431+
432+ HASH_OFFSET(zip_hash_tmp, buf_page_t, hash,
433+ previous_frame_address, logi_offset, blocks_offset);
434+
435+ fprintf(stderr,
436+ " Done.\n");
437+ }
438+ } else {
439 /* Init block structs and assign frames for them. Then we
440 assign the frames to the first blocks (we already mapped the
441 memory above). */
a9ee80b9 442@@ -1068,6 +1414,11 @@
b4e1fa2c
AM
443 block++;
444 frame += UNIV_PAGE_SIZE;
445 }
446+ }
447+
448+ if (shm_info) {
449+ shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
450+ }
451
452 #ifdef PFS_GROUP_BUFFER_SYNC
453 pfs_register_buffer_block(chunk);
a9ee80b9 454@@ -1249,6 +1600,8 @@
b4e1fa2c
AM
455 UNIV_MEM_UNDESC(block);
456 }
457
458+ ut_a(!srv_buffer_pool_shm_key);
459+
460 os_mem_free_large(chunk->mem, chunk->mem_size);
461 }
462
a9ee80b9 463@@ -1289,7 +1642,7 @@
b4e1fa2c
AM
464 ulint instance_no) /*!< in: id of the instance */
465 {
466 ulint i;
467- buf_chunk_t* chunk;
468+ buf_chunk_t* chunk = NULL;
469
470 /* 1. Initialize general fields
471 ------------------------------- */
a9ee80b9 472@@ -1335,7 +1688,10 @@
b4e1fa2c
AM
473 buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
474
475 buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
476+ /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */
477+ if (!srv_buffer_pool_shm_key) {
478 buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
479+ }
480
481 buf_pool->last_printout_time = ut_time();
482 }
a9ee80b9 483@@ -1354,6 +1710,86 @@
b4e1fa2c
AM
484
485 /* All fields are initialized by mem_zalloc(). */
486
487+ if (chunk && srv_buffer_pool_shm_key) {
488+ buf_shm_info_t* shm_info;
489+
490+ ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
491+ shm_info = chunk->mem;
492+
493+ buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);
494+
495+ if(shm_info->is_new) {
496+ shm_info->is_new = FALSE; /* initialization was finished */
497+ } else {
498+ buf_block_t* block = chunk->blocks;
499+ buf_page_t* b;
500+
501+ /* shm_info->buf_pool_backup should already have been converted */
502+ /* in buf_chunk_init(), so simply copy it. */
503+ buf_pool->flush_list = shm_info->buf_pool_backup.flush_list;
504+ buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock;
505+ buf_pool->free = shm_info->buf_pool_backup.free;
506+ buf_pool->LRU = shm_info->buf_pool_backup.LRU;
507+ buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old;
508+ buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len;
509+ buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU;
510+ buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean;
511+ for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
512+ buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i];
513+ }
514+
515+ for (i = 0; i < chunk->size; i++, block++) {
516+ if (buf_block_get_state(block)
517+ == BUF_BLOCK_FILE_PAGE) {
518+ ut_d(block->page.in_page_hash = TRUE);
519+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
520+ buf_page_address_fold(
521+ block->page.space,
522+ block->page.offset),
523+ &block->page);
524+ }
525+ }
526+
527+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
528+ b = UT_LIST_GET_NEXT(zip_list, b)) {
529+ ut_ad(!b->in_flush_list);
530+ ut_ad(b->in_LRU_list);
531+
532+ ut_d(b->in_page_hash = TRUE);
533+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
534+ buf_page_address_fold(b->space, b->offset), b);
535+ }
536+
537+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
538+ b = UT_LIST_GET_NEXT(flush_list, b)) {
539+ ut_ad(b->in_flush_list);
540+ ut_ad(b->in_LRU_list);
541+
542+ switch (buf_page_get_state(b)) {
543+ case BUF_BLOCK_ZIP_DIRTY:
544+ ut_d(b->in_page_hash = TRUE);
545+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
546+ buf_page_address_fold(b->space,
547+ b->offset), b);
548+ break;
549+ case BUF_BLOCK_FILE_PAGE:
550+ /* uncompressed page */
551+ break;
552+ case BUF_BLOCK_ZIP_FREE:
553+ case BUF_BLOCK_ZIP_PAGE:
554+ case BUF_BLOCK_NOT_USED:
555+ case BUF_BLOCK_READY_FOR_USE:
556+ case BUF_BLOCK_MEMORY:
557+ case BUF_BLOCK_REMOVE_HASH:
558+ ut_error;
559+ break;
560+ }
561+ }
562+
563+
564+ }
565+ }
566+
567 mutex_exit(&buf_pool->LRU_list_mutex);
568 rw_lock_x_unlock(&buf_pool->page_hash_latch);
569 buf_pool_mutex_exit(buf_pool);
a9ee80b9 570@@ -1373,6 +1809,42 @@
b4e1fa2c
AM
571 buf_chunk_t* chunk;
572 buf_chunk_t* chunks;
573
574+ if (srv_buffer_pool_shm_key) {
575+ buf_shm_info_t* shm_info;
576+
577+ ut_a(buf_pool->n_chunks == 1);
578+
579+ chunk = buf_pool->chunks;
580+ shm_info = chunk->mem;
581+ ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
582+
583+ /* if opened, close shm. */
584+ if (!shm_info->clean) {
585+ /* validate that the shared memory segment doesn't have unrecoverable contents. */
586+ /* Currently, this validation is no longer needed */
587+ shm_info->reusable = TRUE;
588+
589+ memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t));
590+ memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));
591+
592+ if (srv_fast_shutdown < 2) {
593+ if (srv_buffer_pool_shm_checksum) {
594+ shm_info->checksum =
595+ ut_fold_binary_32(
596+ (byte*)chunk->mem + sizeof(buf_shm_info_t),
597+ chunk->mem_size - sizeof(buf_shm_info_t));
598+ } else {
599+ shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
600+ }
601+ shm_info->clean = TRUE;
602+ }
603+
604+ fprintf(stderr,
605+ "InnoDB: The shared memory was closed.\n");
606+ }
607+
608+ os_shm_free(chunk->mem, chunk->mem_size);
609+ } else {
610 chunks = buf_pool->chunks;
611 chunk = chunks + buf_pool->n_chunks;
612
a9ee80b9 613@@ -1381,10 +1853,13 @@
b4e1fa2c
AM
614 would fail at shutdown. */
615 os_mem_free_large(chunk->mem, chunk->mem_size);
616 }
617+ }
618
619 mem_free(buf_pool->chunks);
620 hash_table_free(buf_pool->page_hash);
621+ if (!srv_buffer_pool_shm_key) {
622 hash_table_free(buf_pool->zip_hash);
623+ }
624 }
625
626 /********************************************************************//**
a9ee80b9 627@@ -1668,6 +2143,11 @@
b4e1fa2c
AM
628 //buf_pool_mutex_enter(buf_pool);
629 mutex_enter(&buf_pool->LRU_list_mutex);
630
631+ if (srv_buffer_pool_shm_key) {
632+ /* Cannot support shrink */
633+ goto func_done;
634+ }
635+
636 shrink_again:
637 if (buf_pool->n_chunks <= 1) {
638
a9ee80b9 639@@ -1848,7 +2328,7 @@
b4e1fa2c
AM
640 zip_hash = hash_create(2 * buf_pool->curr_size);
641
642 HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
643- BUF_POOL_ZIP_FOLD_BPAGE);
644+ buf_pool, BUF_POOL_ZIP_FOLD_BPAGE);
645
646 hash_table_free(buf_pool->zip_hash);
647 buf_pool->zip_hash = zip_hash;
a9ee80b9 648@@ -2130,6 +2610,11 @@
b4e1fa2c
AM
649 ulint change_size;
650 ulint min_change_size = 1048576 * srv_buf_pool_instances;
651
652+ if (srv_buffer_pool_shm_key) {
653+ /* Cannot support resize */
654+ return;
655+ }
656+
657 buf_pool_mutex_enter_all();
658
659 if (srv_buf_pool_old_size == srv_buf_pool_size) {
660diff -ruN a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c
661--- a/storage/innobase/ha/hash0hash.c 2010-11-03 07:01:13.000000000 +0900
662+++ b/storage/innobase/ha/hash0hash.c 2010-12-07 16:10:14.937749140 +0900
663@@ -133,6 +133,70 @@
664 }
665
666 /*************************************************************//**
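667+Returns the bytes needed for a hash_table_t header (padded to a multiple of 8) plus ut_find_prime(n) cells. */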
668+UNIV_INTERN
669+ulint
670+hash_create_needed(
671+/*===============*/
672+ ulint n)
673+{
674+ ulint prime;
675+ ulint offset;
676+
677+ prime = ut_find_prime(n);
678+
679+ offset = (sizeof(hash_table_t) + 7) / 8;
680+ offset *= 8;
681+
682+ return(offset + sizeof(hash_cell_t) * prime);
683+}
684+
685+UNIV_INTERN
686+void
687+hash_create_init(
688+/*=============*/
689+ hash_table_t* table,
690+ ulint n)
691+{
692+ ulint prime;
693+ ulint offset;
694+
695+ prime = ut_find_prime(n);
696+
697+ offset = (sizeof(hash_table_t) + 7) / 8;
698+ offset *= 8;
699+
700+ table->array = (hash_cell_t*)(((byte*)table) + offset);
701+ table->n_cells = prime;
702+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
703+ table->adaptive = FALSE;
704+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
705+ table->n_mutexes = 0;
706+ table->mutexes = NULL;
707+ table->heaps = NULL;
708+ table->heap = NULL;
709+ ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
710+
711+ /* Initialize the cell array */
712+ hash_table_clear(table);
713+}
714+
715+UNIV_INTERN
716+void
717+hash_create_reuse(
718+/*==============*/
719+ hash_table_t* table)
720+{
721+ ulint offset;
722+
723+ offset = (sizeof(hash_table_t) + 7) / 8;
724+ offset *= 8;
725+
726+ table->array = (hash_cell_t*)(((byte*)table) + offset);
727+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
728+}
729+
730+/*************************************************************//**
731 Frees a hash table. */
732 UNIV_INTERN
733 void
734diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
735--- a/storage/innobase/handler/ha_innodb.cc 2010-12-06 20:16:21.733263627 +0900
736+++ b/storage/innobase/handler/ha_innodb.cc 2010-12-07 17:56:28.316139830 +0900
737@@ -194,6 +194,7 @@
738 static my_bool innobase_create_status_file = FALSE;
739 static my_bool innobase_stats_on_metadata = TRUE;
740 static my_bool innobase_use_sys_stats_table = FALSE;
741+static my_bool innobase_buffer_pool_shm_checksum = TRUE;
742
743
744 static char* internal_innobase_data_file_path = NULL;
df1b5770 745@@ -2643,6 +2644,14 @@
b4e1fa2c
AM
746 srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
747 srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
748
749+ if (srv_buffer_pool_shm_key && srv_buf_pool_instances > 1) {
750+ fprintf(stderr,
751+ "InnoDB: Warning: innodb_buffer_pool_shm_key cannot be used with several innodb_buffer_pool_instances.\n"
752+ "InnoDB: innodb_buffer_pool_instances was set to 1.\n");
753+ srv_buf_pool_instances = 1;
754+ innobase_buffer_pool_instances = 1;
755+ }
756+
757 srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
758
759 srv_n_file_io_threads = (ulint) innobase_file_io_threads;
df1b5770 760@@ -2659,6 +2668,7 @@
b4e1fa2c
AM
761 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
762 srv_use_checksums = (ibool) innobase_use_checksums;
763 srv_fast_checksum = (ibool) innobase_fast_checksum;
764+ srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;
765
766 #ifdef HAVE_LARGE_PAGES
767 if ((os_use_large_pages = (ibool) my_use_large_pages))
df1b5770 768@@ -11702,6 +11712,16 @@
b4e1fa2c
AM
769 "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
770 NULL, NULL, 1L, 1L, MAX_BUFFER_POOLS, 1L);
771
772+static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,
773+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
774+ "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",
775+ NULL, NULL, 0, 0, INT_MAX32, 0);
776+
777+static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
778+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
779+ "Enable buffer_pool_shm checksum validation (enabled by default).",
780+ NULL, NULL, TRUE);
781+
782 static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
783 PLUGIN_VAR_RQCMDARG,
784 "Helps in performance tuning in heavily concurrent environments.",
df1b5770 785@@ -12000,6 +12020,8 @@
b4e1fa2c
AM
786 MYSQL_SYSVAR(autoextend_increment),
787 MYSQL_SYSVAR(buffer_pool_size),
788 MYSQL_SYSVAR(buffer_pool_instances),
789+ MYSQL_SYSVAR(buffer_pool_shm_key),
790+ MYSQL_SYSVAR(buffer_pool_shm_checksum),
791 MYSQL_SYSVAR(checksums),
792 MYSQL_SYSVAR(fast_checksum),
793 MYSQL_SYSVAR(commit_concurrency),
794diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
795--- a/storage/innobase/include/buf0buf.h 2010-12-06 20:16:21.778264552 +0900
796+++ b/storage/innobase/include/buf0buf.h 2010-12-07 17:56:28.322749380 +0900
797@@ -36,6 +36,7 @@
798 #ifndef UNIV_HOTBACKUP
799 #include "ut0rbt.h"
800 #include "os0proc.h"
801+#include "srv0srv.h"
802
803 /** @name Modes for buf_page_get_gen */
804 /* @{ */
a9ee80b9 805@@ -1592,9 +1593,12 @@
b4e1fa2c
AM
806 /**********************************************************************//**
807 Compute the hash fold value for blocks in buf_pool->zip_hash. */
808 /* @{ */
809-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
810-#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
811-#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
812+/* the fold should be relative when srv_buffer_pool_shm_key is enabled */
813+#define BUF_POOL_ZIP_FOLD_PTR(bpool, ptr) (!srv_buffer_pool_shm_key\
814+ ?((ulint) (ptr) / UNIV_PAGE_SIZE)\
815+ :((ulint) ((byte*)ptr - (byte*)(buf_page_from_array(bpool, 0)->frame)) / UNIV_PAGE_SIZE))
816+#define BUF_POOL_ZIP_FOLD(bpool, b) BUF_POOL_ZIP_FOLD_PTR(bpool, (b)->frame)
817+#define BUF_POOL_ZIP_FOLD_BPAGE(bpool, b) BUF_POOL_ZIP_FOLD(bpool, (buf_block_t*) (b))
818 /* @} */
819
d8778560 820 /** A chunk of buffers. The buffer pool is allocated in chunks. */
b4e1fa2c
AM
821diff -ruN a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
822--- a/storage/innobase/include/hash0hash.h 2010-11-03 07:01:13.000000000 +0900
823+++ b/storage/innobase/include/hash0hash.h 2010-12-07 17:56:28.324726446 +0900
824@@ -49,6 +49,28 @@
825 hash_create(
826 /*========*/
827 ulint n); /*!< in: number of array cells */
828+
829+/*************************************************************//**
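830+Returns the number of bytes needed to hold a hash table created for n cells in one contiguous area. */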
831+UNIV_INTERN
832+ulint
833+hash_create_needed(
834+/*===============*/
835+ ulint n);
836+
837+UNIV_INTERN
838+void
839+hash_create_init(
840+/*=============*/
841+ hash_table_t* table,
842+ ulint n);
843+
844+UNIV_INTERN
845+void
846+hash_create_reuse(
847+/*==============*/
848+ hash_table_t* table);
849+
850 #ifndef UNIV_HOTBACKUP
851 /*************************************************************//**
852 Creates a mutex array to protect a hash table. */
853@@ -306,7 +328,7 @@
854 /****************************************************************//**
855 Move all hash table entries from OLD_TABLE to NEW_TABLE. */
856
857-#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
858+#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, BPOOL, FOLD_FUNC) \
859 do {\
860 ulint i2222;\
861 ulint cell_count2222;\
862@@ -318,7 +340,7 @@
863 \
864 while (node2222) {\
865 NODE_TYPE* next2222 = node2222->PTR_NAME;\
866- ulint fold2222 = FOLD_FUNC(node2222);\
867+ ulint fold2222 = FOLD_FUNC(BPOOL, node2222);\
868 \
869 HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
870 fold2222, node2222);\
871@@ -327,6 +349,33 @@
872 }\
873 }\
874 } while (0)
875+
876+/********************************************************************//**
877+Rebase node pointers after relocation: pointers above FADDR move by FOFFSET, all others by BOFFSET. */
878+#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \
879+do {\
880+ ulint i2222;\
881+ ulint cell_count2222;\
882+\
883+ cell_count2222 = hash_get_n_cells(TABLE);\
884+\
885+ for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
886+ NODE_TYPE* node2222;\
887+\
888+ if ((TABLE)->array[i2222].node) \
889+ (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
890+ + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\
891+ node2222 = HASH_GET_FIRST((TABLE), i2222);\
892+\
893+ while (node2222) {\
894+ if (node2222->PTR_NAME) \
895+ node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
896+ + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\
897+\
898+ node2222 = node2222->PTR_NAME;\
899+ }\
900+ }\
901+} while (0)
902
903 /************************************************************//**
904 Gets the mutex index for a fold value in a hash table.
905diff -ruN a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
906--- a/storage/innobase/include/os0proc.h 2010-11-03 07:01:13.000000000 +0900
907+++ b/storage/innobase/include/os0proc.h 2010-12-07 16:10:14.955718750 +0900
908@@ -32,6 +32,11 @@
909 #ifdef UNIV_LINUX
910 #include <sys/ipc.h>
911 #include <sys/shm.h>
912+#else
913+# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
914+#include <sys/ipc.h>
915+#include <sys/shm.h>
916+# endif
917 #endif
918
919 typedef void* os_process_t;
920@@ -70,6 +75,29 @@
921 ulint size); /*!< in: size returned by
922 os_mem_alloc_large() */
923
924+
925+/****************************************************************//**
926+Allocates or attaches and reuses shared memory segment.
927+The content is not cleared automatically.
928+@return allocated memory, or NULL on failure */
929+UNIV_INTERN
930+void*
931+os_shm_alloc(
932+/*=========*/
933+ ulint* n, /*!< in/out: number of bytes; rounded up to a multiple of the page size used */
934+ uint key, /*!< in: key passed to shmget() */
935+ ibool* is_new); /*!< out: TRUE if shmget() created a new segment, else FALSE; left untouched in builds without SysV shm */
936+
937+/****************************************************************//**
938+Detaches a shared memory segment with shmdt(); no call here removes the segment. */
939+UNIV_INTERN
940+void
941+os_shm_free(
942+/*========*/
943+ void *ptr, /*!< in: pointer returned by
944+ os_shm_alloc() */
945+ ulint size); /*!< in: size returned by
946+ os_shm_alloc() */
947 #ifndef UNIV_NONINL
948 #include "os0proc.ic"
949 #endif
950diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
951--- a/storage/innobase/include/srv0srv.h 2010-12-04 20:20:28.016566697 +0900
952+++ b/storage/innobase/include/srv0srv.h 2010-12-07 16:10:14.956717659 +0900
953@@ -171,6 +171,10 @@
954 extern ulint srv_mem_pool_size;
955 extern ulint srv_lock_table_size;
956
957+extern uint srv_buffer_pool_shm_key;
958+extern ibool srv_buffer_pool_shm_is_reused;
959+extern ibool srv_buffer_pool_shm_checksum;
960+
961 extern ibool srv_thread_concurrency_timer_based;
962
963 extern ulint srv_n_file_io_threads;
964diff -ruN a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
965--- a/storage/innobase/include/ut0lst.h 2010-11-03 07:01:13.000000000 +0900
966+++ b/storage/innobase/include/ut0lst.h 2010-12-07 16:10:14.957785525 +0900
967@@ -257,5 +257,48 @@
968 ut_a(ut_list_node_313 == NULL); \
969 } while (0)
970
971+/********************************************************************//**
972+Shift every non-NULL list pointer (base start/end, node prev/next), then check that count nodes are reachable in both directions.
973+@param NAME the name of the list
974+@param TYPE node type
975+@param BASE base node (not a pointer to it)
976+@param FADDR,FOFFSET,BOFFSET pointers above FADDR move by FOFFSET bytes, all others by BOFFSET bytes */
977+#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \
978+do { \
979+ ulint ut_list_i_313; \
980+ TYPE* ut_list_node_313; \
981+ \
982+ if ((BASE).start) \
983+ (BASE).start = (void*)((byte*)((BASE).start) \
984+ + (((void*)((BASE).start) > (void*)(FADDR)) ? (FOFFSET) : (BOFFSET)));\
985+ if ((BASE).end) \
986+ (BASE).end = (void*)((byte*)((BASE).end) \
987+ + (((void*)((BASE).end) > (void*)(FADDR)) ? (FOFFSET) : (BOFFSET)));\
988+ \
989+ ut_list_node_313 = (BASE).start; \
990+ \
991+ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
992+ ut_a(ut_list_node_313); \
993+ if ((ut_list_node_313->NAME).prev) \
994+ (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
995+ + (((void*)((ut_list_node_313->NAME).prev) > (void*)(FADDR)) ? (FOFFSET) : (BOFFSET)));\
996+ if ((ut_list_node_313->NAME).next) \
997+ (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\
998+ + (((void*)((ut_list_node_313->NAME).next) > (void*)(FADDR)) ? (FOFFSET) : (BOFFSET)));\
999+ ut_list_node_313 = (ut_list_node_313->NAME).next; \
1000+ } \
1001+ \
1002+ ut_a(ut_list_node_313 == NULL); \
1003+ \
1004+ ut_list_node_313 = (BASE).end; \
1005+ \
1006+ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
1007+ ut_a(ut_list_node_313); \
1008+ ut_list_node_313 = (ut_list_node_313->NAME).prev; \
1009+ } \
1010+ \
1011+ ut_a(ut_list_node_313 == NULL); \
1012+} while (0)
1013+
1014 #endif
1015
1016diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
1017--- a/storage/innobase/log/log0recv.c 2010-12-04 19:46:40.212513377 +0900
1018+++ b/storage/innobase/log/log0recv.c 2010-12-07 16:10:14.959785817 +0900
1019@@ -2912,6 +2912,7 @@
1020 /*==========================*/
1021 {
1022 ut_a(!recv_needed_recovery);
1023+ ut_a(!srv_buffer_pool_shm_is_reused);
1024
1025 recv_needed_recovery = TRUE;
1026
1027diff -ruN a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c
1028--- a/storage/innobase/os/os0proc.c 2010-11-03 07:01:13.000000000 +0900
1029+++ b/storage/innobase/os/os0proc.c 2010-12-07 16:10:14.960800123 +0900
1030@@ -229,3 +229,173 @@
1031 }
1032 #endif
1033 }
1034+
1035+/****************************************************************//**
1036+Allocates or attaches and reuses shared memory segment.
1037+The content is not cleared automatically.
1038+@return allocated memory, or NULL on failure */
1039+UNIV_INTERN
1040+void*
1041+os_shm_alloc(
1042+/*=========*/
1043+ ulint* n, /*!< in/out: number of bytes; rounded up to a multiple of the page size used */
1044+ uint key, /*!< in: key passed to shmget() */
1045+ ibool* is_new) /*!< out: TRUE if shmget() created a new segment, else FALSE; left untouched in builds without SysV shm */
1046+{
1047+ void* ptr;
1048+#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
1049+ ulint size;
1050+ int shmid;
1051+
1052+ *is_new = FALSE;
1053+ fprintf(stderr,
1054+ "InnoDB: The shared memory segment containing the buffer pool is: key %#x (%d).\n",
1055+ key, key);
1056+# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
1057+ if (!os_use_large_pages || !os_large_page_size) {
1058+ goto skip;
1059+ }
1060+
1061+ /* Align block size to os_large_page_size */
1062+ ut_ad(ut_is_2pow(os_large_page_size));
1063+ size = ut_2pow_round(*n + (os_large_page_size - 1),
1064+ os_large_page_size);
1065+
1066+ shmid = shmget((key_t)key, (size_t)size,
1067+ IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W);
1068+ if (shmid < 0) {
1069+ if (errno == EEXIST) { /* key already in use: try to open the existing segment */
1070+ fprintf(stderr,
1071+ "InnoDB: HugeTLB: The shared memory segment exists.\n");
1072+ shmid = shmget((key_t)key, (size_t)size,
1073+ SHM_HUGETLB | SHM_R | SHM_W);
1074+ if (shmid < 0) {
1075+ fprintf(stderr,
1076+ "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
1077+ size, errno);
1078+ goto skip;
1079+ } else {
1080+ fprintf(stderr,
1081+ "InnoDB: HugeTLB: The existent shared memory segment is used.\n");
1082+ }
1083+ } else {
1084+ fprintf(stderr,
1085+ "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
1086+ size, errno);
1087+ goto skip;
1088+ }
1089+ } else {
1090+ *is_new = TRUE;
1091+ fprintf(stderr,
1092+ "InnoDB: HugeTLB: A new shared memory segment has been created .\n");
1093+ }
1094+
1095+ ptr = shmat(shmid, NULL, 0);
1096+ if (ptr == (void *)-1) { /* shmat() failure value */
1097+ fprintf(stderr,
1098+ "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n",
1099+ errno);
1100+ ptr = NULL; /* NOTE(review): a segment created above is not removed before falling through to the ordinary-page path, which reuses the same key - confirm */
1101+ }
1102+
1103+ if (ptr) {
1104+ *n = size;
1105+ os_fast_mutex_lock(&ut_list_mutex);
1106+ ut_total_allocated_memory += size;
1107+ os_fast_mutex_unlock(&ut_list_mutex);
1108+ UNIV_MEM_ALLOC(ptr, size);
1109+ return(ptr);
1110+ }
1111+skip: /* large pages disabled or the attempt above failed: retry with ordinary pages */
1112+ *is_new = FALSE;
1113+# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */
1114+# ifdef HAVE_GETPAGESIZE
1115+ size = getpagesize();
1116+# else
1117+ size = UNIV_PAGE_SIZE;
1118+# endif
1119+ /* Align block size to system page size */
1120+ ut_ad(ut_is_2pow(size));
1121+ size = *n = ut_2pow_round(*n + (size - 1), size); /* *n is updated even if the allocation below fails */
1122+
1123+ shmid = shmget((key_t)key, (size_t)size,
1124+ IPC_CREAT | IPC_EXCL | SHM_R | SHM_W);
1125+ if (shmid < 0) {
1126+ if (errno == EEXIST) { /* key already in use: try to open the existing segment */
1127+ fprintf(stderr,
1128+ "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
1129+ shmid = shmget((key_t)key, (size_t)size,
1130+ SHM_R | SHM_W);
1131+ if (shmid < 0) {
1132+ fprintf(stderr,
1133+ "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
1134+ size, errno);
1135+ ptr = NULL;
1136+ goto end;
1137+ } else {
1138+ fprintf(stderr,
1139+ "InnoDB: The existent shared memory segment is used.\n");
1140+ }
1141+ } else {
1142+ fprintf(stderr,
1143+ "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
1144+ size, errno);
1145+ ptr = NULL;
1146+ goto end;
1147+ }
1148+ } else {
1149+ *is_new = TRUE;
1150+ fprintf(stderr,
1151+ "InnoDB: A new shared memory segment has been created.\n");
1152+ }
1153+
1154+ ptr = shmat(shmid, NULL, 0);
1155+ if (ptr == (void *)-1) { /* shmat() failure value */
1156+ fprintf(stderr,
1157+ "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n",
1158+ errno);
1159+ ptr = NULL; /* NOTE(review): *is_new may still be TRUE on this NULL return - confirm callers test the pointer first */
1160+ }
1161+
1162+ if (ptr) {
1163+ *n = size;
1164+ os_fast_mutex_lock(&ut_list_mutex);
1165+ ut_total_allocated_memory += size;
1166+ os_fast_mutex_unlock(&ut_list_mutex);
1167+ UNIV_MEM_ALLOC(ptr, size);
1168+ }
1169+end:
1170+#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1171+ fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1172+ ptr = NULL;
1173+#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1174+ return(ptr);
1175+}
1176+
1177+/****************************************************************//**
1178+Detaches a shared memory segment with shmdt() and, on success, subtracts size from ut_total_allocated_memory. */
1179+UNIV_INTERN
1180+void
1181+os_shm_free(
1182+/*========*/
1183+ void *ptr, /*!< in: pointer returned by
1184+ os_shm_alloc() */
1185+ ulint size) /*!< in: size returned by
1186+ os_shm_alloc() */
1187+{
1188+ os_fast_mutex_lock(&ut_list_mutex);
1189+ ut_a(ut_total_allocated_memory >= size); /* sanity check; runs even in builds without SysV shm */
1190+ os_fast_mutex_unlock(&ut_list_mutex);
1191+
1192+#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
1193+ if (!shmdt(ptr)) { /* zero return: the detach succeeded */
1194+ os_fast_mutex_lock(&ut_list_mutex);
1195+ ut_a(ut_total_allocated_memory >= size);
1196+ ut_total_allocated_memory -= size;
1197+ os_fast_mutex_unlock(&ut_list_mutex);
1198+ UNIV_MEM_FREE(ptr, size);
1199+ } /* a failed shmdt() is ignored without any message */
1200+#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1201+ fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1202+#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1203+}
1204diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
1205--- a/storage/innobase/srv/srv0srv.c 2010-12-04 20:20:44.687550693 +0900
1206+++ b/storage/innobase/srv/srv0srv.c 2010-12-07 16:10:14.962785720 +0900
d8778560 1207@@ -235,6 +235,11 @@
b4e1fa2c
AM
1208 UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
1209 UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
1210
1211+/* key value for shm */
1212+UNIV_INTERN uint srv_buffer_pool_shm_key = 0;
1213+UNIV_INTERN ibool srv_buffer_pool_shm_is_reused = FALSE;
1214+UNIV_INTERN ibool srv_buffer_pool_shm_checksum = TRUE;
1215+
1216 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
1217 instead. */
1218 UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
1219diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
1220--- a/storage/innobase/srv/srv0start.c 2010-12-04 20:19:29.806482628 +0900
1221+++ b/storage/innobase/srv/srv0start.c 2010-12-07 16:10:14.964785346 +0900
df1b5770 1222@@ -1838,6 +1838,8 @@
b4e1fa2c
AM
1223 Note that this is not as heavy weight as it seems. At
1224 this point there will be only ONE page in the buf_LRU
1225 and there must be no page in the buf_flush list. */
1226+ /* buffer_pool_shm should not be reused when recovery was needed. */
1227+ if (!srv_buffer_pool_shm_is_reused)
1228 buf_pool_invalidate();
1229
1230 /* We always try to do a recovery, even if the database had
This page took 0.161308 seconds and 4 git commands to generate.