]> git.pld-linux.org Git - packages/mysql.git/blame - innodb_buffer_pool_shm.patch
- rel 0.5 (consider this to be test before rel 1); update percona patches; drop obsol...
[packages/mysql.git] / innodb_buffer_pool_shm.patch
CommitLineData
b4e1fa2c
AM
1# name : innodb_buffer_pool_shm.patch
2# introduced : 12
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
8diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
9--- a/storage/innobase/buf/buf0buddy.c 2010-12-04 19:46:39.372513543 +0900
10+++ b/storage/innobase/buf/buf0buddy.c 2010-12-07 17:56:28.302087851 +0900
11@@ -183,7 +183,7 @@
12 void* buf, /*!< in: buffer frame to deallocate */
13 ibool have_page_hash_mutex)
14 {
15- const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
16+ const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf_pool, buf);
17 buf_page_t* bpage;
18 buf_block_t* block;
19
20@@ -227,7 +227,7 @@
21 buf_block_t* block) /*!< in: buffer frame to allocate */
22 {
23 buf_pool_t* buf_pool = buf_pool_from_block(block);
24- const ulint fold = BUF_POOL_ZIP_FOLD(block);
25+ const ulint fold = BUF_POOL_ZIP_FOLD(buf_pool, block);
26 //ut_ad(buf_pool_mutex_own(buf_pool));
27 ut_ad(!mutex_own(&buf_pool->zip_mutex));
28 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
29diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
30--- a/storage/innobase/buf/buf0buf.c 2010-12-06 20:16:21.726195340 +0900
31+++ b/storage/innobase/buf/buf0buf.c 2010-12-07 20:40:30.824749814 +0900
32@@ -53,6 +53,10 @@
33 #include "page0zip.h"
34 #include "trx0trx.h"
35 #include "srv0start.h"
36+#include "que0que.h"
37+#include "read0read.h"
38+#include "row0row.h"
39+#include "ha_prototypes.h"
40
41 /* prototypes for new functions added to ha_innodb.cc */
42 trx_t* innobase_get_trx();
43@@ -342,6 +346,31 @@
44 was allocated for the frames */
45 buf_block_t* blocks; /*!< array of buffer control blocks */
46 };
47+
48+/* Header stored at the start of the buffer pool shared memory segment (chunk->mem); it carries what is checked and restored when the segment is attached again */
49+typedef struct buf_shm_info_struct buf_shm_info_t;
50+
51+struct buf_shm_info_struct {
52+ char head_str[8]; /* tag compared against BUF_SHM_INFO_HEAD over 8 bytes; no terminating NUL is stored */
53+ ulint binary_id; /* fingerprint of the mysqld binary; a segment with a different value is rejected */
54+ ibool is_new; /* TRUE while the segment is being initialized; set to FALSE when initialization has finished */
55+ ibool clean; /* TRUE after the segment was closed by a clean shutdown; FALSE while it is in use */
56+ ibool reusable; /* TRUE once the contents were marked reusable at close time; if FALSE the segment is re-initialized */
57+ ulint buf_pool_size; /* backup of srv_buf_pool_size; must match on reuse */
58+ ulint page_size; /* backup of srv_page_size; must match on reuse */
59+ ulint frame_offset; /* offset of the first frame based on chunk->mem */
60+ ulint zip_hash_offset; /* byte offset of the zip_hash table from chunk->mem */
61+ ulint zip_hash_n; /* size argument the zip_hash was initialized with */
62+
63+ ulint checksum; /* ut_fold_binary_32() of the segment after this header, or BUF_NO_CHECKSUM_MAGIC */
64+
65+ buf_pool_t buf_pool_backup; /* byte copy of the buf_pool_t taken when the segment is closed */
66+ buf_chunk_t chunk_backup; /* byte copy of the buf_chunk_t taken when the segment is closed */
67+
68+ ib_uint64_t dummy; /* NOTE(review): never referenced in this patch; presumably padding/alignment - confirm */
69+};
70+
71+#define BUF_SHM_INFO_HEAD "XTRA_SHM" /* 8-character tag written to and checked in head_str */
72 #endif /* !UNIV_HOTBACKUP */
73
74 /********************************************************************//**
75@@ -988,6 +1017,58 @@
76 #endif /* UNIV_SYNC_DEBUG */
77 }
78
79+static
80+void
81+buf_block_reuse(
82+/*============*/
83+ buf_block_t* block, /*!< in/out: block descriptor read back from a reused shared memory segment */
84+ ptrdiff_t frame_offset) /*!< in: byte offset added to block->frame and, when set, to block->page.zip.data */
85+{
86+ /* Re-initialize a block descriptor for reuse (reuse counterpart of block init): relocate the frame pointer, reset the adaptive hash fields and create the mutex and rw-lock anew */
87+ block->frame += frame_offset;
88+
89+ UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);
90+
91+ block->index = NULL;
92+ block->btr_search_latch = NULL;
93+
94+#ifdef UNIV_DEBUG
95+ /* hash membership flags are cleared here and recreated later */
96+ block->page.in_page_hash = FALSE;
97+ block->page.in_zip_hash = FALSE;
98+#endif /* UNIV_DEBUG */
99+
100+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
101+ block->n_pointers = 0;
102+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
103+
104+ if (block->page.zip.data)
105+ block->page.zip.data += frame_offset; /* the compressed data pointer is shifted by the same offset as the frame */
106+
107+ block->is_hashed = FALSE;
108+
109+#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
110+ /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
111+ of buffer block mutex/rwlock with performance schema. If
112+ PFS_GROUP_BUFFER_SYNC is defined, skip the registration
113+ since buffer block mutex/rwlock will be registered later in
114+ pfs_register_buffer_block() */
115+
116+ mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
117+ rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
118+#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
119+ mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
120+ rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
121+#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
122+
123+ ut_ad(rw_lock_validate(&(block->lock)));
124+
125+#ifdef UNIV_SYNC_DEBUG
126+ rw_lock_create(buf_block_debug_latch_key,
127+ &block->debug_latch, SYNC_NO_ORDER_CHECK);
128+#endif /* UNIV_SYNC_DEBUG */
129+}
130+
131 /********************************************************************//**
132 Allocates a chunk of buffer frames.
133 @return chunk, or NULL on failure */
134@@ -1001,26 +1082,188 @@
135 {
136 buf_block_t* block;
137 byte* frame;
138+ ulint zip_hash_n = 0;
139+ ulint zip_hash_mem_size = 0;
140+ hash_table_t* zip_hash_tmp = NULL;
141 ulint i;
142+ buf_shm_info_t* shm_info = NULL;
143
144 /* Round down to a multiple of page size,
145 although it already should be. */
146 mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
147+
148+ srv_buffer_pool_shm_is_reused = FALSE;
149+
150+ if (srv_buffer_pool_shm_key) {
151+ /* zip_hash size */
152+ zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
153+ zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n)
154+ + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
155+ }
156+
157 /* Reserve space for the block descriptors. */
158 mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
159 + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
160+ if (srv_buffer_pool_shm_key) {
161+ mem_size += ut_2pow_round(sizeof(buf_shm_info_t)
162+ + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
163+ mem_size += zip_hash_mem_size;
164+ }
165
166 chunk->mem_size = mem_size;
167+
168+ if (srv_buffer_pool_shm_key) {
169+ ulint binary_id;
170+ ibool is_new;
171+
172+ ut_a(buf_pool->n_chunks == 1);
173+
174+ fprintf(stderr,
175+ "InnoDB: Notice: The innodb_buffer_pool_shm_key option has been specified.\n"
176+ "InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
177+ "InnoDB: * the mysqld executable between restarts of the server.\n"
178+ "InnoDB: * the value of innodb_buffer_pool_size.\n"
179+ "InnoDB: * the value of innodb_page_size.\n"
180+ "InnoDB: * datafiles created by InnoDB during this session.\n"
181+ "InnoDB: Otherwise, data corruption in datafiles may result.\n");
182+
183+ /* FIXME: this id is still only a vague fingerprint of the binary */
184+ binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
185+ + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum)
186+ + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
187+ + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
188+ + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
189+ + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
190+ + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
191+ + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
192+ + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
193+ + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
194+ + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);
195+
196+ chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);
197+
198+ if (UNIV_UNLIKELY(chunk->mem == NULL)) {
199+ return(NULL);
200+ }
201+init_again:
202+#ifdef UNIV_SET_MEM_TO_ZERO
203+ if (is_new) {
204+ memset(chunk->mem, '\0', chunk->mem_size);
205+ }
206+#endif
207+ /* for ut_fold_binary_32(), these values should be 32-bit aligned */
208+ ut_a(sizeof(buf_shm_info_t) % 4 == 0);
209+ ut_a((ulint)chunk->mem % 4 == 0);
210+ ut_a(chunk->mem_size % 4 == 0);
211+
212+ shm_info = chunk->mem;
213+
214+ zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);
215+
216+ if (is_new) {
217+ strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
218+ shm_info->binary_id = binary_id;
219+ shm_info->is_new = TRUE; /* changed to FALSE when the initialization is finished */
220+ shm_info->clean = FALSE; /* changed to TRUE when free the segment. */
221+ shm_info->reusable = FALSE; /* changed to TRUE when validation is finished. */
222+ shm_info->buf_pool_size = srv_buf_pool_size;
223+ shm_info->page_size = srv_page_size;
224+ shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size;
225+ shm_info->zip_hash_n = zip_hash_n;
226+ } else {
227+ ulint checksum;
228+
229+ if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) {
230+ fprintf(stderr,
231+ "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n");
232+ return(NULL);
233+ }
234+ if (shm_info->binary_id != binary_id) {
235+ fprintf(stderr,
236+ "InnoDB: Error: The shared memory segment seems not to be for this binary.\n");
237+ return(NULL);
238+ }
239+ if (shm_info->is_new) {
240+ fprintf(stderr,
241+ "InnoDB: Error: The shared memory was not initialized yet.\n");
242+ return(NULL);
243+ }
244+ if (shm_info->buf_pool_size != srv_buf_pool_size) {
245+ fprintf(stderr,
246+ "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
247+ shm_info->buf_pool_size, srv_buf_pool_size);
248+ return(NULL);
249+ }
250+ if (shm_info->page_size != srv_page_size) {
251+ fprintf(stderr,
252+ "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n",
253+ shm_info->page_size, srv_page_size);
254+ return(NULL);
255+ }
256+ if (!shm_info->reusable) {
257+ fprintf(stderr,
258+ "InnoDB: Warning: The shared memory has unrecoverable contents.\n"
259+ "InnoDB: The shared memory segment is initialized.\n");
260+ is_new = TRUE;
261+ goto init_again;
262+ }
263+ if (!shm_info->clean) {
264+ fprintf(stderr,
265+ "InnoDB: Warning: The shared memory was not shut down cleanly.\n"
266+ "InnoDB: The shared memory segment is initialized.\n");
267+ is_new = TRUE;
268+ goto init_again;
269+ }
270+
271+ ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
272+ ut_a(shm_info->zip_hash_n == zip_hash_n);
273+
274+ /* check checksum */
275+ if (srv_buffer_pool_shm_checksum) {
276+ checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
277+ chunk->mem_size - sizeof(buf_shm_info_t));
278+ } else {
279+ checksum = BUF_NO_CHECKSUM_MAGIC;
280+ }
281+
282+ if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
283+ && shm_info->checksum != checksum) {
284+ fprintf(stderr,
285+ "InnoDB: Error: checksum of the shared memory is not match. "
286+ "(stored=%lu calculated=%lu)\n",
287+ shm_info->checksum, checksum);
288+ return(NULL);
289+ }
290+
291+ /* flag to use the segment. */
292+ shm_info->clean = FALSE; /* changed to TRUE when free the segment. */
293+ }
294+
295+ /* init zip_hash contents */
296+ if (is_new) {
297+ hash_create_init(zip_hash_tmp, zip_hash_n);
298+ } else {
299+ /* the offset adjustment is done later */
300+ hash_create_reuse(zip_hash_tmp);
301+
302+ srv_buffer_pool_shm_is_reused = TRUE;
303+ }
304+ } else {
305 chunk->mem = os_mem_alloc_large(&chunk->mem_size);
306
307 if (UNIV_UNLIKELY(chunk->mem == NULL)) {
308
309 return(NULL);
310 }
311+ }
312
313 /* Allocate the block descriptors from
314 the start of the memory block. */
315+ if (srv_buffer_pool_shm_key) {
316+ chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
317+ } else {
318 chunk->blocks = chunk->mem;
319+ }
320
321 /* Align a pointer to the first frame. Note that when
322 os_large_page_size is smaller than UNIV_PAGE_SIZE,
323@@ -1028,8 +1271,13 @@
324 it is bigger, we may allocate more blocks than requested. */
325
326 frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
327+ if (srv_buffer_pool_shm_key) {
328+ /* reserve zip_hash space and always subtract 1 for reproducibility */
329+ chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1;
330+ } else {
331 chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
332 - (frame != chunk->mem);
333+ }
334
335 /* Subtract the space needed for block descriptors. */
336 {
337@@ -1043,6 +1291,98 @@
338 chunk->size = size;
339 }
340
341+ if (shm_info && !(shm_info->is_new)) {
342+ /* convert the shared memory segment for reuse */
343+ ptrdiff_t phys_offset;
344+ ptrdiff_t logi_offset;
345+ ptrdiff_t blocks_offset;
346+ void* previous_frame_address;
347+
348+ if (chunk->size < shm_info->chunk_backup.size) {
349+ fprintf(stderr,
350+ "InnoDB: Error: The buffer pool became smaller because of allocated address.\n"
351+ "InnoDB: Retrying may avoid this situation.\n");
352+ shm_info->clean = TRUE; /* release the flag for retrying */
353+ return(NULL);
354+ }
355+
356+ chunk->size = shm_info->chunk_backup.size;
357+ phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
358+ logi_offset = frame - chunk->blocks[0].frame;
359+ previous_frame_address = chunk->blocks[0].frame;
360+ blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;
361+
362+ if (phys_offset || logi_offset || blocks_offset) {
363+ fprintf(stderr,
364+ "InnoDB: Buffer pool in the shared memory segment should be converted.\n"
365+ "InnoDB: Previous frames in address : %p\n"
366+ "InnoDB: Previous frames were located : %p\n"
367+ "InnoDB: Current frames should be located: %p\n"
368+ "InnoDB: Pysical offset : %ld (%#lx)\n"
369+ "InnoDB: Logical offset (frames) : %ld (%#lx)\n"
370+ "InnoDB: Logical offset (blocks) : %ld (%#lx)\n",
371+ (byte*)chunk->mem + shm_info->frame_offset,
372+ chunk->blocks[0].frame, frame,
373+ phys_offset, phys_offset, logi_offset, logi_offset,
374+ blocks_offset, blocks_offset);
375+ } else {
376+ fprintf(stderr,
377+ "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
378+ }
379+
380+ if (phys_offset) {
381+ fprintf(stderr,
382+ "InnoDB: Aligning physical offset...");
383+
384+ memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
385+ chunk->size * UNIV_PAGE_SIZE);
386+
387+ fprintf(stderr,
388+ " Done.\n");
389+ }
390+
391+ /* buf_block_t */
392+ block = chunk->blocks;
393+ for (i = chunk->size; i--; ) {
394+ buf_block_reuse(block, logi_offset);
395+ block++;
396+ }
397+
398+ if (logi_offset || blocks_offset) {
399+ fprintf(stderr,
400+ "InnoDB: Aligning logical offset...");
401+
402+
403+ /* buf_pool_t buf_pool_backup */
404+ UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
405+ previous_frame_address, logi_offset, blocks_offset);
406+ UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free,
407+ previous_frame_address, logi_offset, blocks_offset);
408+ UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU,
409+ previous_frame_address, logi_offset, blocks_offset);
410+ if (shm_info->buf_pool_backup.LRU_old)
411+ shm_info->buf_pool_backup.LRU_old =
412+ (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
413+ + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
414+ ? logi_offset : blocks_offset));
415+
416+ UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
417+ previous_frame_address, logi_offset, blocks_offset);
418+
419+ UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean,
420+ previous_frame_address, logi_offset, blocks_offset);
421+ for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
422+ UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i],
423+ previous_frame_address, logi_offset, blocks_offset);
424+ }
425+
426+ HASH_OFFSET(zip_hash_tmp, buf_page_t, hash,
427+ previous_frame_address, logi_offset, blocks_offset);
428+
429+ fprintf(stderr,
430+ " Done.\n");
431+ }
432+ } else {
433 /* Init block structs and assign frames for them. Then we
434 assign the frames to the first blocks (we already mapped the
435 memory above). */
436@@ -1068,6 +1408,11 @@
437 block++;
438 frame += UNIV_PAGE_SIZE;
439 }
440+ }
441+
442+ if (shm_info) {
443+ shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
444+ }
445
446 #ifdef PFS_GROUP_BUFFER_SYNC
447 pfs_register_buffer_block(chunk);
448@@ -1249,6 +1594,8 @@
449 UNIV_MEM_UNDESC(block);
450 }
451
452+ ut_a(!srv_buffer_pool_shm_key);
453+
454 os_mem_free_large(chunk->mem, chunk->mem_size);
455 }
456
457@@ -1289,7 +1636,7 @@
458 ulint instance_no) /*!< in: id of the instance */
459 {
460 ulint i;
461- buf_chunk_t* chunk;
462+ buf_chunk_t* chunk = NULL;
463
464 /* 1. Initialize general fields
465 ------------------------------- */
466@@ -1335,7 +1682,10 @@
467 buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
468
469 buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
470+ /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */
471+ if (!srv_buffer_pool_shm_key) {
472 buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
473+ }
474
475 buf_pool->last_printout_time = ut_time();
476 }
477@@ -1354,6 +1704,86 @@
478
479 /* All fields are initialized by mem_zalloc(). */
480
481+ if (chunk && srv_buffer_pool_shm_key) {
482+ buf_shm_info_t* shm_info;
483+
484+ ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
485+ shm_info = chunk->mem;
486+
487+ buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);
488+
489+ if(shm_info->is_new) {
490+ shm_info->is_new = FALSE; /* initialization was finished */
491+ } else {
492+ buf_block_t* block = chunk->blocks;
493+ buf_page_t* b;
494+
495+ /* shm_info->buf_pool_backup should already have been converted */
496+ /* in buf_chunk_init(), so simply copy it. */
497+ buf_pool->flush_list = shm_info->buf_pool_backup.flush_list;
498+ buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock;
499+ buf_pool->free = shm_info->buf_pool_backup.free;
500+ buf_pool->LRU = shm_info->buf_pool_backup.LRU;
501+ buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old;
502+ buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len;
503+ buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU;
504+ buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean;
505+ for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
506+ buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i];
507+ }
508+
509+ for (i = 0; i < chunk->size; i++, block++) {
510+ if (buf_block_get_state(block)
511+ == BUF_BLOCK_FILE_PAGE) {
512+ ut_d(block->page.in_page_hash = TRUE);
513+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
514+ buf_page_address_fold(
515+ block->page.space,
516+ block->page.offset),
517+ &block->page);
518+ }
519+ }
520+
521+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
522+ b = UT_LIST_GET_NEXT(zip_list, b)) {
523+ ut_ad(!b->in_flush_list);
524+ ut_ad(b->in_LRU_list);
525+
526+ ut_d(b->in_page_hash = TRUE);
527+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
528+ buf_page_address_fold(b->space, b->offset), b);
529+ }
530+
531+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
532+ b = UT_LIST_GET_NEXT(flush_list, b)) {
533+ ut_ad(b->in_flush_list);
534+ ut_ad(b->in_LRU_list);
535+
536+ switch (buf_page_get_state(b)) {
537+ case BUF_BLOCK_ZIP_DIRTY:
538+ ut_d(b->in_page_hash = TRUE);
539+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
540+ buf_page_address_fold(b->space,
541+ b->offset), b);
542+ break;
543+ case BUF_BLOCK_FILE_PAGE:
544+ /* uncompressed page */
545+ break;
546+ case BUF_BLOCK_ZIP_FREE:
547+ case BUF_BLOCK_ZIP_PAGE:
548+ case BUF_BLOCK_NOT_USED:
549+ case BUF_BLOCK_READY_FOR_USE:
550+ case BUF_BLOCK_MEMORY:
551+ case BUF_BLOCK_REMOVE_HASH:
552+ ut_error;
553+ break;
554+ }
555+ }
556+
557+
558+ }
559+ }
560+
561 mutex_exit(&buf_pool->LRU_list_mutex);
562 rw_lock_x_unlock(&buf_pool->page_hash_latch);
563 buf_pool_mutex_exit(buf_pool);
564@@ -1373,6 +1803,42 @@
565 buf_chunk_t* chunk;
566 buf_chunk_t* chunks;
567
568+ if (srv_buffer_pool_shm_key) {
569+ buf_shm_info_t* shm_info;
570+
571+ ut_a(buf_pool->n_chunks == 1);
572+
573+ chunk = buf_pool->chunks;
574+ shm_info = chunk->mem;
575+ ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
576+
577+ /* if opened, close shm. */
578+ if (!shm_info->clean) {
579+ /* validate that the shared memory segment has no unrecoverable contents. */
580+ /* Currently, this validation is no longer needed */
581+ shm_info->reusable = TRUE;
582+
583+ memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t));
584+ memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));
585+
586+ if (srv_fast_shutdown < 2) {
587+ if (srv_buffer_pool_shm_checksum) {
588+ shm_info->checksum =
589+ ut_fold_binary_32(
590+ (byte*)chunk->mem + sizeof(buf_shm_info_t),
591+ chunk->mem_size - sizeof(buf_shm_info_t));
592+ } else {
593+ shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
594+ }
595+ shm_info->clean = TRUE;
596+ }
597+
598+ fprintf(stderr,
599+ "InnoDB: The shared memory was closed.\n");
600+ }
601+
602+ os_shm_free(chunk->mem, chunk->mem_size);
603+ } else {
604 chunks = buf_pool->chunks;
605 chunk = chunks + buf_pool->n_chunks;
606
607@@ -1381,10 +1847,13 @@
608 would fail at shutdown. */
609 os_mem_free_large(chunk->mem, chunk->mem_size);
610 }
611+ }
612
613 mem_free(buf_pool->chunks);
614 hash_table_free(buf_pool->page_hash);
615+ if (!srv_buffer_pool_shm_key) {
616 hash_table_free(buf_pool->zip_hash);
617+ }
618 }
619
620 /********************************************************************//**
621@@ -1668,6 +2137,11 @@
622 //buf_pool_mutex_enter(buf_pool);
623 mutex_enter(&buf_pool->LRU_list_mutex);
624
625+ if (srv_buffer_pool_shm_key) {
626+ /* Cannot support shrink */
627+ goto func_done;
628+ }
629+
630 shrink_again:
631 if (buf_pool->n_chunks <= 1) {
632
633@@ -1848,7 +2322,7 @@
634 zip_hash = hash_create(2 * buf_pool->curr_size);
635
636 HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
637- BUF_POOL_ZIP_FOLD_BPAGE);
638+ buf_pool, BUF_POOL_ZIP_FOLD_BPAGE);
639
640 hash_table_free(buf_pool->zip_hash);
641 buf_pool->zip_hash = zip_hash;
642@@ -2130,6 +2604,11 @@
643 ulint change_size;
644 ulint min_change_size = 1048576 * srv_buf_pool_instances;
645
646+ if (srv_buffer_pool_shm_key) {
647+ /* Cannot support resize */
648+ return;
649+ }
650+
651 buf_pool_mutex_enter_all();
652
653 if (srv_buf_pool_old_size == srv_buf_pool_size) {
654diff -ruN a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c
655--- a/storage/innobase/ha/hash0hash.c 2010-11-03 07:01:13.000000000 +0900
656+++ b/storage/innobase/ha/hash0hash.c 2010-12-07 16:10:14.937749140 +0900
657@@ -133,6 +133,70 @@
658 }
659
660 /*************************************************************//**
661+Computes the bytes needed for a hash table placed in caller-provided memory: sizeof(hash_table_t) rounded up to a multiple of 8, plus ut_find_prime(n) cells. */
662+UNIV_INTERN
663+ulint
664+hash_create_needed(
665+/*===============*/
666+ ulint n) /*!< in: value passed to ut_find_prime() to obtain the cell count */
667+{
668+ ulint prime;
669+ ulint offset;
670+
671+ prime = ut_find_prime(n);
672+
673+ offset = (sizeof(hash_table_t) + 7) / 8; /* header size in 8-byte units, rounded up */
674+ offset *= 8;
675+
676+ return(offset + sizeof(hash_cell_t) * prime);
677+}
678+
679+UNIV_INTERN
680+void
681+hash_create_init(
682+/*=============*/
683+ hash_table_t* table, /*!< in/out: memory where the table header is built; the cell array follows it in the same memory */
684+ ulint n) /*!< in: value passed to ut_find_prime() to obtain the cell count */
685+{
686+ ulint prime;
687+ ulint offset;
688+
689+ prime = ut_find_prime(n);
690+
691+ offset = (sizeof(hash_table_t) + 7) / 8; /* header size rounded up to a multiple of 8, same layout as hash_create_needed() */
692+ offset *= 8;
693+
694+ table->array = (hash_cell_t*)(((byte*)table) + offset);
695+ table->n_cells = prime;
696+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
697+ table->adaptive = FALSE;
698+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
699+ table->n_mutexes = 0;
700+ table->mutexes = NULL;
701+ table->heaps = NULL;
702+ table->heap = NULL;
703+ ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
704+
705+ /* Initialize the cell array */
706+ hash_table_clear(table);
707+}
708+
709+UNIV_INTERN
710+void
711+hash_create_reuse(
712+/*==============*/
713+ hash_table_t* table) /*!< in/out: existing table header; only table->array is rewritten, the cells are not touched */
714+{
715+ ulint offset;
716+
717+ offset = (sizeof(hash_table_t) + 7) / 8; /* header size rounded up to a multiple of 8, as in hash_create_init() */
718+ offset *= 8;
719+
720+ table->array = (hash_cell_t*)(((byte*)table) + offset); /* point the array at the cells that follow the header at its current address */
721+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
722+}
723+
724+/*************************************************************//**
725 Frees a hash table. */
726 UNIV_INTERN
727 void
728diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
729--- a/storage/innobase/handler/ha_innodb.cc 2010-12-06 20:16:21.733263627 +0900
730+++ b/storage/innobase/handler/ha_innodb.cc 2010-12-07 17:56:28.316139830 +0900
731@@ -194,6 +194,7 @@
732 static my_bool innobase_create_status_file = FALSE;
733 static my_bool innobase_stats_on_metadata = TRUE;
734 static my_bool innobase_use_sys_stats_table = FALSE;
735+static my_bool innobase_buffer_pool_shm_checksum = TRUE;
736
737
738 static char* internal_innobase_data_file_path = NULL;
739@@ -2620,6 +2621,14 @@
740 srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
741 srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
742
743+ if (srv_buffer_pool_shm_key && srv_buf_pool_instances > 1) {
744+ fprintf(stderr,
745+ "InnoDB: Warning: innodb_buffer_pool_shm_key cannot be used with several innodb_buffer_pool_instances.\n"
746+ "InnoDB: innodb_buffer_pool_instances was set to 1.\n");
747+ srv_buf_pool_instances = 1;
748+ innobase_buffer_pool_instances = 1;
749+ }
750+
751 srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
752
753 srv_n_file_io_threads = (ulint) innobase_file_io_threads;
754@@ -2636,6 +2645,7 @@
755 srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
756 srv_use_checksums = (ibool) innobase_use_checksums;
757 srv_fast_checksum = (ibool) innobase_fast_checksum;
758+ srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;
759
760 #ifdef HAVE_LARGE_PAGES
761 if ((os_use_large_pages = (ibool) my_use_large_pages))
762@@ -11642,6 +11652,16 @@
763 "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
764 NULL, NULL, 1L, 1L, MAX_BUFFER_POOLS, 1L);
765
766+static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,
767+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
768+ "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",
769+ NULL, NULL, 0, 0, INT_MAX32, 0);
770+
771+static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
772+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
773+ "Enable buffer_pool_shm checksum validation (enabled by default).",
774+ NULL, NULL, TRUE);
775+
776 static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
777 PLUGIN_VAR_RQCMDARG,
778 "Helps in performance tuning in heavily concurrent environments.",
779@@ -11921,6 +11941,8 @@
780 MYSQL_SYSVAR(autoextend_increment),
781 MYSQL_SYSVAR(buffer_pool_size),
782 MYSQL_SYSVAR(buffer_pool_instances),
783+ MYSQL_SYSVAR(buffer_pool_shm_key),
784+ MYSQL_SYSVAR(buffer_pool_shm_checksum),
785 MYSQL_SYSVAR(checksums),
786 MYSQL_SYSVAR(fast_checksum),
787 MYSQL_SYSVAR(commit_concurrency),
788diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
789--- a/storage/innobase/include/buf0buf.h 2010-12-06 20:16:21.778264552 +0900
790+++ b/storage/innobase/include/buf0buf.h 2010-12-07 17:56:28.322749380 +0900
791@@ -36,6 +36,7 @@
792 #ifndef UNIV_HOTBACKUP
793 #include "ut0rbt.h"
794 #include "os0proc.h"
795+#include "srv0srv.h"
796
797 /** @name Modes for buf_page_get_gen */
798 /* @{ */
799@@ -1520,9 +1521,12 @@
800 /**********************************************************************//**
801 Compute the hash fold value for blocks in buf_pool->zip_hash. */
802 /* @{ */
803-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
804-#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
805-#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
806+/* When srv_buffer_pool_shm_key is set, the fold is the byte distance of ptr from the frame of buf_page_from_array(bpool, 0), divided by UNIV_PAGE_SIZE (a relative value); otherwise it is the absolute address divided by UNIV_PAGE_SIZE. NOTE(review): ptr is not parenthesized in the relative branch. */
807+#define BUF_POOL_ZIP_FOLD_PTR(bpool, ptr) (!srv_buffer_pool_shm_key\
808+ ?((ulint) (ptr) / UNIV_PAGE_SIZE)\
809+ :((ulint) ((byte*)ptr - (byte*)(buf_page_from_array(bpool, 0)->frame)) / UNIV_PAGE_SIZE))
810+#define BUF_POOL_ZIP_FOLD(bpool, b) BUF_POOL_ZIP_FOLD_PTR(bpool, (b)->frame)
811+#define BUF_POOL_ZIP_FOLD_BPAGE(bpool, b) BUF_POOL_ZIP_FOLD(bpool, (buf_block_t*) (b))
812 /* @} */
813
814 /** @brief The buffer pool statistics structure. */
815diff -ruN a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
816--- a/storage/innobase/include/hash0hash.h 2010-11-03 07:01:13.000000000 +0900
817+++ b/storage/innobase/include/hash0hash.h 2010-12-07 17:56:28.324726446 +0900
818@@ -49,6 +49,28 @@
819 hash_create(
820 /*========*/
821 ulint n); /*!< in: number of array cells */
822+
823+/*************************************************************//**
824+Helpers for a hash table that lives in caller-provided memory: size computation, first-time initialization, and re-attachment of an existing table. */
825+UNIV_INTERN
826+ulint
827+hash_create_needed(
828+/*===============*/
829+ ulint n); /*!< in: value used to derive the cell count; returns the number of bytes needed */
830+
831+UNIV_INTERN
832+void
833+hash_create_init(
834+/*=============*/
835+ hash_table_t* table, /*!< in/out: memory in which the table header and cell array are set up */
836+ ulint n); /*!< in: value used to derive the cell count */
837+
838+UNIV_INTERN
839+void
840+hash_create_reuse(
841+/*==============*/
842+ hash_table_t* table); /*!< in/out: existing table whose array pointer is re-derived from its current address */
843+
844 #ifndef UNIV_HOTBACKUP
845 /*************************************************************//**
846 Creates a mutex array to protect a hash table. */
847@@ -306,7 +328,7 @@
848 /****************************************************************//**
849 Move all hash table entries from OLD_TABLE to NEW_TABLE. */
850
851-#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
852+#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, BPOOL, FOLD_FUNC) \
853 do {\
854 ulint i2222;\
855 ulint cell_count2222;\
856@@ -318,7 +340,7 @@
857 \
858 while (node2222) {\
859 NODE_TYPE* next2222 = node2222->PTR_NAME;\
860- ulint fold2222 = FOLD_FUNC(node2222);\
861+ ulint fold2222 = FOLD_FUNC(BPOOL, node2222);\
862 \
863 HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
864 fold2222, node2222);\
865@@ -327,6 +349,33 @@
866 }\
867 }\
868 } while (0)
869+
870+/********************************************************************//**
871+Relocate the node pointers of TABLE after the memory they point into has moved: each non-NULL cell head and PTR_NAME link is shifted by FOFFSET when it is greater than FADDR, and by BOFFSET otherwise. */
872+#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \
873+do {\
874+ ulint i2222;\
875+ ulint cell_count2222;\
876+\
877+ cell_count2222 = hash_get_n_cells(TABLE);\
878+\
879+ for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
880+ NODE_TYPE* node2222;\
881+\
882+ if ((TABLE)->array[i2222].node) \
883+ (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
884+ + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\
885+ node2222 = HASH_GET_FIRST((TABLE), i2222);\
886+\
887+ while (node2222) {\
888+ if (node2222->PTR_NAME) \
889+ node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
890+ + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\
891+\
892+ node2222 = node2222->PTR_NAME;\
893+ }\
894+ }\
895+} while (0)
896
897 /************************************************************//**
898 Gets the mutex index for a fold value in a hash table.
899diff -ruN a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
900--- a/storage/innobase/include/os0proc.h 2010-11-03 07:01:13.000000000 +0900
901+++ b/storage/innobase/include/os0proc.h 2010-12-07 16:10:14.955718750 +0900
902@@ -32,6 +32,11 @@
903 #ifdef UNIV_LINUX
904 #include <sys/ipc.h>
905 #include <sys/shm.h>
906+#else
907+# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
908+#include <sys/ipc.h>
909+#include <sys/shm.h>
910+# endif
911 #endif
912
913 typedef void* os_process_t;
914@@ -70,6 +75,29 @@
915 ulint size); /*!< in: size returned by
916 os_mem_alloc_large() */
917
918+
919+/****************************************************************//**
920+Allocates or attaches and reuses shared memory segment.
921+The content is not cleared automatically.
922+@return allocated memory (the caller in buf0buf.c treats NULL as failure) */
923+UNIV_INTERN
924+void*
925+os_shm_alloc(
926+/*=========*/
927+ ulint* n, /*!< in/out: number of bytes */
928+ uint key, /*!< in: key of the shared memory segment (buf0buf.c passes srv_buffer_pool_shm_key) */
929+ ibool* is_new); /*!< out: the caller reads this as TRUE when the segment must be initialized from scratch */
930+
931+/****************************************************************//**
932+Detach shared memory segment. */
933+UNIV_INTERN
934+void
935+os_shm_free(
936+/*========*/
937+ void *ptr, /*!< in: pointer returned by
938+ os_shm_alloc() */
939+ ulint size); /*!< in: size returned by
940+ os_shm_alloc() */
941 #ifndef UNIV_NONINL
942 #include "os0proc.ic"
943 #endif
944diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
945--- a/storage/innobase/include/srv0srv.h 2010-12-04 20:20:28.016566697 +0900
946+++ b/storage/innobase/include/srv0srv.h 2010-12-07 16:10:14.956717659 +0900
947@@ -171,6 +171,10 @@
948 extern ulint srv_mem_pool_size;
949 extern ulint srv_lock_table_size;
950
951+extern uint srv_buffer_pool_shm_key; /* key value for shm */
952+extern ibool srv_buffer_pool_shm_is_reused; /* must be FALSE when recovery starts; if TRUE, startup skips buf_pool_invalidate() */
953+extern ibool srv_buffer_pool_shm_checksum; /* NOTE(review): presumably enables checksum validation of the segment - confirm in buf0buf.c */
954+
955 extern ibool srv_thread_concurrency_timer_based;
956
957 extern ulint srv_n_file_io_threads;
958diff -ruN a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
959--- a/storage/innobase/include/ut0lst.h 2010-11-03 07:01:13.000000000 +0900
960+++ b/storage/innobase/include/ut0lst.h 2010-12-07 16:10:14.957785525 +0900
961@@ -257,5 +257,48 @@
962 ut_a(ut_list_node_313 == NULL); \
963 } while (0)
964
965+/********************************************************************//**
966+Align nodes with moving location: shifts every non-NULL list pointer.
967+@param NAME the name of the list; TYPE is the node type
968+@param BASE base node (not a pointer to it)
969+@param FADDR pointers above this address move by FOFFSET, others by BOFFSET
970+@param FOFFSET,BOFFSET byte offsets added to the pointers */
971+#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \
972+do { \
973+ ulint ut_list_i_313; \
974+ TYPE* ut_list_node_313; \
975+ \
976+ if ((BASE).start) /* relocate the base node first */ \
977+ (BASE).start = (void*)((byte*)((BASE).start) \
978+ + (((void*)((BASE).start) > (void*)(FADDR)) ? (FOFFSET) : (BOFFSET)));\
979+ if ((BASE).end) \
980+ (BASE).end = (void*)((byte*)((BASE).end) \
981+ + (((void*)((BASE).end) > (void*)(FADDR)) ? (FOFFSET) : (BOFFSET)));\
982+ \
983+ ut_list_node_313 = (BASE).start; \
984+ \
985+ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
986+ ut_a(ut_list_node_313); \
987+ if ((ut_list_node_313->NAME).prev) \
988+ (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
989+ + (((void*)((ut_list_node_313->NAME).prev) > (void*)(FADDR)) ? (FOFFSET) : (BOFFSET)));\
990+ if ((ut_list_node_313->NAME).next) \
991+ (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\
992+ + (((void*)((ut_list_node_313->NAME).next) > (void*)(FADDR)) ? (FOFFSET) : (BOFFSET)));\
993+ ut_list_node_313 = (ut_list_node_313->NAME).next; /* follow the already relocated link */ \
994+ } \
995+ \
996+ ut_a(ut_list_node_313 == NULL); \
997+ \
998+ ut_list_node_313 = (BASE).end; /* walk backwards to check the relocated prev links */ \
999+ \
1000+ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
1001+ ut_a(ut_list_node_313); \
1002+ ut_list_node_313 = (ut_list_node_313->NAME).prev; \
1003+ } \
1004+ \
1005+ ut_a(ut_list_node_313 == NULL); \
1006+} while (0)
1007+
1008 #endif
1009
1010diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
1011--- a/storage/innobase/log/log0recv.c 2010-12-04 19:46:40.212513377 +0900
1012+++ b/storage/innobase/log/log0recv.c 2010-12-07 16:10:14.959785817 +0900
1013@@ -2912,6 +2912,7 @@
1014 /*==========================*/
1015 {
1016 ut_a(!recv_needed_recovery);
1017+ ut_a(!srv_buffer_pool_shm_is_reused);
1018
1019 recv_needed_recovery = TRUE;
1020
1021diff -ruN a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c
1022--- a/storage/innobase/os/os0proc.c 2010-11-03 07:01:13.000000000 +0900
1023+++ b/storage/innobase/os/os0proc.c 2010-12-07 16:10:14.960800123 +0900
1024@@ -229,3 +229,173 @@
1025 }
1026 #endif
1027 }
1028+
1029+/****************************************************************//**
1030+Creates the shared memory segment identified by key, or attaches to it
1031+if it already exists. The content is not cleared automatically.
1032+@return allocated memory, or NULL on failure */
1033+UNIV_INTERN
1034+void*
1035+os_shm_alloc(
1036+/*=========*/
1037+ ulint* n, /*!< in/out: number of bytes; updated to the rounded-up size */
1038+ uint key, /*!< in: key of the segment, passed to shmget() */
1039+ ibool* is_new) /*!< out: TRUE if a new segment was created, else FALSE */
1040+{
1041+ void* ptr;
1042+#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
1043+ ulint size;
1044+ int shmid;
1045+
1046+ *is_new = FALSE;
1047+ fprintf(stderr,
1048+ "InnoDB: The shared memory segment containing the buffer pool is: key %#x (%u).\n",
1049+ key, key);
1050+# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
1051+ if (!os_use_large_pages || !os_large_page_size) {
1052+ goto skip;
1053+ }
1054+
1055+ /* Align block size to os_large_page_size */
1056+ ut_ad(ut_is_2pow(os_large_page_size));
1057+ size = ut_2pow_round(*n + (os_large_page_size - 1),
1058+ os_large_page_size);
1059+
1060+ shmid = shmget((key_t)key, (size_t)size,
1061+ IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W); /* IPC_EXCL: EEXIST if the key is in use */
1062+ if (shmid < 0) {
1063+ if (errno == EEXIST) {
1064+ fprintf(stderr,
1065+ "InnoDB: HugeTLB: The shared memory segment exists.\n");
1066+ shmid = shmget((key_t)key, (size_t)size,
1067+ SHM_HUGETLB | SHM_R | SHM_W);
1068+ if (shmid < 0) {
1069+ fprintf(stderr,
1070+ "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
1071+ size, errno);
1072+ goto skip;
1073+ } else {
1074+ fprintf(stderr,
1075+ "InnoDB: HugeTLB: The existent shared memory segment is used.\n");
1076+ }
1077+ } else {
1078+ fprintf(stderr,
1079+ "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
1080+ size, errno);
1081+ goto skip;
1082+ }
1083+ } else {
1084+ *is_new = TRUE;
1085+ fprintf(stderr,
1086+ "InnoDB: HugeTLB: A new shared memory segment has been created .\n");
1087+ }
1088+
1089+ ptr = shmat(shmid, NULL, 0);
1090+ if (ptr == (void *)-1) {
1091+ fprintf(stderr,
1092+ "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n",
1093+ errno);
1094+ ptr = NULL;
1095+ }
1096+
1097+ if (ptr) {
1098+ *n = size;
1099+ os_fast_mutex_lock(&ut_list_mutex);
1100+ ut_total_allocated_memory += size;
1101+ os_fast_mutex_unlock(&ut_list_mutex);
1102+ UNIV_MEM_ALLOC(ptr, size);
1103+ return(ptr);
1104+ }
1105+skip: /* HugeTLB attempt failed or is disabled: retry below with normal pages */
1106+ *is_new = FALSE;
1107+# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */
1108+# ifdef HAVE_GETPAGESIZE
1109+ size = getpagesize();
1110+# else
1111+ size = UNIV_PAGE_SIZE;
1112+# endif
1113+ /* Align block size to system page size */
1114+ ut_ad(ut_is_2pow(size));
1115+ size = *n = ut_2pow_round(*n + (size - 1), size);
1116+
1117+ shmid = shmget((key_t)key, (size_t)size,
1118+ IPC_CREAT | IPC_EXCL | SHM_R | SHM_W); /* IPC_EXCL: EEXIST if the key is in use */
1119+ if (shmid < 0) {
1120+ if (errno == EEXIST) {
1121+ fprintf(stderr,
1122+ "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
1123+ shmid = shmget((key_t)key, (size_t)size,
1124+ SHM_R | SHM_W);
1125+ if (shmid < 0) {
1126+ fprintf(stderr,
1127+ "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
1128+ size, errno);
1129+ ptr = NULL;
1130+ goto end;
1131+ } else {
1132+ fprintf(stderr,
1133+ "InnoDB: The existent shared memory segment is used.\n");
1134+ }
1135+ } else {
1136+ fprintf(stderr,
1137+ "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
1138+ size, errno);
1139+ ptr = NULL;
1140+ goto end;
1141+ }
1142+ } else {
1143+ *is_new = TRUE;
1144+ fprintf(stderr,
1145+ "InnoDB: A new shared memory segment has been created.\n");
1146+ }
1147+
1148+ ptr = shmat(shmid, NULL, 0);
1149+ if (ptr == (void *)-1) {
1150+ fprintf(stderr,
1151+ "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n",
1152+ errno);
1153+ ptr = NULL;
1154+ }
1155+
1156+ if (ptr) {
1157+ *n = size;
1158+ os_fast_mutex_lock(&ut_list_mutex);
1159+ ut_total_allocated_memory += size;
1160+ os_fast_mutex_unlock(&ut_list_mutex);
1161+ UNIV_MEM_ALLOC(ptr, size);
1162+ }
1163+end:
1164+#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1165+ fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1166+ *is_new = FALSE; ptr = NULL; /* give the out parameter a defined value on this path too */
1167+#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1168+ return(ptr);
1169+}
1170+
1171+/****************************************************************//**
1172+Detach shared memory segment; on success size is subtracted from ut_total_allocated_memory. */
1173+UNIV_INTERN
1174+void
1175+os_shm_free(
1176+/*========*/
1177+ void *ptr, /*!< in: pointer returned by
1178+ os_shm_alloc() */
1179+ ulint size) /*!< in: size returned by
1180+ os_shm_alloc() */
1181+{
1182+ os_fast_mutex_lock(&ut_list_mutex);
1183+ ut_a(ut_total_allocated_memory >= size); /* checked on every build, also without shm support */
1184+ os_fast_mutex_unlock(&ut_list_mutex);
1185+
1186+#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
1187+ if (!shmdt(ptr)) { /* accounting is updated only when shmdt() returns 0; other results are ignored */
1188+ os_fast_mutex_lock(&ut_list_mutex);
1189+ ut_a(ut_total_allocated_memory >= size);
1190+ ut_total_allocated_memory -= size;
1191+ os_fast_mutex_unlock(&ut_list_mutex);
1192+ UNIV_MEM_FREE(ptr, size);
1193+ }
1194+#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1195+ fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
1196+#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
1197+}
1198diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
1199--- a/storage/innobase/srv/srv0srv.c 2010-12-04 20:20:44.687550693 +0900
1200+++ b/storage/innobase/srv/srv0srv.c 2010-12-07 16:10:14.962785720 +0900
1201@@ -233,6 +233,11 @@
1202 UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
1203 UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
1204
1205+/* key value for shm (declared uint, initial value 0) */
1206+UNIV_INTERN uint srv_buffer_pool_shm_key = 0;
1207+UNIV_INTERN ibool srv_buffer_pool_shm_is_reused = FALSE; /* must be FALSE when recovery starts; if TRUE, startup skips buf_pool_invalidate() */
1208+UNIV_INTERN ibool srv_buffer_pool_shm_checksum = TRUE; /* NOTE(review): presumably enables checksum validation of the segment - confirm in buf0buf.c */
1209+
1210 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
1211 instead. */
1212 UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
1213diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
1214--- a/storage/innobase/srv/srv0start.c 2010-12-04 20:19:29.806482628 +0900
1215+++ b/storage/innobase/srv/srv0start.c 2010-12-07 16:10:14.964785346 +0900
1216@@ -1759,6 +1759,8 @@
1217 Note that this is not as heavy weight as it seems. At
1218 this point there will be only ONE page in the buf_LRU
1219 and there must be no page in the buf_flush list. */
1220+ /* buffer_pool_shm should not be reused when recovery was needed. */
1221+ if (!srv_buffer_pool_shm_is_reused)
1222 buf_pool_invalidate();
1223
1224 /* We always try to do a recovery, even if the database had
This page took 0.165528 seconds and 4 git commands to generate.