]>
Commit | Line | Data |
---|---|---|
b4e1fa2c AM |
1 | # name : innodb_buffer_pool_shm.patch |
2 | # introduced : 12 | |
3 | # maintainer : Yasufumi | |
4 | # | |
5 | #!!! notice !!! | |
6 | # Any small change to this file in the main branch | |
7 | # should be done or reviewed by the maintainer! | |
8 | diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c | |
9 | --- a/storage/innobase/buf/buf0buddy.c 2010-12-04 19:46:39.372513543 +0900 | |
10 | +++ b/storage/innobase/buf/buf0buddy.c 2010-12-07 17:56:28.302087851 +0900 | |
11 | @@ -183,7 +183,7 @@ | |
12 | void* buf, /*!< in: buffer frame to deallocate */ | |
13 | ibool have_page_hash_mutex) | |
14 | { | |
15 | - const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf); | |
16 | + const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf_pool, buf); | |
17 | buf_page_t* bpage; | |
18 | buf_block_t* block; | |
19 | ||
20 | @@ -227,7 +227,7 @@ | |
21 | buf_block_t* block) /*!< in: buffer frame to allocate */ | |
22 | { | |
23 | buf_pool_t* buf_pool = buf_pool_from_block(block); | |
24 | - const ulint fold = BUF_POOL_ZIP_FOLD(block); | |
25 | + const ulint fold = BUF_POOL_ZIP_FOLD(buf_pool, block); | |
26 | //ut_ad(buf_pool_mutex_own(buf_pool)); | |
27 | ut_ad(!mutex_own(&buf_pool->zip_mutex)); | |
28 | ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE); | |
29 | diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c | |
30 | --- a/storage/innobase/buf/buf0buf.c 2010-12-06 20:16:21.726195340 +0900 | |
31 | +++ b/storage/innobase/buf/buf0buf.c 2010-12-07 20:40:30.824749814 +0900 | |
32 | @@ -53,6 +53,10 @@ | |
33 | #include "page0zip.h" | |
34 | #include "trx0trx.h" | |
35 | #include "srv0start.h" | |
36 | +#include "que0que.h" | |
37 | +#include "read0read.h" | |
38 | +#include "row0row.h" | |
39 | +#include "ha_prototypes.h" | |
40 | ||
41 | /* prototypes for new functions added to ha_innodb.cc */ | |
42 | trx_t* innobase_get_trx(); | |
43 | @@ -342,6 +346,31 @@ | |
44 | was allocated for the frames */ | |
45 | buf_block_t* blocks; /*!< array of buffer control blocks */ | |
46 | }; | |
47 | + | |
48 | +/* Header placed at the start of the buffer pool shared memory segment (chunk->mem); the block descriptors follow it. Holds what is needed to validate and re-attach the segment on a later start. */ | |
49 | +typedef struct buf_shm_info_struct buf_shm_info_t; | |
50 | + | |
51 | +struct buf_shm_info_struct { | |
52 | + char head_str[8]; /* signature BUF_SHM_INFO_HEAD; exactly 8 bytes, stored without a terminating NUL */ | |
53 | + ulint binary_id; /* id computed from function address differences of the running binary; must match on reuse */ | |
54 | + ibool is_new; /* TRUE while the segment is being initialized; set to FALSE once buffer pool init has finished */ | |
55 | + ibool clean; /* TRUE when the segment was closed cleanly and is free to be attached; FALSE while in use */ | |
56 | + ibool reusable; /* TRUE when the contents were marked reusable at close */ | |
57 | + ulint buf_pool_size; /* backup of srv_buf_pool_size; must match on reuse */ | |
58 | + ulint page_size; /* backup of srv_page_size; must match on reuse */ | |
59 | + ulint frame_offset; /* offset of the first frame from chunk->mem */ | |
60 | + ulint zip_hash_offset; /* offset of the zip_hash table from chunk->mem */ | |
61 | + ulint zip_hash_n; /* cell count requested for zip_hash */ | |
62 | + | |
63 | + ulint checksum; /* ut_fold_binary_32() of the segment after this header, or BUF_NO_CHECKSUM_MAGIC */ | |
64 | + | |
65 | + buf_pool_t buf_pool_backup; /* copy of the buf_pool_t taken when the segment is closed */ | |
66 | + buf_chunk_t chunk_backup; /* copy of the buf_chunk_t taken when the segment is closed */ | |
67 | + | |
68 | + ib_uint64_t dummy; /* not referenced in the visible code; presumably alignment padding -- TODO confirm */ | |
69 | +}; | |
70 | + | |
71 | +#define BUF_SHM_INFO_HEAD "XTRA_SHM" | |
72 | #endif /* !UNIV_HOTBACKUP */ | |
73 | ||
74 | /********************************************************************//** | |
75 | @@ -988,6 +1017,58 @@ | |
76 | #endif /* UNIV_SYNC_DEBUG */ | |
77 | } | |
78 | ||
79 | +static | |
80 | +void | |
81 | +buf_block_reuse( | |
82 | +/*============*/ | |
83 | + buf_block_t* block, /*!< in/out: block descriptor found in a reused shared memory segment */ | |
84 | + ptrdiff_t frame_offset) /*!< in: byte distance added to the stored frame and zip.data pointers */ | |
85 | +{ | |
86 | + /* Counterpart of block init for a descriptor that already exists in a reused segment: rebase its pointers by frame_offset, reset process-local fields and recreate its mutex and rw-lock */ | |
87 | + block->frame += frame_offset; | |
88 | + | |
89 | + UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block); | |
90 | + | |
91 | + block->index = NULL; | |
92 | + block->btr_search_latch = NULL; | |
93 | + | |
94 | +#ifdef UNIV_DEBUG | |
95 | + /* cleared here; in_page_hash is set again when page_hash is rebuilt during buffer pool init */ | |
96 | + block->page.in_page_hash = FALSE; | |
97 | + block->page.in_zip_hash = FALSE; | |
98 | +#endif /* UNIV_DEBUG */ | |
99 | + | |
100 | +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG | |
101 | + block->n_pointers = 0; | |
102 | +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ | |
103 | + | |
104 | + if (block->page.zip.data) /* a NULL zip.data is left as it is; only a set pointer is rebased */ | |
105 | + block->page.zip.data += frame_offset; | |
106 | + | |
107 | + block->is_hashed = FALSE; | |
108 | + | |
109 | +#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC | |
110 | + /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration | |
111 | + of buffer block mutex/rwlock with performance schema. If | |
112 | + PFS_GROUP_BUFFER_SYNC is defined, skip the registration | |
113 | + since buffer block mutex/rwlock will be registered later in | |
114 | + pfs_register_buffer_block() */ | |
115 | + | |
116 | + mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK); | |
117 | + rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING); | |
118 | +#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */ | |
119 | + mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK); | |
120 | + rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING); | |
121 | +#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */ | |
122 | + | |
123 | + ut_ad(rw_lock_validate(&(block->lock))); | |
124 | + | |
125 | +#ifdef UNIV_SYNC_DEBUG | |
126 | + rw_lock_create(buf_block_debug_latch_key, | |
127 | + &block->debug_latch, SYNC_NO_ORDER_CHECK); | |
128 | +#endif /* UNIV_SYNC_DEBUG */ | |
129 | +} | |
130 | + | |
131 | /********************************************************************//** | |
132 | Allocates a chunk of buffer frames. | |
133 | @return chunk, or NULL on failure */ | |
134 | @@ -1001,26 +1082,188 @@ | |
135 | { | |
136 | buf_block_t* block; | |
137 | byte* frame; | |
138 | + ulint zip_hash_n = 0; | |
139 | + ulint zip_hash_mem_size = 0; | |
140 | + hash_table_t* zip_hash_tmp = NULL; | |
141 | ulint i; | |
142 | + buf_shm_info_t* shm_info = NULL; | |
143 | ||
144 | /* Round down to a multiple of page size, | |
145 | although it already should be. */ | |
146 | mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE); | |
147 | + | |
148 | + srv_buffer_pool_shm_is_reused = FALSE; | |
149 | + | |
150 | + if (srv_buffer_pool_shm_key) { | |
151 | + /* zip_hash size */ | |
152 | + zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2; | |
153 | + zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n) | |
154 | + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); | |
155 | + } | |
156 | + | |
157 | /* Reserve space for the block descriptors. */ | |
158 | mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block) | |
159 | + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); | |
160 | + if (srv_buffer_pool_shm_key) { | |
161 | + mem_size += ut_2pow_round(sizeof(buf_shm_info_t) | |
162 | + + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); | |
163 | + mem_size += zip_hash_mem_size; | |
164 | + } | |
165 | ||
166 | chunk->mem_size = mem_size; | |
167 | + | |
168 | + if (srv_buffer_pool_shm_key) { | |
169 | + ulint binary_id; | |
170 | + ibool is_new; | |
171 | + | |
172 | + ut_a(buf_pool->n_chunks == 1); | |
173 | + | |
174 | + fprintf(stderr, | |
175 | + "InnoDB: Notice: The innodb_buffer_pool_shm_key option has been specified.\n" | |
176 | + "InnoDB: Do not change the following between restarts of the server while this option is being used:\n" | |
177 | + "InnoDB: * the mysqld executable between restarts of the server.\n" | |
178 | + "InnoDB: * the value of innodb_buffer_pool_size.\n" | |
179 | + "InnoDB: * the value of innodb_page_size.\n" | |
180 | + "InnoDB: * datafiles created by InnoDB during this session.\n" | |
181 | + "InnoDB: Otherwise, data corruption in datafiles may result.\n"); | |
182 | + | |
183 | + /* FIXME: This id is still vague (it only sums function address differences) */ | |
184 | + binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get) | |
185 | + + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum) | |
186 | + + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal) | |
187 | + + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str) | |
188 | + + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version) | |
189 | + + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low) | |
190 | + + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func) | |
191 | + + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread) | |
192 | + + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside) | |
193 | + + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity) | |
194 | + + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup); | |
195 | + | |
196 | + chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new); | |
197 | + | |
198 | + if (UNIV_UNLIKELY(chunk->mem == NULL)) { | |
199 | + return(NULL); | |
200 | + } | |
201 | +init_again: | |
202 | +#ifdef UNIV_SET_MEM_TO_ZERO | |
203 | + if (is_new) { | |
204 | + memset(chunk->mem, '\0', chunk->mem_size); | |
205 | + } | |
206 | +#endif | |
207 | + /* for ut_fold_binary_32(), these values should be 32-bit aligned */ | |
208 | + ut_a(sizeof(buf_shm_info_t) % 4 == 0); | |
209 | + ut_a((ulint)chunk->mem % 4 == 0); | |
210 | + ut_a(chunk->mem_size % 4 == 0); | |
211 | + | |
212 | + shm_info = chunk->mem; | |
213 | + | |
214 | + zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size); | |
215 | + | |
216 | + if (is_new) { | |
217 | + strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8); | |
218 | + shm_info->binary_id = binary_id; | |
219 | + shm_info->is_new = TRUE; /* changed to FALSE when the initialization is finished */ | |
220 | + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ | |
221 | + shm_info->reusable = FALSE; /* changed to TRUE when validation is finished. */ | |
222 | + shm_info->buf_pool_size = srv_buf_pool_size; | |
223 | + shm_info->page_size = srv_page_size; | |
224 | + shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size; | |
225 | + shm_info->zip_hash_n = zip_hash_n; | |
226 | + } else { | |
227 | + ulint checksum; | |
228 | + | |
229 | + if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) { | |
230 | + fprintf(stderr, | |
231 | + "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n"); | |
232 | + return(NULL); | |
233 | + } | |
234 | + if (shm_info->binary_id != binary_id) { | |
235 | + fprintf(stderr, | |
236 | + "InnoDB: Error: The shared memory segment seems not to be for this binary.\n"); | |
237 | + return(NULL); | |
238 | + } | |
239 | + if (shm_info->is_new) { | |
240 | + fprintf(stderr, | |
241 | + "InnoDB: Error: The shared memory was not initialized yet.\n"); | |
242 | + return(NULL); | |
243 | + } | |
244 | + if (shm_info->buf_pool_size != srv_buf_pool_size) { | |
245 | + fprintf(stderr, | |
246 | + "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n", | |
247 | + shm_info->buf_pool_size, srv_buf_pool_size); | |
248 | + return(NULL); | |
249 | + } | |
250 | + if (shm_info->page_size != srv_page_size) { | |
251 | + fprintf(stderr, | |
252 | + "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n", | |
253 | + shm_info->page_size, srv_page_size); | |
254 | + return(NULL); | |
255 | + } | |
256 | + if (!shm_info->reusable) { | |
257 | + fprintf(stderr, | |
258 | + "InnoDB: Warning: The shared memory has unrecoverable contents.\n" | |
259 | + "InnoDB: The shared memory segment is initialized.\n"); | |
260 | + is_new = TRUE; | |
261 | + goto init_again; | |
262 | + } | |
263 | + if (!shm_info->clean) { | |
264 | + fprintf(stderr, | |
265 | + "InnoDB: Warning: The shared memory was not shut down cleanly.\n" | |
266 | + "InnoDB: The shared memory segment is initialized.\n"); | |
267 | + is_new = TRUE; | |
268 | + goto init_again; | |
269 | + } | |
270 | + | |
271 | + ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size); | |
272 | + ut_a(shm_info->zip_hash_n == zip_hash_n); | |
273 | + | |
274 | + /* check checksum */ | |
275 | + if (srv_buffer_pool_shm_checksum) { | |
276 | + checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t), | |
277 | + chunk->mem_size - sizeof(buf_shm_info_t)); | |
278 | + } else { | |
279 | + checksum = BUF_NO_CHECKSUM_MAGIC; | |
280 | + } | |
281 | + | |
282 | + if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC | |
283 | + && shm_info->checksum != checksum) { | |
284 | + fprintf(stderr, | |
285 | + "InnoDB: Error: checksum of the shared memory is not match. " | |
286 | + "(stored=%lu calculated=%lu)\n", | |
287 | + shm_info->checksum, checksum); | |
288 | + return(NULL); | |
289 | + } | |
290 | + | |
291 | + /* flag to use the segment. */ | |
292 | + shm_info->clean = FALSE; /* changed to TRUE when free the segment. */ | |
293 | + } | |
294 | + | |
295 | + /* init zip_hash contents */ | |
296 | + if (is_new) { | |
297 | + hash_create_init(zip_hash_tmp, zip_hash_n); | |
298 | + } else { | |
299 | + /* offset adjustment is done later */ | |
300 | + hash_create_reuse(zip_hash_tmp); | |
301 | + | |
302 | + srv_buffer_pool_shm_is_reused = TRUE; | |
303 | + } | |
304 | + } else { | |
305 | chunk->mem = os_mem_alloc_large(&chunk->mem_size); | |
306 | ||
307 | if (UNIV_UNLIKELY(chunk->mem == NULL)) { | |
308 | ||
309 | return(NULL); | |
310 | } | |
311 | + } | |
312 | ||
313 | /* Allocate the block descriptors from | |
314 | the start of the memory block. */ | |
315 | + if (srv_buffer_pool_shm_key) { | |
316 | + chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t)); | |
317 | + } else { | |
318 | chunk->blocks = chunk->mem; | |
319 | + } | |
320 | ||
321 | /* Align a pointer to the first frame. Note that when | |
322 | os_large_page_size is smaller than UNIV_PAGE_SIZE, | |
323 | @@ -1028,8 +1271,13 @@ | |
324 | it is bigger, we may allocate more blocks than requested. */ | |
325 | ||
326 | frame = ut_align(chunk->mem, UNIV_PAGE_SIZE); | |
327 | + if (srv_buffer_pool_shm_key) { | |
328 | + /* reserve zip_hash space and always subtract 1 for reproducibility */ | |
329 | + chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1; | |
330 | + } else { | |
331 | chunk->size = chunk->mem_size / UNIV_PAGE_SIZE | |
332 | - (frame != chunk->mem); | |
333 | + } | |
334 | ||
335 | /* Subtract the space needed for block descriptors. */ | |
336 | { | |
337 | @@ -1043,6 +1291,98 @@ | |
338 | chunk->size = size; | |
339 | } | |
340 | ||
341 | + if (shm_info && !(shm_info->is_new)) { | |
342 | + /* convert the shared memory segment for reuse */ | |
343 | + ptrdiff_t phys_offset; | |
344 | + ptrdiff_t logi_offset; | |
345 | + ptrdiff_t blocks_offset; | |
346 | + void* previous_frame_address; | |
347 | + | |
348 | + if (chunk->size < shm_info->chunk_backup.size) { | |
349 | + fprintf(stderr, | |
350 | + "InnoDB: Error: The buffer pool became smaller because of allocated address.\n" | |
351 | + "InnoDB: Retrying may avoid this situation.\n"); | |
352 | + shm_info->clean = TRUE; /* release the flag for retrying */ | |
353 | + return(NULL); | |
354 | + } | |
355 | + | |
356 | + chunk->size = shm_info->chunk_backup.size; | |
357 | + phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset); | |
358 | + logi_offset = frame - chunk->blocks[0].frame; | |
359 | + previous_frame_address = chunk->blocks[0].frame; | |
360 | + blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks; | |
361 | + | |
362 | + if (phys_offset || logi_offset || blocks_offset) { | |
363 | + fprintf(stderr, | |
364 | + "InnoDB: Buffer pool in the shared memory segment should be converted.\n" | |
365 | + "InnoDB: Previous frames in address : %p\n" | |
366 | + "InnoDB: Previous frames were located : %p\n" | |
367 | + "InnoDB: Current frames should be located: %p\n" | |
368 | + "InnoDB: Pysical offset : %ld (%#lx)\n" | |
369 | + "InnoDB: Logical offset (frames) : %ld (%#lx)\n" | |
370 | + "InnoDB: Logical offset (blocks) : %ld (%#lx)\n", | |
371 | + (byte*)chunk->mem + shm_info->frame_offset, | |
372 | + chunk->blocks[0].frame, frame, | |
373 | + phys_offset, phys_offset, logi_offset, logi_offset, | |
374 | + blocks_offset, blocks_offset); | |
375 | + } else { | |
376 | + fprintf(stderr, | |
377 | + "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n"); | |
378 | + } | |
379 | + | |
380 | + if (phys_offset) { | |
381 | + fprintf(stderr, | |
382 | + "InnoDB: Aligning physical offset..."); | |
383 | + | |
384 | + memmove(frame, (byte*)chunk->mem + shm_info->frame_offset, | |
385 | + chunk->size * UNIV_PAGE_SIZE); | |
386 | + | |
387 | + fprintf(stderr, | |
388 | + " Done.\n"); | |
389 | + } | |
390 | + | |
391 | + /* buf_block_t */ | |
392 | + block = chunk->blocks; | |
393 | + for (i = chunk->size; i--; ) { | |
394 | + buf_block_reuse(block, logi_offset); | |
395 | + block++; | |
396 | + } | |
397 | + | |
398 | + if (logi_offset || blocks_offset) { | |
399 | + fprintf(stderr, | |
400 | + "InnoDB: Aligning logical offset..."); | |
401 | + | |
402 | + | |
403 | + /* buf_pool_t buf_pool_backup */ | |
404 | + UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list, | |
405 | + previous_frame_address, logi_offset, blocks_offset); | |
406 | + UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free, | |
407 | + previous_frame_address, logi_offset, blocks_offset); | |
408 | + UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU, | |
409 | + previous_frame_address, logi_offset, blocks_offset); | |
410 | + if (shm_info->buf_pool_backup.LRU_old) | |
411 | + shm_info->buf_pool_backup.LRU_old = | |
412 | + (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old) | |
413 | + + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address) | |
414 | + ? logi_offset : blocks_offset)); | |
415 | + | |
416 | + UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU, | |
417 | + previous_frame_address, logi_offset, blocks_offset); | |
418 | + | |
419 | + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean, | |
420 | + previous_frame_address, logi_offset, blocks_offset); | |
421 | + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { | |
422 | + UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i], | |
423 | + previous_frame_address, logi_offset, blocks_offset); | |
424 | + } | |
425 | + | |
426 | + HASH_OFFSET(zip_hash_tmp, buf_page_t, hash, | |
427 | + previous_frame_address, logi_offset, blocks_offset); | |
428 | + | |
429 | + fprintf(stderr, | |
430 | + " Done.\n"); | |
431 | + } | |
432 | + } else { | |
433 | /* Init block structs and assign frames for them. Then we | |
434 | assign the frames to the first blocks (we already mapped the | |
435 | memory above). */ | |
436 | @@ -1068,6 +1408,11 @@ | |
437 | block++; | |
438 | frame += UNIV_PAGE_SIZE; | |
439 | } | |
440 | + } | |
441 | + | |
442 | + if (shm_info) { | |
443 | + shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem; | |
444 | + } | |
445 | ||
446 | #ifdef PFS_GROUP_BUFFER_SYNC | |
447 | pfs_register_buffer_block(chunk); | |
448 | @@ -1249,6 +1594,8 @@ | |
449 | UNIV_MEM_UNDESC(block); | |
450 | } | |
451 | ||
452 | + ut_a(!srv_buffer_pool_shm_key); | |
453 | + | |
454 | os_mem_free_large(chunk->mem, chunk->mem_size); | |
455 | } | |
456 | ||
457 | @@ -1289,7 +1636,7 @@ | |
458 | ulint instance_no) /*!< in: id of the instance */ | |
459 | { | |
460 | ulint i; | |
461 | - buf_chunk_t* chunk; | |
462 | + buf_chunk_t* chunk = NULL; | |
463 | ||
464 | /* 1. Initialize general fields | |
465 | ------------------------------- */ | |
466 | @@ -1335,7 +1682,10 @@ | |
467 | buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE; | |
468 | ||
469 | buf_pool->page_hash = hash_create(2 * buf_pool->curr_size); | |
470 | + /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */ | |
471 | + if (!srv_buffer_pool_shm_key) { | |
472 | buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size); | |
473 | + } | |
474 | ||
475 | buf_pool->last_printout_time = ut_time(); | |
476 | } | |
477 | @@ -1354,6 +1704,86 @@ | |
478 | ||
479 | /* All fields are initialized by mem_zalloc(). */ | |
480 | ||
481 | + if (chunk && srv_buffer_pool_shm_key) { | |
482 | + buf_shm_info_t* shm_info; | |
483 | + | |
484 | + ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t)); | |
485 | + shm_info = chunk->mem; | |
486 | + | |
487 | + buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset); | |
488 | + | |
489 | + if(shm_info->is_new) { | |
490 | + shm_info->is_new = FALSE; /* initialization was finished */ | |
491 | + } else { | |
492 | + buf_block_t* block = chunk->blocks; | |
493 | + buf_page_t* b; | |
494 | + | |
495 | + /* shm_info->buf_pool_backup should already have been converted */ | |
496 | + /* in buf_chunk_init(), so simply copy it. */ | |
497 | + buf_pool->flush_list = shm_info->buf_pool_backup.flush_list; | |
498 | + buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock; | |
499 | + buf_pool->free = shm_info->buf_pool_backup.free; | |
500 | + buf_pool->LRU = shm_info->buf_pool_backup.LRU; | |
501 | + buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old; | |
502 | + buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len; | |
503 | + buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU; | |
504 | + buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean; | |
505 | + for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) { | |
506 | + buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i]; | |
507 | + } | |
508 | + | |
509 | + for (i = 0; i < chunk->size; i++, block++) { | |
510 | + if (buf_block_get_state(block) | |
511 | + == BUF_BLOCK_FILE_PAGE) { | |
512 | + ut_d(block->page.in_page_hash = TRUE); | |
513 | + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, | |
514 | + buf_page_address_fold( | |
515 | + block->page.space, | |
516 | + block->page.offset), | |
517 | + &block->page); | |
518 | + } | |
519 | + } | |
520 | + | |
521 | + for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; | |
522 | + b = UT_LIST_GET_NEXT(zip_list, b)) { | |
523 | + ut_ad(!b->in_flush_list); | |
524 | + ut_ad(b->in_LRU_list); | |
525 | + | |
526 | + ut_d(b->in_page_hash = TRUE); | |
527 | + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, | |
528 | + buf_page_address_fold(b->space, b->offset), b); | |
529 | + } | |
530 | + | |
531 | + for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; | |
532 | + b = UT_LIST_GET_NEXT(flush_list, b)) { | |
533 | + ut_ad(b->in_flush_list); | |
534 | + ut_ad(b->in_LRU_list); | |
535 | + | |
536 | + switch (buf_page_get_state(b)) { | |
537 | + case BUF_BLOCK_ZIP_DIRTY: | |
538 | + ut_d(b->in_page_hash = TRUE); | |
539 | + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, | |
540 | + buf_page_address_fold(b->space, | |
541 | + b->offset), b); | |
542 | + break; | |
543 | + case BUF_BLOCK_FILE_PAGE: | |
544 | + /* uncompressed page */ | |
545 | + break; | |
546 | + case BUF_BLOCK_ZIP_FREE: | |
547 | + case BUF_BLOCK_ZIP_PAGE: | |
548 | + case BUF_BLOCK_NOT_USED: | |
549 | + case BUF_BLOCK_READY_FOR_USE: | |
550 | + case BUF_BLOCK_MEMORY: | |
551 | + case BUF_BLOCK_REMOVE_HASH: | |
552 | + ut_error; | |
553 | + break; | |
554 | + } | |
555 | + } | |
556 | + | |
557 | + | |
558 | + } | |
559 | + } | |
560 | + | |
561 | mutex_exit(&buf_pool->LRU_list_mutex); | |
562 | rw_lock_x_unlock(&buf_pool->page_hash_latch); | |
563 | buf_pool_mutex_exit(buf_pool); | |
564 | @@ -1373,6 +1803,42 @@ | |
565 | buf_chunk_t* chunk; | |
566 | buf_chunk_t* chunks; | |
567 | ||
568 | + if (srv_buffer_pool_shm_key) { | |
569 | + buf_shm_info_t* shm_info; | |
570 | + | |
571 | + ut_a(buf_pool->n_chunks == 1); | |
572 | + | |
573 | + chunk = buf_pool->chunks; | |
574 | + shm_info = chunk->mem; | |
575 | + ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t)); | |
576 | + | |
577 | + /* if opened, close shm. */ | |
578 | + if (!shm_info->clean) { | |
579 | + /* validate that the shared memory segment doesn't have unrecoverable contents. */ | |
580 | + /* Currently, this validation is no longer needed */ | |
581 | + shm_info->reusable = TRUE; | |
582 | + | |
583 | + memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t)); | |
584 | + memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t)); | |
585 | + | |
586 | + if (srv_fast_shutdown < 2) { | |
587 | + if (srv_buffer_pool_shm_checksum) { | |
588 | + shm_info->checksum = | |
589 | + ut_fold_binary_32( | |
590 | + (byte*)chunk->mem + sizeof(buf_shm_info_t), | |
591 | + chunk->mem_size - sizeof(buf_shm_info_t)); | |
592 | + } else { | |
593 | + shm_info->checksum = BUF_NO_CHECKSUM_MAGIC; | |
594 | + } | |
595 | + shm_info->clean = TRUE; | |
596 | + } | |
597 | + | |
598 | + fprintf(stderr, | |
599 | + "InnoDB: The shared memory was closed.\n"); | |
600 | + } | |
601 | + | |
602 | + os_shm_free(chunk->mem, chunk->mem_size); | |
603 | + } else { | |
604 | chunks = buf_pool->chunks; | |
605 | chunk = chunks + buf_pool->n_chunks; | |
606 | ||
607 | @@ -1381,10 +1847,13 @@ | |
608 | would fail at shutdown. */ | |
609 | os_mem_free_large(chunk->mem, chunk->mem_size); | |
610 | } | |
611 | + } | |
612 | ||
613 | mem_free(buf_pool->chunks); | |
614 | hash_table_free(buf_pool->page_hash); | |
615 | + if (!srv_buffer_pool_shm_key) { | |
616 | hash_table_free(buf_pool->zip_hash); | |
617 | + } | |
618 | } | |
619 | ||
620 | /********************************************************************//** | |
621 | @@ -1668,6 +2137,11 @@ | |
622 | //buf_pool_mutex_enter(buf_pool); | |
623 | mutex_enter(&buf_pool->LRU_list_mutex); | |
624 | ||
625 | + if (srv_buffer_pool_shm_key) { | |
626 | + /* Cannot support shrink */ | |
627 | + goto func_done; | |
628 | + } | |
629 | + | |
630 | shrink_again: | |
631 | if (buf_pool->n_chunks <= 1) { | |
632 | ||
633 | @@ -1848,7 +2322,7 @@ | |
634 | zip_hash = hash_create(2 * buf_pool->curr_size); | |
635 | ||
636 | HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash, | |
637 | - BUF_POOL_ZIP_FOLD_BPAGE); | |
638 | + buf_pool, BUF_POOL_ZIP_FOLD_BPAGE); | |
639 | ||
640 | hash_table_free(buf_pool->zip_hash); | |
641 | buf_pool->zip_hash = zip_hash; | |
642 | @@ -2130,6 +2604,11 @@ | |
643 | ulint change_size; | |
644 | ulint min_change_size = 1048576 * srv_buf_pool_instances; | |
645 | ||
646 | + if (srv_buffer_pool_shm_key) { | |
647 | + /* Cannot support resize */ | |
648 | + return; | |
649 | + } | |
650 | + | |
651 | buf_pool_mutex_enter_all(); | |
652 | ||
653 | if (srv_buf_pool_old_size == srv_buf_pool_size) { | |
654 | diff -ruN a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c | |
655 | --- a/storage/innobase/ha/hash0hash.c 2010-11-03 07:01:13.000000000 +0900 | |
656 | +++ b/storage/innobase/ha/hash0hash.c 2010-12-07 16:10:14.937749140 +0900 | |
657 | @@ -133,6 +133,70 @@ | |
658 | } | |
659 | ||
660 | /*************************************************************//** | |
661 | +Computes the size in bytes of a hash table laid out in one contiguous region: the hash_table_t header rounded up to a multiple of 8, followed by ut_find_prime(n) cells. @return number of bytes needed */ | |
662 | +UNIV_INTERN | |
663 | +ulint | |
664 | +hash_create_needed( | |
665 | +/*===============*/ | |
666 | + ulint n) /*!< in: requested number of cells, passed to ut_find_prime() */ | |
667 | +{ | |
668 | + ulint prime; | |
669 | + ulint offset; | |
670 | + | |
671 | + prime = ut_find_prime(n); | |
672 | + | |
673 | + offset = (sizeof(hash_table_t) + 7) / 8; /* header size in 8-byte units, rounded up */ | |
674 | + offset *= 8; | |
675 | + | |
676 | + return(offset + sizeof(hash_cell_t) * prime); | |
677 | +} | |
678 | + | |
679 | +UNIV_INTERN /* Initializes a hash table in place inside caller-provided memory: the cell array is put directly after the 8-byte-rounded header, so the region must hold hash_create_needed(n) bytes */ | |
680 | +void | |
681 | +hash_create_init( | |
682 | +/*=============*/ | |
683 | + hash_table_t* table, /*!< in/out: start of the memory region that will hold header and cells */ | |
684 | + ulint n) /*!< in: requested number of cells, passed to ut_find_prime() */ | |
685 | +{ | |
686 | + ulint prime; | |
687 | + ulint offset; | |
688 | + | |
689 | + prime = ut_find_prime(n); | |
690 | + | |
691 | + offset = (sizeof(hash_table_t) + 7) / 8; /* header size in 8-byte units, rounded up */ | |
692 | + offset *= 8; | |
693 | + | |
694 | + table->array = (hash_cell_t*)(((byte*)table) + offset); /* cells live in the same region, right after the header */ | |
695 | + table->n_cells = prime; | |
696 | +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG | |
697 | + table->adaptive = FALSE; | |
698 | +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ | |
699 | + table->n_mutexes = 0; | |
700 | + table->mutexes = NULL; | |
701 | + table->heaps = NULL; | |
702 | + table->heap = NULL; | |
703 | + ut_d(table->magic_n = HASH_TABLE_MAGIC_N); | |
704 | + | |
705 | + /* Initialize the cell array */ | |
706 | + hash_table_clear(table); | |
707 | +} | |
708 | + | |
709 | +UNIV_INTERN /* Re-attaches an already initialized in-place hash table: only table->array is recomputed from the table's current address; the cell contents are not touched here */ | |
710 | +void | |
711 | +hash_create_reuse( | |
712 | +/*==============*/ | |
713 | + hash_table_t* table) /*!< in/out: hash table header at the start of its memory region */ | |
714 | +{ | |
715 | + ulint offset; | |
716 | + | |
717 | + offset = (sizeof(hash_table_t) + 7) / 8; /* same header rounding as used in hash_create_init() */ | |
718 | + offset *= 8; | |
719 | + | |
720 | + table->array = (hash_cell_t*)(((byte*)table) + offset); | |
721 | + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); /* debug-only check that the region holds an initialized table */ | |
722 | +} | |
723 | + | |
724 | +/*************************************************************//** | |
725 | Frees a hash table. */ | |
726 | UNIV_INTERN | |
727 | void | |
728 | diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc | |
729 | --- a/storage/innobase/handler/ha_innodb.cc 2010-12-06 20:16:21.733263627 +0900 | |
730 | +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-07 17:56:28.316139830 +0900 | |
731 | @@ -194,6 +194,7 @@ | |
732 | static my_bool innobase_create_status_file = FALSE; | |
733 | static my_bool innobase_stats_on_metadata = TRUE; | |
734 | static my_bool innobase_use_sys_stats_table = FALSE; | |
735 | +static my_bool innobase_buffer_pool_shm_checksum = TRUE; | |
736 | ||
737 | ||
738 | static char* internal_innobase_data_file_path = NULL; | |
739 | @@ -2620,6 +2621,14 @@ | |
740 | srv_buf_pool_size = (ulint) innobase_buffer_pool_size; | |
741 | srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances; | |
742 | ||
743 | + if (srv_buffer_pool_shm_key && srv_buf_pool_instances > 1) { | |
744 | + fprintf(stderr, | |
745 | + "InnoDB: Warning: innodb_buffer_pool_shm_key cannot be used with several innodb_buffer_pool_instances.\n" | |
746 | + "InnoDB: innodb_buffer_pool_instances was set to 1.\n"); | |
747 | + srv_buf_pool_instances = 1; | |
748 | + innobase_buffer_pool_instances = 1; | |
749 | + } | |
750 | + | |
751 | srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; | |
752 | ||
753 | srv_n_file_io_threads = (ulint) innobase_file_io_threads; | |
754 | @@ -2636,6 +2645,7 @@ | |
755 | srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; | |
756 | srv_use_checksums = (ibool) innobase_use_checksums; | |
757 | srv_fast_checksum = (ibool) innobase_fast_checksum; | |
758 | + srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum; | |
759 | ||
760 | #ifdef HAVE_LARGE_PAGES | |
761 | if ((os_use_large_pages = (ibool) my_use_large_pages)) | |
762 | @@ -11642,6 +11652,16 @@ | |
763 | "Number of buffer pool instances, set to higher value on high-end machines to increase scalability", | |
764 | NULL, NULL, 1L, 1L, MAX_BUFFER_POOLS, 1L); | |
765 | ||
766 | +static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key, | |
767 | + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, | |
768 | + "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.", | |
769 | + NULL, NULL, 0, 0, INT_MAX32, 0); | |
770 | + | |
771 | +static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum, | |
772 | + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, | |
773 | + "Enable buffer_pool_shm checksum validation (enabled by default).", | |
774 | + NULL, NULL, TRUE); | |
775 | + | |
776 | static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency, | |
777 | PLUGIN_VAR_RQCMDARG, | |
778 | "Helps in performance tuning in heavily concurrent environments.", | |
779 | @@ -11921,6 +11941,8 @@ | |
780 | MYSQL_SYSVAR(autoextend_increment), | |
781 | MYSQL_SYSVAR(buffer_pool_size), | |
782 | MYSQL_SYSVAR(buffer_pool_instances), | |
783 | + MYSQL_SYSVAR(buffer_pool_shm_key), | |
784 | + MYSQL_SYSVAR(buffer_pool_shm_checksum), | |
785 | MYSQL_SYSVAR(checksums), | |
786 | MYSQL_SYSVAR(fast_checksum), | |
787 | MYSQL_SYSVAR(commit_concurrency), | |
788 | diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h | |
789 | --- a/storage/innobase/include/buf0buf.h 2010-12-06 20:16:21.778264552 +0900 | |
790 | +++ b/storage/innobase/include/buf0buf.h 2010-12-07 17:56:28.322749380 +0900 | |
791 | @@ -36,6 +36,7 @@ | |
792 | #ifndef UNIV_HOTBACKUP | |
793 | #include "ut0rbt.h" | |
794 | #include "os0proc.h" | |
795 | +#include "srv0srv.h" | |
796 | ||
797 | /** @name Modes for buf_page_get_gen */ | |
798 | /* @{ */ | |
799 | @@ -1520,9 +1521,12 @@ | |
800 | /**********************************************************************//** | |
801 | Compute the hash fold value for blocks in buf_pool->zip_hash. */ | |
802 | /* @{ */ | |
803 | -#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE) | |
804 | -#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) | |
805 | -#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) | |
806 | +/* When srv_buffer_pool_shm_key is set, fold on the byte offset from the frame of buf_page_from_array(bpool, 0) instead of the absolute address */ | |
807 | +#define BUF_POOL_ZIP_FOLD_PTR(bpool, ptr) (!srv_buffer_pool_shm_key\ | |
808 | + ?((ulint) (ptr) / UNIV_PAGE_SIZE)\ | |
809 | + :((ulint) ((byte*)(ptr) - (byte*)(buf_page_from_array((bpool), 0)->frame)) / UNIV_PAGE_SIZE)) | |
810 | +#define BUF_POOL_ZIP_FOLD(bpool, b) BUF_POOL_ZIP_FOLD_PTR((bpool), (b)->frame) | |
811 | +#define BUF_POOL_ZIP_FOLD_BPAGE(bpool, b) BUF_POOL_ZIP_FOLD((bpool), (buf_block_t*) (b)) | |
812 | /* @} */ | |
813 | ||
814 | /** @brief The buffer pool statistics structure. */ | |
815 | diff -ruN a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h | |
816 | --- a/storage/innobase/include/hash0hash.h 2010-11-03 07:01:13.000000000 +0900 | |
817 | +++ b/storage/innobase/include/hash0hash.h 2010-12-07 17:56:28.324726446 +0900 | |
818 | @@ -49,6 +49,28 @@ | |
819 | hash_create( | |
820 | /*========*/ | |
821 | ulint n); /*!< in: number of array cells */ | |
822 | + | |
823 | +/*************************************************************//** | |
824 | +NOTE(review): definition is not visible from this header; presumably returns the memory needed for a hash table of n cells - confirm in hash0hash.c. */ | |
825 | +UNIV_INTERN | |
826 | +ulint | |
827 | +hash_create_needed( | |
828 | +/*===============*/ | |
829 | + ulint n); /*!< in: number of array cells (assumed, as for hash_create() - TODO confirm) */ | |
830 | + | |
831 | +UNIV_INTERN | |
832 | +void | |
833 | +hash_create_init( | |
834 | +/*=============*/ | |
835 | + hash_table_t* table, /*!< hash table to set up (presumably caller-provided storage - TODO confirm) */ | |
836 | + ulint n); /*!< in: number of array cells (assumed - TODO confirm) */ | |
837 | + | |
838 | +UNIV_INTERN | |
839 | +void | |
840 | +hash_create_reuse( | |
841 | +/*==============*/ | |
842 | + hash_table_t* table); /*!< existing hash table to reuse (exact semantics not shown here - TODO confirm) */ | |
843 | + | |
844 | #ifndef UNIV_HOTBACKUP | |
845 | /*************************************************************//** | |
846 | Creates a mutex array to protect a hash table. */ | |
847 | @@ -306,7 +328,7 @@ | |
848 | /****************************************************************//** | |
849 | Move all hash table entries from OLD_TABLE to NEW_TABLE. */ | |
850 | ||
851 | -#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \ | |
852 | +#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, BPOOL, FOLD_FUNC) \ | |
853 | do {\ | |
854 | ulint i2222;\ | |
855 | ulint cell_count2222;\ | |
856 | @@ -318,7 +340,7 @@ | |
857 | \ | |
858 | while (node2222) {\ | |
859 | NODE_TYPE* next2222 = node2222->PTR_NAME;\ | |
860 | - ulint fold2222 = FOLD_FUNC(node2222);\ | |
861 | + ulint fold2222 = FOLD_FUNC(BPOOL, node2222);\ | |
862 | \ | |
863 | HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\ | |
864 | fold2222, node2222);\ | |
865 | @@ -327,6 +349,33 @@ | |
866 | }\ | |
867 | }\ | |
868 | } while (0) | |
869 | + | |
870 | +/********************************************************************//** | |
871 | +Relocates the chain pointers of TABLE: each non-NULL pointer p becomes p + FOFFSET if p > FADDR, else p + BOFFSET (byte offsets).*/ | |
872 | +#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \ | |
873 | +do {\ | |
874 | + ulint i2222;\ | |
875 | + ulint cell_count2222;\ | |
876 | +\ | |
877 | + cell_count2222 = hash_get_n_cells(TABLE);\ | |
878 | +\ | |
879 | + for (i2222 = 0; i2222 < cell_count2222; i2222++) {\ | |
880 | + NODE_TYPE* node2222;\ | |
881 | +\ | |
882 | + if ((TABLE)->array[i2222].node) \ | |
883 | + (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \ | |
884 | + + (((TABLE)->array[i2222].node > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\ | |
885 | + node2222 = HASH_GET_FIRST((TABLE), i2222);\ | |
886 | +\ | |
887 | + while (node2222) {\ | |
888 | + if (node2222->PTR_NAME) \ | |
889 | + node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \ | |
890 | + + ((((void*)node2222->PTR_NAME) > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\ | |
891 | +\ | |
892 | + node2222 = node2222->PTR_NAME;\ | |
893 | + }\ | |
894 | + }\ | |
895 | +} while (0) | |
896 | ||
897 | /************************************************************//** | |
898 | Gets the mutex index for a fold value in a hash table. | |
899 | diff -ruN a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h | |
900 | --- a/storage/innobase/include/os0proc.h 2010-11-03 07:01:13.000000000 +0900 | |
901 | +++ b/storage/innobase/include/os0proc.h 2010-12-07 16:10:14.955718750 +0900 | |
902 | @@ -32,6 +32,11 @@ | |
903 | #ifdef UNIV_LINUX | |
904 | #include <sys/ipc.h> | |
905 | #include <sys/shm.h> | |
906 | +#else | |
907 | +# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H | |
908 | +#include <sys/ipc.h> | |
909 | +#include <sys/shm.h> | |
910 | +# endif | |
911 | #endif | |
912 | ||
913 | typedef void* os_process_t; | |
914 | @@ -70,6 +75,29 @@ | |
915 | ulint size); /*!< in: size returned by | |
916 | os_mem_alloc_large() */ | |
917 | ||
918 | + | |
919 | +/****************************************************************//** | |
920 | +Allocates or attaches and reuses shared memory segment. | |
921 | +The content is not cleared automatically. | |
922 | +@return allocated memory, or NULL on failure */ | |
923 | +UNIV_INTERN | |
924 | +void* | |
925 | +os_shm_alloc( | |
926 | +/*=========*/ | |
927 | + ulint* n, /*!< in/out: number of bytes; rounded up to a multiple of the page size used */ | |
928 | + uint key, /*!< in: key passed to shmget() */ | |
929 | + ibool* is_new); /*!< out: TRUE if a new segment was created, FALSE if an existing one is used */ | |
930 | + | |
931 | +/****************************************************************//** | |
932 | +Detaches a shared memory segment that was attached by os_shm_alloc(). */ | |
933 | +UNIV_INTERN | |
934 | +void | |
935 | +os_shm_free( | |
936 | +/*========*/ | |
937 | + void *ptr, /*!< in: pointer returned by | |
938 | + os_shm_alloc() */ | |
939 | + ulint size); /*!< in: size returned by | |
940 | + os_shm_alloc() */ | |
941 | #ifndef UNIV_NONINL | |
942 | #include "os0proc.ic" | |
943 | #endif | |
944 | diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h | |
945 | --- a/storage/innobase/include/srv0srv.h 2010-12-04 20:20:28.016566697 +0900 | |
946 | +++ b/storage/innobase/include/srv0srv.h 2010-12-07 16:10:14.956717659 +0900 | |
947 | @@ -171,6 +171,10 @@ | |
948 | extern ulint srv_mem_pool_size; | |
949 | extern ulint srv_lock_table_size; | |
950 | ||
951 | +extern uint srv_buffer_pool_shm_key; | |
952 | +extern ibool srv_buffer_pool_shm_is_reused; | |
953 | +extern ibool srv_buffer_pool_shm_checksum; | |
954 | + | |
955 | extern ibool srv_thread_concurrency_timer_based; | |
956 | ||
957 | extern ulint srv_n_file_io_threads; | |
958 | diff -ruN a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h | |
959 | --- a/storage/innobase/include/ut0lst.h 2010-11-03 07:01:13.000000000 +0900 | |
960 | +++ b/storage/innobase/include/ut0lst.h 2010-12-07 16:10:14.957785525 +0900 | |
961 | @@ -257,5 +257,48 @@ | |
962 | ut_a(ut_list_node_313 == NULL); \ | |
963 | } while (0) | |
964 | ||
965 | +/********************************************************************//** | |
966 | +Relocates list pointers: each non-NULL pointer p becomes p + (p > FADDR ? FOFFSET : BOFFSET), then checks the list in both directions. | |
967 | +@param NAME the name of the list | |
968 | +@param TYPE node type | |
969 | +@param BASE base node (not a pointer to it) | |
970 | +@param FADDR pivot address; FOFFSET / BOFFSET are the byte offsets added to pointers above / not above it */ | |
971 | +#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \ | |
972 | +do { \ | |
973 | + ulint ut_list_i_313; \ | |
974 | + TYPE* ut_list_node_313; \ | |
975 | + \ | |
976 | + if ((BASE).start) \ | |
977 | + (BASE).start = (void*)((byte*)((BASE).start) \ | |
978 | + + (((void*)((BASE).start) > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\ | |
979 | + if ((BASE).end) \ | |
980 | + (BASE).end = (void*)((byte*)((BASE).end) \ | |
981 | + + (((void*)((BASE).end) > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\ | |
982 | + \ | |
983 | + ut_list_node_313 = (BASE).start; \ | |
984 | + \ | |
985 | + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ | |
986 | + ut_a(ut_list_node_313); \ | |
987 | + if ((ut_list_node_313->NAME).prev) \ | |
988 | + (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\ | |
989 | + + (((void*)((ut_list_node_313->NAME).prev) > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\ | |
990 | + if ((ut_list_node_313->NAME).next) \ | |
991 | + (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\ | |
992 | + + (((void*)((ut_list_node_313->NAME).next)> (void*)(FADDR))?(FOFFSET):(BOFFSET)));\ | |
993 | + ut_list_node_313 = (ut_list_node_313->NAME).next; \ | |
994 | + } \ | |
995 | + \ | |
996 | + ut_a(ut_list_node_313 == NULL); \ | |
997 | + \ | |
998 | + ut_list_node_313 = (BASE).end; \ | |
999 | + \ | |
1000 | + for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ | |
1001 | + ut_a(ut_list_node_313); \ | |
1002 | + ut_list_node_313 = (ut_list_node_313->NAME).prev; \ | |
1003 | + } \ | |
1004 | + \ | |
1005 | + ut_a(ut_list_node_313 == NULL); \ | |
1006 | +} while (0) | |
1007 | + | |
1008 | #endif | |
1009 | ||
1010 | diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c | |
1011 | --- a/storage/innobase/log/log0recv.c 2010-12-04 19:46:40.212513377 +0900 | |
1012 | +++ b/storage/innobase/log/log0recv.c 2010-12-07 16:10:14.959785817 +0900 | |
1013 | @@ -2912,6 +2912,7 @@ | |
1014 | /*==========================*/ | |
1015 | { | |
1016 | ut_a(!recv_needed_recovery); | |
1017 | + ut_a(!srv_buffer_pool_shm_is_reused); | |
1018 | ||
1019 | recv_needed_recovery = TRUE; | |
1020 | ||
1021 | diff -ruN a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c | |
1022 | --- a/storage/innobase/os/os0proc.c 2010-11-03 07:01:13.000000000 +0900 | |
1023 | +++ b/storage/innobase/os/os0proc.c 2010-12-07 16:10:14.960800123 +0900 | |
1024 | @@ -229,3 +229,173 @@ | |
1025 | } | |
1026 | #endif | |
1027 | } | |
1028 | + | |
1029 | +/****************************************************************//** | |
1030 | +Creates the shared memory segment identified by key, or attaches to it if it already exists. | |
1031 | +The content is not cleared automatically. A segment created here that cannot be attached is removed again. | |
1032 | +@return attached memory, or NULL on failure */ | |
1033 | +UNIV_INTERN | |
1034 | +void* | |
1035 | +os_shm_alloc( | |
1036 | +/*=========*/ | |
1037 | + ulint* n, /*!< in/out: number of bytes; rounded up to a multiple of the page size used */ | |
1038 | + uint key, /*!< in: key passed to shmget() */ | |
1039 | + ibool* is_new) /*!< out: TRUE if a new segment was created, FALSE if an existing one is used */ | |
1040 | +{ | |
1041 | + void* ptr; | |
1042 | +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H | |
1043 | + ulint size; | |
1044 | + int shmid; | |
1045 | + | |
1046 | + *is_new = FALSE; | |
1047 | + fprintf(stderr, | |
1048 | + "InnoDB: The shared memory segment containing the buffer pool is: key %#x (%u).\n", | |
1049 | + key, key); | |
1050 | +# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX | |
1051 | + if (!os_use_large_pages || !os_large_page_size) { | |
1052 | + goto skip; | |
1053 | + } | |
1054 | + | |
1055 | + /* Align block size to os_large_page_size */ | |
1056 | + ut_ad(ut_is_2pow(os_large_page_size)); | |
1057 | + size = ut_2pow_round(*n + (os_large_page_size - 1), | |
1058 | + os_large_page_size); | |
1059 | + | |
1060 | + shmid = shmget((key_t)key, (size_t)size, | |
1061 | + IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W); | |
1062 | + if (shmid < 0) { | |
1063 | + if (errno == EEXIST) { | |
1064 | + fprintf(stderr, | |
1065 | + "InnoDB: HugeTLB: The shared memory segment exists.\n"); | |
1066 | + shmid = shmget((key_t)key, (size_t)size, | |
1067 | + SHM_HUGETLB | SHM_R | SHM_W); | |
1068 | + if (shmid < 0) { | |
1069 | + fprintf(stderr, | |
1070 | + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n", | |
1071 | + size, errno); | |
1072 | + goto skip; | |
1073 | + } else { | |
1074 | + fprintf(stderr, | |
1075 | + "InnoDB: HugeTLB: The existent shared memory segment is used.\n"); | |
1076 | + } | |
1077 | + } else { | |
1078 | + fprintf(stderr, | |
1079 | + "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n", | |
1080 | + size, errno); | |
1081 | + goto skip; | |
1082 | + } | |
1083 | + } else { | |
1084 | + *is_new = TRUE; | |
1085 | + fprintf(stderr, | |
1086 | + "InnoDB: HugeTLB: A new shared memory segment has been created .\n"); | |
1087 | + } | |
1088 | + | |
1089 | + ptr = shmat(shmid, NULL, 0); | |
1090 | + if (ptr == (void *)-1) { | |
1091 | + fprintf(stderr, | |
1092 | + "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n", errno); | |
1093 | + if (*is_new) { | |
1094 | + shmctl(shmid, IPC_RMID, NULL); /* else the fallback below gets EEXIST and "reuses" this uninitialized segment */ | |
1095 | + } | |
1096 | + ptr = NULL; | |
1097 | + } else { | |
1098 | + *n = size; | |
1099 | + os_fast_mutex_lock(&ut_list_mutex); | |
1100 | + ut_total_allocated_memory += size; | |
1101 | + os_fast_mutex_unlock(&ut_list_mutex); | |
1102 | + UNIV_MEM_ALLOC(ptr, size); | |
1103 | + return(ptr); | |
1104 | + } | |
1105 | +skip: | |
1106 | + *is_new = FALSE; | |
1107 | +# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */ | |
1108 | +# ifdef HAVE_GETPAGESIZE | |
1109 | + size = getpagesize(); | |
1110 | +# else | |
1111 | + size = UNIV_PAGE_SIZE; | |
1112 | +# endif | |
1113 | + /* Align block size to system page size */ | |
1114 | + ut_ad(ut_is_2pow(size)); | |
1115 | + size = *n = ut_2pow_round(*n + (size - 1), size); | |
1116 | + | |
1117 | + shmid = shmget((key_t)key, (size_t)size, | |
1118 | + IPC_CREAT | IPC_EXCL | SHM_R | SHM_W); | |
1119 | + if (shmid < 0) { | |
1120 | + if (errno == EEXIST) { | |
1121 | + fprintf(stderr, | |
1122 | + "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n"); | |
1123 | + shmid = shmget((key_t)key, (size_t)size, | |
1124 | + SHM_R | SHM_W); | |
1125 | + if (shmid < 0) { | |
1126 | + fprintf(stderr, | |
1127 | + "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n", | |
1128 | + size, errno); | |
1129 | + ptr = NULL; | |
1130 | + goto end; | |
1131 | + } else { | |
1132 | + fprintf(stderr, | |
1133 | + "InnoDB: The existent shared memory segment is used.\n"); | |
1134 | + } | |
1135 | + } else { | |
1136 | + fprintf(stderr, | |
1137 | + "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n", | |
1138 | + size, errno); | |
1139 | + ptr = NULL; | |
1140 | + goto end; | |
1141 | + } | |
1142 | + } else { | |
1143 | + *is_new = TRUE; | |
1144 | + fprintf(stderr, | |
1145 | + "InnoDB: A new shared memory segment has been created.\n"); | |
1146 | + } | |
1147 | + | |
1148 | + ptr = shmat(shmid, NULL, 0); | |
1149 | + if (ptr == (void *)-1) { | |
1150 | + fprintf(stderr, | |
1151 | + "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n", errno); | |
1152 | + if (*is_new) { | |
1153 | + shmctl(shmid, IPC_RMID, NULL); /* do not leave the segment just created behind unattached */ | |
1154 | + } | |
1155 | + ptr = NULL; | |
1156 | + } else { | |
1157 | + *n = size; | |
1158 | + os_fast_mutex_lock(&ut_list_mutex); | |
1159 | + ut_total_allocated_memory += size; | |
1160 | + os_fast_mutex_unlock(&ut_list_mutex); | |
1161 | + UNIV_MEM_ALLOC(ptr, size); | |
1162 | + } | |
1163 | +end: | |
1164 | +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ | |
1165 | + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); | |
1166 | + ptr = NULL; | |
1167 | +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ | |
1168 | + return(ptr); | |
1169 | +} | |
1170 | + | |
1171 | +/****************************************************************//** | |
1172 | +Detach shared memory segment. The memory accounting is only reduced when shmdt() succeeds. */ | |
1173 | +UNIV_INTERN | |
1174 | +void | |
1175 | +os_shm_free( | |
1176 | +/*========*/ | |
1177 | + void *ptr, /*!< in: pointer returned by | |
1178 | + os_shm_alloc() */ | |
1179 | + ulint size) /*!< in: size returned by | |
1180 | + os_shm_alloc() */ | |
1181 | +{ | |
1182 | +#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H | |
1183 | + if (!shmdt(ptr)) { | |
1184 | + os_fast_mutex_lock(&ut_list_mutex); | |
1185 | + ut_a(ut_total_allocated_memory >= size); | |
1186 | + ut_total_allocated_memory -= size; | |
1187 | + os_fast_mutex_unlock(&ut_list_mutex); | |
1188 | + UNIV_MEM_FREE(ptr, size); | |
1189 | + } else { | |
1190 | + /* still attached: leave the accounting untouched, but report it */ | |
1191 | + fprintf(stderr, | |
1192 | + "InnoDB: Warning: Failed to detach shared memory segment, errno %d\n", errno); | |
1193 | + } | |
1194 | +#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ | |
1195 | + fprintf(stderr, "InnoDB: shared memory segment is not supported.\n"); | |
1196 | +#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */ | |
1197 | +} | |
1198 | diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c | |
1199 | --- a/storage/innobase/srv/srv0srv.c 2010-12-04 20:20:44.687550693 +0900 | |
1200 | +++ b/storage/innobase/srv/srv0srv.c 2010-12-07 16:10:14.962785720 +0900 | |
1201 | @@ -233,6 +233,11 @@ | |
1202 | UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX; | |
1203 | UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; | |
1204 | ||
1205 | +/* key value of the shared memory segment for the buffer pool; 0 (default) disables the feature */ | |
1206 | +UNIV_INTERN uint srv_buffer_pool_shm_key = 0; | |
1207 | +UNIV_INTERN ibool srv_buffer_pool_shm_is_reused = FALSE; /* asserted FALSE where recv_needed_recovery is set; when TRUE, startup skips buf_pool_invalidate() */ | |
1208 | +UNIV_INTERN ibool srv_buffer_pool_shm_checksum = TRUE; /* copied from innobase_buffer_pool_shm_checksum in ha_innodb.cc */ | |
1209 | + | |
1210 | /* This parameter is deprecated. Use srv_n_io_[read|write]_threads | |
1211 | instead. */ | |
1212 | UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX; | |
1213 | diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c | |
1214 | --- a/storage/innobase/srv/srv0start.c 2010-12-04 20:19:29.806482628 +0900 | |
1215 | +++ b/storage/innobase/srv/srv0start.c 2010-12-07 16:10:14.964785346 +0900 | |
1216 | @@ -1759,6 +1759,8 @@ | |
1217 | Note that this is not as heavy weight as it seems. At | |
1218 | this point there will be only ONE page in the buf_LRU | |
1219 | and there must be no page in the buf_flush list. */ | |
1220 | + /* buffer_pool_shm should not be reused when recovery was needed. */ | |
1221 | + if (!srv_buffer_pool_shm_is_reused) | |
1222 | buf_pool_invalidate(); | |
1223 | ||
1224 | /* We always try to do a recovery, even if the database had |