]> git.pld-linux.org Git - packages/mysql.git/blame - innodb_split_buf_pool_mutex.patch
- more mysqld.conf changes: skip-locking, default-character-set
[packages/mysql.git] / innodb_split_buf_pool_mutex.patch
CommitLineData
b4e1fa2c
AM
1# name : innodb_split_buf_pool_mutex.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
8diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9--- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900
10+++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900
d8778560 11@@ -3935,7 +3935,8 @@
b4e1fa2c
AM
12
13 mtr_commit(mtr);
14
15- buf_pool_mutex_enter(buf_pool);
16+ //buf_pool_mutex_enter(buf_pool);
17+ mutex_enter(&buf_pool->LRU_list_mutex);
18 mutex_enter(&block->mutex);
19
20 /* Only free the block if it is still allocated to
d8778560 21@@ -3946,17 +3947,22 @@
b4e1fa2c
AM
22 && buf_block_get_space(block) == space
23 && buf_block_get_page_no(block) == page_no) {
24
25- if (buf_LRU_free_block(&block->page, all, NULL)
26+ if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
27 != BUF_LRU_FREED
28- && all && block->page.zip.data) {
29+ && all && block->page.zip.data
30+ /* Now, buf_LRU_free_block() may release mutex temporarily */
31+ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
32+ && buf_block_get_space(block) == space
33+ && buf_block_get_page_no(block) == page_no) {
34 /* Attempt to deallocate the uncompressed page
35 if the whole block cannot be deallocted. */
36
37- buf_LRU_free_block(&block->page, FALSE, NULL);
38+ buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
39 }
40 }
41
42- buf_pool_mutex_exit(buf_pool);
43+ //buf_pool_mutex_exit(buf_pool);
44+ mutex_exit(&buf_pool->LRU_list_mutex);
45 mutex_exit(&block->mutex);
46 }
47
48diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
49--- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900
50+++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900
d8778560 51@@ -1943,7 +1943,7 @@
b4e1fa2c
AM
52 rec_offs_init(offsets_);
53
54 rw_lock_x_lock(&btr_search_latch);
55- buf_pool_mutex_enter_all();
56+ buf_pool_page_hash_x_lock_all();
57
58 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
59
d8778560 60@@ -1951,11 +1951,11 @@
b4e1fa2c
AM
61 /* We release btr_search_latch every once in a while to
62 give other queries a chance to run. */
63 if ((i != 0) && ((i % chunk_size) == 0)) {
64- buf_pool_mutex_exit_all();
65+ buf_pool_page_hash_x_unlock_all();
66 rw_lock_x_unlock(&btr_search_latch);
67 os_thread_yield();
68 rw_lock_x_lock(&btr_search_latch);
69- buf_pool_mutex_enter_all();
70+ buf_pool_page_hash_x_lock_all();
71 }
72
73 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
d8778560 74@@ -2066,11 +2066,11 @@
b4e1fa2c
AM
75 /* We release btr_search_latch every once in a while to
76 give other queries a chance to run. */
77 if (i != 0) {
78- buf_pool_mutex_exit_all();
79+ buf_pool_page_hash_x_unlock_all();
80 rw_lock_x_unlock(&btr_search_latch);
81 os_thread_yield();
82 rw_lock_x_lock(&btr_search_latch);
83- buf_pool_mutex_enter_all();
84+ buf_pool_page_hash_x_lock_all();
85 }
86
87 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
d8778560 88@@ -2078,7 +2078,7 @@
b4e1fa2c
AM
89 }
90 }
91
92- buf_pool_mutex_exit_all();
93+ buf_pool_page_hash_x_unlock_all();
94 rw_lock_x_unlock(&btr_search_latch);
95 if (UNIV_LIKELY_NULL(heap)) {
96 mem_heap_free(heap);
97diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
98--- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
99+++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
100@@ -73,10 +73,11 @@
101 if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
102 #endif /* UNIV_DEBUG_VALGRIND */
103
104- ut_ad(buf_pool_mutex_own(buf_pool));
105+ //ut_ad(buf_pool_mutex_own(buf_pool));
106+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
107 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
108 ut_ad(buf_pool->zip_free[i].start != bpage);
109- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
110+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
111
112 #ifdef UNIV_DEBUG_VALGRIND
113 if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
114@@ -96,8 +97,8 @@
115 buf_pool->zip_free[] */
116 {
117 #ifdef UNIV_DEBUG_VALGRIND
118- buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
119- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
120+ buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
121+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
122
123 if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
124 if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
125@@ -106,9 +107,10 @@
126 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
127 #endif /* UNIV_DEBUG_VALGRIND */
128
129- ut_ad(buf_pool_mutex_own(buf_pool));
130+ //ut_ad(buf_pool_mutex_own(buf_pool));
131+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
132 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
133- UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
134+ UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
135
136 #ifdef UNIV_DEBUG_VALGRIND
137 if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
138@@ -128,12 +130,13 @@
139 {
140 buf_page_t* bpage;
141
142- ut_ad(buf_pool_mutex_own(buf_pool));
143+ //ut_ad(buf_pool_mutex_own(buf_pool));
144+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
145 ut_a(i < BUF_BUDDY_SIZES);
146
147 #ifndef UNIV_DEBUG_VALGRIND
148 /* Valgrind would complain about accessing free memory. */
149- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
150+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
151 ut_ad(buf_page_get_state(ut_list_node_313)
152 == BUF_BLOCK_ZIP_FREE)));
153 #endif /* !UNIV_DEBUG_VALGRIND */
154@@ -177,16 +180,19 @@
155 buf_buddy_block_free(
156 /*=================*/
157 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
158- void* buf) /*!< in: buffer frame to deallocate */
159+ void* buf, /*!< in: buffer frame to deallocate */
160+ ibool have_page_hash_mutex)
161 {
162 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
163 buf_page_t* bpage;
164 buf_block_t* block;
165
166- ut_ad(buf_pool_mutex_own(buf_pool));
167+ //ut_ad(buf_pool_mutex_own(buf_pool));
168 ut_ad(!mutex_own(&buf_pool->zip_mutex));
169 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
170
171+ mutex_enter(&buf_pool->zip_hash_mutex);
172+
173 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
174 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
175 && bpage->in_zip_hash && !bpage->in_page_hash),
176@@ -198,12 +204,14 @@
177 ut_d(bpage->in_zip_hash = FALSE);
178 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
179
180+ mutex_exit(&buf_pool->zip_hash_mutex);
181+
182 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
183 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
184
185 block = (buf_block_t*) bpage;
186 mutex_enter(&block->mutex);
187- buf_LRU_block_free_non_file_page(block);
188+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
189 mutex_exit(&block->mutex);
190
191 ut_ad(buf_pool->buddy_n_frames > 0);
192@@ -220,7 +228,7 @@
193 {
194 buf_pool_t* buf_pool = buf_pool_from_block(block);
195 const ulint fold = BUF_POOL_ZIP_FOLD(block);
196- ut_ad(buf_pool_mutex_own(buf_pool));
197+ //ut_ad(buf_pool_mutex_own(buf_pool));
198 ut_ad(!mutex_own(&buf_pool->zip_mutex));
199 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
200
201@@ -232,7 +240,10 @@
202 ut_ad(!block->page.in_page_hash);
203 ut_ad(!block->page.in_zip_hash);
204 ut_d(block->page.in_zip_hash = TRUE);
205+
206+ mutex_enter(&buf_pool->zip_hash_mutex);
207 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
208+ mutex_exit(&buf_pool->zip_hash_mutex);
209
210 ut_d(buf_pool->buddy_n_frames++);
211 }
212@@ -268,7 +279,7 @@
213 bpage->state = BUF_BLOCK_ZIP_FREE;
214 #ifndef UNIV_DEBUG_VALGRIND
215 /* Valgrind would complain about accessing free memory. */
216- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
217+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
218 ut_ad(buf_page_get_state(
219 ut_list_node_313)
220 == BUF_BLOCK_ZIP_FREE)));
221@@ -291,25 +302,29 @@
222 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
223 ulint i, /*!< in: index of buf_pool->zip_free[],
224 or BUF_BUDDY_SIZES */
225- ibool* lru) /*!< in: pointer to a variable that
226+ ibool* lru, /*!< in: pointer to a variable that
227 will be assigned TRUE if storage was
228 allocated from the LRU list and
229 buf_pool->mutex was temporarily
230 released, or NULL if the LRU list
231 should not be used */
232+ ibool have_page_hash_mutex)
233 {
234 buf_block_t* block;
235
236- ut_ad(buf_pool_mutex_own(buf_pool));
237+ //ut_ad(buf_pool_mutex_own(buf_pool));
238+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
239 ut_ad(!mutex_own(&buf_pool->zip_mutex));
240
241 if (i < BUF_BUDDY_SIZES) {
242 /* Try to allocate from the buddy system. */
243+ mutex_enter(&buf_pool->zip_free_mutex);
244 block = buf_buddy_alloc_zip(buf_pool, i);
245
246 if (block) {
247 goto func_exit;
248 }
249+ mutex_exit(&buf_pool->zip_free_mutex);
250 }
251
252 /* Try allocating from the buf_pool->free list. */
253@@ -326,19 +341,30 @@
254 }
255
256 /* Try replacing an uncompressed page in the buffer pool. */
257- buf_pool_mutex_exit(buf_pool);
258+ //buf_pool_mutex_exit(buf_pool);
259+ mutex_exit(&buf_pool->LRU_list_mutex);
260+ if (have_page_hash_mutex) {
261+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
262+ }
263 block = buf_LRU_get_free_block(buf_pool, 0);
264 *lru = TRUE;
265- buf_pool_mutex_enter(buf_pool);
266+ //buf_pool_mutex_enter(buf_pool);
267+ mutex_enter(&buf_pool->LRU_list_mutex);
268+ if (have_page_hash_mutex) {
269+ rw_lock_x_lock(&buf_pool->page_hash_latch);
270+ }
271
272 alloc_big:
273 buf_buddy_block_register(block);
274
275+ mutex_enter(&buf_pool->zip_free_mutex);
276 block = buf_buddy_alloc_from(
277 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
278
279 func_exit:
280 buf_pool->buddy_stat[i].used++;
281+ mutex_exit(&buf_pool->zip_free_mutex);
282+
283 return(block);
284 }
285
286@@ -355,7 +381,10 @@
287 buf_page_t* b;
288 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
289
290- ut_ad(buf_pool_mutex_own(buf_pool));
291+ //ut_ad(buf_pool_mutex_own(buf_pool));
292+#ifdef UNIV_SYNC_DEBUG
293+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
294+#endif
295
296 switch (buf_page_get_state(bpage)) {
297 case BUF_BLOCK_ZIP_FREE:
298@@ -364,7 +393,7 @@
299 case BUF_BLOCK_FILE_PAGE:
300 case BUF_BLOCK_MEMORY:
301 case BUF_BLOCK_REMOVE_HASH:
302- ut_error;
303+ /* ut_error; */ /* optimistic */
304 case BUF_BLOCK_ZIP_DIRTY:
305 /* Cannot relocate dirty pages. */
306 return(FALSE);
307@@ -374,9 +403,18 @@
308 }
309
310 mutex_enter(&buf_pool->zip_mutex);
311+ mutex_enter(&buf_pool->zip_free_mutex);
312
313 if (!buf_page_can_relocate(bpage)) {
314 mutex_exit(&buf_pool->zip_mutex);
315+ mutex_exit(&buf_pool->zip_free_mutex);
316+ return(FALSE);
317+ }
318+
319+ if (bpage != buf_page_hash_get(buf_pool,
320+ bpage->space, bpage->offset)) {
321+ mutex_exit(&buf_pool->zip_mutex);
322+ mutex_exit(&buf_pool->zip_free_mutex);
323 return(FALSE);
324 }
325
326@@ -384,18 +422,19 @@
327 ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
328
329 /* relocate buf_pool->zip_clean */
330- b = UT_LIST_GET_PREV(list, dpage);
331- UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
332+ b = UT_LIST_GET_PREV(zip_list, dpage);
333+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
334
335 if (b) {
336- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
337+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
338 } else {
339- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
340+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
341 }
342
343 UNIV_MEM_INVALID(bpage, sizeof *bpage);
344
345 mutex_exit(&buf_pool->zip_mutex);
346+ mutex_exit(&buf_pool->zip_free_mutex);
347 return(TRUE);
348 }
349
350@@ -409,14 +448,16 @@
351 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
352 void* src, /*!< in: block to relocate */
353 void* dst, /*!< in: free block to relocate to */
354- ulint i) /*!< in: index of
355+ ulint i, /*!< in: index of
356 buf_pool->zip_free[] */
357+ ibool have_page_hash_mutex)
358 {
359 buf_page_t* bpage;
360 const ulint size = BUF_BUDDY_LOW << i;
361 ullint usec = ut_time_us(NULL);
362
363- ut_ad(buf_pool_mutex_own(buf_pool));
364+ //ut_ad(buf_pool_mutex_own(buf_pool));
365+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
366 ut_ad(!mutex_own(&buf_pool->zip_mutex));
367 ut_ad(!ut_align_offset(src, size));
368 ut_ad(!ut_align_offset(dst, size));
d8778560
AM
369@@ -437,6 +478,13 @@
370 if (size >= PAGE_ZIP_MIN_SIZE) {
b4e1fa2c
AM
371 /* This is a compressed page. */
372 mutex_t* mutex;
d8778560
AM
373+ ulint space, page_no;
374+
b4e1fa2c
AM
375+ if (!have_page_hash_mutex) {
376+ mutex_exit(&buf_pool->zip_free_mutex);
377+ mutex_enter(&buf_pool->LRU_list_mutex);
378+ rw_lock_x_lock(&buf_pool->page_hash_latch);
379+ }
d8778560 380
b4e1fa2c
AM
381 /* The src block may be split into smaller blocks,
382 some of which may be free. Thus, the
d8778560
AM
383@@ -446,9 +494,9 @@
384 pool), so there is nothing wrong about this. The
385 mach_read_from_4() calls here will only trigger bogus
386 Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
387- ulint space = mach_read_from_4(
388+ space = mach_read_from_4(
389 (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
390- ulint page_no = mach_read_from_4(
391+ page_no = mach_read_from_4(
392 (const byte*) src + FIL_PAGE_OFFSET);
393 /* Suppress Valgrind warnings about conditional jump
394 on uninitialized value. */
395@@ -462,6 +510,11 @@
b4e1fa2c
AM
396 added to buf_pool->page_hash yet. Obviously,
397 it cannot be relocated. */
398
399+ if (!have_page_hash_mutex) {
400+ mutex_enter(&buf_pool->zip_free_mutex);
401+ mutex_exit(&buf_pool->LRU_list_mutex);
402+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
403+ }
404 return(FALSE);
405 }
406
d8778560 407@@ -473,18 +526,27 @@
b4e1fa2c
AM
408 For the sake of simplicity, give up. */
409 ut_ad(page_zip_get_size(&bpage->zip) < size);
410
411+ if (!have_page_hash_mutex) {
412+ mutex_enter(&buf_pool->zip_free_mutex);
413+ mutex_exit(&buf_pool->LRU_list_mutex);
414+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
415+ }
416 return(FALSE);
417 }
418
419+ /* To keep latch order */
420+ if (have_page_hash_mutex)
421+ mutex_exit(&buf_pool->zip_free_mutex);
422+
423 /* The block must have been allocated, but it may
424 contain uninitialized data. */
425 UNIV_MEM_ASSERT_W(src, size);
426
427- mutex = buf_page_get_mutex(bpage);
428+ mutex = buf_page_get_mutex_enter(bpage);
429
430- mutex_enter(mutex);
431+ mutex_enter(&buf_pool->zip_free_mutex);
432
433- if (buf_page_can_relocate(bpage)) {
434+ if (mutex && buf_page_can_relocate(bpage)) {
435 /* Relocate the compressed page. */
436 ut_a(bpage->zip.data == src);
437 memcpy(dst, src, size);
d8778560 438@@ -499,10 +561,22 @@
b4e1fa2c
AM
439 buddy_stat->relocated_usec
440 += ut_time_us(NULL) - usec;
441 }
442+
443+ if (!have_page_hash_mutex) {
444+ mutex_exit(&buf_pool->LRU_list_mutex);
445+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
446+ }
447 return(TRUE);
448 }
449
450- mutex_exit(mutex);
451+ if (!have_page_hash_mutex) {
452+ mutex_exit(&buf_pool->LRU_list_mutex);
453+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
454+ }
455+
456+ if (mutex) {
457+ mutex_exit(mutex);
458+ }
459 } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
460 /* This must be a buf_page_t object. */
461 #if UNIV_WORD_SIZE == 4
d8778560 462@@ -511,10 +585,31 @@
b4e1fa2c
AM
463 about uninitialized pad bytes. */
464 UNIV_MEM_ASSERT_RW(src, size);
465 #endif
466+
467+ mutex_exit(&buf_pool->zip_free_mutex);
468+
469+ if (!have_page_hash_mutex) {
470+ mutex_enter(&buf_pool->LRU_list_mutex);
471+ rw_lock_x_lock(&buf_pool->page_hash_latch);
472+ }
473+
474 if (buf_buddy_relocate_block(src, dst)) {
475+ mutex_enter(&buf_pool->zip_free_mutex);
476+
477+ if (!have_page_hash_mutex) {
478+ mutex_exit(&buf_pool->LRU_list_mutex);
479+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
480+ }
481
482 goto success;
483 }
484+
485+ mutex_enter(&buf_pool->zip_free_mutex);
486+
487+ if (!have_page_hash_mutex) {
488+ mutex_exit(&buf_pool->LRU_list_mutex);
489+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
490+ }
491 }
492
493 return(FALSE);
d8778560 494@@ -529,13 +624,15 @@
b4e1fa2c
AM
495 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
496 void* buf, /*!< in: block to be freed, must not be
497 pointed to by the buffer pool */
498- ulint i) /*!< in: index of buf_pool->zip_free[],
499+ ulint i, /*!< in: index of buf_pool->zip_free[],
500 or BUF_BUDDY_SIZES */
501+ ibool have_page_hash_mutex)
502 {
503 buf_page_t* bpage;
504 buf_page_t* buddy;
505
506- ut_ad(buf_pool_mutex_own(buf_pool));
507+ //ut_ad(buf_pool_mutex_own(buf_pool));
508+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
509 ut_ad(!mutex_own(&buf_pool->zip_mutex));
510 ut_ad(i <= BUF_BUDDY_SIZES);
511 ut_ad(buf_pool->buddy_stat[i].used > 0);
d8778560 512@@ -546,7 +643,9 @@
b4e1fa2c
AM
513 ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
514
515 if (i == BUF_BUDDY_SIZES) {
516- buf_buddy_block_free(buf_pool, buf);
517+ mutex_exit(&buf_pool->zip_free_mutex);
518+ buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
519+ mutex_enter(&buf_pool->zip_free_mutex);
520 return;
521 }
522
d8778560 523@@ -591,7 +690,7 @@
b4e1fa2c
AM
524 ut_a(bpage != buf);
525
526 {
527- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
528+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
529 UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
530 bpage = next;
531 }
d8778560 532@@ -600,13 +699,13 @@
b4e1fa2c
AM
533 #ifndef UNIV_DEBUG_VALGRIND
534 buddy_nonfree:
535 /* Valgrind would complain about accessing free memory. */
536- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
537+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
538 ut_ad(buf_page_get_state(ut_list_node_313)
539 == BUF_BLOCK_ZIP_FREE)));
540 #endif /* UNIV_DEBUG_VALGRIND */
541
542 /* The buddy is not free. Is there a free block of this size? */
543- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
544+ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
545
546 if (bpage) {
547 /* Remove the block from the free list, because a successful
d8778560 548@@ -616,7 +715,7 @@
b4e1fa2c
AM
549 buf_buddy_remove_from_free(buf_pool, bpage, i);
550
551 /* Try to relocate the buddy of buf to the free block. */
552- if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
553+ if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
554
555 ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
556 goto buddy_free2;
d8778560 557@@ -636,14 +735,14 @@
b4e1fa2c
AM
558
559 (Parts of the buddy can be free in
560 buf_pool->zip_free[j] with j < i.) */
561- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
562+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
563 ut_ad(buf_page_get_state(
564 ut_list_node_313)
565 == BUF_BLOCK_ZIP_FREE
566 && ut_list_node_313 != buddy)));
567 #endif /* !UNIV_DEBUG_VALGRIND */
568
569- if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
570+ if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
571
572 buf = bpage;
573 UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
574diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
575--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900
576+++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900
577@@ -263,6 +263,7 @@
578 #ifdef UNIV_PFS_RWLOCK
579 /* Keys to register buffer block related rwlocks and mutexes with
580 performance schema */
581+UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
582 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
583 # ifdef UNIV_SYNC_DEBUG
584 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
585@@ -273,6 +274,10 @@
586 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
587 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
588 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
589+UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
590+UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
591+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
592+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
593 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
594 #endif /* UNIV_PFS_MUTEX */
595
596@@ -881,9 +886,9 @@
597 block->page.in_zip_hash = FALSE;
598 block->page.in_flush_list = FALSE;
599 block->page.in_free_list = FALSE;
600- block->in_unzip_LRU_list = FALSE;
601 #endif /* UNIV_DEBUG */
602 block->page.in_LRU_list = FALSE;
603+ block->in_unzip_LRU_list = FALSE;
604 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
605 block->n_pointers = 0;
606 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
607@@ -981,9 +986,11 @@
608 memset(block->frame, '\0', UNIV_PAGE_SIZE);
609 #endif
610 /* Add the block to the free list */
611- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
612+ mutex_enter(&buf_pool->free_list_mutex);
613+ UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
614
615 ut_d(block->page.in_free_list = TRUE);
616+ mutex_exit(&buf_pool->free_list_mutex);
617 ut_ad(buf_pool_from_block(block) == buf_pool);
618
619 block++;
620@@ -1038,7 +1045,8 @@
621 buf_chunk_t* chunk = buf_pool->chunks;
622
623 ut_ad(buf_pool);
624- ut_ad(buf_pool_mutex_own(buf_pool));
625+ //ut_ad(buf_pool_mutex_own(buf_pool));
626+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
627 for (n = buf_pool->n_chunks; n--; chunk++) {
628
629 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
630@@ -1138,7 +1146,7 @@
631 buf_block_t* block;
632 const buf_block_t* block_end;
633
634- ut_ad(buf_pool_mutex_own(buf_pool));
635+ //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
636
637 block_end = chunk->blocks + chunk->size;
638
639@@ -1150,8 +1158,10 @@
640 ut_ad(!block->in_unzip_LRU_list);
641 ut_ad(!block->page.in_flush_list);
642 /* Remove the block from the free list. */
643+ mutex_enter(&buf_pool->free_list_mutex);
644 ut_ad(block->page.in_free_list);
645- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
646+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
647+ mutex_exit(&buf_pool->free_list_mutex);
648
649 /* Free the latches. */
650 mutex_free(&block->mutex);
651@@ -1208,9 +1218,21 @@
652 ------------------------------- */
653 mutex_create(buf_pool_mutex_key,
654 &buf_pool->mutex, SYNC_BUF_POOL);
655+ mutex_create(buf_pool_LRU_list_mutex_key,
656+ &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
657+ rw_lock_create(buf_pool_page_hash_key,
658+ &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
659+ mutex_create(buf_pool_free_list_mutex_key,
660+ &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
661+ mutex_create(buf_pool_zip_free_mutex_key,
662+ &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
663+ mutex_create(buf_pool_zip_hash_mutex_key,
664+ &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
665 mutex_create(buf_pool_zip_mutex_key,
666 &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
667
668+ mutex_enter(&buf_pool->LRU_list_mutex);
669+ rw_lock_x_lock(&buf_pool->page_hash_latch);
670 buf_pool_mutex_enter(buf_pool);
671
672 if (buf_pool_size > 0) {
673@@ -1223,6 +1245,8 @@
674 mem_free(chunk);
675 mem_free(buf_pool);
676
677+ mutex_exit(&buf_pool->LRU_list_mutex);
678+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
679 buf_pool_mutex_exit(buf_pool);
680
681 return(DB_ERROR);
682@@ -1253,6 +1277,8 @@
683
684 /* All fields are initialized by mem_zalloc(). */
685
686+ mutex_exit(&buf_pool->LRU_list_mutex);
687+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
688 buf_pool_mutex_exit(buf_pool);
689
690 return(DB_SUCCESS);
691@@ -1467,7 +1493,11 @@
692 ulint fold;
693 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
694
695- ut_ad(buf_pool_mutex_own(buf_pool));
696+ //ut_ad(buf_pool_mutex_own(buf_pool));
697+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
698+#ifdef UNIV_SYNC_DEBUG
699+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
700+#endif
701 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
702 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
703 ut_a(bpage->buf_fix_count == 0);
704@@ -1554,7 +1584,8 @@
705
706 try_again:
707 btr_search_disable(); /* Empty the adaptive hash index again */
708- buf_pool_mutex_enter(buf_pool);
709+ //buf_pool_mutex_enter(buf_pool);
710+ mutex_enter(&buf_pool->LRU_list_mutex);
711
712 shrink_again:
713 if (buf_pool->n_chunks <= 1) {
714@@ -1625,7 +1656,7 @@
715
716 buf_LRU_make_block_old(&block->page);
717 dirty++;
718- } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
719+ } else if (buf_LRU_free_block(&block->page, TRUE, NULL, TRUE)
720 != BUF_LRU_FREED) {
721 nonfree++;
722 }
723@@ -1633,7 +1664,8 @@
724 mutex_exit(&block->mutex);
725 }
726
727- buf_pool_mutex_exit(buf_pool);
728+ //buf_pool_mutex_exit(buf_pool);
729+ mutex_exit(&buf_pool->LRU_list_mutex);
730
731 /* Request for a flush of the chunk if it helps.
732 Do not flush if there are non-free blocks, since
733@@ -1683,7 +1715,8 @@
734 func_done:
735 buf_pool->old_pool_size = buf_pool->curr_pool_size;
736 func_exit:
737- buf_pool_mutex_exit(buf_pool);
738+ //buf_pool_mutex_exit(buf_pool);
739+ mutex_exit(&buf_pool->LRU_list_mutex);
740 btr_search_enable();
741 }
742
743@@ -1724,7 +1757,9 @@
744 hash_table_t* zip_hash;
745 hash_table_t* page_hash;
746
747- buf_pool_mutex_enter(buf_pool);
748+ //buf_pool_mutex_enter(buf_pool);
749+ mutex_enter(&buf_pool->LRU_list_mutex);
750+ rw_lock_x_lock(&buf_pool->page_hash_latch);
751
752 /* Free, create, and populate the hash table. */
753 hash_table_free(buf_pool->page_hash);
754@@ -1765,8 +1800,9 @@
755 All such blocks are either in buf_pool->zip_clean or
756 in buf_pool->flush_list. */
757
758+ mutex_enter(&buf_pool->zip_mutex);
759 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
760- b = UT_LIST_GET_NEXT(list, b)) {
761+ b = UT_LIST_GET_NEXT(zip_list, b)) {
762 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
763 ut_ad(!b->in_flush_list);
764 ut_ad(b->in_LRU_list);
765@@ -1776,10 +1812,11 @@
766 HASH_INSERT(buf_page_t, hash, page_hash,
767 buf_page_address_fold(b->space, b->offset), b);
768 }
769+ mutex_exit(&buf_pool->zip_mutex);
770
771 buf_flush_list_mutex_enter(buf_pool);
772 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
773- b = UT_LIST_GET_NEXT(list, b)) {
774+ b = UT_LIST_GET_NEXT(flush_list, b)) {
775 ut_ad(b->in_flush_list);
776 ut_ad(b->in_LRU_list);
777 ut_ad(b->in_page_hash);
778@@ -1806,7 +1843,9 @@
779 }
780
781 buf_flush_list_mutex_exit(buf_pool);
782- buf_pool_mutex_exit(buf_pool);
783+ //buf_pool_mutex_exit(buf_pool);
784+ mutex_exit(&buf_pool->LRU_list_mutex);
785+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
786 }
787
788 /********************************************************************
789@@ -1853,21 +1892,32 @@
790 buf_page_t* bpage;
791 ulint i;
792 buf_pool_t* buf_pool = buf_pool_get(space, offset);
793+ mutex_t* block_mutex;
794
795- ut_ad(buf_pool_mutex_own(buf_pool));
796+ //ut_ad(buf_pool_mutex_own(buf_pool));
797
798+ rw_lock_x_lock(&buf_pool->page_hash_latch);
799 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
800+ if (bpage) {
801+ block_mutex = buf_page_get_mutex_enter(bpage);
802+ ut_a(block_mutex);
803+ }
804
805 if (UNIV_LIKELY_NULL(bpage)) {
806 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
807 /* The page was loaded meanwhile. */
808+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
809 return(bpage);
810 }
811 /* Add to an existing watch. */
812 bpage->buf_fix_count++;
813+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
814+ mutex_exit(block_mutex);
815 return(NULL);
816 }
817
818+ /* buf_pool->watch is protected by zip_mutex for now */
819+ mutex_enter(&buf_pool->zip_mutex);
820 for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
821 bpage = &buf_pool->watch[i];
822
823@@ -1891,10 +1941,12 @@
824 bpage->space = space;
825 bpage->offset = offset;
826 bpage->buf_fix_count = 1;
827-
828+ bpage->buf_pool_index = buf_pool_index(buf_pool);
829 ut_d(bpage->in_page_hash = TRUE);
830 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
831 fold, bpage);
832+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
833+ mutex_exit(&buf_pool->zip_mutex);
834 return(NULL);
835 case BUF_BLOCK_ZIP_PAGE:
836 ut_ad(bpage->in_page_hash);
837@@ -1912,6 +1964,8 @@
838 ut_error;
839
840 /* Fix compiler warning */
841+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
842+ mutex_exit(&buf_pool->zip_mutex);
843 return(NULL);
844 }
845
846@@ -1941,6 +1995,8 @@
847 buf_chunk_t* chunks;
848 buf_chunk_t* chunk;
849
850+ mutex_enter(&buf_pool->LRU_list_mutex);
851+ rw_lock_x_lock(&buf_pool->page_hash_latch);
852 buf_pool_mutex_enter(buf_pool);
853 chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
854
855@@ -1959,6 +2015,8 @@
856 buf_pool->n_chunks++;
857 }
858
859+ mutex_exit(&buf_pool->LRU_list_mutex);
860+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
861 buf_pool_mutex_exit(buf_pool);
862 }
863
864@@ -2046,7 +2104,11 @@
865 space, offset) */
866 buf_page_t* watch) /*!< in/out: sentinel for watch */
867 {
868- ut_ad(buf_pool_mutex_own(buf_pool));
869+ //ut_ad(buf_pool_mutex_own(buf_pool));
870+#ifdef UNIV_SYNC_DEBUG
871+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
872+#endif
873+ ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
874
875 HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
876 ut_d(watch->in_page_hash = FALSE);
877@@ -2068,28 +2130,31 @@
878 buf_pool_t* buf_pool = buf_pool_get(space, offset);
879 ulint fold = buf_page_address_fold(space, offset);
880
881- buf_pool_mutex_enter(buf_pool);
882+ //buf_pool_mutex_enter(buf_pool);
883+ rw_lock_x_lock(&buf_pool->page_hash_latch);
884 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
885 /* The page must exist because buf_pool_watch_set()
886 increments buf_fix_count. */
887 ut_a(bpage);
888
889 if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
890- mutex_t* mutex = buf_page_get_mutex(bpage);
891+ mutex_t* mutex = buf_page_get_mutex_enter(bpage);
892
893- mutex_enter(mutex);
894 ut_a(bpage->buf_fix_count > 0);
895 bpage->buf_fix_count--;
896 mutex_exit(mutex);
897 } else {
898+ mutex_enter(&buf_pool->zip_mutex);
899 ut_a(bpage->buf_fix_count > 0);
900
901 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
902 buf_pool_watch_remove(buf_pool, fold, bpage);
903 }
904+ mutex_exit(&buf_pool->zip_mutex);
905 }
906
907- buf_pool_mutex_exit(buf_pool);
908+ //buf_pool_mutex_exit(buf_pool);
909+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
910 }
911
912 /****************************************************************//**
913@@ -2109,14 +2174,16 @@
914 buf_pool_t* buf_pool = buf_pool_get(space, offset);
915 ulint fold = buf_page_address_fold(space, offset);
916
917- buf_pool_mutex_enter(buf_pool);
918+ //buf_pool_mutex_enter(buf_pool);
919+ rw_lock_s_lock(&buf_pool->page_hash_latch);
920
921 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
922 /* The page must exist because buf_pool_watch_set()
923 increments buf_fix_count. */
924 ut_a(bpage);
925 ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
926- buf_pool_mutex_exit(buf_pool);
927+ //buf_pool_mutex_exit(buf_pool);
928+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
929
930 return(ret);
931 }
932@@ -2133,13 +2200,15 @@
933 {
934 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
935
936- buf_pool_mutex_enter(buf_pool);
937+ //buf_pool_mutex_enter(buf_pool);
938+ mutex_enter(&buf_pool->LRU_list_mutex);
939
940 ut_a(buf_page_in_file(bpage));
941
942 buf_LRU_make_block_young(bpage);
943
944- buf_pool_mutex_exit(buf_pool);
945+ //buf_pool_mutex_exit(buf_pool);
946+ mutex_exit(&buf_pool->LRU_list_mutex);
947 }
948
949 /********************************************************************//**
950@@ -2163,14 +2232,20 @@
951 ut_a(buf_page_in_file(bpage));
952
953 if (buf_page_peek_if_too_old(bpage)) {
954- buf_pool_mutex_enter(buf_pool);
955+ //buf_pool_mutex_enter(buf_pool);
956+ mutex_enter(&buf_pool->LRU_list_mutex);
957 buf_LRU_make_block_young(bpage);
958- buf_pool_mutex_exit(buf_pool);
959+ //buf_pool_mutex_exit(buf_pool);
960+ mutex_exit(&buf_pool->LRU_list_mutex);
961 } else if (!access_time) {
962 ulint time_ms = ut_time_ms();
963- buf_pool_mutex_enter(buf_pool);
964+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
965+ //buf_pool_mutex_enter(buf_pool);
966+ if (block_mutex) {
967 buf_page_set_accessed(bpage, time_ms);
968- buf_pool_mutex_exit(buf_pool);
969+ mutex_exit(block_mutex);
970+ }
971+ //buf_pool_mutex_exit(buf_pool);
972 }
973 }
974
975@@ -2187,7 +2262,8 @@
976 buf_block_t* block;
977 buf_pool_t* buf_pool = buf_pool_get(space, offset);
978
979- buf_pool_mutex_enter(buf_pool);
980+ //buf_pool_mutex_enter(buf_pool);
981+ rw_lock_s_lock(&buf_pool->page_hash_latch);
982
983 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
984
985@@ -2196,7 +2272,8 @@
986 block->check_index_page_at_flush = FALSE;
987 }
988
989- buf_pool_mutex_exit(buf_pool);
990+ //buf_pool_mutex_exit(buf_pool);
991+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
992 }
993
994 /********************************************************************//**
995@@ -2215,7 +2292,8 @@
996 ibool is_hashed;
997 buf_pool_t* buf_pool = buf_pool_get(space, offset);
998
999- buf_pool_mutex_enter(buf_pool);
1000+ //buf_pool_mutex_enter(buf_pool);
1001+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1002
1003 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1004
1005@@ -2226,7 +2304,8 @@
1006 is_hashed = block->is_hashed;
1007 }
1008
1009- buf_pool_mutex_exit(buf_pool);
1010+ //buf_pool_mutex_exit(buf_pool);
1011+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1012
1013 return(is_hashed);
1014 }
1015@@ -2248,7 +2327,8 @@
1016 buf_page_t* bpage;
1017 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1018
1019- buf_pool_mutex_enter(buf_pool);
1020+ //buf_pool_mutex_enter(buf_pool);
1021+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1022
1023 bpage = buf_page_hash_get(buf_pool, space, offset);
1024
1025@@ -2257,7 +2337,8 @@
1026 bpage->file_page_was_freed = TRUE;
1027 }
1028
1029- buf_pool_mutex_exit(buf_pool);
1030+ //buf_pool_mutex_exit(buf_pool);
1031+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1032
1033 return(bpage);
1034 }
1035@@ -2278,7 +2359,8 @@
1036 buf_page_t* bpage;
1037 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1038
1039- buf_pool_mutex_enter(buf_pool);
1040+ //buf_pool_mutex_enter(buf_pool);
1041+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1042
1043 bpage = buf_page_hash_get(buf_pool, space, offset);
1044
1045@@ -2287,7 +2369,8 @@
1046 bpage->file_page_was_freed = FALSE;
1047 }
1048
1049- buf_pool_mutex_exit(buf_pool);
1050+ //buf_pool_mutex_exit(buf_pool);
1051+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1052
1053 return(bpage);
1054 }
1055@@ -2322,8 +2405,9 @@
1056 buf_pool->stat.n_page_gets++;
1057
1058 for (;;) {
1059- buf_pool_mutex_enter(buf_pool);
1060+ //buf_pool_mutex_enter(buf_pool);
1061 lookup:
1062+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1063 bpage = buf_page_hash_get(buf_pool, space, offset);
1064 if (bpage) {
1065 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1066@@ -2332,7 +2416,8 @@
1067
1068 /* Page not in buf_pool: needs to be read from file */
1069
1070- buf_pool_mutex_exit(buf_pool);
1071+ //buf_pool_mutex_exit(buf_pool);
1072+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1073
1074 buf_read_page(space, zip_size, offset);
1075
1076@@ -2344,10 +2429,15 @@
1077 if (UNIV_UNLIKELY(!bpage->zip.data)) {
1078 /* There is no compressed page. */
1079 err_exit:
1080- buf_pool_mutex_exit(buf_pool);
1081+ //buf_pool_mutex_exit(buf_pool);
1082+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1083 return(NULL);
1084 }
1085
1086+ block_mutex = buf_page_get_mutex_enter(bpage);
1087+
1088+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1089+
1090 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1091
1092 switch (buf_page_get_state(bpage)) {
1093@@ -2356,19 +2446,19 @@
1094 case BUF_BLOCK_MEMORY:
1095 case BUF_BLOCK_REMOVE_HASH:
1096 case BUF_BLOCK_ZIP_FREE:
1097+ if (block_mutex)
1098+ mutex_exit(block_mutex);
1099 break;
1100 case BUF_BLOCK_ZIP_PAGE:
1101 case BUF_BLOCK_ZIP_DIRTY:
1102- block_mutex = &buf_pool->zip_mutex;
1103- mutex_enter(block_mutex);
1104+ ut_a(block_mutex == &buf_pool->zip_mutex);
1105 bpage->buf_fix_count++;
1106 goto got_block;
1107 case BUF_BLOCK_FILE_PAGE:
1108- block_mutex = &((buf_block_t*) bpage)->mutex;
1109- mutex_enter(block_mutex);
1110+ ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1111
1112 /* Discard the uncompressed page frame if possible. */
1113- if (buf_LRU_free_block(bpage, FALSE, NULL)
1114+ if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
1115 == BUF_LRU_FREED) {
1116
1117 mutex_exit(block_mutex);
1118@@ -2387,7 +2477,7 @@
1119 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1120 access_time = buf_page_is_accessed(bpage);
1121
1122- buf_pool_mutex_exit(buf_pool);
1123+ //buf_pool_mutex_exit(buf_pool);
1124
1125 mutex_exit(block_mutex);
1126
1127@@ -2696,7 +2786,7 @@
1128 const buf_block_t* block) /*!< in: pointer to block,
1129 not dereferenced */
1130 {
1131- ut_ad(buf_pool_mutex_own(buf_pool));
1132+ //ut_ad(buf_pool_mutex_own(buf_pool));
1133
1134 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1135 /* The pointer should be aligned. */
1136@@ -2732,6 +2822,7 @@
1137 ulint fix_type;
1138 ibool must_read;
1139 ulint retries = 0;
1140+ mutex_t* block_mutex = NULL;
1141 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1142
1143 ut_ad(mtr);
d8778560 1144@@ -2754,9 +2845,11 @@
b4e1fa2c
AM
1145 fold = buf_page_address_fold(space, offset);
1146 loop:
1147 block = guess;
1148- buf_pool_mutex_enter(buf_pool);
1149+ //buf_pool_mutex_enter(buf_pool);
1150
1151 if (block) {
1152+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1153+
1154 /* If the guess is a compressed page descriptor that
1155 has been allocated by buf_buddy_alloc(), it may have
1156 been invalidated by buf_buddy_relocate(). In that
d8778560 1157@@ -2765,11 +2858,15 @@
b4e1fa2c
AM
1158 the guess may be pointing to a buffer pool chunk that
1159 has been released when resizing the buffer pool. */
1160
1161- if (!buf_block_is_uncompressed(buf_pool, block)
1162+ if (!block_mutex) {
1163+ block = guess = NULL;
1164+ } else if (!buf_block_is_uncompressed(buf_pool, block)
1165 || offset != block->page.offset
1166 || space != block->page.space
1167 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1168
1169+ mutex_exit(block_mutex);
1170+
1171 block = guess = NULL;
1172 } else {
1173 ut_ad(!block->page.in_zip_hash);
d8778560 1174@@ -2778,12 +2875,19 @@
b4e1fa2c
AM
1175 }
1176
1177 if (block == NULL) {
1178+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1179 block = (buf_block_t*) buf_page_hash_get_low(
1180 buf_pool, space, offset, fold);
1181+ if (block) {
1182+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1183+ ut_a(block_mutex);
1184+ }
1185+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1186 }
1187
1188 loop2:
1189 if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1190+ mutex_exit(block_mutex);
1191 block = NULL;
1192 }
1193
d8778560 1194@@ -2795,12 +2899,14 @@
b4e1fa2c
AM
1195 space, offset, fold);
1196
1197 if (UNIV_LIKELY_NULL(block)) {
1198-
1199+ block_mutex = buf_page_get_mutex((buf_page_t*)block);
1200+ ut_a(block_mutex);
1201+ ut_ad(mutex_own(block_mutex));
1202 goto got_block;
1203 }
1204 }
1205
1206- buf_pool_mutex_exit(buf_pool);
1207+ //buf_pool_mutex_exit(buf_pool);
1208
1209 if (mode == BUF_GET_IF_IN_POOL
1210 || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
d8778560 1211@@ -2848,7 +2954,8 @@
b4e1fa2c
AM
1212 /* The page is being read to buffer pool,
1213 but we cannot wait around for the read to
1214 complete. */
1215- buf_pool_mutex_exit(buf_pool);
1216+ //buf_pool_mutex_exit(buf_pool);
1217+ mutex_exit(block_mutex);
1218
1219 return(NULL);
1220 }
d8778560 1221@@ -2858,38 +2965,49 @@
b4e1fa2c
AM
1222 ibool success;
1223
1224 case BUF_BLOCK_FILE_PAGE:
1225+ if (block_mutex == &buf_pool->zip_mutex) {
1226+ /* We hold zip_mutex, which is the wrong mutex for a file page: release it and retry. */
1227+ mutex_exit(block_mutex);
1228+ goto loop;
1229+ }
1230 break;
1231
1232 case BUF_BLOCK_ZIP_PAGE:
1233 case BUF_BLOCK_ZIP_DIRTY:
1234+ ut_ad(block_mutex == &buf_pool->zip_mutex);
1235 bpage = &block->page;
1236 /* Protect bpage->buf_fix_count. */
1237- mutex_enter(&buf_pool->zip_mutex);
1238+ //mutex_enter(&buf_pool->zip_mutex);
1239
1240 if (bpage->buf_fix_count
1241 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1242 /* This condition often occurs when the buffer
1243 is not buffer-fixed, but I/O-fixed by
1244 buf_page_init_for_read(). */
1245- mutex_exit(&buf_pool->zip_mutex);
1246+ //mutex_exit(&buf_pool->zip_mutex);
1247 wait_until_unfixed:
1248 /* The block is buffer-fixed or I/O-fixed.
1249 Try again later. */
1250- buf_pool_mutex_exit(buf_pool);
1251+ //buf_pool_mutex_exit(buf_pool);
1252+ mutex_exit(block_mutex);
1253 os_thread_sleep(WAIT_FOR_READ);
1254
1255 goto loop;
1256 }
1257
1258 /* Allocate an uncompressed page. */
1259- buf_pool_mutex_exit(buf_pool);
1260- mutex_exit(&buf_pool->zip_mutex);
1261+ //buf_pool_mutex_exit(buf_pool);
1262+ //mutex_exit(&buf_pool->zip_mutex);
1263+ mutex_exit(block_mutex);
1264
1265 block = buf_LRU_get_free_block(buf_pool, 0);
1266 ut_a(block);
1267+ block_mutex = &block->mutex;
1268
1269- buf_pool_mutex_enter(buf_pool);
1270- mutex_enter(&block->mutex);
1271+ //buf_pool_mutex_enter(buf_pool);
1272+ mutex_enter(&buf_pool->LRU_list_mutex);
1273+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1274+ mutex_enter(block_mutex);
1275
1276 {
1277 buf_page_t* hash_bpage;
d8778560 1278@@ -2902,35 +3020,47 @@
b4e1fa2c
AM
1279 while buf_pool->mutex was released.
1280 Free the block that was allocated. */
1281
1282- buf_LRU_block_free_non_file_page(block);
1283- mutex_exit(&block->mutex);
1284+ buf_LRU_block_free_non_file_page(block, TRUE);
1285+ mutex_exit(block_mutex);
1286
1287 block = (buf_block_t*) hash_bpage;
1288+ if (block) {
1289+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1290+ ut_a(block_mutex);
1291+ }
1292+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1293+ mutex_exit(&buf_pool->LRU_list_mutex);
1294 goto loop2;
1295 }
1296 }
1297
1298+ mutex_enter(&buf_pool->zip_mutex);
1299+
1300 if (UNIV_UNLIKELY
1301 (bpage->buf_fix_count
1302 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1303
1304+ mutex_exit(&buf_pool->zip_mutex);
1305 /* The block was buffer-fixed or I/O-fixed
1306 while buf_pool->mutex was not held by this thread.
1307 Free the block that was allocated and try again.
1308 This should be extremely unlikely. */
1309
1310- buf_LRU_block_free_non_file_page(block);
1311- mutex_exit(&block->mutex);
1312+ buf_LRU_block_free_non_file_page(block, TRUE);
1313+ //mutex_exit(&block->mutex);
1314
1315+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1316+ mutex_exit(&buf_pool->LRU_list_mutex);
1317 goto wait_until_unfixed;
1318 }
1319
1320 /* Move the compressed page from bpage to block,
1321 and uncompress it. */
1322
1323- mutex_enter(&buf_pool->zip_mutex);
1324-
1325 buf_relocate(bpage, &block->page);
1326+
1327+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1328+
1329 buf_block_init_low(block);
1330 block->lock_hash_val = lock_rec_hash(space, offset);
1331
d8778560 1332@@ -2939,7 +3069,7 @@
b4e1fa2c
AM
1333
1334 if (buf_page_get_state(&block->page)
1335 == BUF_BLOCK_ZIP_PAGE) {
1336- UT_LIST_REMOVE(list, buf_pool->zip_clean,
1337+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1338 &block->page);
1339 ut_ad(!block->page.in_flush_list);
1340 } else {
d8778560 1341@@ -2956,19 +3086,24 @@
b4e1fa2c
AM
1342 /* Insert at the front of unzip_LRU list */
1343 buf_unzip_LRU_add_block(block, FALSE);
1344
1345+ mutex_exit(&buf_pool->LRU_list_mutex);
1346+
1347 block->page.buf_fix_count = 1;
1348 buf_block_set_io_fix(block, BUF_IO_READ);
1349 rw_lock_x_lock_func(&block->lock, 0, file, line);
1350
1351 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1352
1353- mutex_exit(&block->mutex);
1354+ mutex_exit(block_mutex);
1355 mutex_exit(&buf_pool->zip_mutex);
1356+
1357+ buf_pool_mutex_enter(buf_pool);
1358 buf_pool->n_pend_unzip++;
1359+ buf_pool_mutex_exit(buf_pool);
1360
1361- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1362+ buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1363
1364- buf_pool_mutex_exit(buf_pool);
1365+ //buf_pool_mutex_exit(buf_pool);
1366
1367 /* Decompress the page and apply buffered operations
1368 while not holding buf_pool->mutex or block->mutex. */
d8778560 1369@@ -2981,12 +3116,15 @@
b4e1fa2c
AM
1370 }
1371
1372 /* Unfix and unlatch the block. */
1373- buf_pool_mutex_enter(buf_pool);
1374- mutex_enter(&block->mutex);
1375+ //buf_pool_mutex_enter(buf_pool);
1376+ block_mutex = &block->mutex;
1377+ mutex_enter(block_mutex);
1378 block->page.buf_fix_count--;
1379 buf_block_set_io_fix(block, BUF_IO_NONE);
1380- mutex_exit(&block->mutex);
1381+
1382+ buf_pool_mutex_enter(buf_pool);
1383 buf_pool->n_pend_unzip--;
1384+ buf_pool_mutex_exit(buf_pool);
1385 rw_lock_x_unlock(&block->lock);
1386
1387 break;
d8778560 1388@@ -3002,7 +3140,7 @@
b4e1fa2c
AM
1389
1390 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1391
1392- mutex_enter(&block->mutex);
1393+ //mutex_enter(&block->mutex);
1394 #if UNIV_WORD_SIZE == 4
1395 /* On 32-bit systems, there is no padding in buf_page_t. On
1396 other systems, Valgrind could complain about uninitialized pad
d8778560 1397@@ -3015,7 +3153,7 @@
b4e1fa2c
AM
1398 /* Try to evict the block from the buffer pool, to use the
1399 insert buffer (change buffer) as much as possible. */
1400
1401- if (buf_LRU_free_block(&block->page, TRUE, NULL)
1402+ if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
1403 == BUF_LRU_FREED) {
1404 mutex_exit(&block->mutex);
1405 if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
d8778560 1406@@ -3052,13 +3190,14 @@
b4e1fa2c
AM
1407
1408 buf_block_buf_fix_inc(block, file, line);
1409
1410- mutex_exit(&block->mutex);
1411+ //mutex_exit(&block->mutex);
1412
1413 /* Check if this is the first access to the page */
1414
1415 access_time = buf_page_is_accessed(&block->page);
1416
1417- buf_pool_mutex_exit(buf_pool);
1418+ //buf_pool_mutex_exit(buf_pool);
1419+ mutex_exit(block_mutex);
1420
1421 buf_page_set_accessed_make_young(&block->page, access_time);
1422
d8778560 1423@@ -3291,9 +3430,11 @@
b4e1fa2c
AM
1424 buf_pool = buf_pool_from_block(block);
1425
1426 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1427- buf_pool_mutex_enter(buf_pool);
1428+ //buf_pool_mutex_enter(buf_pool);
1429+ mutex_enter(&buf_pool->LRU_list_mutex);
1430 buf_LRU_make_block_young(&block->page);
1431- buf_pool_mutex_exit(buf_pool);
1432+ //buf_pool_mutex_exit(buf_pool);
1433+ mutex_exit(&buf_pool->LRU_list_mutex);
1434 } else if (!buf_page_is_accessed(&block->page)) {
1435 /* Above, we do a dirty read on purpose, to avoid
1436 mutex contention. The field buf_page_t::access_time
d8778560 1437@@ -3301,9 +3442,11 @@
b4e1fa2c
AM
1438 field must be protected by mutex, however. */
1439 ulint time_ms = ut_time_ms();
1440
1441- buf_pool_mutex_enter(buf_pool);
1442+ //buf_pool_mutex_enter(buf_pool);
1443+ mutex_enter(&block->mutex);
1444 buf_page_set_accessed(&block->page, time_ms);
1445- buf_pool_mutex_exit(buf_pool);
1446+ //buf_pool_mutex_exit(buf_pool);
1447+ mutex_exit(&block->mutex);
1448 }
1449
1450 ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
d8778560 1451@@ -3370,18 +3513,21 @@
b4e1fa2c
AM
1452 ut_ad(mtr);
1453 ut_ad(mtr->state == MTR_ACTIVE);
1454
1455- buf_pool_mutex_enter(buf_pool);
1456+ //buf_pool_mutex_enter(buf_pool);
1457+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1458 block = buf_block_hash_get(buf_pool, space_id, page_no);
1459
1460 if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1461- buf_pool_mutex_exit(buf_pool);
1462+ //buf_pool_mutex_exit(buf_pool);
1463+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1464 return(NULL);
1465 }
1466
1467 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1468
1469 mutex_enter(&block->mutex);
1470- buf_pool_mutex_exit(buf_pool);
1471+ //buf_pool_mutex_exit(buf_pool);
1472+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1473
1474 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1475 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
d8778560 1476@@ -3470,7 +3616,10 @@
b4e1fa2c
AM
1477 buf_page_t* hash_page;
1478 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1479
1480- ut_ad(buf_pool_mutex_own(buf_pool));
1481+ //ut_ad(buf_pool_mutex_own(buf_pool));
1482+#ifdef UNIV_SYNC_DEBUG
1483+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1484+#endif
1485 ut_ad(mutex_own(&(block->mutex)));
1486 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1487
d8778560 1488@@ -3499,11 +3648,14 @@
b4e1fa2c
AM
1489 if (UNIV_LIKELY(!hash_page)) {
1490 } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1491 /* Preserve the reference count. */
1492- ulint buf_fix_count = hash_page->buf_fix_count;
1493+ ulint buf_fix_count;
1494
1495+ mutex_enter(&buf_pool->zip_mutex);
1496+ buf_fix_count = hash_page->buf_fix_count;
1497 ut_a(buf_fix_count > 0);
1498 block->page.buf_fix_count += buf_fix_count;
1499 buf_pool_watch_remove(buf_pool, fold, hash_page);
1500+ mutex_exit(&buf_pool->zip_mutex);
1501 } else {
1502 fprintf(stderr,
1503 "InnoDB: Error: page %lu %lu already found"
d8778560 1504@@ -3513,7 +3665,8 @@
b4e1fa2c
AM
1505 (const void*) hash_page, (const void*) block);
1506 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1507 mutex_exit(&block->mutex);
1508- buf_pool_mutex_exit(buf_pool);
1509+ //buf_pool_mutex_exit(buf_pool);
1510+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1511 buf_print();
1512 buf_LRU_print();
1513 buf_validate();
d8778560 1514@@ -3597,7 +3750,9 @@
b4e1fa2c
AM
1515
1516 fold = buf_page_address_fold(space, offset);
1517
1518- buf_pool_mutex_enter(buf_pool);
1519+ //buf_pool_mutex_enter(buf_pool);
1520+ mutex_enter(&buf_pool->LRU_list_mutex);
1521+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1522
1523 watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1524 if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
d8778560 1525@@ -3606,9 +3761,15 @@
b4e1fa2c
AM
1526 err_exit:
1527 if (block) {
1528 mutex_enter(&block->mutex);
1529- buf_LRU_block_free_non_file_page(block);
1530+ mutex_exit(&buf_pool->LRU_list_mutex);
1531+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1532+ buf_LRU_block_free_non_file_page(block, FALSE);
1533 mutex_exit(&block->mutex);
1534 }
1535+ else {
1536+ mutex_exit(&buf_pool->LRU_list_mutex);
1537+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1538+ }
1539
1540 bpage = NULL;
1541 goto func_exit;
d8778560 1542@@ -3631,6 +3792,8 @@
b4e1fa2c
AM
1543
1544 buf_page_init(space, offset, fold, block);
1545
1546+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1547+
1548 /* The block must be put to the LRU list, to the old blocks */
1549 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1550
d8778560 1551@@ -3658,7 +3821,7 @@
b4e1fa2c
AM
1552 been added to buf_pool->LRU and
1553 buf_pool->page_hash. */
1554 mutex_exit(&block->mutex);
1555- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1556+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1557 mutex_enter(&block->mutex);
1558 block->page.zip.data = data;
1559
d8778560 1560@@ -3671,6 +3834,7 @@
b4e1fa2c
AM
1561 buf_unzip_LRU_add_block(block, TRUE);
1562 }
1563
1564+ mutex_exit(&buf_pool->LRU_list_mutex);
1565 mutex_exit(&block->mutex);
1566 } else {
1567 /* Defer buf_buddy_alloc() until after the block has
d8778560 1568@@ -3682,8 +3846,8 @@
b4e1fa2c
AM
1569 control block (bpage), in order to avoid the
1570 invocation of buf_buddy_relocate_block() on
1571 uninitialized data. */
1572- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1573- bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1574+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1575+ bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1576
1577 /* Initialize the buf_pool pointer. */
1578 bpage->buf_pool_index = buf_pool_index(buf_pool);
d8778560 1579@@ -3702,8 +3866,11 @@
b4e1fa2c
AM
1580
1581 /* The block was added by some other thread. */
1582 watch_page = NULL;
1583- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1584- buf_buddy_free(buf_pool, data, zip_size);
1585+ buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1586+ buf_buddy_free(buf_pool, data, zip_size, TRUE);
1587+
1588+ mutex_exit(&buf_pool->LRU_list_mutex);
1589+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1590
1591 bpage = NULL;
1592 goto func_exit;
d8778560 1593@@ -3747,18 +3914,24 @@
b4e1fa2c
AM
1594 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1595 bpage);
1596
1597+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1598+
1599 /* The block must be put to the LRU list, to the old blocks */
1600 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1601 buf_LRU_insert_zip_clean(bpage);
1602
1603+ mutex_exit(&buf_pool->LRU_list_mutex);
1604+
1605 buf_page_set_io_fix(bpage, BUF_IO_READ);
1606
1607 mutex_exit(&buf_pool->zip_mutex);
1608 }
1609
1610+ buf_pool_mutex_enter(buf_pool);
1611 buf_pool->n_pend_reads++;
1612-func_exit:
1613 buf_pool_mutex_exit(buf_pool);
1614+func_exit:
1615+ //buf_pool_mutex_exit(buf_pool);
1616
1617 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1618
d8778560 1619@@ -3800,7 +3973,9 @@
b4e1fa2c
AM
1620
1621 fold = buf_page_address_fold(space, offset);
1622
1623- buf_pool_mutex_enter(buf_pool);
1624+ //buf_pool_mutex_enter(buf_pool);
1625+ mutex_enter(&buf_pool->LRU_list_mutex);
1626+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1627
1628 block = (buf_block_t*) buf_page_hash_get_low(
1629 buf_pool, space, offset, fold);
d8778560 1630@@ -3816,7 +3991,9 @@
b4e1fa2c
AM
1631 #endif /* UNIV_DEBUG_FILE_ACCESSES */
1632
1633 /* Page can be found in buf_pool */
1634- buf_pool_mutex_exit(buf_pool);
1635+ //buf_pool_mutex_exit(buf_pool);
1636+ mutex_exit(&buf_pool->LRU_list_mutex);
1637+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1638
1639 buf_block_free(free_block);
1640
d8778560 1641@@ -3838,6 +4015,7 @@
b4e1fa2c
AM
1642 mutex_enter(&block->mutex);
1643
1644 buf_page_init(space, offset, fold, block);
1645+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1646
1647 /* The block must be put to the LRU list */
1648 buf_LRU_add_block(&block->page, FALSE);
d8778560 1649@@ -3864,7 +4042,7 @@
b4e1fa2c
AM
1650 the reacquisition of buf_pool->mutex. We also must
1651 defer this operation until after the block descriptor
1652 has been added to buf_pool->LRU and buf_pool->page_hash. */
1653- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1654+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1655 mutex_enter(&block->mutex);
1656 block->page.zip.data = data;
1657
d8778560 1658@@ -3882,7 +4060,8 @@
b4e1fa2c
AM
1659
1660 buf_page_set_accessed(&block->page, time_ms);
1661
1662- buf_pool_mutex_exit(buf_pool);
1663+ //buf_pool_mutex_exit(buf_pool);
1664+ mutex_exit(&buf_pool->LRU_list_mutex);
1665
1666 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1667
d8778560 1668@@ -3933,6 +4112,8 @@
b4e1fa2c
AM
1669 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1670 const ibool uncompressed = (buf_page_get_state(bpage)
1671 == BUF_BLOCK_FILE_PAGE);
1672+ ibool have_LRU_mutex = FALSE;
1673+ mutex_t* block_mutex;
1674
1675 ut_a(buf_page_in_file(bpage));
1676
d8778560 1677@@ -4066,8 +4247,26 @@
b4e1fa2c
AM
1678 }
1679 }
1680
1681+ if (io_type == BUF_IO_WRITE
1682+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1683+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1684+ /* to keep consistency at buf_LRU_insert_zip_clean() */
1685+ have_LRU_mutex = TRUE; /* optimistic */
1686+ }
1687+retry_mutex:
1688+ if (have_LRU_mutex)
1689+ mutex_enter(&buf_pool->LRU_list_mutex);
1690+ block_mutex = buf_page_get_mutex_enter(bpage);
1691+ ut_a(block_mutex);
1692+ if (io_type == BUF_IO_WRITE
1693+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1694+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1695+ && !have_LRU_mutex) {
1696+ mutex_exit(block_mutex);
1697+ have_LRU_mutex = TRUE;
1698+ goto retry_mutex;
1699+ }
1700 buf_pool_mutex_enter(buf_pool);
1701- mutex_enter(buf_page_get_mutex(bpage));
1702
1703 #ifdef UNIV_IBUF_COUNT_DEBUG
1704 if (io_type == BUF_IO_WRITE || uncompressed) {
d8778560 1705@@ -4090,6 +4289,7 @@
b4e1fa2c
AM
1706 the x-latch to this OS thread: do not let this confuse you in
1707 debugging! */
1708
1709+ ut_a(!have_LRU_mutex);
1710 ut_ad(buf_pool->n_pend_reads > 0);
1711 buf_pool->n_pend_reads--;
1712 buf_pool->stat.n_pages_read++;
d8778560 1713@@ -4107,6 +4307,9 @@
b4e1fa2c
AM
1714
1715 buf_flush_write_complete(bpage);
1716
1717+ if (have_LRU_mutex)
1718+ mutex_exit(&buf_pool->LRU_list_mutex);
1719+
1720 if (uncompressed) {
1721 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1722 BUF_IO_WRITE);
d8778560 1723@@ -4129,8 +4332,8 @@
b4e1fa2c
AM
1724 }
1725 #endif /* UNIV_DEBUG */
1726
1727- mutex_exit(buf_page_get_mutex(bpage));
1728 buf_pool_mutex_exit(buf_pool);
1729+ mutex_exit(block_mutex);
1730 }
1731
1732 /*********************************************************************//**
d8778560 1733@@ -4147,7 +4350,9 @@
b4e1fa2c
AM
1734
1735 ut_ad(buf_pool);
1736
1737- buf_pool_mutex_enter(buf_pool);
1738+ //buf_pool_mutex_enter(buf_pool);
1739+ mutex_enter(&buf_pool->LRU_list_mutex);
1740+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1741
1742 chunk = buf_pool->chunks;
1743
d8778560 1744@@ -4164,7 +4369,9 @@
b4e1fa2c
AM
1745 }
1746 }
1747
1748- buf_pool_mutex_exit(buf_pool);
1749+ //buf_pool_mutex_exit(buf_pool);
1750+ mutex_exit(&buf_pool->LRU_list_mutex);
1751+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1752
1753 return(TRUE);
1754 }
d8778560 1755@@ -4212,7 +4419,8 @@
b4e1fa2c
AM
1756 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1757 }
1758
1759- buf_pool_mutex_enter(buf_pool);
1760+ //buf_pool_mutex_enter(buf_pool);
1761+ mutex_enter(&buf_pool->LRU_list_mutex);
1762
1763 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1764 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
d8778560 1765@@ -4225,7 +4433,8 @@
b4e1fa2c
AM
1766 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1767 buf_refresh_io_stats(buf_pool);
1768
1769- buf_pool_mutex_exit(buf_pool);
1770+ //buf_pool_mutex_exit(buf_pool);
1771+ mutex_exit(&buf_pool->LRU_list_mutex);
1772 }
1773
1774 /*********************************************************************//**
d8778560 1775@@ -4267,7 +4476,10 @@
b4e1fa2c
AM
1776
1777 ut_ad(buf_pool);
1778
1779- buf_pool_mutex_enter(buf_pool);
1780+ //buf_pool_mutex_enter(buf_pool);
1781+ mutex_enter(&buf_pool->LRU_list_mutex);
1782+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1783+ /* To keep the new latch order, this function can no longer validate everything correctly... */
1784
1785 chunk = buf_pool->chunks;
1786
d8778560 1787@@ -4362,7 +4574,7 @@
b4e1fa2c
AM
1788 /* Check clean compressed-only blocks. */
1789
1790 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1791- b = UT_LIST_GET_NEXT(list, b)) {
1792+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1793 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1794 switch (buf_page_get_io_fix(b)) {
1795 case BUF_IO_NONE:
d8778560 1796@@ -4393,7 +4605,7 @@
b4e1fa2c
AM
1797
1798 buf_flush_list_mutex_enter(buf_pool);
1799 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1800- b = UT_LIST_GET_NEXT(list, b)) {
1801+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1802 ut_ad(b->in_flush_list);
1803 ut_a(b->oldest_modification);
1804 n_flush++;
d8778560 1805@@ -4452,6 +4664,8 @@
b4e1fa2c
AM
1806 }
1807
1808 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1809+ /* Because of the latching order with block->mutex, we cannot acquire the needed mutexes beforehand, so the checks below are disabled. */
1810+/*
1811 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1812 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1813 (ulong) UT_LIST_GET_LEN(buf_pool->free),
d8778560 1814@@ -4462,8 +4676,11 @@
b4e1fa2c
AM
1815 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1816 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1817 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1818+*/
1819
1820- buf_pool_mutex_exit(buf_pool);
1821+ //buf_pool_mutex_exit(buf_pool);
1822+ mutex_exit(&buf_pool->LRU_list_mutex);
1823+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1824
1825 ut_a(buf_LRU_validate());
1826 ut_a(buf_flush_validate(buf_pool));
d8778560 1827@@ -4519,7 +4736,9 @@
b4e1fa2c
AM
1828 index_ids = mem_alloc(size * sizeof *index_ids);
1829 counts = mem_alloc(sizeof(ulint) * size);
1830
1831- buf_pool_mutex_enter(buf_pool);
1832+ //buf_pool_mutex_enter(buf_pool);
1833+ mutex_enter(&buf_pool->LRU_list_mutex);
1834+ mutex_enter(&buf_pool->free_list_mutex);
1835 buf_flush_list_mutex_enter(buf_pool);
1836
1837 fprintf(stderr,
d8778560 1838@@ -4588,7 +4807,9 @@
b4e1fa2c
AM
1839 }
1840 }
1841
1842- buf_pool_mutex_exit(buf_pool);
1843+ //buf_pool_mutex_exit(buf_pool);
1844+ mutex_exit(&buf_pool->LRU_list_mutex);
1845+ mutex_exit(&buf_pool->free_list_mutex);
1846
1847 for (i = 0; i < n_found; i++) {
1848 index = dict_index_get_if_in_cache(index_ids[i]);
d8778560 1849@@ -4645,7 +4866,7 @@
b4e1fa2c
AM
1850 buf_chunk_t* chunk;
1851 ulint fixed_pages_number = 0;
1852
1853- buf_pool_mutex_enter(buf_pool);
1854+ //buf_pool_mutex_enter(buf_pool);
1855
1856 chunk = buf_pool->chunks;
1857
d8778560 1858@@ -4679,7 +4900,7 @@
b4e1fa2c
AM
1859 /* Traverse the lists of clean and dirty compressed-only blocks. */
1860
1861 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1862- b = UT_LIST_GET_NEXT(list, b)) {
1863+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1864 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1865 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1866
d8778560 1867@@ -4691,7 +4912,7 @@
b4e1fa2c
AM
1868
1869 buf_flush_list_mutex_enter(buf_pool);
1870 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1871- b = UT_LIST_GET_NEXT(list, b)) {
1872+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1873 ut_ad(b->in_flush_list);
1874
1875 switch (buf_page_get_state(b)) {
d8778560 1876@@ -4717,7 +4938,7 @@
b4e1fa2c
AM
1877
1878 buf_flush_list_mutex_exit(buf_pool);
1879 mutex_exit(&buf_pool->zip_mutex);
1880- buf_pool_mutex_exit(buf_pool);
1881+ //buf_pool_mutex_exit(buf_pool);
1882
1883 return(fixed_pages_number);
1884 }
d8778560
AM
1885@@ -4873,6 +5094,8 @@
1886 /* Find appropriate pool_info to store stats for this buffer pool */
1887 pool_info = &all_pool_info[pool_id];
b4e1fa2c
AM
1888
1889+ mutex_enter(&buf_pool->LRU_list_mutex);
1890+ mutex_enter(&buf_pool->free_list_mutex);
1891 buf_pool_mutex_enter(buf_pool);
1892 buf_flush_list_mutex_enter(buf_pool);
1893
d8778560
AM
1894@@ -4983,6 +5206,8 @@
1895 pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
b4e1fa2c
AM
1896
1897 buf_refresh_io_stats(buf_pool);
1898+ mutex_exit(&buf_pool->LRU_list_mutex);
1899+ mutex_exit(&buf_pool->free_list_mutex);
1900 buf_pool_mutex_exit(buf_pool);
1901 }
1902
d8778560 1903@@ -5224,11 +5449,13 @@
b4e1fa2c
AM
1904 {
1905 ulint len;
1906
1907- buf_pool_mutex_enter(buf_pool);
1908+ //buf_pool_mutex_enter(buf_pool);
1909+ mutex_enter(&buf_pool->free_list_mutex);
1910
1911 len = UT_LIST_GET_LEN(buf_pool->free);
1912
1913- buf_pool_mutex_exit(buf_pool);
1914+ //buf_pool_mutex_exit(buf_pool);
1915+ mutex_exit(&buf_pool->free_list_mutex);
1916
1917 return(len);
1918 }
1919diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1920--- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 +0900
1921+++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900
d8778560 1922@@ -307,7 +307,7 @@
b4e1fa2c
AM
1923
1924 ut_d(block->page.in_flush_list = TRUE);
1925 block->page.oldest_modification = lsn;
1926- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1927+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1928
1929 #ifdef UNIV_DEBUG_VALGRIND
1930 {
d8778560 1931@@ -401,14 +401,14 @@
b4e1fa2c
AM
1932 > block->page.oldest_modification) {
1933 ut_ad(b->in_flush_list);
1934 prev_b = b;
1935- b = UT_LIST_GET_NEXT(list, b);
1936+ b = UT_LIST_GET_NEXT(flush_list, b);
1937 }
1938 }
1939
1940 if (prev_b == NULL) {
1941- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1942+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1943 } else {
1944- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1945+ UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1946 prev_b, &block->page);
1947 }
1948
d8778560 1949@@ -434,7 +434,7 @@
b4e1fa2c
AM
1950 //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1951 //ut_ad(buf_pool_mutex_own(buf_pool));
1952 #endif
1953- //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1954+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1955 //ut_ad(bpage->in_LRU_list);
1956
1957 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
d8778560 1958@@ -470,14 +470,14 @@
b4e1fa2c
AM
1959 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1960 {
1961 #ifdef UNIV_DEBUG
1962- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1963- ut_ad(buf_pool_mutex_own(buf_pool));
1964+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1965+ //ut_ad(buf_pool_mutex_own(buf_pool));
1966 #endif
1967- ut_a(buf_page_in_file(bpage));
1968+ //ut_a(buf_page_in_file(bpage));
1969 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1970 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1971
1972- if (bpage->oldest_modification != 0
1973+ if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1974 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1975 ut_ad(bpage->in_flush_list);
1976
d8778560 1977@@ -508,7 +508,7 @@
b4e1fa2c
AM
1978 {
1979 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1980
1981- ut_ad(buf_pool_mutex_own(buf_pool));
1982+ //ut_ad(buf_pool_mutex_own(buf_pool));
1983 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1984 ut_ad(bpage->in_flush_list);
1985
d8778560 1986@@ -526,11 +526,11 @@
b4e1fa2c
AM
1987 return;
1988 case BUF_BLOCK_ZIP_DIRTY:
1989 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1990- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1991+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1992 buf_LRU_insert_zip_clean(bpage);
1993 break;
1994 case BUF_BLOCK_FILE_PAGE:
1995- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1996+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1997 break;
1998 }
1999
d8778560 2000@@ -574,7 +574,7 @@
b4e1fa2c
AM
2001 buf_page_t* prev_b = NULL;
2002 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2003
2004- ut_ad(buf_pool_mutex_own(buf_pool));
2005+ //ut_ad(buf_pool_mutex_own(buf_pool));
2006 /* Must reside in the same buffer pool. */
2007 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2008
d8778560 2009@@ -603,18 +603,18 @@
b4e1fa2c
AM
2010 because we assert on in_flush_list in comparison function. */
2011 ut_d(bpage->in_flush_list = FALSE);
2012
2013- prev = UT_LIST_GET_PREV(list, bpage);
2014- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2015+ prev = UT_LIST_GET_PREV(flush_list, bpage);
2016+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2017
2018 if (prev) {
2019 ut_ad(prev->in_flush_list);
2020 UT_LIST_INSERT_AFTER(
2021- list,
2022+ flush_list,
2023 buf_pool->flush_list,
2024 prev, dpage);
2025 } else {
2026 UT_LIST_ADD_FIRST(
2027- list,
2028+ flush_list,
2029 buf_pool->flush_list,
2030 dpage);
2031 }
d8778560 2032@@ -1083,7 +1083,7 @@
b4e1fa2c
AM
2033
2034 #ifdef UNIV_DEBUG
2035 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2036- ut_ad(!buf_pool_mutex_own(buf_pool));
2037+ //ut_ad(!buf_pool_mutex_own(buf_pool));
2038 #endif
2039
2040 #ifdef UNIV_LOG_DEBUG
d8778560 2041@@ -1097,7 +1097,8 @@
b4e1fa2c
AM
2042 io_fixed and oldest_modification != 0. Thus, it cannot be
2043 relocated in the buffer pool or removed from flush_list or
2044 LRU_list. */
2045- ut_ad(!buf_pool_mutex_own(buf_pool));
2046+ //ut_ad(!buf_pool_mutex_own(buf_pool));
2047+ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2048 ut_ad(!buf_flush_list_mutex_own(buf_pool));
2049 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2050 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
d8778560 2051@@ -1260,12 +1261,18 @@
b4e1fa2c
AM
2052 ibool is_uncompressed;
2053
2054 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2055- ut_ad(buf_pool_mutex_own(buf_pool));
2056+ //ut_ad(buf_pool_mutex_own(buf_pool));
2057+#ifdef UNIV_SYNC_DEBUG
2058+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2059+#endif
2060 ut_ad(buf_page_in_file(bpage));
2061
2062 block_mutex = buf_page_get_mutex(bpage);
2063 ut_ad(mutex_own(block_mutex));
2064
2065+ buf_pool_mutex_enter(buf_pool);
2066+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2067+
2068 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2069
2070 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
d8778560 2071@@ -1427,14 +1434,16 @@
b4e1fa2c
AM
2072
2073 buf_pool = buf_pool_get(space, i);
2074
2075- buf_pool_mutex_enter(buf_pool);
2076+ //buf_pool_mutex_enter(buf_pool);
2077+ rw_lock_s_lock(&buf_pool->page_hash_latch);
2078
2079 /* We only want to flush pages from this buffer pool. */
2080 bpage = buf_page_hash_get(buf_pool, space, i);
2081
2082 if (!bpage) {
2083
2084- buf_pool_mutex_exit(buf_pool);
2085+ //buf_pool_mutex_exit(buf_pool);
2086+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2087 continue;
2088 }
2089
d8778560 2090@@ -1446,11 +1455,9 @@
b4e1fa2c
AM
2091 if (flush_type != BUF_FLUSH_LRU
2092 || i == offset
2093 || buf_page_is_old(bpage)) {
2094- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2095-
2096- mutex_enter(block_mutex);
2097+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2098
2099- if (buf_flush_ready_for_flush(bpage, flush_type)
2100+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2101 && (i == offset || !bpage->buf_fix_count)) {
2102 /* We only try to flush those
2103 neighbors != offset where the buf fix
d8778560 2104@@ -1466,11 +1473,12 @@
b4e1fa2c
AM
2105 ut_ad(!buf_pool_mutex_own(buf_pool));
2106 count++;
2107 continue;
2108- } else {
2109+ } else if (block_mutex) {
2110 mutex_exit(block_mutex);
2111 }
2112 }
2113- buf_pool_mutex_exit(buf_pool);
2114+ //buf_pool_mutex_exit(buf_pool);
2115+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2116 }
2117
2118 return(count);
d8778560 2119@@ -1503,21 +1511,25 @@
b4e1fa2c
AM
2120 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2121 #endif /* UNIV_DEBUG */
2122
2123- ut_ad(buf_pool_mutex_own(buf_pool));
2124+ //ut_ad(buf_pool_mutex_own(buf_pool));
2125+ ut_ad(flush_type != BUF_FLUSH_LRU
2126+ || mutex_own(&buf_pool->LRU_list_mutex));
2127
2128- block_mutex = buf_page_get_mutex(bpage);
2129- mutex_enter(block_mutex);
2130+ block_mutex = buf_page_get_mutex_enter(bpage);
2131
2132- ut_a(buf_page_in_file(bpage));
2133+ //ut_a(buf_page_in_file(bpage));
2134
2135- if (buf_flush_ready_for_flush(bpage, flush_type)) {
2136+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2137 ulint space;
2138 ulint offset;
2139 buf_pool_t* buf_pool;
2140
2141 buf_pool = buf_pool_from_bpage(bpage);
2142
2143- buf_pool_mutex_exit(buf_pool);
2144+ //buf_pool_mutex_exit(buf_pool);
2145+ if (flush_type == BUF_FLUSH_LRU) {
2146+ mutex_exit(&buf_pool->LRU_list_mutex);
2147+ }
2148
2149 /* These fields are protected by both the
2150 buffer pool mutex and block mutex. */
d8778560 2151@@ -1533,13 +1545,18 @@
b4e1fa2c
AM
2152 *count,
2153 n_to_flush);
2154
2155- buf_pool_mutex_enter(buf_pool);
2156+ //buf_pool_mutex_enter(buf_pool);
2157+ if (flush_type == BUF_FLUSH_LRU) {
2158+ mutex_enter(&buf_pool->LRU_list_mutex);
2159+ }
2160 flushed = TRUE;
2161- } else {
2162+ } else if (block_mutex) {
2163 mutex_exit(block_mutex);
2164 }
2165
2166- ut_ad(buf_pool_mutex_own(buf_pool));
2167+ //ut_ad(buf_pool_mutex_own(buf_pool));
2168+ ut_ad(flush_type != BUF_FLUSH_LRU
2169+ || mutex_own(&buf_pool->LRU_list_mutex));
2170
2171 return(flushed);
2172 }
d8778560 2173@@ -1560,7 +1577,8 @@
b4e1fa2c
AM
2174 buf_page_t* bpage;
2175 ulint count = 0;
2176
2177- ut_ad(buf_pool_mutex_own(buf_pool));
2178+ //ut_ad(buf_pool_mutex_own(buf_pool));
2179+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2180
2181 do {
2182 /* Start from the end of the list looking for a
d8778560 2183@@ -1582,7 +1600,8 @@
b4e1fa2c
AM
2184 should be flushed, we factor in this value. */
2185 buf_lru_flush_page_count += count;
2186
2187- ut_ad(buf_pool_mutex_own(buf_pool));
2188+ //ut_ad(buf_pool_mutex_own(buf_pool));
2189+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2190
2191 return(count);
2192 }
d8778560 2193@@ -1610,9 +1629,10 @@
b4e1fa2c
AM
2194 {
2195 ulint len;
2196 buf_page_t* bpage;
2197+ buf_page_t* prev_bpage = NULL;
2198 ulint count = 0;
2199
2200- ut_ad(buf_pool_mutex_own(buf_pool));
2201+ //ut_ad(buf_pool_mutex_own(buf_pool));
2202
2203 /* If we have flushed enough, leave the loop */
2204 do {
d8778560 2205@@ -1631,6 +1651,7 @@
b4e1fa2c
AM
2206
2207 if (bpage) {
2208 ut_a(bpage->oldest_modification > 0);
2209+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2210 }
2211
2212 if (!bpage || bpage->oldest_modification >= lsn_limit) {
d8778560 2213@@ -1672,9 +1693,17 @@
b4e1fa2c
AM
2214 break;
2215 }
2216
2217- bpage = UT_LIST_GET_PREV(list, bpage);
2218+ bpage = UT_LIST_GET_PREV(flush_list, bpage);
2219
2220- ut_ad(!bpage || bpage->in_flush_list);
2221+ //ut_ad(!bpage || bpage->in_flush_list);
2222+ if (bpage != prev_bpage) {
2223+ /* the search might warp.. retrying */
2224+ buf_flush_list_mutex_exit(buf_pool);
2225+ break;
2226+ }
2227+ if (bpage) {
2228+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2229+ }
2230
2231 buf_flush_list_mutex_exit(buf_pool);
2232
d8778560 2233@@ -1683,7 +1712,7 @@
b4e1fa2c
AM
2234
2235 } while (count < min_n && bpage != NULL && len > 0);
2236
2237- ut_ad(buf_pool_mutex_own(buf_pool));
2238+ //ut_ad(buf_pool_mutex_own(buf_pool));
2239
2240 return(count);
2241 }
d8778560 2242@@ -1722,13 +1751,15 @@
b4e1fa2c
AM
2243 || sync_thread_levels_empty_gen(TRUE));
2244 #endif /* UNIV_SYNC_DEBUG */
2245
2246- buf_pool_mutex_enter(buf_pool);
2247+ //buf_pool_mutex_enter(buf_pool);
2248
2249 /* Note: The buffer pool mutex is released and reacquired within
2250 the flush functions. */
2251 switch(flush_type) {
2252 case BUF_FLUSH_LRU:
2253+ mutex_enter(&buf_pool->LRU_list_mutex);
2254 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2255+ mutex_exit(&buf_pool->LRU_list_mutex);
2256 break;
2257 case BUF_FLUSH_LIST:
2258 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
d8778560 2259@@ -1737,7 +1768,7 @@
b4e1fa2c
AM
2260 ut_error;
2261 }
2262
2263- buf_pool_mutex_exit(buf_pool);
2264+ //buf_pool_mutex_exit(buf_pool);
2265
2266 buf_flush_buffered_writes();
2267
d8778560 2268@@ -1993,7 +2024,7 @@
b4e1fa2c
AM
2269 retry:
2270 //buf_pool_mutex_enter(buf_pool);
2271 if (have_LRU_mutex)
2272- buf_pool_mutex_enter(buf_pool);
2273+ mutex_enter(&buf_pool->LRU_list_mutex);
2274
2275 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2276
d8778560 2277@@ -2010,15 +2041,15 @@
b4e1fa2c
AM
2278 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2279 continue;
2280 }
2281- block_mutex = buf_page_get_mutex(bpage);
2282-
2283- mutex_enter(block_mutex);
2284+ block_mutex = buf_page_get_mutex_enter(bpage);
2285
2286- if (buf_flush_ready_for_replace(bpage)) {
2287+ if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2288 n_replaceable++;
2289 }
2290
2291- mutex_exit(block_mutex);
2292+ if (block_mutex) {
2293+ mutex_exit(block_mutex);
2294+ }
2295
2296 distance++;
2297
d8778560 2298@@ -2027,7 +2058,7 @@
b4e1fa2c
AM
2299
2300 //buf_pool_mutex_exit(buf_pool);
2301 if (have_LRU_mutex)
2302- buf_pool_mutex_exit(buf_pool);
2303+ mutex_exit(&buf_pool->LRU_list_mutex);
2304
2305 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2306
d8778560 2307@@ -2226,7 +2257,7 @@
b4e1fa2c
AM
2308
2309 ut_ad(buf_flush_list_mutex_own(buf_pool));
2310
2311- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2312+ UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2313 ut_ad(ut_list_node_313->in_flush_list));
2314
2315 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
d8778560 2316@@ -2266,7 +2297,7 @@
b4e1fa2c
AM
2317 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2318 }
2319
2320- bpage = UT_LIST_GET_NEXT(list, bpage);
2321+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2322
2323 ut_a(!bpage || om >= bpage->oldest_modification);
2324 }
2325diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2326--- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900
2327+++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900
2328@@ -143,8 +143,9 @@
2329 void
2330 buf_LRU_block_free_hashed_page(
2331 /*===========================*/
2332- buf_block_t* block); /*!< in: block, must contain a file page and
2333+ buf_block_t* block, /*!< in: block, must contain a file page and
2334 be in a state where it can be freed */
2335+ ibool have_page_hash_mutex);
2336
2337 /******************************************************************//**
2338 Determines if the unzip_LRU list should be used for evicting a victim
2339@@ -154,15 +155,20 @@
2340 ibool
2341 buf_LRU_evict_from_unzip_LRU(
2342 /*=========================*/
2343- buf_pool_t* buf_pool)
2344+ buf_pool_t* buf_pool,
2345+ ibool have_LRU_mutex)
2346 {
2347 ulint io_avg;
2348 ulint unzip_avg;
2349
2350- ut_ad(buf_pool_mutex_own(buf_pool));
2351+ //ut_ad(buf_pool_mutex_own(buf_pool));
2352
2353+ if (!have_LRU_mutex)
2354+ mutex_enter(&buf_pool->LRU_list_mutex);
2355 /* If the unzip_LRU list is empty, we can only use the LRU. */
2356 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2357+ if (!have_LRU_mutex)
2358+ mutex_exit(&buf_pool->LRU_list_mutex);
2359 return(FALSE);
2360 }
2361
2362@@ -171,14 +177,20 @@
2363 decompressed pages in the buffer pool. */
2364 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2365 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2366+ if (!have_LRU_mutex)
2367+ mutex_exit(&buf_pool->LRU_list_mutex);
2368 return(FALSE);
2369 }
2370
2371 /* If eviction hasn't started yet, we assume by default
2372 that a workload is disk bound. */
2373 if (buf_pool->freed_page_clock == 0) {
2374+ if (!have_LRU_mutex)
2375+ mutex_exit(&buf_pool->LRU_list_mutex);
2376 return(TRUE);
2377 }
2378+ if (!have_LRU_mutex)
2379+ mutex_exit(&buf_pool->LRU_list_mutex);
2380
2381 /* Calculate the average over past intervals, and add the values
2382 of the current interval. */
2383@@ -246,19 +258,23 @@
2384 page_arr = ut_malloc(
2385 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2386
2387- buf_pool_mutex_enter(buf_pool);
2388+ //buf_pool_mutex_enter(buf_pool);
2389+ mutex_enter(&buf_pool->LRU_list_mutex);
2390
2391 scan_again:
2392 num_entries = 0;
2393 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2394
2395 while (bpage != NULL) {
2396- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2397+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2398 buf_page_t* prev_bpage;
2399
2400- mutex_enter(block_mutex);
2401 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2402
2403+ if (!block_mutex) {
2404+ goto next_page;
2405+ }
2406+
2407 ut_a(buf_page_in_file(bpage));
2408
2409 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2410@@ -287,14 +303,16 @@
2411
2412 /* Array full. We release the buf_pool->mutex to
2413 obey the latching order. */
2414- buf_pool_mutex_exit(buf_pool);
2415+ //buf_pool_mutex_exit(buf_pool);
2416+ mutex_exit(&buf_pool->LRU_list_mutex);
2417
2418 buf_LRU_drop_page_hash_batch(
2419 id, zip_size, page_arr, num_entries);
2420
2421 num_entries = 0;
2422
2423- buf_pool_mutex_enter(buf_pool);
2424+ //buf_pool_mutex_enter(buf_pool);
2425+ mutex_enter(&buf_pool->LRU_list_mutex);
2426 } else {
2427 mutex_exit(block_mutex);
2428 }
2429@@ -319,7 +337,8 @@
2430 }
2431 }
2432
2433- buf_pool_mutex_exit(buf_pool);
2434+ //buf_pool_mutex_exit(buf_pool);
2435+ mutex_exit(&buf_pool->LRU_list_mutex);
2436
2437 /* Drop any remaining batch of search hashed pages. */
2438 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2439@@ -341,7 +360,9 @@
2440 ibool all_freed;
2441
2442 scan_again:
2443- buf_pool_mutex_enter(buf_pool);
2444+ //buf_pool_mutex_enter(buf_pool);
2445+ mutex_enter(&buf_pool->LRU_list_mutex);
2446+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2447
2448 all_freed = TRUE;
2449
2450@@ -369,8 +390,16 @@
2451
2452 all_freed = FALSE;
2453 } else {
2454- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2455- mutex_enter(block_mutex);
2456+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2457+
2458+ if (!block_mutex) {
2459+ /* This case should be impossible...
2460+ Something is wrong, so we will scan_again */
2461+
2462+ all_freed = FALSE;
2463+
2464+ goto next_page_no_mutex;
2465+ }
2466
2467 if (bpage->buf_fix_count > 0) {
2468
2469@@ -429,7 +458,9 @@
2470 ulint page_no;
2471 ulint zip_size;
2472
2473- buf_pool_mutex_exit(buf_pool);
2474+ //buf_pool_mutex_exit(buf_pool);
2475+ mutex_exit(&buf_pool->LRU_list_mutex);
2476+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2477
2478 zip_size = buf_page_get_zip_size(bpage);
2479 page_no = buf_page_get_page_no(bpage);
2480@@ -454,7 +485,7 @@
2481 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2482 != BUF_BLOCK_ZIP_FREE) {
2483 buf_LRU_block_free_hashed_page((buf_block_t*)
2484- bpage);
2485+ bpage, TRUE);
2486 } else {
2487 /* The block_mutex should have been
2488 released by buf_LRU_block_remove_hashed_page()
2489@@ -486,7 +517,9 @@
2490 bpage = prev_bpage;
2491 }
2492
2493- buf_pool_mutex_exit(buf_pool);
2494+ //buf_pool_mutex_exit(buf_pool);
2495+ mutex_exit(&buf_pool->LRU_list_mutex);
2496+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2497
2498 if (!all_freed) {
2499 os_thread_sleep(20000);
2500@@ -532,7 +565,9 @@
2501 buf_page_t* b;
2502 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2503
2504- ut_ad(buf_pool_mutex_own(buf_pool));
2505+ //ut_ad(buf_pool_mutex_own(buf_pool));
2506+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2507+ ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2508 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2509
2510 /* Find the first successor of bpage in the LRU list
2511@@ -540,17 +575,17 @@
2512 b = bpage;
2513 do {
2514 b = UT_LIST_GET_NEXT(LRU, b);
2515- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2516+ } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2517
2518 /* Insert bpage before b, i.e., after the predecessor of b. */
2519 if (b) {
2520- b = UT_LIST_GET_PREV(list, b);
2521+ b = UT_LIST_GET_PREV(zip_list, b);
2522 }
2523
2524 if (b) {
2525- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2526+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2527 } else {
2528- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2529+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2530 }
2531 }
2532
2533@@ -563,18 +598,19 @@
2534 buf_LRU_free_from_unzip_LRU_list(
2535 /*=============================*/
2536 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2537- ulint n_iterations) /*!< in: how many times this has
2538+ ulint n_iterations, /*!< in: how many times this has
2539 been called repeatedly without
2540 result: a high value means that
2541 we should search farther; we will
2542 search n_iterations / 5 of the
2543 unzip_LRU list, or nothing if
2544 n_iterations >= 5 */
2545+ ibool have_LRU_mutex)
2546 {
2547 buf_block_t* block;
2548 ulint distance;
2549
2550- ut_ad(buf_pool_mutex_own(buf_pool));
2551+ //ut_ad(buf_pool_mutex_own(buf_pool));
2552
2553 /* Theoratically it should be much easier to find a victim
2554 from unzip_LRU as we can choose even a dirty block (as we'll
2555@@ -584,7 +620,7 @@
2556 if we have done five iterations so far. */
2557
2558 if (UNIV_UNLIKELY(n_iterations >= 5)
2559- || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2560+ || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2561
2562 return(FALSE);
2563 }
2564@@ -592,18 +628,25 @@
2565 distance = 100 + (n_iterations
2566 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2567
2568+restart:
2569 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2570 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2571 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2572
2573 enum buf_lru_free_block_status freed;
2574
2575+ mutex_enter(&block->mutex);
2576+ if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2577+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2578+ mutex_exit(&block->mutex);
2579+ goto restart;
2580+ }
2581+
2582 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2583 ut_ad(block->in_unzip_LRU_list);
2584 ut_ad(block->page.in_LRU_list);
2585
2586- mutex_enter(&block->mutex);
2587- freed = buf_LRU_free_block(&block->page, FALSE, NULL);
2588+ freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
2589 mutex_exit(&block->mutex);
2590
2591 switch (freed) {
2592@@ -637,21 +680,23 @@
2593 buf_LRU_free_from_common_LRU_list(
2594 /*==============================*/
2595 buf_pool_t* buf_pool,
2596- ulint n_iterations)
2597+ ulint n_iterations,
2598 /*!< in: how many times this has been called
2599 repeatedly without result: a high value means
2600 that we should search farther; if
2601 n_iterations < 10, then we search
2602 n_iterations / 10 * buf_pool->curr_size
2603 pages from the end of the LRU list */
2604+ ibool have_LRU_mutex)
2605 {
2606 buf_page_t* bpage;
2607 ulint distance;
2608
2609- ut_ad(buf_pool_mutex_own(buf_pool));
2610+ //ut_ad(buf_pool_mutex_own(buf_pool));
2611
2612 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2613
2614+restart:
2615 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2616 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2617 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2618@@ -659,14 +704,23 @@
2619 enum buf_lru_free_block_status freed;
2620 unsigned accessed;
2621 mutex_t* block_mutex
2622- = buf_page_get_mutex(bpage);
2623+ = buf_page_get_mutex_enter(bpage);
2624+
2625+ if (!block_mutex) {
2626+ goto restart;
2627+ }
2628+
2629+ if (!bpage->in_LRU_list
2630+ || !buf_page_in_file(bpage)) {
2631+ mutex_exit(block_mutex);
2632+ goto restart;
2633+ }
2634
2635 ut_ad(buf_page_in_file(bpage));
2636 ut_ad(bpage->in_LRU_list);
2637
2638- mutex_enter(block_mutex);
2639 accessed = buf_page_is_accessed(bpage);
2640- freed = buf_LRU_free_block(bpage, TRUE, NULL);
2641+ freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
2642 mutex_exit(block_mutex);
2643
2644 switch (freed) {
2645@@ -718,16 +772,23 @@
2646 n_iterations / 5 of the unzip_LRU list. */
2647 {
2648 ibool freed = FALSE;
2649+ ibool have_LRU_mutex = FALSE;
2650
2651- buf_pool_mutex_enter(buf_pool);
2652+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2653+ have_LRU_mutex = TRUE;
2654+
2655+ //buf_pool_mutex_enter(buf_pool);
2656+ if (have_LRU_mutex)
2657+ mutex_enter(&buf_pool->LRU_list_mutex);
2658
2659- freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2660+ freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2661
2662 if (!freed) {
2663 freed = buf_LRU_free_from_common_LRU_list(
2664- buf_pool, n_iterations);
2665+ buf_pool, n_iterations, have_LRU_mutex);
2666 }
2667
2668+ buf_pool_mutex_enter(buf_pool);
2669 if (!freed) {
2670 buf_pool->LRU_flush_ended = 0;
2671 } else if (buf_pool->LRU_flush_ended > 0) {
2672@@ -735,6 +796,8 @@
2673 }
2674
2675 buf_pool_mutex_exit(buf_pool);
2676+ if (have_LRU_mutex)
2677+ mutex_exit(&buf_pool->LRU_list_mutex);
2678
2679 return(freed);
2680 }
2681@@ -795,7 +858,9 @@
2682
2683 buf_pool = buf_pool_from_array(i);
2684
2685- buf_pool_mutex_enter(buf_pool);
2686+ //buf_pool_mutex_enter(buf_pool);
2687+ mutex_enter(&buf_pool->LRU_list_mutex);
2688+ mutex_enter(&buf_pool->free_list_mutex);
2689
2690 if (!recv_recovery_on
2691 && UT_LIST_GET_LEN(buf_pool->free)
2692@@ -805,7 +870,9 @@
2693 ret = TRUE;
2694 }
2695
2696- buf_pool_mutex_exit(buf_pool);
2697+ //buf_pool_mutex_exit(buf_pool);
2698+ mutex_exit(&buf_pool->LRU_list_mutex);
2699+ mutex_exit(&buf_pool->free_list_mutex);
2700 }
2701
2702 return(ret);
2703@@ -823,9 +890,10 @@
2704 {
2705 buf_block_t* block;
2706
2707- ut_ad(buf_pool_mutex_own(buf_pool));
2708+ //ut_ad(buf_pool_mutex_own(buf_pool));
2709
2710- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2711+ mutex_enter(&buf_pool->free_list_mutex);
2712+ block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2713
2714 if (block) {
2715
2716@@ -834,7 +902,9 @@
2717 ut_ad(!block->page.in_flush_list);
2718 ut_ad(!block->page.in_LRU_list);
2719 ut_a(!buf_page_in_file(&block->page));
2720- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2721+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2722+
2723+ mutex_exit(&buf_pool->free_list_mutex);
2724
2725 mutex_enter(&block->mutex);
2726
2727@@ -844,6 +914,8 @@
2728 ut_ad(buf_pool_from_block(block) == buf_pool);
2729
2730 mutex_exit(&block->mutex);
2731+ } else {
2732+ mutex_exit(&buf_pool->free_list_mutex);
2733 }
2734
2735 return(block);
2736@@ -868,7 +940,7 @@
2737 ibool mon_value_was = FALSE;
2738 ibool started_monitor = FALSE;
2739 loop:
2740- buf_pool_mutex_enter(buf_pool);
2741+ //buf_pool_mutex_enter(buf_pool);
2742
2743 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2744 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2745@@ -951,8 +1023,10 @@
2746 ibool lru;
2747 page_zip_set_size(&block->page.zip, zip_size);
2748
2749+ mutex_enter(&buf_pool->LRU_list_mutex);
2750 block->page.zip.data = buf_buddy_alloc(
2751- buf_pool, zip_size, &lru);
2752+ buf_pool, zip_size, &lru, FALSE);
2753+ mutex_exit(&buf_pool->LRU_list_mutex);
2754
2755 UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
2756 } else {
2757@@ -960,7 +1034,7 @@
2758 block->page.zip.data = NULL;
2759 }
2760
2761- buf_pool_mutex_exit(buf_pool);
2762+ //buf_pool_mutex_exit(buf_pool);
2763
2764 if (started_monitor) {
2765 srv_print_innodb_monitor = mon_value_was;
2766@@ -972,7 +1046,7 @@
2767 /* If no block was in the free list, search from the end of the LRU
2768 list and try to free a block there */
2769
2770- buf_pool_mutex_exit(buf_pool);
2771+ //buf_pool_mutex_exit(buf_pool);
2772
2773 freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
2774
2775@@ -1058,7 +1132,8 @@
2776 ulint new_len;
2777
2778 ut_a(buf_pool->LRU_old);
2779- ut_ad(buf_pool_mutex_own(buf_pool));
2780+ //ut_ad(buf_pool_mutex_own(buf_pool));
2781+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2782 ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2783 ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2784 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2785@@ -1124,7 +1199,8 @@
2786 {
2787 buf_page_t* bpage;
2788
2789- ut_ad(buf_pool_mutex_own(buf_pool));
2790+ //ut_ad(buf_pool_mutex_own(buf_pool));
2791+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2792 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2793
2794 /* We first initialize all blocks in the LRU list as old and then use
2795@@ -1159,13 +1235,14 @@
2796 ut_ad(buf_pool);
2797 ut_ad(bpage);
2798 ut_ad(buf_page_in_file(bpage));
2799- ut_ad(buf_pool_mutex_own(buf_pool));
2800+ //ut_ad(buf_pool_mutex_own(buf_pool));
2801+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2802
2803 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2804 buf_block_t* block = (buf_block_t*) bpage;
2805
2806 ut_ad(block->in_unzip_LRU_list);
2807- ut_d(block->in_unzip_LRU_list = FALSE);
2808+ block->in_unzip_LRU_list = FALSE;
2809
2810 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2811 }
2812@@ -1183,7 +1260,8 @@
2813
2814 ut_ad(buf_pool);
2815 ut_ad(bpage);
2816- ut_ad(buf_pool_mutex_own(buf_pool));
2817+ //ut_ad(buf_pool_mutex_own(buf_pool));
2818+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2819
2820 ut_a(buf_page_in_file(bpage));
2821
2822@@ -1260,12 +1338,13 @@
2823
2824 ut_ad(buf_pool);
2825 ut_ad(block);
2826- ut_ad(buf_pool_mutex_own(buf_pool));
2827+ //ut_ad(buf_pool_mutex_own(buf_pool));
2828+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2829
2830 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2831
2832 ut_ad(!block->in_unzip_LRU_list);
2833- ut_d(block->in_unzip_LRU_list = TRUE);
2834+ block->in_unzip_LRU_list = TRUE;
2835
2836 if (old) {
2837 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2838@@ -1286,7 +1365,8 @@
2839
2840 ut_ad(buf_pool);
2841 ut_ad(bpage);
2842- ut_ad(buf_pool_mutex_own(buf_pool));
2843+ //ut_ad(buf_pool_mutex_own(buf_pool));
2844+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2845
2846 ut_a(buf_page_in_file(bpage));
2847
2848@@ -1337,7 +1417,8 @@
2849
2850 ut_ad(buf_pool);
2851 ut_ad(bpage);
2852- ut_ad(buf_pool_mutex_own(buf_pool));
2853+ //ut_ad(buf_pool_mutex_own(buf_pool));
2854+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2855
2856 ut_a(buf_page_in_file(bpage));
2857 ut_ad(!bpage->in_LRU_list);
2858@@ -1416,7 +1497,8 @@
2859 {
2860 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2861
2862- ut_ad(buf_pool_mutex_own(buf_pool));
2863+ //ut_ad(buf_pool_mutex_own(buf_pool));
2864+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2865
2866 if (bpage->old) {
2867 buf_pool->stat.n_pages_made_young++;
2868@@ -1458,19 +1540,20 @@
2869 buf_page_t* bpage, /*!< in: block to be freed */
2870 ibool zip, /*!< in: TRUE if should remove also the
2871 compressed page of an uncompressed page */
2872- ibool* buf_pool_mutex_released)
2873+ ibool* buf_pool_mutex_released,
2874 /*!< in: pointer to a variable that will
2875 be assigned TRUE if buf_pool_mutex
2876 was temporarily released, or NULL */
2877+ ibool have_LRU_mutex)
2878 {
2879 buf_page_t* b = NULL;
2880 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2881 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2882
2883- ut_ad(buf_pool_mutex_own(buf_pool));
2884+ //ut_ad(buf_pool_mutex_own(buf_pool));
2885 ut_ad(mutex_own(block_mutex));
2886 ut_ad(buf_page_in_file(bpage));
2887- ut_ad(bpage->in_LRU_list);
2888+ //ut_ad(bpage->in_LRU_list);
2889 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2890 #if UNIV_WORD_SIZE == 4
2891 /* On 32-bit systems, there is no padding in buf_page_t. On
2892@@ -1479,7 +1562,7 @@
2893 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2894 #endif
2895
2896- if (!buf_page_can_relocate(bpage)) {
2897+ if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2898
2899 /* Do not free buffer-fixed or I/O-fixed blocks. */
2900 return(BUF_LRU_NOT_FREED);
2901@@ -1511,15 +1594,15 @@
2902 If it cannot be allocated (without freeing a block
2903 from the LRU list), refuse to free bpage. */
2904 alloc:
2905- buf_pool_mutex_exit_forbid(buf_pool);
2906- b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2907- buf_pool_mutex_exit_allow(buf_pool);
2908+ //buf_pool_mutex_exit_forbid(buf_pool);
2909+ b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2910+ //buf_pool_mutex_exit_allow(buf_pool);
2911
2912 if (UNIV_UNLIKELY(!b)) {
2913 return(BUF_LRU_CANNOT_RELOCATE);
2914 }
2915
2916- memcpy(b, bpage, sizeof *b);
2917+ //memcpy(b, bpage, sizeof *b);
2918 }
2919
2920 #ifdef UNIV_DEBUG
2921@@ -1530,6 +1613,39 @@
2922 }
2923 #endif /* UNIV_DEBUG */
2924
2925+ /* to avoid breaking the latch order, block_mutex must be released and re-entered */
2926+ mutex_exit(block_mutex);
2927+
2928+ if (!have_LRU_mutex)
2929+ mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2930+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2931+ mutex_enter(block_mutex);
2932+
2933+ /* recheck states of block */
2934+ if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2935+ || !buf_page_can_relocate(bpage)) {
2936+not_freed:
2937+ if (b) {
2938+ buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2939+ }
2940+ if (!have_LRU_mutex)
2941+ mutex_exit(&buf_pool->LRU_list_mutex);
2942+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2943+ return(BUF_LRU_NOT_FREED);
2944+ } else if (zip || !bpage->zip.data) {
2945+ if (bpage->oldest_modification)
2946+ goto not_freed;
2947+ } else if (bpage->oldest_modification) {
2948+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2949+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2950+ goto not_freed;
2951+ }
2952+ }
2953+
2954+ if (b) {
2955+ memcpy(b, bpage, sizeof *b);
2956+ }
2957+
2958 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2959 != BUF_BLOCK_ZIP_FREE) {
2960 ut_a(bpage->buf_fix_count == 0);
2961@@ -1546,6 +1662,10 @@
2962
2963 ut_a(!hash_b);
2964
2965+ while (prev_b && !prev_b->in_LRU_list) {
2966+ prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2967+ }
2968+
2969 b->state = b->oldest_modification
2970 ? BUF_BLOCK_ZIP_DIRTY
2971 : BUF_BLOCK_ZIP_PAGE;
2972@@ -1642,7 +1762,9 @@
2973 *buf_pool_mutex_released = TRUE;
2974 }
2975
2976- buf_pool_mutex_exit(buf_pool);
2977+ //buf_pool_mutex_exit(buf_pool);
2978+ mutex_exit(&buf_pool->LRU_list_mutex);
2979+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2980 mutex_exit(block_mutex);
2981
2982 /* Remove possible adaptive hash index on the page.
2983@@ -1674,7 +1796,9 @@
2984 : BUF_NO_CHECKSUM_MAGIC);
2985 }
2986
2987- buf_pool_mutex_enter(buf_pool);
2988+ //buf_pool_mutex_enter(buf_pool);
2989+ if (have_LRU_mutex)
2990+ mutex_enter(&buf_pool->LRU_list_mutex);
2991 mutex_enter(block_mutex);
2992
2993 if (b) {
2994@@ -1684,13 +1808,17 @@
2995 mutex_exit(&buf_pool->zip_mutex);
2996 }
2997
2998- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2999+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
3000 } else {
3001 /* The block_mutex should have been released by
3002 buf_LRU_block_remove_hashed_page() when it returns
3003 BUF_BLOCK_ZIP_FREE. */
3004 ut_ad(block_mutex == &buf_pool->zip_mutex);
3005 mutex_enter(block_mutex);
3006+
3007+ if (!have_LRU_mutex)
3008+ mutex_exit(&buf_pool->LRU_list_mutex);
3009+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3010 }
3011
3012 return(BUF_LRU_FREED);
3013@@ -1702,13 +1830,14 @@
3014 void
3015 buf_LRU_block_free_non_file_page(
3016 /*=============================*/
3017- buf_block_t* block) /*!< in: block, must not contain a file page */
3018+ buf_block_t* block, /*!< in: block, must not contain a file page */
3019+ ibool have_page_hash_mutex)
3020 {
3021 void* data;
3022 buf_pool_t* buf_pool = buf_pool_from_block(block);
3023
3024 ut_ad(block);
3025- ut_ad(buf_pool_mutex_own(buf_pool));
3026+ //ut_ad(buf_pool_mutex_own(buf_pool));
3027 ut_ad(mutex_own(&block->mutex));
3028
3029 switch (buf_block_get_state(block)) {
3030@@ -1742,18 +1871,21 @@
3031 if (data) {
3032 block->page.zip.data = NULL;
3033 mutex_exit(&block->mutex);
3034- buf_pool_mutex_exit_forbid(buf_pool);
3035+ //buf_pool_mutex_exit_forbid(buf_pool);
3036
3037 buf_buddy_free(
3038- buf_pool, data, page_zip_get_size(&block->page.zip));
3039+ buf_pool, data, page_zip_get_size(&block->page.zip),
3040+ have_page_hash_mutex);
3041
3042- buf_pool_mutex_exit_allow(buf_pool);
3043+ //buf_pool_mutex_exit_allow(buf_pool);
3044 mutex_enter(&block->mutex);
3045 page_zip_set_size(&block->page.zip, 0);
3046 }
3047
3048- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3049+ mutex_enter(&buf_pool->free_list_mutex);
3050+ UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3051 ut_d(block->page.in_free_list = TRUE);
3052+ mutex_exit(&buf_pool->free_list_mutex);
3053
3054 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3055 }
3056@@ -1783,7 +1915,11 @@
3057 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3058
3059 ut_ad(bpage);
3060- ut_ad(buf_pool_mutex_own(buf_pool));
3061+ //ut_ad(buf_pool_mutex_own(buf_pool));
3062+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3063+#ifdef UNIV_SYNC_DEBUG
3064+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3065+#endif
3066 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3067
3068 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3069@@ -1891,7 +2027,9 @@
3070
3071 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3072 mutex_exit(buf_page_get_mutex(bpage));
3073- buf_pool_mutex_exit(buf_pool);
3074+ //buf_pool_mutex_exit(buf_pool);
3075+ mutex_exit(&buf_pool->LRU_list_mutex);
3076+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3077 buf_print();
3078 buf_LRU_print();
3079 buf_validate();
3080@@ -1912,17 +2050,17 @@
3081 ut_a(bpage->zip.data);
3082 ut_a(buf_page_get_zip_size(bpage));
3083
3084- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3085+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3086
3087 mutex_exit(&buf_pool->zip_mutex);
3088- buf_pool_mutex_exit_forbid(buf_pool);
3089+ //buf_pool_mutex_exit_forbid(buf_pool);
3090
3091 buf_buddy_free(
3092 buf_pool, bpage->zip.data,
3093- page_zip_get_size(&bpage->zip));
3094+ page_zip_get_size(&bpage->zip), TRUE);
3095
3096- buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3097- buf_pool_mutex_exit_allow(buf_pool);
3098+ buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3099+ //buf_pool_mutex_exit_allow(buf_pool);
3100
3101 UNIV_MEM_UNDESC(bpage);
3102 return(BUF_BLOCK_ZIP_FREE);
3103@@ -1945,13 +2083,13 @@
3104 ut_ad(!bpage->in_flush_list);
3105 ut_ad(!bpage->in_LRU_list);
3106 mutex_exit(&((buf_block_t*) bpage)->mutex);
3107- buf_pool_mutex_exit_forbid(buf_pool);
3108+ //buf_pool_mutex_exit_forbid(buf_pool);
3109
3110 buf_buddy_free(
3111 buf_pool, data,
3112- page_zip_get_size(&bpage->zip));
3113+ page_zip_get_size(&bpage->zip), TRUE);
3114
3115- buf_pool_mutex_exit_allow(buf_pool);
3116+ //buf_pool_mutex_exit_allow(buf_pool);
3117 mutex_enter(&((buf_block_t*) bpage)->mutex);
3118 page_zip_set_size(&bpage->zip, 0);
3119 }
3120@@ -1977,18 +2115,19 @@
3121 void
3122 buf_LRU_block_free_hashed_page(
3123 /*===========================*/
3124- buf_block_t* block) /*!< in: block, must contain a file page and
3125+ buf_block_t* block, /*!< in: block, must contain a file page and
3126 be in a state where it can be freed */
3127+ ibool have_page_hash_mutex)
3128 {
3129 #ifdef UNIV_DEBUG
3130- buf_pool_t* buf_pool = buf_pool_from_block(block);
3131- ut_ad(buf_pool_mutex_own(buf_pool));
3132+ //buf_pool_t* buf_pool = buf_pool_from_block(block);
3133+ //ut_ad(buf_pool_mutex_own(buf_pool));
3134 #endif
3135 ut_ad(mutex_own(&block->mutex));
3136
3137 buf_block_set_state(block, BUF_BLOCK_MEMORY);
3138
3139- buf_LRU_block_free_non_file_page(block);
3140+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3141 }
3142
3143 /**********************************************************************//**
3144@@ -2015,7 +2154,8 @@
3145 }
3146
3147 if (adjust) {
3148- buf_pool_mutex_enter(buf_pool);
3149+ //buf_pool_mutex_enter(buf_pool);
3150+ mutex_enter(&buf_pool->LRU_list_mutex);
3151
3152 if (ratio != buf_pool->LRU_old_ratio) {
3153 buf_pool->LRU_old_ratio = ratio;
3154@@ -2027,7 +2167,8 @@
3155 }
3156 }
3157
3158- buf_pool_mutex_exit(buf_pool);
3159+ //buf_pool_mutex_exit(buf_pool);
3160+ mutex_exit(&buf_pool->LRU_list_mutex);
3161 } else {
3162 buf_pool->LRU_old_ratio = ratio;
3163 }
d8778560 3164@@ -2132,7 +2273,8 @@
b4e1fa2c
AM
3165 ulint new_len;
3166
3167 ut_ad(buf_pool);
3168- buf_pool_mutex_enter(buf_pool);
3169+ //buf_pool_mutex_enter(buf_pool);
3170+ mutex_enter(&buf_pool->LRU_list_mutex);
3171
3172 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3173
d8778560 3174@@ -2193,16 +2335,22 @@
b4e1fa2c
AM
3175
3176 ut_a(buf_pool->LRU_old_len == old_len);
3177
3178- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3179+ mutex_exit(&buf_pool->LRU_list_mutex);
3180+ mutex_enter(&buf_pool->free_list_mutex);
3181+
3182+ UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3183 ut_ad(ut_list_node_313->in_free_list));
3184
3185 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3186 bpage != NULL;
3187- bpage = UT_LIST_GET_NEXT(list, bpage)) {
3188+ bpage = UT_LIST_GET_NEXT(free, bpage)) {
3189
3190 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3191 }
3192
3193+ mutex_exit(&buf_pool->free_list_mutex);
3194+ mutex_enter(&buf_pool->LRU_list_mutex);
3195+
3196 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3197 ut_ad(ut_list_node_313->in_unzip_LRU_list
3198 && ut_list_node_313->page.in_LRU_list));
d8778560 3199@@ -2216,7 +2364,8 @@
b4e1fa2c
AM
3200 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3201 }
3202
3203- buf_pool_mutex_exit(buf_pool);
3204+ //buf_pool_mutex_exit(buf_pool);
3205+ mutex_exit(&buf_pool->LRU_list_mutex);
3206 }
3207
3208 /**********************************************************************//**
d8778560 3209@@ -2252,7 +2401,8 @@
b4e1fa2c
AM
3210 const buf_page_t* bpage;
3211
3212 ut_ad(buf_pool);
3213- buf_pool_mutex_enter(buf_pool);
3214+ //buf_pool_mutex_enter(buf_pool);
3215+ mutex_enter(&buf_pool->LRU_list_mutex);
3216
3217 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3218
d8778560 3219@@ -2309,7 +2459,8 @@
b4e1fa2c
AM
3220 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3221 }
3222
3223- buf_pool_mutex_exit(buf_pool);
3224+ //buf_pool_mutex_exit(buf_pool);
3225+ mutex_exit(&buf_pool->LRU_list_mutex);
3226 }
3227
3228 /**********************************************************************//**
3229diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3230--- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900
3231+++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900
3232@@ -311,6 +311,7 @@
3233
3234 return(0);
3235 }
3236+ buf_pool_mutex_exit(buf_pool);
3237
3238 /* Check that almost all pages in the area have been accessed; if
3239 offset == low, the accesses must be in a descending order, otherwise,
3240@@ -329,6 +330,7 @@
3241
3242 fail_count = 0;
3243
3244+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3245 for (i = low; i < high; i++) {
3246 bpage = buf_page_hash_get(buf_pool, space, i);
3247
3248@@ -356,7 +358,8 @@
3249
3250 if (fail_count > threshold) {
3251 /* Too many failures: return */
3252- buf_pool_mutex_exit(buf_pool);
3253+ //buf_pool_mutex_exit(buf_pool);
3254+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3255 return(0);
3256 }
3257
3258@@ -371,7 +374,8 @@
3259 bpage = buf_page_hash_get(buf_pool, space, offset);
3260
3261 if (bpage == NULL) {
3262- buf_pool_mutex_exit(buf_pool);
3263+ //buf_pool_mutex_exit(buf_pool);
3264+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3265
3266 return(0);
3267 }
3268@@ -397,7 +401,8 @@
3269 pred_offset = fil_page_get_prev(frame);
3270 succ_offset = fil_page_get_next(frame);
3271
3272- buf_pool_mutex_exit(buf_pool);
3273+ //buf_pool_mutex_exit(buf_pool);
3274+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3275
3276 if ((offset == low) && (succ_offset == offset + 1)) {
3277
3278diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3279--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900
3280+++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900
3281@@ -245,6 +245,10 @@
3282 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3283 {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3284 {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3285+ {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3286+ {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3287+ {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3288+ {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3289 {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3290 {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3291 {&dict_sys_mutex_key, "dict_sys_mutex", 0},
3292@@ -295,6 +299,7 @@
3293 {&archive_lock_key, "archive_lock", 0},
3294 # endif /* UNIV_LOG_ARCHIVE */
3295 {&btr_search_latch_key, "btr_search_latch", 0},
3296+ {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3297 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3298 {&buf_block_lock_key, "buf_block_lock", 0},
3299 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3300diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3301--- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900
3302+++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900
d8778560 3303@@ -1565,7 +1565,8 @@
b4e1fa2c
AM
3304
3305 buf_pool = buf_pool_from_array(i);
3306
3307- buf_pool_mutex_enter(buf_pool);
3308+ //buf_pool_mutex_enter(buf_pool);
3309+ mutex_enter(&buf_pool->zip_free_mutex);
3310
3311 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3312 buf_buddy_stat_t* buddy_stat;
d8778560 3313@@ -1595,7 +1596,8 @@
b4e1fa2c
AM
3314 }
3315 }
3316
3317- buf_pool_mutex_exit(buf_pool);
3318+ //buf_pool_mutex_exit(buf_pool);
3319+ mutex_exit(&buf_pool->zip_free_mutex);
3320
3321 if (status) {
3322 break;
3323diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3324--- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3325+++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
d8778560 3326@@ -3783,9 +3783,11 @@
b4e1fa2c
AM
3327 ulint fold = buf_page_address_fold(space, page_no);
3328 buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3329
3330- buf_pool_mutex_enter(buf_pool);
3331+ //buf_pool_mutex_enter(buf_pool);
3332+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3333 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3334- buf_pool_mutex_exit(buf_pool);
3335+ //buf_pool_mutex_exit(buf_pool);
3336+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3337
3338 if (UNIV_LIKELY_NULL(bpage)) {
3339 /* A buffer pool watch has been set or the
3340diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3341--- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900
3342+++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900
3343@@ -51,10 +51,11 @@
3344 buf_pool_t* buf_pool,
3345 /*!< buffer pool in which the block resides */
3346 ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3347- ibool* lru) /*!< in: pointer to a variable that will be assigned
3348+ ibool* lru, /*!< in: pointer to a variable that will be assigned
3349 TRUE if storage was allocated from the LRU list
3350 and buf_pool->mutex was temporarily released,
3351 or NULL if the LRU list should not be used */
3352+ ibool have_page_hash_mutex)
3353 __attribute__((malloc));
3354
3355 /**********************************************************************//**
3356@@ -67,7 +68,8 @@
3357 /*!< buffer pool in which the block resides */
3358 void* buf, /*!< in: block to be freed, must not be
3359 pointed to by the buffer pool */
3360- ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
3361+ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3362+ ibool have_page_hash_mutex)
3363 __attribute__((nonnull));
3364
3365 #ifndef UNIV_NONINL
3366diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3367--- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900
3368+++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900
3369@@ -46,10 +46,11 @@
3370 /*!< in: buffer pool in which the page resides */
3371 ulint i, /*!< in: index of buf_pool->zip_free[],
3372 or BUF_BUDDY_SIZES */
3373- ibool* lru) /*!< in: pointer to a variable that will be assigned
3374+ ibool* lru, /*!< in: pointer to a variable that will be assigned
3375 TRUE if storage was allocated from the LRU list
3376 and buf_pool->mutex was temporarily released,
3377 or NULL if the LRU list should not be used */
3378+ ibool have_page_hash_mutex)
3379 __attribute__((malloc));
3380
3381 /**********************************************************************//**
3382@@ -61,8 +62,9 @@
3383 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3384 void* buf, /*!< in: block to be freed, must not be
3385 pointed to by the buffer pool */
3386- ulint i) /*!< in: index of buf_pool->zip_free[],
3387+ ulint i, /*!< in: index of buf_pool->zip_free[],
3388 or BUF_BUDDY_SIZES */
3389+ ibool have_page_hash_mutex)
3390 __attribute__((nonnull));
3391
3392 /**********************************************************************//**
3393@@ -102,16 +104,17 @@
3394 the page resides */
3395 ulint size, /*!< in: block size, up to
3396 UNIV_PAGE_SIZE */
3397- ibool* lru) /*!< in: pointer to a variable
3398+ ibool* lru, /*!< in: pointer to a variable
3399 that will be assigned TRUE if
3400 storage was allocated from the
3401 LRU list and buf_pool->mutex was
3402 temporarily released, or NULL if
3403 the LRU list should not be used */
3404+ ibool have_page_hash_mutex)
3405 {
3406- ut_ad(buf_pool_mutex_own(buf_pool));
3407+ //ut_ad(buf_pool_mutex_own(buf_pool));
3408
3409- return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3410+ return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3411 }
3412
3413 /**********************************************************************//**
3414@@ -123,12 +126,25 @@
3415 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3416 void* buf, /*!< in: block to be freed, must not be
3417 pointed to by the buffer pool */
3418- ulint size) /*!< in: block size, up to
3419+ ulint size, /*!< in: block size, up to
3420 UNIV_PAGE_SIZE */
3421+ ibool have_page_hash_mutex)
3422 {
3423- ut_ad(buf_pool_mutex_own(buf_pool));
3424+ //ut_ad(buf_pool_mutex_own(buf_pool));
3425+
3426+ if (!have_page_hash_mutex) {
3427+ mutex_enter(&buf_pool->LRU_list_mutex);
3428+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3429+ }
3430
3431- buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3432+ mutex_enter(&buf_pool->zip_free_mutex);
3433+ buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3434+ mutex_exit(&buf_pool->zip_free_mutex);
3435+
3436+ if (!have_page_hash_mutex) {
3437+ mutex_exit(&buf_pool->LRU_list_mutex);
3438+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3439+ }
3440 }
3441
3442 #ifdef UNIV_MATERIALIZE
3443diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3444--- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900
3445+++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900
d8778560 3446@@ -203,6 +203,20 @@
b4e1fa2c
AM
3447 /*==========================*/
3448
3449 /********************************************************************//**
3450+X-locks the page_hash_latch of every buffer pool instance. */
3451+UNIV_INLINE
3452+void
3453+buf_pool_page_hash_x_lock_all(void);
3454+/*================================*/
3455+
3456+/********************************************************************//**
3457+Releases the X-lock on the page_hash_latch of every buffer pool instance. */
3458+UNIV_INLINE
3459+void
3460+buf_pool_page_hash_x_unlock_all(void);
3461+/*==================================*/
3462+
3463+/********************************************************************//**
3464 Creates the buffer pool.
3465 @return own: buf_pool object, NULL if not enough memory or error */
3466 UNIV_INTERN
d8778560 3467@@ -832,6 +846,15 @@
b4e1fa2c
AM
3468 const buf_page_t* bpage) /*!< in: pointer to control block */
3469 __attribute__((pure));
3470
3471+/*************************************************************************
3472+Gets the mutex of a block and enters it. Must not be declared pure: it enters
3473+a mutex. @return the entered mutex, or NULL if the block has no mutex */
3474+UNIV_INLINE
3475+mutex_t*
3476+buf_page_get_mutex_enter(
3477+/*=========================*/
3478+ const buf_page_t* bpage); /*!< in: pointer to control block */
3479+
3480 /*********************************************************************//**
3481 Get the flush type of a page.
3482 @return flush type */
d8778560 3483@@ -1313,7 +1336,7 @@
b4e1fa2c
AM
3484 All these are protected by buf_pool->mutex. */
3485 /* @{ */
3486
3487- UT_LIST_NODE_T(buf_page_t) list;
3488+ /* UT_LIST_NODE_T(buf_page_t) list; */
3489 /*!< based on state, this is a
3490 list node, protected either by
3491 buf_pool->mutex or by
d8778560 3492@@ -1341,6 +1364,10 @@
b4e1fa2c
AM
3493 BUF_BLOCK_REMOVE_HASH or
3494 BUF_BLOCK_READY_IN_USE. */
3495
3496+ /* the former 'list' node, split into one node per list for optimistic use */
3497+ UT_LIST_NODE_T(buf_page_t) free;
3498+ UT_LIST_NODE_T(buf_page_t) flush_list;
3499+ UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3500 #ifdef UNIV_DEBUG
3501 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3502 when buf_pool->flush_list_mutex is
d8778560 3503@@ -1433,11 +1460,11 @@
b4e1fa2c
AM
3504 a block is in the unzip_LRU list
3505 if page.state == BUF_BLOCK_FILE_PAGE
3506 and page.zip.data != NULL */
3507-#ifdef UNIV_DEBUG
3508+//#ifdef UNIV_DEBUG
3509 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3510 decompressed LRU list;
3511 used in debugging */
3512-#endif /* UNIV_DEBUG */
3513+//#endif /* UNIV_DEBUG */
3514 mutex_t mutex; /*!< mutex protecting this block:
3515 state (also protected by the buffer
3516 pool mutex), io_fix, buf_fix_count,
d8778560 3517@@ -1612,6 +1639,11 @@
b4e1fa2c
AM
3518 pool instance, protects compressed
3519 only pages (of type buf_page_t, not
3520 buf_block_t */
3521+ mutex_t LRU_list_mutex;
3522+ rw_lock_t page_hash_latch;
3523+ mutex_t free_list_mutex;
3524+ mutex_t zip_free_mutex;
3525+ mutex_t zip_hash_mutex;
3526 ulint instance_no; /*!< Array index of this buffer
3527 pool instance */
3528 ulint old_pool_size; /*!< Old pool size in bytes */
3529diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3530--- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900
3531+++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900
3532@@ -274,7 +274,7 @@
3533 case BUF_BLOCK_ZIP_FREE:
3534 /* This is a free page in buf_pool->zip_free[].
3535 Such pages should only be accessed by the buddy allocator. */
3536- ut_error;
3537+ /* ut_error; */ /* optimistic */
3538 break;
3539 case BUF_BLOCK_ZIP_PAGE:
3540 case BUF_BLOCK_ZIP_DIRTY:
3541@@ -317,9 +317,14 @@
3542 {
3543 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3544
3545+ if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3546+ /* TODO: this is interim code; it should be confirmed later. */
3547+ return(&buf_pool->zip_mutex);
3548+ }
3549+
3550 switch (buf_page_get_state(bpage)) {
3551 case BUF_BLOCK_ZIP_FREE:
3552- ut_error;
3553+ /* ut_error; */ /* optimistic */
3554 return(NULL);
3555 case BUF_BLOCK_ZIP_PAGE:
3556 case BUF_BLOCK_ZIP_DIRTY:
3557@@ -329,6 +334,28 @@
3558 }
3559 }
3560
3561+/*************************************************************************
3562+Gets the mutex of a block and enters it, retrying until the mutex entered is still the mutex of the block. @return the entered mutex, or NULL (nothing entered) if buf_page_get_mutex() returns NULL */
3563+UNIV_INLINE
3564+mutex_t*
3565+buf_page_get_mutex_enter(
3566+/*=========================*/
3567+ const buf_page_t* bpage) /*!< in: pointer to control block */
3568+{
3569+ mutex_t* block_mutex;
3570+
3571+ while(1) {
3572+ block_mutex = buf_page_get_mutex(bpage);
3573+ if (!block_mutex) /* buf_page_get_mutex() found no mutex: nothing to enter */
3574+ return block_mutex;
3575+
3576+ mutex_enter(block_mutex);
3577+ if (block_mutex == buf_page_get_mutex(bpage)) /* still the mutex of the page: done */
3578+ return block_mutex;
3579+ mutex_exit(block_mutex); /* the mutex of the page changed meanwhile: retry */
3580+ }
3581+}
3582+
3583 /*********************************************************************//**
3584 Get the flush type of a page.
3585 @return flush type */
3586@@ -425,8 +452,8 @@
3587 enum buf_io_fix io_fix) /*!< in: io_fix state */
3588 {
3589 #ifdef UNIV_DEBUG
3590- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3591- ut_ad(buf_pool_mutex_own(buf_pool));
3592+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3593+ //ut_ad(buf_pool_mutex_own(buf_pool));
3594 #endif
3595 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3596
3597@@ -456,14 +483,14 @@
3598 const buf_page_t* bpage) /*!< control block being relocated */
3599 {
3600 #ifdef UNIV_DEBUG
3601- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3602- ut_ad(buf_pool_mutex_own(buf_pool));
3603+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3604+ //ut_ad(buf_pool_mutex_own(buf_pool));
3605 #endif
3606 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3607 ut_ad(buf_page_in_file(bpage));
3608- ut_ad(bpage->in_LRU_list);
3609+ //ut_ad(bpage->in_LRU_list);
3610
3611- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3612+ return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3613 && bpage->buf_fix_count == 0);
3614 }
3615
3616@@ -477,8 +504,8 @@
3617 const buf_page_t* bpage) /*!< in: control block */
3618 {
3619 #ifdef UNIV_DEBUG
3620- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3621- ut_ad(buf_pool_mutex_own(buf_pool));
3622+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3623+ //ut_ad(buf_pool_mutex_own(buf_pool));
3624 #endif
3625 ut_ad(buf_page_in_file(bpage));
3626
3627@@ -498,7 +525,8 @@
3628 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3629 #endif /* UNIV_DEBUG */
3630 ut_a(buf_page_in_file(bpage));
3631- ut_ad(buf_pool_mutex_own(buf_pool));
3632+ //ut_ad(buf_pool_mutex_own(buf_pool));
3633+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3634 ut_ad(bpage->in_LRU_list);
3635
3636 #ifdef UNIV_LRU_DEBUG
3637@@ -545,9 +573,10 @@
3638 ulint time_ms) /*!< in: ut_time_ms() */
3639 {
3640 #ifdef UNIV_DEBUG
3641- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3642- ut_ad(buf_pool_mutex_own(buf_pool));
3643+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3644+ //ut_ad(buf_pool_mutex_own(buf_pool));
3645 #endif
3646+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3647 ut_a(buf_page_in_file(bpage));
3648
3649 if (!bpage->access_time) {
3650@@ -761,19 +790,19 @@
3651 /*===========*/
3652 buf_block_t* block) /*!< in, own: block to be freed */
3653 {
3654- buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3655+ //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3656
3657- buf_pool_mutex_enter(buf_pool);
3658+ //buf_pool_mutex_enter(buf_pool);
3659
3660 mutex_enter(&block->mutex);
3661
3662 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3663
3664- buf_LRU_block_free_non_file_page(block);
3665+ buf_LRU_block_free_non_file_page(block, FALSE);
3666
3667 mutex_exit(&block->mutex);
3668
3669- buf_pool_mutex_exit(buf_pool);
3670+ //buf_pool_mutex_exit(buf_pool);
3671 }
3672 #endif /* !UNIV_HOTBACKUP */
3673
3674@@ -821,17 +850,17 @@
3675 page frame */
3676 {
3677 ib_uint64_t lsn;
3678- mutex_t* block_mutex = buf_page_get_mutex(bpage);
3679-
3680- mutex_enter(block_mutex);
3681+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3682
3683- if (buf_page_in_file(bpage)) {
3684+ if (block_mutex && buf_page_in_file(bpage)) {
3685 lsn = bpage->newest_modification;
3686 } else {
3687 lsn = 0;
3688 }
3689
3690- mutex_exit(block_mutex);
3691+ if (block_mutex) {
3692+ mutex_exit(block_mutex);
3693+ }
3694
3695 return(lsn);
3696 }
3697@@ -849,7 +878,7 @@
3698 #ifdef UNIV_SYNC_DEBUG
3699 buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3700
3701- ut_ad((buf_pool_mutex_own(buf_pool)
3702+ ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3703 && (block->page.buf_fix_count == 0))
3704 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3705 #endif /* UNIV_SYNC_DEBUG */
3706@@ -979,7 +1008,11 @@
3707 buf_page_t* bpage;
3708
3709 ut_ad(buf_pool);
3710- ut_ad(buf_pool_mutex_own(buf_pool));
3711+ //ut_ad(buf_pool_mutex_own(buf_pool));
3712+#ifdef UNIV_SYNC_DEBUG
3713+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3714+ || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3715+#endif
3716 ut_ad(fold == buf_page_address_fold(space, offset));
3717
3718 /* Look for the page in the hash table */
3719@@ -1064,11 +1097,13 @@
3720 const buf_page_t* bpage;
3721 buf_pool_t* buf_pool = buf_pool_get(space, offset);
3722
3723- buf_pool_mutex_enter(buf_pool);
3724+ //buf_pool_mutex_enter(buf_pool);
3725+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3726
3727 bpage = buf_page_hash_get(buf_pool, space, offset);
3728
3729- buf_pool_mutex_exit(buf_pool);
3730+ //buf_pool_mutex_exit(buf_pool);
3731+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3732
3733 return(bpage != NULL);
3734 }
3735@@ -1196,4 +1231,38 @@
3736 buf_pool_mutex_exit(buf_pool);
3737 }
3738 }
3739+
3740+/********************************************************************//**
3741+X-locks the page_hash_latch of every buffer pool instance, in array order. */
3742+UNIV_INLINE
3743+void
3744+buf_pool_page_hash_x_lock_all(void)
3745+/*===============================*/
3746+{
3747+ ulint i;
3748+
3749+ for (i = 0; i < srv_buf_pool_instances; i++) {
3750+ buf_pool_t* buf_pool;
3751+
3752+ buf_pool = buf_pool_from_array(i);
3753+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3754+ }
3755+}
3756+
3757+/********************************************************************//**
3758+Releases the X-lock on the page_hash_latch of every buffer pool instance. */
3759+UNIV_INLINE
3760+void
3761+buf_pool_page_hash_x_unlock_all(void)
3762+/*=================================*/
3763+{
3764+ ulint i;
3765+
3766+ for (i = 0; i < srv_buf_pool_instances; i++) {
3767+ buf_pool_t* buf_pool;
3768+
3769+ buf_pool = buf_pool_from_array(i);
3770+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3771+ }
3772+}
3773 #endif /* !UNIV_HOTBACKUP */
3774diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3775--- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900
3776+++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900
3777@@ -113,10 +113,11 @@
3778 buf_page_t* bpage, /*!< in: block to be freed */
3779 ibool zip, /*!< in: TRUE if should remove also the
3780 compressed page of an uncompressed page */
3781- ibool* buf_pool_mutex_released);
3782+ ibool* buf_pool_mutex_released,
3783 /*!< in: pointer to a variable that will
3784 be assigned TRUE if buf_pool->mutex
3785 was temporarily released, or NULL */
3786+ ibool have_LRU_mutex);
3787 /******************************************************************//**
3788 Try to free a replaceable block.
3789 @return TRUE if found and freed */
3790@@ -163,7 +164,8 @@
3791 void
3792 buf_LRU_block_free_non_file_page(
3793 /*=============================*/
3794- buf_block_t* block); /*!< in: block, must not contain a file page */
3795+ buf_block_t* block, /*!< in: block, must not contain a file page */
3796+ ibool have_page_hash_mutex);
3797 /******************************************************************//**
3798 Adds a block to the LRU list. */
3799 UNIV_INTERN
3800diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3801--- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900
3802+++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900
3803@@ -112,6 +112,7 @@
3804 extern mysql_pfs_key_t archive_lock_key;
3805 # endif /* UNIV_LOG_ARCHIVE */
3806 extern mysql_pfs_key_t btr_search_latch_key;
3807+extern mysql_pfs_key_t buf_pool_page_hash_key;
3808 extern mysql_pfs_key_t buf_block_lock_key;
3809 # ifdef UNIV_SYNC_DEBUG
3810 extern mysql_pfs_key_t buf_block_debug_latch_key;
3811diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3812--- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900
3813+++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900
3814@@ -75,6 +75,10 @@
3815 extern mysql_pfs_key_t buffer_block_mutex_key;
3816 extern mysql_pfs_key_t buf_pool_mutex_key;
3817 extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3818+extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3819+extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3820+extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3821+extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3822 extern mysql_pfs_key_t cache_last_read_mutex_key;
3823 extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3824 extern mysql_pfs_key_t dict_sys_mutex_key;
3825@@ -660,7 +664,7 @@
3826 #define SYNC_TRX_LOCK_HEAP 298
3827 #define SYNC_TRX_SYS_HEADER 290
3828 #define SYNC_LOG 170
3829-#define SYNC_LOG_FLUSH_ORDER 147
3830+#define SYNC_LOG_FLUSH_ORDER 156
3831 #define SYNC_RECV 168
3832 #define SYNC_WORK_QUEUE 162
3833 #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
3834@@ -670,8 +674,13 @@
3835 SYNC_SEARCH_SYS, as memory allocation
3836 can call routines there! Otherwise
3837 the level is SYNC_MEM_HASH. */
3838+#define SYNC_BUF_LRU_LIST 158
3839+#define SYNC_BUF_PAGE_HASH 157
3840+#define SYNC_BUF_BLOCK 155 /* Block mutex */
3841+#define SYNC_BUF_FREE_LIST 153
3842+#define SYNC_BUF_ZIP_FREE 152
3843+#define SYNC_BUF_ZIP_HASH 151
3844 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
3845-#define SYNC_BUF_BLOCK 146 /* Block mutex */
3846 #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
3847 #define SYNC_DOUBLEWRITE 140
3848 #define SYNC_ANY_LATCH 135
3849@@ -703,7 +712,7 @@
3850 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3851 when atomic operations are not enabled */
3852 #endif
3853- ulint waiters; /*!< This ulint is set to 1 if there are (or
3854+ volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3855 may be) threads waiting in the global wait
3856 array for this mutex to be released.
3857 Otherwise, this is 0. */
3858diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3859--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900
3860+++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900
d8778560 3861@@ -3094,7 +3094,7 @@
b4e1fa2c
AM
3862 level += log_sys->max_checkpoint_age
3863 - (lsn - oldest_modification);
3864 }
3865- bpage = UT_LIST_GET_NEXT(list, bpage);
3866+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3867 n_blocks++;
3868 }
3869
d8778560 3870@@ -3180,7 +3180,7 @@
b4e1fa2c
AM
3871 found = TRUE;
3872 break;
3873 }
3874- bpage = UT_LIST_GET_NEXT(list, bpage);
3875+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3876 new_blocks_num++;
3877 }
3878 if (!found) {
3879diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3880--- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3881+++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
3882@@ -265,7 +265,7 @@
3883 mutex->lock_word = 0;
3884 #endif
3885 mutex->event = os_event_create(NULL);
3886- mutex_set_waiters(mutex, 0);
3887+ mutex->waiters = 0;
3888 #ifdef UNIV_DEBUG
3889 mutex->magic_n = MUTEX_MAGIC_N;
3890 #endif /* UNIV_DEBUG */
3891@@ -444,6 +444,15 @@
3892 mutex_t* mutex, /*!< in: mutex */
3893 ulint n) /*!< in: value to set */
3894 {
3895+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3896+ ut_ad(mutex);
3897+
3898+ if (n) {
3899+ os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3900+ } else {
3901+ os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3902+ }
3903+#else
3904 volatile ulint* ptr; /* declared volatile to ensure that
3905 the value is stored to memory */
3906 ut_ad(mutex);
3907@@ -452,6 +461,7 @@
3908
3909 *ptr = n; /* Here we assume that the write of a single
3910 word in memory is atomic */
3911+#endif
3912 }
3913
3914 /******************************************************************//**
3915@@ -1193,7 +1203,12 @@
3916 ut_error;
3917 }
3918 break;
3919+ case SYNC_BUF_LRU_LIST:
3920 case SYNC_BUF_FLUSH_LIST:
3921+ case SYNC_BUF_PAGE_HASH:
3922+ case SYNC_BUF_FREE_LIST:
3923+ case SYNC_BUF_ZIP_FREE:
3924+ case SYNC_BUF_ZIP_HASH:
3925 case SYNC_BUF_POOL:
3926 /* We can have multiple mutexes of this type therefore we
3927 can only check whether the greater than condition holds. */
3928@@ -1211,7 +1226,8 @@
3929 buffer block (block->mutex or buf_pool->zip_mutex). */
3930 if (!sync_thread_levels_g(array, level, FALSE)) {
3931 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3932- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3933+ /* The exact rule is not settled yet, so the assertion below is disabled for now. */
3934+ //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
3935 }
3936 break;
3937 case SYNC_REC_LOCK:
This page took 0.482922 seconds and 4 git commands to generate.