1# name : innodb_split_buf_pool_mutex.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
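#
# overview (editor's note, inferred from the hunks below; not part of the
# original Percona header): this patch splits the single buf_pool->mutex
# into a set of narrower latches created in buf_pool_init():
#   buf_pool->LRU_list_mutex   (SYNC_BUF_LRU_LIST)  - LRU / unzip_LRU lists
#   buf_pool->page_hash_latch  (SYNC_BUF_PAGE_HASH) - rw-lock over page_hash
#   buf_pool->free_list_mutex  (SYNC_BUF_FREE_LIST) - free list
#   buf_pool->zip_free_mutex   (SYNC_BUF_ZIP_FREE)  - zip_free[] buddy lists
#   buf_pool->zip_hash_mutex   (SYNC_BUF_ZIP_HASH)  - zip_hash
# To keep the latch order, several helpers gain an extra flag argument:
# buf_LRU_free_block() takes a fourth TRUE/FALSE argument and
# buf_buddy_alloc()/buf_buddy_free() take an ibool have_page_hash_mutex,
# while the UT_LIST node name "list" is split into zip_list/flush_list/free.
# The typical caller-side conversion is sketched below; the sketch is
# illustrative only and is not itself applied by this patch:
#
#   /* before */                        /* after */
#   buf_pool_mutex_enter(buf_pool);     mutex_enter(&buf_pool->LRU_list_mutex);
#   ... LRU list manipulation ...       ... LRU list manipulation ...
#   buf_pool_mutex_exit(buf_pool);      mutex_exit(&buf_pool->LRU_list_mutex);
#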
8diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9--- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900
10+++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900
11@@ -4039,7 +4039,8 @@
12
13 mtr_commit(mtr);
14
15- buf_pool_mutex_enter(buf_pool);
16+ //buf_pool_mutex_enter(buf_pool);
17+ mutex_enter(&buf_pool->LRU_list_mutex);
18 mutex_enter(&block->mutex);
19
20 /* Only free the block if it is still allocated to
21@@ -4050,17 +4051,22 @@
22 && buf_block_get_space(block) == space
23 && buf_block_get_page_no(block) == page_no) {
24
25- if (buf_LRU_free_block(&block->page, all, NULL)
26+ if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
27 != BUF_LRU_FREED
28- && all && block->page.zip.data) {
29+ && all && block->page.zip.data
30+ /* Now, buf_LRU_free_block() may release mutex temporarily */
31+ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
32+ && buf_block_get_space(block) == space
33+ && buf_block_get_page_no(block) == page_no) {
34 /* Attempt to deallocate the uncompressed page
35 if the whole block cannot be deallocted. */
36
37- buf_LRU_free_block(&block->page, FALSE, NULL);
38+ buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
39 }
40 }
41
42- buf_pool_mutex_exit(buf_pool);
43+ //buf_pool_mutex_exit(buf_pool);
44+ mutex_exit(&buf_pool->LRU_list_mutex);
45 mutex_exit(&block->mutex);
46 }
47
48diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
49--- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900
50+++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900
51@@ -1211,7 +1211,7 @@
52 ulint* offsets;
53
54 rw_lock_x_lock(&btr_search_latch);
55- buf_pool_mutex_enter_all();
56+ //buf_pool_mutex_enter_all();
57
58 table = btr_search_sys->hash_index;
59
60@@ -1220,6 +1220,8 @@
61
62 buf_pool = buf_pool_from_array(j);
63
64+ mutex_enter(&buf_pool->LRU_list_mutex);
65+
66 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
67
68 while (bpage != NULL) {
69@@ -1301,9 +1303,11 @@
70
71 bpage = UT_LIST_GET_PREV(LRU, bpage);
72 }
73+
74+ mutex_exit(&buf_pool->LRU_list_mutex);
75 }
76
77- buf_pool_mutex_exit_all();
78+ //buf_pool_mutex_exit_all();
79 rw_lock_x_unlock(&btr_search_latch);
80
81 if (UNIV_LIKELY_NULL(heap)) {
82@@ -1896,7 +1900,7 @@
83 rec_offs_init(offsets_);
84
85 rw_lock_x_lock(&btr_search_latch);
86- buf_pool_mutex_enter_all();
87+ buf_pool_page_hash_x_lock_all();
88
89 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
90
91@@ -1904,11 +1908,11 @@
92 /* We release btr_search_latch every once in a while to
93 give other queries a chance to run. */
94 if ((i != 0) && ((i % chunk_size) == 0)) {
95- buf_pool_mutex_exit_all();
96+ buf_pool_page_hash_x_unlock_all();
97 rw_lock_x_unlock(&btr_search_latch);
98 os_thread_yield();
99 rw_lock_x_lock(&btr_search_latch);
100- buf_pool_mutex_enter_all();
101+ buf_pool_page_hash_x_lock_all();
102 }
103
104 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
105@@ -2019,11 +2023,11 @@
106 /* We release btr_search_latch every once in a while to
107 give other queries a chance to run. */
108 if (i != 0) {
109- buf_pool_mutex_exit_all();
110+ buf_pool_page_hash_x_unlock_all();
111 rw_lock_x_unlock(&btr_search_latch);
112 os_thread_yield();
113 rw_lock_x_lock(&btr_search_latch);
114- buf_pool_mutex_enter_all();
115+ buf_pool_page_hash_x_lock_all();
116 }
117
118 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
119@@ -2031,7 +2035,7 @@
120 }
121 }
122
123- buf_pool_mutex_exit_all();
124+ buf_pool_page_hash_x_unlock_all();
125 rw_lock_x_unlock(&btr_search_latch);
126 if (UNIV_LIKELY_NULL(heap)) {
127 mem_heap_free(heap);
128diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
129--- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
130+++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
131@@ -73,10 +73,11 @@
132 if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
133 #endif /* UNIV_DEBUG_VALGRIND */
134
135- ut_ad(buf_pool_mutex_own(buf_pool));
136+ //ut_ad(buf_pool_mutex_own(buf_pool));
137+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
138 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
139 ut_ad(buf_pool->zip_free[i].start != bpage);
140- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
141+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
142
143 #ifdef UNIV_DEBUG_VALGRIND
144 if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
145@@ -96,8 +97,8 @@
146 buf_pool->zip_free[] */
147 {
148 #ifdef UNIV_DEBUG_VALGRIND
149- buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
150- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
151+ buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
152+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
153
154 if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
155 if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
156@@ -106,9 +107,10 @@
157 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
158 #endif /* UNIV_DEBUG_VALGRIND */
159
160- ut_ad(buf_pool_mutex_own(buf_pool));
161+ //ut_ad(buf_pool_mutex_own(buf_pool));
162+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
163 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
164- UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
165+ UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
166
167 #ifdef UNIV_DEBUG_VALGRIND
168 if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
169@@ -128,12 +130,13 @@
170 {
171 buf_page_t* bpage;
172
173- ut_ad(buf_pool_mutex_own(buf_pool));
174+ //ut_ad(buf_pool_mutex_own(buf_pool));
175+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
176 ut_a(i < BUF_BUDDY_SIZES);
177
178 #ifndef UNIV_DEBUG_VALGRIND
179 /* Valgrind would complain about accessing free memory. */
180- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
181+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
182 ut_ad(buf_page_get_state(ut_list_node_313)
183 == BUF_BLOCK_ZIP_FREE)));
184 #endif /* !UNIV_DEBUG_VALGRIND */
185@@ -177,16 +180,19 @@
186 buf_buddy_block_free(
187 /*=================*/
188 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
189- void* buf) /*!< in: buffer frame to deallocate */
190+ void* buf, /*!< in: buffer frame to deallocate */
191+ ibool have_page_hash_mutex)
192 {
193 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
194 buf_page_t* bpage;
195 buf_block_t* block;
196
197- ut_ad(buf_pool_mutex_own(buf_pool));
198+ //ut_ad(buf_pool_mutex_own(buf_pool));
199 ut_ad(!mutex_own(&buf_pool->zip_mutex));
200 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
201
202+ mutex_enter(&buf_pool->zip_hash_mutex);
203+
204 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
205 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
206 && bpage->in_zip_hash && !bpage->in_page_hash),
207@@ -198,12 +204,14 @@
208 ut_d(bpage->in_zip_hash = FALSE);
209 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
210
211+ mutex_exit(&buf_pool->zip_hash_mutex);
212+
213 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
214 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
215
216 block = (buf_block_t*) bpage;
217 mutex_enter(&block->mutex);
218- buf_LRU_block_free_non_file_page(block);
219+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
220 mutex_exit(&block->mutex);
221
222 ut_ad(buf_pool->buddy_n_frames > 0);
223@@ -220,7 +228,7 @@
224 {
225 buf_pool_t* buf_pool = buf_pool_from_block(block);
226 const ulint fold = BUF_POOL_ZIP_FOLD(block);
227- ut_ad(buf_pool_mutex_own(buf_pool));
228+ //ut_ad(buf_pool_mutex_own(buf_pool));
229 ut_ad(!mutex_own(&buf_pool->zip_mutex));
230 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
231
232@@ -232,7 +240,10 @@
233 ut_ad(!block->page.in_page_hash);
234 ut_ad(!block->page.in_zip_hash);
235 ut_d(block->page.in_zip_hash = TRUE);
236+
237+ mutex_enter(&buf_pool->zip_hash_mutex);
238 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
239+ mutex_exit(&buf_pool->zip_hash_mutex);
240
241 ut_d(buf_pool->buddy_n_frames++);
242 }
243@@ -268,7 +279,7 @@
244 bpage->state = BUF_BLOCK_ZIP_FREE;
245 #ifndef UNIV_DEBUG_VALGRIND
246 /* Valgrind would complain about accessing free memory. */
247- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
248+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
249 ut_ad(buf_page_get_state(
250 ut_list_node_313)
251 == BUF_BLOCK_ZIP_FREE)));
252@@ -291,25 +302,29 @@
253 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
254 ulint i, /*!< in: index of buf_pool->zip_free[],
255 or BUF_BUDDY_SIZES */
256- ibool* lru) /*!< in: pointer to a variable that
257+ ibool* lru, /*!< in: pointer to a variable that
258 will be assigned TRUE if storage was
259 allocated from the LRU list and
260 buf_pool->mutex was temporarily
261 released, or NULL if the LRU list
262 should not be used */
263+ ibool have_page_hash_mutex)
264 {
265 buf_block_t* block;
266
267- ut_ad(buf_pool_mutex_own(buf_pool));
268+ //ut_ad(buf_pool_mutex_own(buf_pool));
269+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
270 ut_ad(!mutex_own(&buf_pool->zip_mutex));
271
272 if (i < BUF_BUDDY_SIZES) {
273 /* Try to allocate from the buddy system. */
274+ mutex_enter(&buf_pool->zip_free_mutex);
275 block = buf_buddy_alloc_zip(buf_pool, i);
276
277 if (block) {
278 goto func_exit;
279 }
280+ mutex_exit(&buf_pool->zip_free_mutex);
281 }
282
283 /* Try allocating from the buf_pool->free list. */
284@@ -326,19 +341,30 @@
285 }
286
287 /* Try replacing an uncompressed page in the buffer pool. */
288- buf_pool_mutex_exit(buf_pool);
289+ //buf_pool_mutex_exit(buf_pool);
290+ mutex_exit(&buf_pool->LRU_list_mutex);
291+ if (have_page_hash_mutex) {
292+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
293+ }
294 block = buf_LRU_get_free_block(buf_pool, 0);
295 *lru = TRUE;
296- buf_pool_mutex_enter(buf_pool);
297+ //buf_pool_mutex_enter(buf_pool);
298+ mutex_enter(&buf_pool->LRU_list_mutex);
299+ if (have_page_hash_mutex) {
300+ rw_lock_x_lock(&buf_pool->page_hash_latch);
301+ }
302
303 alloc_big:
304 buf_buddy_block_register(block);
305
306+ mutex_enter(&buf_pool->zip_free_mutex);
307 block = buf_buddy_alloc_from(
308 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
309
310 func_exit:
311 buf_pool->buddy_stat[i].used++;
312+ mutex_exit(&buf_pool->zip_free_mutex);
313+
314 return(block);
315 }
316
317@@ -355,7 +381,10 @@
318 buf_page_t* b;
319 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
320
321- ut_ad(buf_pool_mutex_own(buf_pool));
322+ //ut_ad(buf_pool_mutex_own(buf_pool));
323+#ifdef UNIV_SYNC_DEBUG
324+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
325+#endif
326
327 switch (buf_page_get_state(bpage)) {
328 case BUF_BLOCK_ZIP_FREE:
329@@ -364,7 +393,7 @@
330 case BUF_BLOCK_FILE_PAGE:
331 case BUF_BLOCK_MEMORY:
332 case BUF_BLOCK_REMOVE_HASH:
333- ut_error;
334+ /* ut_error; */ /* optimistic */
335 case BUF_BLOCK_ZIP_DIRTY:
336 /* Cannot relocate dirty pages. */
337 return(FALSE);
338@@ -374,9 +403,18 @@
339 }
340
341 mutex_enter(&buf_pool->zip_mutex);
342+ mutex_enter(&buf_pool->zip_free_mutex);
343
344 if (!buf_page_can_relocate(bpage)) {
345 mutex_exit(&buf_pool->zip_mutex);
346+ mutex_exit(&buf_pool->zip_free_mutex);
347+ return(FALSE);
348+ }
349+
350+ if (bpage != buf_page_hash_get(buf_pool,
351+ bpage->space, bpage->offset)) {
352+ mutex_exit(&buf_pool->zip_mutex);
353+ mutex_exit(&buf_pool->zip_free_mutex);
354 return(FALSE);
355 }
356
357@@ -384,18 +422,19 @@
358 ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
359
360 /* relocate buf_pool->zip_clean */
361- b = UT_LIST_GET_PREV(list, dpage);
362- UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
363+ b = UT_LIST_GET_PREV(zip_list, dpage);
364+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
365
366 if (b) {
367- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
368+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
369 } else {
370- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
371+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
372 }
373
374 UNIV_MEM_INVALID(bpage, sizeof *bpage);
375
376 mutex_exit(&buf_pool->zip_mutex);
377+ mutex_exit(&buf_pool->zip_free_mutex);
378 return(TRUE);
379 }
380
381@@ -409,14 +448,16 @@
382 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
383 void* src, /*!< in: block to relocate */
384 void* dst, /*!< in: free block to relocate to */
385- ulint i) /*!< in: index of
386+ ulint i, /*!< in: index of
387 buf_pool->zip_free[] */
388+ ibool have_page_hash_mutex)
389 {
390 buf_page_t* bpage;
391 const ulint size = BUF_BUDDY_LOW << i;
392 ullint usec = ut_time_us(NULL);
393
394- ut_ad(buf_pool_mutex_own(buf_pool));
395+ //ut_ad(buf_pool_mutex_own(buf_pool));
396+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
397 ut_ad(!mutex_own(&buf_pool->zip_mutex));
398 ut_ad(!ut_align_offset(src, size));
399 ut_ad(!ut_align_offset(dst, size));
400@@ -438,6 +479,12 @@
401 /* This is a compressed page. */
402 mutex_t* mutex;
403
404+ if (!have_page_hash_mutex) {
405+ mutex_exit(&buf_pool->zip_free_mutex);
406+ mutex_enter(&buf_pool->LRU_list_mutex);
407+ rw_lock_x_lock(&buf_pool->page_hash_latch);
408+ }
409+
410 /* The src block may be split into smaller blocks,
411 some of which may be free. Thus, the
412 mach_read_from_4() calls below may attempt to read
413@@ -462,6 +509,11 @@
414 added to buf_pool->page_hash yet. Obviously,
415 it cannot be relocated. */
416
417+ if (!have_page_hash_mutex) {
418+ mutex_enter(&buf_pool->zip_free_mutex);
419+ mutex_exit(&buf_pool->LRU_list_mutex);
420+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
421+ }
422 return(FALSE);
423 }
424
425@@ -473,18 +525,27 @@
426 For the sake of simplicity, give up. */
427 ut_ad(page_zip_get_size(&bpage->zip) < size);
428
429+ if (!have_page_hash_mutex) {
430+ mutex_enter(&buf_pool->zip_free_mutex);
431+ mutex_exit(&buf_pool->LRU_list_mutex);
432+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
433+ }
434 return(FALSE);
435 }
436
437+ /* To keep latch order */
438+ if (have_page_hash_mutex)
439+ mutex_exit(&buf_pool->zip_free_mutex);
440+
441 /* The block must have been allocated, but it may
442 contain uninitialized data. */
443 UNIV_MEM_ASSERT_W(src, size);
444
445- mutex = buf_page_get_mutex(bpage);
446+ mutex = buf_page_get_mutex_enter(bpage);
447
448- mutex_enter(mutex);
449+ mutex_enter(&buf_pool->zip_free_mutex);
450
451- if (buf_page_can_relocate(bpage)) {
452+ if (mutex && buf_page_can_relocate(bpage)) {
453 /* Relocate the compressed page. */
454 ut_a(bpage->zip.data == src);
455 memcpy(dst, src, size);
456@@ -499,10 +560,22 @@
457 buddy_stat->relocated_usec
458 += ut_time_us(NULL) - usec;
459 }
460+
461+ if (!have_page_hash_mutex) {
462+ mutex_exit(&buf_pool->LRU_list_mutex);
463+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
464+ }
465 return(TRUE);
466 }
467
468- mutex_exit(mutex);
469+ if (!have_page_hash_mutex) {
470+ mutex_exit(&buf_pool->LRU_list_mutex);
471+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
472+ }
473+
474+ if (mutex) {
475+ mutex_exit(mutex);
476+ }
477 } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
478 /* This must be a buf_page_t object. */
479 #if UNIV_WORD_SIZE == 4
480@@ -511,10 +584,31 @@
481 about uninitialized pad bytes. */
482 UNIV_MEM_ASSERT_RW(src, size);
483 #endif
484+
485+ mutex_exit(&buf_pool->zip_free_mutex);
486+
487+ if (!have_page_hash_mutex) {
488+ mutex_enter(&buf_pool->LRU_list_mutex);
489+ rw_lock_x_lock(&buf_pool->page_hash_latch);
490+ }
491+
492 if (buf_buddy_relocate_block(src, dst)) {
493+ mutex_enter(&buf_pool->zip_free_mutex);
494+
495+ if (!have_page_hash_mutex) {
496+ mutex_exit(&buf_pool->LRU_list_mutex);
497+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
498+ }
499
500 goto success;
501 }
502+
503+ mutex_enter(&buf_pool->zip_free_mutex);
504+
505+ if (!have_page_hash_mutex) {
506+ mutex_exit(&buf_pool->LRU_list_mutex);
507+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
508+ }
509 }
510
511 return(FALSE);
512@@ -529,13 +623,15 @@
513 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
514 void* buf, /*!< in: block to be freed, must not be
515 pointed to by the buffer pool */
516- ulint i) /*!< in: index of buf_pool->zip_free[],
517+ ulint i, /*!< in: index of buf_pool->zip_free[],
518 or BUF_BUDDY_SIZES */
519+ ibool have_page_hash_mutex)
520 {
521 buf_page_t* bpage;
522 buf_page_t* buddy;
523
524- ut_ad(buf_pool_mutex_own(buf_pool));
525+ //ut_ad(buf_pool_mutex_own(buf_pool));
526+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
527 ut_ad(!mutex_own(&buf_pool->zip_mutex));
528 ut_ad(i <= BUF_BUDDY_SIZES);
529 ut_ad(buf_pool->buddy_stat[i].used > 0);
530@@ -546,7 +642,9 @@
531 ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
532
533 if (i == BUF_BUDDY_SIZES) {
534- buf_buddy_block_free(buf_pool, buf);
535+ mutex_exit(&buf_pool->zip_free_mutex);
536+ buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
537+ mutex_enter(&buf_pool->zip_free_mutex);
538 return;
539 }
540
541@@ -591,7 +689,7 @@
542 ut_a(bpage != buf);
543
544 {
545- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
546+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
547 UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
548 bpage = next;
549 }
550@@ -600,13 +698,13 @@
551 #ifndef UNIV_DEBUG_VALGRIND
552 buddy_nonfree:
553 /* Valgrind would complain about accessing free memory. */
554- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
555+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
556 ut_ad(buf_page_get_state(ut_list_node_313)
557 == BUF_BLOCK_ZIP_FREE)));
558 #endif /* UNIV_DEBUG_VALGRIND */
559
560 /* The buddy is not free. Is there a free block of this size? */
561- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
562+ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
563
564 if (bpage) {
565 /* Remove the block from the free list, because a successful
566@@ -616,7 +714,7 @@
567 buf_buddy_remove_from_free(buf_pool, bpage, i);
568
569 /* Try to relocate the buddy of buf to the free block. */
570- if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
571+ if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
572
573 ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
574 goto buddy_free2;
575@@ -636,14 +734,14 @@
576
577 (Parts of the buddy can be free in
578 buf_pool->zip_free[j] with j < i.) */
579- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
580+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
581 ut_ad(buf_page_get_state(
582 ut_list_node_313)
583 == BUF_BLOCK_ZIP_FREE
584 && ut_list_node_313 != buddy)));
585 #endif /* !UNIV_DEBUG_VALGRIND */
586
587- if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
588+ if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
589
590 buf = bpage;
591 UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
592diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
593--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900
594+++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900
595@@ -263,6 +263,7 @@
596 #ifdef UNIV_PFS_RWLOCK
597 /* Keys to register buffer block related rwlocks and mutexes with
598 performance schema */
599+UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
600 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
601 # ifdef UNIV_SYNC_DEBUG
602 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
603@@ -273,6 +274,10 @@
604 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
605 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
606 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
607+UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
608+UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
609+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
610+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
611 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
612 #endif /* UNIV_PFS_MUTEX */
613
614@@ -881,9 +886,9 @@
615 block->page.in_zip_hash = FALSE;
616 block->page.in_flush_list = FALSE;
617 block->page.in_free_list = FALSE;
618- block->in_unzip_LRU_list = FALSE;
619 #endif /* UNIV_DEBUG */
620 block->page.in_LRU_list = FALSE;
621+ block->in_unzip_LRU_list = FALSE;
622 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
623 block->n_pointers = 0;
624 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
625@@ -981,9 +986,11 @@
626 memset(block->frame, '\0', UNIV_PAGE_SIZE);
627 #endif
628 /* Add the block to the free list */
629- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
630+ mutex_enter(&buf_pool->free_list_mutex);
631+ UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
632
633 ut_d(block->page.in_free_list = TRUE);
634+ mutex_exit(&buf_pool->free_list_mutex);
635 ut_ad(buf_pool_from_block(block) == buf_pool);
636
637 block++;
638@@ -1038,7 +1045,8 @@
639 buf_chunk_t* chunk = buf_pool->chunks;
640
641 ut_ad(buf_pool);
642- ut_ad(buf_pool_mutex_own(buf_pool));
643+ //ut_ad(buf_pool_mutex_own(buf_pool));
644+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
645 for (n = buf_pool->n_chunks; n--; chunk++) {
646
647 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
648@@ -1138,7 +1146,7 @@
649 buf_block_t* block;
650 const buf_block_t* block_end;
651
652- ut_ad(buf_pool_mutex_own(buf_pool));
653+ //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
654
655 block_end = chunk->blocks + chunk->size;
656
657@@ -1150,8 +1158,10 @@
658 ut_ad(!block->in_unzip_LRU_list);
659 ut_ad(!block->page.in_flush_list);
660 /* Remove the block from the free list. */
661+ mutex_enter(&buf_pool->free_list_mutex);
662 ut_ad(block->page.in_free_list);
663- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
664+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
665+ mutex_exit(&buf_pool->free_list_mutex);
666
667 /* Free the latches. */
668 mutex_free(&block->mutex);
669@@ -1208,9 +1218,21 @@
670 ------------------------------- */
671 mutex_create(buf_pool_mutex_key,
672 &buf_pool->mutex, SYNC_BUF_POOL);
673+ mutex_create(buf_pool_LRU_list_mutex_key,
674+ &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
675+ rw_lock_create(buf_pool_page_hash_key,
676+ &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
677+ mutex_create(buf_pool_free_list_mutex_key,
678+ &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
679+ mutex_create(buf_pool_zip_free_mutex_key,
680+ &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
681+ mutex_create(buf_pool_zip_hash_mutex_key,
682+ &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
683 mutex_create(buf_pool_zip_mutex_key,
684 &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
685
686+ mutex_enter(&buf_pool->LRU_list_mutex);
687+ rw_lock_x_lock(&buf_pool->page_hash_latch);
688 buf_pool_mutex_enter(buf_pool);
689
690 if (buf_pool_size > 0) {
691@@ -1223,6 +1245,8 @@
692 mem_free(chunk);
693 mem_free(buf_pool);
694
695+ mutex_exit(&buf_pool->LRU_list_mutex);
696+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
697 buf_pool_mutex_exit(buf_pool);
698
699 return(DB_ERROR);
700@@ -1253,6 +1277,8 @@
701
702 /* All fields are initialized by mem_zalloc(). */
703
704+ mutex_exit(&buf_pool->LRU_list_mutex);
705+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
706 buf_pool_mutex_exit(buf_pool);
707
708 return(DB_SUCCESS);
709@@ -1467,7 +1493,11 @@
710 ulint fold;
711 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
712
713- ut_ad(buf_pool_mutex_own(buf_pool));
714+ //ut_ad(buf_pool_mutex_own(buf_pool));
715+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
716+#ifdef UNIV_SYNC_DEBUG
717+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
718+#endif
719 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
720 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
721 ut_a(bpage->buf_fix_count == 0);
722@@ -1554,7 +1584,8 @@
723
724 try_again:
725 btr_search_disable(); /* Empty the adaptive hash index again */
726- buf_pool_mutex_enter(buf_pool);
727+ //buf_pool_mutex_enter(buf_pool);
728+ mutex_enter(&buf_pool->LRU_list_mutex);
729
730 shrink_again:
731 if (buf_pool->n_chunks <= 1) {
732@@ -1625,7 +1656,7 @@
733
734 buf_LRU_make_block_old(&block->page);
735 dirty++;
736- } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
737+ } else if (buf_LRU_free_block(&block->page, TRUE, NULL, TRUE)
738 != BUF_LRU_FREED) {
739 nonfree++;
740 }
741@@ -1633,7 +1664,8 @@
742 mutex_exit(&block->mutex);
743 }
744
745- buf_pool_mutex_exit(buf_pool);
746+ //buf_pool_mutex_exit(buf_pool);
747+ mutex_exit(&buf_pool->LRU_list_mutex);
748
749 /* Request for a flush of the chunk if it helps.
750 Do not flush if there are non-free blocks, since
751@@ -1683,7 +1715,8 @@
752 func_done:
753 buf_pool->old_pool_size = buf_pool->curr_pool_size;
754 func_exit:
755- buf_pool_mutex_exit(buf_pool);
756+ //buf_pool_mutex_exit(buf_pool);
757+ mutex_exit(&buf_pool->LRU_list_mutex);
758 btr_search_enable();
759 }
760
761@@ -1724,7 +1757,9 @@
762 hash_table_t* zip_hash;
763 hash_table_t* page_hash;
764
765- buf_pool_mutex_enter(buf_pool);
766+ //buf_pool_mutex_enter(buf_pool);
767+ mutex_enter(&buf_pool->LRU_list_mutex);
768+ rw_lock_x_lock(&buf_pool->page_hash_latch);
769
770 /* Free, create, and populate the hash table. */
771 hash_table_free(buf_pool->page_hash);
772@@ -1765,8 +1800,9 @@
773 All such blocks are either in buf_pool->zip_clean or
774 in buf_pool->flush_list. */
775
776+ mutex_enter(&buf_pool->zip_mutex);
777 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
778- b = UT_LIST_GET_NEXT(list, b)) {
779+ b = UT_LIST_GET_NEXT(zip_list, b)) {
780 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
781 ut_ad(!b->in_flush_list);
782 ut_ad(b->in_LRU_list);
783@@ -1776,10 +1812,11 @@
784 HASH_INSERT(buf_page_t, hash, page_hash,
785 buf_page_address_fold(b->space, b->offset), b);
786 }
787+ mutex_exit(&buf_pool->zip_mutex);
788
789 buf_flush_list_mutex_enter(buf_pool);
790 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
791- b = UT_LIST_GET_NEXT(list, b)) {
792+ b = UT_LIST_GET_NEXT(flush_list, b)) {
793 ut_ad(b->in_flush_list);
794 ut_ad(b->in_LRU_list);
795 ut_ad(b->in_page_hash);
796@@ -1806,7 +1843,9 @@
797 }
798
799 buf_flush_list_mutex_exit(buf_pool);
800- buf_pool_mutex_exit(buf_pool);
801+ //buf_pool_mutex_exit(buf_pool);
802+ mutex_exit(&buf_pool->LRU_list_mutex);
803+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
804 }
805
806 /********************************************************************
807@@ -1853,21 +1892,32 @@
808 buf_page_t* bpage;
809 ulint i;
810 buf_pool_t* buf_pool = buf_pool_get(space, offset);
811+ mutex_t* block_mutex;
812
813- ut_ad(buf_pool_mutex_own(buf_pool));
814+ //ut_ad(buf_pool_mutex_own(buf_pool));
815
816+ rw_lock_x_lock(&buf_pool->page_hash_latch);
817 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
818+ if (bpage) {
819+ block_mutex = buf_page_get_mutex_enter(bpage);
820+ ut_a(block_mutex);
821+ }
822
823 if (UNIV_LIKELY_NULL(bpage)) {
824 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
825 /* The page was loaded meanwhile. */
826+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
827 return(bpage);
828 }
829 /* Add to an existing watch. */
830 bpage->buf_fix_count++;
831+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
832+ mutex_exit(block_mutex);
833 return(NULL);
834 }
835
836+ /* buf_pool->watch is protected by zip_mutex for now */
837+ mutex_enter(&buf_pool->zip_mutex);
838 for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
839 bpage = &buf_pool->watch[i];
840
841@@ -1891,10 +1941,12 @@
842 bpage->space = space;
843 bpage->offset = offset;
844 bpage->buf_fix_count = 1;
845-
846+ bpage->buf_pool_index = buf_pool_index(buf_pool);
847 ut_d(bpage->in_page_hash = TRUE);
848 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
849 fold, bpage);
850+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
851+ mutex_exit(&buf_pool->zip_mutex);
852 return(NULL);
853 case BUF_BLOCK_ZIP_PAGE:
854 ut_ad(bpage->in_page_hash);
855@@ -1912,6 +1964,8 @@
856 ut_error;
857
858 /* Fix compiler warning */
859+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
860+ mutex_exit(&buf_pool->zip_mutex);
861 return(NULL);
862 }
863
864@@ -1941,6 +1995,8 @@
865 buf_chunk_t* chunks;
866 buf_chunk_t* chunk;
867
868+ mutex_enter(&buf_pool->LRU_list_mutex);
869+ rw_lock_x_lock(&buf_pool->page_hash_latch);
870 buf_pool_mutex_enter(buf_pool);
871 chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
872
873@@ -1959,6 +2015,8 @@
874 buf_pool->n_chunks++;
875 }
876
877+ mutex_exit(&buf_pool->LRU_list_mutex);
878+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
879 buf_pool_mutex_exit(buf_pool);
880 }
881
882@@ -2046,7 +2104,11 @@
883 space, offset) */
884 buf_page_t* watch) /*!< in/out: sentinel for watch */
885 {
886- ut_ad(buf_pool_mutex_own(buf_pool));
887+ //ut_ad(buf_pool_mutex_own(buf_pool));
888+#ifdef UNIV_SYNC_DEBUG
889+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
890+#endif
891+ ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
892
893 HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
894 ut_d(watch->in_page_hash = FALSE);
895@@ -2068,28 +2130,31 @@
896 buf_pool_t* buf_pool = buf_pool_get(space, offset);
897 ulint fold = buf_page_address_fold(space, offset);
898
899- buf_pool_mutex_enter(buf_pool);
900+ //buf_pool_mutex_enter(buf_pool);
901+ rw_lock_x_lock(&buf_pool->page_hash_latch);
902 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
903 /* The page must exist because buf_pool_watch_set()
904 increments buf_fix_count. */
905 ut_a(bpage);
906
907 if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
908- mutex_t* mutex = buf_page_get_mutex(bpage);
909+ mutex_t* mutex = buf_page_get_mutex_enter(bpage);
910
911- mutex_enter(mutex);
912 ut_a(bpage->buf_fix_count > 0);
913 bpage->buf_fix_count--;
914 mutex_exit(mutex);
915 } else {
916+ mutex_enter(&buf_pool->zip_mutex);
917 ut_a(bpage->buf_fix_count > 0);
918
919 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
920 buf_pool_watch_remove(buf_pool, fold, bpage);
921 }
922+ mutex_exit(&buf_pool->zip_mutex);
923 }
924
925- buf_pool_mutex_exit(buf_pool);
926+ //buf_pool_mutex_exit(buf_pool);
927+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
928 }
929
930 /****************************************************************//**
931@@ -2109,14 +2174,16 @@
932 buf_pool_t* buf_pool = buf_pool_get(space, offset);
933 ulint fold = buf_page_address_fold(space, offset);
934
935- buf_pool_mutex_enter(buf_pool);
936+ //buf_pool_mutex_enter(buf_pool);
937+ rw_lock_s_lock(&buf_pool->page_hash_latch);
938
939 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
940 /* The page must exist because buf_pool_watch_set()
941 increments buf_fix_count. */
942 ut_a(bpage);
943 ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
944- buf_pool_mutex_exit(buf_pool);
945+ //buf_pool_mutex_exit(buf_pool);
946+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
947
948 return(ret);
949 }
950@@ -2133,13 +2200,15 @@
951 {
952 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
953
954- buf_pool_mutex_enter(buf_pool);
955+ //buf_pool_mutex_enter(buf_pool);
956+ mutex_enter(&buf_pool->LRU_list_mutex);
957
958 ut_a(buf_page_in_file(bpage));
959
960 buf_LRU_make_block_young(bpage);
961
962- buf_pool_mutex_exit(buf_pool);
963+ //buf_pool_mutex_exit(buf_pool);
964+ mutex_exit(&buf_pool->LRU_list_mutex);
965 }
966
967 /********************************************************************//**
968@@ -2163,14 +2232,20 @@
969 ut_a(buf_page_in_file(bpage));
970
971 if (buf_page_peek_if_too_old(bpage)) {
972- buf_pool_mutex_enter(buf_pool);
973+ //buf_pool_mutex_enter(buf_pool);
974+ mutex_enter(&buf_pool->LRU_list_mutex);
975 buf_LRU_make_block_young(bpage);
976- buf_pool_mutex_exit(buf_pool);
977+ //buf_pool_mutex_exit(buf_pool);
978+ mutex_exit(&buf_pool->LRU_list_mutex);
979 } else if (!access_time) {
980 ulint time_ms = ut_time_ms();
981- buf_pool_mutex_enter(buf_pool);
982+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
983+ //buf_pool_mutex_enter(buf_pool);
984+ if (block_mutex) {
985 buf_page_set_accessed(bpage, time_ms);
986- buf_pool_mutex_exit(buf_pool);
987+ mutex_exit(block_mutex);
988+ }
989+ //buf_pool_mutex_exit(buf_pool);
990 }
991 }
992
993@@ -2187,7 +2262,8 @@
994 buf_block_t* block;
995 buf_pool_t* buf_pool = buf_pool_get(space, offset);
996
997- buf_pool_mutex_enter(buf_pool);
998+ //buf_pool_mutex_enter(buf_pool);
999+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1000
1001 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1002
1003@@ -2196,7 +2272,8 @@
1004 block->check_index_page_at_flush = FALSE;
1005 }
1006
1007- buf_pool_mutex_exit(buf_pool);
1008+ //buf_pool_mutex_exit(buf_pool);
1009+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1010 }
1011
1012 /********************************************************************//**
1013@@ -2215,7 +2292,8 @@
1014 ibool is_hashed;
1015 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1016
1017- buf_pool_mutex_enter(buf_pool);
1018+ //buf_pool_mutex_enter(buf_pool);
1019+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1020
1021 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1022
1023@@ -2226,7 +2304,8 @@
1024 is_hashed = block->is_hashed;
1025 }
1026
1027- buf_pool_mutex_exit(buf_pool);
1028+ //buf_pool_mutex_exit(buf_pool);
1029+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1030
1031 return(is_hashed);
1032 }
1033@@ -2248,7 +2327,8 @@
1034 buf_page_t* bpage;
1035 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1036
1037- buf_pool_mutex_enter(buf_pool);
1038+ //buf_pool_mutex_enter(buf_pool);
1039+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1040
1041 bpage = buf_page_hash_get(buf_pool, space, offset);
1042
1043@@ -2257,7 +2337,8 @@
1044 bpage->file_page_was_freed = TRUE;
1045 }
1046
1047- buf_pool_mutex_exit(buf_pool);
1048+ //buf_pool_mutex_exit(buf_pool);
1049+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1050
1051 return(bpage);
1052 }
1053@@ -2278,7 +2359,8 @@
1054 buf_page_t* bpage;
1055 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1056
1057- buf_pool_mutex_enter(buf_pool);
1058+ //buf_pool_mutex_enter(buf_pool);
1059+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1060
1061 bpage = buf_page_hash_get(buf_pool, space, offset);
1062
1063@@ -2287,7 +2369,8 @@
1064 bpage->file_page_was_freed = FALSE;
1065 }
1066
1067- buf_pool_mutex_exit(buf_pool);
1068+ //buf_pool_mutex_exit(buf_pool);
1069+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1070
1071 return(bpage);
1072 }
1073@@ -2322,8 +2405,9 @@
1074 buf_pool->stat.n_page_gets++;
1075
1076 for (;;) {
1077- buf_pool_mutex_enter(buf_pool);
1078+ //buf_pool_mutex_enter(buf_pool);
1079 lookup:
1080+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1081 bpage = buf_page_hash_get(buf_pool, space, offset);
1082 if (bpage) {
1083 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1084@@ -2332,7 +2416,8 @@
1085
1086 /* Page not in buf_pool: needs to be read from file */
1087
1088- buf_pool_mutex_exit(buf_pool);
1089+ //buf_pool_mutex_exit(buf_pool);
1090+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1091
1092 buf_read_page(space, zip_size, offset);
1093
1094@@ -2344,10 +2429,15 @@
1095 if (UNIV_UNLIKELY(!bpage->zip.data)) {
1096 /* There is no compressed page. */
1097 err_exit:
1098- buf_pool_mutex_exit(buf_pool);
1099+ //buf_pool_mutex_exit(buf_pool);
1100+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1101 return(NULL);
1102 }
1103
1104+ block_mutex = buf_page_get_mutex_enter(bpage);
1105+
1106+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1107+
1108 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1109
1110 switch (buf_page_get_state(bpage)) {
1111@@ -2356,19 +2446,19 @@
1112 case BUF_BLOCK_MEMORY:
1113 case BUF_BLOCK_REMOVE_HASH:
1114 case BUF_BLOCK_ZIP_FREE:
1115+ if (block_mutex)
1116+ mutex_exit(block_mutex);
1117 break;
1118 case BUF_BLOCK_ZIP_PAGE:
1119 case BUF_BLOCK_ZIP_DIRTY:
1120- block_mutex = &buf_pool->zip_mutex;
1121- mutex_enter(block_mutex);
1122+ ut_a(block_mutex == &buf_pool->zip_mutex);
1123 bpage->buf_fix_count++;
1124 goto got_block;
1125 case BUF_BLOCK_FILE_PAGE:
1126- block_mutex = &((buf_block_t*) bpage)->mutex;
1127- mutex_enter(block_mutex);
1128+ ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1129
1130 /* Discard the uncompressed page frame if possible. */
1131- if (buf_LRU_free_block(bpage, FALSE, NULL)
1132+ if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
1133 == BUF_LRU_FREED) {
1134
1135 mutex_exit(block_mutex);
1136@@ -2387,7 +2477,7 @@
1137 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1138 access_time = buf_page_is_accessed(bpage);
1139
1140- buf_pool_mutex_exit(buf_pool);
1141+ //buf_pool_mutex_exit(buf_pool);
1142
1143 mutex_exit(block_mutex);
1144
1145@@ -2696,7 +2786,7 @@
1146 const buf_block_t* block) /*!< in: pointer to block,
1147 not dereferenced */
1148 {
1149- ut_ad(buf_pool_mutex_own(buf_pool));
1150+ //ut_ad(buf_pool_mutex_own(buf_pool));
1151
1152 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1153 /* The pointer should be aligned. */
1154@@ -2732,6 +2822,7 @@
1155 ulint fix_type;
1156 ibool must_read;
1157 ulint retries = 0;
1158+ mutex_t* block_mutex = NULL;
1159 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1160
1161 ut_ad(mtr);
1162@@ -2753,9 +2844,11 @@
1163 fold = buf_page_address_fold(space, offset);
1164 loop:
1165 block = guess;
1166- buf_pool_mutex_enter(buf_pool);
1167+ //buf_pool_mutex_enter(buf_pool);
1168
1169 if (block) {
1170+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1171+
1172 /* If the guess is a compressed page descriptor that
1173 has been allocated by buf_buddy_alloc(), it may have
1174 been invalidated by buf_buddy_relocate(). In that
1175@@ -2764,11 +2857,15 @@
1176 the guess may be pointing to a buffer pool chunk that
1177 has been released when resizing the buffer pool. */
1178
1179- if (!buf_block_is_uncompressed(buf_pool, block)
1180+ if (!block_mutex) {
1181+ block = guess = NULL;
1182+ } else if (!buf_block_is_uncompressed(buf_pool, block)
1183 || offset != block->page.offset
1184 || space != block->page.space
1185 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1186
1187+ mutex_exit(block_mutex);
1188+
1189 block = guess = NULL;
1190 } else {
1191 ut_ad(!block->page.in_zip_hash);
1192@@ -2777,12 +2874,19 @@
1193 }
1194
1195 if (block == NULL) {
1196+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1197 block = (buf_block_t*) buf_page_hash_get_low(
1198 buf_pool, space, offset, fold);
1199+ if (block) {
1200+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1201+ ut_a(block_mutex);
1202+ }
1203+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1204 }
1205
1206 loop2:
1207 if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1208+ mutex_exit(block_mutex);
1209 block = NULL;
1210 }
1211
1212@@ -2794,12 +2898,14 @@
1213 space, offset, fold);
1214
1215 if (UNIV_LIKELY_NULL(block)) {
1216-
1217+ block_mutex = buf_page_get_mutex((buf_page_t*)block);
1218+ ut_a(block_mutex);
1219+ ut_ad(mutex_own(block_mutex));
1220 goto got_block;
1221 }
1222 }
1223
1224- buf_pool_mutex_exit(buf_pool);
1225+ //buf_pool_mutex_exit(buf_pool);
1226
1227 if (mode == BUF_GET_IF_IN_POOL
1228 || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1229@@ -2847,7 +2953,8 @@
1230 /* The page is being read to buffer pool,
1231 but we cannot wait around for the read to
1232 complete. */
1233- buf_pool_mutex_exit(buf_pool);
1234+ //buf_pool_mutex_exit(buf_pool);
1235+ mutex_exit(block_mutex);
1236
1237 return(NULL);
1238 }
1239@@ -2857,38 +2964,49 @@
1240 ibool success;
1241
1242 case BUF_BLOCK_FILE_PAGE:
1243+ if (block_mutex == &buf_pool->zip_mutex) {
1244+ /* it is the wrong mutex... */
1245+ mutex_exit(block_mutex);
1246+ goto loop;
1247+ }
1248 break;
1249
1250 case BUF_BLOCK_ZIP_PAGE:
1251 case BUF_BLOCK_ZIP_DIRTY:
1252+ ut_ad(block_mutex == &buf_pool->zip_mutex);
1253 bpage = &block->page;
1254 /* Protect bpage->buf_fix_count. */
1255- mutex_enter(&buf_pool->zip_mutex);
1256+ //mutex_enter(&buf_pool->zip_mutex);
1257
1258 if (bpage->buf_fix_count
1259 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1260 /* This condition often occurs when the buffer
1261 is not buffer-fixed, but I/O-fixed by
1262 buf_page_init_for_read(). */
1263- mutex_exit(&buf_pool->zip_mutex);
1264+ //mutex_exit(&buf_pool->zip_mutex);
1265 wait_until_unfixed:
1266 /* The block is buffer-fixed or I/O-fixed.
1267 Try again later. */
1268- buf_pool_mutex_exit(buf_pool);
1269+ //buf_pool_mutex_exit(buf_pool);
1270+ mutex_exit(block_mutex);
1271 os_thread_sleep(WAIT_FOR_READ);
1272
1273 goto loop;
1274 }
1275
1276 /* Allocate an uncompressed page. */
1277- buf_pool_mutex_exit(buf_pool);
1278- mutex_exit(&buf_pool->zip_mutex);
1279+ //buf_pool_mutex_exit(buf_pool);
1280+ //mutex_exit(&buf_pool->zip_mutex);
1281+ mutex_exit(block_mutex);
1282
1283 block = buf_LRU_get_free_block(buf_pool, 0);
1284 ut_a(block);
1285+ block_mutex = &block->mutex;
1286
1287- buf_pool_mutex_enter(buf_pool);
1288- mutex_enter(&block->mutex);
1289+ //buf_pool_mutex_enter(buf_pool);
1290+ mutex_enter(&buf_pool->LRU_list_mutex);
1291+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1292+ mutex_enter(block_mutex);
1293
1294 {
1295 buf_page_t* hash_bpage;
1296@@ -2901,35 +3019,47 @@
1297 while buf_pool->mutex was released.
1298 Free the block that was allocated. */
1299
1300- buf_LRU_block_free_non_file_page(block);
1301- mutex_exit(&block->mutex);
1302+ buf_LRU_block_free_non_file_page(block, TRUE);
1303+ mutex_exit(block_mutex);
1304
1305 block = (buf_block_t*) hash_bpage;
1306+ if (block) {
1307+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1308+ ut_a(block_mutex);
1309+ }
1310+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1311+ mutex_exit(&buf_pool->LRU_list_mutex);
1312 goto loop2;
1313 }
1314 }
1315
1316+ mutex_enter(&buf_pool->zip_mutex);
1317+
1318 if (UNIV_UNLIKELY
1319 (bpage->buf_fix_count
1320 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1321
1322+ mutex_exit(&buf_pool->zip_mutex);
1323 /* The block was buffer-fixed or I/O-fixed
1324 while buf_pool->mutex was not held by this thread.
1325 Free the block that was allocated and try again.
1326 This should be extremely unlikely. */
1327
1328- buf_LRU_block_free_non_file_page(block);
1329- mutex_exit(&block->mutex);
1330+ buf_LRU_block_free_non_file_page(block, TRUE);
1331+ //mutex_exit(&block->mutex);
1332
1333+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1334+ mutex_exit(&buf_pool->LRU_list_mutex);
1335 goto wait_until_unfixed;
1336 }
1337
1338 /* Move the compressed page from bpage to block,
1339 and uncompress it. */
1340
1341- mutex_enter(&buf_pool->zip_mutex);
1342-
1343 buf_relocate(bpage, &block->page);
1344+
1345+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1346+
1347 buf_block_init_low(block);
1348 block->lock_hash_val = lock_rec_hash(space, offset);
1349
1350@@ -2938,7 +3068,7 @@
1351
1352 if (buf_page_get_state(&block->page)
1353 == BUF_BLOCK_ZIP_PAGE) {
1354- UT_LIST_REMOVE(list, buf_pool->zip_clean,
1355+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1356 &block->page);
1357 ut_ad(!block->page.in_flush_list);
1358 } else {
1359@@ -2955,19 +3085,24 @@
1360 /* Insert at the front of unzip_LRU list */
1361 buf_unzip_LRU_add_block(block, FALSE);
1362
1363+ mutex_exit(&buf_pool->LRU_list_mutex);
1364+
1365 block->page.buf_fix_count = 1;
1366 buf_block_set_io_fix(block, BUF_IO_READ);
1367 rw_lock_x_lock_func(&block->lock, 0, file, line);
1368
1369 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1370
1371- mutex_exit(&block->mutex);
1372+ mutex_exit(block_mutex);
1373 mutex_exit(&buf_pool->zip_mutex);
1374+
1375+ buf_pool_mutex_enter(buf_pool);
1376 buf_pool->n_pend_unzip++;
1377+ buf_pool_mutex_exit(buf_pool);
1378
1379- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1380+ buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1381
1382- buf_pool_mutex_exit(buf_pool);
1383+ //buf_pool_mutex_exit(buf_pool);
1384
1385 /* Decompress the page and apply buffered operations
1386 while not holding buf_pool->mutex or block->mutex. */
1387@@ -2980,12 +3115,15 @@
1388 }
1389
1390 /* Unfix and unlatch the block. */
1391- buf_pool_mutex_enter(buf_pool);
1392- mutex_enter(&block->mutex);
1393+ //buf_pool_mutex_enter(buf_pool);
1394+ block_mutex = &block->mutex;
1395+ mutex_enter(block_mutex);
1396 block->page.buf_fix_count--;
1397 buf_block_set_io_fix(block, BUF_IO_NONE);
1398- mutex_exit(&block->mutex);
1399+
1400+ buf_pool_mutex_enter(buf_pool);
1401 buf_pool->n_pend_unzip--;
1402+ buf_pool_mutex_exit(buf_pool);
1403 rw_lock_x_unlock(&block->lock);
1404
1405 break;
1406@@ -3001,7 +3139,7 @@
1407
1408 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1409
1410- mutex_enter(&block->mutex);
1411+ //mutex_enter(&block->mutex);
1412 #if UNIV_WORD_SIZE == 4
1413 /* On 32-bit systems, there is no padding in buf_page_t. On
1414 other systems, Valgrind could complain about uninitialized pad
1415@@ -3014,7 +3152,7 @@
1416 /* Try to evict the block from the buffer pool, to use the
1417 insert buffer (change buffer) as much as possible. */
1418
1419- if (buf_LRU_free_block(&block->page, TRUE, NULL)
1420+ if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
1421 == BUF_LRU_FREED) {
1422 mutex_exit(&block->mutex);
1423 if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1424@@ -3051,13 +3189,14 @@
1425
1426 buf_block_buf_fix_inc(block, file, line);
1427
1428- mutex_exit(&block->mutex);
1429+ //mutex_exit(&block->mutex);
1430
1431 /* Check if this is the first access to the page */
1432
1433 access_time = buf_page_is_accessed(&block->page);
1434
1435- buf_pool_mutex_exit(buf_pool);
1436+ //buf_pool_mutex_exit(buf_pool);
1437+ mutex_exit(block_mutex);
1438
1439 buf_page_set_accessed_make_young(&block->page, access_time);
1440
1441@@ -3290,9 +3429,11 @@
1442 buf_pool = buf_pool_from_block(block);
1443
1444 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1445- buf_pool_mutex_enter(buf_pool);
1446+ //buf_pool_mutex_enter(buf_pool);
1447+ mutex_enter(&buf_pool->LRU_list_mutex);
1448 buf_LRU_make_block_young(&block->page);
1449- buf_pool_mutex_exit(buf_pool);
1450+ //buf_pool_mutex_exit(buf_pool);
1451+ mutex_exit(&buf_pool->LRU_list_mutex);
1452 } else if (!buf_page_is_accessed(&block->page)) {
1453 /* Above, we do a dirty read on purpose, to avoid
1454 mutex contention. The field buf_page_t::access_time
1455@@ -3300,9 +3441,11 @@
1456 field must be protected by mutex, however. */
1457 ulint time_ms = ut_time_ms();
1458
1459- buf_pool_mutex_enter(buf_pool);
1460+ //buf_pool_mutex_enter(buf_pool);
1461+ mutex_enter(&block->mutex);
1462 buf_page_set_accessed(&block->page, time_ms);
1463- buf_pool_mutex_exit(buf_pool);
1464+ //buf_pool_mutex_exit(buf_pool);
1465+ mutex_exit(&block->mutex);
1466 }
1467
1468 ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1469@@ -3369,18 +3512,21 @@
1470 ut_ad(mtr);
1471 ut_ad(mtr->state == MTR_ACTIVE);
1472
1473- buf_pool_mutex_enter(buf_pool);
1474+ //buf_pool_mutex_enter(buf_pool);
1475+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1476 block = buf_block_hash_get(buf_pool, space_id, page_no);
1477
1478 if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1479- buf_pool_mutex_exit(buf_pool);
1480+ //buf_pool_mutex_exit(buf_pool);
1481+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1482 return(NULL);
1483 }
1484
1485 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1486
1487 mutex_enter(&block->mutex);
1488- buf_pool_mutex_exit(buf_pool);
1489+ //buf_pool_mutex_exit(buf_pool);
1490+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1491
1492 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1493 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1494@@ -3469,7 +3615,10 @@
1495 buf_page_t* hash_page;
1496 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1497
1498- ut_ad(buf_pool_mutex_own(buf_pool));
1499+ //ut_ad(buf_pool_mutex_own(buf_pool));
1500+#ifdef UNIV_SYNC_DEBUG
1501+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1502+#endif
1503 ut_ad(mutex_own(&(block->mutex)));
1504 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1505
1506@@ -3498,11 +3647,14 @@
1507 if (UNIV_LIKELY(!hash_page)) {
1508 } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1509 /* Preserve the reference count. */
1510- ulint buf_fix_count = hash_page->buf_fix_count;
1511+ ulint buf_fix_count;
1512
1513+ mutex_enter(&buf_pool->zip_mutex);
1514+ buf_fix_count = hash_page->buf_fix_count;
1515 ut_a(buf_fix_count > 0);
1516 block->page.buf_fix_count += buf_fix_count;
1517 buf_pool_watch_remove(buf_pool, fold, hash_page);
1518+ mutex_exit(&buf_pool->zip_mutex);
1519 } else {
1520 fprintf(stderr,
1521 "InnoDB: Error: page %lu %lu already found"
1522@@ -3512,7 +3664,8 @@
1523 (const void*) hash_page, (const void*) block);
1524 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1525 mutex_exit(&block->mutex);
1526- buf_pool_mutex_exit(buf_pool);
1527+ //buf_pool_mutex_exit(buf_pool);
1528+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1529 buf_print();
1530 buf_LRU_print();
1531 buf_validate();
1532@@ -3596,7 +3749,9 @@
1533
1534 fold = buf_page_address_fold(space, offset);
1535
1536- buf_pool_mutex_enter(buf_pool);
1537+ //buf_pool_mutex_enter(buf_pool);
1538+ mutex_enter(&buf_pool->LRU_list_mutex);
1539+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1540
1541 watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1542 if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
1543@@ -3605,9 +3760,15 @@
1544 err_exit:
1545 if (block) {
1546 mutex_enter(&block->mutex);
1547- buf_LRU_block_free_non_file_page(block);
1548+ mutex_exit(&buf_pool->LRU_list_mutex);
1549+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1550+ buf_LRU_block_free_non_file_page(block, FALSE);
1551 mutex_exit(&block->mutex);
1552 }
1553+ else {
1554+ mutex_exit(&buf_pool->LRU_list_mutex);
1555+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1556+ }
1557
1558 bpage = NULL;
1559 goto func_exit;
1560@@ -3630,6 +3791,8 @@
1561
1562 buf_page_init(space, offset, fold, block);
1563
1564+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1565+
1566 /* The block must be put to the LRU list, to the old blocks */
1567 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1568
1569@@ -3657,7 +3820,7 @@
1570 been added to buf_pool->LRU and
1571 buf_pool->page_hash. */
1572 mutex_exit(&block->mutex);
1573- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1574+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1575 mutex_enter(&block->mutex);
1576 block->page.zip.data = data;
1577
1578@@ -3670,6 +3833,7 @@
1579 buf_unzip_LRU_add_block(block, TRUE);
1580 }
1581
1582+ mutex_exit(&buf_pool->LRU_list_mutex);
1583 mutex_exit(&block->mutex);
1584 } else {
1585 /* Defer buf_buddy_alloc() until after the block has
1586@@ -3681,8 +3845,8 @@
1587 control block (bpage), in order to avoid the
1588 invocation of buf_buddy_relocate_block() on
1589 uninitialized data. */
1590- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1591- bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1592+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1593+ bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1594
1595 /* Initialize the buf_pool pointer. */
1596 bpage->buf_pool_index = buf_pool_index(buf_pool);
1597@@ -3701,8 +3865,11 @@
1598
1599 /* The block was added by some other thread. */
1600 watch_page = NULL;
1601- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1602- buf_buddy_free(buf_pool, data, zip_size);
1603+ buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1604+ buf_buddy_free(buf_pool, data, zip_size, TRUE);
1605+
1606+ mutex_exit(&buf_pool->LRU_list_mutex);
1607+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1608
1609 bpage = NULL;
1610 goto func_exit;
1611@@ -3746,18 +3913,24 @@
1612 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1613 bpage);
1614
1615+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1616+
1617 /* The block must be put to the LRU list, to the old blocks */
1618 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1619 buf_LRU_insert_zip_clean(bpage);
1620
1621+ mutex_exit(&buf_pool->LRU_list_mutex);
1622+
1623 buf_page_set_io_fix(bpage, BUF_IO_READ);
1624
1625 mutex_exit(&buf_pool->zip_mutex);
1626 }
1627
1628+ buf_pool_mutex_enter(buf_pool);
1629 buf_pool->n_pend_reads++;
1630-func_exit:
1631 buf_pool_mutex_exit(buf_pool);
1632+func_exit:
1633+ //buf_pool_mutex_exit(buf_pool);
1634
1635 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1636
1637@@ -3799,7 +3972,9 @@
1638
1639 fold = buf_page_address_fold(space, offset);
1640
1641- buf_pool_mutex_enter(buf_pool);
1642+ //buf_pool_mutex_enter(buf_pool);
1643+ mutex_enter(&buf_pool->LRU_list_mutex);
1644+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1645
1646 block = (buf_block_t*) buf_page_hash_get_low(
1647 buf_pool, space, offset, fold);
1648@@ -3815,7 +3990,9 @@
1649 #endif /* UNIV_DEBUG_FILE_ACCESSES */
1650
1651 /* Page can be found in buf_pool */
1652- buf_pool_mutex_exit(buf_pool);
1653+ //buf_pool_mutex_exit(buf_pool);
1654+ mutex_exit(&buf_pool->LRU_list_mutex);
1655+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1656
1657 buf_block_free(free_block);
1658
1659@@ -3837,6 +4014,7 @@
1660 mutex_enter(&block->mutex);
1661
1662 buf_page_init(space, offset, fold, block);
1663+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1664
1665 /* The block must be put to the LRU list */
1666 buf_LRU_add_block(&block->page, FALSE);
1667@@ -3863,7 +4041,7 @@
1668 the reacquisition of buf_pool->mutex. We also must
1669 defer this operation until after the block descriptor
1670 has been added to buf_pool->LRU and buf_pool->page_hash. */
1671- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1672+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1673 mutex_enter(&block->mutex);
1674 block->page.zip.data = data;
1675
1676@@ -3881,7 +4059,8 @@
1677
1678 buf_page_set_accessed(&block->page, time_ms);
1679
1680- buf_pool_mutex_exit(buf_pool);
1681+ //buf_pool_mutex_exit(buf_pool);
1682+ mutex_exit(&buf_pool->LRU_list_mutex);
1683
1684 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1685
1686@@ -3932,6 +4111,8 @@
1687 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1688 const ibool uncompressed = (buf_page_get_state(bpage)
1689 == BUF_BLOCK_FILE_PAGE);
1690+ ibool have_LRU_mutex = FALSE;
1691+ mutex_t* block_mutex;
1692
1693 ut_a(buf_page_in_file(bpage));
1694
1695@@ -4065,8 +4246,26 @@
1696 }
1697 }
1698
1699+ if (io_type == BUF_IO_WRITE
1700+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1701+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1702+ /* to keep consistency at buf_LRU_insert_zip_clean() */
1703+ have_LRU_mutex = TRUE; /* optimistic */
1704+ }
1705+retry_mutex:
1706+ if (have_LRU_mutex)
1707+ mutex_enter(&buf_pool->LRU_list_mutex);
1708+ block_mutex = buf_page_get_mutex_enter(bpage);
1709+ ut_a(block_mutex);
1710+ if (io_type == BUF_IO_WRITE
1711+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1712+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1713+ && !have_LRU_mutex) {
1714+ mutex_exit(block_mutex);
1715+ have_LRU_mutex = TRUE;
1716+ goto retry_mutex;
1717+ }
1718 buf_pool_mutex_enter(buf_pool);
1719- mutex_enter(buf_page_get_mutex(bpage));
1720
1721 #ifdef UNIV_IBUF_COUNT_DEBUG
1722 if (io_type == BUF_IO_WRITE || uncompressed) {
1723@@ -4089,6 +4288,7 @@
1724 the x-latch to this OS thread: do not let this confuse you in
1725 debugging! */
1726
1727+ ut_a(!have_LRU_mutex);
1728 ut_ad(buf_pool->n_pend_reads > 0);
1729 buf_pool->n_pend_reads--;
1730 buf_pool->stat.n_pages_read++;
1731@@ -4106,6 +4306,9 @@
1732
1733 buf_flush_write_complete(bpage);
1734
1735+ if (have_LRU_mutex)
1736+ mutex_exit(&buf_pool->LRU_list_mutex);
1737+
1738 if (uncompressed) {
1739 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1740 BUF_IO_WRITE);
1741@@ -4128,8 +4331,8 @@
1742 }
1743 #endif /* UNIV_DEBUG */
1744
1745- mutex_exit(buf_page_get_mutex(bpage));
1746 buf_pool_mutex_exit(buf_pool);
1747+ mutex_exit(block_mutex);
1748 }
1749
1750 /*********************************************************************//**
1751@@ -4146,7 +4349,9 @@
1752
1753 ut_ad(buf_pool);
1754
1755- buf_pool_mutex_enter(buf_pool);
1756+ //buf_pool_mutex_enter(buf_pool);
1757+ mutex_enter(&buf_pool->LRU_list_mutex);
1758+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1759
1760 chunk = buf_pool->chunks;
1761
1762@@ -4163,7 +4368,9 @@
1763 }
1764 }
1765
1766- buf_pool_mutex_exit(buf_pool);
1767+ //buf_pool_mutex_exit(buf_pool);
1768+ mutex_exit(&buf_pool->LRU_list_mutex);
1769+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1770
1771 return(TRUE);
1772 }
1773@@ -4211,7 +4418,8 @@
1774 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1775 }
1776
1777- buf_pool_mutex_enter(buf_pool);
1778+ //buf_pool_mutex_enter(buf_pool);
1779+ mutex_enter(&buf_pool->LRU_list_mutex);
1780
1781 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1782 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
1783@@ -4224,7 +4432,8 @@
1784 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1785 buf_refresh_io_stats(buf_pool);
1786
1787- buf_pool_mutex_exit(buf_pool);
1788+ //buf_pool_mutex_exit(buf_pool);
1789+ mutex_exit(&buf_pool->LRU_list_mutex);
1790 }
1791
1792 /*********************************************************************//**
1793@@ -4266,7 +4475,10 @@
1794
1795 ut_ad(buf_pool);
1796
1797- buf_pool_mutex_enter(buf_pool);
1798+ //buf_pool_mutex_enter(buf_pool);
1799+ mutex_enter(&buf_pool->LRU_list_mutex);
1800+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1801+ /* to keep the new latch order, it cannot be validated correctly... */
1802
1803 chunk = buf_pool->chunks;
1804
1805@@ -4361,7 +4573,7 @@
1806 /* Check clean compressed-only blocks. */
1807
1808 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1809- b = UT_LIST_GET_NEXT(list, b)) {
1810+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1811 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1812 switch (buf_page_get_io_fix(b)) {
1813 case BUF_IO_NONE:
1814@@ -4392,7 +4604,7 @@
1815
1816 buf_flush_list_mutex_enter(buf_pool);
1817 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1818- b = UT_LIST_GET_NEXT(list, b)) {
1819+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1820 ut_ad(b->in_flush_list);
1821 ut_a(b->oldest_modification);
1822 n_flush++;
1823@@ -4451,6 +4663,8 @@
1824 }
1825
1826 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1827+ /* because of the latching order with block->mutex, we cannot acquire the needed mutexes before this point */
1828+/*
1829 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1830 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1831 (ulong) UT_LIST_GET_LEN(buf_pool->free),
1832@@ -4461,8 +4675,11 @@
1833 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1834 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1835 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1836+*/
1837
1838- buf_pool_mutex_exit(buf_pool);
1839+ //buf_pool_mutex_exit(buf_pool);
1840+ mutex_exit(&buf_pool->LRU_list_mutex);
1841+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1842
1843 ut_a(buf_LRU_validate());
1844 ut_a(buf_flush_validate(buf_pool));
1845@@ -4518,7 +4735,9 @@
1846 index_ids = mem_alloc(size * sizeof *index_ids);
1847 counts = mem_alloc(sizeof(ulint) * size);
1848
1849- buf_pool_mutex_enter(buf_pool);
1850+ //buf_pool_mutex_enter(buf_pool);
1851+ mutex_enter(&buf_pool->LRU_list_mutex);
1852+ mutex_enter(&buf_pool->free_list_mutex);
1853 buf_flush_list_mutex_enter(buf_pool);
1854
1855 fprintf(stderr,
1856@@ -4587,7 +4806,9 @@
1857 }
1858 }
1859
1860- buf_pool_mutex_exit(buf_pool);
1861+ //buf_pool_mutex_exit(buf_pool);
1862+ mutex_exit(&buf_pool->LRU_list_mutex);
1863+ mutex_exit(&buf_pool->free_list_mutex);
1864
1865 for (i = 0; i < n_found; i++) {
1866 index = dict_index_get_if_in_cache(index_ids[i]);
1867@@ -4644,7 +4865,7 @@
1868 buf_chunk_t* chunk;
1869 ulint fixed_pages_number = 0;
1870
1871- buf_pool_mutex_enter(buf_pool);
1872+ //buf_pool_mutex_enter(buf_pool);
1873
1874 chunk = buf_pool->chunks;
1875
1876@@ -4678,7 +4899,7 @@
1877 /* Traverse the lists of clean and dirty compressed-only blocks. */
1878
1879 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1880- b = UT_LIST_GET_NEXT(list, b)) {
1881+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1882 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1883 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1884
1885@@ -4690,7 +4911,7 @@
1886
1887 buf_flush_list_mutex_enter(buf_pool);
1888 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1889- b = UT_LIST_GET_NEXT(list, b)) {
1890+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1891 ut_ad(b->in_flush_list);
1892
1893 switch (buf_page_get_state(b)) {
1894@@ -4716,7 +4937,7 @@
1895
1896 buf_flush_list_mutex_exit(buf_pool);
1897 mutex_exit(&buf_pool->zip_mutex);
1898- buf_pool_mutex_exit(buf_pool);
1899+ //buf_pool_mutex_exit(buf_pool);
1900
1901 return(fixed_pages_number);
1902 }
1903@@ -4810,6 +5031,8 @@
1904
1905 ut_ad(buf_pool);
1906
1907+ mutex_enter(&buf_pool->LRU_list_mutex);
1908+ mutex_enter(&buf_pool->free_list_mutex);
1909 buf_pool_mutex_enter(buf_pool);
1910 buf_flush_list_mutex_enter(buf_pool);
1911
1912@@ -4913,6 +5136,8 @@
1913 buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
1914
1915 buf_refresh_io_stats(buf_pool);
1916+ mutex_exit(&buf_pool->LRU_list_mutex);
1917+ mutex_exit(&buf_pool->free_list_mutex);
1918 buf_pool_mutex_exit(buf_pool);
1919 }
1920
1921@@ -5032,11 +5257,13 @@
1922 {
1923 ulint len;
1924
1925- buf_pool_mutex_enter(buf_pool);
1926+ //buf_pool_mutex_enter(buf_pool);
1927+ mutex_enter(&buf_pool->free_list_mutex);
1928
1929 len = UT_LIST_GET_LEN(buf_pool->free);
1930
1931- buf_pool_mutex_exit(buf_pool);
1932+ //buf_pool_mutex_exit(buf_pool);
1933+ mutex_exit(&buf_pool->free_list_mutex);
1934
1935 return(len);
1936 }
1937diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1938--- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 +0900
1939+++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900
1940@@ -279,7 +279,7 @@
1941
1942 ut_d(block->page.in_flush_list = TRUE);
1943 block->page.oldest_modification = lsn;
1944- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1945+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1946
1947 #ifdef UNIV_DEBUG_VALGRIND
1948 {
1949@@ -373,14 +373,14 @@
1950 > block->page.oldest_modification) {
1951 ut_ad(b->in_flush_list);
1952 prev_b = b;
1953- b = UT_LIST_GET_NEXT(list, b);
1954+ b = UT_LIST_GET_NEXT(flush_list, b);
1955 }
1956 }
1957
1958 if (prev_b == NULL) {
1959- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1960+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1961 } else {
1962- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1963+ UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1964 prev_b, &block->page);
1965 }
1966
1967@@ -406,7 +406,7 @@
1968 //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1969 //ut_ad(buf_pool_mutex_own(buf_pool));
1970 #endif
1971- //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1972+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1973 //ut_ad(bpage->in_LRU_list);
1974
1975 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
1976@@ -442,14 +442,14 @@
1977 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1978 {
1979 #ifdef UNIV_DEBUG
1980- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1981- ut_ad(buf_pool_mutex_own(buf_pool));
1982+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1983+ //ut_ad(buf_pool_mutex_own(buf_pool));
1984 #endif
1985- ut_a(buf_page_in_file(bpage));
1986+ //ut_a(buf_page_in_file(bpage));
1987 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1988 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1989
1990- if (bpage->oldest_modification != 0
1991+ if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1992 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1993 ut_ad(bpage->in_flush_list);
1994
1995@@ -480,7 +480,7 @@
1996 {
1997 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1998
1999- ut_ad(buf_pool_mutex_own(buf_pool));
2000+ //ut_ad(buf_pool_mutex_own(buf_pool));
2001 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
2002 ut_ad(bpage->in_flush_list);
2003
2004@@ -498,11 +498,11 @@
2005 return;
2006 case BUF_BLOCK_ZIP_DIRTY:
2007 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
2008- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2009+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2010 buf_LRU_insert_zip_clean(bpage);
2011 break;
2012 case BUF_BLOCK_FILE_PAGE:
2013- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2014+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2015 break;
2016 }
2017
2018@@ -546,7 +546,7 @@
2019 buf_page_t* prev_b = NULL;
2020 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2021
2022- ut_ad(buf_pool_mutex_own(buf_pool));
2023+ //ut_ad(buf_pool_mutex_own(buf_pool));
2024 /* Must reside in the same buffer pool. */
2025 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2026
2027@@ -575,18 +575,18 @@
2028 because we assert on in_flush_list in comparison function. */
2029 ut_d(bpage->in_flush_list = FALSE);
2030
2031- prev = UT_LIST_GET_PREV(list, bpage);
2032- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2033+ prev = UT_LIST_GET_PREV(flush_list, bpage);
2034+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2035
2036 if (prev) {
2037 ut_ad(prev->in_flush_list);
2038 UT_LIST_INSERT_AFTER(
2039- list,
2040+ flush_list,
2041 buf_pool->flush_list,
2042 prev, dpage);
2043 } else {
2044 UT_LIST_ADD_FIRST(
2045- list,
2046+ flush_list,
2047 buf_pool->flush_list,
2048 dpage);
2049 }
2050@@ -1055,7 +1055,7 @@
2051
2052 #ifdef UNIV_DEBUG
2053 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2054- ut_ad(!buf_pool_mutex_own(buf_pool));
2055+ //ut_ad(!buf_pool_mutex_own(buf_pool));
2056 #endif
2057
2058 #ifdef UNIV_LOG_DEBUG
2059@@ -1069,7 +1069,8 @@
2060 io_fixed and oldest_modification != 0. Thus, it cannot be
2061 relocated in the buffer pool or removed from flush_list or
2062 LRU_list. */
2063- ut_ad(!buf_pool_mutex_own(buf_pool));
2064+ //ut_ad(!buf_pool_mutex_own(buf_pool));
2065+ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2066 ut_ad(!buf_flush_list_mutex_own(buf_pool));
2067 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2068 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
2069@@ -1232,12 +1233,18 @@
2070 ibool is_uncompressed;
2071
2072 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2073- ut_ad(buf_pool_mutex_own(buf_pool));
2074+ //ut_ad(buf_pool_mutex_own(buf_pool));
2075+#ifdef UNIV_SYNC_DEBUG
2076+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2077+#endif
2078 ut_ad(buf_page_in_file(bpage));
2079
2080 block_mutex = buf_page_get_mutex(bpage);
2081 ut_ad(mutex_own(block_mutex));
2082
2083+ buf_pool_mutex_enter(buf_pool);
2084+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2085+
2086 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2087
2088 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
2089@@ -1399,14 +1406,16 @@
2090
2091 buf_pool = buf_pool_get(space, i);
2092
2093- buf_pool_mutex_enter(buf_pool);
2094+ //buf_pool_mutex_enter(buf_pool);
2095+ rw_lock_s_lock(&buf_pool->page_hash_latch);
2096
2097 /* We only want to flush pages from this buffer pool. */
2098 bpage = buf_page_hash_get(buf_pool, space, i);
2099
2100 if (!bpage) {
2101
2102- buf_pool_mutex_exit(buf_pool);
2103+ //buf_pool_mutex_exit(buf_pool);
2104+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2105 continue;
2106 }
2107
2108@@ -1418,11 +1427,9 @@
2109 if (flush_type != BUF_FLUSH_LRU
2110 || i == offset
2111 || buf_page_is_old(bpage)) {
2112- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2113-
2114- mutex_enter(block_mutex);
2115+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2116
2117- if (buf_flush_ready_for_flush(bpage, flush_type)
2118+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2119 && (i == offset || !bpage->buf_fix_count)) {
2120 /* We only try to flush those
2121 neighbors != offset where the buf fix
2122@@ -1438,11 +1445,12 @@
2123 ut_ad(!buf_pool_mutex_own(buf_pool));
2124 count++;
2125 continue;
2126- } else {
2127+ } else if (block_mutex) {
2128 mutex_exit(block_mutex);
2129 }
2130 }
2131- buf_pool_mutex_exit(buf_pool);
2132+ //buf_pool_mutex_exit(buf_pool);
2133+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2134 }
2135
2136 return(count);
2137@@ -1475,21 +1483,25 @@
2138 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2139 #endif /* UNIV_DEBUG */
2140
2141- ut_ad(buf_pool_mutex_own(buf_pool));
2142+ //ut_ad(buf_pool_mutex_own(buf_pool));
2143+ ut_ad(flush_type != BUF_FLUSH_LRU
2144+ || mutex_own(&buf_pool->LRU_list_mutex));
2145
2146- block_mutex = buf_page_get_mutex(bpage);
2147- mutex_enter(block_mutex);
2148+ block_mutex = buf_page_get_mutex_enter(bpage);
2149
2150- ut_a(buf_page_in_file(bpage));
2151+ //ut_a(buf_page_in_file(bpage));
2152
2153- if (buf_flush_ready_for_flush(bpage, flush_type)) {
2154+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2155 ulint space;
2156 ulint offset;
2157 buf_pool_t* buf_pool;
2158
2159 buf_pool = buf_pool_from_bpage(bpage);
2160
2161- buf_pool_mutex_exit(buf_pool);
2162+ //buf_pool_mutex_exit(buf_pool);
2163+ if (flush_type == BUF_FLUSH_LRU) {
2164+ mutex_exit(&buf_pool->LRU_list_mutex);
2165+ }
2166
2167 /* These fields are protected by both the
2168 buffer pool mutex and block mutex. */
2169@@ -1505,13 +1517,18 @@
2170 *count,
2171 n_to_flush);
2172
2173- buf_pool_mutex_enter(buf_pool);
2174+ //buf_pool_mutex_enter(buf_pool);
2175+ if (flush_type == BUF_FLUSH_LRU) {
2176+ mutex_enter(&buf_pool->LRU_list_mutex);
2177+ }
2178 flushed = TRUE;
2179- } else {
2180+ } else if (block_mutex) {
2181 mutex_exit(block_mutex);
2182 }
2183
2184- ut_ad(buf_pool_mutex_own(buf_pool));
2185+ //ut_ad(buf_pool_mutex_own(buf_pool));
2186+ ut_ad(flush_type != BUF_FLUSH_LRU
2187+ || mutex_own(&buf_pool->LRU_list_mutex));
2188
2189 return(flushed);
2190 }
2191@@ -1532,7 +1549,8 @@
2192 buf_page_t* bpage;
2193 ulint count = 0;
2194
2195- ut_ad(buf_pool_mutex_own(buf_pool));
2196+ //ut_ad(buf_pool_mutex_own(buf_pool));
2197+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2198
2199 do {
2200 /* Start from the end of the list looking for a
2201@@ -1554,7 +1572,8 @@
2202 should be flushed, we factor in this value. */
2203 buf_lru_flush_page_count += count;
2204
2205- ut_ad(buf_pool_mutex_own(buf_pool));
2206+ //ut_ad(buf_pool_mutex_own(buf_pool));
2207+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2208
2209 return(count);
2210 }
2211@@ -1582,9 +1601,10 @@
2212 {
2213 ulint len;
2214 buf_page_t* bpage;
2215+ buf_page_t* prev_bpage = NULL;
2216 ulint count = 0;
2217
2218- ut_ad(buf_pool_mutex_own(buf_pool));
2219+ //ut_ad(buf_pool_mutex_own(buf_pool));
2220
2221 /* If we have flushed enough, leave the loop */
2222 do {
2223@@ -1603,6 +1623,7 @@
2224
2225 if (bpage) {
2226 ut_a(bpage->oldest_modification > 0);
2227+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2228 }
2229
2230 if (!bpage || bpage->oldest_modification >= lsn_limit) {
2231@@ -1644,9 +1665,17 @@
2232 break;
2233 }
2234
2235- bpage = UT_LIST_GET_PREV(list, bpage);
2236+ bpage = UT_LIST_GET_PREV(flush_list, bpage);
2237
2238- ut_ad(!bpage || bpage->in_flush_list);
2239+ //ut_ad(!bpage || bpage->in_flush_list);
2240+ if (bpage != prev_bpage) {
2241+ /* the search might warp.. retrying */
2242+ buf_flush_list_mutex_exit(buf_pool);
2243+ break;
2244+ }
2245+ if (bpage) {
2246+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2247+ }
2248
2249 buf_flush_list_mutex_exit(buf_pool);
2250
2251@@ -1655,7 +1684,7 @@
2252
2253 } while (count < min_n && bpage != NULL && len > 0);
2254
2255- ut_ad(buf_pool_mutex_own(buf_pool));
2256+ //ut_ad(buf_pool_mutex_own(buf_pool));
2257
2258 return(count);
2259 }
2260@@ -1694,13 +1723,15 @@
2261 || sync_thread_levels_empty_gen(TRUE));
2262 #endif /* UNIV_SYNC_DEBUG */
2263
2264- buf_pool_mutex_enter(buf_pool);
2265+ //buf_pool_mutex_enter(buf_pool);
2266
2267 /* Note: The buffer pool mutex is released and reacquired within
2268 the flush functions. */
2269 switch(flush_type) {
2270 case BUF_FLUSH_LRU:
2271+ mutex_enter(&buf_pool->LRU_list_mutex);
2272 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2273+ mutex_exit(&buf_pool->LRU_list_mutex);
2274 break;
2275 case BUF_FLUSH_LIST:
2276 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
2277@@ -1709,7 +1740,7 @@
2278 ut_error;
2279 }
2280
2281- buf_pool_mutex_exit(buf_pool);
2282+ //buf_pool_mutex_exit(buf_pool);
2283
2284 buf_flush_buffered_writes();
2285
2286@@ -1965,7 +1996,7 @@
2287 retry:
2288 //buf_pool_mutex_enter(buf_pool);
2289 if (have_LRU_mutex)
2290- buf_pool_mutex_enter(buf_pool);
2291+ mutex_enter(&buf_pool->LRU_list_mutex);
2292
2293 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2294
2295@@ -1982,15 +2013,15 @@
2296 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2297 continue;
2298 }
2299- block_mutex = buf_page_get_mutex(bpage);
2300-
2301- mutex_enter(block_mutex);
2302+ block_mutex = buf_page_get_mutex_enter(bpage);
2303
2304- if (buf_flush_ready_for_replace(bpage)) {
2305+ if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2306 n_replaceable++;
2307 }
2308
2309- mutex_exit(block_mutex);
2310+ if (block_mutex) {
2311+ mutex_exit(block_mutex);
2312+ }
2313
2314 distance++;
2315
2316@@ -1999,7 +2030,7 @@
2317
2318 //buf_pool_mutex_exit(buf_pool);
2319 if (have_LRU_mutex)
2320- buf_pool_mutex_exit(buf_pool);
2321+ mutex_exit(&buf_pool->LRU_list_mutex);
2322
2323 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2324
2325@@ -2198,7 +2229,7 @@
2326
2327 ut_ad(buf_flush_list_mutex_own(buf_pool));
2328
2329- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2330+ UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2331 ut_ad(ut_list_node_313->in_flush_list));
2332
2333 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2334@@ -2238,7 +2269,7 @@
2335 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2336 }
2337
2338- bpage = UT_LIST_GET_NEXT(list, bpage);
2339+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2340
2341 ut_a(!bpage || om >= bpage->oldest_modification);
2342 }
2343diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2344--- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900
2345+++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900
2346@@ -143,8 +143,9 @@
2347 void
2348 buf_LRU_block_free_hashed_page(
2349 /*===========================*/
2350- buf_block_t* block); /*!< in: block, must contain a file page and
2351+ buf_block_t* block, /*!< in: block, must contain a file page and
2352 be in a state where it can be freed */
2353+ ibool have_page_hash_mutex);
2354
2355 /******************************************************************//**
2356 Determines if the unzip_LRU list should be used for evicting a victim
2357@@ -154,15 +155,20 @@
2358 ibool
2359 buf_LRU_evict_from_unzip_LRU(
2360 /*=========================*/
2361- buf_pool_t* buf_pool)
2362+ buf_pool_t* buf_pool,
2363+ ibool have_LRU_mutex)
2364 {
2365 ulint io_avg;
2366 ulint unzip_avg;
2367
2368- ut_ad(buf_pool_mutex_own(buf_pool));
2369+ //ut_ad(buf_pool_mutex_own(buf_pool));
2370
2371+ if (!have_LRU_mutex)
2372+ mutex_enter(&buf_pool->LRU_list_mutex);
2373 /* If the unzip_LRU list is empty, we can only use the LRU. */
2374 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2375+ if (!have_LRU_mutex)
2376+ mutex_exit(&buf_pool->LRU_list_mutex);
2377 return(FALSE);
2378 }
2379
2380@@ -171,14 +177,20 @@
2381 decompressed pages in the buffer pool. */
2382 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2383 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2384+ if (!have_LRU_mutex)
2385+ mutex_exit(&buf_pool->LRU_list_mutex);
2386 return(FALSE);
2387 }
2388
2389 /* If eviction hasn't started yet, we assume by default
2390 that a workload is disk bound. */
2391 if (buf_pool->freed_page_clock == 0) {
2392+ if (!have_LRU_mutex)
2393+ mutex_exit(&buf_pool->LRU_list_mutex);
2394 return(TRUE);
2395 }
2396+ if (!have_LRU_mutex)
2397+ mutex_exit(&buf_pool->LRU_list_mutex);
2398
2399 /* Calculate the average over past intervals, and add the values
2400 of the current interval. */
2401@@ -246,19 +258,23 @@
2402 page_arr = ut_malloc(
2403 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2404
2405- buf_pool_mutex_enter(buf_pool);
2406+ //buf_pool_mutex_enter(buf_pool);
2407+ mutex_enter(&buf_pool->LRU_list_mutex);
2408
2409 scan_again:
2410 num_entries = 0;
2411 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2412
2413 while (bpage != NULL) {
2414- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2415+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2416 buf_page_t* prev_bpage;
2417
2418- mutex_enter(block_mutex);
2419 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2420
2421+ if (!block_mutex) {
2422+ goto next_page;
2423+ }
2424+
2425 ut_a(buf_page_in_file(bpage));
2426
2427 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2428@@ -287,14 +303,16 @@
2429
2430 /* Array full. We release the buf_pool->mutex to
2431 obey the latching order. */
2432- buf_pool_mutex_exit(buf_pool);
2433+ //buf_pool_mutex_exit(buf_pool);
2434+ mutex_exit(&buf_pool->LRU_list_mutex);
2435
2436 buf_LRU_drop_page_hash_batch(
2437 id, zip_size, page_arr, num_entries);
2438
2439 num_entries = 0;
2440
2441- buf_pool_mutex_enter(buf_pool);
2442+ //buf_pool_mutex_enter(buf_pool);
2443+ mutex_enter(&buf_pool->LRU_list_mutex);
2444 } else {
2445 mutex_exit(block_mutex);
2446 }
2447@@ -319,7 +337,8 @@
2448 }
2449 }
2450
2451- buf_pool_mutex_exit(buf_pool);
2452+ //buf_pool_mutex_exit(buf_pool);
2453+ mutex_exit(&buf_pool->LRU_list_mutex);
2454
2455 /* Drop any remaining batch of search hashed pages. */
2456 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2457@@ -341,7 +360,9 @@
2458 ibool all_freed;
2459
2460 scan_again:
2461- buf_pool_mutex_enter(buf_pool);
2462+ //buf_pool_mutex_enter(buf_pool);
2463+ mutex_enter(&buf_pool->LRU_list_mutex);
2464+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2465
2466 all_freed = TRUE;
2467
2468@@ -369,8 +390,16 @@
2469
2470 all_freed = FALSE;
2471 } else {
2472- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2473- mutex_enter(block_mutex);
2474+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2475+
2476+ if (!block_mutex) {
2477+ /* This case should be impossible...
2478+ Something is wrong, so we will scan_again */
2479+
2480+ all_freed = FALSE;
2481+
2482+ goto next_page_no_mutex;
2483+ }
2484
2485 if (bpage->buf_fix_count > 0) {
2486
2487@@ -429,7 +458,9 @@
2488 ulint page_no;
2489 ulint zip_size;
2490
2491- buf_pool_mutex_exit(buf_pool);
2492+ //buf_pool_mutex_exit(buf_pool);
2493+ mutex_exit(&buf_pool->LRU_list_mutex);
2494+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2495
2496 zip_size = buf_page_get_zip_size(bpage);
2497 page_no = buf_page_get_page_no(bpage);
2498@@ -454,7 +485,7 @@
2499 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2500 != BUF_BLOCK_ZIP_FREE) {
2501 buf_LRU_block_free_hashed_page((buf_block_t*)
2502- bpage);
2503+ bpage, TRUE);
2504 } else {
2505 /* The block_mutex should have been
2506 released by buf_LRU_block_remove_hashed_page()
2507@@ -486,7 +517,9 @@
2508 bpage = prev_bpage;
2509 }
2510
2511- buf_pool_mutex_exit(buf_pool);
2512+ //buf_pool_mutex_exit(buf_pool);
2513+ mutex_exit(&buf_pool->LRU_list_mutex);
2514+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2515
2516 if (!all_freed) {
2517 os_thread_sleep(20000);
2518@@ -532,7 +565,9 @@
2519 buf_page_t* b;
2520 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2521
2522- ut_ad(buf_pool_mutex_own(buf_pool));
2523+ //ut_ad(buf_pool_mutex_own(buf_pool));
2524+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2525+ ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2526 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2527
2528 /* Find the first successor of bpage in the LRU list
2529@@ -540,17 +575,17 @@
2530 b = bpage;
2531 do {
2532 b = UT_LIST_GET_NEXT(LRU, b);
2533- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2534+ } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2535
2536 /* Insert bpage before b, i.e., after the predecessor of b. */
2537 if (b) {
2538- b = UT_LIST_GET_PREV(list, b);
2539+ b = UT_LIST_GET_PREV(zip_list, b);
2540 }
2541
2542 if (b) {
2543- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2544+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2545 } else {
2546- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2547+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2548 }
2549 }
2550
2551@@ -563,18 +598,19 @@
2552 buf_LRU_free_from_unzip_LRU_list(
2553 /*=============================*/
2554 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2555- ulint n_iterations) /*!< in: how many times this has
2556+ ulint n_iterations, /*!< in: how many times this has
2557 been called repeatedly without
2558 result: a high value means that
2559 we should search farther; we will
2560 search n_iterations / 5 of the
2561 unzip_LRU list, or nothing if
2562 n_iterations >= 5 */
2563+ ibool have_LRU_mutex)
2564 {
2565 buf_block_t* block;
2566 ulint distance;
2567
2568- ut_ad(buf_pool_mutex_own(buf_pool));
2569+ //ut_ad(buf_pool_mutex_own(buf_pool));
2570
2571 /* Theoratically it should be much easier to find a victim
2572 from unzip_LRU as we can choose even a dirty block (as we'll
2573@@ -584,7 +620,7 @@
2574 if we have done five iterations so far. */
2575
2576 if (UNIV_UNLIKELY(n_iterations >= 5)
2577- || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2578+ || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2579
2580 return(FALSE);
2581 }
2582@@ -592,18 +628,25 @@
2583 distance = 100 + (n_iterations
2584 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2585
2586+restart:
2587 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2588 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2589 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2590
2591 enum buf_lru_free_block_status freed;
2592
2593+ mutex_enter(&block->mutex);
2594+ if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2595+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2596+ mutex_exit(&block->mutex);
2597+ goto restart;
2598+ }
2599+
2600 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2601 ut_ad(block->in_unzip_LRU_list);
2602 ut_ad(block->page.in_LRU_list);
2603
2604- mutex_enter(&block->mutex);
2605- freed = buf_LRU_free_block(&block->page, FALSE, NULL);
2606+ freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
2607 mutex_exit(&block->mutex);
2608
2609 switch (freed) {
2610@@ -637,21 +680,23 @@
2611 buf_LRU_free_from_common_LRU_list(
2612 /*==============================*/
2613 buf_pool_t* buf_pool,
2614- ulint n_iterations)
2615+ ulint n_iterations,
2616 /*!< in: how many times this has been called
2617 repeatedly without result: a high value means
2618 that we should search farther; if
2619 n_iterations < 10, then we search
2620 n_iterations / 10 * buf_pool->curr_size
2621 pages from the end of the LRU list */
2622+ ibool have_LRU_mutex)
2623 {
2624 buf_page_t* bpage;
2625 ulint distance;
2626
2627- ut_ad(buf_pool_mutex_own(buf_pool));
2628+ //ut_ad(buf_pool_mutex_own(buf_pool));
2629
2630 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2631
2632+restart:
2633 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2634 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2635 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2636@@ -659,14 +704,23 @@
2637 enum buf_lru_free_block_status freed;
2638 unsigned accessed;
2639 mutex_t* block_mutex
2640- = buf_page_get_mutex(bpage);
2641+ = buf_page_get_mutex_enter(bpage);
2642+
2643+ if (!block_mutex) {
2644+ goto restart;
2645+ }
2646+
2647+ if (!bpage->in_LRU_list
2648+ || !buf_page_in_file(bpage)) {
2649+ mutex_exit(block_mutex);
2650+ goto restart;
2651+ }
2652
2653 ut_ad(buf_page_in_file(bpage));
2654 ut_ad(bpage->in_LRU_list);
2655
2656- mutex_enter(block_mutex);
2657 accessed = buf_page_is_accessed(bpage);
2658- freed = buf_LRU_free_block(bpage, TRUE, NULL);
2659+ freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
2660 mutex_exit(block_mutex);
2661
2662 switch (freed) {
2663@@ -718,16 +772,23 @@
2664 n_iterations / 5 of the unzip_LRU list. */
2665 {
2666 ibool freed = FALSE;
2667+ ibool have_LRU_mutex = FALSE;
2668
2669- buf_pool_mutex_enter(buf_pool);
2670+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2671+ have_LRU_mutex = TRUE;
2672+
2673+ //buf_pool_mutex_enter(buf_pool);
2674+ if (have_LRU_mutex)
2675+ mutex_enter(&buf_pool->LRU_list_mutex);
2676
2677- freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2678+ freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2679
2680 if (!freed) {
2681 freed = buf_LRU_free_from_common_LRU_list(
2682- buf_pool, n_iterations);
2683+ buf_pool, n_iterations, have_LRU_mutex);
2684 }
2685
2686+ buf_pool_mutex_enter(buf_pool);
2687 if (!freed) {
2688 buf_pool->LRU_flush_ended = 0;
2689 } else if (buf_pool->LRU_flush_ended > 0) {
2690@@ -735,6 +796,8 @@
2691 }
2692
2693 buf_pool_mutex_exit(buf_pool);
2694+ if (have_LRU_mutex)
2695+ mutex_exit(&buf_pool->LRU_list_mutex);
2696
2697 return(freed);
2698 }
2699@@ -795,7 +858,9 @@
2700
2701 buf_pool = buf_pool_from_array(i);
2702
2703- buf_pool_mutex_enter(buf_pool);
2704+ //buf_pool_mutex_enter(buf_pool);
2705+ mutex_enter(&buf_pool->LRU_list_mutex);
2706+ mutex_enter(&buf_pool->free_list_mutex);
2707
2708 if (!recv_recovery_on
2709 && UT_LIST_GET_LEN(buf_pool->free)
2710@@ -805,7 +870,9 @@
2711 ret = TRUE;
2712 }
2713
2714- buf_pool_mutex_exit(buf_pool);
2715+ //buf_pool_mutex_exit(buf_pool);
2716+ mutex_exit(&buf_pool->LRU_list_mutex);
2717+ mutex_exit(&buf_pool->free_list_mutex);
2718 }
2719
2720 return(ret);
2721@@ -823,9 +890,10 @@
2722 {
2723 buf_block_t* block;
2724
2725- ut_ad(buf_pool_mutex_own(buf_pool));
2726+ //ut_ad(buf_pool_mutex_own(buf_pool));
2727
2728- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2729+ mutex_enter(&buf_pool->free_list_mutex);
2730+ block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2731
2732 if (block) {
2733
2734@@ -834,7 +902,9 @@
2735 ut_ad(!block->page.in_flush_list);
2736 ut_ad(!block->page.in_LRU_list);
2737 ut_a(!buf_page_in_file(&block->page));
2738- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2739+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2740+
2741+ mutex_exit(&buf_pool->free_list_mutex);
2742
2743 mutex_enter(&block->mutex);
2744
2745@@ -844,6 +914,8 @@
2746 ut_ad(buf_pool_from_block(block) == buf_pool);
2747
2748 mutex_exit(&block->mutex);
2749+ } else {
2750+ mutex_exit(&buf_pool->free_list_mutex);
2751 }
2752
2753 return(block);
2754@@ -868,7 +940,7 @@
2755 ibool mon_value_was = FALSE;
2756 ibool started_monitor = FALSE;
2757 loop:
2758- buf_pool_mutex_enter(buf_pool);
2759+ //buf_pool_mutex_enter(buf_pool);
2760
2761 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2762 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2763@@ -951,8 +1023,10 @@
2764 ibool lru;
2765 page_zip_set_size(&block->page.zip, zip_size);
2766
2767+ mutex_enter(&buf_pool->LRU_list_mutex);
2768 block->page.zip.data = buf_buddy_alloc(
2769- buf_pool, zip_size, &lru);
2770+ buf_pool, zip_size, &lru, FALSE);
2771+ mutex_exit(&buf_pool->LRU_list_mutex);
2772
2773 UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
2774 } else {
2775@@ -960,7 +1034,7 @@
2776 block->page.zip.data = NULL;
2777 }
2778
2779- buf_pool_mutex_exit(buf_pool);
2780+ //buf_pool_mutex_exit(buf_pool);
2781
2782 if (started_monitor) {
2783 srv_print_innodb_monitor = mon_value_was;
2784@@ -972,7 +1046,7 @@
2785 /* If no block was in the free list, search from the end of the LRU
2786 list and try to free a block there */
2787
2788- buf_pool_mutex_exit(buf_pool);
2789+ //buf_pool_mutex_exit(buf_pool);
2790
2791 freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
2792
2793@@ -1058,7 +1132,8 @@
2794 ulint new_len;
2795
2796 ut_a(buf_pool->LRU_old);
2797- ut_ad(buf_pool_mutex_own(buf_pool));
2798+ //ut_ad(buf_pool_mutex_own(buf_pool));
2799+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2800 ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2801 ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2802 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2803@@ -1124,7 +1199,8 @@
2804 {
2805 buf_page_t* bpage;
2806
2807- ut_ad(buf_pool_mutex_own(buf_pool));
2808+ //ut_ad(buf_pool_mutex_own(buf_pool));
2809+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2810 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2811
2812 /* We first initialize all blocks in the LRU list as old and then use
2813@@ -1159,13 +1235,14 @@
2814 ut_ad(buf_pool);
2815 ut_ad(bpage);
2816 ut_ad(buf_page_in_file(bpage));
2817- ut_ad(buf_pool_mutex_own(buf_pool));
2818+ //ut_ad(buf_pool_mutex_own(buf_pool));
2819+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2820
2821 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2822 buf_block_t* block = (buf_block_t*) bpage;
2823
2824 ut_ad(block->in_unzip_LRU_list);
2825- ut_d(block->in_unzip_LRU_list = FALSE);
2826+ block->in_unzip_LRU_list = FALSE;
2827
2828 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2829 }
2830@@ -1183,7 +1260,8 @@
2831
2832 ut_ad(buf_pool);
2833 ut_ad(bpage);
2834- ut_ad(buf_pool_mutex_own(buf_pool));
2835+ //ut_ad(buf_pool_mutex_own(buf_pool));
2836+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2837
2838 ut_a(buf_page_in_file(bpage));
2839
2840@@ -1260,12 +1338,13 @@
2841
2842 ut_ad(buf_pool);
2843 ut_ad(block);
2844- ut_ad(buf_pool_mutex_own(buf_pool));
2845+ //ut_ad(buf_pool_mutex_own(buf_pool));
2846+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2847
2848 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2849
2850 ut_ad(!block->in_unzip_LRU_list);
2851- ut_d(block->in_unzip_LRU_list = TRUE);
2852+ block->in_unzip_LRU_list = TRUE;
2853
2854 if (old) {
2855 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2856@@ -1286,7 +1365,8 @@
2857
2858 ut_ad(buf_pool);
2859 ut_ad(bpage);
2860- ut_ad(buf_pool_mutex_own(buf_pool));
2861+ //ut_ad(buf_pool_mutex_own(buf_pool));
2862+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2863
2864 ut_a(buf_page_in_file(bpage));
2865
2866@@ -1337,7 +1417,8 @@
2867
2868 ut_ad(buf_pool);
2869 ut_ad(bpage);
2870- ut_ad(buf_pool_mutex_own(buf_pool));
2871+ //ut_ad(buf_pool_mutex_own(buf_pool));
2872+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2873
2874 ut_a(buf_page_in_file(bpage));
2875 ut_ad(!bpage->in_LRU_list);
2876@@ -1416,7 +1497,8 @@
2877 {
2878 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2879
2880- ut_ad(buf_pool_mutex_own(buf_pool));
2881+ //ut_ad(buf_pool_mutex_own(buf_pool));
2882+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2883
2884 if (bpage->old) {
2885 buf_pool->stat.n_pages_made_young++;
2886@@ -1458,19 +1540,20 @@
2887 buf_page_t* bpage, /*!< in: block to be freed */
2888 ibool zip, /*!< in: TRUE if should remove also the
2889 compressed page of an uncompressed page */
2890- ibool* buf_pool_mutex_released)
2891+ ibool* buf_pool_mutex_released,
2892 /*!< in: pointer to a variable that will
2893 be assigned TRUE if buf_pool_mutex
2894 was temporarily released, or NULL */
2895+ ibool have_LRU_mutex)
2896 {
2897 buf_page_t* b = NULL;
2898 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2899 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2900
2901- ut_ad(buf_pool_mutex_own(buf_pool));
2902+ //ut_ad(buf_pool_mutex_own(buf_pool));
2903 ut_ad(mutex_own(block_mutex));
2904 ut_ad(buf_page_in_file(bpage));
2905- ut_ad(bpage->in_LRU_list);
2906+ //ut_ad(bpage->in_LRU_list);
2907 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2908 #if UNIV_WORD_SIZE == 4
2909 /* On 32-bit systems, there is no padding in buf_page_t. On
2910@@ -1479,7 +1562,7 @@
2911 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2912 #endif
2913
2914- if (!buf_page_can_relocate(bpage)) {
2915+ if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2916
2917 /* Do not free buffer-fixed or I/O-fixed blocks. */
2918 return(BUF_LRU_NOT_FREED);
2919@@ -1511,15 +1594,15 @@
2920 If it cannot be allocated (without freeing a block
2921 from the LRU list), refuse to free bpage. */
2922 alloc:
2923- buf_pool_mutex_exit_forbid(buf_pool);
2924- b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2925- buf_pool_mutex_exit_allow(buf_pool);
2926+ //buf_pool_mutex_exit_forbid(buf_pool);
2927+ b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2928+ //buf_pool_mutex_exit_allow(buf_pool);
2929
2930 if (UNIV_UNLIKELY(!b)) {
2931 return(BUF_LRU_CANNOT_RELOCATE);
2932 }
2933
2934- memcpy(b, bpage, sizeof *b);
2935+ //memcpy(b, bpage, sizeof *b);
2936 }
2937
2938 #ifdef UNIV_DEBUG
2939@@ -1530,6 +1613,39 @@
2940 }
2941 #endif /* UNIV_DEBUG */
2942
2943+ /* to avoid breaking the latch order, we must re-enter block_mutex */
2944+ mutex_exit(block_mutex);
2945+
2946+ if (!have_LRU_mutex)
2947+ mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2948+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2949+ mutex_enter(block_mutex);
2950+
2951+ /* recheck states of block */
2952+ if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2953+ || !buf_page_can_relocate(bpage)) {
2954+not_freed:
2955+ if (b) {
2956+ buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2957+ }
2958+ if (!have_LRU_mutex)
2959+ mutex_exit(&buf_pool->LRU_list_mutex);
2960+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2961+ return(BUF_LRU_NOT_FREED);
2962+ } else if (zip || !bpage->zip.data) {
2963+ if (bpage->oldest_modification)
2964+ goto not_freed;
2965+ } else if (bpage->oldest_modification) {
2966+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2967+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2968+ goto not_freed;
2969+ }
2970+ }
2971+
2972+ if (b) {
2973+ memcpy(b, bpage, sizeof *b);
2974+ }
2975+
2976 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2977 != BUF_BLOCK_ZIP_FREE) {
2978 ut_a(bpage->buf_fix_count == 0);
2979@@ -1546,6 +1662,10 @@
2980
2981 ut_a(!hash_b);
2982
2983+ while (prev_b && !prev_b->in_LRU_list) {
2984+ prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2985+ }
2986+
2987 b->state = b->oldest_modification
2988 ? BUF_BLOCK_ZIP_DIRTY
2989 : BUF_BLOCK_ZIP_PAGE;
2990@@ -1642,7 +1762,9 @@
2991 *buf_pool_mutex_released = TRUE;
2992 }
2993
2994- buf_pool_mutex_exit(buf_pool);
2995+ //buf_pool_mutex_exit(buf_pool);
2996+ mutex_exit(&buf_pool->LRU_list_mutex);
2997+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2998 mutex_exit(block_mutex);
2999
3000 /* Remove possible adaptive hash index on the page.
3001@@ -1674,7 +1796,9 @@
3002 : BUF_NO_CHECKSUM_MAGIC);
3003 }
3004
3005- buf_pool_mutex_enter(buf_pool);
3006+ //buf_pool_mutex_enter(buf_pool);
3007+ if (have_LRU_mutex)
3008+ mutex_enter(&buf_pool->LRU_list_mutex);
3009 mutex_enter(block_mutex);
3010
3011 if (b) {
3012@@ -1684,13 +1808,17 @@
3013 mutex_exit(&buf_pool->zip_mutex);
3014 }
3015
3016- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
3017+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
3018 } else {
3019 /* The block_mutex should have been released by
3020 buf_LRU_block_remove_hashed_page() when it returns
3021 BUF_BLOCK_ZIP_FREE. */
3022 ut_ad(block_mutex == &buf_pool->zip_mutex);
3023 mutex_enter(block_mutex);
3024+
3025+ if (!have_LRU_mutex)
3026+ mutex_exit(&buf_pool->LRU_list_mutex);
3027+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3028 }
3029
3030 return(BUF_LRU_FREED);
3031@@ -1702,13 +1830,14 @@
3032 void
3033 buf_LRU_block_free_non_file_page(
3034 /*=============================*/
3035- buf_block_t* block) /*!< in: block, must not contain a file page */
3036+ buf_block_t* block, /*!< in: block, must not contain a file page */
3037+ ibool have_page_hash_mutex)
3038 {
3039 void* data;
3040 buf_pool_t* buf_pool = buf_pool_from_block(block);
3041
3042 ut_ad(block);
3043- ut_ad(buf_pool_mutex_own(buf_pool));
3044+ //ut_ad(buf_pool_mutex_own(buf_pool));
3045 ut_ad(mutex_own(&block->mutex));
3046
3047 switch (buf_block_get_state(block)) {
3048@@ -1742,18 +1871,21 @@
3049 if (data) {
3050 block->page.zip.data = NULL;
3051 mutex_exit(&block->mutex);
3052- buf_pool_mutex_exit_forbid(buf_pool);
3053+ //buf_pool_mutex_exit_forbid(buf_pool);
3054
3055 buf_buddy_free(
3056- buf_pool, data, page_zip_get_size(&block->page.zip));
3057+ buf_pool, data, page_zip_get_size(&block->page.zip),
3058+ have_page_hash_mutex);
3059
3060- buf_pool_mutex_exit_allow(buf_pool);
3061+ //buf_pool_mutex_exit_allow(buf_pool);
3062 mutex_enter(&block->mutex);
3063 page_zip_set_size(&block->page.zip, 0);
3064 }
3065
3066- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3067+ mutex_enter(&buf_pool->free_list_mutex);
3068+ UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3069 ut_d(block->page.in_free_list = TRUE);
3070+ mutex_exit(&buf_pool->free_list_mutex);
3071
3072 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3073 }
3074@@ -1783,7 +1915,11 @@
3075 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3076
3077 ut_ad(bpage);
3078- ut_ad(buf_pool_mutex_own(buf_pool));
3079+ //ut_ad(buf_pool_mutex_own(buf_pool));
3080+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3081+#ifdef UNIV_SYNC_DEBUG
3082+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3083+#endif
3084 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3085
3086 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3087@@ -1891,7 +2027,9 @@
3088
3089 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3090 mutex_exit(buf_page_get_mutex(bpage));
3091- buf_pool_mutex_exit(buf_pool);
3092+ //buf_pool_mutex_exit(buf_pool);
3093+ mutex_exit(&buf_pool->LRU_list_mutex);
3094+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3095 buf_print();
3096 buf_LRU_print();
3097 buf_validate();
3098@@ -1912,17 +2050,17 @@
3099 ut_a(bpage->zip.data);
3100 ut_a(buf_page_get_zip_size(bpage));
3101
3102- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3103+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3104
3105 mutex_exit(&buf_pool->zip_mutex);
3106- buf_pool_mutex_exit_forbid(buf_pool);
3107+ //buf_pool_mutex_exit_forbid(buf_pool);
3108
3109 buf_buddy_free(
3110 buf_pool, bpage->zip.data,
3111- page_zip_get_size(&bpage->zip));
3112+ page_zip_get_size(&bpage->zip), TRUE);
3113
3114- buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3115- buf_pool_mutex_exit_allow(buf_pool);
3116+ buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3117+ //buf_pool_mutex_exit_allow(buf_pool);
3118
3119 UNIV_MEM_UNDESC(bpage);
3120 return(BUF_BLOCK_ZIP_FREE);
3121@@ -1945,13 +2083,13 @@
3122 ut_ad(!bpage->in_flush_list);
3123 ut_ad(!bpage->in_LRU_list);
3124 mutex_exit(&((buf_block_t*) bpage)->mutex);
3125- buf_pool_mutex_exit_forbid(buf_pool);
3126+ //buf_pool_mutex_exit_forbid(buf_pool);
3127
3128 buf_buddy_free(
3129 buf_pool, data,
3130- page_zip_get_size(&bpage->zip));
3131+ page_zip_get_size(&bpage->zip), TRUE);
3132
3133- buf_pool_mutex_exit_allow(buf_pool);
3134+ //buf_pool_mutex_exit_allow(buf_pool);
3135 mutex_enter(&((buf_block_t*) bpage)->mutex);
3136 page_zip_set_size(&bpage->zip, 0);
3137 }
3138@@ -1977,18 +2115,19 @@
3139 void
3140 buf_LRU_block_free_hashed_page(
3141 /*===========================*/
3142- buf_block_t* block) /*!< in: block, must contain a file page and
3143+ buf_block_t* block, /*!< in: block, must contain a file page and
3144 be in a state where it can be freed */
3145+ ibool have_page_hash_mutex)
3146 {
3147 #ifdef UNIV_DEBUG
3148- buf_pool_t* buf_pool = buf_pool_from_block(block);
3149- ut_ad(buf_pool_mutex_own(buf_pool));
3150+ //buf_pool_t* buf_pool = buf_pool_from_block(block);
3151+ //ut_ad(buf_pool_mutex_own(buf_pool));
3152 #endif
3153 ut_ad(mutex_own(&block->mutex));
3154
3155 buf_block_set_state(block, BUF_BLOCK_MEMORY);
3156
3157- buf_LRU_block_free_non_file_page(block);
3158+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3159 }
3160
3161 /**********************************************************************//**
3162@@ -2015,7 +2154,8 @@
3163 }
3164
3165 if (adjust) {
3166- buf_pool_mutex_enter(buf_pool);
3167+ //buf_pool_mutex_enter(buf_pool);
3168+ mutex_enter(&buf_pool->LRU_list_mutex);
3169
3170 if (ratio != buf_pool->LRU_old_ratio) {
3171 buf_pool->LRU_old_ratio = ratio;
3172@@ -2027,7 +2167,8 @@
3173 }
3174 }
3175
3176- buf_pool_mutex_exit(buf_pool);
3177+ //buf_pool_mutex_exit(buf_pool);
3178+ mutex_exit(&buf_pool->LRU_list_mutex);
3179 } else {
3180 buf_pool->LRU_old_ratio = ratio;
3181 }
3182@@ -2124,7 +2265,8 @@
3183 ulint new_len;
3184
3185 ut_ad(buf_pool);
3186- buf_pool_mutex_enter(buf_pool);
3187+ //buf_pool_mutex_enter(buf_pool);
3188+ mutex_enter(&buf_pool->LRU_list_mutex);
3189
3190 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3191
3192@@ -2185,16 +2327,22 @@
3193
3194 ut_a(buf_pool->LRU_old_len == old_len);
3195
3196- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3197+ mutex_exit(&buf_pool->LRU_list_mutex);
3198+ mutex_enter(&buf_pool->free_list_mutex);
3199+
3200+ UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3201 ut_ad(ut_list_node_313->in_free_list));
3202
3203 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3204 bpage != NULL;
3205- bpage = UT_LIST_GET_NEXT(list, bpage)) {
3206+ bpage = UT_LIST_GET_NEXT(free, bpage)) {
3207
3208 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3209 }
3210
3211+ mutex_exit(&buf_pool->free_list_mutex);
3212+ mutex_enter(&buf_pool->LRU_list_mutex);
3213+
3214 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3215 ut_ad(ut_list_node_313->in_unzip_LRU_list
3216 && ut_list_node_313->page.in_LRU_list));
3217@@ -2208,7 +2356,8 @@
3218 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3219 }
3220
3221- buf_pool_mutex_exit(buf_pool);
3222+ //buf_pool_mutex_exit(buf_pool);
3223+ mutex_exit(&buf_pool->LRU_list_mutex);
3224 }
3225
3226 /**********************************************************************//**
3227@@ -2244,7 +2393,8 @@
3228 const buf_page_t* bpage;
3229
3230 ut_ad(buf_pool);
3231- buf_pool_mutex_enter(buf_pool);
3232+ //buf_pool_mutex_enter(buf_pool);
3233+ mutex_enter(&buf_pool->LRU_list_mutex);
3234
3235 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3236
3237@@ -2301,7 +2451,8 @@
3238 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3239 }
3240
3241- buf_pool_mutex_exit(buf_pool);
3242+ //buf_pool_mutex_exit(buf_pool);
3243+ mutex_exit(&buf_pool->LRU_list_mutex);
3244 }
3245
3246 /**********************************************************************//**
3247diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3248--- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900
3249+++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900
3250@@ -311,6 +311,7 @@
3251
3252 return(0);
3253 }
3254+ buf_pool_mutex_exit(buf_pool);
3255
3256 /* Check that almost all pages in the area have been accessed; if
3257 offset == low, the accesses must be in a descending order, otherwise,
3258@@ -329,6 +330,7 @@
3259
3260 fail_count = 0;
3261
3262+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3263 for (i = low; i < high; i++) {
3264 bpage = buf_page_hash_get(buf_pool, space, i);
3265
3266@@ -356,7 +358,8 @@
3267
3268 if (fail_count > threshold) {
3269 /* Too many failures: return */
3270- buf_pool_mutex_exit(buf_pool);
3271+ //buf_pool_mutex_exit(buf_pool);
3272+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3273 return(0);
3274 }
3275
3276@@ -371,7 +374,8 @@
3277 bpage = buf_page_hash_get(buf_pool, space, offset);
3278
3279 if (bpage == NULL) {
3280- buf_pool_mutex_exit(buf_pool);
3281+ //buf_pool_mutex_exit(buf_pool);
3282+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3283
3284 return(0);
3285 }
3286@@ -397,7 +401,8 @@
3287 pred_offset = fil_page_get_prev(frame);
3288 succ_offset = fil_page_get_next(frame);
3289
3290- buf_pool_mutex_exit(buf_pool);
3291+ //buf_pool_mutex_exit(buf_pool);
3292+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3293
3294 if ((offset == low) && (succ_offset == offset + 1)) {
3295
3296diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3297--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900
3298+++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900
3299@@ -245,6 +245,10 @@
3300 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3301 {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3302 {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3303+ {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3304+ {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3305+ {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3306+ {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3307 {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3308 {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3309 {&dict_sys_mutex_key, "dict_sys_mutex", 0},
3310@@ -295,6 +299,7 @@
3311 {&archive_lock_key, "archive_lock", 0},
3312 # endif /* UNIV_LOG_ARCHIVE */
3313 {&btr_search_latch_key, "btr_search_latch", 0},
3314+ {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3315 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3316 {&buf_block_lock_key, "buf_block_lock", 0},
3317 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3318diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3319--- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900
3320+++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900
3321@@ -1566,7 +1566,8 @@
3322
3323 buf_pool = buf_pool_from_array(i);
3324
3325- buf_pool_mutex_enter(buf_pool);
3326+ //buf_pool_mutex_enter(buf_pool);
3327+ mutex_enter(&buf_pool->zip_free_mutex);
3328
3329 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3330 buf_buddy_stat_t* buddy_stat;
3331@@ -1596,7 +1597,8 @@
3332 }
3333 }
3334
3335- buf_pool_mutex_exit(buf_pool);
3336+ //buf_pool_mutex_exit(buf_pool);
3337+ mutex_exit(&buf_pool->zip_free_mutex);
3338
3339 if (status) {
3340 break;
3341diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3342--- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3343+++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
3344@@ -3705,9 +3705,11 @@
3345 ulint fold = buf_page_address_fold(space, page_no);
3346 buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3347
3348- buf_pool_mutex_enter(buf_pool);
3349+ //buf_pool_mutex_enter(buf_pool);
3350+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3351 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3352- buf_pool_mutex_exit(buf_pool);
3353+ //buf_pool_mutex_exit(buf_pool);
3354+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3355
3356 if (UNIV_LIKELY_NULL(bpage)) {
3357 /* A buffer pool watch has been set or the
3358diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3359--- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900
3360+++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900
3361@@ -51,10 +51,11 @@
3362 buf_pool_t* buf_pool,
3363 /*!< buffer pool in which the block resides */
3364 ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3365- ibool* lru) /*!< in: pointer to a variable that will be assigned
3366+ ibool* lru, /*!< in: pointer to a variable that will be assigned
3367 TRUE if storage was allocated from the LRU list
3368 and buf_pool->mutex was temporarily released,
3369 or NULL if the LRU list should not be used */
3370+ ibool have_page_hash_mutex)
3371 __attribute__((malloc));
3372
3373 /**********************************************************************//**
3374@@ -67,7 +68,8 @@
3375 /*!< buffer pool in which the block resides */
3376 void* buf, /*!< in: block to be freed, must not be
3377 pointed to by the buffer pool */
3378- ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
3379+ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3380+ ibool have_page_hash_mutex)
3381 __attribute__((nonnull));
3382
3383 #ifndef UNIV_NONINL
3384diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3385--- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900
3386+++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900
3387@@ -46,10 +46,11 @@
3388 /*!< in: buffer pool in which the page resides */
3389 ulint i, /*!< in: index of buf_pool->zip_free[],
3390 or BUF_BUDDY_SIZES */
3391- ibool* lru) /*!< in: pointer to a variable that will be assigned
3392+ ibool* lru, /*!< in: pointer to a variable that will be assigned
3393 TRUE if storage was allocated from the LRU list
3394 and buf_pool->mutex was temporarily released,
3395 or NULL if the LRU list should not be used */
3396+ ibool have_page_hash_mutex)
3397 __attribute__((malloc));
3398
3399 /**********************************************************************//**
3400@@ -61,8 +62,9 @@
3401 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3402 void* buf, /*!< in: block to be freed, must not be
3403 pointed to by the buffer pool */
3404- ulint i) /*!< in: index of buf_pool->zip_free[],
3405+ ulint i, /*!< in: index of buf_pool->zip_free[],
3406 or BUF_BUDDY_SIZES */
3407+ ibool have_page_hash_mutex)
3408 __attribute__((nonnull));
3409
3410 /**********************************************************************//**
3411@@ -102,16 +104,17 @@
3412 the page resides */
3413 ulint size, /*!< in: block size, up to
3414 UNIV_PAGE_SIZE */
3415- ibool* lru) /*!< in: pointer to a variable
3416+ ibool* lru, /*!< in: pointer to a variable
3417 that will be assigned TRUE if
3418 storage was allocated from the
3419 LRU list and buf_pool->mutex was
3420 temporarily released, or NULL if
3421 the LRU list should not be used */
3422+ ibool have_page_hash_mutex)
3423 {
3424- ut_ad(buf_pool_mutex_own(buf_pool));
3425+ //ut_ad(buf_pool_mutex_own(buf_pool));
3426
3427- return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3428+ return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3429 }
3430
3431 /**********************************************************************//**
3432@@ -123,12 +126,25 @@
3433 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3434 void* buf, /*!< in: block to be freed, must not be
3435 pointed to by the buffer pool */
3436- ulint size) /*!< in: block size, up to
3437+ ulint size, /*!< in: block size, up to
3438 UNIV_PAGE_SIZE */
3439+ ibool have_page_hash_mutex)
3440 {
3441- ut_ad(buf_pool_mutex_own(buf_pool));
3442+ //ut_ad(buf_pool_mutex_own(buf_pool));
3443+
3444+ if (!have_page_hash_mutex) {
3445+ mutex_enter(&buf_pool->LRU_list_mutex);
3446+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3447+ }
3448
3449- buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3450+ mutex_enter(&buf_pool->zip_free_mutex);
3451+ buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3452+ mutex_exit(&buf_pool->zip_free_mutex);
3453+
3454+ if (!have_page_hash_mutex) {
3455+ mutex_exit(&buf_pool->LRU_list_mutex);
3456+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3457+ }
3458 }
3459
3460 #ifdef UNIV_MATERIALIZE
3461diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3462--- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900
3463+++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900
3464@@ -132,6 +132,20 @@
3465 /*==========================*/
3466
3467 /********************************************************************//**
3468+*/
3469+UNIV_INLINE
3470+void
3471+buf_pool_page_hash_x_lock_all(void);
3472+/*================================*/
3473+
3474+/********************************************************************//**
3475+*/
3476+UNIV_INLINE
3477+void
3478+buf_pool_page_hash_x_unlock_all(void);
3479+/*==================================*/
3480+
3481+/********************************************************************//**
3482 Creates the buffer pool.
3483 @return own: buf_pool object, NULL if not enough memory or error */
3484 UNIV_INTERN
3485@@ -761,6 +775,15 @@
3486 const buf_page_t* bpage) /*!< in: pointer to control block */
3487 __attribute__((pure));
3488
3489+/*************************************************************************
3490+Gets the mutex of a block and enter the mutex with consistency. */
3491+UNIV_INLINE
3492+mutex_t*
3493+buf_page_get_mutex_enter(
3494+/*=========================*/
3495+ const buf_page_t* bpage) /*!< in: pointer to control block */
3496+ __attribute__((pure));
3497+
3498 /*********************************************************************//**
3499 Get the flush type of a page.
3500 @return flush type */
3501@@ -1242,7 +1265,7 @@
3502 All these are protected by buf_pool->mutex. */
3503 /* @{ */
3504
3505- UT_LIST_NODE_T(buf_page_t) list;
3506+ /* UT_LIST_NODE_T(buf_page_t) list; */
3507 /*!< based on state, this is a
3508 list node, protected either by
3509 buf_pool->mutex or by
3510@@ -1270,6 +1293,10 @@
3511 BUF_BLOCK_REMOVE_HASH or
3512 BUF_BLOCK_READY_IN_USE. */
3513
3514+ /* resplit for optimistic use */
3515+ UT_LIST_NODE_T(buf_page_t) free;
3516+ UT_LIST_NODE_T(buf_page_t) flush_list;
3517+ UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3518 #ifdef UNIV_DEBUG
3519 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3520 when buf_pool->flush_list_mutex is
3521@@ -1362,11 +1389,11 @@
3522 a block is in the unzip_LRU list
3523 if page.state == BUF_BLOCK_FILE_PAGE
3524 and page.zip.data != NULL */
3525-#ifdef UNIV_DEBUG
3526+//#ifdef UNIV_DEBUG
3527 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3528 decompressed LRU list;
3529 used in debugging */
3530-#endif /* UNIV_DEBUG */
3531+//#endif /* UNIV_DEBUG */
3532 mutex_t mutex; /*!< mutex protecting this block:
3533 state (also protected by the buffer
3534 pool mutex), io_fix, buf_fix_count,
3535@@ -1532,6 +1559,11 @@
3536 pool instance, protects compressed
3537 only pages (of type buf_page_t, not
3538 buf_block_t */
3539+ mutex_t LRU_list_mutex;
3540+ rw_lock_t page_hash_latch;
3541+ mutex_t free_list_mutex;
3542+ mutex_t zip_free_mutex;
3543+ mutex_t zip_hash_mutex;
3544 ulint instance_no; /*!< Array index of this buffer
3545 pool instance */
3546 ulint old_pool_size; /*!< Old pool size in bytes */
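#
# note: the buf0buf.h hunks above replace the single buf_pool->mutex with one
# latch per protected structure (LRU list, page hash, free list, zip_free[]
# buddy lists, zip hash) and give buf_page_t a separate list node for each
# list it can be on.  A rough, self-contained pthreads sketch of that layout;
# the names are illustrative, not the InnoDB definitions.

#include <pthread.h>

/* One latch per structure, so threads working on different lists no longer
contend on a single pool-wide mutex (illustrative layout only). */
struct split_pool {
	pthread_mutex_t		lru_list_mutex;		/* LRU list */
	pthread_rwlock_t	page_hash_latch;	/* page hash table */
	pthread_mutex_t		free_list_mutex;	/* free list */
	pthread_mutex_t		zip_free_mutex;		/* zip_free[] buddy lists */
	pthread_mutex_t		zip_hash_mutex;		/* zip hash table */
};

static void
split_pool_init(struct split_pool *p)
{
	pthread_mutex_init(&p->lru_list_mutex, NULL);
	pthread_rwlock_init(&p->page_hash_latch, NULL);
	pthread_mutex_init(&p->free_list_mutex, NULL);
	pthread_mutex_init(&p->zip_free_mutex, NULL);
	pthread_mutex_init(&p->zip_hash_mutex, NULL);
}
#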
3547diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3548--- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900
3549+++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900
3550@@ -274,7 +274,7 @@
3551 case BUF_BLOCK_ZIP_FREE:
3552 /* This is a free page in buf_pool->zip_free[].
3553 Such pages should only be accessed by the buddy allocator. */
3554- ut_error;
3555+ /* ut_error; */ /* optimistic */
3556 break;
3557 case BUF_BLOCK_ZIP_PAGE:
3558 case BUF_BLOCK_ZIP_DIRTY:
3559@@ -317,9 +317,14 @@
3560 {
3561 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3562
3563+ if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3564+	/* TODO: this code is interim; it should be confirmed later. */
3565+ return(&buf_pool->zip_mutex);
3566+ }
3567+
3568 switch (buf_page_get_state(bpage)) {
3569 case BUF_BLOCK_ZIP_FREE:
3570- ut_error;
3571+ /* ut_error; */ /* optimistic */
3572 return(NULL);
3573 case BUF_BLOCK_ZIP_PAGE:
3574 case BUF_BLOCK_ZIP_DIRTY:
3575@@ -329,6 +334,28 @@
3576 }
3577 }
3578
3579+/*************************************************************************
3580+Gets the mutex of a block and enters it, re-checking that it is still the block's mutex. */
3581+UNIV_INLINE
3582+mutex_t*
3583+buf_page_get_mutex_enter(
3584+/*=========================*/
3585+ const buf_page_t* bpage) /*!< in: pointer to control block */
3586+{
3587+ mutex_t* block_mutex;
3588+
3589+ while(1) {
3590+ block_mutex = buf_page_get_mutex(bpage);
3591+ if (!block_mutex)
3592+ return block_mutex;
3593+
3594+ mutex_enter(block_mutex);
3595+ if (block_mutex == buf_page_get_mutex(bpage))
3596+ return block_mutex;
3597+ mutex_exit(block_mutex);
3598+ }
3599+}
3600+
3601 /*********************************************************************//**
3602 Get the flush type of a page.
3603 @return flush type */
3604@@ -425,8 +452,8 @@
3605 enum buf_io_fix io_fix) /*!< in: io_fix state */
3606 {
3607 #ifdef UNIV_DEBUG
3608- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3609- ut_ad(buf_pool_mutex_own(buf_pool));
3610+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3611+ //ut_ad(buf_pool_mutex_own(buf_pool));
3612 #endif
3613 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3614
3615@@ -456,14 +483,14 @@
3616 const buf_page_t* bpage) /*!< control block being relocated */
3617 {
3618 #ifdef UNIV_DEBUG
3619- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3620- ut_ad(buf_pool_mutex_own(buf_pool));
3621+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3622+ //ut_ad(buf_pool_mutex_own(buf_pool));
3623 #endif
3624 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3625 ut_ad(buf_page_in_file(bpage));
3626- ut_ad(bpage->in_LRU_list);
3627+ //ut_ad(bpage->in_LRU_list);
3628
3629- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3630+ return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3631 && bpage->buf_fix_count == 0);
3632 }
3633
3634@@ -477,8 +504,8 @@
3635 const buf_page_t* bpage) /*!< in: control block */
3636 {
3637 #ifdef UNIV_DEBUG
3638- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3639- ut_ad(buf_pool_mutex_own(buf_pool));
3640+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3641+ //ut_ad(buf_pool_mutex_own(buf_pool));
3642 #endif
3643 ut_ad(buf_page_in_file(bpage));
3644
3645@@ -498,7 +525,8 @@
3646 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3647 #endif /* UNIV_DEBUG */
3648 ut_a(buf_page_in_file(bpage));
3649- ut_ad(buf_pool_mutex_own(buf_pool));
3650+ //ut_ad(buf_pool_mutex_own(buf_pool));
3651+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3652 ut_ad(bpage->in_LRU_list);
3653
3654 #ifdef UNIV_LRU_DEBUG
3655@@ -545,9 +573,10 @@
3656 ulint time_ms) /*!< in: ut_time_ms() */
3657 {
3658 #ifdef UNIV_DEBUG
3659- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3660- ut_ad(buf_pool_mutex_own(buf_pool));
3661+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3662+ //ut_ad(buf_pool_mutex_own(buf_pool));
3663 #endif
3664+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3665 ut_a(buf_page_in_file(bpage));
3666
3667 if (!bpage->access_time) {
3668@@ -761,19 +790,19 @@
3669 /*===========*/
3670 buf_block_t* block) /*!< in, own: block to be freed */
3671 {
3672- buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3673+ //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3674
3675- buf_pool_mutex_enter(buf_pool);
3676+ //buf_pool_mutex_enter(buf_pool);
3677
3678 mutex_enter(&block->mutex);
3679
3680 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3681
3682- buf_LRU_block_free_non_file_page(block);
3683+ buf_LRU_block_free_non_file_page(block, FALSE);
3684
3685 mutex_exit(&block->mutex);
3686
3687- buf_pool_mutex_exit(buf_pool);
3688+ //buf_pool_mutex_exit(buf_pool);
3689 }
3690 #endif /* !UNIV_HOTBACKUP */
3691
3692@@ -821,17 +850,17 @@
3693 page frame */
3694 {
3695 ib_uint64_t lsn;
3696- mutex_t* block_mutex = buf_page_get_mutex(bpage);
3697-
3698- mutex_enter(block_mutex);
3699+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3700
3701- if (buf_page_in_file(bpage)) {
3702+ if (block_mutex && buf_page_in_file(bpage)) {
3703 lsn = bpage->newest_modification;
3704 } else {
3705 lsn = 0;
3706 }
3707
3708- mutex_exit(block_mutex);
3709+ if (block_mutex) {
3710+ mutex_exit(block_mutex);
3711+ }
3712
3713 return(lsn);
3714 }
3715@@ -849,7 +878,7 @@
3716 #ifdef UNIV_SYNC_DEBUG
3717 buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3718
3719- ut_ad((buf_pool_mutex_own(buf_pool)
3720+ ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3721 && (block->page.buf_fix_count == 0))
3722 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3723 #endif /* UNIV_SYNC_DEBUG */
3724@@ -979,7 +1008,11 @@
3725 buf_page_t* bpage;
3726
3727 ut_ad(buf_pool);
3728- ut_ad(buf_pool_mutex_own(buf_pool));
3729+ //ut_ad(buf_pool_mutex_own(buf_pool));
3730+#ifdef UNIV_SYNC_DEBUG
3731+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3732+ || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3733+#endif
3734 ut_ad(fold == buf_page_address_fold(space, offset));
3735
3736 /* Look for the page in the hash table */
3737@@ -1064,11 +1097,13 @@
3738 const buf_page_t* bpage;
3739 buf_pool_t* buf_pool = buf_pool_get(space, offset);
3740
3741- buf_pool_mutex_enter(buf_pool);
3742+ //buf_pool_mutex_enter(buf_pool);
3743+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3744
3745 bpage = buf_page_hash_get(buf_pool, space, offset);
3746
3747- buf_pool_mutex_exit(buf_pool);
3748+ //buf_pool_mutex_exit(buf_pool);
3749+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3750
3751 return(bpage != NULL);
3752 }
3753@@ -1196,4 +1231,38 @@
3754 buf_pool_mutex_exit(buf_pool);
3755 }
3756 }
3757+
3758+/********************************************************************//**
3759+Acquires the page_hash x-latch of all buffer pool instances. */
3760+UNIV_INLINE
3761+void
3762+buf_pool_page_hash_x_lock_all(void)
3763+/*===============================*/
3764+{
3765+ ulint i;
3766+
3767+ for (i = 0; i < srv_buf_pool_instances; i++) {
3768+ buf_pool_t* buf_pool;
3769+
3770+ buf_pool = buf_pool_from_array(i);
3771+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3772+ }
3773+}
3774+
3775+/********************************************************************//**
3776+Releases the page_hash x-latch of all buffer pool instances. */
3777+UNIV_INLINE
3778+void
3779+buf_pool_page_hash_x_unlock_all(void)
3780+/*=================================*/
3781+{
3782+ ulint i;
3783+
3784+ for (i = 0; i < srv_buf_pool_instances; i++) {
3785+ buf_pool_t* buf_pool;
3786+
3787+ buf_pool = buf_pool_from_array(i);
3788+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3789+ }
3790+}
3791 #endif /* !UNIV_HOTBACKUP */
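#
# note: buf_page_get_mutex_enter() above has to allow for the protecting mutex
# of a page changing (zip_mutex vs. block mutex) while the caller is blocked on
# it, so it re-checks the mutex after acquiring it and retries on a mismatch.
# A minimal, self-contained C11 sketch of that acquire-and-revalidate loop;
# guarded/guarded_lock are illustrative names, not InnoDB code.

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

/* An object whose protecting mutex may be switched while it is being
relocated, as with compressed-only vs. uncompressed buffer pages. */
struct guarded {
	_Atomic(pthread_mutex_t *)	mutex;	/* current protecting mutex */
};

/* Locks whatever mutex currently protects g.  After the acquisition the
pointer is read again: if it changed, the object was relocated while we
waited, so the stale mutex is released and the loop retries.  Returns NULL
if the object currently has no mutex at all. */
static pthread_mutex_t *
guarded_lock(struct guarded *g)
{
	for (;;) {
		pthread_mutex_t	*m = atomic_load(&g->mutex);

		if (m == NULL) {
			return(NULL);
		}

		pthread_mutex_lock(m);

		if (m == atomic_load(&g->mutex)) {
			return(m);	/* still the right mutex; caller holds it */
		}

		pthread_mutex_unlock(m);
	}
}
#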
3792diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3793--- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900
3794+++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900
3795@@ -113,10 +113,11 @@
3796 buf_page_t* bpage, /*!< in: block to be freed */
3797 ibool zip, /*!< in: TRUE if should remove also the
3798 compressed page of an uncompressed page */
3799- ibool* buf_pool_mutex_released);
3800+ ibool* buf_pool_mutex_released,
3801 /*!< in: pointer to a variable that will
3802 be assigned TRUE if buf_pool->mutex
3803 was temporarily released, or NULL */
3804+ ibool have_LRU_mutex);
3805 /******************************************************************//**
3806 Try to free a replaceable block.
3807 @return TRUE if found and freed */
3808@@ -163,7 +164,8 @@
3809 void
3810 buf_LRU_block_free_non_file_page(
3811 /*=============================*/
3812- buf_block_t* block); /*!< in: block, must not contain a file page */
3813+ buf_block_t* block, /*!< in: block, must not contain a file page */
3814+ ibool have_page_hash_mutex);
3815 /******************************************************************//**
3816 Adds a block to the LRU list. */
3817 UNIV_INTERN
3818diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3819--- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900
3820+++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900
3821@@ -112,6 +112,7 @@
3822 extern mysql_pfs_key_t archive_lock_key;
3823 # endif /* UNIV_LOG_ARCHIVE */
3824 extern mysql_pfs_key_t btr_search_latch_key;
3825+extern mysql_pfs_key_t buf_pool_page_hash_key;
3826 extern mysql_pfs_key_t buf_block_lock_key;
3827 # ifdef UNIV_SYNC_DEBUG
3828 extern mysql_pfs_key_t buf_block_debug_latch_key;
3829diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3830--- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900
3831+++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900
3832@@ -75,6 +75,10 @@
3833 extern mysql_pfs_key_t buffer_block_mutex_key;
3834 extern mysql_pfs_key_t buf_pool_mutex_key;
3835 extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3836+extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3837+extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3838+extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3839+extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3840 extern mysql_pfs_key_t cache_last_read_mutex_key;
3841 extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3842 extern mysql_pfs_key_t dict_sys_mutex_key;
3843@@ -660,7 +664,7 @@
3844 #define SYNC_TRX_LOCK_HEAP 298
3845 #define SYNC_TRX_SYS_HEADER 290
3846 #define SYNC_LOG 170
3847-#define SYNC_LOG_FLUSH_ORDER 147
3848+#define SYNC_LOG_FLUSH_ORDER 156
3849 #define SYNC_RECV 168
3850 #define SYNC_WORK_QUEUE 162
3851 #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
3852@@ -670,8 +674,13 @@
3853 SYNC_SEARCH_SYS, as memory allocation
3854 can call routines there! Otherwise
3855 the level is SYNC_MEM_HASH. */
3856+#define SYNC_BUF_LRU_LIST 158
3857+#define SYNC_BUF_PAGE_HASH 157
3858+#define SYNC_BUF_BLOCK 155 /* Block mutex */
3859+#define SYNC_BUF_FREE_LIST 153
3860+#define SYNC_BUF_ZIP_FREE 152
3861+#define SYNC_BUF_ZIP_HASH 151
3862 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
3863-#define SYNC_BUF_BLOCK 146 /* Block mutex */
3864 #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
3865 #define SYNC_DOUBLEWRITE 140
3866 #define SYNC_ANY_LATCH 135
3867@@ -703,7 +712,7 @@
3868 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3869 when atomic operations are not enabled */
3870 #endif
3871- ulint waiters; /*!< This ulint is set to 1 if there are (or
3872+ volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3873 may be) threads waiting in the global wait
3874 array for this mutex to be released.
3875 Otherwise, this is 0. */
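#
# note: the sync0sync.h hunk above slots the new latches into the global latch
# order (SYNC_BUF_LRU_LIST 158 down to SYNC_BUF_ZIP_HASH 151, all above
# SYNC_BUF_POOL 150, with SYNC_LOG_FLUSH_ORDER moved from 147 to 156), so the
# existing sync_thread_add_level() checks keep the split latches deadlock-free.
# A simplified, self-contained sketch of that kind of level check follows;
# InnoDB additionally allows re-taking latches of the same level for some of
# these types.

#include <assert.h>

enum { MAX_HELD = 32 };

/* Per-thread record of the levels of the latches currently held. */
struct latch_stack {
	int	level[MAX_HELD];
	int	n;
};

/* A latch may only be taken if its level is lower than that of every latch
already held, so acquisition order always descends and cycles are impossible. */
static void
latch_acquire(struct latch_stack *s, int level)
{
	int	i;

	for (i = 0; i < s->n; i++) {
		assert(level < s->level[i]);
	}

	assert(s->n < MAX_HELD);
	s->level[s->n++] = level;
	/* ... acquire the underlying mutex or rw-lock here ... */
}
#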
3876diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3877--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900
3878+++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900
3879@@ -3060,7 +3060,7 @@
3880 level += log_sys->max_checkpoint_age
3881 - (lsn - oldest_modification);
3882 }
3883- bpage = UT_LIST_GET_NEXT(list, bpage);
3884+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3885 n_blocks++;
3886 }
3887
3888@@ -3145,7 +3145,7 @@
3889 found = TRUE;
3890 break;
3891 }
3892- bpage = UT_LIST_GET_NEXT(list, bpage);
3893+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3894 new_blocks_num++;
3895 }
3896 if (!found) {
3897diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3898--- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3899+++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
3900@@ -265,7 +265,7 @@
3901 mutex->lock_word = 0;
3902 #endif
3903 mutex->event = os_event_create(NULL);
3904- mutex_set_waiters(mutex, 0);
3905+ mutex->waiters = 0;
3906 #ifdef UNIV_DEBUG
3907 mutex->magic_n = MUTEX_MAGIC_N;
3908 #endif /* UNIV_DEBUG */
3909@@ -444,6 +444,15 @@
3910 mutex_t* mutex, /*!< in: mutex */
3911 ulint n) /*!< in: value to set */
3912 {
3913+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3914+ ut_ad(mutex);
3915+
3916+ if (n) {
3917+ os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3918+ } else {
3919+ os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3920+ }
3921+#else
3922 volatile ulint* ptr; /* declared volatile to ensure that
3923 the value is stored to memory */
3924 ut_ad(mutex);
3925@@ -452,6 +461,7 @@
3926
3927 *ptr = n; /* Here we assume that the write of a single
3928 word in memory is atomic */
3929+#endif
3930 }
3931
3932 /******************************************************************//**
3933@@ -1193,7 +1203,12 @@
3934 ut_error;
3935 }
3936 break;
3937+ case SYNC_BUF_LRU_LIST:
3938 case SYNC_BUF_FLUSH_LIST:
3939+ case SYNC_BUF_PAGE_HASH:
3940+ case SYNC_BUF_FREE_LIST:
3941+ case SYNC_BUF_ZIP_FREE:
3942+ case SYNC_BUF_ZIP_HASH:
3943 case SYNC_BUF_POOL:
3944 /* We can have multiple mutexes of this type therefore we
3945 can only check whether the greater than condition holds. */
3946@@ -1211,7 +1226,8 @@
3947 buffer block (block->mutex or buf_pool->zip_mutex). */
3948 if (!sync_thread_levels_g(array, level, FALSE)) {
3949 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3950- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3951+			/* the exact rule is not settled yet; the check below is disabled for now */
3952+ //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
3953 }
3954 break;
3955 case SYNC_REC_LOCK:
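#
# note: the sync0sync.c hunk above switches mutex_set_waiters() from a plain
# store to a compare-and-swap when atomics are available, matching the
# "volatile ulint waiters" change in sync0sync.h.  A self-contained sketch of
# the same 0 <-> 1 flag update using the GCC __sync builtins, which is what
# os_compare_and_swap_ulint typically resolves to on GCC builds (other
# platforms use their native CAS primitive):

#include <stdio.h>

static volatile unsigned long	waiters = 0;

/* Sets or clears the waiters flag with an atomic read-modify-write rather
than a plain store; the flag only ever moves between 0 and 1. */
static void
set_waiters(unsigned long n)
{
	if (n) {
		__sync_bool_compare_and_swap(&waiters, 0UL, 1UL);
	} else {
		__sync_bool_compare_and_swap(&waiters, 1UL, 0UL);
	}
}

int
main(void)
{
	set_waiters(1);
	printf("waiters=%lu\n", waiters);
	set_waiters(0);
	printf("waiters=%lu\n", waiters);
	return(0);
}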