git.pld-linux.org Git - packages/mysql.git/blob - innodb_split_buf_pool_mutex.patch
- up to 5.5.10
[packages/mysql.git] / innodb_split_buf_pool_mutex.patch
1 # name       : innodb_split_buf_pool_mutex.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
4 #
5 #!!! notice !!!
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9 --- a/storage/innobase/btr/btr0cur.c    2010-11-03 07:01:13.000000000 +0900
10 +++ b/storage/innobase/btr/btr0cur.c    2010-12-03 15:48:29.268957148 +0900
11 @@ -4066,7 +4066,8 @@
12  
13         mtr_commit(mtr);
14  
15 -       buf_pool_mutex_enter(buf_pool);
16 +       //buf_pool_mutex_enter(buf_pool);
17 +       mutex_enter(&buf_pool->LRU_list_mutex);
18         mutex_enter(&block->mutex);
19  
20         /* Only free the block if it is still allocated to
21 @@ -4077,16 +4078,21 @@
22             && buf_block_get_space(block) == space
23             && buf_block_get_page_no(block) == page_no) {
24  
25 -               if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED
26 -                   && all && block->page.zip.data) {
27 +               if (buf_LRU_free_block(&block->page, all, TRUE) != BUF_LRU_FREED
28 +                   && all && block->page.zip.data
29 +                   /* Now, buf_LRU_free_block() may release mutex temporarily */
30 +                   && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
31 +                   && buf_block_get_space(block) == space
32 +                   && buf_block_get_page_no(block) == page_no) {
33                         /* Attempt to deallocate the uncompressed page
34                         if the whole block cannot be deallocted. */
35  
36 -                       buf_LRU_free_block(&block->page, FALSE);
37 +                       buf_LRU_free_block(&block->page, FALSE, TRUE);
38                 }
39         }
40  
41 -       buf_pool_mutex_exit(buf_pool);
42 +       //buf_pool_mutex_exit(buf_pool);
43 +       mutex_exit(&buf_pool->LRU_list_mutex);
44         mutex_exit(&block->mutex);
45  }
46  
47 diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
48 --- a/storage/innobase/btr/btr0sea.c    2010-12-03 15:48:03.033037049 +0900
49 +++ b/storage/innobase/btr/btr0sea.c    2010-12-03 15:48:29.271024260 +0900
50 @@ -1943,7 +1943,7 @@
51         rec_offs_init(offsets_);
52  
53         rw_lock_x_lock(&btr_search_latch);
54 -       buf_pool_mutex_enter_all();
55 +       buf_pool_page_hash_x_lock_all();
56  
57         cell_count = hash_get_n_cells(btr_search_sys->hash_index);
58  
59 @@ -1951,11 +1951,11 @@
60                 /* We release btr_search_latch every once in a while to
61                 give other queries a chance to run. */
62                 if ((i != 0) && ((i % chunk_size) == 0)) {
63 -                       buf_pool_mutex_exit_all();
64 +                       buf_pool_page_hash_x_unlock_all();
65                         rw_lock_x_unlock(&btr_search_latch);
66                         os_thread_yield();
67                         rw_lock_x_lock(&btr_search_latch);
68 -                       buf_pool_mutex_enter_all();
69 +                       buf_pool_page_hash_x_lock_all();
70                 }
71  
72                 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
73 @@ -2066,11 +2066,11 @@
74                 /* We release btr_search_latch every once in a while to
75                 give other queries a chance to run. */
76                 if (i != 0) {
77 -                       buf_pool_mutex_exit_all();
78 +                       buf_pool_page_hash_x_unlock_all();
79                         rw_lock_x_unlock(&btr_search_latch);
80                         os_thread_yield();
81                         rw_lock_x_lock(&btr_search_latch);
82 -                       buf_pool_mutex_enter_all();
83 +                       buf_pool_page_hash_x_lock_all();
84                 }
85  
86                 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
87 @@ -2078,7 +2078,7 @@
88                 }
89         }
90  
91 -       buf_pool_mutex_exit_all();
92 +       buf_pool_page_hash_x_unlock_all();
93         rw_lock_x_unlock(&btr_search_latch);
94         if (UNIV_LIKELY_NULL(heap)) {
95                 mem_heap_free(heap);
96 diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
97 --- a/storage/innobase/buf/buf0buddy.c  2010-12-03 15:22:36.307986907 +0900
98 +++ b/storage/innobase/buf/buf0buddy.c  2010-12-03 15:48:29.275025723 +0900
99 @@ -73,10 +73,11 @@
100         if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
101  #endif /* UNIV_DEBUG_VALGRIND */
102  
103 -       ut_ad(buf_pool_mutex_own(buf_pool));
104 +       //ut_ad(buf_pool_mutex_own(buf_pool));
105 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
106         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
107         ut_ad(buf_pool->zip_free[i].start != bpage);
108 -       UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
109 +       UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
110  
111  #ifdef UNIV_DEBUG_VALGRIND
112         if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
113 @@ -96,8 +97,8 @@
114                                         buf_pool->zip_free[] */
115  {
116  #ifdef UNIV_DEBUG_VALGRIND
117 -       buf_page_t*     prev = UT_LIST_GET_PREV(list, bpage);
118 -       buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
119 +       buf_page_t*     prev = UT_LIST_GET_PREV(zip_list, bpage);
120 +       buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
121  
122         if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
123         if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
124 @@ -106,9 +107,10 @@
125         ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
126  #endif /* UNIV_DEBUG_VALGRIND */
127  
128 -       ut_ad(buf_pool_mutex_own(buf_pool));
129 +       //ut_ad(buf_pool_mutex_own(buf_pool));
130 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
131         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
132 -       UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
133 +       UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
134  
135  #ifdef UNIV_DEBUG_VALGRIND
136         if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
137 @@ -128,12 +130,13 @@
138  {
139         buf_page_t*     bpage;
140  
141 -       ut_ad(buf_pool_mutex_own(buf_pool));
142 +       //ut_ad(buf_pool_mutex_own(buf_pool));
143 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
144         ut_a(i < BUF_BUDDY_SIZES);
145  
146  #ifndef UNIV_DEBUG_VALGRIND
147         /* Valgrind would complain about accessing free memory. */
148 -       ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
149 +       ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
150                               ut_ad(buf_page_get_state(ut_list_node_313)
151                                     == BUF_BLOCK_ZIP_FREE)));
152  #endif /* !UNIV_DEBUG_VALGRIND */
153 @@ -177,16 +180,19 @@
154  buf_buddy_block_free(
155  /*=================*/
156         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
157 -       void*           buf)            /*!< in: buffer frame to deallocate */
158 +       void*           buf,            /*!< in: buffer frame to deallocate */
159 +       ibool           have_page_hash_mutex)
160  {
161         const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf);
162         buf_page_t*     bpage;
163         buf_block_t*    block;
164  
165 -       ut_ad(buf_pool_mutex_own(buf_pool));
166 +       //ut_ad(buf_pool_mutex_own(buf_pool));
167         ut_ad(!mutex_own(&buf_pool->zip_mutex));
168         ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
169  
170 +       mutex_enter(&buf_pool->zip_hash_mutex);
171 +
172         HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
173                     ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
174                           && bpage->in_zip_hash && !bpage->in_page_hash),
175 @@ -198,12 +204,14 @@
176         ut_d(bpage->in_zip_hash = FALSE);
177         HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
178  
179 +       mutex_exit(&buf_pool->zip_hash_mutex);
180 +
181         ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
182         UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
183  
184         block = (buf_block_t*) bpage;
185         mutex_enter(&block->mutex);
186 -       buf_LRU_block_free_non_file_page(block);
187 +       buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
188         mutex_exit(&block->mutex);
189  
190         ut_ad(buf_pool->buddy_n_frames > 0);
191 @@ -220,7 +228,7 @@
192  {
193         buf_pool_t*     buf_pool = buf_pool_from_block(block);
194         const ulint     fold = BUF_POOL_ZIP_FOLD(block);
195 -       ut_ad(buf_pool_mutex_own(buf_pool));
196 +       //ut_ad(buf_pool_mutex_own(buf_pool));
197         ut_ad(!mutex_own(&buf_pool->zip_mutex));
198         ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
199  
200 @@ -232,7 +240,10 @@
201         ut_ad(!block->page.in_page_hash);
202         ut_ad(!block->page.in_zip_hash);
203         ut_d(block->page.in_zip_hash = TRUE);
204 +
205 +       mutex_enter(&buf_pool->zip_hash_mutex);
206         HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
207 +       mutex_exit(&buf_pool->zip_hash_mutex);
208  
209         ut_d(buf_pool->buddy_n_frames++);
210  }
211 @@ -268,7 +279,7 @@
212                 bpage->state = BUF_BLOCK_ZIP_FREE;
213  #ifndef UNIV_DEBUG_VALGRIND
214                 /* Valgrind would complain about accessing free memory. */
215 -               ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
216 +               ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
217                                       ut_ad(buf_page_get_state(
218                                                     ut_list_node_313)
219                                             == BUF_BLOCK_ZIP_FREE)));
220 @@ -291,25 +302,29 @@
221         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
222         ulint           i,              /*!< in: index of buf_pool->zip_free[],
223                                         or BUF_BUDDY_SIZES */
224 -       ibool*          lru)            /*!< in: pointer to a variable that
225 +       ibool*          lru,            /*!< in: pointer to a variable that
226                                         will be assigned TRUE if storage was
227                                         allocated from the LRU list and
228                                         buf_pool->mutex was temporarily
229                                         released, or NULL if the LRU list
230                                         should not be used */
231 +       ibool           have_page_hash_mutex)
232  {
233         buf_block_t*    block;
234  
235 -       ut_ad(buf_pool_mutex_own(buf_pool));
236 +       //ut_ad(buf_pool_mutex_own(buf_pool));
237 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
238         ut_ad(!mutex_own(&buf_pool->zip_mutex));
239  
240         if (i < BUF_BUDDY_SIZES) {
241                 /* Try to allocate from the buddy system. */
242 +               mutex_enter(&buf_pool->zip_free_mutex);
243                 block = buf_buddy_alloc_zip(buf_pool, i);
244  
245                 if (block) {
246                         goto func_exit;
247                 }
248 +               mutex_exit(&buf_pool->zip_free_mutex);
249         }
250  
251         /* Try allocating from the buf_pool->free list. */
252 @@ -326,19 +341,30 @@
253         }
254  
255         /* Try replacing an uncompressed page in the buffer pool. */
256 -       buf_pool_mutex_exit(buf_pool);
257 +       //buf_pool_mutex_exit(buf_pool);
258 +       mutex_exit(&buf_pool->LRU_list_mutex);
259 +       if (have_page_hash_mutex) {
260 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
261 +       }
262         block = buf_LRU_get_free_block(buf_pool);
263         *lru = TRUE;
264 -       buf_pool_mutex_enter(buf_pool);
265 +       //buf_pool_mutex_enter(buf_pool);
266 +       mutex_enter(&buf_pool->LRU_list_mutex);
267 +       if (have_page_hash_mutex) {
268 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
269 +       }
270  
271  alloc_big:
272         buf_buddy_block_register(block);
273  
274 +       mutex_enter(&buf_pool->zip_free_mutex);
275         block = buf_buddy_alloc_from(
276                 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
277  
278  func_exit:
279         buf_pool->buddy_stat[i].used++;
280 +       mutex_exit(&buf_pool->zip_free_mutex);
281 +
282         return(block);
283  }
284  
285 @@ -355,7 +381,10 @@
286         buf_page_t*     b;
287         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
288  
289 -       ut_ad(buf_pool_mutex_own(buf_pool));
290 +       //ut_ad(buf_pool_mutex_own(buf_pool));
291 +#ifdef UNIV_SYNC_DEBUG
292 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
293 +#endif
294  
295         switch (buf_page_get_state(bpage)) {
296         case BUF_BLOCK_ZIP_FREE:
297 @@ -364,7 +393,7 @@
298         case BUF_BLOCK_FILE_PAGE:
299         case BUF_BLOCK_MEMORY:
300         case BUF_BLOCK_REMOVE_HASH:
301 -               ut_error;
302 +               /* ut_error; */ /* optimistic: fall through to return(FALSE) */
303         case BUF_BLOCK_ZIP_DIRTY:
304                 /* Cannot relocate dirty pages. */
305                 return(FALSE);
306 @@ -374,9 +403,18 @@
307         }
308  
309         mutex_enter(&buf_pool->zip_mutex);
310 +       mutex_enter(&buf_pool->zip_free_mutex);
311  
312         if (!buf_page_can_relocate(bpage)) {
313                 mutex_exit(&buf_pool->zip_mutex);
314 +               mutex_exit(&buf_pool->zip_free_mutex);
315 +               return(FALSE);
316 +       }
317 +
318 +       if (bpage != buf_page_hash_get(buf_pool,
319 +                                      bpage->space, bpage->offset)) {
320 +               mutex_exit(&buf_pool->zip_mutex);
321 +               mutex_exit(&buf_pool->zip_free_mutex);
322                 return(FALSE);
323         }
324  
325 @@ -384,18 +422,19 @@
326         ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
327  
328         /* relocate buf_pool->zip_clean */
329 -       b = UT_LIST_GET_PREV(list, dpage);
330 -       UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
331 +       b = UT_LIST_GET_PREV(zip_list, dpage);
332 +       UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
333  
334         if (b) {
335 -               UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
336 +               UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
337         } else {
338 -               UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
339 +               UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
340         }
341  
342         UNIV_MEM_INVALID(bpage, sizeof *bpage);
343  
344         mutex_exit(&buf_pool->zip_mutex);
345 +       mutex_exit(&buf_pool->zip_free_mutex);
346         return(TRUE);
347  }
348  
349 @@ -409,14 +448,16 @@
350         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
351         void*           src,            /*!< in: block to relocate */
352         void*           dst,            /*!< in: free block to relocate to */
353 -       ulint           i)              /*!< in: index of
354 +       ulint           i,              /*!< in: index of
355                                         buf_pool->zip_free[] */
356 +       ibool           have_page_hash_mutex)
357  {
358         buf_page_t*     bpage;
359         const ulint     size    = BUF_BUDDY_LOW << i;
360         ullint          usec    = ut_time_us(NULL);
361  
362 -       ut_ad(buf_pool_mutex_own(buf_pool));
363 +       //ut_ad(buf_pool_mutex_own(buf_pool));
364 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
365         ut_ad(!mutex_own(&buf_pool->zip_mutex));
366         ut_ad(!ut_align_offset(src, size));
367         ut_ad(!ut_align_offset(dst, size));
368 @@ -437,6 +478,13 @@
369         if (size >= PAGE_ZIP_MIN_SIZE) {
370                 /* This is a compressed page. */
371                 mutex_t*        mutex;
372 +               ulint           space, page_no;
373 +
374 +               if (!have_page_hash_mutex) {
375 +                       mutex_exit(&buf_pool->zip_free_mutex);
376 +                       mutex_enter(&buf_pool->LRU_list_mutex);
377 +                       rw_lock_x_lock(&buf_pool->page_hash_latch);
378 +               }
379  
380                 /* The src block may be split into smaller blocks,
381                 some of which may be free.  Thus, the
382 @@ -446,9 +494,9 @@
383                 pool), so there is nothing wrong about this.  The
384                 mach_read_from_4() calls here will only trigger bogus
385                 Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
386 -               ulint           space   = mach_read_from_4(
387 +               space   = mach_read_from_4(
388                         (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
389 -               ulint           page_no = mach_read_from_4(
390 +               page_no = mach_read_from_4(
391                         (const byte*) src + FIL_PAGE_OFFSET);
392                 /* Suppress Valgrind warnings about conditional jump
393                 on uninitialized value. */
394 @@ -462,6 +510,11 @@
395                         added to buf_pool->page_hash yet.  Obviously,
396                         it cannot be relocated. */
397  
398 +                       if (!have_page_hash_mutex) {
399 +                               mutex_enter(&buf_pool->zip_free_mutex);
400 +                               mutex_exit(&buf_pool->LRU_list_mutex);
401 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
402 +                       }
403                         return(FALSE);
404                 }
405  
406 @@ -473,18 +526,27 @@
407                         For the sake of simplicity, give up. */
408                         ut_ad(page_zip_get_size(&bpage->zip) < size);
409  
410 +                       if (!have_page_hash_mutex) {
411 +                               mutex_enter(&buf_pool->zip_free_mutex);
412 +                               mutex_exit(&buf_pool->LRU_list_mutex);
413 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
414 +                       }
415                         return(FALSE);
416                 }
417  
418 +               /* To keep latch order, release zip_free_mutex before taking the page mutex */
419 +               if (have_page_hash_mutex)
420 +                       mutex_exit(&buf_pool->zip_free_mutex);
421 +
422                 /* The block must have been allocated, but it may
423                 contain uninitialized data. */
424                 UNIV_MEM_ASSERT_W(src, size);
425  
426 -               mutex = buf_page_get_mutex(bpage);
427 +               mutex = buf_page_get_mutex_enter(bpage);
428  
429 -               mutex_enter(mutex);
430 +               mutex_enter(&buf_pool->zip_free_mutex);
431  
432 -               if (buf_page_can_relocate(bpage)) {
433 +               if (mutex && buf_page_can_relocate(bpage)) {
434                         /* Relocate the compressed page. */
435                         ut_a(bpage->zip.data == src);
436                         memcpy(dst, src, size);
437 @@ -499,10 +561,22 @@
438                                 buddy_stat->relocated_usec
439                                         += ut_time_us(NULL) - usec;
440                         }
441 +
442 +                       if (!have_page_hash_mutex) {
443 +                               mutex_exit(&buf_pool->LRU_list_mutex);
444 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
445 +                       }
446                         return(TRUE);
447                 }
448  
449 -               mutex_exit(mutex);
450 +               if (!have_page_hash_mutex) {
451 +                       mutex_exit(&buf_pool->LRU_list_mutex);
452 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
453 +               }
454 +
455 +               if (mutex) {
456 +                       mutex_exit(mutex);
457 +               }
458         } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
459                 /* This must be a buf_page_t object. */
460  #if UNIV_WORD_SIZE == 4
461 @@ -511,10 +585,31 @@
462                 about uninitialized pad bytes. */
463                 UNIV_MEM_ASSERT_RW(src, size);
464  #endif
465 +
466 +               mutex_exit(&buf_pool->zip_free_mutex);
467 +
468 +               if (!have_page_hash_mutex) {
469 +                       mutex_enter(&buf_pool->LRU_list_mutex);
470 +                       rw_lock_x_lock(&buf_pool->page_hash_latch);
471 +               }
472 +
473                 if (buf_buddy_relocate_block(src, dst)) {
474 +                       mutex_enter(&buf_pool->zip_free_mutex);
475 +
476 +                       if (!have_page_hash_mutex) {
477 +                               mutex_exit(&buf_pool->LRU_list_mutex);
478 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
479 +                       }
480  
481                         goto success;
482                 }
483 +
484 +               mutex_enter(&buf_pool->zip_free_mutex);
485 +
486 +               if (!have_page_hash_mutex) {
487 +                       mutex_exit(&buf_pool->LRU_list_mutex);
488 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
489 +               }
490         }
491  
492         return(FALSE);
493 @@ -529,13 +624,15 @@
494         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
495         void*           buf,            /*!< in: block to be freed, must not be
496                                         pointed to by the buffer pool */
497 -       ulint           i)              /*!< in: index of buf_pool->zip_free[],
498 +       ulint           i,              /*!< in: index of buf_pool->zip_free[],
499                                         or BUF_BUDDY_SIZES */
500 +       ibool           have_page_hash_mutex)
501  {
502         buf_page_t*     bpage;
503         buf_page_t*     buddy;
504  
505 -       ut_ad(buf_pool_mutex_own(buf_pool));
506 +       //ut_ad(buf_pool_mutex_own(buf_pool));
507 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
508         ut_ad(!mutex_own(&buf_pool->zip_mutex));
509         ut_ad(i <= BUF_BUDDY_SIZES);
510         ut_ad(buf_pool->buddy_stat[i].used > 0);
511 @@ -546,7 +643,9 @@
512         ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
513  
514         if (i == BUF_BUDDY_SIZES) {
515 -               buf_buddy_block_free(buf_pool, buf);
516 +               mutex_exit(&buf_pool->zip_free_mutex);
517 +               buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
518 +               mutex_enter(&buf_pool->zip_free_mutex);
519                 return;
520         }
521  
522 @@ -591,7 +690,7 @@
523                 ut_a(bpage != buf);
524  
525                 {
526 -                       buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
527 +                       buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
528                         UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
529                         bpage = next;
530                 }
531 @@ -600,13 +699,13 @@
532  #ifndef UNIV_DEBUG_VALGRIND
533  buddy_nonfree:
534         /* Valgrind would complain about accessing free memory. */
535 -       ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
536 +       ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
537                               ut_ad(buf_page_get_state(ut_list_node_313)
538                                     == BUF_BLOCK_ZIP_FREE)));
539  #endif /* UNIV_DEBUG_VALGRIND */
540  
541         /* The buddy is not free. Is there a free block of this size? */
542 -       bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
543 +       bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
544  
545         if (bpage) {
546                 /* Remove the block from the free list, because a successful
547 @@ -616,7 +715,7 @@
548                 buf_buddy_remove_from_free(buf_pool, bpage, i);
549  
550                 /* Try to relocate the buddy of buf to the free block. */
551 -               if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
552 +               if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
553  
554                         ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
555                         goto buddy_free2;
556 @@ -636,14 +735,14 @@
557  
558                 (Parts of the buddy can be free in
559                 buf_pool->zip_free[j] with j < i.) */
560 -               ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
561 +               ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
562                                       ut_ad(buf_page_get_state(
563                                                     ut_list_node_313)
564                                             == BUF_BLOCK_ZIP_FREE
565                                             && ut_list_node_313 != buddy)));
566  #endif /* !UNIV_DEBUG_VALGRIND */
567  
568 -               if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
569 +               if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
570  
571                         buf = bpage;
572                         UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
573 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
574 --- a/storage/innobase/buf/buf0buf.c    2010-12-03 15:22:36.314943336 +0900
575 +++ b/storage/innobase/buf/buf0buf.c    2010-12-03 15:48:29.282947357 +0900
576 @@ -263,6 +263,7 @@
577  #ifdef UNIV_PFS_RWLOCK
578  /* Keys to register buffer block related rwlocks and mutexes with
579  performance schema */
580 +UNIV_INTERN mysql_pfs_key_t    buf_pool_page_hash_key;
581  UNIV_INTERN mysql_pfs_key_t    buf_block_lock_key;
582  # ifdef UNIV_SYNC_DEBUG
583  UNIV_INTERN mysql_pfs_key_t    buf_block_debug_latch_key;
584 @@ -273,6 +274,10 @@
585  UNIV_INTERN mysql_pfs_key_t    buffer_block_mutex_key;
586  UNIV_INTERN mysql_pfs_key_t    buf_pool_mutex_key;
587  UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_mutex_key;
588 +UNIV_INTERN mysql_pfs_key_t    buf_pool_LRU_list_mutex_key;
589 +UNIV_INTERN mysql_pfs_key_t    buf_pool_free_list_mutex_key;
590 +UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_free_mutex_key;
591 +UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_hash_mutex_key;
592  UNIV_INTERN mysql_pfs_key_t    flush_list_mutex_key;
593  #endif /* UNIV_PFS_MUTEX */
594  
595 @@ -881,9 +886,9 @@
596         block->page.in_zip_hash = FALSE;
597         block->page.in_flush_list = FALSE;
598         block->page.in_free_list = FALSE;
599 -       block->in_unzip_LRU_list = FALSE;
600  #endif /* UNIV_DEBUG */
601         block->page.in_LRU_list = FALSE;
602 +       block->in_unzip_LRU_list = FALSE;
603  #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
604         block->n_pointers = 0;
605  #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
606 @@ -981,9 +986,11 @@
607                 memset(block->frame, '\0', UNIV_PAGE_SIZE);
608  #endif
609                 /* Add the block to the free list */
610 -               UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
611 +               mutex_enter(&buf_pool->free_list_mutex);
612 +               UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
613  
614                 ut_d(block->page.in_free_list = TRUE);
615 +               mutex_exit(&buf_pool->free_list_mutex);
616                 ut_ad(buf_pool_from_block(block) == buf_pool);
617  
618                 block++;
619 @@ -1038,7 +1045,8 @@
620         buf_chunk_t*    chunk = buf_pool->chunks;
621  
622         ut_ad(buf_pool);
623 -       ut_ad(buf_pool_mutex_own(buf_pool));
624 +       //ut_ad(buf_pool_mutex_own(buf_pool));
625 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
626         for (n = buf_pool->n_chunks; n--; chunk++) {
627  
628                 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
629 @@ -1138,7 +1146,7 @@
630         buf_block_t*            block;
631         const buf_block_t*      block_end;
632  
633 -       ut_ad(buf_pool_mutex_own(buf_pool));
634 +       //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutexes here */
635  
636         block_end = chunk->blocks + chunk->size;
637  
638 @@ -1150,8 +1158,10 @@
639                 ut_ad(!block->in_unzip_LRU_list);
640                 ut_ad(!block->page.in_flush_list);
641                 /* Remove the block from the free list. */
642 +               mutex_enter(&buf_pool->free_list_mutex);
643                 ut_ad(block->page.in_free_list);
644 -               UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
645 +               UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
646 +               mutex_exit(&buf_pool->free_list_mutex);
647  
648                 /* Free the latches. */
649                 mutex_free(&block->mutex);
650 @@ -1208,9 +1218,21 @@
651         ------------------------------- */
652         mutex_create(buf_pool_mutex_key,
653                      &buf_pool->mutex, SYNC_BUF_POOL);
654 +       mutex_create(buf_pool_LRU_list_mutex_key,
655 +                    &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
656 +       rw_lock_create(buf_pool_page_hash_key,
657 +                      &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
658 +       mutex_create(buf_pool_free_list_mutex_key,
659 +                    &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
660 +       mutex_create(buf_pool_zip_free_mutex_key,
661 +                    &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
662 +       mutex_create(buf_pool_zip_hash_mutex_key,
663 +                    &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
664         mutex_create(buf_pool_zip_mutex_key,
665                      &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
666  
667 +       mutex_enter(&buf_pool->LRU_list_mutex);
668 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
669         buf_pool_mutex_enter(buf_pool);
670  
671         if (buf_pool_size > 0) {
672 @@ -1223,6 +1245,8 @@
673                         mem_free(chunk);
674                         mem_free(buf_pool);
675  
676 +                       mutex_exit(&buf_pool->LRU_list_mutex);
677 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
678                         buf_pool_mutex_exit(buf_pool);
679  
680                         return(DB_ERROR);
681 @@ -1253,6 +1277,8 @@
682  
683         /* All fields are initialized by mem_zalloc(). */
684  
685 +       mutex_exit(&buf_pool->LRU_list_mutex);
686 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
687         buf_pool_mutex_exit(buf_pool);
688  
689         return(DB_SUCCESS);
690 @@ -1467,7 +1493,11 @@
691         ulint           fold;
692         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
693  
694 -       ut_ad(buf_pool_mutex_own(buf_pool));
695 +       //ut_ad(buf_pool_mutex_own(buf_pool));
696 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
697 +#ifdef UNIV_SYNC_DEBUG
698 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
699 +#endif
700         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
701         ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
702         ut_a(bpage->buf_fix_count == 0);
703 @@ -1554,7 +1584,8 @@
704  
705  try_again:
706         btr_search_disable(); /* Empty the adaptive hash index again */
707 -       buf_pool_mutex_enter(buf_pool);
708 +       //buf_pool_mutex_enter(buf_pool);
709 +       mutex_enter(&buf_pool->LRU_list_mutex);
710  
711  shrink_again:
712         if (buf_pool->n_chunks <= 1) {
713 @@ -1625,7 +1656,7 @@
714  
715                                 buf_LRU_make_block_old(&block->page);
716                                 dirty++;
717 -                       } else if (buf_LRU_free_block(&block->page, TRUE)
718 +                       } else if (buf_LRU_free_block(&block->page, TRUE, TRUE)
719                                    != BUF_LRU_FREED) {
720                                 nonfree++;
721                         }
722 @@ -1633,7 +1664,8 @@
723                         mutex_exit(&block->mutex);
724                 }
725  
726 -               buf_pool_mutex_exit(buf_pool);
727 +               //buf_pool_mutex_exit(buf_pool);
728 +               mutex_exit(&buf_pool->LRU_list_mutex);
729  
730                 /* Request for a flush of the chunk if it helps.
731                 Do not flush if there are non-free blocks, since
732 @@ -1683,7 +1715,8 @@
733  func_done:
734         buf_pool->old_pool_size = buf_pool->curr_pool_size;
735  func_exit:
736 -       buf_pool_mutex_exit(buf_pool);
737 +       //buf_pool_mutex_exit(buf_pool);
738 +       mutex_exit(&buf_pool->LRU_list_mutex);
739         btr_search_enable();
740  }
741  
742 @@ -1724,7 +1757,9 @@
743         hash_table_t*   zip_hash;
744         hash_table_t*   page_hash;
745  
746 -       buf_pool_mutex_enter(buf_pool);
747 +       //buf_pool_mutex_enter(buf_pool);
748 +       mutex_enter(&buf_pool->LRU_list_mutex);
749 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
750  
751         /* Free, create, and populate the hash table. */
752         hash_table_free(buf_pool->page_hash);
753 @@ -1765,8 +1800,9 @@
754         All such blocks are either in buf_pool->zip_clean or
755         in buf_pool->flush_list. */
756  
757 +       mutex_enter(&buf_pool->zip_mutex);
758         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
759 -            b = UT_LIST_GET_NEXT(list, b)) {
760 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
761                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
762                 ut_ad(!b->in_flush_list);
763                 ut_ad(b->in_LRU_list);
764 @@ -1776,10 +1812,11 @@
765                 HASH_INSERT(buf_page_t, hash, page_hash,
766                             buf_page_address_fold(b->space, b->offset), b);
767         }
768 +       mutex_exit(&buf_pool->zip_mutex);
769  
770         buf_flush_list_mutex_enter(buf_pool);
771         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
772 -            b = UT_LIST_GET_NEXT(list, b)) {
773 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
774                 ut_ad(b->in_flush_list);
775                 ut_ad(b->in_LRU_list);
776                 ut_ad(b->in_page_hash);
777 @@ -1806,7 +1843,9 @@
778         }
779  
780         buf_flush_list_mutex_exit(buf_pool);
781 -       buf_pool_mutex_exit(buf_pool);
782 +       //buf_pool_mutex_exit(buf_pool);
783 +       mutex_exit(&buf_pool->LRU_list_mutex);
784 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
785  }
786  
787  /********************************************************************
788 @@ -1853,21 +1892,32 @@
789         buf_page_t*     bpage;
790         ulint           i;
791         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
792 +       mutex_t*        block_mutex;
793  
794 -       ut_ad(buf_pool_mutex_own(buf_pool));
795 +       //ut_ad(buf_pool_mutex_own(buf_pool));
796  
797 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
798         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
799 +       if (bpage) {
800 +               block_mutex = buf_page_get_mutex_enter(bpage);
801 +               ut_a(block_mutex);
802 +       }
803  
804         if (UNIV_LIKELY_NULL(bpage)) {
805                 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
806                         /* The page was loaded meanwhile. */
807 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
808                         return(bpage);
809                 }
810                 /* Add to an existing watch. */
811                 bpage->buf_fix_count++;
812 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
813 +               mutex_exit(block_mutex);
814                 return(NULL);
815         }
816  
817 +       /* buf_pool->watch is protected by zip_mutex for now */
818 +       mutex_enter(&buf_pool->zip_mutex);
819         for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
820                 bpage = &buf_pool->watch[i];
821  
822 @@ -1891,10 +1941,12 @@
823                         bpage->space = space;
824                         bpage->offset = offset;
825                         bpage->buf_fix_count = 1;
826 -
827 +                       bpage->buf_pool_index = buf_pool_index(buf_pool);
828                         ut_d(bpage->in_page_hash = TRUE);
829                         HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
830                                     fold, bpage);
831 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
832 +                       mutex_exit(&buf_pool->zip_mutex);
833                         return(NULL);
834                 case BUF_BLOCK_ZIP_PAGE:
835                         ut_ad(bpage->in_page_hash);
836 @@ -1912,6 +1964,8 @@
837         ut_error;
838  
839         /* Fix compiler warning */
840 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
841 +       mutex_exit(&buf_pool->zip_mutex);
842         return(NULL);
843  }
844  
845 @@ -1941,6 +1995,8 @@
846         buf_chunk_t*    chunks;
847         buf_chunk_t*    chunk;
848  
849 +       mutex_enter(&buf_pool->LRU_list_mutex);
850 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
851         buf_pool_mutex_enter(buf_pool);
852         chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
853  
854 @@ -1959,6 +2015,8 @@
855                 buf_pool->n_chunks++;
856         }
857  
858 +       mutex_exit(&buf_pool->LRU_list_mutex);
859 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
860         buf_pool_mutex_exit(buf_pool);
861  }
862  
863 @@ -2046,7 +2104,11 @@
864                                         space, offset) */
865         buf_page_t*     watch)          /*!< in/out: sentinel for watch */
866  {
867 -       ut_ad(buf_pool_mutex_own(buf_pool));
868 +       //ut_ad(buf_pool_mutex_own(buf_pool));
869 +#ifdef UNIV_SYNC_DEBUG
870 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
871 +#endif
872 +       ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
873  
874         HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
875         ut_d(watch->in_page_hash = FALSE);
876 @@ -2068,28 +2130,31 @@
877         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
878         ulint           fold = buf_page_address_fold(space, offset);
879  
880 -       buf_pool_mutex_enter(buf_pool);
881 +       //buf_pool_mutex_enter(buf_pool);
882 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
883         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
884         /* The page must exist because buf_pool_watch_set()
885         increments buf_fix_count. */
886         ut_a(bpage);
887  
888         if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
889 -               mutex_t* mutex = buf_page_get_mutex(bpage);
890 +               mutex_t* mutex = buf_page_get_mutex_enter(bpage);
891  
892 -               mutex_enter(mutex);
893                 ut_a(bpage->buf_fix_count > 0);
894                 bpage->buf_fix_count--;
895                 mutex_exit(mutex);
896         } else {
897 +               mutex_enter(&buf_pool->zip_mutex);
898                 ut_a(bpage->buf_fix_count > 0);
899  
900                 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
901                         buf_pool_watch_remove(buf_pool, fold, bpage);
902                 }
903 +               mutex_exit(&buf_pool->zip_mutex);
904         }
905  
906 -       buf_pool_mutex_exit(buf_pool);
907 +       //buf_pool_mutex_exit(buf_pool);
908 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
909  }
910  
911  /****************************************************************//**
912 @@ -2109,14 +2174,16 @@
913         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
914         ulint           fold    = buf_page_address_fold(space, offset);
915  
916 -       buf_pool_mutex_enter(buf_pool);
917 +       //buf_pool_mutex_enter(buf_pool);
918 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
919  
920         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
921         /* The page must exist because buf_pool_watch_set()
922         increments buf_fix_count. */
923         ut_a(bpage);
924         ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
925 -       buf_pool_mutex_exit(buf_pool);
926 +       //buf_pool_mutex_exit(buf_pool);
927 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
928  
929         return(ret);
930  }
931 @@ -2133,13 +2200,15 @@
932  {
933         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
934  
935 -       buf_pool_mutex_enter(buf_pool);
936 +       //buf_pool_mutex_enter(buf_pool);
937 +       mutex_enter(&buf_pool->LRU_list_mutex);
938  
939         ut_a(buf_page_in_file(bpage));
940  
941         buf_LRU_make_block_young(bpage);
942  
943 -       buf_pool_mutex_exit(buf_pool);
944 +       //buf_pool_mutex_exit(buf_pool);
945 +       mutex_exit(&buf_pool->LRU_list_mutex);
946  }
947  
948  /********************************************************************//**
949 @@ -2163,14 +2232,20 @@
950         ut_a(buf_page_in_file(bpage));
951  
952         if (buf_page_peek_if_too_old(bpage)) {
953 -               buf_pool_mutex_enter(buf_pool);
954 +               //buf_pool_mutex_enter(buf_pool);
955 +               mutex_enter(&buf_pool->LRU_list_mutex);
956                 buf_LRU_make_block_young(bpage);
957 -               buf_pool_mutex_exit(buf_pool);
958 +               //buf_pool_mutex_exit(buf_pool);
959 +               mutex_exit(&buf_pool->LRU_list_mutex);
960         } else if (!access_time) {
961                 ulint   time_ms = ut_time_ms();
962 -               buf_pool_mutex_enter(buf_pool);
963 +               mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
964 +               //buf_pool_mutex_enter(buf_pool);
965 +               if (block_mutex) {
966                 buf_page_set_accessed(bpage, time_ms);
967 -               buf_pool_mutex_exit(buf_pool);
968 +               mutex_exit(block_mutex);
969 +               }
970 +               //buf_pool_mutex_exit(buf_pool);
971         }
972  }
973  
974 @@ -2187,7 +2262,8 @@
975         buf_block_t*    block;
976         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
977  
978 -       buf_pool_mutex_enter(buf_pool);
979 +       //buf_pool_mutex_enter(buf_pool);
980 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
981  
982         block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
983  
984 @@ -2196,7 +2272,8 @@
985                 block->check_index_page_at_flush = FALSE;
986         }
987  
988 -       buf_pool_mutex_exit(buf_pool);
989 +       //buf_pool_mutex_exit(buf_pool);
990 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
991  }
992  
993  /********************************************************************//**
994 @@ -2215,7 +2292,8 @@
995         ibool           is_hashed;
996         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
997  
998 -       buf_pool_mutex_enter(buf_pool);
999 +       //buf_pool_mutex_enter(buf_pool);
1000 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1001  
1002         block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1003  
1004 @@ -2226,7 +2304,8 @@
1005                 is_hashed = block->is_hashed;
1006         }
1007  
1008 -       buf_pool_mutex_exit(buf_pool);
1009 +       //buf_pool_mutex_exit(buf_pool);
1010 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1011  
1012         return(is_hashed);
1013  }
1014 @@ -2248,7 +2327,8 @@
1015         buf_page_t*     bpage;
1016         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1017  
1018 -       buf_pool_mutex_enter(buf_pool);
1019 +       //buf_pool_mutex_enter(buf_pool);
1020 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1021  
1022         bpage = buf_page_hash_get(buf_pool, space, offset);
1023  
1024 @@ -2259,7 +2339,8 @@
1025                 bpage->file_page_was_freed = TRUE;
1026         }
1027  
1028 -       buf_pool_mutex_exit(buf_pool);
1029 +       //buf_pool_mutex_exit(buf_pool);
1030 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1031  
1032         return(bpage);
1033  }
1034 @@ -2280,7 +2361,8 @@
1035         buf_page_t*     bpage;
1036         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1037  
1038 -       buf_pool_mutex_enter(buf_pool);
1039 +       //buf_pool_mutex_enter(buf_pool);
1040 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1041  
1042         bpage = buf_page_hash_get(buf_pool, space, offset);
1043  
1044 @@ -2289,7 +2371,8 @@
1045                 bpage->file_page_was_freed = FALSE;
1046         }
1047  
1048 -       buf_pool_mutex_exit(buf_pool);
1049 +       //buf_pool_mutex_exit(buf_pool);
1050 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1051  
1052         return(bpage);
1053  }
1054 @@ -2324,8 +2407,9 @@
1055         buf_pool->stat.n_page_gets++;
1056  
1057         for (;;) {
1058 -               buf_pool_mutex_enter(buf_pool);
1059 +               //buf_pool_mutex_enter(buf_pool);
1060  lookup:
1061 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
1062                 bpage = buf_page_hash_get(buf_pool, space, offset);
1063                 if (bpage) {
1064                         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1065 @@ -2334,7 +2418,8 @@
1066  
1067                 /* Page not in buf_pool: needs to be read from file */
1068  
1069 -               buf_pool_mutex_exit(buf_pool);
1070 +               //buf_pool_mutex_exit(buf_pool);
1071 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1072  
1073                 buf_read_page(space, zip_size, offset);
1074  
1075 @@ -2346,10 +2431,15 @@
1076         if (UNIV_UNLIKELY(!bpage->zip.data)) {
1077                 /* There is no compressed page. */
1078  err_exit:
1079 -               buf_pool_mutex_exit(buf_pool);
1080 +               //buf_pool_mutex_exit(buf_pool);
1081 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1082                 return(NULL);
1083         }
1084  
1085 +       block_mutex = buf_page_get_mutex_enter(bpage);
1086 +
1087 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1088 +
1089         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1090  
1091         switch (buf_page_get_state(bpage)) {
1092 @@ -2358,19 +2448,19 @@
1093         case BUF_BLOCK_MEMORY:
1094         case BUF_BLOCK_REMOVE_HASH:
1095         case BUF_BLOCK_ZIP_FREE:
1096 +               if (block_mutex)
1097 +                       mutex_exit(block_mutex);
1098                 break;
1099         case BUF_BLOCK_ZIP_PAGE:
1100         case BUF_BLOCK_ZIP_DIRTY:
1101 -               block_mutex = &buf_pool->zip_mutex;
1102 -               mutex_enter(block_mutex);
1103 +               ut_a(block_mutex == &buf_pool->zip_mutex);
1104                 bpage->buf_fix_count++;
1105                 goto got_block;
1106         case BUF_BLOCK_FILE_PAGE:
1107 -               block_mutex = &((buf_block_t*) bpage)->mutex;
1108 -               mutex_enter(block_mutex);
1109 +               ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1110  
1111                 /* Discard the uncompressed page frame if possible. */
1112 -               if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) {
1113 +               if (buf_LRU_free_block(bpage, FALSE, FALSE) == BUF_LRU_FREED) {
1114  
1115                         mutex_exit(block_mutex);
1116                         goto lookup;
1117 @@ -2388,7 +2478,7 @@
1118         must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1119         access_time = buf_page_is_accessed(bpage);
1120  
1121 -       buf_pool_mutex_exit(buf_pool);
1122 +       //buf_pool_mutex_exit(buf_pool);
1123  
1124         mutex_exit(block_mutex);
1125  
1126 @@ -2697,7 +2787,7 @@
1127         const buf_block_t*      block)          /*!< in: pointer to block,
1128                                                 not dereferenced */
1129  {
1130 -       ut_ad(buf_pool_mutex_own(buf_pool));
1131 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1132  
1133         if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1134                 /* The pointer should be aligned. */
1135 @@ -2733,6 +2823,7 @@
1136         ulint           fix_type;
1137         ibool           must_read;
1138         ulint           retries = 0;
1139 +       mutex_t*        block_mutex = NULL;
1140         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1141  
1142         ut_ad(mtr);
1143 @@ -2755,9 +2846,11 @@
1144         fold = buf_page_address_fold(space, offset);
1145  loop:
1146         block = guess;
1147 -       buf_pool_mutex_enter(buf_pool);
1148 +       //buf_pool_mutex_enter(buf_pool);
1149  
1150         if (block) {
1151 +               block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1152 +
1153                 /* If the guess is a compressed page descriptor that
1154                 has been allocated by buf_buddy_alloc(), it may have
1155                 been invalidated by buf_buddy_relocate().  In that
1156 @@ -2766,11 +2859,15 @@
1157                 the guess may be pointing to a buffer pool chunk that
1158                 has been released when resizing the buffer pool. */
1159  
1160 -               if (!buf_block_is_uncompressed(buf_pool, block)
1161 +               if (!block_mutex) {
1162 +                       block = guess = NULL;
1163 +               } else if (!buf_block_is_uncompressed(buf_pool, block)
1164                     || offset != block->page.offset
1165                     || space != block->page.space
1166                     || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1167  
1168 +                       mutex_exit(block_mutex);
1169 +
1170                         block = guess = NULL;
1171                 } else {
1172                         ut_ad(!block->page.in_zip_hash);
1173 @@ -2779,12 +2876,19 @@
1174         }
1175  
1176         if (block == NULL) {
1177 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
1178                 block = (buf_block_t*) buf_page_hash_get_low(
1179                         buf_pool, space, offset, fold);
1180 +               if (block) {
1181 +                       block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1182 +                       ut_a(block_mutex);
1183 +               }
1184 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1185         }
1186  
1187  loop2:
1188         if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1189 +               mutex_exit(block_mutex);
1190                 block = NULL;
1191         }
1192  
1193 @@ -2796,12 +2900,14 @@
1194                                 space, offset, fold);
1195  
1196                         if (UNIV_LIKELY_NULL(block)) {
1197 -
1198 +                               block_mutex = buf_page_get_mutex((buf_page_t*)block);
1199 +                               ut_a(block_mutex);
1200 +                               ut_ad(mutex_own(block_mutex));
1201                                 goto got_block;
1202                         }
1203                 }
1204  
1205 -               buf_pool_mutex_exit(buf_pool);
1206 +               //buf_pool_mutex_exit(buf_pool);
1207  
1208                 if (mode == BUF_GET_IF_IN_POOL
1209                     || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1210 @@ -2849,7 +2955,8 @@
1211                 /* The page is being read to buffer pool,
1212                 but we cannot wait around for the read to
1213                 complete. */
1214 -               buf_pool_mutex_exit(buf_pool);
1215 +               //buf_pool_mutex_exit(buf_pool);
1216 +               mutex_exit(block_mutex);
1217  
1218                 return(NULL);
1219         }
1220 @@ -2859,38 +2966,49 @@
1221                 ibool           success;
1222  
1223         case BUF_BLOCK_FILE_PAGE:
1224 +               if (block_mutex == &buf_pool->zip_mutex) {
1225 +                       /* wrong mutex for a file page: release and retry */
1226 +                       mutex_exit(block_mutex);
1227 +                       goto loop;
1228 +               }
1229                 break;
1230  
1231         case BUF_BLOCK_ZIP_PAGE:
1232         case BUF_BLOCK_ZIP_DIRTY:
1233 +               ut_ad(block_mutex == &buf_pool->zip_mutex);
1234                 bpage = &block->page;
1235                 /* Protect bpage->buf_fix_count. */
1236 -               mutex_enter(&buf_pool->zip_mutex);
1237 +               //mutex_enter(&buf_pool->zip_mutex);
1238  
1239                 if (bpage->buf_fix_count
1240                     || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1241                         /* This condition often occurs when the buffer
1242                         is not buffer-fixed, but I/O-fixed by
1243                         buf_page_init_for_read(). */
1244 -                       mutex_exit(&buf_pool->zip_mutex);
1245 +                       //mutex_exit(&buf_pool->zip_mutex);
1246  wait_until_unfixed:
1247                         /* The block is buffer-fixed or I/O-fixed.
1248                         Try again later. */
1249 -                       buf_pool_mutex_exit(buf_pool);
1250 +                       //buf_pool_mutex_exit(buf_pool);
1251 +                       mutex_exit(block_mutex);
1252                         os_thread_sleep(WAIT_FOR_READ);
1253    
1254                         goto loop;
1255                 }
1256  
1257                 /* Allocate an uncompressed page. */
1258 -               buf_pool_mutex_exit(buf_pool);
1259 -               mutex_exit(&buf_pool->zip_mutex);
1260 +               //buf_pool_mutex_exit(buf_pool);
1261 +               //mutex_exit(&buf_pool->zip_mutex);
1262 +               mutex_exit(block_mutex);
1263  
1264                 block = buf_LRU_get_free_block(buf_pool);
1265                 ut_a(block);
1266 +               block_mutex = &block->mutex;
1267  
1268 -               buf_pool_mutex_enter(buf_pool);
1269 -               mutex_enter(&block->mutex);
1270 +               //buf_pool_mutex_enter(buf_pool);
1271 +               mutex_enter(&buf_pool->LRU_list_mutex);
1272 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
1273 +               mutex_enter(block_mutex);
1274  
1275                 {
1276                         buf_page_t*     hash_bpage;
1277 @@ -2903,35 +3021,47 @@
1278                                 while buf_pool->mutex was released.
1279                                 Free the block that was allocated. */
1280  
1281 -                               buf_LRU_block_free_non_file_page(block);
1282 -                               mutex_exit(&block->mutex);
1283 +                               buf_LRU_block_free_non_file_page(block, TRUE);
1284 +                               mutex_exit(block_mutex);
1285  
1286                                 block = (buf_block_t*) hash_bpage;
1287 +                               if (block) {
1288 +                                       block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1289 +                                       ut_a(block_mutex);
1290 +                               }
1291 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1292 +                               mutex_exit(&buf_pool->LRU_list_mutex);
1293                                 goto loop2;
1294                         }
1295                 }
1296  
1297 +               mutex_enter(&buf_pool->zip_mutex);
1298 +
1299                 if (UNIV_UNLIKELY
1300                     (bpage->buf_fix_count
1301                      || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1302  
1303 +                       mutex_exit(&buf_pool->zip_mutex);
1304                         /* The block was buffer-fixed or I/O-fixed
1305                         while buf_pool->mutex was not held by this thread.
1306                         Free the block that was allocated and try again.
1307                         This should be extremely unlikely. */
1308  
1309 -                       buf_LRU_block_free_non_file_page(block);
1310 -                       mutex_exit(&block->mutex);
1311 +                       buf_LRU_block_free_non_file_page(block, TRUE);
1312 +                       //mutex_exit(&block->mutex);
1313  
1314 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1315 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1316                         goto wait_until_unfixed;
1317                 }
1318  
1319                 /* Move the compressed page from bpage to block,
1320                 and uncompress it. */
1321  
1322 -               mutex_enter(&buf_pool->zip_mutex);
1323 -
1324                 buf_relocate(bpage, &block->page);
1325 +
1326 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1327 +
1328                 buf_block_init_low(block);
1329                 block->lock_hash_val = lock_rec_hash(space, offset);
1330  
1331 @@ -2940,7 +3070,7 @@
1332  
1333                 if (buf_page_get_state(&block->page)
1334                     == BUF_BLOCK_ZIP_PAGE) {
1335 -                       UT_LIST_REMOVE(list, buf_pool->zip_clean,
1336 +                       UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1337                                        &block->page);
1338                         ut_ad(!block->page.in_flush_list);
1339                 } else {
1340 @@ -2957,19 +3087,24 @@
1341                 /* Insert at the front of unzip_LRU list */
1342                 buf_unzip_LRU_add_block(block, FALSE);
1343  
1344 +               mutex_exit(&buf_pool->LRU_list_mutex);
1345 +
1346                 block->page.buf_fix_count = 1;
1347                 buf_block_set_io_fix(block, BUF_IO_READ);
1348                 rw_lock_x_lock_func(&block->lock, 0, file, line);
1349  
1350                 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1351  
1352 -               mutex_exit(&block->mutex);
1353 +               mutex_exit(block_mutex);
1354                 mutex_exit(&buf_pool->zip_mutex);
1355 +
1356 +               buf_pool_mutex_enter(buf_pool);
1357                 buf_pool->n_pend_unzip++;
1358 +               buf_pool_mutex_exit(buf_pool);
1359  
1360 -               buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1361 +               buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1362  
1363 -               buf_pool_mutex_exit(buf_pool);
1364 +               //buf_pool_mutex_exit(buf_pool);
1365  
1366                 /* Decompress the page and apply buffered operations
1367                 while not holding buf_pool->mutex or block->mutex. */
1368 @@ -2982,12 +3117,15 @@
1369                 }
1370  
1371                 /* Unfix and unlatch the block. */
1372 -               buf_pool_mutex_enter(buf_pool);
1373 -               mutex_enter(&block->mutex);
1374 +               //buf_pool_mutex_enter(buf_pool);
1375 +               block_mutex = &block->mutex;
1376 +               mutex_enter(block_mutex);
1377                 block->page.buf_fix_count--;
1378                 buf_block_set_io_fix(block, BUF_IO_NONE);
1379 -               mutex_exit(&block->mutex);
1380 +
1381 +               buf_pool_mutex_enter(buf_pool);
1382                 buf_pool->n_pend_unzip--;
1383 +               buf_pool_mutex_exit(buf_pool);
1384                 rw_lock_x_unlock(&block->lock);
1385  
1386                 break;
1387 @@ -3003,7 +3141,7 @@
1388  
1389         ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1390  
1391 -       mutex_enter(&block->mutex);
1392 +       //mutex_enter(&block->mutex);
1393  #if UNIV_WORD_SIZE == 4
1394         /* On 32-bit systems, there is no padding in buf_page_t.  On
1395         other systems, Valgrind could complain about uninitialized pad
1396 @@ -3016,7 +3154,7 @@
1397                 /* Try to evict the block from the buffer pool, to use the
1398                 insert buffer (change buffer) as much as possible. */
1399  
1400 -               if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) {
1401 +               if (buf_LRU_free_block(&block->page, TRUE, FALSE) == BUF_LRU_FREED) {
1402                         mutex_exit(&block->mutex);
1403                         if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1404                                 /* Set the watch, as it would have
1405 @@ -3052,13 +3190,14 @@
1406  
1407         buf_block_buf_fix_inc(block, file, line);
1408  
1409 -       mutex_exit(&block->mutex);
1410 +       //mutex_exit(&block->mutex);
1411  
1412         /* Check if this is the first access to the page */
1413  
1414         access_time = buf_page_is_accessed(&block->page);
1415  
1416 -       buf_pool_mutex_exit(buf_pool);
1417 +       //buf_pool_mutex_exit(buf_pool);
1418 +       mutex_exit(block_mutex);
1419  
1420         buf_page_set_accessed_make_young(&block->page, access_time);
1421  
1422 @@ -3291,9 +3430,11 @@
1423         buf_pool = buf_pool_from_block(block);
1424  
1425         if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1426 -               buf_pool_mutex_enter(buf_pool);
1427 +               //buf_pool_mutex_enter(buf_pool);
1428 +               mutex_enter(&buf_pool->LRU_list_mutex);
1429                 buf_LRU_make_block_young(&block->page);
1430 -               buf_pool_mutex_exit(buf_pool);
1431 +               //buf_pool_mutex_exit(buf_pool);
1432 +               mutex_exit(&buf_pool->LRU_list_mutex);
1433         } else if (!buf_page_is_accessed(&block->page)) {
1434                 /* Above, we do a dirty read on purpose, to avoid
1435                 mutex contention.  The field buf_page_t::access_time
1436 @@ -3301,9 +3442,11 @@
1437                 field must be protected by mutex, however. */
1438                 ulint   time_ms = ut_time_ms();
1439  
1440 -               buf_pool_mutex_enter(buf_pool);
1441 +               //buf_pool_mutex_enter(buf_pool);
1442 +               mutex_enter(&block->mutex);
1443                 buf_page_set_accessed(&block->page, time_ms);
1444 -               buf_pool_mutex_exit(buf_pool);
1445 +               //buf_pool_mutex_exit(buf_pool);
1446 +               mutex_exit(&block->mutex);
1447         }
1448  
1449         ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1450 @@ -3370,18 +3513,21 @@
1451         ut_ad(mtr);
1452         ut_ad(mtr->state == MTR_ACTIVE);
1453  
1454 -       buf_pool_mutex_enter(buf_pool);
1455 +       //buf_pool_mutex_enter(buf_pool);
1456 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1457         block = buf_block_hash_get(buf_pool, space_id, page_no);
1458  
1459         if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1460 -               buf_pool_mutex_exit(buf_pool);
1461 +               //buf_pool_mutex_exit(buf_pool);
1462 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1463                 return(NULL);
1464         }
1465  
1466         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1467  
1468         mutex_enter(&block->mutex);
1469 -       buf_pool_mutex_exit(buf_pool);
1470 +       //buf_pool_mutex_exit(buf_pool);
1471 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1472  
1473  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1474         ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1475 @@ -3470,7 +3616,10 @@
1476         buf_page_t*     hash_page;
1477         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1478  
1479 -       ut_ad(buf_pool_mutex_own(buf_pool));
1480 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1481 +#ifdef UNIV_SYNC_DEBUG
1482 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1483 +#endif
1484         ut_ad(mutex_own(&(block->mutex)));
1485         ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1486  
1487 @@ -3499,11 +3648,14 @@
1488         if (UNIV_LIKELY(!hash_page)) {
1489         } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1490                 /* Preserve the reference count. */
1491 -               ulint   buf_fix_count = hash_page->buf_fix_count;
1492 +               ulint   buf_fix_count;
1493  
1494 +               mutex_enter(&buf_pool->zip_mutex);
1495 +               buf_fix_count = hash_page->buf_fix_count;
1496                 ut_a(buf_fix_count > 0);
1497                 block->page.buf_fix_count += buf_fix_count;
1498                 buf_pool_watch_remove(buf_pool, fold, hash_page);
1499 +               mutex_exit(&buf_pool->zip_mutex);
1500         } else {
1501                 fprintf(stderr,
1502                         "InnoDB: Error: page %lu %lu already found"
1503 @@ -3513,7 +3665,8 @@
1504                         (const void*) hash_page, (const void*) block);
1505  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1506                 mutex_exit(&block->mutex);
1507 -               buf_pool_mutex_exit(buf_pool);
1508 +               //buf_pool_mutex_exit(buf_pool);
1509 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1510                 buf_print();
1511                 buf_LRU_print();
1512                 buf_validate();
1513 @@ -3597,7 +3750,9 @@
1514  
1515         fold = buf_page_address_fold(space, offset);
1516  
1517 -       buf_pool_mutex_enter(buf_pool);
1518 +       //buf_pool_mutex_enter(buf_pool);
1519 +       mutex_enter(&buf_pool->LRU_list_mutex);
1520 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1521  
1522         watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1523         if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
1524 @@ -3606,9 +3761,15 @@
1525  err_exit:
1526                 if (block) {
1527                         mutex_enter(&block->mutex);
1528 -                       buf_LRU_block_free_non_file_page(block);
1529 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1530 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1531 +                       buf_LRU_block_free_non_file_page(block, FALSE);
1532                         mutex_exit(&block->mutex);
1533                 }
1534 +               else {
1535 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1536 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1537 +               }
1538  
1539                 bpage = NULL;
1540                 goto func_exit;
1541 @@ -3631,6 +3792,8 @@
1542  
1543                 buf_page_init(space, offset, fold, block);
1544  
1545 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1546 +
1547                 /* The block must be put to the LRU list, to the old blocks */
1548                 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1549  
1550 @@ -3658,7 +3821,7 @@
1551                         been added to buf_pool->LRU and
1552                         buf_pool->page_hash. */
1553                         mutex_exit(&block->mutex);
1554 -                       data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1555 +                       data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1556                         mutex_enter(&block->mutex);
1557                         block->page.zip.data = data;
1558  
1559 @@ -3671,6 +3834,7 @@
1560                         buf_unzip_LRU_add_block(block, TRUE);
1561                 }
1562  
1563 +               mutex_exit(&buf_pool->LRU_list_mutex);
1564                 mutex_exit(&block->mutex);
1565         } else {
1566                 /* Defer buf_buddy_alloc() until after the block has
1567 @@ -3682,8 +3846,8 @@
1568                 control block (bpage), in order to avoid the
1569                 invocation of buf_buddy_relocate_block() on
1570                 uninitialized data. */
1571 -               data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1572 -               bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1573 +               data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1574 +               bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1575  
1576                 /* Initialize the buf_pool pointer. */
1577                 bpage->buf_pool_index = buf_pool_index(buf_pool);
1578 @@ -3702,8 +3866,11 @@
1579  
1580                                 /* The block was added by some other thread. */
1581                                 watch_page = NULL;
1582 -                               buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1583 -                               buf_buddy_free(buf_pool, data, zip_size);
1584 +                               buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1585 +                               buf_buddy_free(buf_pool, data, zip_size, TRUE);
1586 +
1587 +                               mutex_exit(&buf_pool->LRU_list_mutex);
1588 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1589  
1590                                 bpage = NULL;
1591                                 goto func_exit;
1592 @@ -3747,18 +3914,24 @@
1593                 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1594                             bpage);
1595  
1596 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1597 +
1598                 /* The block must be put to the LRU list, to the old blocks */
1599                 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1600                 buf_LRU_insert_zip_clean(bpage);
1601  
1602 +               mutex_exit(&buf_pool->LRU_list_mutex);
1603 +
1604                 buf_page_set_io_fix(bpage, BUF_IO_READ);
1605  
1606                 mutex_exit(&buf_pool->zip_mutex);
1607         }
1608  
1609 +       buf_pool_mutex_enter(buf_pool);
1610         buf_pool->n_pend_reads++;
1611 -func_exit:
1612         buf_pool_mutex_exit(buf_pool);
1613 +func_exit:
1614 +       //buf_pool_mutex_exit(buf_pool);
1615  
1616         if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1617  
1618 @@ -3800,7 +3973,9 @@
1619  
1620         fold = buf_page_address_fold(space, offset);
1621  
1622 -       buf_pool_mutex_enter(buf_pool);
1623 +       //buf_pool_mutex_enter(buf_pool);
1624 +       mutex_enter(&buf_pool->LRU_list_mutex);
1625 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1626  
1627         block = (buf_block_t*) buf_page_hash_get_low(
1628                 buf_pool, space, offset, fold);
1629 @@ -3816,7 +3991,9 @@
1630  #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
1631  
1632                 /* Page can be found in buf_pool */
1633 -               buf_pool_mutex_exit(buf_pool);
1634 +               //buf_pool_mutex_exit(buf_pool);
1635 +               mutex_exit(&buf_pool->LRU_list_mutex);
1636 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1637  
1638                 buf_block_free(free_block);
1639  
1640 @@ -3838,6 +4015,7 @@
1641         mutex_enter(&block->mutex);
1642  
1643         buf_page_init(space, offset, fold, block);
1644 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1645  
1646         /* The block must be put to the LRU list */
1647         buf_LRU_add_block(&block->page, FALSE);
1648 @@ -3864,7 +4042,7 @@
1649                 the reacquisition of buf_pool->mutex.  We also must
1650                 defer this operation until after the block descriptor
1651                 has been added to buf_pool->LRU and buf_pool->page_hash. */
1652 -               data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1653 +               data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1654                 mutex_enter(&block->mutex);
1655                 block->page.zip.data = data;
1656  
1657 @@ -3882,7 +4060,8 @@
1658  
1659         buf_page_set_accessed(&block->page, time_ms);
1660  
1661 -       buf_pool_mutex_exit(buf_pool);
1662 +       //buf_pool_mutex_exit(buf_pool);
1663 +       mutex_exit(&buf_pool->LRU_list_mutex);
1664  
1665         mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1666  
1667 @@ -3933,6 +4112,8 @@
1668         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1669         const ibool     uncompressed = (buf_page_get_state(bpage)
1670                                         == BUF_BLOCK_FILE_PAGE);
1671 +       ibool           have_LRU_mutex = FALSE;
1672 +       mutex_t*        block_mutex;
1673  
1674         ut_a(buf_page_in_file(bpage));
1675  
1676 @@ -4066,8 +4247,26 @@
1677                 }
1678         }
1679  
1680 +       if (io_type == BUF_IO_WRITE
1681 +           && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1682 +               || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1683 +               /* to keep consistency at buf_LRU_insert_zip_clean() */
1684 +               have_LRU_mutex = TRUE; /* optimistic */
1685 +       }
1686 +retry_mutex:
1687 +       if (have_LRU_mutex)
1688 +               mutex_enter(&buf_pool->LRU_list_mutex);
1689 +       block_mutex = buf_page_get_mutex_enter(bpage);
1690 +       ut_a(block_mutex);
1691 +       if (io_type == BUF_IO_WRITE
1692 +           && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1693 +               || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1694 +           && !have_LRU_mutex) {
1695 +               mutex_exit(block_mutex);
1696 +               have_LRU_mutex = TRUE;
1697 +               goto retry_mutex;
1698 +       }
1699         buf_pool_mutex_enter(buf_pool);
1700 -       mutex_enter(buf_page_get_mutex(bpage));
1701  
1702  #ifdef UNIV_IBUF_COUNT_DEBUG
1703         if (io_type == BUF_IO_WRITE || uncompressed) {
1704 @@ -4090,6 +4289,7 @@
1705                 the x-latch to this OS thread: do not let this confuse you in
1706                 debugging! */
1707  
1708 +               ut_a(!have_LRU_mutex);
1709                 ut_ad(buf_pool->n_pend_reads > 0);
1710                 buf_pool->n_pend_reads--;
1711                 buf_pool->stat.n_pages_read++;
1712 @@ -4107,6 +4307,9 @@
1713  
1714                 buf_flush_write_complete(bpage);
1715  
1716 +               if (have_LRU_mutex)
1717 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1718 +
1719                 if (uncompressed) {
1720                         rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1721                                              BUF_IO_WRITE);
1722 @@ -4129,8 +4332,8 @@
1723         }
1724  #endif /* UNIV_DEBUG */
1725  
1726 -       mutex_exit(buf_page_get_mutex(bpage));
1727         buf_pool_mutex_exit(buf_pool);
1728 +       mutex_exit(block_mutex);
1729  }
1730  
1731  /*********************************************************************//**
1732 @@ -4147,7 +4350,9 @@
1733  
1734         ut_ad(buf_pool);
1735  
1736 -       buf_pool_mutex_enter(buf_pool);
1737 +       //buf_pool_mutex_enter(buf_pool);
1738 +       mutex_enter(&buf_pool->LRU_list_mutex);
1739 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1740  
1741         chunk = buf_pool->chunks;
1742  
1743 @@ -4164,7 +4369,9 @@
1744                 }
1745         }
1746  
1747 -       buf_pool_mutex_exit(buf_pool);
1748 +       //buf_pool_mutex_exit(buf_pool);
1749 +       mutex_exit(&buf_pool->LRU_list_mutex);
1750 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1751  
1752         return(TRUE);
1753  }
1754 @@ -4212,7 +4419,8 @@
1755                 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1756         }
1757  
1758 -       buf_pool_mutex_enter(buf_pool);
1759 +       //buf_pool_mutex_enter(buf_pool);
1760 +       mutex_enter(&buf_pool->LRU_list_mutex);
1761  
1762         ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1763         ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
1764 @@ -4225,7 +4433,8 @@
1765         memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1766         buf_refresh_io_stats(buf_pool);
1767  
1768 -       buf_pool_mutex_exit(buf_pool);
1769 +       //buf_pool_mutex_exit(buf_pool);
1770 +       mutex_exit(&buf_pool->LRU_list_mutex);
1771  }
1772  
1773  /*********************************************************************//**
1774 @@ -4267,7 +4476,10 @@
1775  
1776         ut_ad(buf_pool);
1777  
1778 -       buf_pool_mutex_enter(buf_pool);
1779 +       //buf_pool_mutex_enter(buf_pool);
1780 +       mutex_enter(&buf_pool->LRU_list_mutex);
1781 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1782 +       /* to keep the new latch order, this function cannot validate everything correctly... */
1783  
1784         chunk = buf_pool->chunks;
1785  
1786 @@ -4362,7 +4574,7 @@
1787         /* Check clean compressed-only blocks. */
1788  
1789         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1790 -            b = UT_LIST_GET_NEXT(list, b)) {
1791 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
1792                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1793                 switch (buf_page_get_io_fix(b)) {
1794                 case BUF_IO_NONE:
1795 @@ -4393,7 +4605,7 @@
1796  
1797         buf_flush_list_mutex_enter(buf_pool);
1798         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1799 -            b = UT_LIST_GET_NEXT(list, b)) {
1800 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
1801                 ut_ad(b->in_flush_list);
1802                 ut_a(b->oldest_modification);
1803                 n_flush++;
1804 @@ -4452,6 +4664,8 @@
1805         }
1806  
1807         ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1808 +       /* because of latching order with block->mutex, we cannot get needed mutexes before that */
1809 +/*
1810         if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1811                 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1812                         (ulong) UT_LIST_GET_LEN(buf_pool->free),
1813 @@ -4462,8 +4676,11 @@
1814         ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1815         ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1816         ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1817 +*/
1818  
1819 -       buf_pool_mutex_exit(buf_pool);
1820 +       //buf_pool_mutex_exit(buf_pool);
1821 +       mutex_exit(&buf_pool->LRU_list_mutex);
1822 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1823  
1824         ut_a(buf_LRU_validate());
1825         ut_a(buf_flush_validate(buf_pool));
1826 @@ -4519,7 +4736,9 @@
1827         index_ids = mem_alloc(size * sizeof *index_ids);
1828         counts = mem_alloc(sizeof(ulint) * size);
1829  
1830 -       buf_pool_mutex_enter(buf_pool);
1831 +       //buf_pool_mutex_enter(buf_pool);
1832 +       mutex_enter(&buf_pool->LRU_list_mutex);
1833 +       mutex_enter(&buf_pool->free_list_mutex);
1834         buf_flush_list_mutex_enter(buf_pool);
1835  
1836         fprintf(stderr,
1837 @@ -4588,7 +4807,9 @@
1838                 }
1839         }
1840  
1841 -       buf_pool_mutex_exit(buf_pool);
1842 +       //buf_pool_mutex_exit(buf_pool);
1843 +       mutex_exit(&buf_pool->LRU_list_mutex);
1844 +       mutex_exit(&buf_pool->free_list_mutex);
1845  
1846         for (i = 0; i < n_found; i++) {
1847                 index = dict_index_get_if_in_cache(index_ids[i]);
1848 @@ -4645,7 +4866,7 @@
1849         buf_chunk_t*    chunk;
1850         ulint           fixed_pages_number = 0;
1851  
1852 -       buf_pool_mutex_enter(buf_pool);
1853 +       //buf_pool_mutex_enter(buf_pool);
1854  
1855         chunk = buf_pool->chunks;
1856  
1857 @@ -4679,7 +4900,7 @@
1858         /* Traverse the lists of clean and dirty compressed-only blocks. */
1859  
1860         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1861 -            b = UT_LIST_GET_NEXT(list, b)) {
1862 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
1863                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1864                 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1865  
1866 @@ -4691,7 +4912,7 @@
1867  
1868         buf_flush_list_mutex_enter(buf_pool);
1869         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1870 -            b = UT_LIST_GET_NEXT(list, b)) {
1871 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
1872                 ut_ad(b->in_flush_list);
1873  
1874                 switch (buf_page_get_state(b)) {
1875 @@ -4717,7 +4938,7 @@
1876  
1877         buf_flush_list_mutex_exit(buf_pool);
1878         mutex_exit(&buf_pool->zip_mutex);
1879 -       buf_pool_mutex_exit(buf_pool);
1880 +       //buf_pool_mutex_exit(buf_pool);
1881  
1882         return(fixed_pages_number);
1883  }
1884 @@ -4873,6 +5094,8 @@
1885         /* Find appropriate pool_info to store stats for this buffer pool */
1886         pool_info = &all_pool_info[pool_id];
1887  
1888 +       mutex_enter(&buf_pool->LRU_list_mutex);
1889 +       mutex_enter(&buf_pool->free_list_mutex);
1890         buf_pool_mutex_enter(buf_pool);
1891         buf_flush_list_mutex_enter(buf_pool);
1892  
1893 @@ -4983,6 +5206,8 @@
1894         pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
1895  
1896         buf_refresh_io_stats(buf_pool);
1897 +       mutex_exit(&buf_pool->LRU_list_mutex);
1898 +       mutex_exit(&buf_pool->free_list_mutex);
1899         buf_pool_mutex_exit(buf_pool);
1900  }
1901  
1902 @@ -5224,11 +5449,13 @@
1903  {
1904         ulint   len;
1905  
1906 -       buf_pool_mutex_enter(buf_pool);
1907 +       //buf_pool_mutex_enter(buf_pool);
1908 +       mutex_enter(&buf_pool->free_list_mutex);
1909  
1910         len = UT_LIST_GET_LEN(buf_pool->free);
1911  
1912 -       buf_pool_mutex_exit(buf_pool);
1913 +       //buf_pool_mutex_exit(buf_pool);
1914 +       mutex_exit(&buf_pool->free_list_mutex);
1915  
1916         return(len);
1917  }
1918 diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1919 --- a/storage/innobase/buf/buf0flu.c    2010-12-03 15:22:36.318955693 +0900
1920 +++ b/storage/innobase/buf/buf0flu.c    2010-12-03 15:48:29.289024083 +0900
1921 @@ -307,7 +307,7 @@
1922  
1923         ut_d(block->page.in_flush_list = TRUE);
1924         block->page.oldest_modification = lsn;
1925 -       UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1926 +       UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1927  
1928  #ifdef UNIV_DEBUG_VALGRIND
1929         {
1930 @@ -401,14 +401,14 @@
1931                        > block->page.oldest_modification) {
1932                         ut_ad(b->in_flush_list);
1933                         prev_b = b;
1934 -                       b = UT_LIST_GET_NEXT(list, b);
1935 +                       b = UT_LIST_GET_NEXT(flush_list, b);
1936                 }
1937         }
1938  
1939         if (prev_b == NULL) {
1940 -               UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1941 +               UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1942         } else {
1943 -               UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1944 +               UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1945                                      prev_b, &block->page);
1946         }
1947  
1948 @@ -434,7 +434,7 @@
1949         //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
1950         //ut_ad(buf_pool_mutex_own(buf_pool));
1951  #endif
1952 -       //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1953 +       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1954         //ut_ad(bpage->in_LRU_list);
1955  
1956         if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
1957 @@ -470,14 +470,14 @@
1958         enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1959  {
1960  #ifdef UNIV_DEBUG
1961 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1962 -       ut_ad(buf_pool_mutex_own(buf_pool));
1963 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
1964 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1965  #endif
1966 -       ut_a(buf_page_in_file(bpage));
1967 +       //ut_a(buf_page_in_file(bpage));
1968         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1969         ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1970  
1971 -       if (bpage->oldest_modification != 0
1972 +       if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1973             && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1974                 ut_ad(bpage->in_flush_list);
1975  
1976 @@ -508,7 +508,7 @@
1977  {
1978         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1979  
1980 -       ut_ad(buf_pool_mutex_own(buf_pool));
1981 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1982         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1983         ut_ad(bpage->in_flush_list);
1984  
1985 @@ -526,11 +526,11 @@
1986                 return;
1987         case BUF_BLOCK_ZIP_DIRTY:
1988                 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1989 -               UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1990 +               UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1991                 buf_LRU_insert_zip_clean(bpage);
1992                 break;
1993         case BUF_BLOCK_FILE_PAGE:
1994 -               UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1995 +               UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1996                 break;
1997         }
1998  
1999 @@ -574,7 +574,7 @@
2000         buf_page_t*     prev_b = NULL;
2001         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2002  
2003 -       ut_ad(buf_pool_mutex_own(buf_pool));
2004 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2005         /* Must reside in the same buffer pool. */
2006         ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2007  
2008 @@ -603,18 +603,18 @@
2009         because we assert on in_flush_list in comparison function. */
2010         ut_d(bpage->in_flush_list = FALSE);
2011  
2012 -       prev = UT_LIST_GET_PREV(list, bpage);
2013 -       UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2014 +       prev = UT_LIST_GET_PREV(flush_list, bpage);
2015 +       UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2016  
2017         if (prev) {
2018                 ut_ad(prev->in_flush_list);
2019                 UT_LIST_INSERT_AFTER(
2020 -                       list,
2021 +                       flush_list,
2022                         buf_pool->flush_list,
2023                         prev, dpage);
2024         } else {
2025                 UT_LIST_ADD_FIRST(
2026 -                       list,
2027 +                       flush_list,
2028                         buf_pool->flush_list,
2029                         dpage);
2030         }
2031 @@ -1083,7 +1083,7 @@
2032  
2033  #ifdef UNIV_DEBUG
2034         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2035 -       ut_ad(!buf_pool_mutex_own(buf_pool));
2036 +       //ut_ad(!buf_pool_mutex_own(buf_pool));
2037  #endif
2038  
2039  #ifdef UNIV_LOG_DEBUG
2040 @@ -1097,7 +1097,8 @@
2041         io_fixed and oldest_modification != 0.  Thus, it cannot be
2042         relocated in the buffer pool or removed from flush_list or
2043         LRU_list. */
2044 -       ut_ad(!buf_pool_mutex_own(buf_pool));
2045 +       //ut_ad(!buf_pool_mutex_own(buf_pool));
2046 +       ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2047         ut_ad(!buf_flush_list_mutex_own(buf_pool));
2048         ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2049         ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
2050 @@ -1260,12 +1261,18 @@
2051         ibool           is_uncompressed;
2052  
2053         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2054 -       ut_ad(buf_pool_mutex_own(buf_pool));
2055 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2056 +#ifdef UNIV_SYNC_DEBUG
2057 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2058 +#endif
2059         ut_ad(buf_page_in_file(bpage));
2060  
2061         block_mutex = buf_page_get_mutex(bpage);
2062         ut_ad(mutex_own(block_mutex));
2063  
2064 +       buf_pool_mutex_enter(buf_pool);
2065 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
2066 +
2067         ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2068  
2069         buf_page_set_io_fix(bpage, BUF_IO_WRITE);
2070 @@ -1427,14 +1434,16 @@
2071  
2072                 buf_pool = buf_pool_get(space, i);
2073  
2074 -               buf_pool_mutex_enter(buf_pool);
2075 +               //buf_pool_mutex_enter(buf_pool);
2076 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
2077  
2078                 /* We only want to flush pages from this buffer pool. */
2079                 bpage = buf_page_hash_get(buf_pool, space, i);
2080  
2081                 if (!bpage) {
2082  
2083 -                       buf_pool_mutex_exit(buf_pool);
2084 +                       //buf_pool_mutex_exit(buf_pool);
2085 +                       rw_lock_s_unlock(&buf_pool->page_hash_latch);
2086                         continue;
2087                 }
2088  
2089 @@ -1446,11 +1455,9 @@
2090                 if (flush_type != BUF_FLUSH_LRU
2091                     || i == offset
2092                     || buf_page_is_old(bpage)) {
2093 -                       mutex_t* block_mutex = buf_page_get_mutex(bpage);
2094 -
2095 -                       mutex_enter(block_mutex);
2096 +                       mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2097  
2098 -                       if (buf_flush_ready_for_flush(bpage, flush_type)
2099 +                       if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2100                             && (i == offset || !bpage->buf_fix_count)) {
2101                                 /* We only try to flush those
2102                                 neighbors != offset where the buf fix
2103 @@ -1466,11 +1473,12 @@
2104                                 ut_ad(!buf_pool_mutex_own(buf_pool));
2105                                 count++;
2106                                 continue;
2107 -                       } else {
2108 +                       } else if (block_mutex) {
2109                                 mutex_exit(block_mutex);
2110                         }
2111                 }
2112 -               buf_pool_mutex_exit(buf_pool);
2113 +               //buf_pool_mutex_exit(buf_pool);
2114 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
2115         }
2116  
2117         return(count);
2118 @@ -1503,21 +1511,25 @@
2119         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2120  #endif /* UNIV_DEBUG */
2121  
2122 -       ut_ad(buf_pool_mutex_own(buf_pool));
2123 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2124 +       ut_ad(flush_type != BUF_FLUSH_LRU
2125 +             || mutex_own(&buf_pool->LRU_list_mutex));
2126  
2127 -       block_mutex = buf_page_get_mutex(bpage);
2128 -       mutex_enter(block_mutex);
2129 +       block_mutex = buf_page_get_mutex_enter(bpage);
2130  
2131 -       ut_a(buf_page_in_file(bpage));
2132 +       //ut_a(buf_page_in_file(bpage));
2133  
2134 -       if (buf_flush_ready_for_flush(bpage, flush_type)) {
2135 +       if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2136                 ulint           space;
2137                 ulint           offset;
2138                 buf_pool_t*     buf_pool;
2139  
2140                 buf_pool = buf_pool_from_bpage(bpage);
2141  
2142 -               buf_pool_mutex_exit(buf_pool);
2143 +               //buf_pool_mutex_exit(buf_pool);
2144 +               if (flush_type == BUF_FLUSH_LRU) {
2145 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2146 +               }
2147  
2148                 /* These fields are protected by both the
2149                 buffer pool mutex and block mutex. */
2150 @@ -1533,13 +1545,18 @@
2151                                                   *count,
2152                                                   n_to_flush);
2153  
2154 -               buf_pool_mutex_enter(buf_pool);
2155 +               //buf_pool_mutex_enter(buf_pool);
2156 +               if (flush_type == BUF_FLUSH_LRU) {
2157 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2158 +               }
2159                 flushed = TRUE;
2160 -       } else {
2161 +       } else if (block_mutex) {
2162                 mutex_exit(block_mutex);
2163         }
2164  
2165 -       ut_ad(buf_pool_mutex_own(buf_pool));
2166 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2167 +       ut_ad(flush_type != BUF_FLUSH_LRU
2168 +             || mutex_own(&buf_pool->LRU_list_mutex));
2169  
2170         return(flushed);
2171  }
2172 @@ -1560,7 +1577,8 @@
2173         buf_page_t*     bpage;
2174         ulint           count = 0;
2175  
2176 -       ut_ad(buf_pool_mutex_own(buf_pool));
2177 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2178 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2179  
2180         do {
2181                 /* Start from the end of the list looking for a
2182 @@ -1582,7 +1600,8 @@
2183         should be flushed, we factor in this value. */
2184         buf_lru_flush_page_count += count;
2185  
2186 -       ut_ad(buf_pool_mutex_own(buf_pool));
2187 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2188 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2189  
2190         return(count);
2191  }
2192 @@ -1610,9 +1629,10 @@
2193  {
2194         ulint           len;
2195         buf_page_t*     bpage;
2196 +       buf_page_t*     prev_bpage = NULL;
2197         ulint           count = 0;
2198  
2199 -       ut_ad(buf_pool_mutex_own(buf_pool));
2200 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2201  
2202         /* If we have flushed enough, leave the loop */
2203         do {
2204 @@ -1631,6 +1651,7 @@
2205  
2206                 if (bpage) {
2207                         ut_a(bpage->oldest_modification > 0);
2208 +                       prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2209                 }
2210  
2211                 if (!bpage || bpage->oldest_modification >= lsn_limit) {
2212 @@ -1672,9 +1693,17 @@
2213                                 break;
2214                         }
2215  
2216 -                       bpage = UT_LIST_GET_PREV(list, bpage);
2217 +                       bpage = UT_LIST_GET_PREV(flush_list, bpage);
2218  
2219 -                       ut_ad(!bpage || bpage->in_flush_list);
2220 +                       //ut_ad(!bpage || bpage->in_flush_list);
2221 +                       if (bpage != prev_bpage) {
2222 +                               /* the predecessor differs from the one saved earlier, so the search might have jumped elsewhere in the list; retrying */
2223 +                               buf_flush_list_mutex_exit(buf_pool);
2224 +                               break;
2225 +                       }
2226 +                       if (bpage) {
2227 +                               prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2228 +                       }
2229  
2230                         buf_flush_list_mutex_exit(buf_pool);
2231  
2232 @@ -1683,7 +1712,7 @@
2233  
2234         } while (count < min_n && bpage != NULL && len > 0);
2235  
2236 -       ut_ad(buf_pool_mutex_own(buf_pool));
2237 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2238  
2239         return(count);
2240  }
2241 @@ -1722,13 +1751,15 @@
2242               || sync_thread_levels_empty_gen(TRUE));
2243  #endif /* UNIV_SYNC_DEBUG */
2244  
2245 -       buf_pool_mutex_enter(buf_pool);
2246 +       //buf_pool_mutex_enter(buf_pool);
2247  
2248         /* Note: The buffer pool mutex is released and reacquired within
2249         the flush functions. */
2250         switch(flush_type) {
2251         case BUF_FLUSH_LRU:
2252 +               mutex_enter(&buf_pool->LRU_list_mutex);
2253                 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2254 +               mutex_exit(&buf_pool->LRU_list_mutex);
2255                 break;
2256         case BUF_FLUSH_LIST:
2257                 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
2258 @@ -1737,7 +1768,7 @@
2259                 ut_error;
2260         }
2261  
2262 -       buf_pool_mutex_exit(buf_pool);
2263 +       //buf_pool_mutex_exit(buf_pool);
2264  
2265         buf_flush_buffered_writes();
2266  
2267 @@ -1993,7 +2024,7 @@
2268  retry:
2269         //buf_pool_mutex_enter(buf_pool);
2270         if (have_LRU_mutex)
2271 -               buf_pool_mutex_enter(buf_pool);
2272 +               mutex_enter(&buf_pool->LRU_list_mutex);
2273  
2274         n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2275  
2276 @@ -2010,15 +2041,15 @@
2277                         bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2278                         continue;
2279                 }
2280 -               block_mutex = buf_page_get_mutex(bpage);
2281 -
2282 -               mutex_enter(block_mutex);
2283 +               block_mutex = buf_page_get_mutex_enter(bpage);
2284  
2285 -               if (buf_flush_ready_for_replace(bpage)) {
2286 +               if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2287                         n_replaceable++;
2288                 }
2289  
2290 -               mutex_exit(block_mutex);
2291 +               if (block_mutex) {
2292 +                       mutex_exit(block_mutex);
2293 +               }
2294  
2295                 distance++;
2296  
2297 @@ -2027,7 +2058,7 @@
2298  
2299         //buf_pool_mutex_exit(buf_pool);
2300         if (have_LRU_mutex)
2301 -               buf_pool_mutex_exit(buf_pool);
2302 +               mutex_exit(&buf_pool->LRU_list_mutex);
2303  
2304         if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2305  
2306 @@ -2226,7 +2257,7 @@
2307  
2308         ut_ad(buf_flush_list_mutex_own(buf_pool));
2309  
2310 -       UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2311 +       UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2312                          ut_ad(ut_list_node_313->in_flush_list));
2313  
2314         bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2315 @@ -2266,7 +2297,7 @@
2316                         rnode = rbt_next(buf_pool->flush_rbt, rnode);
2317                 }
2318  
2319 -               bpage = UT_LIST_GET_NEXT(list, bpage);
2320 +               bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2321  
2322                 ut_a(!bpage || om >= bpage->oldest_modification);
2323         }
2324 diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2325 --- a/storage/innobase/buf/buf0lru.c    2010-12-03 15:22:36.321987250 +0900
2326 +++ b/storage/innobase/buf/buf0lru.c    2010-12-03 15:48:29.293023197 +0900
2327 @@ -143,8 +143,9 @@
2328  void
2329  buf_LRU_block_free_hashed_page(
2330  /*===========================*/
2331 -       buf_block_t*    block); /*!< in: block, must contain a file page and
2332 +       buf_block_t*    block,  /*!< in: block, must contain a file page and
2333                                 be in a state where it can be freed */
2334 +       ibool           have_page_hash_mutex);
2335  
2336  /******************************************************************//**
2337  Determines if the unzip_LRU list should be used for evicting a victim
2338 @@ -154,15 +155,20 @@
2339  ibool
2340  buf_LRU_evict_from_unzip_LRU(
2341  /*=========================*/
2342 -       buf_pool_t*     buf_pool)
2343 +       buf_pool_t*     buf_pool,
2344 +       ibool           have_LRU_mutex)
2345  {
2346         ulint   io_avg;
2347         ulint   unzip_avg;
2348  
2349 -       ut_ad(buf_pool_mutex_own(buf_pool));
2350 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2351  
2352 +       if (!have_LRU_mutex)
2353 +               mutex_enter(&buf_pool->LRU_list_mutex);
2354         /* If the unzip_LRU list is empty, we can only use the LRU. */
2355         if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2356 +               if (!have_LRU_mutex)
2357 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2358                 return(FALSE);
2359         }
2360  
2361 @@ -171,14 +177,20 @@
2362         decompressed pages in the buffer pool. */
2363         if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2364             <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2365 +               if (!have_LRU_mutex)
2366 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2367                 return(FALSE);
2368         }
2369  
2370         /* If eviction hasn't started yet, we assume by default
2371         that a workload is disk bound. */
2372         if (buf_pool->freed_page_clock == 0) {
2373 +               if (!have_LRU_mutex)
2374 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2375                 return(TRUE);
2376         }
2377 +       if (!have_LRU_mutex)
2378 +               mutex_exit(&buf_pool->LRU_list_mutex);
2379  
2380         /* Calculate the average over past intervals, and add the values
2381         of the current interval. */
2382 @@ -246,19 +258,23 @@
2383         page_arr = ut_malloc(
2384                 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2385  
2386 -       buf_pool_mutex_enter(buf_pool);
2387 +       //buf_pool_mutex_enter(buf_pool);
2388 +       mutex_enter(&buf_pool->LRU_list_mutex);
2389  
2390  scan_again:
2391         num_entries = 0;
2392         bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2393  
2394         while (bpage != NULL) {
2395 -               mutex_t*        block_mutex = buf_page_get_mutex(bpage);
2396 +               mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
2397                 buf_page_t*     prev_bpage;
2398  
2399 -               mutex_enter(block_mutex);
2400                 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2401  
2402 +               if (!block_mutex) {
2403 +                       goto next_page;
2404 +               }
2405 +
2406                 ut_a(buf_page_in_file(bpage));
2407  
2408                 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2409 @@ -287,14 +303,16 @@
2410  
2411                         /* Array full. We release the buf_pool->mutex to
2412                         obey the latching order. */
2413 -                       buf_pool_mutex_exit(buf_pool);
2414 +                       //buf_pool_mutex_exit(buf_pool);
2415 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2416  
2417                         buf_LRU_drop_page_hash_batch(
2418                                 id, zip_size, page_arr, num_entries);
2419  
2420                         num_entries = 0;
2421  
2422 -                       buf_pool_mutex_enter(buf_pool);
2423 +                       //buf_pool_mutex_enter(buf_pool);
2424 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2425                 } else {
2426                         mutex_exit(block_mutex);
2427                 }
2428 @@ -319,7 +337,8 @@
2429                 }
2430         }
2431  
2432 -       buf_pool_mutex_exit(buf_pool);
2433 +       //buf_pool_mutex_exit(buf_pool);
2434 +       mutex_exit(&buf_pool->LRU_list_mutex);
2435  
2436         /* Drop any remaining batch of search hashed pages. */
2437         buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2438 @@ -341,7 +360,9 @@
2439         ibool           all_freed;
2440  
2441  scan_again:
2442 -       buf_pool_mutex_enter(buf_pool);
2443 +       //buf_pool_mutex_enter(buf_pool);
2444 +       mutex_enter(&buf_pool->LRU_list_mutex);
2445 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
2446  
2447         all_freed = TRUE;
2448  
2449 @@ -369,8 +390,16 @@
2450  
2451                         all_freed = FALSE;
2452                 } else {
2453 -                       mutex_t* block_mutex = buf_page_get_mutex(bpage);
2454 -                       mutex_enter(block_mutex);
2455 +                       mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2456 +
2457 +                       if (!block_mutex) {
2458 +                               /* This case should be impossible. If it happens,
2459 +                               clear all_freed so that we go to scan_again. */
2460 +
2461 +                               all_freed = FALSE;
2462 +
2463 +                               goto next_page_no_mutex;
2464 +                       }
2465  
2466                         if (bpage->buf_fix_count > 0) {
2467  
2468 @@ -429,7 +458,9 @@
2469                                 ulint   page_no;
2470                                 ulint   zip_size;
2471  
2472 -                               buf_pool_mutex_exit(buf_pool);
2473 +                               //buf_pool_mutex_exit(buf_pool);
2474 +                               mutex_exit(&buf_pool->LRU_list_mutex);
2475 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2476  
2477                                 zip_size = buf_page_get_zip_size(bpage);
2478                                 page_no = buf_page_get_page_no(bpage);
2479 @@ -454,7 +485,7 @@
2480                         if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2481                             != BUF_BLOCK_ZIP_FREE) {
2482                                 buf_LRU_block_free_hashed_page((buf_block_t*)
2483 -                                                              bpage);
2484 +                                                              bpage, TRUE);
2485                         } else {
2486                                 /* The block_mutex should have been
2487                                 released by buf_LRU_block_remove_hashed_page()
2488 @@ -486,7 +517,9 @@
2489                 bpage = prev_bpage;
2490         }
2491  
2492 -       buf_pool_mutex_exit(buf_pool);
2493 +       //buf_pool_mutex_exit(buf_pool);
2494 +       mutex_exit(&buf_pool->LRU_list_mutex);
2495 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
2496  
2497         if (!all_freed) {
2498                 os_thread_sleep(20000);
2499 @@ -532,7 +565,9 @@
2500         buf_page_t*     b;
2501         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2502  
2503 -       ut_ad(buf_pool_mutex_own(buf_pool));
2504 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2505 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2506 +       ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2507         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2508  
2509         /* Find the first successor of bpage in the LRU list
2510 @@ -540,17 +575,17 @@
2511         b = bpage;
2512         do {
2513                 b = UT_LIST_GET_NEXT(LRU, b);
2514 -       } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2515 +       } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2516  
2517         /* Insert bpage before b, i.e., after the predecessor of b. */
2518         if (b) {
2519 -               b = UT_LIST_GET_PREV(list, b);
2520 +               b = UT_LIST_GET_PREV(zip_list, b);
2521         }
2522  
2523         if (b) {
2524 -               UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2525 +               UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2526         } else {
2527 -               UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2528 +               UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2529         }
2530  }
2531  
2532 @@ -563,18 +598,19 @@
2533  buf_LRU_free_from_unzip_LRU_list(
2534  /*=============================*/
2535         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
2536 -       ulint           n_iterations)   /*!< in: how many times this has
2537 +       ulint           n_iterations,   /*!< in: how many times this has
2538                                         been called repeatedly without
2539                                         result: a high value means that
2540                                         we should search farther; we will
2541                                         search n_iterations / 5 of the
2542                                         unzip_LRU list, or nothing if
2543                                         n_iterations >= 5 */
2544 +       ibool           have_LRU_mutex)
2545  {
2546         buf_block_t*    block;
2547         ulint           distance;
2548  
2549 -       ut_ad(buf_pool_mutex_own(buf_pool));
2550 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2551  
2552         /* Theoratically it should be much easier to find a victim
2553         from unzip_LRU as we can choose even a dirty block (as we'll
2554 @@ -584,7 +620,7 @@
2555         if we have done five iterations so far. */
2556  
2557         if (UNIV_UNLIKELY(n_iterations >= 5)
2558 -           || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2559 +           || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2560  
2561                 return(FALSE);
2562         }
2563 @@ -592,18 +628,25 @@
2564         distance = 100 + (n_iterations
2565                           * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2566  
2567 +restart:
2568         for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2569              UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2570              block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2571  
2572                 enum buf_lru_free_block_status  freed;
2573  
2574 +               mutex_enter(&block->mutex);
2575 +               if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2576 +                   || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2577 +                       mutex_exit(&block->mutex);
2578 +                       goto restart;
2579 +               }
2580 +
2581                 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2582                 ut_ad(block->in_unzip_LRU_list);
2583                 ut_ad(block->page.in_LRU_list);
2584  
2585 -               mutex_enter(&block->mutex);
2586 -               freed = buf_LRU_free_block(&block->page, FALSE);
2587 +               freed = buf_LRU_free_block(&block->page, FALSE, have_LRU_mutex);
2588                 mutex_exit(&block->mutex);
2589  
2590                 switch (freed) {
2591 @@ -637,21 +680,23 @@
2592  buf_LRU_free_from_common_LRU_list(
2593  /*==============================*/
2594         buf_pool_t*     buf_pool,
2595 -       ulint           n_iterations)
2596 +       ulint           n_iterations,
2597                                 /*!< in: how many times this has been called
2598                                 repeatedly without result: a high value means
2599                                 that we should search farther; if
2600                                 n_iterations < 10, then we search
2601                                 n_iterations / 10 * buf_pool->curr_size
2602                                 pages from the end of the LRU list */
2603 +       ibool           have_LRU_mutex)
2604  {
2605         buf_page_t*     bpage;
2606         ulint           distance;
2607  
2608 -       ut_ad(buf_pool_mutex_own(buf_pool));
2609 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2610  
2611         distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2612  
2613 +restart:
2614         for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2615              UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2616              bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2617 @@ -659,14 +704,23 @@
2618                 enum buf_lru_free_block_status  freed;
2619                 unsigned                        accessed;
2620                 mutex_t*                        block_mutex
2621 -                       = buf_page_get_mutex(bpage);
2622 +                       = buf_page_get_mutex_enter(bpage);
2623 +
2624 +               if (!block_mutex) {
2625 +                       goto restart;
2626 +               }
2627 +
2628 +               if (!bpage->in_LRU_list
2629 +                   || !buf_page_in_file(bpage)) {
2630 +                       mutex_exit(block_mutex);
2631 +                       goto restart;
2632 +               }
2633  
2634                 ut_ad(buf_page_in_file(bpage));
2635                 ut_ad(bpage->in_LRU_list);
2636  
2637 -               mutex_enter(block_mutex);
2638                 accessed = buf_page_is_accessed(bpage);
2639 -               freed = buf_LRU_free_block(bpage, TRUE);
2640 +               freed = buf_LRU_free_block(bpage, TRUE, have_LRU_mutex);
2641                 mutex_exit(block_mutex);
2642  
2643                 switch (freed) {
2644 @@ -718,16 +772,23 @@
2645                                 n_iterations / 5 of the unzip_LRU list. */
2646  {
2647         ibool   freed = FALSE;
2648 +       ibool   have_LRU_mutex = FALSE;
2649  
2650 -       buf_pool_mutex_enter(buf_pool);
2651 +       if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2652 +               have_LRU_mutex = TRUE;
2653 +
2654 +       //buf_pool_mutex_enter(buf_pool);
2655 +       if (have_LRU_mutex)
2656 +               mutex_enter(&buf_pool->LRU_list_mutex);
2657  
2658 -       freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2659 +       freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2660  
2661         if (!freed) {
2662                 freed = buf_LRU_free_from_common_LRU_list(
2663 -                       buf_pool, n_iterations);
2664 +                       buf_pool, n_iterations, have_LRU_mutex);
2665         }
2666  
2667 +       buf_pool_mutex_enter(buf_pool);
2668         if (!freed) {
2669                 buf_pool->LRU_flush_ended = 0;
2670         } else if (buf_pool->LRU_flush_ended > 0) {
2671 @@ -735,6 +796,8 @@
2672         }
2673  
2674         buf_pool_mutex_exit(buf_pool);
2675 +       if (have_LRU_mutex)
2676 +               mutex_exit(&buf_pool->LRU_list_mutex);
2677  
2678         return(freed);
2679  }
2680 @@ -795,7 +858,9 @@
2681  
2682                 buf_pool = buf_pool_from_array(i);
2683  
2684 -               buf_pool_mutex_enter(buf_pool);
2685 +               //buf_pool_mutex_enter(buf_pool);
2686 +               mutex_enter(&buf_pool->LRU_list_mutex);
2687 +               mutex_enter(&buf_pool->free_list_mutex);
2688  
2689                 if (!recv_recovery_on
2690                     && UT_LIST_GET_LEN(buf_pool->free)
2691 @@ -805,7 +870,9 @@
2692                         ret = TRUE;
2693                 }
2694  
2695 -               buf_pool_mutex_exit(buf_pool);
2696 +               //buf_pool_mutex_exit(buf_pool);
2697 +               mutex_exit(&buf_pool->LRU_list_mutex);
2698 +               mutex_exit(&buf_pool->free_list_mutex);
2699         }
2700  
2701         return(ret);
2702 @@ -823,9 +890,10 @@
2703  {
2704         buf_block_t*    block;
2705  
2706 -       ut_ad(buf_pool_mutex_own(buf_pool));
2707 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2708  
2709 -       block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2710 +       mutex_enter(&buf_pool->free_list_mutex);
2711 +       block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2712  
2713         if (block) {
2714  
2715 @@ -834,7 +902,9 @@
2716                 ut_ad(!block->page.in_flush_list);
2717                 ut_ad(!block->page.in_LRU_list);
2718                 ut_a(!buf_page_in_file(&block->page));
2719 -               UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2720 +               UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2721 +
2722 +               mutex_exit(&buf_pool->free_list_mutex);
2723  
2724                 mutex_enter(&block->mutex);
2725  
2726 @@ -844,6 +914,8 @@
2727                 ut_ad(buf_pool_from_block(block) == buf_pool);
2728  
2729                 mutex_exit(&block->mutex);
2730 +       } else {
2731 +               mutex_exit(&buf_pool->free_list_mutex);
2732         }
2733  
2734         return(block);
2735 @@ -866,7 +938,7 @@
2736         ibool           mon_value_was   = FALSE;
2737         ibool           started_monitor = FALSE;
2738  loop:
2739 -       buf_pool_mutex_enter(buf_pool);
2740 +       //buf_pool_mutex_enter(buf_pool);
2741  
2742         if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2743             + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2744 @@ -934,7 +1006,7 @@
2745  
2746         /* If there is a block in the free list, take it */
2747         block = buf_LRU_get_free_only(buf_pool);
2748 -       buf_pool_mutex_exit(buf_pool);
2749 +       //buf_pool_mutex_exit(buf_pool);
2750  
2751         if (block) {
2752                 ut_ad(buf_pool_from_block(block) == buf_pool);
2753 @@ -1034,7 +1106,8 @@
2754         ulint   new_len;
2755  
2756         ut_a(buf_pool->LRU_old);
2757 -       ut_ad(buf_pool_mutex_own(buf_pool));
2758 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2759 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2760         ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2761         ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2762  #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2763 @@ -1100,7 +1173,8 @@
2764  {
2765         buf_page_t*     bpage;
2766  
2767 -       ut_ad(buf_pool_mutex_own(buf_pool));
2768 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2769 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2770         ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2771  
2772         /* We first initialize all blocks in the LRU list as old and then use
2773 @@ -1135,13 +1209,14 @@
2774         ut_ad(buf_pool);
2775         ut_ad(bpage);
2776         ut_ad(buf_page_in_file(bpage));
2777 -       ut_ad(buf_pool_mutex_own(buf_pool));
2778 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2779 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2780  
2781         if (buf_page_belongs_to_unzip_LRU(bpage)) {
2782                 buf_block_t*    block = (buf_block_t*) bpage;
2783  
2784                 ut_ad(block->in_unzip_LRU_list);
2785 -               ut_d(block->in_unzip_LRU_list = FALSE);
2786 +               block->in_unzip_LRU_list = FALSE;
2787  
2788                 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2789         }
2790 @@ -1159,7 +1234,8 @@
2791  
2792         ut_ad(buf_pool);
2793         ut_ad(bpage);
2794 -       ut_ad(buf_pool_mutex_own(buf_pool));
2795 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2796 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2797  
2798         ut_a(buf_page_in_file(bpage));
2799  
2800 @@ -1236,12 +1312,13 @@
2801  
2802         ut_ad(buf_pool);
2803         ut_ad(block);
2804 -       ut_ad(buf_pool_mutex_own(buf_pool));
2805 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2806 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2807  
2808         ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2809  
2810         ut_ad(!block->in_unzip_LRU_list);
2811 -       ut_d(block->in_unzip_LRU_list = TRUE);
2812 +       block->in_unzip_LRU_list = TRUE;
2813  
2814         if (old) {
2815                 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2816 @@ -1262,7 +1339,8 @@
2817  
2818         ut_ad(buf_pool);
2819         ut_ad(bpage);
2820 -       ut_ad(buf_pool_mutex_own(buf_pool));
2821 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2822 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2823  
2824         ut_a(buf_page_in_file(bpage));
2825  
2826 @@ -1313,7 +1391,8 @@
2827  
2828         ut_ad(buf_pool);
2829         ut_ad(bpage);
2830 -       ut_ad(buf_pool_mutex_own(buf_pool));
2831 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2832 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2833  
2834         ut_a(buf_page_in_file(bpage));
2835         ut_ad(!bpage->in_LRU_list);
2836 @@ -1392,7 +1471,8 @@
2837  {
2838         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2839  
2840 -       ut_ad(buf_pool_mutex_own(buf_pool));
2841 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2842 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2843  
2844         if (bpage->old) {
2845                 buf_pool->stat.n_pages_made_young++;
2846 @@ -1432,17 +1512,18 @@
2847  buf_LRU_free_block(
2848  /*===============*/
2849         buf_page_t*     bpage,  /*!< in: block to be freed */
2850 -       ibool           zip)    /*!< in: TRUE if should remove also the
2851 +       ibool           zip,    /*!< in: TRUE if should remove also the
2852                                 compressed page of an uncompressed page */
2853 +       ibool           have_LRU_mutex)
2854  {
2855         buf_page_t*     b = NULL;
2856         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2857         mutex_t*        block_mutex = buf_page_get_mutex(bpage);
2858  
2859 -       ut_ad(buf_pool_mutex_own(buf_pool));
2860 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2861         ut_ad(mutex_own(block_mutex));
2862         ut_ad(buf_page_in_file(bpage));
2863 -       ut_ad(bpage->in_LRU_list);
2864 +       //ut_ad(bpage->in_LRU_list);
2865         ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2866  #if UNIV_WORD_SIZE == 4
2867         /* On 32-bit systems, there is no padding in buf_page_t.  On
2868 @@ -1451,7 +1532,7 @@
2869         UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2870  #endif
2871  
2872 -       if (!buf_page_can_relocate(bpage)) {
2873 +       if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2874  
2875                 /* Do not free buffer-fixed or I/O-fixed blocks. */
2876                 return(BUF_LRU_NOT_FREED);
2877 @@ -1483,15 +1564,15 @@
2878                 If it cannot be allocated (without freeing a block
2879                 from the LRU list), refuse to free bpage. */
2880  alloc:
2881 -               buf_pool_mutex_exit_forbid(buf_pool);
2882 -               b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2883 -               buf_pool_mutex_exit_allow(buf_pool);
2884 +               //buf_pool_mutex_exit_forbid(buf_pool);
2885 +               b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2886 +               //buf_pool_mutex_exit_allow(buf_pool);
2887  
2888                 if (UNIV_UNLIKELY(!b)) {
2889                         return(BUF_LRU_CANNOT_RELOCATE);
2890                 }
2891  
2892 -               memcpy(b, bpage, sizeof *b);
2893 +               //memcpy(b, bpage, sizeof *b);
2894         }
2895  
2896  #ifdef UNIV_DEBUG
2897 @@ -1502,6 +1583,39 @@
2898         }
2899  #endif /* UNIV_DEBUG */
2900  
2901 +       /* release and re-take block_mutex to keep the latch order */
2902 +       mutex_exit(block_mutex);
2903 +
2904 +       if (!have_LRU_mutex)
2905 +               mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2906 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
2907 +       mutex_enter(block_mutex);
2908 +
2909 +       /* recheck states of block */
2910 +       if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2911 +           || !buf_page_can_relocate(bpage)) {
2912 +not_freed:
2913 +               if (b) {
2914 +                       buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2915 +               }
2916 +               if (!have_LRU_mutex)
2917 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2918 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2919 +               return(BUF_LRU_NOT_FREED);
2920 +       } else if (zip || !bpage->zip.data) {
2921 +               if (bpage->oldest_modification)
2922 +                       goto not_freed;
2923 +       } else if (bpage->oldest_modification) {
2924 +               if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2925 +                       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2926 +                       goto not_freed;
2927 +               }
2928 +       }
2929 +
2930 +       if (b) {
2931 +               memcpy(b, bpage, sizeof *b);
2932 +       }
2933 +
2934         if (buf_LRU_block_remove_hashed_page(bpage, zip)
2935             != BUF_BLOCK_ZIP_FREE) {
2936                 ut_a(bpage->buf_fix_count == 0);
2937 @@ -1518,6 +1632,10 @@
2938  
2939                         ut_a(!hash_b);
2940  
2941 +                       while (prev_b && !prev_b->in_LRU_list) {
2942 +                               prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2943 +                       }
2944 +
2945                         b->state = b->oldest_modification
2946                                 ? BUF_BLOCK_ZIP_DIRTY
2947                                 : BUF_BLOCK_ZIP_PAGE;
2948 @@ -1610,7 +1728,9 @@
2949                         b->io_fix = BUF_IO_READ;
2950                 }
2951  
2952 -               buf_pool_mutex_exit(buf_pool);
2953 +               //buf_pool_mutex_exit(buf_pool);
2954 +               mutex_exit(&buf_pool->LRU_list_mutex);
2955 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2956                 mutex_exit(block_mutex);
2957  
2958                 /* Remove possible adaptive hash index on the page.
2959 @@ -1642,7 +1762,9 @@
2960                                 : BUF_NO_CHECKSUM_MAGIC);
2961                 }
2962  
2963 -               buf_pool_mutex_enter(buf_pool);
2964 +               //buf_pool_mutex_enter(buf_pool);
2965 +               if (have_LRU_mutex)
2966 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2967                 mutex_enter(block_mutex);
2968  
2969                 if (b) {
2970 @@ -1652,13 +1774,17 @@
2971                         mutex_exit(&buf_pool->zip_mutex);
2972                 }
2973  
2974 -               buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2975 +               buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
2976         } else {
2977                 /* The block_mutex should have been released by
2978                 buf_LRU_block_remove_hashed_page() when it returns
2979                 BUF_BLOCK_ZIP_FREE. */
2980                 ut_ad(block_mutex == &buf_pool->zip_mutex);
2981                 mutex_enter(block_mutex);
2982 +
2983 +               if (!have_LRU_mutex)
2984 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2985 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2986         }
2987  
2988         return(BUF_LRU_FREED);
2989 @@ -1670,13 +1796,14 @@
2990  void
2991  buf_LRU_block_free_non_file_page(
2992  /*=============================*/
2993 -       buf_block_t*    block)  /*!< in: block, must not contain a file page */
2994 +       buf_block_t*    block,  /*!< in: block, must not contain a file page */
2995 +       ibool           have_page_hash_mutex)
2996  {
2997         void*           data;
2998         buf_pool_t*     buf_pool = buf_pool_from_block(block);
2999  
3000         ut_ad(block);
3001 -       ut_ad(buf_pool_mutex_own(buf_pool));
3002 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3003         ut_ad(mutex_own(&block->mutex));
3004  
3005         switch (buf_block_get_state(block)) {
3006 @@ -1710,18 +1837,21 @@
3007         if (data) {
3008                 block->page.zip.data = NULL;
3009                 mutex_exit(&block->mutex);
3010 -               buf_pool_mutex_exit_forbid(buf_pool);
3011 +               //buf_pool_mutex_exit_forbid(buf_pool);
3012  
3013                 buf_buddy_free(
3014 -                       buf_pool, data, page_zip_get_size(&block->page.zip));
3015 +                       buf_pool, data, page_zip_get_size(&block->page.zip),
3016 +                       have_page_hash_mutex);
3017  
3018 -               buf_pool_mutex_exit_allow(buf_pool);
3019 +               //buf_pool_mutex_exit_allow(buf_pool);
3020                 mutex_enter(&block->mutex);
3021                 page_zip_set_size(&block->page.zip, 0);
3022         }
3023  
3024 -       UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3025 +       mutex_enter(&buf_pool->free_list_mutex);
3026 +       UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3027         ut_d(block->page.in_free_list = TRUE);
3028 +       mutex_exit(&buf_pool->free_list_mutex);
3029  
3030         UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3031  }
3032 @@ -1751,7 +1881,11 @@
3033         buf_pool_t*             buf_pool = buf_pool_from_bpage(bpage);
3034  
3035         ut_ad(bpage);
3036 -       ut_ad(buf_pool_mutex_own(buf_pool));
3037 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3038 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3039 +#ifdef UNIV_SYNC_DEBUG
3040 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3041 +#endif
3042         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3043  
3044         ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3045 @@ -1859,7 +1993,9 @@
3046  
3047  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3048                 mutex_exit(buf_page_get_mutex(bpage));
3049 -               buf_pool_mutex_exit(buf_pool);
3050 +               //buf_pool_mutex_exit(buf_pool);
3051 +               mutex_exit(&buf_pool->LRU_list_mutex);
3052 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3053                 buf_print();
3054                 buf_LRU_print();
3055                 buf_validate();
3056 @@ -1880,17 +2016,17 @@
3057                 ut_a(bpage->zip.data);
3058                 ut_a(buf_page_get_zip_size(bpage));
3059  
3060 -               UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3061 +               UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3062  
3063                 mutex_exit(&buf_pool->zip_mutex);
3064 -               buf_pool_mutex_exit_forbid(buf_pool);
3065 +               //buf_pool_mutex_exit_forbid(buf_pool);
3066  
3067                 buf_buddy_free(
3068                         buf_pool, bpage->zip.data,
3069 -                       page_zip_get_size(&bpage->zip));
3070 +                       page_zip_get_size(&bpage->zip), TRUE);
3071  
3072 -               buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3073 -               buf_pool_mutex_exit_allow(buf_pool);
3074 +               buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3075 +               //buf_pool_mutex_exit_allow(buf_pool);
3076  
3077                 UNIV_MEM_UNDESC(bpage);
3078                 return(BUF_BLOCK_ZIP_FREE);
3079 @@ -1913,13 +2049,13 @@
3080                         ut_ad(!bpage->in_flush_list);
3081                         ut_ad(!bpage->in_LRU_list);
3082                         mutex_exit(&((buf_block_t*) bpage)->mutex);
3083 -                       buf_pool_mutex_exit_forbid(buf_pool);
3084 +                       //buf_pool_mutex_exit_forbid(buf_pool);
3085  
3086                         buf_buddy_free(
3087                                 buf_pool, data,
3088 -                               page_zip_get_size(&bpage->zip));
3089 +                               page_zip_get_size(&bpage->zip), TRUE);
3090  
3091 -                       buf_pool_mutex_exit_allow(buf_pool);
3092 +                       //buf_pool_mutex_exit_allow(buf_pool);
3093                         mutex_enter(&((buf_block_t*) bpage)->mutex);
3094                         page_zip_set_size(&bpage->zip, 0);
3095                 }
3096 @@ -1945,18 +2081,19 @@
3097  void
3098  buf_LRU_block_free_hashed_page(
3099  /*===========================*/
3100 -       buf_block_t*    block)  /*!< in: block, must contain a file page and
3101 +       buf_block_t*    block,  /*!< in: block, must contain a file page and
3102                                 be in a state where it can be freed */
3103 +       ibool           have_page_hash_mutex) /*!< in: forwarded unchanged to buf_LRU_block_free_non_file_page() */
3104  {
3105  #ifdef UNIV_DEBUG
3106 -       buf_pool_t*     buf_pool = buf_pool_from_block(block);
3107 -       ut_ad(buf_pool_mutex_own(buf_pool));
3108 +       //buf_pool_t*   buf_pool = buf_pool_from_block(block);
3109 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3110  #endif
3111         ut_ad(mutex_own(&block->mutex));
3112  
3113         buf_block_set_state(block, BUF_BLOCK_MEMORY);
3114  
3115 -       buf_LRU_block_free_non_file_page(block);
3116 +       buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3117  }
3118  
3119  /**********************************************************************//**
3120 @@ -1983,7 +2120,8 @@
3121         }
3122  
3123         if (adjust) {
3124 -               buf_pool_mutex_enter(buf_pool);
3125 +               //buf_pool_mutex_enter(buf_pool);
3126 +               mutex_enter(&buf_pool->LRU_list_mutex);
3127  
3128                 if (ratio != buf_pool->LRU_old_ratio) {
3129                         buf_pool->LRU_old_ratio = ratio;
3130 @@ -1995,7 +2133,8 @@
3131                         }
3132                 }
3133  
3134 -               buf_pool_mutex_exit(buf_pool);
3135 +               //buf_pool_mutex_exit(buf_pool);
3136 +               mutex_exit(&buf_pool->LRU_list_mutex);
3137         } else {
3138                 buf_pool->LRU_old_ratio = ratio;
3139         }
3140 @@ -2100,7 +2239,8 @@
3141         ulint           new_len;
3142  
3143         ut_ad(buf_pool);
3144 -       buf_pool_mutex_enter(buf_pool);
3145 +       //buf_pool_mutex_enter(buf_pool);
3146 +       mutex_enter(&buf_pool->LRU_list_mutex);
3147  
3148         if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3149  
3150 @@ -2161,16 +2301,22 @@
3151  
3152         ut_a(buf_pool->LRU_old_len == old_len);
3153  
3154 -       UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3155 +       mutex_exit(&buf_pool->LRU_list_mutex);
3156 +       mutex_enter(&buf_pool->free_list_mutex);
3157 +
3158 +       UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3159                          ut_ad(ut_list_node_313->in_free_list));
3160  
3161         for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3162              bpage != NULL;
3163 -            bpage = UT_LIST_GET_NEXT(list, bpage)) {
3164 +            bpage = UT_LIST_GET_NEXT(free, bpage)) {
3165  
3166                 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3167         }
3168  
3169 +       mutex_exit(&buf_pool->free_list_mutex);
3170 +       mutex_enter(&buf_pool->LRU_list_mutex);
3171 +
3172         UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3173                          ut_ad(ut_list_node_313->in_unzip_LRU_list
3174                                && ut_list_node_313->page.in_LRU_list));
3175 @@ -2184,7 +2330,8 @@
3176                 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3177         }
3178  
3179 -       buf_pool_mutex_exit(buf_pool);
3180 +       //buf_pool_mutex_exit(buf_pool);
3181 +       mutex_exit(&buf_pool->LRU_list_mutex);
3182  }
3183  
3184  /**********************************************************************//**
3185 @@ -2220,7 +2367,8 @@
3186         const buf_page_t*       bpage;
3187  
3188         ut_ad(buf_pool);
3189 -       buf_pool_mutex_enter(buf_pool);
3190 +       //buf_pool_mutex_enter(buf_pool);
3191 +       mutex_enter(&buf_pool->LRU_list_mutex);
3192  
3193         bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3194  
3195 @@ -2277,7 +2425,8 @@
3196                 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3197         }
3198  
3199 -       buf_pool_mutex_exit(buf_pool);
3200 +       //buf_pool_mutex_exit(buf_pool);
3201 +       mutex_exit(&buf_pool->LRU_list_mutex);
3202  }
3203  
3204  /**********************************************************************//**
3205 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3206 --- a/storage/innobase/buf/buf0rea.c    2010-12-03 15:22:36.323977308 +0900
3207 +++ b/storage/innobase/buf/buf0rea.c    2010-12-03 15:48:29.296024468 +0900
3208 @@ -311,6 +311,7 @@
3209  
3210                 return(0);
3211         }
3212 +       buf_pool_mutex_exit(buf_pool);
3213  
3214         /* Check that almost all pages in the area have been accessed; if
3215         offset == low, the accesses must be in a descending order, otherwise,
3216 @@ -329,6 +330,7 @@
3217  
3218         fail_count = 0;
3219  
3220 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
3221         for (i = low; i < high; i++) {
3222                 bpage = buf_page_hash_get(buf_pool, space, i);
3223  
3224 @@ -356,7 +358,8 @@
3225  
3226                 if (fail_count > threshold) {
3227                         /* Too many failures: return */
3228 -                       buf_pool_mutex_exit(buf_pool);
3229 +                       //buf_pool_mutex_exit(buf_pool);
3230 +                       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3231                         return(0);
3232                 }
3233  
3234 @@ -371,7 +374,8 @@
3235         bpage = buf_page_hash_get(buf_pool, space, offset);
3236  
3237         if (bpage == NULL) {
3238 -               buf_pool_mutex_exit(buf_pool);
3239 +               //buf_pool_mutex_exit(buf_pool);
3240 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
3241  
3242                 return(0);
3243         }
3244 @@ -397,7 +401,8 @@
3245         pred_offset = fil_page_get_prev(frame);
3246         succ_offset = fil_page_get_next(frame);
3247  
3248 -       buf_pool_mutex_exit(buf_pool);
3249 +       //buf_pool_mutex_exit(buf_pool);
3250 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3251  
3252         if ((offset == low) && (succ_offset == offset + 1)) {
3253  
3254 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3255 --- a/storage/innobase/handler/ha_innodb.cc     2010-12-03 15:48:03.048955897 +0900
3256 +++ b/storage/innobase/handler/ha_innodb.cc     2010-12-03 15:48:29.304024564 +0900
3257 @@ -264,6 +264,10 @@
3258  #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3259         {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3260         {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3261 +       {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3262 +       {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3263 +       {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3264 +       {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3265         {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3266         {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3267         {&dict_sys_mutex_key, "dict_sys_mutex", 0},
3268 @@ -314,6 +318,7 @@
3269         {&archive_lock_key, "archive_lock", 0},
3270  #  endif /* UNIV_LOG_ARCHIVE */
3271         {&btr_search_latch_key, "btr_search_latch", 0},
3272 +       {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3273  #  ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3274         {&buf_block_lock_key, "buf_block_lock", 0},
3275  #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3276 diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3277 --- a/storage/innobase/handler/i_s.cc   2010-12-03 15:37:45.517105700 +0900
3278 +++ b/storage/innobase/handler/i_s.cc   2010-12-03 15:48:29.331024462 +0900
3279 @@ -1565,7 +1565,8 @@
3280  
3281                 buf_pool = buf_pool_from_array(i);
3282  
3283 -               buf_pool_mutex_enter(buf_pool);
3284 +               //buf_pool_mutex_enter(buf_pool);
3285 +               mutex_enter(&buf_pool->zip_free_mutex);
3286  
3287                 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3288                         buf_buddy_stat_t*       buddy_stat;
3289 @@ -1595,7 +1596,8 @@
3290                         }
3291                 }
3292  
3293 -               buf_pool_mutex_exit(buf_pool);
3294 +               //buf_pool_mutex_exit(buf_pool);
3295 +               mutex_exit(&buf_pool->zip_free_mutex);
3296  
3297                 if (status) {
3298                         break;
3299 diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3300 --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3301 +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
3302 @@ -3766,9 +3766,11 @@
3303                 ulint           fold = buf_page_address_fold(space, page_no);
3304                 buf_pool_t*     buf_pool = buf_pool_get(space, page_no);
3305  
3306 -               buf_pool_mutex_enter(buf_pool);
3307 +               //buf_pool_mutex_enter(buf_pool);
3308 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
3309                 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3310 -               buf_pool_mutex_exit(buf_pool);
3311 +               //buf_pool_mutex_exit(buf_pool);
3312 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
3313  
3314                 if (UNIV_LIKELY_NULL(bpage)) {
3315                         /* A buffer pool watch has been set or the
3316 diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3317 --- a/storage/innobase/include/buf0buddy.h      2010-11-03 07:01:13.000000000 +0900
3318 +++ b/storage/innobase/include/buf0buddy.h      2010-12-03 15:48:29.338023826 +0900
3319 @@ -51,10 +51,11 @@
3320         buf_pool_t*     buf_pool,
3321                         /*!< buffer pool in which the block resides */
3322         ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
3323 -       ibool*  lru)    /*!< in: pointer to a variable that will be assigned
3324 +       ibool*  lru,    /*!< in: pointer to a variable that will be assigned
3325                         TRUE if storage was allocated from the LRU list
3326                         and buf_pool->mutex was temporarily released,
3327                         or NULL if the LRU list should not be used */
3328 +       ibool   have_page_hash_mutex)
3329         __attribute__((malloc));
3330  
3331  /**********************************************************************//**
3332 @@ -67,7 +68,8 @@
3333                         /*!< buffer pool in which the block resides */
3334         void*   buf,    /*!< in: block to be freed, must not be
3335                         pointed to by the buffer pool */
3336 -       ulint   size)   /*!< in: block size, up to UNIV_PAGE_SIZE */
3337 +       ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
3338 +       ibool   have_page_hash_mutex)
3339         __attribute__((nonnull));
3340  
3341  #ifndef UNIV_NONINL
3342 diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3343 --- a/storage/innobase/include/buf0buddy.ic     2010-11-03 07:01:13.000000000 +0900
3344 +++ b/storage/innobase/include/buf0buddy.ic     2010-12-03 15:48:29.339040413 +0900
3345 @@ -46,10 +46,11 @@
3346                         /*!< in: buffer pool in which the page resides */
3347         ulint   i,      /*!< in: index of buf_pool->zip_free[],
3348                         or BUF_BUDDY_SIZES */
3349 -       ibool*  lru)    /*!< in: pointer to a variable that will be assigned
3350 +       ibool*  lru,    /*!< in: pointer to a variable that will be assigned
3351                         TRUE if storage was allocated from the LRU list
3352                         and buf_pool->mutex was temporarily released,
3353                         or NULL if the LRU list should not be used */
3354 +       ibool   have_page_hash_mutex)
3355         __attribute__((malloc));
3356  
3357  /**********************************************************************//**
3358 @@ -61,8 +62,9 @@
3359         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
3360         void*           buf,            /*!< in: block to be freed, must not be
3361                                         pointed to by the buffer pool */
3362 -       ulint           i)              /*!< in: index of buf_pool->zip_free[],
3363 +       ulint           i,              /*!< in: index of buf_pool->zip_free[],
3364                                         or BUF_BUDDY_SIZES */
3365 +       ibool           have_page_hash_mutex)
3366         __attribute__((nonnull));
3367  
3368  /**********************************************************************//**
3369 @@ -102,16 +104,17 @@
3370                                         the page resides */
3371         ulint           size,           /*!< in: block size, up to
3372                                         UNIV_PAGE_SIZE */
3373 -       ibool*          lru)            /*!< in: pointer to a variable
3374 +       ibool*          lru,            /*!< in: pointer to a variable
3375                                         that will be assigned TRUE if
3376                                         storage was allocated from the
3377                                         LRU list and buf_pool->mutex was
3378                                         temporarily released, or NULL if
3379                                         the LRU list should not be used */
3380 +       ibool           have_page_hash_mutex)
3381  {
3382 -       ut_ad(buf_pool_mutex_own(buf_pool));
3383 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3384  
3385 -       return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3386 +       return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3387  }
3388  
3389  /**********************************************************************//**
3390 @@ -123,12 +126,25 @@
3391         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
3392         void*           buf,            /*!< in: block to be freed, must not be
3393                                         pointed to by the buffer pool */
3394 -       ulint           size)           /*!< in: block size, up to
3395 +       ulint           size,           /*!< in: block size, up to
3396                                         UNIV_PAGE_SIZE */
3397 +       ibool           have_page_hash_mutex)
3398  {
3399 -       ut_ad(buf_pool_mutex_own(buf_pool));
3400 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3401 +
3402 +       if (!have_page_hash_mutex) {
3403 +               mutex_enter(&buf_pool->LRU_list_mutex);
3404 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
3405 +       }
3406  
3407 -       buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3408 +       mutex_enter(&buf_pool->zip_free_mutex);
3409 +       buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3410 +       mutex_exit(&buf_pool->zip_free_mutex);
3411 +
3412 +       if (!have_page_hash_mutex) {
3413 +               mutex_exit(&buf_pool->LRU_list_mutex);
3414 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3415 +       }
3416  }
3417  
3418  #ifdef UNIV_MATERIALIZE
3419 diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3420 --- a/storage/innobase/include/buf0buf.h        2010-12-03 15:22:36.327954660 +0900
3421 +++ b/storage/innobase/include/buf0buf.h        2010-12-03 15:48:29.343024683 +0900
3422 @@ -203,6 +203,20 @@
3423  /*==========================*/
3424  
3425  /********************************************************************//**
3426 +Acquires page_hash_latch in X mode on every buffer pool instance. */
3427 +UNIV_INLINE
3428 +void
3429 +buf_pool_page_hash_x_lock_all(void);
3430 +/*================================*/
3431 +
3432 +/********************************************************************//**
3433 +Releases the X mode page_hash_latch of every buffer pool instance. */
3434 +UNIV_INLINE
3435 +void
3436 +buf_pool_page_hash_x_unlock_all(void);
3437 +/*==================================*/
3438 +
3439 +/********************************************************************//**
3440  Creates the buffer pool.
3441  @return        own: buf_pool object, NULL if not enough memory or error */
3442  UNIV_INTERN
3443 @@ -832,6 +846,15 @@
3444         const buf_page_t*       bpage)  /*!< in: pointer to control block */
3445         __attribute__((pure));
3446  
3447 +/*************************************************************************
3448 +Gets the mutex of a block and enters it; returns NULL if there is none. */
3449 +UNIV_INLINE
3450 +mutex_t*
3451 +buf_page_get_mutex_enter(
3452 +/*=========================*/
3453 +       const buf_page_t*       bpage); /*!< in: pointer to control block */
3454 +       /* Not __attribute__((pure)): entering the mutex is a side effect. */
3455 +
3456  /*********************************************************************//**
3457  Get the flush type of a page.
3458  @return        flush type */
3459 @@ -1313,7 +1336,7 @@
3460         All these are protected by buf_pool->mutex. */
3461         /* @{ */
3462  
3463 -       UT_LIST_NODE_T(buf_page_t) list;
3464 +       /* UT_LIST_NODE_T(buf_page_t) list; */
3465                                         /*!< based on state, this is a
3466                                         list node, protected either by
3467                                         buf_pool->mutex or by
3468 @@ -1341,6 +1364,10 @@
3469                                         BUF_BLOCK_REMOVE_HASH or
3470                                         BUF_BLOCK_READY_IN_USE. */
3471  
3472 +       /* resplit for optimistic use */
3473 +       UT_LIST_NODE_T(buf_page_t) free;
3474 +       UT_LIST_NODE_T(buf_page_t) flush_list;
3475 +       UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3476  #ifdef UNIV_DEBUG
3477         ibool           in_flush_list;  /*!< TRUE if in buf_pool->flush_list;
3478                                         when buf_pool->flush_list_mutex is
3479 @@ -1433,11 +1460,11 @@
3480                                         a block is in the unzip_LRU list
3481                                         if page.state == BUF_BLOCK_FILE_PAGE
3482                                         and page.zip.data != NULL */
3483 -#ifdef UNIV_DEBUG
3484 +//#ifdef UNIV_DEBUG
3485         ibool           in_unzip_LRU_list;/*!< TRUE if the page is in the
3486                                         decompressed LRU list;
3487                                         used in debugging */
3488 -#endif /* UNIV_DEBUG */
3489 +//#endif /* UNIV_DEBUG */
3490         mutex_t         mutex;          /*!< mutex protecting this block:
3491                                         state (also protected by the buffer
3492                                         pool mutex), io_fix, buf_fix_count,
3493 @@ -1612,6 +1639,11 @@
3494                                         pool instance, protects compressed
3495                                         only pages (of type buf_page_t, not
3496                                         buf_block_t */
3497 +       mutex_t         LRU_list_mutex;
3498 +       rw_lock_t       page_hash_latch;
3499 +       mutex_t         free_list_mutex;
3500 +       mutex_t         zip_free_mutex;
3501 +       mutex_t         zip_hash_mutex;
3502         ulint           instance_no;    /*!< Array index of this buffer
3503                                         pool instance */
3504         ulint           old_pool_size;  /*!< Old pool size in bytes */
3505 diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3506 --- a/storage/innobase/include/buf0buf.ic       2010-11-03 07:01:13.000000000 +0900
3507 +++ b/storage/innobase/include/buf0buf.ic       2010-12-03 15:48:29.345024524 +0900
3508 @@ -274,7 +274,7 @@
3509         case BUF_BLOCK_ZIP_FREE:
3510                 /* This is a free page in buf_pool->zip_free[].
3511                 Such pages should only be accessed by the buddy allocator. */
3512 -               ut_error;
3513 +               /* ut_error; */ /* optimistic */
3514                 break;
3515         case BUF_BLOCK_ZIP_PAGE:
3516         case BUF_BLOCK_ZIP_DIRTY:
3517 @@ -317,9 +317,14 @@
3518  {
3519         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3520  
3521 +       if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3522 +               /* TODO: this is interim code; it should be confirmed later. */
3523 +               return(&buf_pool->zip_mutex);
3524 +       }
3525 +
3526         switch (buf_page_get_state(bpage)) {
3527         case BUF_BLOCK_ZIP_FREE:
3528 -               ut_error;
3529 +               /* ut_error; */ /* optimistic */
3530                 return(NULL);
3531         case BUF_BLOCK_ZIP_PAGE:
3532         case BUF_BLOCK_ZIP_DIRTY:
3533 @@ -329,6 +334,28 @@
3534         }
3535  }
3536  
3537 +/*************************************************************************
3538 +Gets the mutex of a block and enters it; @return the entered mutex, or NULL. */
3539 +UNIV_INLINE
3540 +mutex_t*
3541 +buf_page_get_mutex_enter(
3542 +/*=========================*/
3543 +       const buf_page_t*       bpage)  /*!< in: pointer to control block */
3544 +{
3545 +       mutex_t*        block_mutex;
3546 +
3547 +       while(1) {
3548 +               block_mutex = buf_page_get_mutex(bpage);
3549 +               if (!block_mutex) /* no mutex reported for this page: nothing is entered */
3550 +                       return block_mutex;
3551 +
3552 +               mutex_enter(block_mutex);
3553 +               if (block_mutex == buf_page_get_mutex(bpage)) /* still the page's mutex: return holding it */
3554 +                       return block_mutex;
3555 +               mutex_exit(block_mutex); /* the page's mutex changed while we waited: release and retry */
3556 +       }
3557 +}
3558 +
3559  /*********************************************************************//**
3560  Get the flush type of a page.
3561  @return        flush type */
3562 @@ -425,8 +452,8 @@
3563         enum buf_io_fix io_fix) /*!< in: io_fix state */
3564  {
3565  #ifdef UNIV_DEBUG
3566 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3567 -       ut_ad(buf_pool_mutex_own(buf_pool));
3568 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3569 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3570  #endif
3571         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3572  
3573 @@ -456,14 +483,14 @@
3574         const buf_page_t*       bpage)  /*!< control block being relocated */
3575  {
3576  #ifdef UNIV_DEBUG
3577 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3578 -       ut_ad(buf_pool_mutex_own(buf_pool));
3579 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3580 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3581  #endif
3582         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3583         ut_ad(buf_page_in_file(bpage));
3584 -       ut_ad(bpage->in_LRU_list);
3585 +       //ut_ad(bpage->in_LRU_list);
3586  
3587 -       return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3588 +       return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3589                && bpage->buf_fix_count == 0);
3590  }
3591  
3592 @@ -477,8 +504,8 @@
3593         const buf_page_t*       bpage)  /*!< in: control block */
3594  {
3595  #ifdef UNIV_DEBUG
3596 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3597 -       ut_ad(buf_pool_mutex_own(buf_pool));
3598 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3599 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3600  #endif
3601         ut_ad(buf_page_in_file(bpage));
3602  
3603 @@ -498,7 +525,8 @@
3604         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3605  #endif /* UNIV_DEBUG */
3606         ut_a(buf_page_in_file(bpage));
3607 -       ut_ad(buf_pool_mutex_own(buf_pool));
3608 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3609 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3610         ut_ad(bpage->in_LRU_list);
3611  
3612  #ifdef UNIV_LRU_DEBUG
3613 @@ -545,9 +573,10 @@
3614         ulint           time_ms)        /*!< in: ut_time_ms() */
3615  {
3616  #ifdef UNIV_DEBUG
3617 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3618 -       ut_ad(buf_pool_mutex_own(buf_pool));
3619 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3620 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3621  #endif
3622 +       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3623         ut_a(buf_page_in_file(bpage));
3624  
3625         if (!bpage->access_time) {
3626 @@ -761,19 +790,19 @@
3627  /*===========*/
3628         buf_block_t*    block)  /*!< in, own: block to be freed */
3629  {
3630 -       buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3631 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3632  
3633 -       buf_pool_mutex_enter(buf_pool);
3634 +       //buf_pool_mutex_enter(buf_pool);
3635  
3636         mutex_enter(&block->mutex);
3637  
3638         ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3639  
3640 -       buf_LRU_block_free_non_file_page(block);
3641 +       buf_LRU_block_free_non_file_page(block, FALSE);
3642  
3643         mutex_exit(&block->mutex);
3644  
3645 -       buf_pool_mutex_exit(buf_pool);
3646 +       //buf_pool_mutex_exit(buf_pool);
3647  }
3648  #endif /* !UNIV_HOTBACKUP */
3649  
3650 @@ -821,17 +850,17 @@
3651                                         page frame */
3652  {
3653         ib_uint64_t     lsn;
3654 -       mutex_t*        block_mutex = buf_page_get_mutex(bpage);
3655 -
3656 -       mutex_enter(block_mutex);
3657 +       mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
3658  
3659 -       if (buf_page_in_file(bpage)) {
3660 +       if (block_mutex && buf_page_in_file(bpage)) {
3661                 lsn = bpage->newest_modification;
3662         } else {
3663                 lsn = 0;
3664         }
3665  
3666 -       mutex_exit(block_mutex);
3667 +       if (block_mutex) {
3668 +               mutex_exit(block_mutex);
3669 +       }
3670  
3671         return(lsn);
3672  }
3673 @@ -849,7 +878,7 @@
3674  #ifdef UNIV_SYNC_DEBUG
3675         buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3676  
3677 -       ut_ad((buf_pool_mutex_own(buf_pool)
3678 +       ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3679                && (block->page.buf_fix_count == 0))
3680               || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3681  #endif /* UNIV_SYNC_DEBUG */
3682 @@ -979,7 +1008,11 @@
3683         buf_page_t*     bpage;
3684  
3685         ut_ad(buf_pool);
3686 -       ut_ad(buf_pool_mutex_own(buf_pool));
3687 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3688 +#ifdef UNIV_SYNC_DEBUG
3689 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3690 +             || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3691 +#endif
3692         ut_ad(fold == buf_page_address_fold(space, offset));
3693  
3694         /* Look for the page in the hash table */
3695 @@ -1064,11 +1097,13 @@
3696         const buf_page_t*       bpage;
3697         buf_pool_t*             buf_pool = buf_pool_get(space, offset);
3698  
3699 -       buf_pool_mutex_enter(buf_pool);
3700 +       //buf_pool_mutex_enter(buf_pool);
3701 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
3702  
3703         bpage = buf_page_hash_get(buf_pool, space, offset);
3704  
3705 -       buf_pool_mutex_exit(buf_pool);
3706 +       //buf_pool_mutex_exit(buf_pool);
3707 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3708  
3709         return(bpage != NULL);
3710  }
3711 @@ -1196,4 +1231,38 @@
3712                 buf_pool_mutex_exit(buf_pool);
3713         }
3714  }
3715 +
3716 +/********************************************************************//**
3717 +X-locks page_hash_latch of every buffer pool instance, in array order. */
3718 +UNIV_INLINE
3719 +void
3720 +buf_pool_page_hash_x_lock_all(void)
3721 +/*===============================*/
3722 +{
3723 +       ulint   i;
3724 +
3725 +       for (i = 0; i < srv_buf_pool_instances; i++) {
3726 +               buf_pool_t*     buf_pool;
3727 +
3728 +               buf_pool = buf_pool_from_array(i);
3729 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
3730 +       }
3731 +}
3732 +
3733 +/********************************************************************//**
3734 +X-unlocks page_hash_latch of every buffer pool instance, in array order. */
3735 +UNIV_INLINE
3736 +void
3737 +buf_pool_page_hash_x_unlock_all(void)
3738 +/*=================================*/
3739 +{
3740 +       ulint   i;
3741 +
3742 +       for (i = 0; i < srv_buf_pool_instances; i++) {
3743 +               buf_pool_t*     buf_pool;
3744 +
3745 +               buf_pool = buf_pool_from_array(i);
3746 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3747 +       }
3748 +}
3749  #endif /* !UNIV_HOTBACKUP */
3750 diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3751 --- a/storage/innobase/include/buf0lru.h        2010-11-03 07:01:13.000000000 +0900
3752 +++ b/storage/innobase/include/buf0lru.h        2010-12-03 15:48:29.349024701 +0900
3753 @@ -111,8 +111,9 @@
3754  buf_LRU_free_block(
3755  /*===============*/
3756         buf_page_t*     bpage,  /*!< in: block to be freed */
3757 -       ibool           zip)    /*!< in: TRUE if should remove also the
3758 +       ibool           zip,    /*!< in: TRUE if should remove also the
3759                                 compressed page of an uncompressed page */
3760 +       ibool           have_LRU_mutex)
3761         __attribute__((nonnull));
3762  /******************************************************************//**
3763  Try to free a replaceable block.
3764 @@ -159,7 +160,8 @@
3765  void
3766  buf_LRU_block_free_non_file_page(
3767  /*=============================*/
3768 -       buf_block_t*    block); /*!< in: block, must not contain a file page */
3769 +       buf_block_t*    block,  /*!< in: block, must not contain a file page */
3770 +       ibool           have_page_hash_mutex);
3771  /******************************************************************//**
3772  Adds a block to the LRU list. */
3773  UNIV_INTERN
3774 diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3775 --- a/storage/innobase/include/sync0rw.h        2010-11-03 07:01:13.000000000 +0900
3776 +++ b/storage/innobase/include/sync0rw.h        2010-12-03 15:48:29.349942993 +0900
3777 @@ -112,6 +112,7 @@
3778  extern mysql_pfs_key_t archive_lock_key;
3779  # endif /* UNIV_LOG_ARCHIVE */
3780  extern mysql_pfs_key_t btr_search_latch_key;
3781 +extern mysql_pfs_key_t buf_pool_page_hash_key;
3782  extern mysql_pfs_key_t buf_block_lock_key;
3783  # ifdef UNIV_SYNC_DEBUG
3784  extern mysql_pfs_key_t buf_block_debug_latch_key;
3785 diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3786 --- a/storage/innobase/include/sync0sync.h      2010-11-03 07:01:13.000000000 +0900
3787 +++ b/storage/innobase/include/sync0sync.h      2010-12-03 15:48:29.352024614 +0900
3788 @@ -75,6 +75,10 @@
3789  extern mysql_pfs_key_t buffer_block_mutex_key;
3790  extern mysql_pfs_key_t buf_pool_mutex_key;
3791  extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3792 +extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3793 +extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3794 +extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3795 +extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3796  extern mysql_pfs_key_t cache_last_read_mutex_key;
3797  extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3798  extern mysql_pfs_key_t dict_sys_mutex_key;
3799 @@ -660,7 +664,7 @@
3800  #define        SYNC_TRX_LOCK_HEAP      298
3801  #define SYNC_TRX_SYS_HEADER    290
3802  #define SYNC_LOG               170
3803 -#define SYNC_LOG_FLUSH_ORDER   147
3804 +#define SYNC_LOG_FLUSH_ORDER   156
3805  #define SYNC_RECV              168
3806  #define        SYNC_WORK_QUEUE         162
3807  #define        SYNC_SEARCH_SYS_CONF    161     /* for assigning btr_search_enabled */
3808 @@ -670,8 +674,13 @@
3809                                         SYNC_SEARCH_SYS, as memory allocation
3810                                         can call routines there! Otherwise
3811                                         the level is SYNC_MEM_HASH. */
3812 +#define        SYNC_BUF_LRU_LIST       158
3813 +#define        SYNC_BUF_PAGE_HASH      157
3814 +#define        SYNC_BUF_BLOCK          155     /* Block mutex */
3815 +#define        SYNC_BUF_FREE_LIST      153
3816 +#define        SYNC_BUF_ZIP_FREE       152
3817 +#define        SYNC_BUF_ZIP_HASH       151
3818  #define        SYNC_BUF_POOL           150     /* Buffer pool mutex */
3819 -#define        SYNC_BUF_BLOCK          146     /* Block mutex */
3820  #define        SYNC_BUF_FLUSH_LIST     145     /* Buffer flush list mutex */
3821  #define SYNC_DOUBLEWRITE       140
3822  #define        SYNC_ANY_LATCH          135
3823 @@ -703,7 +712,7 @@
3824                 os_fast_mutex;  /*!< We use this OS mutex in place of lock_word
3825                                 when atomic operations are not enabled */
3826  #endif
3827 -       ulint   waiters;        /*!< This ulint is set to 1 if there are (or
3828 +       volatile ulint  waiters;        /*!< This ulint is set to 1 if there are (or
3829                                 may be) threads waiting in the global wait
3830                                 array for this mutex to be released.
3831                                 Otherwise, this is 0. */
3832 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3833 --- a/storage/innobase/srv/srv0srv.c    2010-12-03 15:48:03.080956216 +0900
3834 +++ b/storage/innobase/srv/srv0srv.c    2010-12-03 15:48:29.355023766 +0900
3835 @@ -3099,7 +3099,7 @@
3836                                                                 level += log_sys->max_checkpoint_age
3837                                                                          - (lsn - oldest_modification);
3838                                                         }
3839 -                                                       bpage = UT_LIST_GET_NEXT(list, bpage);
3840 +                                                       bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3841                                                         n_blocks++;
3842                                                 }
3843  
3844 @@ -3185,7 +3185,7 @@
3845                                                         found = TRUE;
3846                                                         break;
3847                                                 }
3848 -                                               bpage = UT_LIST_GET_NEXT(list, bpage);
3849 +                                               bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3850                                                 new_blocks_num++;
3851                                         }
3852                                         if (!found) {
3853 diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3854 --- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3855 +++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
3856 @@ -284,7 +284,7 @@
3857         mutex->lock_word = 0;
3858  #endif
3859         mutex->event = os_event_create(NULL);
3860 -       mutex_set_waiters(mutex, 0);
3861 +       mutex->waiters = 0;
3862  #ifdef UNIV_DEBUG
3863         mutex->magic_n = MUTEX_MAGIC_N;
3864  #endif /* UNIV_DEBUG */
3865 @@ -463,6 +463,15 @@
3866         mutex_t*        mutex,  /*!< in: mutex */
3867         ulint           n)      /*!< in: value to set */
3868  {
3869 +#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3870 +       ut_ad(mutex);
3871 +
3872 +       if (n) {
3873 +               os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3874 +       } else {
3875 +               os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3876 +       }
3877 +#else
3878         volatile ulint* ptr;            /* declared volatile to ensure that
3879                                         the value is stored to memory */
3880         ut_ad(mutex);
3881 @@ -471,6 +480,7 @@
3882  
3883         *ptr = n;               /* Here we assume that the write of a single
3884                                 word in memory is atomic */
3885 +#endif
3886  }
3887  
3888  /******************************************************************//**
3889 @@ -1185,7 +1195,12 @@
3890                         ut_error;
3891                 }
3892                 break;
3893 +       case SYNC_BUF_LRU_LIST:
3894         case SYNC_BUF_FLUSH_LIST:
3895 +       case SYNC_BUF_PAGE_HASH:
3896 +       case SYNC_BUF_FREE_LIST:
3897 +       case SYNC_BUF_ZIP_FREE:
3898 +       case SYNC_BUF_ZIP_HASH:
3899         case SYNC_BUF_POOL:
3900                 /* We can have multiple mutexes of this type therefore we
3901                 can only check whether the greater than condition holds. */
3902 @@ -1203,7 +1218,8 @@
3903                 buffer block (block->mutex or buf_pool->zip_mutex). */
3904                 if (!sync_thread_levels_g(array, level, FALSE)) {
3905                         ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3906 -                       ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3907 +                       /* The exact rule for this level is not fixed yet, so the SYNC_BUF_LRU_LIST assertion below is disabled for now. */
3908 +                       //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
3909                 }
3910                 break;
3911         case SYNC_REC_LOCK:
This page took 0.435708 seconds and 4 git commands to generate.