]> git.pld-linux.org Git - packages/mysql.git/blob - innodb_split_buf_pool_mutex.patch
- rel 0.7; export client_errors symbol for php mysqli
[packages/mysql.git] / innodb_split_buf_pool_mutex.patch
1 # name       : innodb_split_buf_pool_mutex.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
4 #
5 #!!! notice !!!
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9 --- a/storage/innobase/btr/btr0cur.c    2010-11-03 07:01:13.000000000 +0900
10 +++ b/storage/innobase/btr/btr0cur.c    2010-12-03 15:48:29.268957148 +0900
11 @@ -4039,7 +4039,8 @@
12  
13         mtr_commit(mtr);
14  
15 -       buf_pool_mutex_enter(buf_pool);
16 +       //buf_pool_mutex_enter(buf_pool);
17 +       mutex_enter(&buf_pool->LRU_list_mutex);
18         mutex_enter(&block->mutex);
19  
20         /* Only free the block if it is still allocated to
21 @@ -4050,17 +4051,22 @@
22             && buf_block_get_space(block) == space
23             && buf_block_get_page_no(block) == page_no) {
24  
25 -               if (buf_LRU_free_block(&block->page, all, NULL)
26 +               if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
27                     != BUF_LRU_FREED
28 -                   && all && block->page.zip.data) {
29 +                   && all && block->page.zip.data
30 +                   /* buf_LRU_free_block() may release the mutex temporarily; re-check the block */
31 +                   && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
32 +                   && buf_block_get_space(block) == space
33 +                   && buf_block_get_page_no(block) == page_no) {
34                         /* Attempt to deallocate the uncompressed page
35                         if the whole block cannot be deallocted. */
36  
37 -                       buf_LRU_free_block(&block->page, FALSE, NULL);
38 +                       buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
39                 }
40         }
41  
42 -       buf_pool_mutex_exit(buf_pool);
43 +       //buf_pool_mutex_exit(buf_pool);
44 +       mutex_exit(&buf_pool->LRU_list_mutex);
45         mutex_exit(&block->mutex);
46  }
47  
48 diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
49 --- a/storage/innobase/btr/btr0sea.c    2010-12-03 15:48:03.033037049 +0900
50 +++ b/storage/innobase/btr/btr0sea.c    2010-12-03 15:48:29.271024260 +0900
51 @@ -1211,7 +1211,7 @@
52         ulint*          offsets;
53  
54         rw_lock_x_lock(&btr_search_latch);
55 -       buf_pool_mutex_enter_all();
56 +       //buf_pool_mutex_enter_all();
57  
58         table = btr_search_sys->hash_index;
59  
60 @@ -1220,6 +1220,8 @@
61  
62                 buf_pool = buf_pool_from_array(j);
63  
64 +               mutex_enter(&buf_pool->LRU_list_mutex);
65 +
66                 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
67  
68                 while (bpage != NULL) {
69 @@ -1301,9 +1303,11 @@
70  
71                         bpage = UT_LIST_GET_PREV(LRU, bpage);
72                 }
73 +
74 +               mutex_exit(&buf_pool->LRU_list_mutex);
75         }
76  
77 -       buf_pool_mutex_exit_all();
78 +       //buf_pool_mutex_exit_all();
79         rw_lock_x_unlock(&btr_search_latch);
80  
81         if (UNIV_LIKELY_NULL(heap)) {
82 @@ -1896,7 +1900,7 @@
83         rec_offs_init(offsets_);
84  
85         rw_lock_x_lock(&btr_search_latch);
86 -       buf_pool_mutex_enter_all();
87 +       buf_pool_page_hash_x_lock_all();
88  
89         cell_count = hash_get_n_cells(btr_search_sys->hash_index);
90  
91 @@ -1904,11 +1908,11 @@
92                 /* We release btr_search_latch every once in a while to
93                 give other queries a chance to run. */
94                 if ((i != 0) && ((i % chunk_size) == 0)) {
95 -                       buf_pool_mutex_exit_all();
96 +                       buf_pool_page_hash_x_unlock_all();
97                         rw_lock_x_unlock(&btr_search_latch);
98                         os_thread_yield();
99                         rw_lock_x_lock(&btr_search_latch);
100 -                       buf_pool_mutex_enter_all();
101 +                       buf_pool_page_hash_x_lock_all();
102                 }
103  
104                 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
105 @@ -2019,11 +2023,11 @@
106                 /* We release btr_search_latch every once in a while to
107                 give other queries a chance to run. */
108                 if (i != 0) {
109 -                       buf_pool_mutex_exit_all();
110 +                       buf_pool_page_hash_x_unlock_all();
111                         rw_lock_x_unlock(&btr_search_latch);
112                         os_thread_yield();
113                         rw_lock_x_lock(&btr_search_latch);
114 -                       buf_pool_mutex_enter_all();
115 +                       buf_pool_page_hash_x_lock_all();
116                 }
117  
118                 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
119 @@ -2031,7 +2035,7 @@
120                 }
121         }
122  
123 -       buf_pool_mutex_exit_all();
124 +       buf_pool_page_hash_x_unlock_all();
125         rw_lock_x_unlock(&btr_search_latch);
126         if (UNIV_LIKELY_NULL(heap)) {
127                 mem_heap_free(heap);
128 diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
129 --- a/storage/innobase/buf/buf0buddy.c  2010-12-03 15:22:36.307986907 +0900
130 +++ b/storage/innobase/buf/buf0buddy.c  2010-12-03 15:48:29.275025723 +0900
131 @@ -73,10 +73,11 @@
132         if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
133  #endif /* UNIV_DEBUG_VALGRIND */
134  
135 -       ut_ad(buf_pool_mutex_own(buf_pool));
136 +       //ut_ad(buf_pool_mutex_own(buf_pool));
137 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
138         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
139         ut_ad(buf_pool->zip_free[i].start != bpage);
140 -       UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
141 +       UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
142  
143  #ifdef UNIV_DEBUG_VALGRIND
144         if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
145 @@ -96,8 +97,8 @@
146                                         buf_pool->zip_free[] */
147  {
148  #ifdef UNIV_DEBUG_VALGRIND
149 -       buf_page_t*     prev = UT_LIST_GET_PREV(list, bpage);
150 -       buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
151 +       buf_page_t*     prev = UT_LIST_GET_PREV(zip_list, bpage);
152 +       buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
153  
154         if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
155         if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
156 @@ -106,9 +107,10 @@
157         ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
158  #endif /* UNIV_DEBUG_VALGRIND */
159  
160 -       ut_ad(buf_pool_mutex_own(buf_pool));
161 +       //ut_ad(buf_pool_mutex_own(buf_pool));
162 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
163         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
164 -       UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
165 +       UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
166  
167  #ifdef UNIV_DEBUG_VALGRIND
168         if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
169 @@ -128,12 +130,13 @@
170  {
171         buf_page_t*     bpage;
172  
173 -       ut_ad(buf_pool_mutex_own(buf_pool));
174 +       //ut_ad(buf_pool_mutex_own(buf_pool));
175 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
176         ut_a(i < BUF_BUDDY_SIZES);
177  
178  #ifndef UNIV_DEBUG_VALGRIND
179         /* Valgrind would complain about accessing free memory. */
180 -       ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
181 +       ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
182                               ut_ad(buf_page_get_state(ut_list_node_313)
183                                     == BUF_BLOCK_ZIP_FREE)));
184  #endif /* !UNIV_DEBUG_VALGRIND */
185 @@ -177,16 +180,19 @@
186  buf_buddy_block_free(
187  /*=================*/
188         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
189 -       void*           buf)            /*!< in: buffer frame to deallocate */
190 +       void*           buf,            /*!< in: buffer frame to deallocate */
191 +       ibool           have_page_hash_mutex)
192  {
193         const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf);
194         buf_page_t*     bpage;
195         buf_block_t*    block;
196  
197 -       ut_ad(buf_pool_mutex_own(buf_pool));
198 +       //ut_ad(buf_pool_mutex_own(buf_pool));
199         ut_ad(!mutex_own(&buf_pool->zip_mutex));
200         ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
201  
202 +       mutex_enter(&buf_pool->zip_hash_mutex);
203 +
204         HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
205                     ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
206                           && bpage->in_zip_hash && !bpage->in_page_hash),
207 @@ -198,12 +204,14 @@
208         ut_d(bpage->in_zip_hash = FALSE);
209         HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
210  
211 +       mutex_exit(&buf_pool->zip_hash_mutex);
212 +
213         ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
214         UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
215  
216         block = (buf_block_t*) bpage;
217         mutex_enter(&block->mutex);
218 -       buf_LRU_block_free_non_file_page(block);
219 +       buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
220         mutex_exit(&block->mutex);
221  
222         ut_ad(buf_pool->buddy_n_frames > 0);
223 @@ -220,7 +228,7 @@
224  {
225         buf_pool_t*     buf_pool = buf_pool_from_block(block);
226         const ulint     fold = BUF_POOL_ZIP_FOLD(block);
227 -       ut_ad(buf_pool_mutex_own(buf_pool));
228 +       //ut_ad(buf_pool_mutex_own(buf_pool));
229         ut_ad(!mutex_own(&buf_pool->zip_mutex));
230         ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
231  
232 @@ -232,7 +240,10 @@
233         ut_ad(!block->page.in_page_hash);
234         ut_ad(!block->page.in_zip_hash);
235         ut_d(block->page.in_zip_hash = TRUE);
236 +
237 +       mutex_enter(&buf_pool->zip_hash_mutex);
238         HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
239 +       mutex_exit(&buf_pool->zip_hash_mutex);
240  
241         ut_d(buf_pool->buddy_n_frames++);
242  }
243 @@ -268,7 +279,7 @@
244                 bpage->state = BUF_BLOCK_ZIP_FREE;
245  #ifndef UNIV_DEBUG_VALGRIND
246                 /* Valgrind would complain about accessing free memory. */
247 -               ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
248 +               ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
249                                       ut_ad(buf_page_get_state(
250                                                     ut_list_node_313)
251                                             == BUF_BLOCK_ZIP_FREE)));
252 @@ -291,25 +302,29 @@
253         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
254         ulint           i,              /*!< in: index of buf_pool->zip_free[],
255                                         or BUF_BUDDY_SIZES */
256 -       ibool*          lru)            /*!< in: pointer to a variable that
257 +       ibool*          lru,            /*!< in: pointer to a variable that
258                                         will be assigned TRUE if storage was
259                                         allocated from the LRU list and
260                                         buf_pool->mutex was temporarily
261                                         released, or NULL if the LRU list
262                                         should not be used */
263 +       ibool           have_page_hash_mutex)
264  {
265         buf_block_t*    block;
266  
267 -       ut_ad(buf_pool_mutex_own(buf_pool));
268 +       //ut_ad(buf_pool_mutex_own(buf_pool));
269 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
270         ut_ad(!mutex_own(&buf_pool->zip_mutex));
271  
272         if (i < BUF_BUDDY_SIZES) {
273                 /* Try to allocate from the buddy system. */
274 +               mutex_enter(&buf_pool->zip_free_mutex);
275                 block = buf_buddy_alloc_zip(buf_pool, i);
276  
277                 if (block) {
278                         goto func_exit;
279                 }
280 +               mutex_exit(&buf_pool->zip_free_mutex);
281         }
282  
283         /* Try allocating from the buf_pool->free list. */
284 @@ -326,19 +341,30 @@
285         }
286  
287         /* Try replacing an uncompressed page in the buffer pool. */
288 -       buf_pool_mutex_exit(buf_pool);
289 +       //buf_pool_mutex_exit(buf_pool);
290 +       mutex_exit(&buf_pool->LRU_list_mutex);
291 +       if (have_page_hash_mutex) {
292 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
293 +       }
294         block = buf_LRU_get_free_block(buf_pool, 0);
295         *lru = TRUE;
296 -       buf_pool_mutex_enter(buf_pool);
297 +       //buf_pool_mutex_enter(buf_pool);
298 +       mutex_enter(&buf_pool->LRU_list_mutex);
299 +       if (have_page_hash_mutex) {
300 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
301 +       }
302  
303  alloc_big:
304         buf_buddy_block_register(block);
305  
306 +       mutex_enter(&buf_pool->zip_free_mutex);
307         block = buf_buddy_alloc_from(
308                 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
309  
310  func_exit:
311         buf_pool->buddy_stat[i].used++;
312 +       mutex_exit(&buf_pool->zip_free_mutex);
313 +
314         return(block);
315  }
316  
317 @@ -355,7 +381,10 @@
318         buf_page_t*     b;
319         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
320  
321 -       ut_ad(buf_pool_mutex_own(buf_pool));
322 +       //ut_ad(buf_pool_mutex_own(buf_pool));
323 +#ifdef UNIV_SYNC_DEBUG
324 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
325 +#endif
326  
327         switch (buf_page_get_state(bpage)) {
328         case BUF_BLOCK_ZIP_FREE:
329 @@ -364,7 +393,7 @@
330         case BUF_BLOCK_FILE_PAGE:
331         case BUF_BLOCK_MEMORY:
332         case BUF_BLOCK_REMOVE_HASH:
333 -               ut_error;
334 +               /* ut_error; */ /* optimistic */
335         case BUF_BLOCK_ZIP_DIRTY:
336                 /* Cannot relocate dirty pages. */
337                 return(FALSE);
338 @@ -374,9 +403,18 @@
339         }
340  
341         mutex_enter(&buf_pool->zip_mutex);
342 +       mutex_enter(&buf_pool->zip_free_mutex);
343  
344         if (!buf_page_can_relocate(bpage)) {
345                 mutex_exit(&buf_pool->zip_mutex);
346 +               mutex_exit(&buf_pool->zip_free_mutex);
347 +               return(FALSE);
348 +       }
349 +
350 +       if (bpage != buf_page_hash_get(buf_pool,
351 +                                      bpage->space, bpage->offset)) {
352 +               mutex_exit(&buf_pool->zip_mutex);
353 +               mutex_exit(&buf_pool->zip_free_mutex);
354                 return(FALSE);
355         }
356  
357 @@ -384,18 +422,19 @@
358         ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
359  
360         /* relocate buf_pool->zip_clean */
361 -       b = UT_LIST_GET_PREV(list, dpage);
362 -       UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
363 +       b = UT_LIST_GET_PREV(zip_list, dpage);
364 +       UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
365  
366         if (b) {
367 -               UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
368 +               UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
369         } else {
370 -               UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
371 +               UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
372         }
373  
374         UNIV_MEM_INVALID(bpage, sizeof *bpage);
375  
376         mutex_exit(&buf_pool->zip_mutex);
377 +       mutex_exit(&buf_pool->zip_free_mutex);
378         return(TRUE);
379  }
380  
381 @@ -409,14 +448,16 @@
382         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
383         void*           src,            /*!< in: block to relocate */
384         void*           dst,            /*!< in: free block to relocate to */
385 -       ulint           i)              /*!< in: index of
386 +       ulint           i,              /*!< in: index of
387                                         buf_pool->zip_free[] */
388 +       ibool           have_page_hash_mutex)
389  {
390         buf_page_t*     bpage;
391         const ulint     size    = BUF_BUDDY_LOW << i;
392         ullint          usec    = ut_time_us(NULL);
393  
394 -       ut_ad(buf_pool_mutex_own(buf_pool));
395 +       //ut_ad(buf_pool_mutex_own(buf_pool));
396 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
397         ut_ad(!mutex_own(&buf_pool->zip_mutex));
398         ut_ad(!ut_align_offset(src, size));
399         ut_ad(!ut_align_offset(dst, size));
400 @@ -438,6 +479,12 @@
401                 /* This is a compressed page. */
402                 mutex_t*        mutex;
403  
404 +               if (!have_page_hash_mutex) {
405 +                       mutex_exit(&buf_pool->zip_free_mutex);
406 +                       mutex_enter(&buf_pool->LRU_list_mutex);
407 +                       rw_lock_x_lock(&buf_pool->page_hash_latch);
408 +               }
409 +
410                 /* The src block may be split into smaller blocks,
411                 some of which may be free.  Thus, the
412                 mach_read_from_4() calls below may attempt to read
413 @@ -462,6 +509,11 @@
414                         added to buf_pool->page_hash yet.  Obviously,
415                         it cannot be relocated. */
416  
417 +                       if (!have_page_hash_mutex) {
418 +                               mutex_enter(&buf_pool->zip_free_mutex);
419 +                               mutex_exit(&buf_pool->LRU_list_mutex);
420 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
421 +                       }
422                         return(FALSE);
423                 }
424  
425 @@ -473,18 +525,27 @@
426                         For the sake of simplicity, give up. */
427                         ut_ad(page_zip_get_size(&bpage->zip) < size);
428  
429 +                       if (!have_page_hash_mutex) {
430 +                               mutex_enter(&buf_pool->zip_free_mutex);
431 +                               mutex_exit(&buf_pool->LRU_list_mutex);
432 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
433 +                       }
434                         return(FALSE);
435                 }
436  
437 +               /* To keep latch order: drop zip_free_mutex before buf_page_get_mutex_enter() */
438 +               if (have_page_hash_mutex)
439 +                       mutex_exit(&buf_pool->zip_free_mutex);
440 +
441                 /* The block must have been allocated, but it may
442                 contain uninitialized data. */
443                 UNIV_MEM_ASSERT_W(src, size);
444  
445 -               mutex = buf_page_get_mutex(bpage);
446 +               mutex = buf_page_get_mutex_enter(bpage);
447  
448 -               mutex_enter(mutex);
449 +               mutex_enter(&buf_pool->zip_free_mutex);
450  
451 -               if (buf_page_can_relocate(bpage)) {
452 +               if (mutex && buf_page_can_relocate(bpage)) {
453                         /* Relocate the compressed page. */
454                         ut_a(bpage->zip.data == src);
455                         memcpy(dst, src, size);
456 @@ -499,10 +560,22 @@
457                                 buddy_stat->relocated_usec
458                                         += ut_time_us(NULL) - usec;
459                         }
460 +
461 +                       if (!have_page_hash_mutex) {
462 +                               mutex_exit(&buf_pool->LRU_list_mutex);
463 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
464 +                       }
465                         return(TRUE);
466                 }
467  
468 -               mutex_exit(mutex);
469 +               if (!have_page_hash_mutex) {
470 +                       mutex_exit(&buf_pool->LRU_list_mutex);
471 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
472 +               }
473 +
474 +               if (mutex) {
475 +                       mutex_exit(mutex);
476 +               }
477         } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
478                 /* This must be a buf_page_t object. */
479  #if UNIV_WORD_SIZE == 4
480 @@ -511,10 +584,31 @@
481                 about uninitialized pad bytes. */
482                 UNIV_MEM_ASSERT_RW(src, size);
483  #endif
484 +
485 +               mutex_exit(&buf_pool->zip_free_mutex);
486 +
487 +               if (!have_page_hash_mutex) {
488 +                       mutex_enter(&buf_pool->LRU_list_mutex);
489 +                       rw_lock_x_lock(&buf_pool->page_hash_latch);
490 +               }
491 +
492                 if (buf_buddy_relocate_block(src, dst)) {
493 +                       mutex_enter(&buf_pool->zip_free_mutex);
494 +
495 +                       if (!have_page_hash_mutex) {
496 +                               mutex_exit(&buf_pool->LRU_list_mutex);
497 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
498 +                       }
499  
500                         goto success;
501                 }
502 +
503 +               mutex_enter(&buf_pool->zip_free_mutex);
504 +
505 +               if (!have_page_hash_mutex) {
506 +                       mutex_exit(&buf_pool->LRU_list_mutex);
507 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
508 +               }
509         }
510  
511         return(FALSE);
512 @@ -529,13 +623,15 @@
513         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
514         void*           buf,            /*!< in: block to be freed, must not be
515                                         pointed to by the buffer pool */
516 -       ulint           i)              /*!< in: index of buf_pool->zip_free[],
517 +       ulint           i,              /*!< in: index of buf_pool->zip_free[],
518                                         or BUF_BUDDY_SIZES */
519 +       ibool           have_page_hash_mutex)
520  {
521         buf_page_t*     bpage;
522         buf_page_t*     buddy;
523  
524 -       ut_ad(buf_pool_mutex_own(buf_pool));
525 +       //ut_ad(buf_pool_mutex_own(buf_pool));
526 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
527         ut_ad(!mutex_own(&buf_pool->zip_mutex));
528         ut_ad(i <= BUF_BUDDY_SIZES);
529         ut_ad(buf_pool->buddy_stat[i].used > 0);
530 @@ -546,7 +642,9 @@
531         ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
532  
533         if (i == BUF_BUDDY_SIZES) {
534 -               buf_buddy_block_free(buf_pool, buf);
535 +               mutex_exit(&buf_pool->zip_free_mutex);
536 +               buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
537 +               mutex_enter(&buf_pool->zip_free_mutex);
538                 return;
539         }
540  
541 @@ -591,7 +689,7 @@
542                 ut_a(bpage != buf);
543  
544                 {
545 -                       buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
546 +                       buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
547                         UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
548                         bpage = next;
549                 }
550 @@ -600,13 +698,13 @@
551  #ifndef UNIV_DEBUG_VALGRIND
552  buddy_nonfree:
553         /* Valgrind would complain about accessing free memory. */
554 -       ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
555 +       ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
556                               ut_ad(buf_page_get_state(ut_list_node_313)
557                                     == BUF_BLOCK_ZIP_FREE)));
558  #endif /* UNIV_DEBUG_VALGRIND */
559  
560         /* The buddy is not free. Is there a free block of this size? */
561 -       bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
562 +       bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
563  
564         if (bpage) {
565                 /* Remove the block from the free list, because a successful
566 @@ -616,7 +714,7 @@
567                 buf_buddy_remove_from_free(buf_pool, bpage, i);
568  
569                 /* Try to relocate the buddy of buf to the free block. */
570 -               if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
571 +               if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
572  
573                         ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
574                         goto buddy_free2;
575 @@ -636,14 +734,14 @@
576  
577                 (Parts of the buddy can be free in
578                 buf_pool->zip_free[j] with j < i.) */
579 -               ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
580 +               ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
581                                       ut_ad(buf_page_get_state(
582                                                     ut_list_node_313)
583                                             == BUF_BLOCK_ZIP_FREE
584                                             && ut_list_node_313 != buddy)));
585  #endif /* !UNIV_DEBUG_VALGRIND */
586  
587 -               if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
588 +               if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
589  
590                         buf = bpage;
591                         UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
592 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
593 --- a/storage/innobase/buf/buf0buf.c    2010-12-03 15:22:36.314943336 +0900
594 +++ b/storage/innobase/buf/buf0buf.c    2010-12-03 15:48:29.282947357 +0900
595 @@ -263,6 +263,7 @@
596  #ifdef UNIV_PFS_RWLOCK
597  /* Keys to register buffer block related rwlocks and mutexes with
598  performance schema */
599 +UNIV_INTERN mysql_pfs_key_t    buf_pool_page_hash_key;
600  UNIV_INTERN mysql_pfs_key_t    buf_block_lock_key;
601  # ifdef UNIV_SYNC_DEBUG
602  UNIV_INTERN mysql_pfs_key_t    buf_block_debug_latch_key;
603 @@ -273,6 +274,10 @@
604  UNIV_INTERN mysql_pfs_key_t    buffer_block_mutex_key;
605  UNIV_INTERN mysql_pfs_key_t    buf_pool_mutex_key;
606  UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_mutex_key;
607 +UNIV_INTERN mysql_pfs_key_t    buf_pool_LRU_list_mutex_key;
608 +UNIV_INTERN mysql_pfs_key_t    buf_pool_free_list_mutex_key;
609 +UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_free_mutex_key;
610 +UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_hash_mutex_key;
611  UNIV_INTERN mysql_pfs_key_t    flush_list_mutex_key;
612  #endif /* UNIV_PFS_MUTEX */
613  
614 @@ -881,9 +886,9 @@
615         block->page.in_zip_hash = FALSE;
616         block->page.in_flush_list = FALSE;
617         block->page.in_free_list = FALSE;
618 -       block->in_unzip_LRU_list = FALSE;
619  #endif /* UNIV_DEBUG */
620         block->page.in_LRU_list = FALSE;
621 +       block->in_unzip_LRU_list = FALSE;
622  #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
623         block->n_pointers = 0;
624  #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
625 @@ -981,9 +986,11 @@
626                 memset(block->frame, '\0', UNIV_PAGE_SIZE);
627  #endif
628                 /* Add the block to the free list */
629 -               UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
630 +               mutex_enter(&buf_pool->free_list_mutex);
631 +               UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
632  
633                 ut_d(block->page.in_free_list = TRUE);
634 +               mutex_exit(&buf_pool->free_list_mutex);
635                 ut_ad(buf_pool_from_block(block) == buf_pool);
636  
637                 block++;
638 @@ -1038,7 +1045,8 @@
639         buf_chunk_t*    chunk = buf_pool->chunks;
640  
641         ut_ad(buf_pool);
642 -       ut_ad(buf_pool_mutex_own(buf_pool));
643 +       //ut_ad(buf_pool_mutex_own(buf_pool));
644 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
645         for (n = buf_pool->n_chunks; n--; chunk++) {
646  
647                 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
648 @@ -1138,7 +1146,7 @@
649         buf_block_t*            block;
650         const buf_block_t*      block_end;
651  
652 -       ut_ad(buf_pool_mutex_own(buf_pool));
653 +       //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
654  
655         block_end = chunk->blocks + chunk->size;
656  
657 @@ -1150,8 +1158,10 @@
658                 ut_ad(!block->in_unzip_LRU_list);
659                 ut_ad(!block->page.in_flush_list);
660                 /* Remove the block from the free list. */
661 +               mutex_enter(&buf_pool->free_list_mutex);
662                 ut_ad(block->page.in_free_list);
663 -               UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
664 +               UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
665 +               mutex_exit(&buf_pool->free_list_mutex);
666  
667                 /* Free the latches. */
668                 mutex_free(&block->mutex);
669 @@ -1208,9 +1218,21 @@
670         ------------------------------- */
671         mutex_create(buf_pool_mutex_key,
672                      &buf_pool->mutex, SYNC_BUF_POOL);
673 +       mutex_create(buf_pool_LRU_list_mutex_key,
674 +                    &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
675 +       rw_lock_create(buf_pool_page_hash_key,
676 +                      &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
677 +       mutex_create(buf_pool_free_list_mutex_key,
678 +                    &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
679 +       mutex_create(buf_pool_zip_free_mutex_key,
680 +                    &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
681 +       mutex_create(buf_pool_zip_hash_mutex_key,
682 +                    &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
683         mutex_create(buf_pool_zip_mutex_key,
684                      &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
685  
686 +       mutex_enter(&buf_pool->LRU_list_mutex);
687 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
688         buf_pool_mutex_enter(buf_pool);
689  
690         if (buf_pool_size > 0) {
691 @@ -1223,6 +1245,8 @@
692                         mem_free(chunk);
693                         mem_free(buf_pool);
694  
695 +                       mutex_exit(&buf_pool->LRU_list_mutex);
696 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
697                         buf_pool_mutex_exit(buf_pool);
698  
699                         return(DB_ERROR);
700 @@ -1253,6 +1277,8 @@
701  
702         /* All fields are initialized by mem_zalloc(). */
703  
704 +       mutex_exit(&buf_pool->LRU_list_mutex);
705 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
706         buf_pool_mutex_exit(buf_pool);
707  
708         return(DB_SUCCESS);
709 @@ -1467,7 +1493,11 @@
710         ulint           fold;
711         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
712  
713 -       ut_ad(buf_pool_mutex_own(buf_pool));
714 +       //ut_ad(buf_pool_mutex_own(buf_pool));
715 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
716 +#ifdef UNIV_SYNC_DEBUG
717 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
718 +#endif
719         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
720         ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
721         ut_a(bpage->buf_fix_count == 0);
722 @@ -1554,7 +1584,8 @@
723  
724  try_again:
725         btr_search_disable(); /* Empty the adaptive hash index again */
726 -       buf_pool_mutex_enter(buf_pool);
727 +       //buf_pool_mutex_enter(buf_pool);
728 +       mutex_enter(&buf_pool->LRU_list_mutex);
729  
730  shrink_again:
731         if (buf_pool->n_chunks <= 1) {
732 @@ -1625,7 +1656,7 @@
733  
734                                 buf_LRU_make_block_old(&block->page);
735                                 dirty++;
736 -                       } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
737 +                       } else if (buf_LRU_free_block(&block->page, TRUE, NULL, TRUE)
738                                    != BUF_LRU_FREED) {
739                                 nonfree++;
740                         }
741 @@ -1633,7 +1664,8 @@
742                         mutex_exit(&block->mutex);
743                 }
744  
745 -               buf_pool_mutex_exit(buf_pool);
746 +               //buf_pool_mutex_exit(buf_pool);
747 +               mutex_exit(&buf_pool->LRU_list_mutex);
748  
749                 /* Request for a flush of the chunk if it helps.
750                 Do not flush if there are non-free blocks, since
751 @@ -1683,7 +1715,8 @@
752  func_done:
753         buf_pool->old_pool_size = buf_pool->curr_pool_size;
754  func_exit:
755 -       buf_pool_mutex_exit(buf_pool);
756 +       //buf_pool_mutex_exit(buf_pool);
757 +       mutex_exit(&buf_pool->LRU_list_mutex);
758         btr_search_enable();
759  }
760  
761 @@ -1724,7 +1757,9 @@
762         hash_table_t*   zip_hash;
763         hash_table_t*   page_hash;
764  
765 -       buf_pool_mutex_enter(buf_pool);
766 +       //buf_pool_mutex_enter(buf_pool);
767 +       mutex_enter(&buf_pool->LRU_list_mutex);
768 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
769  
770         /* Free, create, and populate the hash table. */
771         hash_table_free(buf_pool->page_hash);
772 @@ -1765,8 +1800,9 @@
773         All such blocks are either in buf_pool->zip_clean or
774         in buf_pool->flush_list. */
775  
776 +       mutex_enter(&buf_pool->zip_mutex);
777         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
778 -            b = UT_LIST_GET_NEXT(list, b)) {
779 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
780                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
781                 ut_ad(!b->in_flush_list);
782                 ut_ad(b->in_LRU_list);
783 @@ -1776,10 +1812,11 @@
784                 HASH_INSERT(buf_page_t, hash, page_hash,
785                             buf_page_address_fold(b->space, b->offset), b);
786         }
787 +       mutex_exit(&buf_pool->zip_mutex);
788  
789         buf_flush_list_mutex_enter(buf_pool);
790         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
791 -            b = UT_LIST_GET_NEXT(list, b)) {
792 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
793                 ut_ad(b->in_flush_list);
794                 ut_ad(b->in_LRU_list);
795                 ut_ad(b->in_page_hash);
796 @@ -1806,7 +1843,9 @@
797         }
798  
799         buf_flush_list_mutex_exit(buf_pool);
800 -       buf_pool_mutex_exit(buf_pool);
801 +       //buf_pool_mutex_exit(buf_pool);
802 +       mutex_exit(&buf_pool->LRU_list_mutex);
803 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
804  }
805  
806  /********************************************************************
807 @@ -1853,21 +1892,32 @@
808         buf_page_t*     bpage;
809         ulint           i;
810         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
811 +       mutex_t*        block_mutex;
812  
813 -       ut_ad(buf_pool_mutex_own(buf_pool));
814 +       //ut_ad(buf_pool_mutex_own(buf_pool));
815  
816 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
817         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
818 +       if (bpage) {
819 +               block_mutex = buf_page_get_mutex_enter(bpage);
820 +               ut_a(block_mutex);
821 +       }
822  
823         if (UNIV_LIKELY_NULL(bpage)) {
824                 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
825                         /* The page was loaded meanwhile. */
826 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
827                         return(bpage);
828                 }
829                 /* Add to an existing watch. */
830                 bpage->buf_fix_count++;
831 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
832 +               mutex_exit(block_mutex);
833                 return(NULL);
834         }
835  
836 +       /* buf_pool->watch is protected by zip_mutex for now */
837 +       mutex_enter(&buf_pool->zip_mutex);
838         for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
839                 bpage = &buf_pool->watch[i];
840  
841 @@ -1891,10 +1941,12 @@
842                         bpage->space = space;
843                         bpage->offset = offset;
844                         bpage->buf_fix_count = 1;
845 -
846 +                       bpage->buf_pool_index = buf_pool_index(buf_pool);
847                         ut_d(bpage->in_page_hash = TRUE);
848                         HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
849                                     fold, bpage);
850 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
851 +                       mutex_exit(&buf_pool->zip_mutex);
852                         return(NULL);
853                 case BUF_BLOCK_ZIP_PAGE:
854                         ut_ad(bpage->in_page_hash);
855 @@ -1912,6 +1964,8 @@
856         ut_error;
857  
858         /* Fix compiler warning */
859 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
860 +       mutex_exit(&buf_pool->zip_mutex);
861         return(NULL);
862  }
863  
864 @@ -1941,6 +1995,8 @@
865         buf_chunk_t*    chunks;
866         buf_chunk_t*    chunk;
867  
868 +       mutex_enter(&buf_pool->LRU_list_mutex);
869 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
870         buf_pool_mutex_enter(buf_pool);
871         chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
872  
873 @@ -1959,6 +2015,8 @@
874                 buf_pool->n_chunks++;
875         }
876  
877 +       mutex_exit(&buf_pool->LRU_list_mutex);
878 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
879         buf_pool_mutex_exit(buf_pool);
880  }
881  
882 @@ -2046,7 +2104,11 @@
883                                         space, offset) */
884         buf_page_t*     watch)          /*!< in/out: sentinel for watch */
885  {
886 -       ut_ad(buf_pool_mutex_own(buf_pool));
887 +       //ut_ad(buf_pool_mutex_own(buf_pool));
888 +#ifdef UNIV_SYNC_DEBUG
889 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
890 +#endif
891 +       ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
892  
893         HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
894         ut_d(watch->in_page_hash = FALSE);
895 @@ -2068,28 +2130,31 @@
896         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
897         ulint           fold = buf_page_address_fold(space, offset);
898  
899 -       buf_pool_mutex_enter(buf_pool);
900 +       //buf_pool_mutex_enter(buf_pool);
901 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
902         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
903         /* The page must exist because buf_pool_watch_set()
904         increments buf_fix_count. */
905         ut_a(bpage);
906  
907         if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
908 -               mutex_t* mutex = buf_page_get_mutex(bpage);
909 +               mutex_t* mutex = buf_page_get_mutex_enter(bpage);
910  
911 -               mutex_enter(mutex);
912                 ut_a(bpage->buf_fix_count > 0);
913                 bpage->buf_fix_count--;
914                 mutex_exit(mutex);
915         } else {
916 +               mutex_enter(&buf_pool->zip_mutex);
917                 ut_a(bpage->buf_fix_count > 0);
918  
919                 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
920                         buf_pool_watch_remove(buf_pool, fold, bpage);
921                 }
922 +               mutex_exit(&buf_pool->zip_mutex);
923         }
924  
925 -       buf_pool_mutex_exit(buf_pool);
926 +       //buf_pool_mutex_exit(buf_pool);
927 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
928  }
929  
930  /****************************************************************//**
931 @@ -2109,14 +2174,16 @@
932         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
933         ulint           fold    = buf_page_address_fold(space, offset);
934  
935 -       buf_pool_mutex_enter(buf_pool);
936 +       //buf_pool_mutex_enter(buf_pool);
937 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
938  
939         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
940         /* The page must exist because buf_pool_watch_set()
941         increments buf_fix_count. */
942         ut_a(bpage);
943         ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
944 -       buf_pool_mutex_exit(buf_pool);
945 +       //buf_pool_mutex_exit(buf_pool);
946 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
947  
948         return(ret);
949  }
950 @@ -2133,13 +2200,15 @@
951  {
952         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
953  
954 -       buf_pool_mutex_enter(buf_pool);
955 +       //buf_pool_mutex_enter(buf_pool);
956 +       mutex_enter(&buf_pool->LRU_list_mutex);
957  
958         ut_a(buf_page_in_file(bpage));
959  
960         buf_LRU_make_block_young(bpage);
961  
962 -       buf_pool_mutex_exit(buf_pool);
963 +       //buf_pool_mutex_exit(buf_pool);
964 +       mutex_exit(&buf_pool->LRU_list_mutex);
965  }
966  
967  /********************************************************************//**
968 @@ -2163,14 +2232,20 @@
969         ut_a(buf_page_in_file(bpage));
970  
971         if (buf_page_peek_if_too_old(bpage)) {
972 -               buf_pool_mutex_enter(buf_pool);
973 +               //buf_pool_mutex_enter(buf_pool);
974 +               mutex_enter(&buf_pool->LRU_list_mutex);
975                 buf_LRU_make_block_young(bpage);
976 -               buf_pool_mutex_exit(buf_pool);
977 +               //buf_pool_mutex_exit(buf_pool);
978 +               mutex_exit(&buf_pool->LRU_list_mutex);
979         } else if (!access_time) {
980                 ulint   time_ms = ut_time_ms();
981 -               buf_pool_mutex_enter(buf_pool);
982 +               mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
983 +               //buf_pool_mutex_enter(buf_pool);
984 +               if (block_mutex) {
985                 buf_page_set_accessed(bpage, time_ms);
986 -               buf_pool_mutex_exit(buf_pool);
987 +               mutex_exit(block_mutex);
988 +               }
989 +               //buf_pool_mutex_exit(buf_pool);
990         }
991  }
992  
993 @@ -2187,7 +2262,8 @@
994         buf_block_t*    block;
995         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
996  
997 -       buf_pool_mutex_enter(buf_pool);
998 +       //buf_pool_mutex_enter(buf_pool);
999 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1000  
1001         block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1002  
1003 @@ -2196,7 +2272,8 @@
1004                 block->check_index_page_at_flush = FALSE;
1005         }
1006  
1007 -       buf_pool_mutex_exit(buf_pool);
1008 +       //buf_pool_mutex_exit(buf_pool);
1009 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1010  }
1011  
1012  /********************************************************************//**
1013 @@ -2215,7 +2292,8 @@
1014         ibool           is_hashed;
1015         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1016  
1017 -       buf_pool_mutex_enter(buf_pool);
1018 +       //buf_pool_mutex_enter(buf_pool);
1019 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1020  
1021         block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1022  
1023 @@ -2226,7 +2304,8 @@
1024                 is_hashed = block->is_hashed;
1025         }
1026  
1027 -       buf_pool_mutex_exit(buf_pool);
1028 +       //buf_pool_mutex_exit(buf_pool);
1029 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1030  
1031         return(is_hashed);
1032  }
1033 @@ -2248,7 +2327,8 @@
1034         buf_page_t*     bpage;
1035         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1036  
1037 -       buf_pool_mutex_enter(buf_pool);
1038 +       //buf_pool_mutex_enter(buf_pool);
1039 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1040  
1041         bpage = buf_page_hash_get(buf_pool, space, offset);
1042  
1043 @@ -2257,7 +2337,8 @@
1044                 bpage->file_page_was_freed = TRUE;
1045         }
1046  
1047 -       buf_pool_mutex_exit(buf_pool);
1048 +       //buf_pool_mutex_exit(buf_pool);
1049 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1050  
1051         return(bpage);
1052  }
1053 @@ -2278,7 +2359,8 @@
1054         buf_page_t*     bpage;
1055         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1056  
1057 -       buf_pool_mutex_enter(buf_pool);
1058 +       //buf_pool_mutex_enter(buf_pool);
1059 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1060  
1061         bpage = buf_page_hash_get(buf_pool, space, offset);
1062  
1063 @@ -2287,7 +2369,8 @@
1064                 bpage->file_page_was_freed = FALSE;
1065         }
1066  
1067 -       buf_pool_mutex_exit(buf_pool);
1068 +       //buf_pool_mutex_exit(buf_pool);
1069 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1070  
1071         return(bpage);
1072  }
1073 @@ -2322,8 +2405,9 @@
1074         buf_pool->stat.n_page_gets++;
1075  
1076         for (;;) {
1077 -               buf_pool_mutex_enter(buf_pool);
1078 +               //buf_pool_mutex_enter(buf_pool);
1079  lookup:
1080 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
1081                 bpage = buf_page_hash_get(buf_pool, space, offset);
1082                 if (bpage) {
1083                         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1084 @@ -2332,7 +2416,8 @@
1085  
1086                 /* Page not in buf_pool: needs to be read from file */
1087  
1088 -               buf_pool_mutex_exit(buf_pool);
1089 +               //buf_pool_mutex_exit(buf_pool);
1090 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1091  
1092                 buf_read_page(space, zip_size, offset);
1093  
1094 @@ -2344,10 +2429,15 @@
1095         if (UNIV_UNLIKELY(!bpage->zip.data)) {
1096                 /* There is no compressed page. */
1097  err_exit:
1098 -               buf_pool_mutex_exit(buf_pool);
1099 +               //buf_pool_mutex_exit(buf_pool);
1100 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1101                 return(NULL);
1102         }
1103  
1104 +       block_mutex = buf_page_get_mutex_enter(bpage);
1105 +
1106 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1107 +
1108         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1109  
1110         switch (buf_page_get_state(bpage)) {
1111 @@ -2356,19 +2446,19 @@
1112         case BUF_BLOCK_MEMORY:
1113         case BUF_BLOCK_REMOVE_HASH:
1114         case BUF_BLOCK_ZIP_FREE:
1115 +               if (block_mutex)
1116 +                       mutex_exit(block_mutex);
1117                 break;
1118         case BUF_BLOCK_ZIP_PAGE:
1119         case BUF_BLOCK_ZIP_DIRTY:
1120 -               block_mutex = &buf_pool->zip_mutex;
1121 -               mutex_enter(block_mutex);
1122 +               ut_a(block_mutex == &buf_pool->zip_mutex);
1123                 bpage->buf_fix_count++;
1124                 goto got_block;
1125         case BUF_BLOCK_FILE_PAGE:
1126 -               block_mutex = &((buf_block_t*) bpage)->mutex;
1127 -               mutex_enter(block_mutex);
1128 +               ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1129  
1130                 /* Discard the uncompressed page frame if possible. */
1131 -               if (buf_LRU_free_block(bpage, FALSE, NULL)
1132 +               if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
1133                     == BUF_LRU_FREED) {
1134  
1135                         mutex_exit(block_mutex);
1136 @@ -2387,7 +2477,7 @@
1137         must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1138         access_time = buf_page_is_accessed(bpage);
1139  
1140 -       buf_pool_mutex_exit(buf_pool);
1141 +       //buf_pool_mutex_exit(buf_pool);
1142  
1143         mutex_exit(block_mutex);
1144  
1145 @@ -2696,7 +2786,7 @@
1146         const buf_block_t*      block)          /*!< in: pointer to block,
1147                                                 not dereferenced */
1148  {
1149 -       ut_ad(buf_pool_mutex_own(buf_pool));
1150 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1151  
1152         if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1153                 /* The pointer should be aligned. */
1154 @@ -2732,6 +2822,7 @@
1155         ulint           fix_type;
1156         ibool           must_read;
1157         ulint           retries = 0;
1158 +       mutex_t*        block_mutex = NULL;
1159         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1160  
1161         ut_ad(mtr);
1162 @@ -2753,9 +2844,11 @@
1163         fold = buf_page_address_fold(space, offset);
1164  loop:
1165         block = guess;
1166 -       buf_pool_mutex_enter(buf_pool);
1167 +       //buf_pool_mutex_enter(buf_pool);
1168  
1169         if (block) {
1170 +               block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1171 +
1172                 /* If the guess is a compressed page descriptor that
1173                 has been allocated by buf_buddy_alloc(), it may have
1174                 been invalidated by buf_buddy_relocate().  In that
1175 @@ -2764,11 +2857,15 @@
1176                 the guess may be pointing to a buffer pool chunk that
1177                 has been released when resizing the buffer pool. */
1178  
1179 -               if (!buf_block_is_uncompressed(buf_pool, block)
1180 +               if (!block_mutex) {
1181 +                       block = guess = NULL;
1182 +               } else if (!buf_block_is_uncompressed(buf_pool, block)
1183                     || offset != block->page.offset
1184                     || space != block->page.space
1185                     || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1186  
1187 +                       mutex_exit(block_mutex);
1188 +
1189                         block = guess = NULL;
1190                 } else {
1191                         ut_ad(!block->page.in_zip_hash);
1192 @@ -2777,12 +2874,19 @@
1193         }
1194  
1195         if (block == NULL) {
1196 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
1197                 block = (buf_block_t*) buf_page_hash_get_low(
1198                         buf_pool, space, offset, fold);
1199 +               if (block) {
1200 +                       block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1201 +                       ut_a(block_mutex);
1202 +               }
1203 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1204         }
1205  
1206  loop2:
1207         if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1208 +               mutex_exit(block_mutex);
1209                 block = NULL;
1210         }
1211  
1212 @@ -2794,12 +2898,14 @@
1213                                 space, offset, fold);
1214  
1215                         if (UNIV_LIKELY_NULL(block)) {
1216 -
1217 +                               block_mutex = buf_page_get_mutex((buf_page_t*)block);
1218 +                               ut_a(block_mutex);
1219 +                               ut_ad(mutex_own(block_mutex));
1220                                 goto got_block;
1221                         }
1222                 }
1223  
1224 -               buf_pool_mutex_exit(buf_pool);
1225 +               //buf_pool_mutex_exit(buf_pool);
1226  
1227                 if (mode == BUF_GET_IF_IN_POOL
1228                     || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1229 @@ -2847,7 +2953,8 @@
1230                 /* The page is being read to buffer pool,
1231                 but we cannot wait around for the read to
1232                 complete. */
1233 -               buf_pool_mutex_exit(buf_pool);
1234 +               //buf_pool_mutex_exit(buf_pool);
1235 +               mutex_exit(block_mutex);
1236  
1237                 return(NULL);
1238         }
1239 @@ -2857,38 +2964,49 @@
1240                 ibool           success;
1241  
1242         case BUF_BLOCK_FILE_PAGE:
1243 +               if (block_mutex == &buf_pool->zip_mutex) {
1244 +                       /* this is the wrong mutex; release it and retry */
1245 +                       mutex_exit(block_mutex);
1246 +                       goto loop;
1247 +               }
1248                 break;
1249  
1250         case BUF_BLOCK_ZIP_PAGE:
1251         case BUF_BLOCK_ZIP_DIRTY:
1252 +               ut_ad(block_mutex == &buf_pool->zip_mutex);
1253                 bpage = &block->page;
1254                 /* Protect bpage->buf_fix_count. */
1255 -               mutex_enter(&buf_pool->zip_mutex);
1256 +               //mutex_enter(&buf_pool->zip_mutex);
1257  
1258                 if (bpage->buf_fix_count
1259                     || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1260                         /* This condition often occurs when the buffer
1261                         is not buffer-fixed, but I/O-fixed by
1262                         buf_page_init_for_read(). */
1263 -                       mutex_exit(&buf_pool->zip_mutex);
1264 +                       //mutex_exit(&buf_pool->zip_mutex);
1265  wait_until_unfixed:
1266                         /* The block is buffer-fixed or I/O-fixed.
1267                         Try again later. */
1268 -                       buf_pool_mutex_exit(buf_pool);
1269 +                       //buf_pool_mutex_exit(buf_pool);
1270 +                       mutex_exit(block_mutex);
1271                         os_thread_sleep(WAIT_FOR_READ);
1272    
1273                         goto loop;
1274                 }
1275  
1276                 /* Allocate an uncompressed page. */
1277 -               buf_pool_mutex_exit(buf_pool);
1278 -               mutex_exit(&buf_pool->zip_mutex);
1279 +               //buf_pool_mutex_exit(buf_pool);
1280 +               //mutex_exit(&buf_pool->zip_mutex);
1281 +               mutex_exit(block_mutex);
1282  
1283                 block = buf_LRU_get_free_block(buf_pool, 0);
1284                 ut_a(block);
1285 +               block_mutex = &block->mutex;
1286  
1287 -               buf_pool_mutex_enter(buf_pool);
1288 -               mutex_enter(&block->mutex);
1289 +               //buf_pool_mutex_enter(buf_pool);
1290 +               mutex_enter(&buf_pool->LRU_list_mutex);
1291 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
1292 +               mutex_enter(block_mutex);
1293  
1294                 {
1295                         buf_page_t*     hash_bpage;
1296 @@ -2901,35 +3019,47 @@
1297                                 while buf_pool->mutex was released.
1298                                 Free the block that was allocated. */
1299  
1300 -                               buf_LRU_block_free_non_file_page(block);
1301 -                               mutex_exit(&block->mutex);
1302 +                               buf_LRU_block_free_non_file_page(block, TRUE);
1303 +                               mutex_exit(block_mutex);
1304  
1305                                 block = (buf_block_t*) hash_bpage;
1306 +                               if (block) {
1307 +                                       block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1308 +                                       ut_a(block_mutex);
1309 +                               }
1310 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1311 +                               mutex_exit(&buf_pool->LRU_list_mutex);
1312                                 goto loop2;
1313                         }
1314                 }
1315  
1316 +               mutex_enter(&buf_pool->zip_mutex);
1317 +
1318                 if (UNIV_UNLIKELY
1319                     (bpage->buf_fix_count
1320                      || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1321  
1322 +                       mutex_exit(&buf_pool->zip_mutex);
1323                         /* The block was buffer-fixed or I/O-fixed
1324                         while buf_pool->mutex was not held by this thread.
1325                         Free the block that was allocated and try again.
1326                         This should be extremely unlikely. */
1327  
1328 -                       buf_LRU_block_free_non_file_page(block);
1329 -                       mutex_exit(&block->mutex);
1330 +                       buf_LRU_block_free_non_file_page(block, TRUE);
1331 +                       //mutex_exit(&block->mutex);
1332  
1333 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1334 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1335                         goto wait_until_unfixed;
1336                 }
1337  
1338                 /* Move the compressed page from bpage to block,
1339                 and uncompress it. */
1340  
1341 -               mutex_enter(&buf_pool->zip_mutex);
1342 -
1343                 buf_relocate(bpage, &block->page);
1344 +
1345 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1346 +
1347                 buf_block_init_low(block);
1348                 block->lock_hash_val = lock_rec_hash(space, offset);
1349  
1350 @@ -2938,7 +3068,7 @@
1351  
1352                 if (buf_page_get_state(&block->page)
1353                     == BUF_BLOCK_ZIP_PAGE) {
1354 -                       UT_LIST_REMOVE(list, buf_pool->zip_clean,
1355 +                       UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1356                                        &block->page);
1357                         ut_ad(!block->page.in_flush_list);
1358                 } else {
1359 @@ -2955,19 +3085,24 @@
1360                 /* Insert at the front of unzip_LRU list */
1361                 buf_unzip_LRU_add_block(block, FALSE);
1362  
1363 +               mutex_exit(&buf_pool->LRU_list_mutex);
1364 +
1365                 block->page.buf_fix_count = 1;
1366                 buf_block_set_io_fix(block, BUF_IO_READ);
1367                 rw_lock_x_lock_func(&block->lock, 0, file, line);
1368  
1369                 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1370  
1371 -               mutex_exit(&block->mutex);
1372 +               mutex_exit(block_mutex);
1373                 mutex_exit(&buf_pool->zip_mutex);
1374 +
1375 +               buf_pool_mutex_enter(buf_pool);
1376                 buf_pool->n_pend_unzip++;
1377 +               buf_pool_mutex_exit(buf_pool);
1378  
1379 -               buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1380 +               buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1381  
1382 -               buf_pool_mutex_exit(buf_pool);
1383 +               //buf_pool_mutex_exit(buf_pool);
1384  
1385                 /* Decompress the page and apply buffered operations
1386                 while not holding buf_pool->mutex or block->mutex. */
1387 @@ -2980,12 +3115,15 @@
1388                 }
1389  
1390                 /* Unfix and unlatch the block. */
1391 -               buf_pool_mutex_enter(buf_pool);
1392 -               mutex_enter(&block->mutex);
1393 +               //buf_pool_mutex_enter(buf_pool);
1394 +               block_mutex = &block->mutex;
1395 +               mutex_enter(block_mutex);
1396                 block->page.buf_fix_count--;
1397                 buf_block_set_io_fix(block, BUF_IO_NONE);
1398 -               mutex_exit(&block->mutex);
1399 +
1400 +               buf_pool_mutex_enter(buf_pool);
1401                 buf_pool->n_pend_unzip--;
1402 +               buf_pool_mutex_exit(buf_pool);
1403                 rw_lock_x_unlock(&block->lock);
1404  
1405                 break;
1406 @@ -3001,7 +3139,7 @@
1407  
1408         ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1409  
1410 -       mutex_enter(&block->mutex);
1411 +       //mutex_enter(&block->mutex);
1412  #if UNIV_WORD_SIZE == 4
1413         /* On 32-bit systems, there is no padding in buf_page_t.  On
1414         other systems, Valgrind could complain about uninitialized pad
1415 @@ -3014,7 +3152,7 @@
1416                 /* Try to evict the block from the buffer pool, to use the
1417                 insert buffer (change buffer) as much as possible. */
1418  
1419 -               if (buf_LRU_free_block(&block->page, TRUE, NULL)
1420 +               if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
1421                     == BUF_LRU_FREED) {
1422                         mutex_exit(&block->mutex);
1423                         if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1424 @@ -3051,13 +3189,14 @@
1425  
1426         buf_block_buf_fix_inc(block, file, line);
1427  
1428 -       mutex_exit(&block->mutex);
1429 +       //mutex_exit(&block->mutex);
1430  
1431         /* Check if this is the first access to the page */
1432  
1433         access_time = buf_page_is_accessed(&block->page);
1434  
1435 -       buf_pool_mutex_exit(buf_pool);
1436 +       //buf_pool_mutex_exit(buf_pool);
1437 +       mutex_exit(block_mutex);
1438  
1439         buf_page_set_accessed_make_young(&block->page, access_time);
1440  
1441 @@ -3290,9 +3429,11 @@
1442         buf_pool = buf_pool_from_block(block);
1443  
1444         if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1445 -               buf_pool_mutex_enter(buf_pool);
1446 +               //buf_pool_mutex_enter(buf_pool);
1447 +               mutex_enter(&buf_pool->LRU_list_mutex);
1448                 buf_LRU_make_block_young(&block->page);
1449 -               buf_pool_mutex_exit(buf_pool);
1450 +               //buf_pool_mutex_exit(buf_pool);
1451 +               mutex_exit(&buf_pool->LRU_list_mutex);
1452         } else if (!buf_page_is_accessed(&block->page)) {
1453                 /* Above, we do a dirty read on purpose, to avoid
1454                 mutex contention.  The field buf_page_t::access_time
1455 @@ -3300,9 +3441,11 @@
1456                 field must be protected by mutex, however. */
1457                 ulint   time_ms = ut_time_ms();
1458  
1459 -               buf_pool_mutex_enter(buf_pool);
1460 +               //buf_pool_mutex_enter(buf_pool);
1461 +               mutex_enter(&block->mutex);
1462                 buf_page_set_accessed(&block->page, time_ms);
1463 -               buf_pool_mutex_exit(buf_pool);
1464 +               //buf_pool_mutex_exit(buf_pool);
1465 +               mutex_exit(&block->mutex);
1466         }
1467  
1468         ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1469 @@ -3369,18 +3512,21 @@
1470         ut_ad(mtr);
1471         ut_ad(mtr->state == MTR_ACTIVE);
1472  
1473 -       buf_pool_mutex_enter(buf_pool);
1474 +       //buf_pool_mutex_enter(buf_pool);
1475 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1476         block = buf_block_hash_get(buf_pool, space_id, page_no);
1477  
1478         if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1479 -               buf_pool_mutex_exit(buf_pool);
1480 +               //buf_pool_mutex_exit(buf_pool);
1481 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1482                 return(NULL);
1483         }
1484  
1485         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1486  
1487         mutex_enter(&block->mutex);
1488 -       buf_pool_mutex_exit(buf_pool);
1489 +       //buf_pool_mutex_exit(buf_pool);
1490 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1491  
1492  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1493         ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1494 @@ -3469,7 +3615,10 @@
1495         buf_page_t*     hash_page;
1496         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1497  
1498 -       ut_ad(buf_pool_mutex_own(buf_pool));
1499 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1500 +#ifdef UNIV_SYNC_DEBUG
1501 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1502 +#endif
1503         ut_ad(mutex_own(&(block->mutex)));
1504         ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1505  
1506 @@ -3498,11 +3647,14 @@
1507         if (UNIV_LIKELY(!hash_page)) {
1508         } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1509                 /* Preserve the reference count. */
1510 -               ulint   buf_fix_count = hash_page->buf_fix_count;
1511 +               ulint   buf_fix_count;
1512  
1513 +               mutex_enter(&buf_pool->zip_mutex);
1514 +               buf_fix_count = hash_page->buf_fix_count;
1515                 ut_a(buf_fix_count > 0);
1516                 block->page.buf_fix_count += buf_fix_count;
1517                 buf_pool_watch_remove(buf_pool, fold, hash_page);
1518 +               mutex_exit(&buf_pool->zip_mutex);
1519         } else {
1520                 fprintf(stderr,
1521                         "InnoDB: Error: page %lu %lu already found"
1522 @@ -3512,7 +3664,8 @@
1523                         (const void*) hash_page, (const void*) block);
1524  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1525                 mutex_exit(&block->mutex);
1526 -               buf_pool_mutex_exit(buf_pool);
1527 +               //buf_pool_mutex_exit(buf_pool);
1528 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1529                 buf_print();
1530                 buf_LRU_print();
1531                 buf_validate();
1532 @@ -3596,7 +3749,9 @@
1533  
1534         fold = buf_page_address_fold(space, offset);
1535  
1536 -       buf_pool_mutex_enter(buf_pool);
1537 +       //buf_pool_mutex_enter(buf_pool);
1538 +       mutex_enter(&buf_pool->LRU_list_mutex);
1539 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1540  
1541         watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1542         if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
1543 @@ -3605,9 +3760,15 @@
1544  err_exit:
1545                 if (block) {
1546                         mutex_enter(&block->mutex);
1547 -                       buf_LRU_block_free_non_file_page(block);
1548 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1549 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1550 +                       buf_LRU_block_free_non_file_page(block, FALSE);
1551                         mutex_exit(&block->mutex);
1552                 }
1553 +               else {
1554 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1555 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1556 +               }
1557  
1558                 bpage = NULL;
1559                 goto func_exit;
1560 @@ -3630,6 +3791,8 @@
1561  
1562                 buf_page_init(space, offset, fold, block);
1563  
1564 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1565 +
1566                 /* The block must be put to the LRU list, to the old blocks */
1567                 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1568  
1569 @@ -3657,7 +3820,7 @@
1570                         been added to buf_pool->LRU and
1571                         buf_pool->page_hash. */
1572                         mutex_exit(&block->mutex);
1573 -                       data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1574 +                       data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1575                         mutex_enter(&block->mutex);
1576                         block->page.zip.data = data;
1577  
1578 @@ -3670,6 +3833,7 @@
1579                         buf_unzip_LRU_add_block(block, TRUE);
1580                 }
1581  
1582 +               mutex_exit(&buf_pool->LRU_list_mutex);
1583                 mutex_exit(&block->mutex);
1584         } else {
1585                 /* Defer buf_buddy_alloc() until after the block has
1586 @@ -3681,8 +3845,8 @@
1587                 control block (bpage), in order to avoid the
1588                 invocation of buf_buddy_relocate_block() on
1589                 uninitialized data. */
1590 -               data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1591 -               bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1592 +               data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1593 +               bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1594  
1595                 /* Initialize the buf_pool pointer. */
1596                 bpage->buf_pool_index = buf_pool_index(buf_pool);
1597 @@ -3701,8 +3865,11 @@
1598  
1599                                 /* The block was added by some other thread. */
1600                                 watch_page = NULL;
1601 -                               buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1602 -                               buf_buddy_free(buf_pool, data, zip_size);
1603 +                               buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1604 +                               buf_buddy_free(buf_pool, data, zip_size, TRUE);
1605 +
1606 +                               mutex_exit(&buf_pool->LRU_list_mutex);
1607 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1608  
1609                                 bpage = NULL;
1610                                 goto func_exit;
1611 @@ -3746,18 +3913,24 @@
1612                 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1613                             bpage);
1614  
1615 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1616 +
1617                 /* The block must be put to the LRU list, to the old blocks */
1618                 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1619                 buf_LRU_insert_zip_clean(bpage);
1620  
1621 +               mutex_exit(&buf_pool->LRU_list_mutex);
1622 +
1623                 buf_page_set_io_fix(bpage, BUF_IO_READ);
1624  
1625                 mutex_exit(&buf_pool->zip_mutex);
1626         }
1627  
1628 +       buf_pool_mutex_enter(buf_pool);
1629         buf_pool->n_pend_reads++;
1630 -func_exit:
1631         buf_pool_mutex_exit(buf_pool);
1632 +func_exit:
1633 +       //buf_pool_mutex_exit(buf_pool);
1634  
1635         if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1636  
1637 @@ -3799,7 +3972,9 @@
1638  
1639         fold = buf_page_address_fold(space, offset);
1640  
1641 -       buf_pool_mutex_enter(buf_pool);
1642 +       //buf_pool_mutex_enter(buf_pool);
1643 +       mutex_enter(&buf_pool->LRU_list_mutex);
1644 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1645  
1646         block = (buf_block_t*) buf_page_hash_get_low(
1647                 buf_pool, space, offset, fold);
1648 @@ -3815,7 +3990,9 @@
1649  #endif /* UNIV_DEBUG_FILE_ACCESSES */
1650  
1651                 /* Page can be found in buf_pool */
1652 -               buf_pool_mutex_exit(buf_pool);
1653 +               //buf_pool_mutex_exit(buf_pool);
1654 +               mutex_exit(&buf_pool->LRU_list_mutex);
1655 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1656  
1657                 buf_block_free(free_block);
1658  
1659 @@ -3837,6 +4014,7 @@
1660         mutex_enter(&block->mutex);
1661  
1662         buf_page_init(space, offset, fold, block);
1663 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1664  
1665         /* The block must be put to the LRU list */
1666         buf_LRU_add_block(&block->page, FALSE);
1667 @@ -3863,7 +4041,7 @@
1668                 the reacquisition of buf_pool->mutex.  We also must
1669                 defer this operation until after the block descriptor
1670                 has been added to buf_pool->LRU and buf_pool->page_hash. */
1671 -               data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1672 +               data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1673                 mutex_enter(&block->mutex);
1674                 block->page.zip.data = data;
1675  
1676 @@ -3881,7 +4059,8 @@
1677  
1678         buf_page_set_accessed(&block->page, time_ms);
1679  
1680 -       buf_pool_mutex_exit(buf_pool);
1681 +       //buf_pool_mutex_exit(buf_pool);
1682 +       mutex_exit(&buf_pool->LRU_list_mutex);
1683  
1684         mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1685  
1686 @@ -3932,6 +4111,8 @@
1687         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1688         const ibool     uncompressed = (buf_page_get_state(bpage)
1689                                         == BUF_BLOCK_FILE_PAGE);
1690 +       ibool           have_LRU_mutex = FALSE;
1691 +       mutex_t*        block_mutex;
1692  
1693         ut_a(buf_page_in_file(bpage));
1694  
1695 @@ -4065,8 +4246,26 @@
1696                 }
1697         }
1698  
1699 +       if (io_type == BUF_IO_WRITE
1700 +           && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1701 +               || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1702 +               /* to keep consistency at buf_LRU_insert_zip_clean() */
1703 +               have_LRU_mutex = TRUE; /* optimistic */
1704 +       }
1705 +retry_mutex:
1706 +       if (have_LRU_mutex)
1707 +               mutex_enter(&buf_pool->LRU_list_mutex);
1708 +       block_mutex = buf_page_get_mutex_enter(bpage);
1709 +       ut_a(block_mutex);
1710 +       if (io_type == BUF_IO_WRITE
1711 +           && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1712 +               || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1713 +           && !have_LRU_mutex) {
1714 +               mutex_exit(block_mutex);
1715 +               have_LRU_mutex = TRUE;
1716 +               goto retry_mutex;
1717 +       }
1718         buf_pool_mutex_enter(buf_pool);
1719 -       mutex_enter(buf_page_get_mutex(bpage));
1720  
1721  #ifdef UNIV_IBUF_COUNT_DEBUG
1722         if (io_type == BUF_IO_WRITE || uncompressed) {
1723 @@ -4089,6 +4288,7 @@
1724                 the x-latch to this OS thread: do not let this confuse you in
1725                 debugging! */
1726  
1727 +               ut_a(!have_LRU_mutex);
1728                 ut_ad(buf_pool->n_pend_reads > 0);
1729                 buf_pool->n_pend_reads--;
1730                 buf_pool->stat.n_pages_read++;
1731 @@ -4106,6 +4306,9 @@
1732  
1733                 buf_flush_write_complete(bpage);
1734  
1735 +               if (have_LRU_mutex)
1736 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1737 +
1738                 if (uncompressed) {
1739                         rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1740                                              BUF_IO_WRITE);
1741 @@ -4128,8 +4331,8 @@
1742         }
1743  #endif /* UNIV_DEBUG */
1744  
1745 -       mutex_exit(buf_page_get_mutex(bpage));
1746         buf_pool_mutex_exit(buf_pool);
1747 +       mutex_exit(block_mutex);
1748  }
1749  
1750  /*********************************************************************//**
1751 @@ -4146,7 +4349,9 @@
1752  
1753         ut_ad(buf_pool);
1754  
1755 -       buf_pool_mutex_enter(buf_pool);
1756 +       //buf_pool_mutex_enter(buf_pool);
1757 +       mutex_enter(&buf_pool->LRU_list_mutex);
1758 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1759  
1760         chunk = buf_pool->chunks;
1761  
1762 @@ -4163,7 +4368,9 @@
1763                 }
1764         }
1765  
1766 -       buf_pool_mutex_exit(buf_pool);
1767 +       //buf_pool_mutex_exit(buf_pool);
1768 +       mutex_exit(&buf_pool->LRU_list_mutex);
1769 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1770  
1771         return(TRUE);
1772  }
1773 @@ -4211,7 +4418,8 @@
1774                 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1775         }
1776  
1777 -       buf_pool_mutex_enter(buf_pool);
1778 +       //buf_pool_mutex_enter(buf_pool);
1779 +       mutex_enter(&buf_pool->LRU_list_mutex);
1780  
1781         ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1782         ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
1783 @@ -4224,7 +4432,8 @@
1784         memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1785         buf_refresh_io_stats(buf_pool);
1786  
1787 -       buf_pool_mutex_exit(buf_pool);
1788 +       //buf_pool_mutex_exit(buf_pool);
1789 +       mutex_exit(&buf_pool->LRU_list_mutex);
1790  }
1791  
1792  /*********************************************************************//**
1793 @@ -4266,7 +4475,10 @@
1794  
1795         ut_ad(buf_pool);
1796  
1797 -       buf_pool_mutex_enter(buf_pool);
1798 +       //buf_pool_mutex_enter(buf_pool);
1799 +       mutex_enter(&buf_pool->LRU_list_mutex);
1800 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1801 +       /* To keep the new latch order, some checks below cannot be validated correctly. */
1802  
1803         chunk = buf_pool->chunks;
1804  
1805 @@ -4361,7 +4573,7 @@
1806         /* Check clean compressed-only blocks. */
1807  
1808         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1809 -            b = UT_LIST_GET_NEXT(list, b)) {
1810 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
1811                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1812                 switch (buf_page_get_io_fix(b)) {
1813                 case BUF_IO_NONE:
1814 @@ -4392,7 +4604,7 @@
1815  
1816         buf_flush_list_mutex_enter(buf_pool);
1817         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1818 -            b = UT_LIST_GET_NEXT(list, b)) {
1819 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
1820                 ut_ad(b->in_flush_list);
1821                 ut_a(b->oldest_modification);
1822                 n_flush++;
1823 @@ -4451,6 +4663,8 @@
1824         }
1825  
1826         ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1827 +       /* Because of the latching order with block->mutex, we cannot acquire the needed mutexes before this point. */
1828 +/*
1829         if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1830                 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1831                         (ulong) UT_LIST_GET_LEN(buf_pool->free),
1832 @@ -4461,8 +4675,11 @@
1833         ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1834         ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1835         ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1836 +*/
1837  
1838 -       buf_pool_mutex_exit(buf_pool);
1839 +       //buf_pool_mutex_exit(buf_pool);
1840 +       mutex_exit(&buf_pool->LRU_list_mutex);
1841 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1842  
1843         ut_a(buf_LRU_validate());
1844         ut_a(buf_flush_validate(buf_pool));
1845 @@ -4518,7 +4735,9 @@
1846         index_ids = mem_alloc(size * sizeof *index_ids);
1847         counts = mem_alloc(sizeof(ulint) * size);
1848  
1849 -       buf_pool_mutex_enter(buf_pool);
1850 +       //buf_pool_mutex_enter(buf_pool);
1851 +       mutex_enter(&buf_pool->LRU_list_mutex);
1852 +       mutex_enter(&buf_pool->free_list_mutex);
1853         buf_flush_list_mutex_enter(buf_pool);
1854  
1855         fprintf(stderr,
1856 @@ -4587,7 +4806,9 @@
1857                 }
1858         }
1859  
1860 -       buf_pool_mutex_exit(buf_pool);
1861 +       //buf_pool_mutex_exit(buf_pool);
1862 +       mutex_exit(&buf_pool->LRU_list_mutex);
1863 +       mutex_exit(&buf_pool->free_list_mutex);
1864  
1865         for (i = 0; i < n_found; i++) {
1866                 index = dict_index_get_if_in_cache(index_ids[i]);
1867 @@ -4644,7 +4865,7 @@
1868         buf_chunk_t*    chunk;
1869         ulint           fixed_pages_number = 0;
1870  
1871 -       buf_pool_mutex_enter(buf_pool);
1872 +       //buf_pool_mutex_enter(buf_pool);
1873  
1874         chunk = buf_pool->chunks;
1875  
1876 @@ -4678,7 +4899,7 @@
1877         /* Traverse the lists of clean and dirty compressed-only blocks. */
1878  
1879         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1880 -            b = UT_LIST_GET_NEXT(list, b)) {
1881 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
1882                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1883                 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1884  
1885 @@ -4690,7 +4911,7 @@
1886  
1887         buf_flush_list_mutex_enter(buf_pool);
1888         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1889 -            b = UT_LIST_GET_NEXT(list, b)) {
1890 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
1891                 ut_ad(b->in_flush_list);
1892  
1893                 switch (buf_page_get_state(b)) {
1894 @@ -4716,7 +4937,7 @@
1895  
1896         buf_flush_list_mutex_exit(buf_pool);
1897         mutex_exit(&buf_pool->zip_mutex);
1898 -       buf_pool_mutex_exit(buf_pool);
1899 +       //buf_pool_mutex_exit(buf_pool);
1900  
1901         return(fixed_pages_number);
1902  }
1903 @@ -4810,6 +5031,8 @@
1904  
1905         ut_ad(buf_pool);
1906  
1907 +       mutex_enter(&buf_pool->LRU_list_mutex);
1908 +       mutex_enter(&buf_pool->free_list_mutex);
1909         buf_pool_mutex_enter(buf_pool);
1910         buf_flush_list_mutex_enter(buf_pool);
1911  
1912 @@ -4913,6 +5136,8 @@
1913                 buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
1914  
1915         buf_refresh_io_stats(buf_pool);
1916 +       mutex_exit(&buf_pool->LRU_list_mutex);
1917 +       mutex_exit(&buf_pool->free_list_mutex);
1918         buf_pool_mutex_exit(buf_pool);
1919  }
1920  
1921 @@ -5032,11 +5257,13 @@
1922  {
1923         ulint   len;
1924  
1925 -       buf_pool_mutex_enter(buf_pool);
1926 +       //buf_pool_mutex_enter(buf_pool);
1927 +       mutex_enter(&buf_pool->free_list_mutex);
1928  
1929         len = UT_LIST_GET_LEN(buf_pool->free);
1930  
1931 -       buf_pool_mutex_exit(buf_pool);
1932 +       //buf_pool_mutex_exit(buf_pool);
1933 +       mutex_exit(&buf_pool->free_list_mutex);
1934  
1935         return(len);
1936  }
1937 diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1938 --- a/storage/innobase/buf/buf0flu.c    2010-12-03 15:22:36.318955693 +0900
1939 +++ b/storage/innobase/buf/buf0flu.c    2010-12-03 15:48:29.289024083 +0900
1940 @@ -279,7 +279,7 @@
1941  
1942         ut_d(block->page.in_flush_list = TRUE);
1943         block->page.oldest_modification = lsn;
1944 -       UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1945 +       UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1946  
1947  #ifdef UNIV_DEBUG_VALGRIND
1948         {
1949 @@ -373,14 +373,14 @@
1950                        > block->page.oldest_modification) {
1951                         ut_ad(b->in_flush_list);
1952                         prev_b = b;
1953 -                       b = UT_LIST_GET_NEXT(list, b);
1954 +                       b = UT_LIST_GET_NEXT(flush_list, b);
1955                 }
1956         }
1957  
1958         if (prev_b == NULL) {
1959 -               UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1960 +               UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1961         } else {
1962 -               UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1963 +               UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1964                                      prev_b, &block->page);
1965         }
1966  
1967 @@ -406,7 +406,7 @@
1968         //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
1969         //ut_ad(buf_pool_mutex_own(buf_pool));
1970  #endif
1971 -       //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1972 +       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1973         //ut_ad(bpage->in_LRU_list);
1974  
1975         if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
1976 @@ -442,14 +442,14 @@
1977         enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1978  {
1979  #ifdef UNIV_DEBUG
1980 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1981 -       ut_ad(buf_pool_mutex_own(buf_pool));
1982 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
1983 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1984  #endif
1985 -       ut_a(buf_page_in_file(bpage));
1986 +       //ut_a(buf_page_in_file(bpage));
1987         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1988         ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1989  
1990 -       if (bpage->oldest_modification != 0
1991 +       if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1992             && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1993                 ut_ad(bpage->in_flush_list);
1994  
1995 @@ -480,7 +480,7 @@
1996  {
1997         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1998  
1999 -       ut_ad(buf_pool_mutex_own(buf_pool));
2000 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2001         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
2002         ut_ad(bpage->in_flush_list);
2003  
2004 @@ -498,11 +498,11 @@
2005                 return;
2006         case BUF_BLOCK_ZIP_DIRTY:
2007                 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
2008 -               UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2009 +               UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2010                 buf_LRU_insert_zip_clean(bpage);
2011                 break;
2012         case BUF_BLOCK_FILE_PAGE:
2013 -               UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2014 +               UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2015                 break;
2016         }
2017  
2018 @@ -546,7 +546,7 @@
2019         buf_page_t*     prev_b = NULL;
2020         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2021  
2022 -       ut_ad(buf_pool_mutex_own(buf_pool));
2023 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2024         /* Must reside in the same buffer pool. */
2025         ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2026  
2027 @@ -575,18 +575,18 @@
2028         because we assert on in_flush_list in comparison function. */
2029         ut_d(bpage->in_flush_list = FALSE);
2030  
2031 -       prev = UT_LIST_GET_PREV(list, bpage);
2032 -       UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2033 +       prev = UT_LIST_GET_PREV(flush_list, bpage);
2034 +       UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2035  
2036         if (prev) {
2037                 ut_ad(prev->in_flush_list);
2038                 UT_LIST_INSERT_AFTER(
2039 -                       list,
2040 +                       flush_list,
2041                         buf_pool->flush_list,
2042                         prev, dpage);
2043         } else {
2044                 UT_LIST_ADD_FIRST(
2045 -                       list,
2046 +                       flush_list,
2047                         buf_pool->flush_list,
2048                         dpage);
2049         }
2050 @@ -1055,7 +1055,7 @@
2051  
2052  #ifdef UNIV_DEBUG
2053         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2054 -       ut_ad(!buf_pool_mutex_own(buf_pool));
2055 +       //ut_ad(!buf_pool_mutex_own(buf_pool));
2056  #endif
2057  
2058  #ifdef UNIV_LOG_DEBUG
2059 @@ -1069,7 +1069,8 @@
2060         io_fixed and oldest_modification != 0.  Thus, it cannot be
2061         relocated in the buffer pool or removed from flush_list or
2062         LRU_list. */
2063 -       ut_ad(!buf_pool_mutex_own(buf_pool));
2064 +       //ut_ad(!buf_pool_mutex_own(buf_pool));
2065 +       ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2066         ut_ad(!buf_flush_list_mutex_own(buf_pool));
2067         ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2068         ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
2069 @@ -1232,12 +1233,18 @@
2070         ibool           is_uncompressed;
2071  
2072         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2073 -       ut_ad(buf_pool_mutex_own(buf_pool));
2074 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2075 +#ifdef UNIV_SYNC_DEBUG
2076 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2077 +#endif
2078         ut_ad(buf_page_in_file(bpage));
2079  
2080         block_mutex = buf_page_get_mutex(bpage);
2081         ut_ad(mutex_own(block_mutex));
2082  
2083 +       buf_pool_mutex_enter(buf_pool);
2084 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
2085 +
2086         ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2087  
2088         buf_page_set_io_fix(bpage, BUF_IO_WRITE);
2089 @@ -1399,14 +1406,16 @@
2090  
2091                 buf_pool = buf_pool_get(space, i);
2092  
2093 -               buf_pool_mutex_enter(buf_pool);
2094 +               //buf_pool_mutex_enter(buf_pool);
2095 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
2096  
2097                 /* We only want to flush pages from this buffer pool. */
2098                 bpage = buf_page_hash_get(buf_pool, space, i);
2099  
2100                 if (!bpage) {
2101  
2102 -                       buf_pool_mutex_exit(buf_pool);
2103 +                       //buf_pool_mutex_exit(buf_pool);
2104 +                       rw_lock_s_unlock(&buf_pool->page_hash_latch);
2105                         continue;
2106                 }
2107  
2108 @@ -1418,11 +1427,9 @@
2109                 if (flush_type != BUF_FLUSH_LRU
2110                     || i == offset
2111                     || buf_page_is_old(bpage)) {
2112 -                       mutex_t* block_mutex = buf_page_get_mutex(bpage);
2113 -
2114 -                       mutex_enter(block_mutex);
2115 +                       mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2116  
2117 -                       if (buf_flush_ready_for_flush(bpage, flush_type)
2118 +                       if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2119                             && (i == offset || !bpage->buf_fix_count)) {
2120                                 /* We only try to flush those
2121                                 neighbors != offset where the buf fix
2122 @@ -1438,11 +1445,12 @@
2123                                 ut_ad(!buf_pool_mutex_own(buf_pool));
2124                                 count++;
2125                                 continue;
2126 -                       } else {
2127 +                       } else if (block_mutex) {
2128                                 mutex_exit(block_mutex);
2129                         }
2130                 }
2131 -               buf_pool_mutex_exit(buf_pool);
2132 +               //buf_pool_mutex_exit(buf_pool);
2133 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
2134         }
2135  
2136         return(count);
2137 @@ -1475,21 +1483,25 @@
2138         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2139  #endif /* UNIV_DEBUG */
2140  
2141 -       ut_ad(buf_pool_mutex_own(buf_pool));
2142 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2143 +       ut_ad(flush_type != BUF_FLUSH_LRU
2144 +             || mutex_own(&buf_pool->LRU_list_mutex));
2145  
2146 -       block_mutex = buf_page_get_mutex(bpage);
2147 -       mutex_enter(block_mutex);
2148 +       block_mutex = buf_page_get_mutex_enter(bpage);
2149  
2150 -       ut_a(buf_page_in_file(bpage));
2151 +       //ut_a(buf_page_in_file(bpage));
2152  
2153 -       if (buf_flush_ready_for_flush(bpage, flush_type)) {
2154 +       if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2155                 ulint           space;
2156                 ulint           offset;
2157                 buf_pool_t*     buf_pool;
2158  
2159                 buf_pool = buf_pool_from_bpage(bpage);
2160  
2161 -               buf_pool_mutex_exit(buf_pool);
2162 +               //buf_pool_mutex_exit(buf_pool);
2163 +               if (flush_type == BUF_FLUSH_LRU) {
2164 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2165 +               }
2166  
2167                 /* These fields are protected by both the
2168                 buffer pool mutex and block mutex. */
2169 @@ -1505,13 +1517,18 @@
2170                                                   *count,
2171                                                   n_to_flush);
2172  
2173 -               buf_pool_mutex_enter(buf_pool);
2174 +               //buf_pool_mutex_enter(buf_pool);
2175 +               if (flush_type == BUF_FLUSH_LRU) {
2176 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2177 +               }
2178                 flushed = TRUE;
2179 -       } else {
2180 +       } else if (block_mutex) {
2181                 mutex_exit(block_mutex);
2182         }
2183  
2184 -       ut_ad(buf_pool_mutex_own(buf_pool));
2185 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2186 +       ut_ad(flush_type != BUF_FLUSH_LRU
2187 +             || mutex_own(&buf_pool->LRU_list_mutex));
2188  
2189         return(flushed);
2190  }
2191 @@ -1532,7 +1549,8 @@
2192         buf_page_t*     bpage;
2193         ulint           count = 0;
2194  
2195 -       ut_ad(buf_pool_mutex_own(buf_pool));
2196 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2197 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2198  
2199         do {
2200                 /* Start from the end of the list looking for a
2201 @@ -1554,7 +1572,8 @@
2202         should be flushed, we factor in this value. */
2203         buf_lru_flush_page_count += count;
2204  
2205 -       ut_ad(buf_pool_mutex_own(buf_pool));
2206 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2207 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2208  
2209         return(count);
2210  }
2211 @@ -1582,9 +1601,10 @@
2212  {
2213         ulint           len;
2214         buf_page_t*     bpage;
2215 +       buf_page_t*     prev_bpage = NULL;
2216         ulint           count = 0;
2217  
2218 -       ut_ad(buf_pool_mutex_own(buf_pool));
2219 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2220  
2221         /* If we have flushed enough, leave the loop */
2222         do {
2223 @@ -1603,6 +1623,7 @@
2224  
2225                 if (bpage) {
2226                         ut_a(bpage->oldest_modification > 0);
2227 +                       prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2228                 }
2229  
2230                 if (!bpage || bpage->oldest_modification >= lsn_limit) {
2231 @@ -1644,9 +1665,17 @@
2232                                 break;
2233                         }
2234  
2235 -                       bpage = UT_LIST_GET_PREV(list, bpage);
2236 +                       bpage = UT_LIST_GET_PREV(flush_list, bpage);
2237  
2238 -                       ut_ad(!bpage || bpage->in_flush_list);
2239 +                       //ut_ad(!bpage || bpage->in_flush_list);
2240 +                       if (bpage != prev_bpage) {
2241 +                               /* the search might warp.. retrying */
2242 +                               buf_flush_list_mutex_exit(buf_pool);
2243 +                               break;
2244 +                       }
2245 +                       if (bpage) {
2246 +                               prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2247 +                       }
2248  
2249                         buf_flush_list_mutex_exit(buf_pool);
2250  
2251 @@ -1655,7 +1684,7 @@
2252  
2253         } while (count < min_n && bpage != NULL && len > 0);
2254  
2255 -       ut_ad(buf_pool_mutex_own(buf_pool));
2256 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2257  
2258         return(count);
2259  }
2260 @@ -1694,13 +1723,15 @@
2261               || sync_thread_levels_empty_gen(TRUE));
2262  #endif /* UNIV_SYNC_DEBUG */
2263  
2264 -       buf_pool_mutex_enter(buf_pool);
2265 +       //buf_pool_mutex_enter(buf_pool);
2266  
2267         /* Note: The buffer pool mutex is released and reacquired within
2268         the flush functions. */
2269         switch(flush_type) {
2270         case BUF_FLUSH_LRU:
2271 +               mutex_enter(&buf_pool->LRU_list_mutex);
2272                 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2273 +               mutex_exit(&buf_pool->LRU_list_mutex);
2274                 break;
2275         case BUF_FLUSH_LIST:
2276                 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
2277 @@ -1709,7 +1740,7 @@
2278                 ut_error;
2279         }
2280  
2281 -       buf_pool_mutex_exit(buf_pool);
2282 +       //buf_pool_mutex_exit(buf_pool);
2283  
2284         buf_flush_buffered_writes();
2285  
2286 @@ -1965,7 +1996,7 @@
2287  retry:
2288         //buf_pool_mutex_enter(buf_pool);
2289         if (have_LRU_mutex)
2290 -               buf_pool_mutex_enter(buf_pool);
2291 +               mutex_enter(&buf_pool->LRU_list_mutex);
2292  
2293         n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2294  
2295 @@ -1982,15 +2013,15 @@
2296                         bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2297                         continue;
2298                 }
2299 -               block_mutex = buf_page_get_mutex(bpage);
2300 -
2301 -               mutex_enter(block_mutex);
2302 +               block_mutex = buf_page_get_mutex_enter(bpage);
2303  
2304 -               if (buf_flush_ready_for_replace(bpage)) {
2305 +               if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2306                         n_replaceable++;
2307                 }
2308  
2309 -               mutex_exit(block_mutex);
2310 +               if (block_mutex) {
2311 +                       mutex_exit(block_mutex);
2312 +               }
2313  
2314                 distance++;
2315  
2316 @@ -1999,7 +2030,7 @@
2317  
2318         //buf_pool_mutex_exit(buf_pool);
2319         if (have_LRU_mutex)
2320 -               buf_pool_mutex_exit(buf_pool);
2321 +               mutex_exit(&buf_pool->LRU_list_mutex);
2322  
2323         if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2324  
2325 @@ -2198,7 +2229,7 @@
2326  
2327         ut_ad(buf_flush_list_mutex_own(buf_pool));
2328  
2329 -       UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2330 +       UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2331                          ut_ad(ut_list_node_313->in_flush_list));
2332  
2333         bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2334 @@ -2238,7 +2269,7 @@
2335                         rnode = rbt_next(buf_pool->flush_rbt, rnode);
2336                 }
2337  
2338 -               bpage = UT_LIST_GET_NEXT(list, bpage);
2339 +               bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2340  
2341                 ut_a(!bpage || om >= bpage->oldest_modification);
2342         }
2343 diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2344 --- a/storage/innobase/buf/buf0lru.c    2010-12-03 15:22:36.321987250 +0900
2345 +++ b/storage/innobase/buf/buf0lru.c    2010-12-03 15:48:29.293023197 +0900
2346 @@ -143,8 +143,9 @@
2347  void
2348  buf_LRU_block_free_hashed_page(
2349  /*===========================*/
2350 -       buf_block_t*    block); /*!< in: block, must contain a file page and
2351 +       buf_block_t*    block,  /*!< in: block, must contain a file page and
2352                                 be in a state where it can be freed */
2353 +       ibool           have_page_hash_mutex);
2354  
2355  /******************************************************************//**
2356  Determines if the unzip_LRU list should be used for evicting a victim
2357 @@ -154,15 +155,20 @@
2358  ibool
2359  buf_LRU_evict_from_unzip_LRU(
2360  /*=========================*/
2361 -       buf_pool_t*     buf_pool)
2362 +       buf_pool_t*     buf_pool,
2363 +       ibool           have_LRU_mutex)
2364  {
2365         ulint   io_avg;
2366         ulint   unzip_avg;
2367  
2368 -       ut_ad(buf_pool_mutex_own(buf_pool));
2369 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2370  
2371 +       if (!have_LRU_mutex)
2372 +               mutex_enter(&buf_pool->LRU_list_mutex);
2373         /* If the unzip_LRU list is empty, we can only use the LRU. */
2374         if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2375 +               if (!have_LRU_mutex)
2376 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2377                 return(FALSE);
2378         }
2379  
2380 @@ -171,14 +177,20 @@
2381         decompressed pages in the buffer pool. */
2382         if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2383             <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2384 +               if (!have_LRU_mutex)
2385 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2386                 return(FALSE);
2387         }
2388  
2389         /* If eviction hasn't started yet, we assume by default
2390         that a workload is disk bound. */
2391         if (buf_pool->freed_page_clock == 0) {
2392 +               if (!have_LRU_mutex)
2393 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2394                 return(TRUE);
2395         }
2396 +       if (!have_LRU_mutex)
2397 +               mutex_exit(&buf_pool->LRU_list_mutex);
2398  
2399         /* Calculate the average over past intervals, and add the values
2400         of the current interval. */
2401 @@ -246,19 +258,23 @@
2402         page_arr = ut_malloc(
2403                 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2404  
2405 -       buf_pool_mutex_enter(buf_pool);
2406 +       //buf_pool_mutex_enter(buf_pool);
2407 +       mutex_enter(&buf_pool->LRU_list_mutex);
2408  
2409  scan_again:
2410         num_entries = 0;
2411         bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2412  
2413         while (bpage != NULL) {
2414 -               mutex_t*        block_mutex = buf_page_get_mutex(bpage);
2415 +               mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
2416                 buf_page_t*     prev_bpage;
2417  
2418 -               mutex_enter(block_mutex);
2419                 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2420  
2421 +               if (!block_mutex) {
2422 +                       goto next_page;
2423 +               }
2424 +
2425                 ut_a(buf_page_in_file(bpage));
2426  
2427                 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2428 @@ -287,14 +303,16 @@
2429  
2430                         /* Array full. We release the buf_pool->mutex to
2431                         obey the latching order. */
2432 -                       buf_pool_mutex_exit(buf_pool);
2433 +                       //buf_pool_mutex_exit(buf_pool);
2434 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2435  
2436                         buf_LRU_drop_page_hash_batch(
2437                                 id, zip_size, page_arr, num_entries);
2438  
2439                         num_entries = 0;
2440  
2441 -                       buf_pool_mutex_enter(buf_pool);
2442 +                       //buf_pool_mutex_enter(buf_pool);
2443 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2444                 } else {
2445                         mutex_exit(block_mutex);
2446                 }
2447 @@ -319,7 +337,8 @@
2448                 }
2449         }
2450  
2451 -       buf_pool_mutex_exit(buf_pool);
2452 +       //buf_pool_mutex_exit(buf_pool);
2453 +       mutex_exit(&buf_pool->LRU_list_mutex);
2454  
2455         /* Drop any remaining batch of search hashed pages. */
2456         buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2457 @@ -341,7 +360,9 @@
2458         ibool           all_freed;
2459  
2460  scan_again:
2461 -       buf_pool_mutex_enter(buf_pool);
2462 +       //buf_pool_mutex_enter(buf_pool);
2463 +       mutex_enter(&buf_pool->LRU_list_mutex);
2464 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
2465  
2466         all_freed = TRUE;
2467  
2468 @@ -369,8 +390,16 @@
2469  
2470                         all_freed = FALSE;
2471                 } else {
2472 -                       mutex_t* block_mutex = buf_page_get_mutex(bpage);
2473 -                       mutex_enter(block_mutex);
2474 +                       mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2475 +
2476 +                       if (!block_mutex) {
2477 +                               /* This case should be impossible...
2478 +                               Something is wrong, so we will scan_again */
2479 +
2480 +                               all_freed = FALSE;
2481 +
2482 +                               goto next_page_no_mutex;
2483 +                       }
2484  
2485                         if (bpage->buf_fix_count > 0) {
2486  
2487 @@ -429,7 +458,9 @@
2488                                 ulint   page_no;
2489                                 ulint   zip_size;
2490  
2491 -                               buf_pool_mutex_exit(buf_pool);
2492 +                               //buf_pool_mutex_exit(buf_pool);
2493 +                               mutex_exit(&buf_pool->LRU_list_mutex);
2494 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2495  
2496                                 zip_size = buf_page_get_zip_size(bpage);
2497                                 page_no = buf_page_get_page_no(bpage);
2498 @@ -454,7 +485,7 @@
2499                         if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2500                             != BUF_BLOCK_ZIP_FREE) {
2501                                 buf_LRU_block_free_hashed_page((buf_block_t*)
2502 -                                                              bpage);
2503 +                                                              bpage, TRUE);
2504                         } else {
2505                                 /* The block_mutex should have been
2506                                 released by buf_LRU_block_remove_hashed_page()
2507 @@ -486,7 +517,9 @@
2508                 bpage = prev_bpage;
2509         }
2510  
2511 -       buf_pool_mutex_exit(buf_pool);
2512 +       //buf_pool_mutex_exit(buf_pool);
2513 +       mutex_exit(&buf_pool->LRU_list_mutex);
2514 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
2515  
2516         if (!all_freed) {
2517                 os_thread_sleep(20000);
2518 @@ -532,7 +565,9 @@
2519         buf_page_t*     b;
2520         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2521  
2522 -       ut_ad(buf_pool_mutex_own(buf_pool));
2523 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2524 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2525 +       ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2526         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2527  
2528         /* Find the first successor of bpage in the LRU list
2529 @@ -540,17 +575,17 @@
2530         b = bpage;
2531         do {
2532                 b = UT_LIST_GET_NEXT(LRU, b);
2533 -       } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2534 +       } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2535  
2536         /* Insert bpage before b, i.e., after the predecessor of b. */
2537         if (b) {
2538 -               b = UT_LIST_GET_PREV(list, b);
2539 +               b = UT_LIST_GET_PREV(zip_list, b);
2540         }
2541  
2542         if (b) {
2543 -               UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2544 +               UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2545         } else {
2546 -               UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2547 +               UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2548         }
2549  }
2550  
2551 @@ -563,18 +598,19 @@
2552  buf_LRU_free_from_unzip_LRU_list(
2553  /*=============================*/
2554         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
2555 -       ulint           n_iterations)   /*!< in: how many times this has
2556 +       ulint           n_iterations,   /*!< in: how many times this has
2557                                         been called repeatedly without
2558                                         result: a high value means that
2559                                         we should search farther; we will
2560                                         search n_iterations / 5 of the
2561                                         unzip_LRU list, or nothing if
2562                                         n_iterations >= 5 */
2563 +       ibool           have_LRU_mutex)
2564  {
2565         buf_block_t*    block;
2566         ulint           distance;
2567  
2568 -       ut_ad(buf_pool_mutex_own(buf_pool));
2569 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2570  
2571         /* Theoratically it should be much easier to find a victim
2572         from unzip_LRU as we can choose even a dirty block (as we'll
2573 @@ -584,7 +620,7 @@
2574         if we have done five iterations so far. */
2575  
2576         if (UNIV_UNLIKELY(n_iterations >= 5)
2577 -           || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2578 +           || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2579  
2580                 return(FALSE);
2581         }
2582 @@ -592,18 +628,25 @@
2583         distance = 100 + (n_iterations
2584                           * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2585  
2586 +restart:
2587         for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2588              UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2589              block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2590  
2591                 enum buf_lru_free_block_status  freed;
2592  
2593 +               mutex_enter(&block->mutex);
2594 +               if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2595 +                   || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2596 +                       mutex_exit(&block->mutex);
2597 +                       goto restart;
2598 +               }
2599 +
2600                 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2601                 ut_ad(block->in_unzip_LRU_list);
2602                 ut_ad(block->page.in_LRU_list);
2603  
2604 -               mutex_enter(&block->mutex);
2605 -               freed = buf_LRU_free_block(&block->page, FALSE, NULL);
2606 +               freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
2607                 mutex_exit(&block->mutex);
2608  
2609                 switch (freed) {
2610 @@ -637,21 +680,23 @@
2611  buf_LRU_free_from_common_LRU_list(
2612  /*==============================*/
2613         buf_pool_t*     buf_pool,
2614 -       ulint           n_iterations)
2615 +       ulint           n_iterations,
2616                                 /*!< in: how many times this has been called
2617                                 repeatedly without result: a high value means
2618                                 that we should search farther; if
2619                                 n_iterations < 10, then we search
2620                                 n_iterations / 10 * buf_pool->curr_size
2621                                 pages from the end of the LRU list */
2622 +       ibool           have_LRU_mutex)
2623  {
2624         buf_page_t*     bpage;
2625         ulint           distance;
2626  
2627 -       ut_ad(buf_pool_mutex_own(buf_pool));
2628 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2629  
2630         distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2631  
2632 +restart:
2633         for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2634              UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2635              bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2636 @@ -659,14 +704,23 @@
2637                 enum buf_lru_free_block_status  freed;
2638                 unsigned                        accessed;
2639                 mutex_t*                        block_mutex
2640 -                       = buf_page_get_mutex(bpage);
2641 +                       = buf_page_get_mutex_enter(bpage);
2642 +
2643 +               if (!block_mutex) {
2644 +                       goto restart;
2645 +               }
2646 +
2647 +               if (!bpage->in_LRU_list
2648 +                   || !buf_page_in_file(bpage)) {
2649 +                       mutex_exit(block_mutex);
2650 +                       goto restart;
2651 +               }
2652  
2653                 ut_ad(buf_page_in_file(bpage));
2654                 ut_ad(bpage->in_LRU_list);
2655  
2656 -               mutex_enter(block_mutex);
2657                 accessed = buf_page_is_accessed(bpage);
2658 -               freed = buf_LRU_free_block(bpage, TRUE, NULL);
2659 +               freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
2660                 mutex_exit(block_mutex);
2661  
2662                 switch (freed) {
2663 @@ -718,16 +772,23 @@
2664                                 n_iterations / 5 of the unzip_LRU list. */
2665  {
2666         ibool   freed = FALSE;
2667 +       ibool   have_LRU_mutex = FALSE;
2668  
2669 -       buf_pool_mutex_enter(buf_pool);
2670 +       if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2671 +               have_LRU_mutex = TRUE;
2672 +
2673 +       //buf_pool_mutex_enter(buf_pool);
2674 +       if (have_LRU_mutex)
2675 +               mutex_enter(&buf_pool->LRU_list_mutex);
2676  
2677 -       freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2678 +       freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2679  
2680         if (!freed) {
2681                 freed = buf_LRU_free_from_common_LRU_list(
2682 -                       buf_pool, n_iterations);
2683 +                       buf_pool, n_iterations, have_LRU_mutex);
2684         }
2685  
2686 +       buf_pool_mutex_enter(buf_pool);
2687         if (!freed) {
2688                 buf_pool->LRU_flush_ended = 0;
2689         } else if (buf_pool->LRU_flush_ended > 0) {
2690 @@ -735,6 +796,8 @@
2691         }
2692  
2693         buf_pool_mutex_exit(buf_pool);
2694 +       if (have_LRU_mutex)
2695 +               mutex_exit(&buf_pool->LRU_list_mutex);
2696  
2697         return(freed);
2698  }
2699 @@ -795,7 +858,9 @@
2700  
2701                 buf_pool = buf_pool_from_array(i);
2702  
2703 -               buf_pool_mutex_enter(buf_pool);
2704 +               //buf_pool_mutex_enter(buf_pool);
2705 +               mutex_enter(&buf_pool->LRU_list_mutex);
2706 +               mutex_enter(&buf_pool->free_list_mutex);
2707  
2708                 if (!recv_recovery_on
2709                     && UT_LIST_GET_LEN(buf_pool->free)
2710 @@ -805,7 +870,9 @@
2711                         ret = TRUE;
2712                 }
2713  
2714 -               buf_pool_mutex_exit(buf_pool);
2715 +               //buf_pool_mutex_exit(buf_pool);
2716 +               mutex_exit(&buf_pool->LRU_list_mutex);
2717 +               mutex_exit(&buf_pool->free_list_mutex);
2718         }
2719  
2720         return(ret);
2721 @@ -823,9 +890,10 @@
2722  {
2723         buf_block_t*    block;
2724  
2725 -       ut_ad(buf_pool_mutex_own(buf_pool));
2726 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2727  
2728 -       block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2729 +       mutex_enter(&buf_pool->free_list_mutex);
2730 +       block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2731  
2732         if (block) {
2733  
2734 @@ -834,7 +902,9 @@
2735                 ut_ad(!block->page.in_flush_list);
2736                 ut_ad(!block->page.in_LRU_list);
2737                 ut_a(!buf_page_in_file(&block->page));
2738 -               UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2739 +               UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2740 +
2741 +               mutex_exit(&buf_pool->free_list_mutex);
2742  
2743                 mutex_enter(&block->mutex);
2744  
2745 @@ -844,6 +914,8 @@
2746                 ut_ad(buf_pool_from_block(block) == buf_pool);
2747  
2748                 mutex_exit(&block->mutex);
2749 +       } else {
2750 +               mutex_exit(&buf_pool->free_list_mutex);
2751         }
2752  
2753         return(block);
2754 @@ -868,7 +940,7 @@
2755         ibool           mon_value_was   = FALSE;
2756         ibool           started_monitor = FALSE;
2757  loop:
2758 -       buf_pool_mutex_enter(buf_pool);
2759 +       //buf_pool_mutex_enter(buf_pool);
2760  
2761         if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2762             + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2763 @@ -951,8 +1023,10 @@
2764                         ibool   lru;
2765                         page_zip_set_size(&block->page.zip, zip_size);
2766  
2767 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2768                         block->page.zip.data = buf_buddy_alloc(
2769 -                               buf_pool, zip_size, &lru);
2770 +                               buf_pool, zip_size, &lru, FALSE);
2771 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2772  
2773                         UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
2774                 } else {
2775 @@ -960,7 +1034,7 @@
2776                         block->page.zip.data = NULL;
2777                 }
2778  
2779 -               buf_pool_mutex_exit(buf_pool);
2780 +               //buf_pool_mutex_exit(buf_pool);
2781  
2782                 if (started_monitor) {
2783                         srv_print_innodb_monitor = mon_value_was;
2784 @@ -972,7 +1046,7 @@
2785         /* If no block was in the free list, search from the end of the LRU
2786         list and try to free a block there */
2787  
2788 -       buf_pool_mutex_exit(buf_pool);
2789 +       //buf_pool_mutex_exit(buf_pool);
2790  
2791         freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
2792  
2793 @@ -1058,7 +1132,8 @@
2794         ulint   new_len;
2795  
2796         ut_a(buf_pool->LRU_old);
2797 -       ut_ad(buf_pool_mutex_own(buf_pool));
2798 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2799 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2800         ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2801         ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2802  #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2803 @@ -1124,7 +1199,8 @@
2804  {
2805         buf_page_t*     bpage;
2806  
2807 -       ut_ad(buf_pool_mutex_own(buf_pool));
2808 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2809 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2810         ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2811  
2812         /* We first initialize all blocks in the LRU list as old and then use
2813 @@ -1159,13 +1235,14 @@
2814         ut_ad(buf_pool);
2815         ut_ad(bpage);
2816         ut_ad(buf_page_in_file(bpage));
2817 -       ut_ad(buf_pool_mutex_own(buf_pool));
2818 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2819 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2820  
2821         if (buf_page_belongs_to_unzip_LRU(bpage)) {
2822                 buf_block_t*    block = (buf_block_t*) bpage;
2823  
2824                 ut_ad(block->in_unzip_LRU_list);
2825 -               ut_d(block->in_unzip_LRU_list = FALSE);
2826 +               block->in_unzip_LRU_list = FALSE;
2827  
2828                 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2829         }
2830 @@ -1183,7 +1260,8 @@
2831  
2832         ut_ad(buf_pool);
2833         ut_ad(bpage);
2834 -       ut_ad(buf_pool_mutex_own(buf_pool));
2835 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2836 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2837  
2838         ut_a(buf_page_in_file(bpage));
2839  
2840 @@ -1260,12 +1338,13 @@
2841  
2842         ut_ad(buf_pool);
2843         ut_ad(block);
2844 -       ut_ad(buf_pool_mutex_own(buf_pool));
2845 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2846 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2847  
2848         ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2849  
2850         ut_ad(!block->in_unzip_LRU_list);
2851 -       ut_d(block->in_unzip_LRU_list = TRUE);
2852 +       block->in_unzip_LRU_list = TRUE;
2853  
2854         if (old) {
2855                 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2856 @@ -1286,7 +1365,8 @@
2857  
2858         ut_ad(buf_pool);
2859         ut_ad(bpage);
2860 -       ut_ad(buf_pool_mutex_own(buf_pool));
2861 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2862 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2863  
2864         ut_a(buf_page_in_file(bpage));
2865  
2866 @@ -1337,7 +1417,8 @@
2867  
2868         ut_ad(buf_pool);
2869         ut_ad(bpage);
2870 -       ut_ad(buf_pool_mutex_own(buf_pool));
2871 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2872 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2873  
2874         ut_a(buf_page_in_file(bpage));
2875         ut_ad(!bpage->in_LRU_list);
2876 @@ -1416,7 +1497,8 @@
2877  {
2878         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2879  
2880 -       ut_ad(buf_pool_mutex_own(buf_pool));
2881 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2882 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2883  
2884         if (bpage->old) {
2885                 buf_pool->stat.n_pages_made_young++;
2886 @@ -1458,19 +1540,20 @@
2887         buf_page_t*     bpage,  /*!< in: block to be freed */
2888         ibool           zip,    /*!< in: TRUE if should remove also the
2889                                 compressed page of an uncompressed page */
2890 -       ibool*          buf_pool_mutex_released)
2891 +       ibool*          buf_pool_mutex_released,
2892                                 /*!< in: pointer to a variable that will
2893                                 be assigned TRUE if buf_pool_mutex
2894                                 was temporarily released, or NULL */
2895 +       ibool           have_LRU_mutex)
2896  {
2897         buf_page_t*     b = NULL;
2898         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2899         mutex_t*        block_mutex = buf_page_get_mutex(bpage);
2900  
2901 -       ut_ad(buf_pool_mutex_own(buf_pool));
2902 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2903         ut_ad(mutex_own(block_mutex));
2904         ut_ad(buf_page_in_file(bpage));
2905 -       ut_ad(bpage->in_LRU_list);
2906 +       //ut_ad(bpage->in_LRU_list);
2907         ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2908  #if UNIV_WORD_SIZE == 4
2909         /* On 32-bit systems, there is no padding in buf_page_t.  On
2910 @@ -1479,7 +1562,7 @@
2911         UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2912  #endif
2913  
2914 -       if (!buf_page_can_relocate(bpage)) {
2915 +       if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2916  
2917                 /* Do not free buffer-fixed or I/O-fixed blocks. */
2918                 return(BUF_LRU_NOT_FREED);
2919 @@ -1511,15 +1594,15 @@
2920                 If it cannot be allocated (without freeing a block
2921                 from the LRU list), refuse to free bpage. */
2922  alloc:
2923 -               buf_pool_mutex_exit_forbid(buf_pool);
2924 -               b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2925 -               buf_pool_mutex_exit_allow(buf_pool);
2926 +               //buf_pool_mutex_exit_forbid(buf_pool);
2927 +               b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2928 +               //buf_pool_mutex_exit_allow(buf_pool);
2929  
2930                 if (UNIV_UNLIKELY(!b)) {
2931                         return(BUF_LRU_CANNOT_RELOCATE);
2932                 }
2933  
2934 -               memcpy(b, bpage, sizeof *b);
2935 +               //memcpy(b, bpage, sizeof *b);
2936         }
2937  
2938  #ifdef UNIV_DEBUG
2939 @@ -1530,6 +1613,39 @@
2940         }
2941  #endif /* UNIV_DEBUG */
2942  
2943 +       /* To keep the latch order, release block_mutex and re-enter it below */
2944 +       mutex_exit(block_mutex);
2945 +
2946 +       if (!have_LRU_mutex)
2947 +               mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2948 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
2949 +       mutex_enter(block_mutex);
2950 +
2951 +       /* recheck states of block */
2952 +       if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2953 +           || !buf_page_can_relocate(bpage)) {
2954 +not_freed:
2955 +               if (b) {
2956 +                       buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2957 +               }
2958 +               if (!have_LRU_mutex)
2959 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2960 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2961 +               return(BUF_LRU_NOT_FREED);
2962 +       } else if (zip || !bpage->zip.data) {
2963 +               if (bpage->oldest_modification)
2964 +                       goto not_freed;
2965 +       } else if (bpage->oldest_modification) {
2966 +               if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2967 +                       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2968 +                       goto not_freed;
2969 +               }
2970 +       }
2971 +
2972 +       if (b) {
2973 +               memcpy(b, bpage, sizeof *b);
2974 +       }
2975 +
2976         if (buf_LRU_block_remove_hashed_page(bpage, zip)
2977             != BUF_BLOCK_ZIP_FREE) {
2978                 ut_a(bpage->buf_fix_count == 0);
2979 @@ -1546,6 +1662,10 @@
2980  
2981                         ut_a(!hash_b);
2982  
2983 +                       while (prev_b && !prev_b->in_LRU_list) {
2984 +                               prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2985 +                       }
2986 +
2987                         b->state = b->oldest_modification
2988                                 ? BUF_BLOCK_ZIP_DIRTY
2989                                 : BUF_BLOCK_ZIP_PAGE;
2990 @@ -1642,7 +1762,9 @@
2991                         *buf_pool_mutex_released = TRUE;
2992                 }
2993  
2994 -               buf_pool_mutex_exit(buf_pool);
2995 +               //buf_pool_mutex_exit(buf_pool);
2996 +               mutex_exit(&buf_pool->LRU_list_mutex);
2997 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2998                 mutex_exit(block_mutex);
2999  
3000                 /* Remove possible adaptive hash index on the page.
3001 @@ -1674,7 +1796,9 @@
3002                                 : BUF_NO_CHECKSUM_MAGIC);
3003                 }
3004  
3005 -               buf_pool_mutex_enter(buf_pool);
3006 +               //buf_pool_mutex_enter(buf_pool);
3007 +               if (have_LRU_mutex)
3008 +                       mutex_enter(&buf_pool->LRU_list_mutex);
3009                 mutex_enter(block_mutex);
3010  
3011                 if (b) {
3012 @@ -1684,13 +1808,17 @@
3013                         mutex_exit(&buf_pool->zip_mutex);
3014                 }
3015  
3016 -               buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
3017 +               buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
3018         } else {
3019                 /* The block_mutex should have been released by
3020                 buf_LRU_block_remove_hashed_page() when it returns
3021                 BUF_BLOCK_ZIP_FREE. */
3022                 ut_ad(block_mutex == &buf_pool->zip_mutex);
3023                 mutex_enter(block_mutex);
3024 +
3025 +               if (!have_LRU_mutex)
3026 +                       mutex_exit(&buf_pool->LRU_list_mutex);
3027 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3028         }
3029  
3030         return(BUF_LRU_FREED);
3031 @@ -1702,13 +1830,14 @@
3032  void
3033  buf_LRU_block_free_non_file_page(
3034  /*=============================*/
3035 -       buf_block_t*    block)  /*!< in: block, must not contain a file page */
3036 +       buf_block_t*    block,  /*!< in: block, must not contain a file page */
3037 +       ibool           have_page_hash_mutex)
3038  {
3039         void*           data;
3040         buf_pool_t*     buf_pool = buf_pool_from_block(block);
3041  
3042         ut_ad(block);
3043 -       ut_ad(buf_pool_mutex_own(buf_pool));
3044 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3045         ut_ad(mutex_own(&block->mutex));
3046  
3047         switch (buf_block_get_state(block)) {
3048 @@ -1742,18 +1871,21 @@
3049         if (data) {
3050                 block->page.zip.data = NULL;
3051                 mutex_exit(&block->mutex);
3052 -               buf_pool_mutex_exit_forbid(buf_pool);
3053 +               //buf_pool_mutex_exit_forbid(buf_pool);
3054  
3055                 buf_buddy_free(
3056 -                       buf_pool, data, page_zip_get_size(&block->page.zip));
3057 +                       buf_pool, data, page_zip_get_size(&block->page.zip),
3058 +                       have_page_hash_mutex);
3059  
3060 -               buf_pool_mutex_exit_allow(buf_pool);
3061 +               //buf_pool_mutex_exit_allow(buf_pool);
3062                 mutex_enter(&block->mutex);
3063                 page_zip_set_size(&block->page.zip, 0);
3064         }
3065  
3066 -       UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3067 +       mutex_enter(&buf_pool->free_list_mutex);
3068 +       UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3069         ut_d(block->page.in_free_list = TRUE);
3070 +       mutex_exit(&buf_pool->free_list_mutex);
3071  
3072         UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3073  }
3074 @@ -1783,7 +1915,11 @@
3075         buf_pool_t*             buf_pool = buf_pool_from_bpage(bpage);
3076  
3077         ut_ad(bpage);
3078 -       ut_ad(buf_pool_mutex_own(buf_pool));
3079 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3080 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3081 +#ifdef UNIV_SYNC_DEBUG
3082 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3083 +#endif
3084         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3085  
3086         ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3087 @@ -1891,7 +2027,9 @@
3088  
3089  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3090                 mutex_exit(buf_page_get_mutex(bpage));
3091 -               buf_pool_mutex_exit(buf_pool);
3092 +               //buf_pool_mutex_exit(buf_pool);
3093 +               mutex_exit(&buf_pool->LRU_list_mutex);
3094 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3095                 buf_print();
3096                 buf_LRU_print();
3097                 buf_validate();
3098 @@ -1912,17 +2050,17 @@
3099                 ut_a(bpage->zip.data);
3100                 ut_a(buf_page_get_zip_size(bpage));
3101  
3102 -               UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3103 +               UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3104  
3105                 mutex_exit(&buf_pool->zip_mutex);
3106 -               buf_pool_mutex_exit_forbid(buf_pool);
3107 +               //buf_pool_mutex_exit_forbid(buf_pool);
3108  
3109                 buf_buddy_free(
3110                         buf_pool, bpage->zip.data,
3111 -                       page_zip_get_size(&bpage->zip));
3112 +                       page_zip_get_size(&bpage->zip), TRUE);
3113  
3114 -               buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3115 -               buf_pool_mutex_exit_allow(buf_pool);
3116 +               buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3117 +               //buf_pool_mutex_exit_allow(buf_pool);
3118  
3119                 UNIV_MEM_UNDESC(bpage);
3120                 return(BUF_BLOCK_ZIP_FREE);
3121 @@ -1945,13 +2083,13 @@
3122                         ut_ad(!bpage->in_flush_list);
3123                         ut_ad(!bpage->in_LRU_list);
3124                         mutex_exit(&((buf_block_t*) bpage)->mutex);
3125 -                       buf_pool_mutex_exit_forbid(buf_pool);
3126 +                       //buf_pool_mutex_exit_forbid(buf_pool);
3127  
3128                         buf_buddy_free(
3129                                 buf_pool, data,
3130 -                               page_zip_get_size(&bpage->zip));
3131 +                               page_zip_get_size(&bpage->zip), TRUE);
3132  
3133 -                       buf_pool_mutex_exit_allow(buf_pool);
3134 +                       //buf_pool_mutex_exit_allow(buf_pool);
3135                         mutex_enter(&((buf_block_t*) bpage)->mutex);
3136                         page_zip_set_size(&bpage->zip, 0);
3137                 }
3138 @@ -1977,18 +2115,19 @@
3139  void
3140  buf_LRU_block_free_hashed_page(
3141  /*===========================*/
3142 -       buf_block_t*    block)  /*!< in: block, must contain a file page and
3143 +       buf_block_t*    block,  /*!< in: block, must contain a file page and
3144                                 be in a state where it can be freed */
3145 +       ibool           have_page_hash_mutex) /*!< in: forwarded unchanged to buf_LRU_block_free_non_file_page() */
3146  {
3147  #ifdef UNIV_DEBUG
3148 -       buf_pool_t*     buf_pool = buf_pool_from_block(block);
3149 -       ut_ad(buf_pool_mutex_own(buf_pool));
3150 +       //buf_pool_t*   buf_pool = buf_pool_from_block(block);
3151 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3152  #endif
3153         ut_ad(mutex_own(&block->mutex));
3154  
3155         buf_block_set_state(block, BUF_BLOCK_MEMORY);
3156  
3157 -       buf_LRU_block_free_non_file_page(block);
3158 +       buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3159  }
3160  
3161  /**********************************************************************//**
3162 @@ -2015,7 +2154,8 @@
3163         }
3164  
3165         if (adjust) {
3166 -               buf_pool_mutex_enter(buf_pool);
3167 +               //buf_pool_mutex_enter(buf_pool);
3168 +               mutex_enter(&buf_pool->LRU_list_mutex);
3169  
3170                 if (ratio != buf_pool->LRU_old_ratio) {
3171                         buf_pool->LRU_old_ratio = ratio;
3172 @@ -2027,7 +2167,8 @@
3173                         }
3174                 }
3175  
3176 -               buf_pool_mutex_exit(buf_pool);
3177 +               //buf_pool_mutex_exit(buf_pool);
3178 +               mutex_exit(&buf_pool->LRU_list_mutex);
3179         } else {
3180                 buf_pool->LRU_old_ratio = ratio;
3181         }
3182 @@ -2124,7 +2265,8 @@
3183         ulint           new_len;
3184  
3185         ut_ad(buf_pool);
3186 -       buf_pool_mutex_enter(buf_pool);
3187 +       //buf_pool_mutex_enter(buf_pool);
3188 +       mutex_enter(&buf_pool->LRU_list_mutex);
3189  
3190         if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3191  
3192 @@ -2185,16 +2327,22 @@
3193  
3194         ut_a(buf_pool->LRU_old_len == old_len);
3195  
3196 -       UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3197 +       mutex_exit(&buf_pool->LRU_list_mutex);
3198 +       mutex_enter(&buf_pool->free_list_mutex);
3199 +
3200 +       UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3201                          ut_ad(ut_list_node_313->in_free_list));
3202  
3203         for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3204              bpage != NULL;
3205 -            bpage = UT_LIST_GET_NEXT(list, bpage)) {
3206 +            bpage = UT_LIST_GET_NEXT(free, bpage)) {
3207  
3208                 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3209         }
3210  
3211 +       mutex_exit(&buf_pool->free_list_mutex);
3212 +       mutex_enter(&buf_pool->LRU_list_mutex);
3213 +
3214         UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3215                          ut_ad(ut_list_node_313->in_unzip_LRU_list
3216                                && ut_list_node_313->page.in_LRU_list));
3217 @@ -2208,7 +2356,8 @@
3218                 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3219         }
3220  
3221 -       buf_pool_mutex_exit(buf_pool);
3222 +       //buf_pool_mutex_exit(buf_pool);
3223 +       mutex_exit(&buf_pool->LRU_list_mutex);
3224  }
3225  
3226  /**********************************************************************//**
3227 @@ -2244,7 +2393,8 @@
3228         const buf_page_t*       bpage;
3229  
3230         ut_ad(buf_pool);
3231 -       buf_pool_mutex_enter(buf_pool);
3232 +       //buf_pool_mutex_enter(buf_pool);
3233 +       mutex_enter(&buf_pool->LRU_list_mutex);
3234  
3235         bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3236  
3237 @@ -2301,7 +2451,8 @@
3238                 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3239         }
3240  
3241 -       buf_pool_mutex_exit(buf_pool);
3242 +       //buf_pool_mutex_exit(buf_pool);
3243 +       mutex_exit(&buf_pool->LRU_list_mutex);
3244  }
3245  
3246  /**********************************************************************//**
3247 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3248 --- a/storage/innobase/buf/buf0rea.c    2010-12-03 15:22:36.323977308 +0900
3249 +++ b/storage/innobase/buf/buf0rea.c    2010-12-03 15:48:29.296024468 +0900
3250 @@ -311,6 +311,7 @@
3251  
3252                 return(0);
3253         }
3254 +       buf_pool_mutex_exit(buf_pool);
3255  
3256         /* Check that almost all pages in the area have been accessed; if
3257         offset == low, the accesses must be in a descending order, otherwise,
3258 @@ -329,6 +330,7 @@
3259  
3260         fail_count = 0;
3261  
3262 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
3263         for (i = low; i < high; i++) {
3264                 bpage = buf_page_hash_get(buf_pool, space, i);
3265  
3266 @@ -356,7 +358,8 @@
3267  
3268                 if (fail_count > threshold) {
3269                         /* Too many failures: return */
3270 -                       buf_pool_mutex_exit(buf_pool);
3271 +                       //buf_pool_mutex_exit(buf_pool);
3272 +                       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3273                         return(0);
3274                 }
3275  
3276 @@ -371,7 +374,8 @@
3277         bpage = buf_page_hash_get(buf_pool, space, offset);
3278  
3279         if (bpage == NULL) {
3280 -               buf_pool_mutex_exit(buf_pool);
3281 +               //buf_pool_mutex_exit(buf_pool);
3282 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
3283  
3284                 return(0);
3285         }
3286 @@ -397,7 +401,8 @@
3287         pred_offset = fil_page_get_prev(frame);
3288         succ_offset = fil_page_get_next(frame);
3289  
3290 -       buf_pool_mutex_exit(buf_pool);
3291 +       //buf_pool_mutex_exit(buf_pool);
3292 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3293  
3294         if ((offset == low) && (succ_offset == offset + 1)) {
3295  
3296 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3297 --- a/storage/innobase/handler/ha_innodb.cc     2010-12-03 15:48:03.048955897 +0900
3298 +++ b/storage/innobase/handler/ha_innodb.cc     2010-12-03 15:48:29.304024564 +0900
3299 @@ -245,6 +245,10 @@
3300  #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3301         {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3302         {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3303 +       {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3304 +       {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3305 +       {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3306 +       {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3307         {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3308         {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3309         {&dict_sys_mutex_key, "dict_sys_mutex", 0},
3310 @@ -295,6 +299,7 @@
3311         {&archive_lock_key, "archive_lock", 0},
3312  #  endif /* UNIV_LOG_ARCHIVE */
3313         {&btr_search_latch_key, "btr_search_latch", 0},
3314 +       {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3315  #  ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3316         {&buf_block_lock_key, "buf_block_lock", 0},
3317  #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3318 diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3319 --- a/storage/innobase/handler/i_s.cc   2010-12-03 15:37:45.517105700 +0900
3320 +++ b/storage/innobase/handler/i_s.cc   2010-12-03 15:48:29.331024462 +0900
3321 @@ -1566,7 +1566,8 @@
3322  
3323                 buf_pool = buf_pool_from_array(i);
3324  
3325 -               buf_pool_mutex_enter(buf_pool);
3326 +               //buf_pool_mutex_enter(buf_pool);
3327 +               mutex_enter(&buf_pool->zip_free_mutex);
3328  
3329                 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3330                         buf_buddy_stat_t*       buddy_stat;
3331 @@ -1596,7 +1597,8 @@
3332                         }
3333                 }
3334  
3335 -               buf_pool_mutex_exit(buf_pool);
3336 +               //buf_pool_mutex_exit(buf_pool);
3337 +               mutex_exit(&buf_pool->zip_free_mutex);
3338  
3339                 if (status) {
3340                         break;
3341 diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3342 --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3343 +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
3344 @@ -3705,9 +3705,11 @@
3345                 ulint           fold = buf_page_address_fold(space, page_no);
3346                 buf_pool_t*     buf_pool = buf_pool_get(space, page_no);
3347  
3348 -               buf_pool_mutex_enter(buf_pool);
3349 +               //buf_pool_mutex_enter(buf_pool);
3350 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
3351                 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3352 -               buf_pool_mutex_exit(buf_pool);
3353 +               //buf_pool_mutex_exit(buf_pool);
3354 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
3355  
3356                 if (UNIV_LIKELY_NULL(bpage)) {
3357                         /* A buffer pool watch has been set or the
3358 diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3359 --- a/storage/innobase/include/buf0buddy.h      2010-11-03 07:01:13.000000000 +0900
3360 +++ b/storage/innobase/include/buf0buddy.h      2010-12-03 15:48:29.338023826 +0900
3361 @@ -51,10 +51,11 @@
3362         buf_pool_t*     buf_pool,
3363                         /*!< buffer pool in which the block resides */
3364         ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
3365 -       ibool*  lru)    /*!< in: pointer to a variable that will be assigned
3366 +       ibool*  lru,    /*!< in: pointer to a variable that will be assigned
3367                         TRUE if storage was allocated from the LRU list
3368                         and buf_pool->mutex was temporarily released,
3369                         or NULL if the LRU list should not be used */
3370 +       ibool   have_page_hash_mutex)
3371         __attribute__((malloc));
3372  
3373  /**********************************************************************//**
3374 @@ -67,7 +68,8 @@
3375                         /*!< buffer pool in which the block resides */
3376         void*   buf,    /*!< in: block to be freed, must not be
3377                         pointed to by the buffer pool */
3378 -       ulint   size)   /*!< in: block size, up to UNIV_PAGE_SIZE */
3379 +       ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
3380 +       ibool   have_page_hash_mutex)
3381         __attribute__((nonnull));
3382  
3383  #ifndef UNIV_NONINL
3384 diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3385 --- a/storage/innobase/include/buf0buddy.ic     2010-11-03 07:01:13.000000000 +0900
3386 +++ b/storage/innobase/include/buf0buddy.ic     2010-12-03 15:48:29.339040413 +0900
3387 @@ -46,10 +46,11 @@
3388                         /*!< in: buffer pool in which the page resides */
3389         ulint   i,      /*!< in: index of buf_pool->zip_free[],
3390                         or BUF_BUDDY_SIZES */
3391 -       ibool*  lru)    /*!< in: pointer to a variable that will be assigned
3392 +       ibool*  lru,    /*!< in: pointer to a variable that will be assigned
3393                         TRUE if storage was allocated from the LRU list
3394                         and buf_pool->mutex was temporarily released,
3395                         or NULL if the LRU list should not be used */
3396 +       ibool   have_page_hash_mutex)
3397         __attribute__((malloc));
3398  
3399  /**********************************************************************//**
3400 @@ -61,8 +62,9 @@
3401         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
3402         void*           buf,            /*!< in: block to be freed, must not be
3403                                         pointed to by the buffer pool */
3404 -       ulint           i)              /*!< in: index of buf_pool->zip_free[],
3405 +       ulint           i,              /*!< in: index of buf_pool->zip_free[],
3406                                         or BUF_BUDDY_SIZES */
3407 +       ibool           have_page_hash_mutex)
3408         __attribute__((nonnull));
3409  
3410  /**********************************************************************//**
3411 @@ -102,16 +104,17 @@
3412                                         the page resides */
3413         ulint           size,           /*!< in: block size, up to
3414                                         UNIV_PAGE_SIZE */
3415 -       ibool*          lru)            /*!< in: pointer to a variable
3416 +       ibool*          lru,            /*!< in: pointer to a variable
3417                                         that will be assigned TRUE if
3418                                         storage was allocated from the
3419                                         LRU list and buf_pool->mutex was
3420                                         temporarily released, or NULL if
3421                                         the LRU list should not be used */
3422 +       ibool           have_page_hash_mutex)
3423  {
3424 -       ut_ad(buf_pool_mutex_own(buf_pool));
3425 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3426  
3427 -       return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3428 +       return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3429  }
3430  
3431  /**********************************************************************//**
3432 @@ -123,12 +126,25 @@
3433         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
3434         void*           buf,            /*!< in: block to be freed, must not be
3435                                         pointed to by the buffer pool */
3436 -       ulint           size)           /*!< in: block size, up to
3437 +       ulint           size,           /*!< in: block size, up to
3438                                         UNIV_PAGE_SIZE */
3439 +       ibool           have_page_hash_mutex)
3440  {
3441 -       ut_ad(buf_pool_mutex_own(buf_pool));
3442 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3443 +
3444 +       if (!have_page_hash_mutex) {
3445 +               mutex_enter(&buf_pool->LRU_list_mutex);
3446 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
3447 +       }
3448  
3449 -       buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3450 +       mutex_enter(&buf_pool->zip_free_mutex);
3451 +       buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3452 +       mutex_exit(&buf_pool->zip_free_mutex);
3453 +
3454 +       if (!have_page_hash_mutex) {
3455 +               mutex_exit(&buf_pool->LRU_list_mutex);
3456 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3457 +       }
3458  }
3459  
3460  #ifdef UNIV_MATERIALIZE
3461 diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3462 --- a/storage/innobase/include/buf0buf.h        2010-12-03 15:22:36.327954660 +0900
3463 +++ b/storage/innobase/include/buf0buf.h        2010-12-03 15:48:29.343024683 +0900
3464 @@ -132,6 +132,20 @@
3465  /*==========================*/
3466  
3467  /********************************************************************//**
3468 +Acquires page_hash_latch in X-mode for every buffer pool instance. */
3469 +UNIV_INLINE
3470 +void
3471 +buf_pool_page_hash_x_lock_all(void);
3472 +/*================================*/
3473 +
3474 +/********************************************************************//**
3475 +Releases the X-mode page_hash_latch of every buffer pool instance. */
3476 +UNIV_INLINE
3477 +void
3478 +buf_pool_page_hash_x_unlock_all(void);
3479 +/*==================================*/
3480 +
3481 +/********************************************************************//**
3482  Creates the buffer pool.
3483  @return        own: buf_pool object, NULL if not enough memory or error */
3484  UNIV_INTERN
3485 @@ -761,6 +775,15 @@
3486         const buf_page_t*       bpage)  /*!< in: pointer to control block */
3487         __attribute__((pure));
3488  
3489 +/*************************************************************************
3490 +Gets the mutex of a block and enters it; returns NULL if the block has no
3491 +mutex. Not declared pure because entering the mutex is a side effect. */
3492 +UNIV_INLINE
3493 +mutex_t*
3494 +buf_page_get_mutex_enter(
3495 +/*=========================*/
3496 +       const buf_page_t*       bpage); /*!< in: pointer to control block */
3497 +
3498  /*********************************************************************//**
3499  Get the flush type of a page.
3500  @return        flush type */
3501 @@ -1242,7 +1265,7 @@
3502         All these are protected by buf_pool->mutex. */
3503         /* @{ */
3504  
3505 -       UT_LIST_NODE_T(buf_page_t) list;
3506 +       /* UT_LIST_NODE_T(buf_page_t) list; */
3507                                         /*!< based on state, this is a
3508                                         list node, protected either by
3509                                         buf_pool->mutex or by
3510 @@ -1270,6 +1293,10 @@
3511                                         BUF_BLOCK_REMOVE_HASH or
3512                                         BUF_BLOCK_READY_IN_USE. */
3513  
3514 +       /* resplit for optimistic use */
3515 +       UT_LIST_NODE_T(buf_page_t) free;
3516 +       UT_LIST_NODE_T(buf_page_t) flush_list;
3517 +       UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3518  #ifdef UNIV_DEBUG
3519         ibool           in_flush_list;  /*!< TRUE if in buf_pool->flush_list;
3520                                         when buf_pool->flush_list_mutex is
3521 @@ -1362,11 +1389,11 @@
3522                                         a block is in the unzip_LRU list
3523                                         if page.state == BUF_BLOCK_FILE_PAGE
3524                                         and page.zip.data != NULL */
3525 -#ifdef UNIV_DEBUG
3526 +//#ifdef UNIV_DEBUG
3527         ibool           in_unzip_LRU_list;/*!< TRUE if the page is in the
3528                                         decompressed LRU list;
3529                                         used in debugging */
3530 -#endif /* UNIV_DEBUG */
3531 +//#endif /* UNIV_DEBUG */
3532         mutex_t         mutex;          /*!< mutex protecting this block:
3533                                         state (also protected by the buffer
3534                                         pool mutex), io_fix, buf_fix_count,
3535 @@ -1532,6 +1559,11 @@
3536                                         pool instance, protects compressed
3537                                         only pages (of type buf_page_t, not
3538                                         buf_block_t */
3539 +       mutex_t         LRU_list_mutex;
3540 +       rw_lock_t       page_hash_latch;
3541 +       mutex_t         free_list_mutex;
3542 +       mutex_t         zip_free_mutex;
3543 +       mutex_t         zip_hash_mutex;
3544         ulint           instance_no;    /*!< Array index of this buffer
3545                                         pool instance */
3546         ulint           old_pool_size;  /*!< Old pool size in bytes */
3547 diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3548 --- a/storage/innobase/include/buf0buf.ic       2010-11-03 07:01:13.000000000 +0900
3549 +++ b/storage/innobase/include/buf0buf.ic       2010-12-03 15:48:29.345024524 +0900
3550 @@ -274,7 +274,7 @@
3551         case BUF_BLOCK_ZIP_FREE:
3552                 /* This is a free page in buf_pool->zip_free[].
3553                 Such pages should only be accessed by the buddy allocator. */
3554 -               ut_error;
3555 +               /* ut_error; */ /* optimistic */
3556                 break;
3557         case BUF_BLOCK_ZIP_PAGE:
3558         case BUF_BLOCK_ZIP_DIRTY:
3559 @@ -317,9 +317,14 @@
3560  {
3561         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3562  
3563 +       if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3564 +               /* TODO: this code is interim and should be confirmed later. */
3565 +               return(&buf_pool->zip_mutex);
3566 +       }
3567 +
3568         switch (buf_page_get_state(bpage)) {
3569         case BUF_BLOCK_ZIP_FREE:
3570 -               ut_error;
3571 +               /* ut_error; */ /* optimistic */
3572                 return(NULL);
3573         case BUF_BLOCK_ZIP_PAGE:
3574         case BUF_BLOCK_ZIP_DIRTY:
3575 @@ -329,6 +334,28 @@
3576         }
3577  }
3578  
3579 +/*************************************************************************
3580 +Gets the mutex of a block and enters it, retrying until buf_page_get_mutex() still returns the held mutex. @return entered mutex, or NULL if none */
3581 +UNIV_INLINE
3582 +mutex_t*
3583 +buf_page_get_mutex_enter(
3584 +/*=========================*/
3585 +       const buf_page_t*       bpage)  /*!< in: pointer to control block */
3586 +{
3587 +       mutex_t*        block_mutex;
3588 +
3589 +       while(1) { /* loop until the mutex we hold is the one the page reports */
3590 +               block_mutex = buf_page_get_mutex(bpage);
3591 +               if (!block_mutex)
3592 +                       return block_mutex; /* NULL: nothing was entered */
3593 +
3594 +               mutex_enter(block_mutex);
3595 +               if (block_mutex == buf_page_get_mutex(bpage)) /* re-check while holding it */
3596 +                       return block_mutex;
3597 +               mutex_exit(block_mutex); /* page now reports a different mutex: retry */
3598 +       }
3599 +}
3600 +
3601  /*********************************************************************//**
3602  Get the flush type of a page.
3603  @return        flush type */
3604 @@ -425,8 +452,8 @@
3605         enum buf_io_fix io_fix) /*!< in: io_fix state */
3606  {
3607  #ifdef UNIV_DEBUG
3608 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3609 -       ut_ad(buf_pool_mutex_own(buf_pool));
3610 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3611 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3612  #endif
3613         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3614  
3615 @@ -456,14 +483,14 @@
3616         const buf_page_t*       bpage)  /*!< control block being relocated */
3617  {
3618  #ifdef UNIV_DEBUG
3619 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3620 -       ut_ad(buf_pool_mutex_own(buf_pool));
3621 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3622 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3623  #endif
3624         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3625         ut_ad(buf_page_in_file(bpage));
3626 -       ut_ad(bpage->in_LRU_list);
3627 +       //ut_ad(bpage->in_LRU_list);
3628  
3629 -       return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3630 +       return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3631                && bpage->buf_fix_count == 0);
3632  }
3633  
3634 @@ -477,8 +504,8 @@
3635         const buf_page_t*       bpage)  /*!< in: control block */
3636  {
3637  #ifdef UNIV_DEBUG
3638 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3639 -       ut_ad(buf_pool_mutex_own(buf_pool));
3640 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3641 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3642  #endif
3643         ut_ad(buf_page_in_file(bpage));
3644  
3645 @@ -498,7 +525,8 @@
3646         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3647  #endif /* UNIV_DEBUG */
3648         ut_a(buf_page_in_file(bpage));
3649 -       ut_ad(buf_pool_mutex_own(buf_pool));
3650 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3651 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3652         ut_ad(bpage->in_LRU_list);
3653  
3654  #ifdef UNIV_LRU_DEBUG
3655 @@ -545,9 +573,10 @@
3656         ulint           time_ms)        /*!< in: ut_time_ms() */
3657  {
3658  #ifdef UNIV_DEBUG
3659 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3660 -       ut_ad(buf_pool_mutex_own(buf_pool));
3661 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3662 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3663  #endif
3664 +       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3665         ut_a(buf_page_in_file(bpage));
3666  
3667         if (!bpage->access_time) {
3668 @@ -761,19 +790,19 @@
3669  /*===========*/
3670         buf_block_t*    block)  /*!< in, own: block to be freed */
3671  {
3672 -       buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3673 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3674  
3675 -       buf_pool_mutex_enter(buf_pool);
3676 +       //buf_pool_mutex_enter(buf_pool);
3677  
3678         mutex_enter(&block->mutex);
3679  
3680         ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3681  
3682 -       buf_LRU_block_free_non_file_page(block);
3683 +       buf_LRU_block_free_non_file_page(block, FALSE);
3684  
3685         mutex_exit(&block->mutex);
3686  
3687 -       buf_pool_mutex_exit(buf_pool);
3688 +       //buf_pool_mutex_exit(buf_pool);
3689  }
3690  #endif /* !UNIV_HOTBACKUP */
3691  
3692 @@ -821,17 +850,17 @@
3693                                         page frame */
3694  {
3695         ib_uint64_t     lsn;
3696 -       mutex_t*        block_mutex = buf_page_get_mutex(bpage);
3697 -
3698 -       mutex_enter(block_mutex);
3699 +       mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
3700  
3701 -       if (buf_page_in_file(bpage)) {
3702 +       if (block_mutex && buf_page_in_file(bpage)) {
3703                 lsn = bpage->newest_modification;
3704         } else {
3705                 lsn = 0;
3706         }
3707  
3708 -       mutex_exit(block_mutex);
3709 +       if (block_mutex) {
3710 +               mutex_exit(block_mutex);
3711 +       }
3712  
3713         return(lsn);
3714  }
3715 @@ -849,7 +878,7 @@
3716  #ifdef UNIV_SYNC_DEBUG
3717         buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3718  
3719 -       ut_ad((buf_pool_mutex_own(buf_pool)
3720 +       ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3721                && (block->page.buf_fix_count == 0))
3722               || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3723  #endif /* UNIV_SYNC_DEBUG */
3724 @@ -979,7 +1008,11 @@
3725         buf_page_t*     bpage;
3726  
3727         ut_ad(buf_pool);
3728 -       ut_ad(buf_pool_mutex_own(buf_pool));
3729 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3730 +#ifdef UNIV_SYNC_DEBUG
3731 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3732 +             || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3733 +#endif
3734         ut_ad(fold == buf_page_address_fold(space, offset));
3735  
3736         /* Look for the page in the hash table */
3737 @@ -1064,11 +1097,13 @@
3738         const buf_page_t*       bpage;
3739         buf_pool_t*             buf_pool = buf_pool_get(space, offset);
3740  
3741 -       buf_pool_mutex_enter(buf_pool);
3742 +       //buf_pool_mutex_enter(buf_pool);
3743 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
3744  
3745         bpage = buf_page_hash_get(buf_pool, space, offset);
3746  
3747 -       buf_pool_mutex_exit(buf_pool);
3748 +       //buf_pool_mutex_exit(buf_pool);
3749 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3750  
3751         return(bpage != NULL);
3752  }
3753 @@ -1196,4 +1231,38 @@
3754                 buf_pool_mutex_exit(buf_pool);
3755         }
3756  }
3757 +
3758 +/********************************************************************//**
3759 +Acquires page_hash_latch in X-mode for every buffer pool instance. */
3760 +UNIV_INLINE
3761 +void
3762 +buf_pool_page_hash_x_lock_all(void)
3763 +/*===============================*/
3764 +{
3765 +       ulint   i;
3766 +
3767 +       for (i = 0; i < srv_buf_pool_instances; i++) { /* ascending instance index */
3768 +               buf_pool_t*     buf_pool;
3769 +
3770 +               buf_pool = buf_pool_from_array(i);
3771 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
3772 +       }
3773 +}
3774 +
3775 +/********************************************************************//**
3776 +X-unlocks page_hash_latch of every buffer pool instance, in index order. */
3777 +UNIV_INLINE
3778 +void
3779 +buf_pool_page_hash_x_unlock_all(void)
3780 +/*=================================*/
3781 +{
3782 +       ulint   i;
3783 +
3784 +       for (i = 0; i < srv_buf_pool_instances; i++) {
3785 +               buf_pool_t*     buf_pool;
3786 +
3787 +               buf_pool = buf_pool_from_array(i);
3788 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3789 +       }
3790 +}
3791  #endif /* !UNIV_HOTBACKUP */
3792 diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3793 --- a/storage/innobase/include/buf0lru.h        2010-11-03 07:01:13.000000000 +0900
3794 +++ b/storage/innobase/include/buf0lru.h        2010-12-03 15:48:29.349024701 +0900
3795 @@ -113,10 +113,11 @@
3796         buf_page_t*     bpage,  /*!< in: block to be freed */
3797         ibool           zip,    /*!< in: TRUE if should remove also the
3798                                 compressed page of an uncompressed page */
3799 -       ibool*          buf_pool_mutex_released);
3800 +       ibool*          buf_pool_mutex_released,
3801                                 /*!< in: pointer to a variable that will
3802                                 be assigned TRUE if buf_pool->mutex
3803                                 was temporarily released, or NULL */
3804 +       ibool           have_LRU_mutex);
3805  /******************************************************************//**
3806  Try to free a replaceable block.
3807  @return        TRUE if found and freed */
3808 @@ -163,7 +164,8 @@
3809  void
3810  buf_LRU_block_free_non_file_page(
3811  /*=============================*/
3812 -       buf_block_t*    block); /*!< in: block, must not contain a file page */
3813 +       buf_block_t*    block,  /*!< in: block, must not contain a file page */
3814 +       ibool           have_page_hash_mutex);
3815  /******************************************************************//**
3816  Adds a block to the LRU list. */
3817  UNIV_INTERN
3818 diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3819 --- a/storage/innobase/include/sync0rw.h        2010-11-03 07:01:13.000000000 +0900
3820 +++ b/storage/innobase/include/sync0rw.h        2010-12-03 15:48:29.349942993 +0900
3821 @@ -112,6 +112,7 @@
3822  extern mysql_pfs_key_t archive_lock_key;
3823  # endif /* UNIV_LOG_ARCHIVE */
3824  extern mysql_pfs_key_t btr_search_latch_key;
3825 +extern mysql_pfs_key_t buf_pool_page_hash_key;
3826  extern mysql_pfs_key_t buf_block_lock_key;
3827  # ifdef UNIV_SYNC_DEBUG
3828  extern mysql_pfs_key_t buf_block_debug_latch_key;
3829 diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3830 --- a/storage/innobase/include/sync0sync.h      2010-11-03 07:01:13.000000000 +0900
3831 +++ b/storage/innobase/include/sync0sync.h      2010-12-03 15:48:29.352024614 +0900
3832 @@ -75,6 +75,10 @@
3833  extern mysql_pfs_key_t buffer_block_mutex_key;
3834  extern mysql_pfs_key_t buf_pool_mutex_key;
3835  extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3836 +extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3837 +extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3838 +extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3839 +extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3840  extern mysql_pfs_key_t cache_last_read_mutex_key;
3841  extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3842  extern mysql_pfs_key_t dict_sys_mutex_key;
3843 @@ -660,7 +664,7 @@
3844  #define        SYNC_TRX_LOCK_HEAP      298
3845  #define SYNC_TRX_SYS_HEADER    290
3846  #define SYNC_LOG               170
3847 -#define SYNC_LOG_FLUSH_ORDER   147
3848 +#define SYNC_LOG_FLUSH_ORDER   156
3849  #define SYNC_RECV              168
3850  #define        SYNC_WORK_QUEUE         162
3851  #define        SYNC_SEARCH_SYS_CONF    161     /* for assigning btr_search_enabled */
3852 @@ -670,8 +674,13 @@
3853                                         SYNC_SEARCH_SYS, as memory allocation
3854                                         can call routines there! Otherwise
3855                                         the level is SYNC_MEM_HASH. */
3856 +#define        SYNC_BUF_LRU_LIST       158
3857 +#define        SYNC_BUF_PAGE_HASH      157
3858 +#define        SYNC_BUF_BLOCK          155     /* Block mutex */
3859 +#define        SYNC_BUF_FREE_LIST      153
3860 +#define        SYNC_BUF_ZIP_FREE       152
3861 +#define        SYNC_BUF_ZIP_HASH       151
3862  #define        SYNC_BUF_POOL           150     /* Buffer pool mutex */
3863 -#define        SYNC_BUF_BLOCK          146     /* Block mutex */
3864  #define        SYNC_BUF_FLUSH_LIST     145     /* Buffer flush list mutex */
3865  #define SYNC_DOUBLEWRITE       140
3866  #define        SYNC_ANY_LATCH          135
3867 @@ -703,7 +712,7 @@
3868                 os_fast_mutex;  /*!< We use this OS mutex in place of lock_word
3869                                 when atomic operations are not enabled */
3870  #endif
3871 -       ulint   waiters;        /*!< This ulint is set to 1 if there are (or
3872 +       volatile ulint  waiters;        /*!< This ulint is set to 1 if there are (or
3873                                 may be) threads waiting in the global wait
3874                                 array for this mutex to be released.
3875                                 Otherwise, this is 0. */
3876 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3877 --- a/storage/innobase/srv/srv0srv.c    2010-12-03 15:48:03.080956216 +0900
3878 +++ b/storage/innobase/srv/srv0srv.c    2010-12-03 15:48:29.355023766 +0900
3879 @@ -3060,7 +3060,7 @@
3880                                                                 level += log_sys->max_checkpoint_age
3881                                                                          - (lsn - oldest_modification);
3882                                                         }
3883 -                                                       bpage = UT_LIST_GET_NEXT(list, bpage);
3884 +                                                       bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3885                                                         n_blocks++;
3886                                                 }
3887  
3888 @@ -3145,7 +3145,7 @@
3889                                                         found = TRUE;
3890                                                         break;
3891                                                 }
3892 -                                               bpage = UT_LIST_GET_NEXT(list, bpage);
3893 +                                               bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3894                                                 new_blocks_num++;
3895                                         }
3896                                         if (!found) {
3897 diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3898 --- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3899 +++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
3900 @@ -265,7 +265,7 @@
3901         mutex->lock_word = 0;
3902  #endif
3903         mutex->event = os_event_create(NULL);
3904 -       mutex_set_waiters(mutex, 0);
3905 +       mutex->waiters = 0;
3906  #ifdef UNIV_DEBUG
3907         mutex->magic_n = MUTEX_MAGIC_N;
3908  #endif /* UNIV_DEBUG */
3909 @@ -444,6 +444,15 @@
3910         mutex_t*        mutex,  /*!< in: mutex */
3911         ulint           n)      /*!< in: value to set */
3912  {
3913 +#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3914 +       ut_ad(mutex);
3915 +
3916 +       if (n) {
3917 +               os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3918 +       } else {
3919 +               os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3920 +       }
3921 +#else
3922         volatile ulint* ptr;            /* declared volatile to ensure that
3923                                         the value is stored to memory */
3924         ut_ad(mutex);
3925 @@ -452,6 +461,7 @@
3926  
3927         *ptr = n;               /* Here we assume that the write of a single
3928                                 word in memory is atomic */
3929 +#endif
3930  }
3931  
3932  /******************************************************************//**
3933 @@ -1193,7 +1203,12 @@
3934                         ut_error;
3935                 }
3936                 break;
3937 +       case SYNC_BUF_LRU_LIST:
3938         case SYNC_BUF_FLUSH_LIST:
3939 +       case SYNC_BUF_PAGE_HASH:
3940 +       case SYNC_BUF_FREE_LIST:
3941 +       case SYNC_BUF_ZIP_FREE:
3942 +       case SYNC_BUF_ZIP_HASH:
3943         case SYNC_BUF_POOL:
3944                 /* We can have multiple mutexes of this type therefore we
3945                 can only check whether the greater than condition holds. */
3946 @@ -1211,7 +1226,8 @@
3947                 buffer block (block->mutex or buf_pool->zip_mutex). */
3948                 if (!sync_thread_levels_g(array, level, FALSE)) {
3949                         ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3950 -                       ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3951 +                       /* the exact rule is not decided yet; assertion disabled for now */
3952 +                       //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
3953                 }
3954                 break;
3955         case SYNC_REC_LOCK:
This page took 0.453817 seconds and 3 git commands to generate.