git.pld-linux.org Git - packages/mysql.git/blob - mysql-innodb_split_buf_pool_mutex.patch
- use %{name} macro, cosmetics, macros BR for cmake, restore comment for default...
[packages/mysql.git] / mysql-innodb_split_buf_pool_mutex.patch
1 # name       : innodb_split_buf_pool_mutex.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
4 #
5 #!!! notice !!!
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9 --- a/storage/innobase/btr/btr0cur.c    2010-11-03 07:01:13.000000000 +0900
10 +++ b/storage/innobase/btr/btr0cur.c    2010-12-03 15:48:29.268957148 +0900
11 @@ -4042,7 +4042,8 @@
12  
13         mtr_commit(mtr);
14  
15 -       buf_pool_mutex_enter(buf_pool);
16 +       //buf_pool_mutex_enter(buf_pool);
17 +       mutex_enter(&buf_pool->LRU_list_mutex);
18         mutex_enter(&block->mutex);
19  
20         /* Only free the block if it is still allocated to
21 @@ -4053,17 +4054,22 @@
22             && buf_block_get_space(block) == space
23             && buf_block_get_page_no(block) == page_no) {
24  
25 -               if (buf_LRU_free_block(&block->page, all, NULL)
26 +               if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
27                     != BUF_LRU_FREED
28 -                   && all && block->page.zip.data) {
29 +                   && all && block->page.zip.data
30 +                   /* buf_LRU_free_block() may release the mutex temporarily, so re-check that the block is still the same page */
31 +                   && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
32 +                   && buf_block_get_space(block) == space
33 +                   && buf_block_get_page_no(block) == page_no) {
34                         /* Attempt to deallocate the uncompressed page
35                         if the whole block cannot be deallocted. */
36  
37 -                       buf_LRU_free_block(&block->page, FALSE, NULL);
38 +                       buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
39                 }
40         }
41  
42 -       buf_pool_mutex_exit(buf_pool);
43 +       //buf_pool_mutex_exit(buf_pool);
44 +       mutex_exit(&buf_pool->LRU_list_mutex);
45         mutex_exit(&block->mutex);
46  }
47  
48 diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
49 --- a/storage/innobase/btr/btr0sea.c    2010-12-03 15:48:03.033037049 +0900
50 +++ b/storage/innobase/btr/btr0sea.c    2010-12-03 15:48:29.271024260 +0900
51 @@ -1211,7 +1211,7 @@
52         ulint*          offsets;
53  
54         rw_lock_x_lock(&btr_search_latch);
55 -       buf_pool_mutex_enter_all();
56 +       //buf_pool_mutex_enter_all();
57  
58         table = btr_search_sys->hash_index;
59  
60 @@ -1220,6 +1220,8 @@
61  
62                 buf_pool = buf_pool_from_array(j);
63  
64 +               mutex_enter(&buf_pool->LRU_list_mutex);
65 +
66                 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
67  
68                 while (bpage != NULL) {
69 @@ -1301,9 +1303,11 @@
70  
71                         bpage = UT_LIST_GET_PREV(LRU, bpage);
72                 }
73 +
74 +               mutex_exit(&buf_pool->LRU_list_mutex);
75         }
76  
77 -       buf_pool_mutex_exit_all();
78 +       //buf_pool_mutex_exit_all();
79         rw_lock_x_unlock(&btr_search_latch);
80  
81         if (UNIV_LIKELY_NULL(heap)) {
82 @@ -1896,7 +1900,7 @@
83         rec_offs_init(offsets_);
84  
85         rw_lock_x_lock(&btr_search_latch);
86 -       buf_pool_mutex_enter_all();
87 +       buf_pool_page_hash_x_lock_all();
88  
89         cell_count = hash_get_n_cells(btr_search_sys->hash_index);
90  
91 @@ -1904,11 +1908,11 @@
92                 /* We release btr_search_latch every once in a while to
93                 give other queries a chance to run. */
94                 if ((i != 0) && ((i % chunk_size) == 0)) {
95 -                       buf_pool_mutex_exit_all();
96 +                       buf_pool_page_hash_x_unlock_all();
97                         rw_lock_x_unlock(&btr_search_latch);
98                         os_thread_yield();
99                         rw_lock_x_lock(&btr_search_latch);
100 -                       buf_pool_mutex_enter_all();
101 +                       buf_pool_page_hash_x_lock_all();
102                 }
103  
104                 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
105 @@ -2019,11 +2023,11 @@
106                 /* We release btr_search_latch every once in a while to
107                 give other queries a chance to run. */
108                 if (i != 0) {
109 -                       buf_pool_mutex_exit_all();
110 +                       buf_pool_page_hash_x_unlock_all();
111                         rw_lock_x_unlock(&btr_search_latch);
112                         os_thread_yield();
113                         rw_lock_x_lock(&btr_search_latch);
114 -                       buf_pool_mutex_enter_all();
115 +                       buf_pool_page_hash_x_lock_all();
116                 }
117  
118                 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
119 @@ -2031,7 +2035,7 @@
120                 }
121         }
122  
123 -       buf_pool_mutex_exit_all();
124 +       buf_pool_page_hash_x_unlock_all();
125         rw_lock_x_unlock(&btr_search_latch);
126         if (UNIV_LIKELY_NULL(heap)) {
127                 mem_heap_free(heap);
128 diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
129 --- a/storage/innobase/buf/buf0buddy.c  2010-12-03 15:22:36.307986907 +0900
130 +++ b/storage/innobase/buf/buf0buddy.c  2010-12-03 15:48:29.275025723 +0900
131 @@ -73,10 +73,11 @@
132         if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
133  #endif /* UNIV_DEBUG_VALGRIND */
134  
135 -       ut_ad(buf_pool_mutex_own(buf_pool));
136 +       //ut_ad(buf_pool_mutex_own(buf_pool));
137 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
138         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
139         ut_ad(buf_pool->zip_free[i].start != bpage);
140 -       UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
141 +       UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
142  
143  #ifdef UNIV_DEBUG_VALGRIND
144         if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
145 @@ -96,8 +97,8 @@
146                                         buf_pool->zip_free[] */
147  {
148  #ifdef UNIV_DEBUG_VALGRIND
149 -       buf_page_t*     prev = UT_LIST_GET_PREV(list, bpage);
150 -       buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
151 +       buf_page_t*     prev = UT_LIST_GET_PREV(zip_list, bpage);
152 +       buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
153  
154         if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
155         if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
156 @@ -106,9 +107,10 @@
157         ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
158  #endif /* UNIV_DEBUG_VALGRIND */
159  
160 -       ut_ad(buf_pool_mutex_own(buf_pool));
161 +       //ut_ad(buf_pool_mutex_own(buf_pool));
162 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
163         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
164 -       UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
165 +       UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
166  
167  #ifdef UNIV_DEBUG_VALGRIND
168         if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
169 @@ -128,12 +130,13 @@
170  {
171         buf_page_t*     bpage;
172  
173 -       ut_ad(buf_pool_mutex_own(buf_pool));
174 +       //ut_ad(buf_pool_mutex_own(buf_pool));
175 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
176         ut_a(i < BUF_BUDDY_SIZES);
177  
178  #ifndef UNIV_DEBUG_VALGRIND
179         /* Valgrind would complain about accessing free memory. */
180 -       ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
181 +       ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
182                               ut_ad(buf_page_get_state(ut_list_node_313)
183                                     == BUF_BLOCK_ZIP_FREE)));
184  #endif /* !UNIV_DEBUG_VALGRIND */
185 @@ -177,16 +180,19 @@
186  buf_buddy_block_free(
187  /*=================*/
188         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
189 -       void*           buf)            /*!< in: buffer frame to deallocate */
190 +       void*           buf,            /*!< in: buffer frame to deallocate */
191 +       ibool           have_page_hash_mutex)
192  {
193         const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf);
194         buf_page_t*     bpage;
195         buf_block_t*    block;
196  
197 -       ut_ad(buf_pool_mutex_own(buf_pool));
198 +       //ut_ad(buf_pool_mutex_own(buf_pool));
199         ut_ad(!mutex_own(&buf_pool->zip_mutex));
200         ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
201  
202 +       mutex_enter(&buf_pool->zip_hash_mutex);
203 +
204         HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
205                     ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
206                           && bpage->in_zip_hash && !bpage->in_page_hash),
207 @@ -198,12 +204,14 @@
208         ut_d(bpage->in_zip_hash = FALSE);
209         HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
210  
211 +       mutex_exit(&buf_pool->zip_hash_mutex);
212 +
213         ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
214         UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
215  
216         block = (buf_block_t*) bpage;
217         mutex_enter(&block->mutex);
218 -       buf_LRU_block_free_non_file_page(block);
219 +       buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
220         mutex_exit(&block->mutex);
221  
222         ut_ad(buf_pool->buddy_n_frames > 0);
223 @@ -220,7 +228,7 @@
224  {
225         buf_pool_t*     buf_pool = buf_pool_from_block(block);
226         const ulint     fold = BUF_POOL_ZIP_FOLD(block);
227 -       ut_ad(buf_pool_mutex_own(buf_pool));
228 +       //ut_ad(buf_pool_mutex_own(buf_pool));
229         ut_ad(!mutex_own(&buf_pool->zip_mutex));
230         ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
231  
232 @@ -232,7 +240,10 @@
233         ut_ad(!block->page.in_page_hash);
234         ut_ad(!block->page.in_zip_hash);
235         ut_d(block->page.in_zip_hash = TRUE);
236 +
237 +       mutex_enter(&buf_pool->zip_hash_mutex);
238         HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
239 +       mutex_exit(&buf_pool->zip_hash_mutex);
240  
241         ut_d(buf_pool->buddy_n_frames++);
242  }
243 @@ -268,7 +279,7 @@
244                 bpage->state = BUF_BLOCK_ZIP_FREE;
245  #ifndef UNIV_DEBUG_VALGRIND
246                 /* Valgrind would complain about accessing free memory. */
247 -               ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
248 +               ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
249                                       ut_ad(buf_page_get_state(
250                                                     ut_list_node_313)
251                                             == BUF_BLOCK_ZIP_FREE)));
252 @@ -291,25 +302,29 @@
253         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
254         ulint           i,              /*!< in: index of buf_pool->zip_free[],
255                                         or BUF_BUDDY_SIZES */
256 -       ibool*          lru)            /*!< in: pointer to a variable that
257 +       ibool*          lru,            /*!< in: pointer to a variable that
258                                         will be assigned TRUE if storage was
259                                         allocated from the LRU list and
260                                         buf_pool->mutex was temporarily
261                                         released, or NULL if the LRU list
262                                         should not be used */
263 +       ibool           have_page_hash_mutex)
264  {
265         buf_block_t*    block;
266  
267 -       ut_ad(buf_pool_mutex_own(buf_pool));
268 +       //ut_ad(buf_pool_mutex_own(buf_pool));
269 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
270         ut_ad(!mutex_own(&buf_pool->zip_mutex));
271  
272         if (i < BUF_BUDDY_SIZES) {
273                 /* Try to allocate from the buddy system. */
274 +               mutex_enter(&buf_pool->zip_free_mutex);
275                 block = buf_buddy_alloc_zip(buf_pool, i);
276  
277                 if (block) {
278                         goto func_exit;
279                 }
280 +               mutex_exit(&buf_pool->zip_free_mutex);
281         }
282  
283         /* Try allocating from the buf_pool->free list. */
284 @@ -326,19 +341,30 @@
285         }
286  
287         /* Try replacing an uncompressed page in the buffer pool. */
288 -       buf_pool_mutex_exit(buf_pool);
289 +       //buf_pool_mutex_exit(buf_pool);
290 +       mutex_exit(&buf_pool->LRU_list_mutex);
291 +       if (have_page_hash_mutex) {
292 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
293 +       }
294         block = buf_LRU_get_free_block(buf_pool, 0);
295         *lru = TRUE;
296 -       buf_pool_mutex_enter(buf_pool);
297 +       //buf_pool_mutex_enter(buf_pool);
298 +       mutex_enter(&buf_pool->LRU_list_mutex);
299 +       if (have_page_hash_mutex) {
300 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
301 +       }
302  
303  alloc_big:
304         buf_buddy_block_register(block);
305  
306 +       mutex_enter(&buf_pool->zip_free_mutex);
307         block = buf_buddy_alloc_from(
308                 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
309  
310  func_exit:
311         buf_pool->buddy_stat[i].used++;
312 +       mutex_exit(&buf_pool->zip_free_mutex);
313 +
314         return(block);
315  }
316  
317 @@ -355,7 +381,10 @@
318         buf_page_t*     b;
319         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
320  
321 -       ut_ad(buf_pool_mutex_own(buf_pool));
322 +       //ut_ad(buf_pool_mutex_own(buf_pool));
323 +#ifdef UNIV_SYNC_DEBUG
324 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
325 +#endif
326  
327         switch (buf_page_get_state(bpage)) {
328         case BUF_BLOCK_ZIP_FREE:
329 @@ -364,7 +393,7 @@
330         case BUF_BLOCK_FILE_PAGE:
331         case BUF_BLOCK_MEMORY:
332         case BUF_BLOCK_REMOVE_HASH:
333 -               ut_error;
334 +               /* ut_error; */ /* optimistic: fall through and report the page as not relocatable */
335         case BUF_BLOCK_ZIP_DIRTY:
336                 /* Cannot relocate dirty pages. */
337                 return(FALSE);
338 @@ -374,9 +403,18 @@
339         }
340  
341         mutex_enter(&buf_pool->zip_mutex);
342 +       mutex_enter(&buf_pool->zip_free_mutex);
343  
344         if (!buf_page_can_relocate(bpage)) {
345                 mutex_exit(&buf_pool->zip_mutex);
346 +               mutex_exit(&buf_pool->zip_free_mutex);
347 +               return(FALSE);
348 +       }
349 +
350 +       if (bpage != buf_page_hash_get(buf_pool,
351 +                                      bpage->space, bpage->offset)) {
352 +               mutex_exit(&buf_pool->zip_mutex);
353 +               mutex_exit(&buf_pool->zip_free_mutex);
354                 return(FALSE);
355         }
356  
357 @@ -384,18 +422,19 @@
358         ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
359  
360         /* relocate buf_pool->zip_clean */
361 -       b = UT_LIST_GET_PREV(list, dpage);
362 -       UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
363 +       b = UT_LIST_GET_PREV(zip_list, dpage);
364 +       UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
365  
366         if (b) {
367 -               UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
368 +               UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
369         } else {
370 -               UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
371 +               UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
372         }
373  
374         UNIV_MEM_INVALID(bpage, sizeof *bpage);
375  
376         mutex_exit(&buf_pool->zip_mutex);
377 +       mutex_exit(&buf_pool->zip_free_mutex);
378         return(TRUE);
379  }
380  
381 @@ -409,14 +448,16 @@
382         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
383         void*           src,            /*!< in: block to relocate */
384         void*           dst,            /*!< in: free block to relocate to */
385 -       ulint           i)              /*!< in: index of
386 +       ulint           i,              /*!< in: index of
387                                         buf_pool->zip_free[] */
388 +       ibool           have_page_hash_mutex)
389  {
390         buf_page_t*     bpage;
391         const ulint     size    = BUF_BUDDY_LOW << i;
392         ullint          usec    = ut_time_us(NULL);
393  
394 -       ut_ad(buf_pool_mutex_own(buf_pool));
395 +       //ut_ad(buf_pool_mutex_own(buf_pool));
396 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
397         ut_ad(!mutex_own(&buf_pool->zip_mutex));
398         ut_ad(!ut_align_offset(src, size));
399         ut_ad(!ut_align_offset(dst, size));
400 @@ -438,6 +479,12 @@
401                 /* This is a compressed page. */
402                 mutex_t*        mutex;
403  
404 +               if (!have_page_hash_mutex) {
405 +                       mutex_exit(&buf_pool->zip_free_mutex);
406 +                       mutex_enter(&buf_pool->LRU_list_mutex);
407 +                       rw_lock_x_lock(&buf_pool->page_hash_latch);
408 +               }
409 +
410                 /* The src block may be split into smaller blocks,
411                 some of which may be free.  Thus, the
412                 mach_read_from_4() calls below may attempt to read
413 @@ -462,6 +509,11 @@
414                         added to buf_pool->page_hash yet.  Obviously,
415                         it cannot be relocated. */
416  
417 +                       if (!have_page_hash_mutex) {
418 +                               mutex_enter(&buf_pool->zip_free_mutex);
419 +                               mutex_exit(&buf_pool->LRU_list_mutex);
420 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
421 +                       }
422                         return(FALSE);
423                 }
424  
425 @@ -473,18 +525,27 @@
426                         For the sake of simplicity, give up. */
427                         ut_ad(page_zip_get_size(&bpage->zip) < size);
428  
429 +                       if (!have_page_hash_mutex) {
430 +                               mutex_enter(&buf_pool->zip_free_mutex);
431 +                               mutex_exit(&buf_pool->LRU_list_mutex);
432 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
433 +                       }
434                         return(FALSE);
435                 }
436  
437 +               /* To keep latch order */
438 +               if (have_page_hash_mutex)
439 +                       mutex_exit(&buf_pool->zip_free_mutex);
440 +
441                 /* The block must have been allocated, but it may
442                 contain uninitialized data. */
443                 UNIV_MEM_ASSERT_W(src, size);
444  
445 -               mutex = buf_page_get_mutex(bpage);
446 +               mutex = buf_page_get_mutex_enter(bpage);
447  
448 -               mutex_enter(mutex);
449 +               mutex_enter(&buf_pool->zip_free_mutex);
450  
451 -               if (buf_page_can_relocate(bpage)) {
452 +               if (mutex && buf_page_can_relocate(bpage)) {
453                         /* Relocate the compressed page. */
454                         ut_a(bpage->zip.data == src);
455                         memcpy(dst, src, size);
456 @@ -499,10 +560,22 @@
457                                 buddy_stat->relocated_usec
458                                         += ut_time_us(NULL) - usec;
459                         }
460 +
461 +                       if (!have_page_hash_mutex) {
462 +                               mutex_exit(&buf_pool->LRU_list_mutex);
463 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
464 +                       }
465                         return(TRUE);
466                 }
467  
468 -               mutex_exit(mutex);
469 +               if (!have_page_hash_mutex) {
470 +                       mutex_exit(&buf_pool->LRU_list_mutex);
471 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
472 +               }
473 +
474 +               if (mutex) {
475 +                       mutex_exit(mutex);
476 +               }
477         } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
478                 /* This must be a buf_page_t object. */
479  #if UNIV_WORD_SIZE == 4
480 @@ -511,10 +584,31 @@
481                 about uninitialized pad bytes. */
482                 UNIV_MEM_ASSERT_RW(src, size);
483  #endif
484 +
485 +               mutex_exit(&buf_pool->zip_free_mutex);
486 +
487 +               if (!have_page_hash_mutex) {
488 +                       mutex_enter(&buf_pool->LRU_list_mutex);
489 +                       rw_lock_x_lock(&buf_pool->page_hash_latch);
490 +               }
491 +
492                 if (buf_buddy_relocate_block(src, dst)) {
493 +                       mutex_enter(&buf_pool->zip_free_mutex);
494 +
495 +                       if (!have_page_hash_mutex) {
496 +                               mutex_exit(&buf_pool->LRU_list_mutex);
497 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
498 +                       }
499  
500                         goto success;
501                 }
502 +
503 +               mutex_enter(&buf_pool->zip_free_mutex);
504 +
505 +               if (!have_page_hash_mutex) {
506 +                       mutex_exit(&buf_pool->LRU_list_mutex);
507 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
508 +               }
509         }
510  
511         return(FALSE);
512 @@ -529,13 +623,15 @@
513         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
514         void*           buf,            /*!< in: block to be freed, must not be
515                                         pointed to by the buffer pool */
516 -       ulint           i)              /*!< in: index of buf_pool->zip_free[],
517 +       ulint           i,              /*!< in: index of buf_pool->zip_free[],
518                                         or BUF_BUDDY_SIZES */
519 +       ibool           have_page_hash_mutex)
520  {
521         buf_page_t*     bpage;
522         buf_page_t*     buddy;
523  
524 -       ut_ad(buf_pool_mutex_own(buf_pool));
525 +       //ut_ad(buf_pool_mutex_own(buf_pool));
526 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
527         ut_ad(!mutex_own(&buf_pool->zip_mutex));
528         ut_ad(i <= BUF_BUDDY_SIZES);
529         ut_ad(buf_pool->buddy_stat[i].used > 0);
530 @@ -546,7 +642,9 @@
531         ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
532  
533         if (i == BUF_BUDDY_SIZES) {
534 -               buf_buddy_block_free(buf_pool, buf);
535 +               mutex_exit(&buf_pool->zip_free_mutex);
536 +               buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
537 +               mutex_enter(&buf_pool->zip_free_mutex);
538                 return;
539         }
540  
541 @@ -591,7 +689,7 @@
542                 ut_a(bpage != buf);
543  
544                 {
545 -                       buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
546 +                       buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
547                         UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
548                         bpage = next;
549                 }
550 @@ -600,13 +698,13 @@
551  #ifndef UNIV_DEBUG_VALGRIND
552  buddy_nonfree:
553         /* Valgrind would complain about accessing free memory. */
554 -       ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
555 +       ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
556                               ut_ad(buf_page_get_state(ut_list_node_313)
557                                     == BUF_BLOCK_ZIP_FREE)));
558  #endif /* UNIV_DEBUG_VALGRIND */
559  
560         /* The buddy is not free. Is there a free block of this size? */
561 -       bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
562 +       bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
563  
564         if (bpage) {
565                 /* Remove the block from the free list, because a successful
566 @@ -616,7 +714,7 @@
567                 buf_buddy_remove_from_free(buf_pool, bpage, i);
568  
569                 /* Try to relocate the buddy of buf to the free block. */
570 -               if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
571 +               if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
572  
573                         ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
574                         goto buddy_free2;
575 @@ -636,14 +734,14 @@
576  
577                 (Parts of the buddy can be free in
578                 buf_pool->zip_free[j] with j < i.) */
579 -               ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
580 +               ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
581                                       ut_ad(buf_page_get_state(
582                                                     ut_list_node_313)
583                                             == BUF_BLOCK_ZIP_FREE
584                                             && ut_list_node_313 != buddy)));
585  #endif /* !UNIV_DEBUG_VALGRIND */
586  
587 -               if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
588 +               if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
589  
590                         buf = bpage;
591                         UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
592 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
593 --- a/storage/innobase/buf/buf0buf.c    2010-12-03 15:22:36.314943336 +0900
594 +++ b/storage/innobase/buf/buf0buf.c    2010-12-03 15:48:29.282947357 +0900
595 @@ -263,6 +263,7 @@
596  #ifdef UNIV_PFS_RWLOCK
597  /* Keys to register buffer block related rwlocks and mutexes with
598  performance schema */
599 +UNIV_INTERN mysql_pfs_key_t    buf_pool_page_hash_key;
600  UNIV_INTERN mysql_pfs_key_t    buf_block_lock_key;
601  # ifdef UNIV_SYNC_DEBUG
602  UNIV_INTERN mysql_pfs_key_t    buf_block_debug_latch_key;
603 @@ -273,6 +274,10 @@
604  UNIV_INTERN mysql_pfs_key_t    buffer_block_mutex_key;
605  UNIV_INTERN mysql_pfs_key_t    buf_pool_mutex_key;
606  UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_mutex_key;
607 +UNIV_INTERN mysql_pfs_key_t    buf_pool_LRU_list_mutex_key;
608 +UNIV_INTERN mysql_pfs_key_t    buf_pool_free_list_mutex_key;
609 +UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_free_mutex_key;
610 +UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_hash_mutex_key;
611  UNIV_INTERN mysql_pfs_key_t    flush_list_mutex_key;
612  #endif /* UNIV_PFS_MUTEX */
613  
614 @@ -881,9 +886,9 @@
615         block->page.in_zip_hash = FALSE;
616         block->page.in_flush_list = FALSE;
617         block->page.in_free_list = FALSE;
618 -       block->in_unzip_LRU_list = FALSE;
619  #endif /* UNIV_DEBUG */
620         block->page.in_LRU_list = FALSE;
621 +       block->in_unzip_LRU_list = FALSE;
622  #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
623         block->n_pointers = 0;
624  #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
625 @@ -981,9 +986,11 @@
626                 memset(block->frame, '\0', UNIV_PAGE_SIZE);
627  #endif
628                 /* Add the block to the free list */
629 -               UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
630 +               mutex_enter(&buf_pool->free_list_mutex);
631 +               UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
632  
633                 ut_d(block->page.in_free_list = TRUE);
634 +               mutex_exit(&buf_pool->free_list_mutex);
635                 ut_ad(buf_pool_from_block(block) == buf_pool);
636  
637                 block++;
638 @@ -1038,7 +1045,8 @@
639         buf_chunk_t*    chunk = buf_pool->chunks;
640  
641         ut_ad(buf_pool);
642 -       ut_ad(buf_pool_mutex_own(buf_pool));
643 +       //ut_ad(buf_pool_mutex_own(buf_pool));
644 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
645         for (n = buf_pool->n_chunks; n--; chunk++) {
646  
647                 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
648 @@ -1138,7 +1146,7 @@
649         buf_block_t*            block;
650         const buf_block_t*      block_end;
651  
652 -       ut_ad(buf_pool_mutex_own(buf_pool));
653 +       //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutexes here */
654  
655         block_end = chunk->blocks + chunk->size;
656  
657 @@ -1150,8 +1158,10 @@
658                 ut_ad(!block->in_unzip_LRU_list);
659                 ut_ad(!block->page.in_flush_list);
660                 /* Remove the block from the free list. */
661 +               mutex_enter(&buf_pool->free_list_mutex);
662                 ut_ad(block->page.in_free_list);
663 -               UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
664 +               UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
665 +               mutex_exit(&buf_pool->free_list_mutex);
666  
667                 /* Free the latches. */
668                 mutex_free(&block->mutex);
669 @@ -1208,9 +1218,21 @@
670         ------------------------------- */
671         mutex_create(buf_pool_mutex_key,
672                      &buf_pool->mutex, SYNC_BUF_POOL);
673 +       mutex_create(buf_pool_LRU_list_mutex_key,
674 +                    &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
675 +       rw_lock_create(buf_pool_page_hash_key,
676 +                      &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
677 +       mutex_create(buf_pool_free_list_mutex_key,
678 +                    &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
679 +       mutex_create(buf_pool_zip_free_mutex_key,
680 +                    &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
681 +       mutex_create(buf_pool_zip_hash_mutex_key,
682 +                    &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
683         mutex_create(buf_pool_zip_mutex_key,
684                      &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
685  
686 +       mutex_enter(&buf_pool->LRU_list_mutex);
687 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
688         buf_pool_mutex_enter(buf_pool);
689  
690         if (buf_pool_size > 0) {
691 @@ -1223,6 +1245,8 @@
692                         mem_free(chunk);
693                         mem_free(buf_pool);
694  
695 +                       mutex_exit(&buf_pool->LRU_list_mutex);
696 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
697                         buf_pool_mutex_exit(buf_pool);
698  
699                         return(DB_ERROR);
700 @@ -1253,6 +1277,8 @@
701  
702         /* All fields are initialized by mem_zalloc(). */
703  
704 +       mutex_exit(&buf_pool->LRU_list_mutex);
705 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
706         buf_pool_mutex_exit(buf_pool);
707  
708         return(DB_SUCCESS);
709 @@ -1469,7 +1495,11 @@
710         ulint           fold;
711         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
712  
713 -       ut_ad(buf_pool_mutex_own(buf_pool));
714 +       //ut_ad(buf_pool_mutex_own(buf_pool));
715 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
716 +#ifdef UNIV_SYNC_DEBUG
717 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
718 +#endif
719         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
720         ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
721         ut_a(bpage->buf_fix_count == 0);
722 @@ -1556,7 +1586,8 @@
723  
724  try_again:
725         btr_search_disable(); /* Empty the adaptive hash index again */
726 -       buf_pool_mutex_enter(buf_pool);
727 +       //buf_pool_mutex_enter(buf_pool);
728 +       mutex_enter(&buf_pool->LRU_list_mutex);
729  
730  shrink_again:
731         if (buf_pool->n_chunks <= 1) {
732 @@ -1627,7 +1658,7 @@
733  
734                                 buf_LRU_make_block_old(&block->page);
735                                 dirty++;
736 -                       } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
737 +                       } else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
738                                    != BUF_LRU_FREED) {
739                                 nonfree++;
740                         }
741 @@ -1635,7 +1666,8 @@
742                         mutex_exit(&block->mutex);
743                 }
744  
745 -               buf_pool_mutex_exit(buf_pool);
746 +               //buf_pool_mutex_exit(buf_pool);
747 +               mutex_exit(&buf_pool->LRU_list_mutex);
748  
749                 /* Request for a flush of the chunk if it helps.
750                 Do not flush if there are non-free blocks, since
751 @@ -1685,7 +1717,8 @@
752  func_done:
753         buf_pool->old_pool_size = buf_pool->curr_pool_size;
754  func_exit:
755 -       buf_pool_mutex_exit(buf_pool);
756 +       //buf_pool_mutex_exit(buf_pool);
757 +       mutex_exit(&buf_pool->LRU_list_mutex);
758         btr_search_enable();
759  }
760  
761 @@ -1726,7 +1759,9 @@
762         hash_table_t*   zip_hash;
763         hash_table_t*   page_hash;
764  
765 -       buf_pool_mutex_enter(buf_pool);
766 +       //buf_pool_mutex_enter(buf_pool);
767 +       mutex_enter(&buf_pool->LRU_list_mutex);
768 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
769  
770         /* Free, create, and populate the hash table. */
771         hash_table_free(buf_pool->page_hash);
772 @@ -1767,8 +1802,9 @@
773         All such blocks are either in buf_pool->zip_clean or
774         in buf_pool->flush_list. */
775  
776 +       mutex_enter(&buf_pool->zip_mutex);
777         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
778 -            b = UT_LIST_GET_NEXT(list, b)) {
779 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
780                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
781                 ut_ad(!b->in_flush_list);
782                 ut_ad(b->in_LRU_list);
783 @@ -1778,10 +1814,11 @@
784                 HASH_INSERT(buf_page_t, hash, page_hash,
785                             buf_page_address_fold(b->space, b->offset), b);
786         }
787 +       mutex_exit(&buf_pool->zip_mutex);
788  
789         buf_flush_list_mutex_enter(buf_pool);
790         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
791 -            b = UT_LIST_GET_NEXT(list, b)) {
792 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
793                 ut_ad(b->in_flush_list);
794                 ut_ad(b->in_LRU_list);
795                 ut_ad(b->in_page_hash);
796 @@ -1808,7 +1845,9 @@
797         }
798  
799         buf_flush_list_mutex_exit(buf_pool);
800 -       buf_pool_mutex_exit(buf_pool);
801 +       //buf_pool_mutex_exit(buf_pool);
802 +       mutex_exit(&buf_pool->LRU_list_mutex);
803 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
804  }
805  
806  /********************************************************************
807 @@ -1855,21 +1894,32 @@
808         buf_page_t*     bpage;
809         ulint           i;
810         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
811 +       mutex_t*        block_mutex;
812  
813 -       ut_ad(buf_pool_mutex_own(buf_pool));
814 +       //ut_ad(buf_pool_mutex_own(buf_pool));
815  
816 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
817         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
818 +       if (bpage) {
819 +               block_mutex = buf_page_get_mutex_enter(bpage);
820 +               ut_a(block_mutex);
821 +       }
822  
823         if (UNIV_LIKELY_NULL(bpage)) {
824                 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
825                         /* The page was loaded meanwhile. */
826 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
827                         return(bpage);
828                 }
829                 /* Add to an existing watch. */
830                 bpage->buf_fix_count++;
831 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
832 +               mutex_exit(block_mutex);
833                 return(NULL);
834         }
835  
836 +       /* buf_pool->watch is protected by zip_mutex for now */
837 +       mutex_enter(&buf_pool->zip_mutex);
838         for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
839                 bpage = &buf_pool->watch[i];
840  
841 @@ -1897,6 +1947,8 @@
842                         ut_d(bpage->in_page_hash = TRUE);
843                         HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
844                                     fold, bpage);
845 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
846 +                       mutex_exit(&buf_pool->zip_mutex);
847                         return(NULL);
848                 case BUF_BLOCK_ZIP_PAGE:
849                         ut_ad(bpage->in_page_hash);
850 @@ -1914,6 +1966,8 @@
851         ut_error;
852  
853         /* Fix compiler warning */
854 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
855 +       mutex_exit(&buf_pool->zip_mutex);
856         return(NULL);
857  }
858  
859 @@ -1943,6 +1997,8 @@
860         buf_chunk_t*    chunks;
861         buf_chunk_t*    chunk;
862  
863 +       mutex_enter(&buf_pool->LRU_list_mutex);
864 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
865         buf_pool_mutex_enter(buf_pool);
866         chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
867  
868 @@ -1961,6 +2017,8 @@
869                 buf_pool->n_chunks++;
870         }
871  
872 +       mutex_exit(&buf_pool->LRU_list_mutex);
873 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
874         buf_pool_mutex_exit(buf_pool);
875  }
876  
877 @@ -2048,7 +2106,11 @@
878                                         space, offset) */
879         buf_page_t*     watch)          /*!< in/out: sentinel for watch */
880  {
881 -       ut_ad(buf_pool_mutex_own(buf_pool));
882 +       //ut_ad(buf_pool_mutex_own(buf_pool));
883 +#ifdef UNIV_SYNC_DEBUG
884 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
885 +#endif
886 +       ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
887  
888         HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
889         ut_d(watch->in_page_hash = FALSE);
890 @@ -2070,28 +2132,31 @@
891         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
892         ulint           fold = buf_page_address_fold(space, offset);
893  
894 -       buf_pool_mutex_enter(buf_pool);
895 +       //buf_pool_mutex_enter(buf_pool);
896 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
897         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
898         /* The page must exist because buf_pool_watch_set()
899         increments buf_fix_count. */
900         ut_a(bpage);
901  
902         if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
903 -               mutex_t* mutex = buf_page_get_mutex(bpage);
904 +               mutex_t* mutex = buf_page_get_mutex_enter(bpage);
905  
906 -               mutex_enter(mutex);
907                 ut_a(bpage->buf_fix_count > 0);
908                 bpage->buf_fix_count--;
909                 mutex_exit(mutex);
910         } else {
911 +               mutex_enter(&buf_pool->zip_mutex);
912                 ut_a(bpage->buf_fix_count > 0);
913  
914                 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
915                         buf_pool_watch_remove(buf_pool, fold, bpage);
916                 }
917 +               mutex_exit(&buf_pool->zip_mutex);
918         }
919  
920 -       buf_pool_mutex_exit(buf_pool);
921 +       //buf_pool_mutex_exit(buf_pool);
922 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
923  }
924  
925  /****************************************************************//**
926 @@ -2111,14 +2176,16 @@
927         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
928         ulint           fold    = buf_page_address_fold(space, offset);
929  
930 -       buf_pool_mutex_enter(buf_pool);
931 +       //buf_pool_mutex_enter(buf_pool);
932 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
933  
934         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
935         /* The page must exist because buf_pool_watch_set()
936         increments buf_fix_count. */
937         ut_a(bpage);
938         ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
939 -       buf_pool_mutex_exit(buf_pool);
940 +       //buf_pool_mutex_exit(buf_pool);
941 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
942  
943         return(ret);
944  }
945 @@ -2135,13 +2202,15 @@
946  {
947         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
948  
949 -       buf_pool_mutex_enter(buf_pool);
950 +       //buf_pool_mutex_enter(buf_pool);
951 +       mutex_enter(&buf_pool->LRU_list_mutex);
952  
953         ut_a(buf_page_in_file(bpage));
954  
955         buf_LRU_make_block_young(bpage);
956  
957 -       buf_pool_mutex_exit(buf_pool);
958 +       //buf_pool_mutex_exit(buf_pool);
959 +       mutex_exit(&buf_pool->LRU_list_mutex);
960  }
961  
962  /********************************************************************//**
963 @@ -2165,14 +2234,20 @@
964         ut_a(buf_page_in_file(bpage));
965  
966         if (buf_page_peek_if_too_old(bpage)) {
967 -               buf_pool_mutex_enter(buf_pool);
968 +               //buf_pool_mutex_enter(buf_pool);
969 +               mutex_enter(&buf_pool->LRU_list_mutex);
970                 buf_LRU_make_block_young(bpage);
971 -               buf_pool_mutex_exit(buf_pool);
972 +               //buf_pool_mutex_exit(buf_pool);
973 +               mutex_exit(&buf_pool->LRU_list_mutex);
974         } else if (!access_time) {
975                 ulint   time_ms = ut_time_ms();
976 -               buf_pool_mutex_enter(buf_pool);
977 +               mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
978 +               //buf_pool_mutex_enter(buf_pool);
979 +               if (block_mutex) {
980                 buf_page_set_accessed(bpage, time_ms);
981 -               buf_pool_mutex_exit(buf_pool);
982 +               mutex_exit(block_mutex);
983 +               }
984 +               //buf_pool_mutex_exit(buf_pool);
985         }
986  }
987  
988 @@ -2189,7 +2264,8 @@
989         buf_block_t*    block;
990         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
991  
992 -       buf_pool_mutex_enter(buf_pool);
993 +       //buf_pool_mutex_enter(buf_pool);
994 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
995  
996         block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
997  
998 @@ -2198,7 +2274,8 @@
999                 block->check_index_page_at_flush = FALSE;
1000         }
1001  
1002 -       buf_pool_mutex_exit(buf_pool);
1003 +       //buf_pool_mutex_exit(buf_pool);
1004 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1005  }
1006  
1007  /********************************************************************//**
1008 @@ -2217,7 +2294,8 @@
1009         ibool           is_hashed;
1010         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1011  
1012 -       buf_pool_mutex_enter(buf_pool);
1013 +       //buf_pool_mutex_enter(buf_pool);
1014 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1015  
1016         block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1017  
1018 @@ -2228,7 +2306,8 @@
1019                 is_hashed = block->is_hashed;
1020         }
1021  
1022 -       buf_pool_mutex_exit(buf_pool);
1023 +       //buf_pool_mutex_exit(buf_pool);
1024 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1025  
1026         return(is_hashed);
1027  }
1028 @@ -2250,7 +2329,8 @@
1029         buf_page_t*     bpage;
1030         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1031  
1032 -       buf_pool_mutex_enter(buf_pool);
1033 +       //buf_pool_mutex_enter(buf_pool);
1034 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1035  
1036         bpage = buf_page_hash_get(buf_pool, space, offset);
1037  
1038 @@ -2259,7 +2339,8 @@
1039                 bpage->file_page_was_freed = TRUE;
1040         }
1041  
1042 -       buf_pool_mutex_exit(buf_pool);
1043 +       //buf_pool_mutex_exit(buf_pool);
1044 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1045  
1046         return(bpage);
1047  }
1048 @@ -2280,7 +2361,8 @@
1049         buf_page_t*     bpage;
1050         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1051  
1052 -       buf_pool_mutex_enter(buf_pool);
1053 +       //buf_pool_mutex_enter(buf_pool);
1054 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1055  
1056         bpage = buf_page_hash_get(buf_pool, space, offset);
1057  
1058 @@ -2289,7 +2371,8 @@
1059                 bpage->file_page_was_freed = FALSE;
1060         }
1061  
1062 -       buf_pool_mutex_exit(buf_pool);
1063 +       //buf_pool_mutex_exit(buf_pool);
1064 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1065  
1066         return(bpage);
1067  }
1068 @@ -2324,8 +2407,9 @@
1069         buf_pool->stat.n_page_gets++;
1070  
1071         for (;;) {
1072 -               buf_pool_mutex_enter(buf_pool);
1073 +               //buf_pool_mutex_enter(buf_pool);
1074  lookup:
1075 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
1076                 bpage = buf_page_hash_get(buf_pool, space, offset);
1077                 if (bpage) {
1078                         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1079 @@ -2334,7 +2418,8 @@
1080  
1081                 /* Page not in buf_pool: needs to be read from file */
1082  
1083 -               buf_pool_mutex_exit(buf_pool);
1084 +               //buf_pool_mutex_exit(buf_pool);
1085 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1086  
1087                 buf_read_page(space, zip_size, offset);
1088  
1089 @@ -2346,10 +2431,15 @@
1090         if (UNIV_UNLIKELY(!bpage->zip.data)) {
1091                 /* There is no compressed page. */
1092  err_exit:
1093 -               buf_pool_mutex_exit(buf_pool);
1094 +               //buf_pool_mutex_exit(buf_pool);
1095 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1096                 return(NULL);
1097         }
1098  
1099 +       block_mutex = buf_page_get_mutex_enter(bpage);
1100 +
1101 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1102 +
1103         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1104  
1105         switch (buf_page_get_state(bpage)) {
1106 @@ -2358,19 +2448,19 @@
1107         case BUF_BLOCK_MEMORY:
1108         case BUF_BLOCK_REMOVE_HASH:
1109         case BUF_BLOCK_ZIP_FREE:
1110 +               if (block_mutex)
1111 +                       mutex_exit(block_mutex);
1112                 break;
1113         case BUF_BLOCK_ZIP_PAGE:
1114         case BUF_BLOCK_ZIP_DIRTY:
1115 -               block_mutex = &buf_pool->zip_mutex;
1116 -               mutex_enter(block_mutex);
1117 +               ut_a(block_mutex == &buf_pool->zip_mutex);
1118                 bpage->buf_fix_count++;
1119                 goto got_block;
1120         case BUF_BLOCK_FILE_PAGE:
1121 -               block_mutex = &((buf_block_t*) bpage)->mutex;
1122 -               mutex_enter(block_mutex);
1123 +               ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1124  
1125                 /* Discard the uncompressed page frame if possible. */
1126 -               if (buf_LRU_free_block(bpage, FALSE, NULL)
1127 +               if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
1128                     == BUF_LRU_FREED) {
1129  
1130                         mutex_exit(block_mutex);
1131 @@ -2389,7 +2479,7 @@
1132         must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1133         access_time = buf_page_is_accessed(bpage);
1134  
1135 -       buf_pool_mutex_exit(buf_pool);
1136 +       //buf_pool_mutex_exit(buf_pool);
1137  
1138         mutex_exit(block_mutex);
1139  
1140 @@ -2698,7 +2788,7 @@
1141         const buf_block_t*      block)          /*!< in: pointer to block,
1142                                                 not dereferenced */
1143  {
1144 -       ut_ad(buf_pool_mutex_own(buf_pool));
1145 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1146  
1147         if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1148                 /* The pointer should be aligned. */
1149 @@ -2734,6 +2824,7 @@
1150         ulint           fix_type;
1151         ibool           must_read;
1152         ulint           retries = 0;
1153 +       mutex_t*        block_mutex = NULL;
1154         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1155  
1156         ut_ad(mtr);
1157 @@ -2755,9 +2846,11 @@
1158         fold = buf_page_address_fold(space, offset);
1159  loop:
1160         block = guess;
1161 -       buf_pool_mutex_enter(buf_pool);
1162 +       //buf_pool_mutex_enter(buf_pool);
1163  
1164         if (block) {
1165 +               block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1166 +
1167                 /* If the guess is a compressed page descriptor that
1168                 has been allocated by buf_buddy_alloc(), it may have
1169                 been invalidated by buf_buddy_relocate().  In that
1170 @@ -2766,11 +2859,15 @@
1171                 the guess may be pointing to a buffer pool chunk that
1172                 has been released when resizing the buffer pool. */
1173  
1174 -               if (!buf_block_is_uncompressed(buf_pool, block)
1175 +               if (!block_mutex) {
1176 +                       block = guess = NULL;
1177 +               } else if (!buf_block_is_uncompressed(buf_pool, block)
1178                     || offset != block->page.offset
1179                     || space != block->page.space
1180                     || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1181  
1182 +                       mutex_exit(block_mutex);
1183 +
1184                         block = guess = NULL;
1185                 } else {
1186                         ut_ad(!block->page.in_zip_hash);
1187 @@ -2779,12 +2876,19 @@
1188         }
1189  
1190         if (block == NULL) {
1191 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
1192                 block = (buf_block_t*) buf_page_hash_get_low(
1193                         buf_pool, space, offset, fold);
1194 +               if (block) {
1195 +                       block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1196 +                       ut_a(block_mutex);
1197 +               }
1198 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1199         }
1200  
1201  loop2:
1202         if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1203 +               mutex_exit(block_mutex);
1204                 block = NULL;
1205         }
1206  
1207 @@ -2796,12 +2900,14 @@
1208                                 space, offset, fold);
1209  
1210                         if (UNIV_LIKELY_NULL(block)) {
1211 -
1212 +                               block_mutex = buf_page_get_mutex((buf_page_t*)block);
1213 +                               ut_a(block_mutex);
1214 +                               ut_ad(mutex_own(block_mutex));
1215                                 goto got_block;
1216                         }
1217                 }
1218  
1219 -               buf_pool_mutex_exit(buf_pool);
1220 +               //buf_pool_mutex_exit(buf_pool);
1221  
1222                 if (mode == BUF_GET_IF_IN_POOL
1223                     || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1224 @@ -2849,7 +2955,8 @@
1225                 /* The page is being read to buffer pool,
1226                 but we cannot wait around for the read to
1227                 complete. */
1228 -               buf_pool_mutex_exit(buf_pool);
1229 +               //buf_pool_mutex_exit(buf_pool);
1230 +               mutex_exit(block_mutex);
1231  
1232                 return(NULL);
1233         }
1234 @@ -2859,38 +2966,49 @@
1235                 ibool           success;
1236  
1237         case BUF_BLOCK_FILE_PAGE:
1238 +               if (block_mutex == &buf_pool->zip_mutex) {
1239 +                       /* zip_mutex is the wrong mutex for a BUF_BLOCK_FILE_PAGE block: release it and retry */
1240 +                       mutex_exit(block_mutex);
1241 +                       goto loop;
1242 +               }
1243                 break;
1244  
1245         case BUF_BLOCK_ZIP_PAGE:
1246         case BUF_BLOCK_ZIP_DIRTY:
1247 +               ut_ad(block_mutex == &buf_pool->zip_mutex);
1248                 bpage = &block->page;
1249                 /* Protect bpage->buf_fix_count. */
1250 -               mutex_enter(&buf_pool->zip_mutex);
1251 +               //mutex_enter(&buf_pool->zip_mutex);
1252  
1253                 if (bpage->buf_fix_count
1254                     || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1255                         /* This condition often occurs when the buffer
1256                         is not buffer-fixed, but I/O-fixed by
1257                         buf_page_init_for_read(). */
1258 -                       mutex_exit(&buf_pool->zip_mutex);
1259 +                       //mutex_exit(&buf_pool->zip_mutex);
1260  wait_until_unfixed:
1261                         /* The block is buffer-fixed or I/O-fixed.
1262                         Try again later. */
1263 -                       buf_pool_mutex_exit(buf_pool);
1264 +                       //buf_pool_mutex_exit(buf_pool);
1265 +                       mutex_exit(block_mutex);
1266                         os_thread_sleep(WAIT_FOR_READ);
1267    
1268                         goto loop;
1269                 }
1270  
1271                 /* Allocate an uncompressed page. */
1272 -               buf_pool_mutex_exit(buf_pool);
1273 -               mutex_exit(&buf_pool->zip_mutex);
1274 +               //buf_pool_mutex_exit(buf_pool);
1275 +               //mutex_exit(&buf_pool->zip_mutex);
1276 +               mutex_exit(block_mutex);
1277  
1278                 block = buf_LRU_get_free_block(buf_pool, 0);
1279                 ut_a(block);
1280 +               block_mutex = &block->mutex;
1281  
1282 -               buf_pool_mutex_enter(buf_pool);
1283 -               mutex_enter(&block->mutex);
1284 +               //buf_pool_mutex_enter(buf_pool);
1285 +               mutex_enter(&buf_pool->LRU_list_mutex);
1286 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
1287 +               mutex_enter(block_mutex);
1288  
1289                 {
1290                         buf_page_t*     hash_bpage;
1291 @@ -2903,35 +3021,47 @@
1292                                 while buf_pool->mutex was released.
1293                                 Free the block that was allocated. */
1294  
1295 -                               buf_LRU_block_free_non_file_page(block);
1296 -                               mutex_exit(&block->mutex);
1297 +                               buf_LRU_block_free_non_file_page(block, TRUE);
1298 +                               mutex_exit(block_mutex);
1299  
1300                                 block = (buf_block_t*) hash_bpage;
1301 +                               if (block) {
1302 +                                       block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1303 +                                       ut_a(block_mutex);
1304 +                               }
1305 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1306 +                               mutex_exit(&buf_pool->LRU_list_mutex);
1307                                 goto loop2;
1308                         }
1309                 }
1310  
1311 +               mutex_enter(&buf_pool->zip_mutex);
1312 +
1313                 if (UNIV_UNLIKELY
1314                     (bpage->buf_fix_count
1315                      || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1316  
1317 +                       mutex_exit(&buf_pool->zip_mutex);
1318                         /* The block was buffer-fixed or I/O-fixed
1319                         while buf_pool->mutex was not held by this thread.
1320                         Free the block that was allocated and try again.
1321                         This should be extremely unlikely. */
1322  
1323 -                       buf_LRU_block_free_non_file_page(block);
1324 -                       mutex_exit(&block->mutex);
1325 +                       buf_LRU_block_free_non_file_page(block, TRUE);
1326 +                       //mutex_exit(&block->mutex);
1327  
1328 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1329 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1330                         goto wait_until_unfixed;
1331                 }
1332  
1333                 /* Move the compressed page from bpage to block,
1334                 and uncompress it. */
1335  
1336 -               mutex_enter(&buf_pool->zip_mutex);
1337 -
1338                 buf_relocate(bpage, &block->page);
1339 +
1340 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1341 +
1342                 buf_block_init_low(block);
1343                 block->lock_hash_val = lock_rec_hash(space, offset);
1344  
1345 @@ -2940,7 +3070,7 @@
1346  
1347                 if (buf_page_get_state(&block->page)
1348                     == BUF_BLOCK_ZIP_PAGE) {
1349 -                       UT_LIST_REMOVE(list, buf_pool->zip_clean,
1350 +                       UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1351                                        &block->page);
1352                         ut_ad(!block->page.in_flush_list);
1353                 } else {
1354 @@ -2957,19 +3087,24 @@
1355                 /* Insert at the front of unzip_LRU list */
1356                 buf_unzip_LRU_add_block(block, FALSE);
1357  
1358 +               mutex_exit(&buf_pool->LRU_list_mutex);
1359 +
1360                 block->page.buf_fix_count = 1;
1361                 buf_block_set_io_fix(block, BUF_IO_READ);
1362                 rw_lock_x_lock_func(&block->lock, 0, file, line);
1363  
1364                 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1365  
1366 -               mutex_exit(&block->mutex);
1367 +               mutex_exit(block_mutex);
1368                 mutex_exit(&buf_pool->zip_mutex);
1369 +
1370 +               buf_pool_mutex_enter(buf_pool);
1371                 buf_pool->n_pend_unzip++;
1372 +               buf_pool_mutex_exit(buf_pool);
1373  
1374 -               buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1375 +               buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1376  
1377 -               buf_pool_mutex_exit(buf_pool);
1378 +               //buf_pool_mutex_exit(buf_pool);
1379  
1380                 /* Decompress the page and apply buffered operations
1381                 while not holding buf_pool->mutex or block->mutex. */
1382 @@ -2982,12 +3117,15 @@
1383                 }
1384  
1385                 /* Unfix and unlatch the block. */
1386 -               buf_pool_mutex_enter(buf_pool);
1387 -               mutex_enter(&block->mutex);
1388 +               //buf_pool_mutex_enter(buf_pool);
1389 +               block_mutex = &block->mutex;
1390 +               mutex_enter(block_mutex);
1391                 block->page.buf_fix_count--;
1392                 buf_block_set_io_fix(block, BUF_IO_NONE);
1393 -               mutex_exit(&block->mutex);
1394 +
1395 +               buf_pool_mutex_enter(buf_pool);
1396                 buf_pool->n_pend_unzip--;
1397 +               buf_pool_mutex_exit(buf_pool);
1398                 rw_lock_x_unlock(&block->lock);
1399  
1400                 break;
1401 @@ -3003,7 +3141,7 @@
1402  
1403         ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1404  
1405 -       mutex_enter(&block->mutex);
1406 +       //mutex_enter(&block->mutex);
1407  #if UNIV_WORD_SIZE == 4
1408         /* On 32-bit systems, there is no padding in buf_page_t.  On
1409         other systems, Valgrind could complain about uninitialized pad
1410 @@ -3013,13 +3151,14 @@
1411  
1412         buf_block_buf_fix_inc(block, file, line);
1413  
1414 -       mutex_exit(&block->mutex);
1415 +       //mutex_exit(&block->mutex);
1416  
1417         /* Check if this is the first access to the page */
1418  
1419         access_time = buf_page_is_accessed(&block->page);
1420  
1421 -       buf_pool_mutex_exit(buf_pool);
1422 +       //buf_pool_mutex_exit(buf_pool);
1423 +       mutex_exit(block_mutex);
1424  
1425         buf_page_set_accessed_make_young(&block->page, access_time);
1426  
1427 @@ -3252,9 +3391,11 @@
1428         buf_pool = buf_pool_from_block(block);
1429  
1430         if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1431 -               buf_pool_mutex_enter(buf_pool);
1432 +               //buf_pool_mutex_enter(buf_pool);
1433 +               mutex_enter(&buf_pool->LRU_list_mutex);
1434                 buf_LRU_make_block_young(&block->page);
1435 -               buf_pool_mutex_exit(buf_pool);
1436 +               //buf_pool_mutex_exit(buf_pool);
1437 +               mutex_exit(&buf_pool->LRU_list_mutex);
1438         } else if (!buf_page_is_accessed(&block->page)) {
1439                 /* Above, we do a dirty read on purpose, to avoid
1440                 mutex contention.  The field buf_page_t::access_time
1441 @@ -3262,9 +3403,11 @@
1442                 field must be protected by mutex, however. */
1443                 ulint   time_ms = ut_time_ms();
1444  
1445 -               buf_pool_mutex_enter(buf_pool);
1446 +               //buf_pool_mutex_enter(buf_pool);
1447 +               mutex_enter(&block->mutex);
1448                 buf_page_set_accessed(&block->page, time_ms);
1449 -               buf_pool_mutex_exit(buf_pool);
1450 +               //buf_pool_mutex_exit(buf_pool);
1451 +               mutex_exit(&block->mutex);
1452         }
1453  
1454         ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1455 @@ -3331,18 +3474,21 @@
1456         ut_ad(mtr);
1457         ut_ad(mtr->state == MTR_ACTIVE);
1458  
1459 -       buf_pool_mutex_enter(buf_pool);
1460 +       //buf_pool_mutex_enter(buf_pool);
1461 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1462         block = buf_block_hash_get(buf_pool, space_id, page_no);
1463  
1464         if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1465 -               buf_pool_mutex_exit(buf_pool);
1466 +               //buf_pool_mutex_exit(buf_pool);
1467 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1468                 return(NULL);
1469         }
1470  
1471         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1472  
1473         mutex_enter(&block->mutex);
1474 -       buf_pool_mutex_exit(buf_pool);
1475 +       //buf_pool_mutex_exit(buf_pool);
1476 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1477  
1478  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1479         ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1480 @@ -3431,7 +3577,10 @@
1481         buf_page_t*     hash_page;
1482         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1483  
1484 -       ut_ad(buf_pool_mutex_own(buf_pool));
1485 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1486 +#ifdef UNIV_SYNC_DEBUG
1487 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1488 +#endif
1489         ut_ad(mutex_own(&(block->mutex)));
1490         ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1491  
1492 @@ -3460,11 +3609,14 @@
1493         if (UNIV_LIKELY(!hash_page)) {
1494         } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1495                 /* Preserve the reference count. */
1496 -               ulint   buf_fix_count = hash_page->buf_fix_count;
1497 +               ulint   buf_fix_count;
1498  
1499 +               mutex_enter(&buf_pool->zip_mutex);
1500 +               buf_fix_count = hash_page->buf_fix_count;
1501                 ut_a(buf_fix_count > 0);
1502                 block->page.buf_fix_count += buf_fix_count;
1503                 buf_pool_watch_remove(buf_pool, fold, hash_page);
1504 +               mutex_exit(&buf_pool->zip_mutex);
1505         } else {
1506                 fprintf(stderr,
1507                         "InnoDB: Error: page %lu %lu already found"
1508 @@ -3474,7 +3626,8 @@
1509                         (const void*) hash_page, (const void*) block);
1510  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1511                 mutex_exit(&block->mutex);
1512 -               buf_pool_mutex_exit(buf_pool);
1513 +               //buf_pool_mutex_exit(buf_pool);
1514 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1515                 buf_print();
1516                 buf_LRU_print();
1517                 buf_validate();
1518 @@ -3558,7 +3711,9 @@
1519  
1520         fold = buf_page_address_fold(space, offset);
1521  
1522 -       buf_pool_mutex_enter(buf_pool);
1523 +       //buf_pool_mutex_enter(buf_pool);
1524 +       mutex_enter(&buf_pool->LRU_list_mutex);
1525 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1526  
1527         watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1528         if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
1529 @@ -3567,9 +3722,15 @@
1530  err_exit:
1531                 if (block) {
1532                         mutex_enter(&block->mutex);
1533 -                       buf_LRU_block_free_non_file_page(block);
1534 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1535 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1536 +                       buf_LRU_block_free_non_file_page(block, FALSE);
1537                         mutex_exit(&block->mutex);
1538                 }
1539 +               else {
1540 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1541 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1542 +               }
1543  
1544                 bpage = NULL;
1545                 goto func_exit;
1546 @@ -3592,6 +3753,8 @@
1547  
1548                 buf_page_init(space, offset, fold, block);
1549  
1550 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1551 +
1552                 /* The block must be put to the LRU list, to the old blocks */
1553                 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1554  
1555 @@ -3619,7 +3782,7 @@
1556                         been added to buf_pool->LRU and
1557                         buf_pool->page_hash. */
1558                         mutex_exit(&block->mutex);
1559 -                       data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1560 +                       data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1561                         mutex_enter(&block->mutex);
1562                         block->page.zip.data = data;
1563  
1564 @@ -3632,6 +3795,7 @@
1565                         buf_unzip_LRU_add_block(block, TRUE);
1566                 }
1567  
1568 +               mutex_exit(&buf_pool->LRU_list_mutex);
1569                 mutex_exit(&block->mutex);
1570         } else {
1571                 /* Defer buf_buddy_alloc() until after the block has
1572 @@ -3643,8 +3807,8 @@
1573                 control block (bpage), in order to avoid the
1574                 invocation of buf_buddy_relocate_block() on
1575                 uninitialized data. */
1576 -               data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1577 -               bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1578 +               data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1579 +               bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1580  
1581                 /* Initialize the buf_pool pointer. */
1582                 bpage->buf_pool = buf_pool;
1583 @@ -3663,8 +3827,11 @@
1584  
1585                                 /* The block was added by some other thread. */
1586                                 watch_page = NULL;
1587 -                               buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1588 -                               buf_buddy_free(buf_pool, data, zip_size);
1589 +                               buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1590 +                               buf_buddy_free(buf_pool, data, zip_size, TRUE);
1591 +
1592 +                               mutex_exit(&buf_pool->LRU_list_mutex);
1593 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1594  
1595                                 bpage = NULL;
1596                                 goto func_exit;
1597 @@ -3708,18 +3875,24 @@
1598                 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1599                             bpage);
1600  
1601 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1602 +
1603                 /* The block must be put to the LRU list, to the old blocks */
1604                 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1605                 buf_LRU_insert_zip_clean(bpage);
1606  
1607 +               mutex_exit(&buf_pool->LRU_list_mutex);
1608 +
1609                 buf_page_set_io_fix(bpage, BUF_IO_READ);
1610  
1611                 mutex_exit(&buf_pool->zip_mutex);
1612         }
1613  
1614 +       buf_pool_mutex_enter(buf_pool);
1615         buf_pool->n_pend_reads++;
1616 -func_exit:
1617         buf_pool_mutex_exit(buf_pool);
1618 +func_exit:
1619 +       //buf_pool_mutex_exit(buf_pool);
1620  
1621         if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1622  
1623 @@ -3761,7 +3934,9 @@
1624  
1625         fold = buf_page_address_fold(space, offset);
1626  
1627 -       buf_pool_mutex_enter(buf_pool);
1628 +       //buf_pool_mutex_enter(buf_pool);
1629 +       mutex_enter(&buf_pool->LRU_list_mutex);
1630 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1631  
1632         block = (buf_block_t*) buf_page_hash_get_low(
1633                 buf_pool, space, offset, fold);
1634 @@ -3777,7 +3952,9 @@
1635  #endif /* UNIV_DEBUG_FILE_ACCESSES */
1636  
1637                 /* Page can be found in buf_pool */
1638 -               buf_pool_mutex_exit(buf_pool);
1639 +               //buf_pool_mutex_exit(buf_pool);
1640 +               mutex_exit(&buf_pool->LRU_list_mutex);
1641 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1642  
1643                 buf_block_free(free_block);
1644  
1645 @@ -3799,6 +3976,7 @@
1646         mutex_enter(&block->mutex);
1647  
1648         buf_page_init(space, offset, fold, block);
1649 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1650  
1651         /* The block must be put to the LRU list */
1652         buf_LRU_add_block(&block->page, FALSE);
1653 @@ -3825,7 +4003,7 @@
1654                 the reacquisition of buf_pool->mutex.  We also must
1655                 defer this operation until after the block descriptor
1656                 has been added to buf_pool->LRU and buf_pool->page_hash. */
1657 -               data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1658 +               data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1659                 mutex_enter(&block->mutex);
1660                 block->page.zip.data = data;
1661  
1662 @@ -3843,7 +4021,8 @@
1663  
1664         buf_page_set_accessed(&block->page, time_ms);
1665  
1666 -       buf_pool_mutex_exit(buf_pool);
1667 +       //buf_pool_mutex_exit(buf_pool);
1668 +       mutex_exit(&buf_pool->LRU_list_mutex);
1669  
1670         mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1671  
1672 @@ -3894,6 +4073,8 @@
1673         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1674         const ibool     uncompressed = (buf_page_get_state(bpage)
1675                                         == BUF_BLOCK_FILE_PAGE);
1676 +       ibool           have_LRU_mutex = FALSE;
1677 +       mutex_t*        block_mutex;
1678  
1679         ut_a(buf_page_in_file(bpage));
1680  
1681 @@ -4027,8 +4208,26 @@
1682                 }
1683         }
1684  
1685 +       if (io_type == BUF_IO_WRITE
1686 +           && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1687 +               || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1688 +               /* to keep consistency at buf_LRU_insert_zip_clean() */
1689 +               have_LRU_mutex = TRUE; /* optimistic */
1690 +       }
1691 +retry_mutex:
1692 +       if (have_LRU_mutex)
1693 +               mutex_enter(&buf_pool->LRU_list_mutex);
1694 +       block_mutex = buf_page_get_mutex_enter(bpage);
1695 +       ut_a(block_mutex);
1696 +       if (io_type == BUF_IO_WRITE
1697 +           && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1698 +               || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1699 +           && !have_LRU_mutex) {
1700 +               mutex_exit(block_mutex);
1701 +               have_LRU_mutex = TRUE;
1702 +               goto retry_mutex;
1703 +       }
1704         buf_pool_mutex_enter(buf_pool);
1705 -       mutex_enter(buf_page_get_mutex(bpage));
1706  
1707  #ifdef UNIV_IBUF_COUNT_DEBUG
1708         if (io_type == BUF_IO_WRITE || uncompressed) {
1709 @@ -4051,6 +4250,7 @@
1710                 the x-latch to this OS thread: do not let this confuse you in
1711                 debugging! */
1712  
1713 +               ut_a(!have_LRU_mutex);
1714                 ut_ad(buf_pool->n_pend_reads > 0);
1715                 buf_pool->n_pend_reads--;
1716                 buf_pool->stat.n_pages_read++;
1717 @@ -4068,6 +4268,9 @@
1718  
1719                 buf_flush_write_complete(bpage);
1720  
1721 +               if (have_LRU_mutex)
1722 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1723 +
1724                 if (uncompressed) {
1725                         rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1726                                              BUF_IO_WRITE);
1727 @@ -4090,8 +4293,8 @@
1728         }
1729  #endif /* UNIV_DEBUG */
1730  
1731 -       mutex_exit(buf_page_get_mutex(bpage));
1732         buf_pool_mutex_exit(buf_pool);
1733 +       mutex_exit(block_mutex);
1734  }
1735  
1736  /*********************************************************************//**
1737 @@ -4108,7 +4311,9 @@
1738  
1739         ut_ad(buf_pool);
1740  
1741 -       buf_pool_mutex_enter(buf_pool);
1742 +       //buf_pool_mutex_enter(buf_pool);
1743 +       mutex_enter(&buf_pool->LRU_list_mutex);
1744 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1745  
1746         chunk = buf_pool->chunks;
1747  
1748 @@ -4125,7 +4330,9 @@
1749                 }
1750         }
1751  
1752 -       buf_pool_mutex_exit(buf_pool);
1753 +       //buf_pool_mutex_exit(buf_pool);
1754 +       mutex_exit(&buf_pool->LRU_list_mutex);
1755 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1756  
1757         return(TRUE);
1758  }
1759 @@ -4173,7 +4380,8 @@
1760                 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1761         }
1762  
1763 -       buf_pool_mutex_enter(buf_pool);
1764 +       //buf_pool_mutex_enter(buf_pool);
1765 +       mutex_enter(&buf_pool->LRU_list_mutex);
1766  
1767         ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1768         ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
1769 @@ -4186,7 +4394,8 @@
1770         memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1771         buf_refresh_io_stats(buf_pool);
1772  
1773 -       buf_pool_mutex_exit(buf_pool);
1774 +       //buf_pool_mutex_exit(buf_pool);
1775 +       mutex_exit(&buf_pool->LRU_list_mutex);
1776  }
1777  
1778  /*********************************************************************//**
1779 @@ -4228,7 +4437,10 @@
1780  
1781         ut_ad(buf_pool);
1782  
1783 -       buf_pool_mutex_enter(buf_pool);
1784 +       //buf_pool_mutex_enter(buf_pool);
1785 +       mutex_enter(&buf_pool->LRU_list_mutex);
1786 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1787 +       /* to keep the new latch order, it cannot validate correctly... */
1788  
1789         chunk = buf_pool->chunks;
1790  
1791 @@ -4323,7 +4535,7 @@
1792         /* Check clean compressed-only blocks. */
1793  
1794         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1795 -            b = UT_LIST_GET_NEXT(list, b)) {
1796 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
1797                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1798                 switch (buf_page_get_io_fix(b)) {
1799                 case BUF_IO_NONE:
1800 @@ -4354,7 +4566,7 @@
1801  
1802         buf_flush_list_mutex_enter(buf_pool);
1803         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1804 -            b = UT_LIST_GET_NEXT(list, b)) {
1805 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
1806                 ut_ad(b->in_flush_list);
1807                 ut_a(b->oldest_modification);
1808                 n_flush++;
1809 @@ -4413,6 +4625,8 @@
1810         }
1811  
1812         ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1813 +       /* because of the latching order with block->mutex, we cannot acquire the needed mutexes before that */
1814 +/*
1815         if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1816                 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1817                         (ulong) UT_LIST_GET_LEN(buf_pool->free),
1818 @@ -4423,8 +4637,11 @@
1819         ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1820         ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1821         ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1822 +*/
1823  
1824 -       buf_pool_mutex_exit(buf_pool);
1825 +       //buf_pool_mutex_exit(buf_pool);
1826 +       mutex_exit(&buf_pool->LRU_list_mutex);
1827 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1828  
1829         ut_a(buf_LRU_validate());
1830         ut_a(buf_flush_validate(buf_pool));
1831 @@ -4480,7 +4697,9 @@
1832         index_ids = mem_alloc(size * sizeof *index_ids);
1833         counts = mem_alloc(sizeof(ulint) * size);
1834  
1835 -       buf_pool_mutex_enter(buf_pool);
1836 +       //buf_pool_mutex_enter(buf_pool);
1837 +       mutex_enter(&buf_pool->LRU_list_mutex);
1838 +       mutex_enter(&buf_pool->free_list_mutex);
1839         buf_flush_list_mutex_enter(buf_pool);
1840  
1841         fprintf(stderr,
1842 @@ -4549,7 +4768,9 @@
1843                 }
1844         }
1845  
1846 -       buf_pool_mutex_exit(buf_pool);
1847 +       //buf_pool_mutex_exit(buf_pool);
1848 +       mutex_exit(&buf_pool->LRU_list_mutex);
1849 +       mutex_exit(&buf_pool->free_list_mutex);
1850  
1851         for (i = 0; i < n_found; i++) {
1852                 index = dict_index_get_if_in_cache(index_ids[i]);
1853 @@ -4606,7 +4827,7 @@
1854         buf_chunk_t*    chunk;
1855         ulint           fixed_pages_number = 0;
1856  
1857 -       buf_pool_mutex_enter(buf_pool);
1858 +       //buf_pool_mutex_enter(buf_pool);
1859  
1860         chunk = buf_pool->chunks;
1861  
1862 @@ -4640,7 +4861,7 @@
1863         /* Traverse the lists of clean and dirty compressed-only blocks. */
1864  
1865         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1866 -            b = UT_LIST_GET_NEXT(list, b)) {
1867 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
1868                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1869                 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1870  
1871 @@ -4652,7 +4873,7 @@
1872  
1873         buf_flush_list_mutex_enter(buf_pool);
1874         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1875 -            b = UT_LIST_GET_NEXT(list, b)) {
1876 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
1877                 ut_ad(b->in_flush_list);
1878  
1879                 switch (buf_page_get_state(b)) {
1880 @@ -4678,7 +4899,7 @@
1881  
1882         buf_flush_list_mutex_exit(buf_pool);
1883         mutex_exit(&buf_pool->zip_mutex);
1884 -       buf_pool_mutex_exit(buf_pool);
1885 +       //buf_pool_mutex_exit(buf_pool);
1886  
1887         return(fixed_pages_number);
1888  }
1889 @@ -4772,6 +4993,8 @@
1890  
1891         ut_ad(buf_pool);
1892  
1893 +       mutex_enter(&buf_pool->LRU_list_mutex);
1894 +       mutex_enter(&buf_pool->free_list_mutex);
1895         buf_pool_mutex_enter(buf_pool);
1896         buf_flush_list_mutex_enter(buf_pool);
1897  
1898 @@ -4875,6 +5098,8 @@
1899                 buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
1900  
1901         buf_refresh_io_stats(buf_pool);
1902 +       mutex_exit(&buf_pool->LRU_list_mutex);
1903 +       mutex_exit(&buf_pool->free_list_mutex);
1904         buf_pool_mutex_exit(buf_pool);
1905  }
1906  
1907 @@ -4994,11 +5219,13 @@
1908  {
1909         ulint   len;
1910  
1911 -       buf_pool_mutex_enter(buf_pool);
1912 +       //buf_pool_mutex_enter(buf_pool);
1913 +       mutex_enter(&buf_pool->free_list_mutex);
1914  
1915         len = UT_LIST_GET_LEN(buf_pool->free);
1916  
1917 -       buf_pool_mutex_exit(buf_pool);
1918 +       //buf_pool_mutex_exit(buf_pool);
1919 +       mutex_exit(&buf_pool->free_list_mutex);
1920  
1921         return(len);
1922  }
1923 diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1924 --- a/storage/innobase/buf/buf0flu.c    2010-12-03 15:22:36.318955693 +0900
1925 +++ b/storage/innobase/buf/buf0flu.c    2010-12-03 15:48:29.289024083 +0900
1926 @@ -279,7 +279,7 @@
1927  
1928         ut_d(block->page.in_flush_list = TRUE);
1929         block->page.oldest_modification = lsn;
1930 -       UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1931 +       UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1932  
1933  #ifdef UNIV_DEBUG_VALGRIND
1934         {
1935 @@ -373,14 +373,14 @@
1936                        > block->page.oldest_modification) {
1937                         ut_ad(b->in_flush_list);
1938                         prev_b = b;
1939 -                       b = UT_LIST_GET_NEXT(list, b);
1940 +                       b = UT_LIST_GET_NEXT(flush_list, b);
1941                 }
1942         }
1943  
1944         if (prev_b == NULL) {
1945 -               UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1946 +               UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1947         } else {
1948 -               UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1949 +               UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1950                                      prev_b, &block->page);
1951         }
1952  
1953 @@ -406,7 +406,7 @@
1954         //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
1955         //ut_ad(buf_pool_mutex_own(buf_pool));
1956  #endif
1957 -       //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1958 +       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1959         //ut_ad(bpage->in_LRU_list);
1960  
1961         if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
1962 @@ -442,14 +442,14 @@
1963         enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1964  {
1965  #ifdef UNIV_DEBUG
1966 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1967 -       ut_ad(buf_pool_mutex_own(buf_pool));
1968 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
1969 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1970  #endif
1971 -       ut_a(buf_page_in_file(bpage));
1972 +       //ut_a(buf_page_in_file(bpage));
1973         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1974         ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1975  
1976 -       if (bpage->oldest_modification != 0
1977 +       if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1978             && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1979                 ut_ad(bpage->in_flush_list);
1980  
1981 @@ -480,7 +480,7 @@
1982  {
1983         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1984  
1985 -       ut_ad(buf_pool_mutex_own(buf_pool));
1986 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1987         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1988         ut_ad(bpage->in_flush_list);
1989  
1990 @@ -498,11 +498,11 @@
1991                 return;
1992         case BUF_BLOCK_ZIP_DIRTY:
1993                 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1994 -               UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1995 +               UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1996                 buf_LRU_insert_zip_clean(bpage);
1997                 break;
1998         case BUF_BLOCK_FILE_PAGE:
1999 -               UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2000 +               UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2001                 break;
2002         }
2003  
2004 @@ -546,7 +546,7 @@
2005         buf_page_t*     prev_b = NULL;
2006         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2007  
2008 -       ut_ad(buf_pool_mutex_own(buf_pool));
2009 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2010         /* Must reside in the same buffer pool. */
2011         ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2012  
2013 @@ -575,18 +575,18 @@
2014         because we assert on in_flush_list in comparison function. */
2015         ut_d(bpage->in_flush_list = FALSE);
2016  
2017 -       prev = UT_LIST_GET_PREV(list, bpage);
2018 -       UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2019 +       prev = UT_LIST_GET_PREV(flush_list, bpage);
2020 +       UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2021  
2022         if (prev) {
2023                 ut_ad(prev->in_flush_list);
2024                 UT_LIST_INSERT_AFTER(
2025 -                       list,
2026 +                       flush_list,
2027                         buf_pool->flush_list,
2028                         prev, dpage);
2029         } else {
2030                 UT_LIST_ADD_FIRST(
2031 -                       list,
2032 +                       flush_list,
2033                         buf_pool->flush_list,
2034                         dpage);
2035         }
2036 @@ -1055,7 +1055,7 @@
2037  
2038  #ifdef UNIV_DEBUG
2039         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2040 -       ut_ad(!buf_pool_mutex_own(buf_pool));
2041 +       //ut_ad(!buf_pool_mutex_own(buf_pool));
2042  #endif
2043  
2044  #ifdef UNIV_LOG_DEBUG
2045 @@ -1069,7 +1069,8 @@
2046         io_fixed and oldest_modification != 0.  Thus, it cannot be
2047         relocated in the buffer pool or removed from flush_list or
2048         LRU_list. */
2049 -       ut_ad(!buf_pool_mutex_own(buf_pool));
2050 +       //ut_ad(!buf_pool_mutex_own(buf_pool));
2051 +       ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2052         ut_ad(!buf_flush_list_mutex_own(buf_pool));
2053         ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2054         ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
2055 @@ -1155,12 +1156,18 @@
2056         ibool           is_uncompressed;
2057  
2058         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2059 -       ut_ad(buf_pool_mutex_own(buf_pool));
2060 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2061 +#ifdef UNIV_SYNC_DEBUG
2062 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2063 +#endif
2064         ut_ad(buf_page_in_file(bpage));
2065  
2066         block_mutex = buf_page_get_mutex(bpage);
2067         ut_ad(mutex_own(block_mutex));
2068  
2069 +       buf_pool_mutex_enter(buf_pool);
2070 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
2071 +
2072         ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2073  
2074         buf_page_set_io_fix(bpage, BUF_IO_WRITE);
2075 @@ -1322,14 +1329,16 @@
2076  
2077                 buf_pool = buf_pool_get(space, i);
2078  
2079 -               buf_pool_mutex_enter(buf_pool);
2080 +               //buf_pool_mutex_enter(buf_pool);
2081 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
2082  
2083                 /* We only want to flush pages from this buffer pool. */
2084                 bpage = buf_page_hash_get(buf_pool, space, i);
2085  
2086                 if (!bpage) {
2087  
2088 -                       buf_pool_mutex_exit(buf_pool);
2089 +                       //buf_pool_mutex_exit(buf_pool);
2090 +                       rw_lock_s_unlock(&buf_pool->page_hash_latch);
2091                         continue;
2092                 }
2093  
2094 @@ -1341,11 +1350,9 @@
2095                 if (flush_type != BUF_FLUSH_LRU
2096                     || i == offset
2097                     || buf_page_is_old(bpage)) {
2098 -                       mutex_t* block_mutex = buf_page_get_mutex(bpage);
2099 +                       mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2100  
2101 -                       mutex_enter(block_mutex);
2102 -
2103 -                       if (buf_flush_ready_for_flush(bpage, flush_type)
2104 +                       if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2105                             && (i == offset || !bpage->buf_fix_count)) {
2106                                 /* We only try to flush those
2107                                 neighbors != offset where the buf fix
2108 @@ -1361,11 +1368,12 @@
2109                                 ut_ad(!buf_pool_mutex_own(buf_pool));
2110                                 count++;
2111                                 continue;
2112 -                       } else {
2113 +                       } else if (block_mutex) {
2114                                 mutex_exit(block_mutex);
2115                         }
2116                 }
2117 -               buf_pool_mutex_exit(buf_pool);
2118 +               //buf_pool_mutex_exit(buf_pool);
2119 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
2120         }
2121  
2122         return(count);
2123 @@ -1398,21 +1406,25 @@
2124         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2125  #endif /* UNIV_DEBUG */
2126  
2127 -       ut_ad(buf_pool_mutex_own(buf_pool));
2128 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2129 +       ut_ad(flush_type != BUF_FLUSH_LRU
2130 +             || mutex_own(&buf_pool->LRU_list_mutex));
2131  
2132 -       block_mutex = buf_page_get_mutex(bpage);
2133 -       mutex_enter(block_mutex);
2134 +       block_mutex = buf_page_get_mutex_enter(bpage);
2135  
2136 -       ut_a(buf_page_in_file(bpage));
2137 +       //ut_a(buf_page_in_file(bpage));
2138  
2139 -       if (buf_flush_ready_for_flush(bpage, flush_type)) {
2140 +       if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2141                 ulint           space;
2142                 ulint           offset;
2143                 buf_pool_t*     buf_pool;
2144  
2145                 buf_pool = buf_pool_from_bpage(bpage);
2146  
2147 -               buf_pool_mutex_exit(buf_pool);
2148 +               //buf_pool_mutex_exit(buf_pool);
2149 +               if (flush_type == BUF_FLUSH_LRU) {
2150 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2151 +               }
2152  
2153                 /* These fields are protected by both the
2154                 buffer pool mutex and block mutex. */
2155 @@ -1428,13 +1440,18 @@
2156                                                   *count,
2157                                                   n_to_flush);
2158  
2159 -               buf_pool_mutex_enter(buf_pool);
2160 +               //buf_pool_mutex_enter(buf_pool);
2161 +               if (flush_type == BUF_FLUSH_LRU) {
2162 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2163 +               }
2164                 flushed = TRUE;
2165 -       } else {
2166 +       } else if (block_mutex) {
2167                 mutex_exit(block_mutex);
2168         }
2169  
2170 -       ut_ad(buf_pool_mutex_own(buf_pool));
2171 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2172 +       ut_ad(flush_type != BUF_FLUSH_LRU
2173 +             || mutex_own(&buf_pool->LRU_list_mutex));
2174  
2175         return(flushed);
2176  }
2177 @@ -1455,7 +1472,8 @@
2178         buf_page_t*     bpage;
2179         ulint           count = 0;
2180  
2181 -       ut_ad(buf_pool_mutex_own(buf_pool));
2182 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2183 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2184  
2185         do {
2186                 /* Start from the end of the list looking for a
2187 @@ -1477,7 +1495,8 @@
2188         should be flushed, we factor in this value. */
2189         buf_lru_flush_page_count += count;
2190  
2191 -       ut_ad(buf_pool_mutex_own(buf_pool));
2192 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2193 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2194  
2195         return(count);
2196  }
2197 @@ -1505,9 +1524,10 @@
2198  {
2199         ulint           len;
2200         buf_page_t*     bpage;
2201 +       buf_page_t*     prev_bpage = NULL;
2202         ulint           count = 0;
2203  
2204 -       ut_ad(buf_pool_mutex_own(buf_pool));
2205 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2206  
2207         /* If we have flushed enough, leave the loop */
2208         do {
2209 @@ -1526,6 +1546,7 @@
2210  
2211                 if (bpage) {
2212                         ut_a(bpage->oldest_modification > 0);
2213 +                       prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2214                 }
2215  
2216                 if (!bpage || bpage->oldest_modification >= lsn_limit) {
2217 @@ -1567,9 +1588,17 @@
2218                                 break;
2219                         }
2220  
2221 -                       bpage = UT_LIST_GET_PREV(list, bpage);
2222 +                       bpage = UT_LIST_GET_PREV(flush_list, bpage);
2223  
2224 -                       ut_ad(!bpage || bpage->in_flush_list);
2225 +                       //ut_ad(!bpage || bpage->in_flush_list);
2226 +                       if (bpage != prev_bpage) {
2227 +                               /* the search might warp.. retrying */
2228 +                               buf_flush_list_mutex_exit(buf_pool);
2229 +                               break;
2230 +                       }
2231 +                       if (bpage) {
2232 +                               prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2233 +                       }
2234  
2235                         buf_flush_list_mutex_exit(buf_pool);
2236  
2237 @@ -1578,7 +1607,7 @@
2238  
2239         } while (count < min_n && bpage != NULL && len > 0);
2240  
2241 -       ut_ad(buf_pool_mutex_own(buf_pool));
2242 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2243  
2244         return(count);
2245  }
2246 @@ -1617,13 +1646,15 @@
2247               || sync_thread_levels_empty_gen(TRUE));
2248  #endif /* UNIV_SYNC_DEBUG */
2249  
2250 -       buf_pool_mutex_enter(buf_pool);
2251 +       //buf_pool_mutex_enter(buf_pool);
2252  
2253         /* Note: The buffer pool mutex is released and reacquired within
2254         the flush functions. */
2255         switch(flush_type) {
2256         case BUF_FLUSH_LRU:
2257 +               mutex_enter(&buf_pool->LRU_list_mutex);
2258                 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2259 +               mutex_exit(&buf_pool->LRU_list_mutex);
2260                 break;
2261         case BUF_FLUSH_LIST:
2262                 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
2263 @@ -1632,7 +1663,7 @@
2264                 ut_error;
2265         }
2266  
2267 -       buf_pool_mutex_exit(buf_pool);
2268 +       //buf_pool_mutex_exit(buf_pool);
2269  
2270         buf_flush_buffered_writes();
2271  
2272 @@ -1888,7 +1919,7 @@
2273  retry:
2274         //buf_pool_mutex_enter(buf_pool);
2275         if (have_LRU_mutex)
2276 -               buf_pool_mutex_enter(buf_pool);
2277 +               mutex_enter(&buf_pool->LRU_list_mutex);
2278  
2279         n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2280  
2281 @@ -1905,15 +1936,15 @@
2282                         bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2283                         continue;
2284                 }
2285 -               block_mutex = buf_page_get_mutex(bpage);
2286 -
2287 -               mutex_enter(block_mutex);
2288 +               block_mutex = buf_page_get_mutex_enter(bpage);
2289  
2290 -               if (buf_flush_ready_for_replace(bpage)) {
2291 +               if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2292                         n_replaceable++;
2293                 }
2294  
2295 -               mutex_exit(block_mutex);
2296 +               if (block_mutex) {
2297 +                       mutex_exit(block_mutex);
2298 +               }
2299  
2300                 distance++;
2301  
2302 @@ -1922,7 +1953,7 @@
2303  
2304         //buf_pool_mutex_exit(buf_pool);
2305         if (have_LRU_mutex)
2306 -               buf_pool_mutex_exit(buf_pool);
2307 +               mutex_exit(&buf_pool->LRU_list_mutex);
2308  
2309         if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2310  
2311 @@ -2121,7 +2152,7 @@
2312  
2313         ut_ad(buf_flush_list_mutex_own(buf_pool));
2314  
2315 -       UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2316 +       UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2317                          ut_ad(ut_list_node_313->in_flush_list));
2318  
2319         bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2320 @@ -2161,7 +2192,7 @@
2321                         rnode = rbt_next(buf_pool->flush_rbt, rnode);
2322                 }
2323  
2324 -               bpage = UT_LIST_GET_NEXT(list, bpage);
2325 +               bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2326  
2327                 ut_a(!bpage || om >= bpage->oldest_modification);
2328         }
2329 diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2330 --- a/storage/innobase/buf/buf0lru.c    2010-12-03 15:22:36.321987250 +0900
2331 +++ b/storage/innobase/buf/buf0lru.c    2010-12-03 15:48:29.293023197 +0900
2332 @@ -143,8 +143,9 @@
2333  void
2334  buf_LRU_block_free_hashed_page(
2335  /*===========================*/
2336 -       buf_block_t*    block); /*!< in: block, must contain a file page and
2337 +       buf_block_t*    block,  /*!< in: block, must contain a file page and
2338                                 be in a state where it can be freed */
2339 +       ibool           have_page_hash_mutex);
2340  
2341  /******************************************************************//**
2342  Determines if the unzip_LRU list should be used for evicting a victim
2343 @@ -154,15 +155,20 @@
2344  ibool
2345  buf_LRU_evict_from_unzip_LRU(
2346  /*=========================*/
2347 -       buf_pool_t*     buf_pool)
2348 +       buf_pool_t*     buf_pool,
2349 +       ibool           have_LRU_mutex)
2350  {
2351         ulint   io_avg;
2352         ulint   unzip_avg;
2353  
2354 -       ut_ad(buf_pool_mutex_own(buf_pool));
2355 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2356  
2357 +       if (!have_LRU_mutex)
2358 +               mutex_enter(&buf_pool->LRU_list_mutex);
2359         /* If the unzip_LRU list is empty, we can only use the LRU. */
2360         if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2361 +               if (!have_LRU_mutex)
2362 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2363                 return(FALSE);
2364         }
2365  
2366 @@ -171,14 +177,20 @@
2367         decompressed pages in the buffer pool. */
2368         if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2369             <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2370 +               if (!have_LRU_mutex)
2371 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2372                 return(FALSE);
2373         }
2374  
2375         /* If eviction hasn't started yet, we assume by default
2376         that a workload is disk bound. */
2377         if (buf_pool->freed_page_clock == 0) {
2378 +               if (!have_LRU_mutex)
2379 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2380                 return(TRUE);
2381         }
2382 +       if (!have_LRU_mutex)
2383 +               mutex_exit(&buf_pool->LRU_list_mutex);
2384  
2385         /* Calculate the average over past intervals, and add the values
2386         of the current interval. */
2387 @@ -246,19 +258,23 @@
2388         page_arr = ut_malloc(
2389                 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2390  
2391 -       buf_pool_mutex_enter(buf_pool);
2392 +       //buf_pool_mutex_enter(buf_pool);
2393 +       mutex_enter(&buf_pool->LRU_list_mutex);
2394  
2395  scan_again:
2396         num_entries = 0;
2397         bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2398  
2399         while (bpage != NULL) {
2400 -               mutex_t*        block_mutex = buf_page_get_mutex(bpage);
2401 +               mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
2402                 buf_page_t*     prev_bpage;
2403  
2404 -               mutex_enter(block_mutex);
2405                 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2406  
2407 +               if (!block_mutex) {
2408 +                       goto next_page;
2409 +               }
2410 +
2411                 ut_a(buf_page_in_file(bpage));
2412  
2413                 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2414 @@ -287,14 +303,16 @@
2415  
2416                         /* Array full. We release the buf_pool->mutex to
2417                         obey the latching order. */
2418 -                       buf_pool_mutex_exit(buf_pool);
2419 +                       //buf_pool_mutex_exit(buf_pool);
2420 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2421  
2422                         buf_LRU_drop_page_hash_batch(
2423                                 id, zip_size, page_arr, num_entries);
2424  
2425                         num_entries = 0;
2426  
2427 -                       buf_pool_mutex_enter(buf_pool);
2428 +                       //buf_pool_mutex_enter(buf_pool);
2429 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2430                 } else {
2431                         mutex_exit(block_mutex);
2432                 }
2433 @@ -319,7 +337,8 @@
2434                 }
2435         }
2436  
2437 -       buf_pool_mutex_exit(buf_pool);
2438 +       //buf_pool_mutex_exit(buf_pool);
2439 +       mutex_exit(&buf_pool->LRU_list_mutex);
2440  
2441         /* Drop any remaining batch of search hashed pages. */
2442         buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2443 @@ -341,7 +360,9 @@
2444         ibool           all_freed;
2445  
2446  scan_again:
2447 -       buf_pool_mutex_enter(buf_pool);
2448 +       //buf_pool_mutex_enter(buf_pool);
2449 +       mutex_enter(&buf_pool->LRU_list_mutex);
2450 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
2451  
2452         all_freed = TRUE;
2453  
2454 @@ -369,8 +390,16 @@
2455  
2456                         all_freed = FALSE;
2457                 } else {
2458 -                       mutex_t* block_mutex = buf_page_get_mutex(bpage);
2459 -                       mutex_enter(block_mutex);
2460 +                       mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2461 +
2462 +                       if (!block_mutex) {
2463 +                               /* This case should be impossible.
2464 +                               Something is wrong, so force a retry via scan_again */
2465 +
2466 +                               all_freed = FALSE;
2467 +
2468 +                               goto next_page_no_mutex;
2469 +                       }
2470  
2471                         if (bpage->buf_fix_count > 0) {
2472  
2473 @@ -429,7 +458,9 @@
2474                                 ulint   page_no;
2475                                 ulint   zip_size;
2476  
2477 -                               buf_pool_mutex_exit(buf_pool);
2478 +                               //buf_pool_mutex_exit(buf_pool);
2479 +                               mutex_exit(&buf_pool->LRU_list_mutex);
2480 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2481  
2482                                 zip_size = buf_page_get_zip_size(bpage);
2483                                 page_no = buf_page_get_page_no(bpage);
2484 @@ -454,7 +485,7 @@
2485                         if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2486                             != BUF_BLOCK_ZIP_FREE) {
2487                                 buf_LRU_block_free_hashed_page((buf_block_t*)
2488 -                                                              bpage);
2489 +                                                              bpage, TRUE);
2490                         } else {
2491                                 /* The block_mutex should have been
2492                                 released by buf_LRU_block_remove_hashed_page()
2493 @@ -486,7 +517,9 @@
2494                 bpage = prev_bpage;
2495         }
2496  
2497 -       buf_pool_mutex_exit(buf_pool);
2498 +       //buf_pool_mutex_exit(buf_pool);
2499 +       mutex_exit(&buf_pool->LRU_list_mutex);
2500 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
2501  
2502         if (!all_freed) {
2503                 os_thread_sleep(20000);
2504 @@ -532,7 +565,9 @@
2505         buf_page_t*     b;
2506         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2507  
2508 -       ut_ad(buf_pool_mutex_own(buf_pool));
2509 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2510 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2511 +       ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2512         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2513  
2514         /* Find the first successor of bpage in the LRU list
2515 @@ -540,17 +575,17 @@
2516         b = bpage;
2517         do {
2518                 b = UT_LIST_GET_NEXT(LRU, b);
2519 -       } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2520 +       } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2521  
2522         /* Insert bpage before b, i.e., after the predecessor of b. */
2523         if (b) {
2524 -               b = UT_LIST_GET_PREV(list, b);
2525 +               b = UT_LIST_GET_PREV(zip_list, b);
2526         }
2527  
2528         if (b) {
2529 -               UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2530 +               UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2531         } else {
2532 -               UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2533 +               UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2534         }
2535  }
2536  
2537 @@ -563,18 +598,19 @@
2538  buf_LRU_free_from_unzip_LRU_list(
2539  /*=============================*/
2540         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
2541 -       ulint           n_iterations)   /*!< in: how many times this has
2542 +       ulint           n_iterations,   /*!< in: how many times this has
2543                                         been called repeatedly without
2544                                         result: a high value means that
2545                                         we should search farther; we will
2546                                         search n_iterations / 5 of the
2547                                         unzip_LRU list, or nothing if
2548                                         n_iterations >= 5 */
2549 +       ibool           have_LRU_mutex)
2550  {
2551         buf_block_t*    block;
2552         ulint           distance;
2553  
2554 -       ut_ad(buf_pool_mutex_own(buf_pool));
2555 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2556  
2557         /* Theoratically it should be much easier to find a victim
2558         from unzip_LRU as we can choose even a dirty block (as we'll
2559 @@ -584,7 +620,7 @@
2560         if we have done five iterations so far. */
2561  
2562         if (UNIV_UNLIKELY(n_iterations >= 5)
2563 -           || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2564 +           || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2565  
2566                 return(FALSE);
2567         }
2568 @@ -592,18 +628,25 @@
2569         distance = 100 + (n_iterations
2570                           * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2571  
2572 +restart:
2573         for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2574              UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2575              block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2576  
2577                 enum buf_lru_free_block_status  freed;
2578  
2579 +               mutex_enter(&block->mutex);
2580 +               if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2581 +                   || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2582 +                       mutex_exit(&block->mutex);
2583 +                       goto restart;
2584 +               }
2585 +
2586                 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2587                 ut_ad(block->in_unzip_LRU_list);
2588                 ut_ad(block->page.in_LRU_list);
2589  
2590 -               mutex_enter(&block->mutex);
2591 -               freed = buf_LRU_free_block(&block->page, FALSE, NULL);
2592 +               freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
2593                 mutex_exit(&block->mutex);
2594  
2595                 switch (freed) {
2596 @@ -637,21 +680,23 @@
2597  buf_LRU_free_from_common_LRU_list(
2598  /*==============================*/
2599         buf_pool_t*     buf_pool,
2600 -       ulint           n_iterations)
2601 +       ulint           n_iterations,
2602                                 /*!< in: how many times this has been called
2603                                 repeatedly without result: a high value means
2604                                 that we should search farther; if
2605                                 n_iterations < 10, then we search
2606                                 n_iterations / 10 * buf_pool->curr_size
2607                                 pages from the end of the LRU list */
2608 +       ibool           have_LRU_mutex)
2609  {
2610         buf_page_t*     bpage;
2611         ulint           distance;
2612  
2613 -       ut_ad(buf_pool_mutex_own(buf_pool));
2614 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2615  
2616         distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2617  
2618 +restart:
2619         for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2620              UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2621              bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2622 @@ -659,14 +704,23 @@
2623                 enum buf_lru_free_block_status  freed;
2624                 unsigned                        accessed;
2625                 mutex_t*                        block_mutex
2626 -                       = buf_page_get_mutex(bpage);
2627 +                       = buf_page_get_mutex_enter(bpage);
2628 +
2629 +               if (!block_mutex) {
2630 +                       goto restart;
2631 +               }
2632 +
2633 +               if (!bpage->in_LRU_list
2634 +                   || !buf_page_in_file(bpage)) {
2635 +                       mutex_exit(block_mutex);
2636 +                       goto restart;
2637 +               }
2638  
2639                 ut_ad(buf_page_in_file(bpage));
2640                 ut_ad(bpage->in_LRU_list);
2641  
2642 -               mutex_enter(block_mutex);
2643                 accessed = buf_page_is_accessed(bpage);
2644 -               freed = buf_LRU_free_block(bpage, TRUE, NULL);
2645 +               freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
2646                 mutex_exit(block_mutex);
2647  
2648                 switch (freed) {
2649 @@ -718,16 +772,23 @@
2650                                 n_iterations / 5 of the unzip_LRU list. */
2651  {
2652         ibool   freed = FALSE;
2653 +       ibool   have_LRU_mutex = FALSE;
2654  
2655 -       buf_pool_mutex_enter(buf_pool);
2656 +       if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2657 +               have_LRU_mutex = TRUE;
2658 +
2659 +       //buf_pool_mutex_enter(buf_pool);
2660 +       if (have_LRU_mutex)
2661 +               mutex_enter(&buf_pool->LRU_list_mutex);
2662  
2663 -       freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2664 +       freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2665  
2666         if (!freed) {
2667                 freed = buf_LRU_free_from_common_LRU_list(
2668 -                       buf_pool, n_iterations);
2669 +                       buf_pool, n_iterations, have_LRU_mutex);
2670         }
2671  
2672 +       buf_pool_mutex_enter(buf_pool);
2673         if (!freed) {
2674                 buf_pool->LRU_flush_ended = 0;
2675         } else if (buf_pool->LRU_flush_ended > 0) {
2676 @@ -735,6 +796,8 @@
2677         }
2678  
2679         buf_pool_mutex_exit(buf_pool);
2680 +       if (have_LRU_mutex)
2681 +               mutex_exit(&buf_pool->LRU_list_mutex);
2682  
2683         return(freed);
2684  }
2685 @@ -795,7 +858,9 @@
2686  
2687                 buf_pool = buf_pool_from_array(i);
2688  
2689 -               buf_pool_mutex_enter(buf_pool);
2690 +               //buf_pool_mutex_enter(buf_pool);
2691 +               mutex_enter(&buf_pool->LRU_list_mutex);
2692 +               mutex_enter(&buf_pool->free_list_mutex);
2693  
2694                 if (!recv_recovery_on
2695                     && UT_LIST_GET_LEN(buf_pool->free)
2696 @@ -805,7 +870,9 @@
2697                         ret = TRUE;
2698                 }
2699  
2700 -               buf_pool_mutex_exit(buf_pool);
2701 +               //buf_pool_mutex_exit(buf_pool);
2702 +               mutex_exit(&buf_pool->LRU_list_mutex);
2703 +               mutex_exit(&buf_pool->free_list_mutex);
2704         }
2705  
2706         return(ret);
2707 @@ -823,9 +890,10 @@
2708  {
2709         buf_block_t*    block;
2710  
2711 -       ut_ad(buf_pool_mutex_own(buf_pool));
2712 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2713  
2714 -       block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2715 +       mutex_enter(&buf_pool->free_list_mutex);
2716 +       block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2717  
2718         if (block) {
2719  
2720 @@ -834,7 +902,9 @@
2721                 ut_ad(!block->page.in_flush_list);
2722                 ut_ad(!block->page.in_LRU_list);
2723                 ut_a(!buf_page_in_file(&block->page));
2724 -               UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2725 +               UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2726 +
2727 +               mutex_exit(&buf_pool->free_list_mutex);
2728  
2729                 mutex_enter(&block->mutex);
2730  
2731 @@ -844,6 +914,8 @@
2732                 ut_ad(buf_pool_from_block(block) == buf_pool);
2733  
2734                 mutex_exit(&block->mutex);
2735 +       } else {
2736 +               mutex_exit(&buf_pool->free_list_mutex);
2737         }
2738  
2739         return(block);
2740 @@ -868,7 +940,7 @@
2741         ibool           mon_value_was   = FALSE;
2742         ibool           started_monitor = FALSE;
2743  loop:
2744 -       buf_pool_mutex_enter(buf_pool);
2745 +       //buf_pool_mutex_enter(buf_pool);
2746  
2747         if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2748             + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2749 @@ -951,8 +1023,10 @@
2750                         ibool   lru;
2751                         page_zip_set_size(&block->page.zip, zip_size);
2752  
2753 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2754                         block->page.zip.data = buf_buddy_alloc(
2755 -                               buf_pool, zip_size, &lru);
2756 +                               buf_pool, zip_size, &lru, FALSE);
2757 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2758  
2759                         UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
2760                 } else {
2761 @@ -960,7 +1034,7 @@
2762                         block->page.zip.data = NULL;
2763                 }
2764  
2765 -               buf_pool_mutex_exit(buf_pool);
2766 +               //buf_pool_mutex_exit(buf_pool);
2767  
2768                 if (started_monitor) {
2769                         srv_print_innodb_monitor = mon_value_was;
2770 @@ -972,7 +1046,7 @@
2771         /* If no block was in the free list, search from the end of the LRU
2772         list and try to free a block there */
2773  
2774 -       buf_pool_mutex_exit(buf_pool);
2775 +       //buf_pool_mutex_exit(buf_pool);
2776  
2777         freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
2778  
2779 @@ -1058,7 +1132,8 @@
2780         ulint   new_len;
2781  
2782         ut_a(buf_pool->LRU_old);
2783 -       ut_ad(buf_pool_mutex_own(buf_pool));
2784 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2785 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2786         ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2787         ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2788  #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2789 @@ -1124,7 +1199,8 @@
2790  {
2791         buf_page_t*     bpage;
2792  
2793 -       ut_ad(buf_pool_mutex_own(buf_pool));
2794 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2795 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2796         ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2797  
2798         /* We first initialize all blocks in the LRU list as old and then use
2799 @@ -1159,13 +1235,14 @@
2800         ut_ad(buf_pool);
2801         ut_ad(bpage);
2802         ut_ad(buf_page_in_file(bpage));
2803 -       ut_ad(buf_pool_mutex_own(buf_pool));
2804 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2805 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2806  
2807         if (buf_page_belongs_to_unzip_LRU(bpage)) {
2808                 buf_block_t*    block = (buf_block_t*) bpage;
2809  
2810                 ut_ad(block->in_unzip_LRU_list);
2811 -               ut_d(block->in_unzip_LRU_list = FALSE);
2812 +               block->in_unzip_LRU_list = FALSE;
2813  
2814                 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2815         }
2816 @@ -1183,7 +1260,8 @@
2817  
2818         ut_ad(buf_pool);
2819         ut_ad(bpage);
2820 -       ut_ad(buf_pool_mutex_own(buf_pool));
2821 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2822 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2823  
2824         ut_a(buf_page_in_file(bpage));
2825  
2826 @@ -1260,12 +1338,13 @@
2827  
2828         ut_ad(buf_pool);
2829         ut_ad(block);
2830 -       ut_ad(buf_pool_mutex_own(buf_pool));
2831 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2832 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2833  
2834         ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2835  
2836         ut_ad(!block->in_unzip_LRU_list);
2837 -       ut_d(block->in_unzip_LRU_list = TRUE);
2838 +       block->in_unzip_LRU_list = TRUE;
2839  
2840         if (old) {
2841                 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2842 @@ -1286,7 +1365,8 @@
2843  
2844         ut_ad(buf_pool);
2845         ut_ad(bpage);
2846 -       ut_ad(buf_pool_mutex_own(buf_pool));
2847 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2848 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2849  
2850         ut_a(buf_page_in_file(bpage));
2851  
2852 @@ -1337,7 +1417,8 @@
2853  
2854         ut_ad(buf_pool);
2855         ut_ad(bpage);
2856 -       ut_ad(buf_pool_mutex_own(buf_pool));
2857 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2858 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2859  
2860         ut_a(buf_page_in_file(bpage));
2861         ut_ad(!bpage->in_LRU_list);
2862 @@ -1416,7 +1497,8 @@
2863  {
2864         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2865  
2866 -       ut_ad(buf_pool_mutex_own(buf_pool));
2867 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2868 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2869  
2870         if (bpage->old) {
2871                 buf_pool->stat.n_pages_made_young++;
2872 @@ -1458,19 +1540,20 @@
2873         buf_page_t*     bpage,  /*!< in: block to be freed */
2874         ibool           zip,    /*!< in: TRUE if should remove also the
2875                                 compressed page of an uncompressed page */
2876 -       ibool*          buf_pool_mutex_released)
2877 +       ibool*          buf_pool_mutex_released,
2878                                 /*!< in: pointer to a variable that will
2879                                 be assigned TRUE if buf_pool_mutex
2880                                 was temporarily released, or NULL */
2881 +       ibool           have_LRU_mutex)
2882  {
2883         buf_page_t*     b = NULL;
2884         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2885         mutex_t*        block_mutex = buf_page_get_mutex(bpage);
2886  
2887 -       ut_ad(buf_pool_mutex_own(buf_pool));
2888 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2889         ut_ad(mutex_own(block_mutex));
2890         ut_ad(buf_page_in_file(bpage));
2891 -       ut_ad(bpage->in_LRU_list);
2892 +       //ut_ad(bpage->in_LRU_list);
2893         ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2894  #if UNIV_WORD_SIZE == 4
2895         /* On 32-bit systems, there is no padding in buf_page_t.  On
2896 @@ -1479,7 +1562,7 @@
2897         UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2898  #endif
2899  
2900 -       if (!buf_page_can_relocate(bpage)) {
2901 +       if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2902  
2903                 /* Do not free buffer-fixed or I/O-fixed blocks. */
2904                 return(BUF_LRU_NOT_FREED);
2905 @@ -1511,15 +1594,15 @@
2906                 If it cannot be allocated (without freeing a block
2907                 from the LRU list), refuse to free bpage. */
2908  alloc:
2909 -               buf_pool_mutex_exit_forbid(buf_pool);
2910 -               b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2911 -               buf_pool_mutex_exit_allow(buf_pool);
2912 +               //buf_pool_mutex_exit_forbid(buf_pool);
2913 +               b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2914 +               //buf_pool_mutex_exit_allow(buf_pool);
2915  
2916                 if (UNIV_UNLIKELY(!b)) {
2917                         return(BUF_LRU_CANNOT_RELOCATE);
2918                 }
2919  
2920 -               memcpy(b, bpage, sizeof *b);
2921 +               //memcpy(b, bpage, sizeof *b);
2922         }
2923  
2924  #ifdef UNIV_DEBUG
2925 @@ -1530,6 +1613,39 @@
2926         }
2927  #endif /* UNIV_DEBUG */
2928  
2929 +       /* to keep the latch order, release block_mutex and re-acquire it after the other latches */
2930 +       mutex_exit(block_mutex);
2931 +
2932 +       if (!have_LRU_mutex)
2933 +               mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2934 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
2935 +       mutex_enter(block_mutex);
2936 +
2937 +       /* recheck states of block */
2938 +       if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2939 +           || !buf_page_can_relocate(bpage)) {
2940 +not_freed:
2941 +               if (b) {
2942 +                       buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2943 +               }
2944 +               if (!have_LRU_mutex)
2945 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2946 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2947 +               return(BUF_LRU_NOT_FREED);
2948 +       } else if (zip || !bpage->zip.data) {
2949 +               if (bpage->oldest_modification)
2950 +                       goto not_freed;
2951 +       } else if (bpage->oldest_modification) {
2952 +               if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2953 +                       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2954 +                       goto not_freed;
2955 +               }
2956 +       }
2957 +
2958 +       if (b) {
2959 +               memcpy(b, bpage, sizeof *b);
2960 +       }
2961 +
2962         if (buf_LRU_block_remove_hashed_page(bpage, zip)
2963             != BUF_BLOCK_ZIP_FREE) {
2964                 ut_a(bpage->buf_fix_count == 0);
2965 @@ -1546,6 +1662,10 @@
2966  
2967                         ut_a(!hash_b);
2968  
2969 +                       while (prev_b && !prev_b->in_LRU_list) {
2970 +                               prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2971 +                       }
2972 +
2973                         b->state = b->oldest_modification
2974                                 ? BUF_BLOCK_ZIP_DIRTY
2975                                 : BUF_BLOCK_ZIP_PAGE;
2976 @@ -1642,7 +1762,9 @@
2977                         *buf_pool_mutex_released = TRUE;
2978                 }
2979  
2980 -               buf_pool_mutex_exit(buf_pool);
2981 +               //buf_pool_mutex_exit(buf_pool);
2982 +               mutex_exit(&buf_pool->LRU_list_mutex);
2983 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2984                 mutex_exit(block_mutex);
2985  
2986                 /* Remove possible adaptive hash index on the page.
2987 @@ -1674,7 +1796,9 @@
2988                                 : BUF_NO_CHECKSUM_MAGIC);
2989                 }
2990  
2991 -               buf_pool_mutex_enter(buf_pool);
2992 +               //buf_pool_mutex_enter(buf_pool);
2993 +               if (have_LRU_mutex)
2994 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2995                 mutex_enter(block_mutex);
2996  
2997                 if (b) {
2998 @@ -1684,13 +1808,17 @@
2999                         mutex_exit(&buf_pool->zip_mutex);
3000                 }
3001  
3002 -               buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
3003 +               buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
3004         } else {
3005                 /* The block_mutex should have been released by
3006                 buf_LRU_block_remove_hashed_page() when it returns
3007                 BUF_BLOCK_ZIP_FREE. */
3008                 ut_ad(block_mutex == &buf_pool->zip_mutex);
3009                 mutex_enter(block_mutex);
3010 +
3011 +               if (!have_LRU_mutex)
3012 +                       mutex_exit(&buf_pool->LRU_list_mutex);
3013 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3014         }
3015  
3016         return(BUF_LRU_FREED);
3017 @@ -1702,13 +1830,14 @@
3018  void
3019  buf_LRU_block_free_non_file_page(
3020  /*=============================*/
3021 -       buf_block_t*    block)  /*!< in: block, must not contain a file page */
3022 +       buf_block_t*    block,  /*!< in: block, must not contain a file page */
3023 +       ibool           have_page_hash_mutex)
3024  {
3025         void*           data;
3026         buf_pool_t*     buf_pool = buf_pool_from_block(block);
3027  
3028         ut_ad(block);
3029 -       ut_ad(buf_pool_mutex_own(buf_pool));
3030 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3031         ut_ad(mutex_own(&block->mutex));
3032  
3033         switch (buf_block_get_state(block)) {
3034 @@ -1742,18 +1871,21 @@
3035         if (data) {
3036                 block->page.zip.data = NULL;
3037                 mutex_exit(&block->mutex);
3038 -               buf_pool_mutex_exit_forbid(buf_pool);
3039 +               //buf_pool_mutex_exit_forbid(buf_pool);
3040  
3041                 buf_buddy_free(
3042 -                       buf_pool, data, page_zip_get_size(&block->page.zip));
3043 +                       buf_pool, data, page_zip_get_size(&block->page.zip),
3044 +                       have_page_hash_mutex);
3045  
3046 -               buf_pool_mutex_exit_allow(buf_pool);
3047 +               //buf_pool_mutex_exit_allow(buf_pool);
3048                 mutex_enter(&block->mutex);
3049                 page_zip_set_size(&block->page.zip, 0);
3050         }
3051  
3052 -       UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3053 +       mutex_enter(&buf_pool->free_list_mutex);
3054 +       UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3055         ut_d(block->page.in_free_list = TRUE);
3056 +       mutex_exit(&buf_pool->free_list_mutex);
3057  
3058         UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3059  }
3060 @@ -1783,7 +1915,11 @@
3061         buf_pool_t*             buf_pool = buf_pool_from_bpage(bpage);
3062  
3063         ut_ad(bpage);
3064 -       ut_ad(buf_pool_mutex_own(buf_pool));
3065 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3066 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3067 +#ifdef UNIV_SYNC_DEBUG
3068 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3069 +#endif
3070         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3071  
3072         ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3073 @@ -1891,7 +2027,9 @@
3074  
3075  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3076                 mutex_exit(buf_page_get_mutex(bpage));
3077 -               buf_pool_mutex_exit(buf_pool);
3078 +               //buf_pool_mutex_exit(buf_pool);
3079 +               mutex_exit(&buf_pool->LRU_list_mutex);
3080 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3081                 buf_print();
3082                 buf_LRU_print();
3083                 buf_validate();
3084 @@ -1912,17 +2050,17 @@
3085                 ut_a(bpage->zip.data);
3086                 ut_a(buf_page_get_zip_size(bpage));
3087  
3088 -               UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3089 +               UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3090  
3091                 mutex_exit(&buf_pool->zip_mutex);
3092 -               buf_pool_mutex_exit_forbid(buf_pool);
3093 +               //buf_pool_mutex_exit_forbid(buf_pool);
3094  
3095                 buf_buddy_free(
3096                         buf_pool, bpage->zip.data,
3097 -                       page_zip_get_size(&bpage->zip));
3098 +                       page_zip_get_size(&bpage->zip), TRUE);
3099  
3100 -               buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3101 -               buf_pool_mutex_exit_allow(buf_pool);
3102 +               buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3103 +               //buf_pool_mutex_exit_allow(buf_pool);
3104  
3105                 UNIV_MEM_UNDESC(bpage);
3106                 return(BUF_BLOCK_ZIP_FREE);
3107 @@ -1945,13 +2083,13 @@
3108                         ut_ad(!bpage->in_flush_list);
3109                         ut_ad(!bpage->in_LRU_list);
3110                         mutex_exit(&((buf_block_t*) bpage)->mutex);
3111 -                       buf_pool_mutex_exit_forbid(buf_pool);
3112 +                       //buf_pool_mutex_exit_forbid(buf_pool);
3113  
3114                         buf_buddy_free(
3115                                 buf_pool, data,
3116 -                               page_zip_get_size(&bpage->zip));
3117 +                               page_zip_get_size(&bpage->zip), TRUE);
3118  
3119 -                       buf_pool_mutex_exit_allow(buf_pool);
3120 +                       //buf_pool_mutex_exit_allow(buf_pool);
3121                         mutex_enter(&((buf_block_t*) bpage)->mutex);
3122                         page_zip_set_size(&bpage->zip, 0);
3123                 }
3124 @@ -1977,18 +2115,19 @@
3125  void
3126  buf_LRU_block_free_hashed_page(
3127  /*===========================*/
3128 -       buf_block_t*    block)  /*!< in: block, must contain a file page and
3129 +       buf_block_t*    block,  /*!< in: block, must contain a file page and
3130                                 be in a state where it can be freed */
3131 +       ibool           have_page_hash_mutex) /*!< in: forwarded unchanged to buf_LRU_block_free_non_file_page() */
3132  {
3133  #ifdef UNIV_DEBUG
3134 -       buf_pool_t*     buf_pool = buf_pool_from_block(block);
3135 -       ut_ad(buf_pool_mutex_own(buf_pool));
3136 +       //buf_pool_t*   buf_pool = buf_pool_from_block(block);
3137 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3138  #endif
3139         ut_ad(mutex_own(&block->mutex));
3140  
3141         buf_block_set_state(block, BUF_BLOCK_MEMORY);
3142  
3143 -       buf_LRU_block_free_non_file_page(block);
3144 +       buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3145  }
3146  
3147  /**********************************************************************//**
3148 @@ -2015,7 +2154,8 @@
3149         }
3150  
3151         if (adjust) {
3152 -               buf_pool_mutex_enter(buf_pool);
3153 +               //buf_pool_mutex_enter(buf_pool);
3154 +               mutex_enter(&buf_pool->LRU_list_mutex);
3155  
3156                 if (ratio != buf_pool->LRU_old_ratio) {
3157                         buf_pool->LRU_old_ratio = ratio;
3158 @@ -2027,7 +2167,8 @@
3159                         }
3160                 }
3161  
3162 -               buf_pool_mutex_exit(buf_pool);
3163 +               //buf_pool_mutex_exit(buf_pool);
3164 +               mutex_exit(&buf_pool->LRU_list_mutex);
3165         } else {
3166                 buf_pool->LRU_old_ratio = ratio;
3167         }
3168 @@ -2124,7 +2265,8 @@
3169         ulint           new_len;
3170  
3171         ut_ad(buf_pool);
3172 -       buf_pool_mutex_enter(buf_pool);
3173 +       //buf_pool_mutex_enter(buf_pool);
3174 +       mutex_enter(&buf_pool->LRU_list_mutex);
3175  
3176         if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3177  
3178 @@ -2185,16 +2327,22 @@
3179  
3180         ut_a(buf_pool->LRU_old_len == old_len);
3181  
3182 -       UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3183 +       mutex_exit(&buf_pool->LRU_list_mutex);
3184 +       mutex_enter(&buf_pool->free_list_mutex);
3185 +
3186 +       UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3187                          ut_ad(ut_list_node_313->in_free_list));
3188  
3189         for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3190              bpage != NULL;
3191 -            bpage = UT_LIST_GET_NEXT(list, bpage)) {
3192 +            bpage = UT_LIST_GET_NEXT(free, bpage)) {
3193  
3194                 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3195         }
3196  
3197 +       mutex_exit(&buf_pool->free_list_mutex);
3198 +       mutex_enter(&buf_pool->LRU_list_mutex);
3199 +
3200         UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3201                          ut_ad(ut_list_node_313->in_unzip_LRU_list
3202                                && ut_list_node_313->page.in_LRU_list));
3203 @@ -2208,7 +2356,8 @@
3204                 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3205         }
3206  
3207 -       buf_pool_mutex_exit(buf_pool);
3208 +       //buf_pool_mutex_exit(buf_pool);
3209 +       mutex_exit(&buf_pool->LRU_list_mutex);
3210  }
3211  
3212  /**********************************************************************//**
3213 @@ -2244,7 +2393,8 @@
3214         const buf_page_t*       bpage;
3215  
3216         ut_ad(buf_pool);
3217 -       buf_pool_mutex_enter(buf_pool);
3218 +       //buf_pool_mutex_enter(buf_pool);
3219 +       mutex_enter(&buf_pool->LRU_list_mutex);
3220  
3221         bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3222  
3223 @@ -2301,7 +2451,8 @@
3224                 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3225         }
3226  
3227 -       buf_pool_mutex_exit(buf_pool);
3228 +       //buf_pool_mutex_exit(buf_pool);
3229 +       mutex_exit(&buf_pool->LRU_list_mutex);
3230  }
3231  
3232  /**********************************************************************//**
3233 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3234 --- a/storage/innobase/buf/buf0rea.c    2010-12-03 15:22:36.323977308 +0900
3235 +++ b/storage/innobase/buf/buf0rea.c    2010-12-03 15:48:29.296024468 +0900
3236 @@ -311,6 +311,7 @@
3237  
3238                 return(0);
3239         }
3240 +       buf_pool_mutex_exit(buf_pool);
3241  
3242         /* Check that almost all pages in the area have been accessed; if
3243         offset == low, the accesses must be in a descending order, otherwise,
3244 @@ -329,6 +330,7 @@
3245  
3246         fail_count = 0;
3247  
3248 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
3249         for (i = low; i < high; i++) {
3250                 bpage = buf_page_hash_get(buf_pool, space, i);
3251  
3252 @@ -356,7 +358,8 @@
3253  
3254                 if (fail_count > threshold) {
3255                         /* Too many failures: return */
3256 -                       buf_pool_mutex_exit(buf_pool);
3257 +                       //buf_pool_mutex_exit(buf_pool);
3258 +                       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3259                         return(0);
3260                 }
3261  
3262 @@ -371,7 +374,8 @@
3263         bpage = buf_page_hash_get(buf_pool, space, offset);
3264  
3265         if (bpage == NULL) {
3266 -               buf_pool_mutex_exit(buf_pool);
3267 +               //buf_pool_mutex_exit(buf_pool);
3268 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
3269  
3270                 return(0);
3271         }
3272 @@ -397,7 +401,8 @@
3273         pred_offset = fil_page_get_prev(frame);
3274         succ_offset = fil_page_get_next(frame);
3275  
3276 -       buf_pool_mutex_exit(buf_pool);
3277 +       //buf_pool_mutex_exit(buf_pool);
3278 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3279  
3280         if ((offset == low) && (succ_offset == offset + 1)) {
3281  
3282 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3283 --- a/storage/innobase/handler/ha_innodb.cc     2010-12-03 15:48:03.048955897 +0900
3284 +++ b/storage/innobase/handler/ha_innodb.cc     2010-12-03 15:48:29.304024564 +0900
3285 @@ -250,6 +250,10 @@
3286  #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3287         {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3288         {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3289 +       {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3290 +       {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3291 +       {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3292 +       {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3293         {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3294         {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3295         {&dict_sys_mutex_key, "dict_sys_mutex", 0},
3296 @@ -301,6 +305,7 @@
3297         {&archive_lock_key, "archive_lock", 0},
3298  #  endif /* UNIV_LOG_ARCHIVE */
3299         {&btr_search_latch_key, "btr_search_latch", 0},
3300 +       {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3301  #  ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3302         {&buf_block_lock_key, "buf_block_lock", 0},
3303  #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3304 diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3305 --- a/storage/innobase/handler/i_s.cc   2010-12-03 15:37:45.517105700 +0900
3306 +++ b/storage/innobase/handler/i_s.cc   2010-12-03 15:48:29.331024462 +0900
3307 @@ -1725,7 +1725,8 @@
3308  
3309                 buf_pool = buf_pool_from_array(i);
3310  
3311 -               buf_pool_mutex_enter(buf_pool);
3312 +               //buf_pool_mutex_enter(buf_pool);
3313 +               mutex_enter(&buf_pool->zip_free_mutex);
3314  
3315                 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3316                         buf_buddy_stat_t*       buddy_stat;
3317 @@ -1755,7 +1756,8 @@
3318                         }
3319                 }
3320  
3321 -               buf_pool_mutex_exit(buf_pool);
3322 +               //buf_pool_mutex_exit(buf_pool);
3323 +               mutex_exit(&buf_pool->zip_free_mutex);
3324  
3325                 if (status) {
3326                         break;
3327 diff -ruN a/storage/innobase/handler/innodb_patch_info.h b/storage/innobase/handler/innodb_patch_info.h
3328 --- a/storage/innobase/handler/innodb_patch_info.h      2010-12-03 15:48:03.064995674 +0900
3329 +++ b/storage/innobase/handler/innodb_patch_info.h      2010-12-03 15:48:29.331955850 +0900
3330 @@ -31,5 +31,6 @@
3331  {"innodb_overwrite_relay_log_info","overwrite relay-log.info when slave recovery","Building as plugin, it is not used.","http://www.percona.com/docs/wiki/percona-xtradb:innodb_overwrite_relay_log_info"},
3332  {"innodb_thread_concurrency_timer_based","use InnoDB timer based concurrency throttling (backport from MySQL 5.4.0)","",""},
3333  {"innodb_dict_size_limit","Limit dictionary cache size","Variable innodb_dict_size_limit in bytes","http://www.percona.com/docs/wiki/percona-xtradb"},
3334 +{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
3335  {NULL, NULL, NULL, NULL}
3336  };
3337 diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3338 --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3339 +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
3340 @@ -3700,9 +3700,11 @@
3341                 ulint           fold = buf_page_address_fold(space, page_no);
3342                 buf_pool_t*     buf_pool = buf_pool_get(space, page_no);
3343  
3344 -               buf_pool_mutex_enter(buf_pool);
3345 +               //buf_pool_mutex_enter(buf_pool);
3346 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
3347                 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3348 -               buf_pool_mutex_exit(buf_pool);
3349 +               //buf_pool_mutex_exit(buf_pool);
3350 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
3351  
3352                 if (UNIV_LIKELY_NULL(bpage)) {
3353                         /* A buffer pool watch has been set or the
3354 diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3355 --- a/storage/innobase/include/buf0buddy.h      2010-11-03 07:01:13.000000000 +0900
3356 +++ b/storage/innobase/include/buf0buddy.h      2010-12-03 15:48:29.338023826 +0900
3357 @@ -51,10 +51,11 @@
3358         buf_pool_t*     buf_pool,
3359                         /*!< buffer pool in which the block resides */
3360         ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
3361 -       ibool*  lru)    /*!< in: pointer to a variable that will be assigned
3362 +       ibool*  lru,    /*!< in: pointer to a variable that will be assigned
3363                         TRUE if storage was allocated from the LRU list
3364                         and buf_pool->mutex was temporarily released,
3365                         or NULL if the LRU list should not be used */
3366 +       ibool   have_page_hash_mutex)
3367         __attribute__((malloc));
3368  
3369  /**********************************************************************//**
3370 @@ -67,7 +68,8 @@
3371                         /*!< buffer pool in which the block resides */
3372         void*   buf,    /*!< in: block to be freed, must not be
3373                         pointed to by the buffer pool */
3374 -       ulint   size)   /*!< in: block size, up to UNIV_PAGE_SIZE */
3375 +       ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
3376 +       ibool   have_page_hash_mutex)
3377         __attribute__((nonnull));
3378  
3379  #ifndef UNIV_NONINL
3380 diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3381 --- a/storage/innobase/include/buf0buddy.ic     2010-11-03 07:01:13.000000000 +0900
3382 +++ b/storage/innobase/include/buf0buddy.ic     2010-12-03 15:48:29.339040413 +0900
3383 @@ -46,10 +46,11 @@
3384                         /*!< in: buffer pool in which the page resides */
3385         ulint   i,      /*!< in: index of buf_pool->zip_free[],
3386                         or BUF_BUDDY_SIZES */
3387 -       ibool*  lru)    /*!< in: pointer to a variable that will be assigned
3388 +       ibool*  lru,    /*!< in: pointer to a variable that will be assigned
3389                         TRUE if storage was allocated from the LRU list
3390                         and buf_pool->mutex was temporarily released,
3391                         or NULL if the LRU list should not be used */
3392 +       ibool   have_page_hash_mutex)
3393         __attribute__((malloc));
3394  
3395  /**********************************************************************//**
3396 @@ -61,8 +62,9 @@
3397         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
3398         void*           buf,            /*!< in: block to be freed, must not be
3399                                         pointed to by the buffer pool */
3400 -       ulint           i)              /*!< in: index of buf_pool->zip_free[],
3401 +       ulint           i,              /*!< in: index of buf_pool->zip_free[],
3402                                         or BUF_BUDDY_SIZES */
3403 +       ibool           have_page_hash_mutex)
3404         __attribute__((nonnull));
3405  
3406  /**********************************************************************//**
3407 @@ -102,16 +104,17 @@
3408                                         the page resides */
3409         ulint           size,           /*!< in: block size, up to
3410                                         UNIV_PAGE_SIZE */
3411 -       ibool*          lru)            /*!< in: pointer to a variable
3412 +       ibool*          lru,            /*!< in: pointer to a variable
3413                                         that will be assigned TRUE if
3414                                         storage was allocated from the
3415                                         LRU list and buf_pool->mutex was
3416                                         temporarily released, or NULL if
3417                                         the LRU list should not be used */
3418 +       ibool           have_page_hash_mutex)
3419  {
3420 -       ut_ad(buf_pool_mutex_own(buf_pool));
3421 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3422  
3423 -       return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3424 +       return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3425  }
3426  
3427  /**********************************************************************//**
3428 @@ -123,12 +126,25 @@
3429         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
3430         void*           buf,            /*!< in: block to be freed, must not be
3431                                         pointed to by the buffer pool */
3432 -       ulint           size)           /*!< in: block size, up to
3433 +       ulint           size,           /*!< in: block size, up to
3434                                         UNIV_PAGE_SIZE */
3435 +       ibool           have_page_hash_mutex)
3436  {
3437 -       ut_ad(buf_pool_mutex_own(buf_pool));
3438 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3439 +
3440 +       if (!have_page_hash_mutex) {
3441 +               mutex_enter(&buf_pool->LRU_list_mutex);
3442 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
3443 +       }
3444  
3445 -       buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3446 +       mutex_enter(&buf_pool->zip_free_mutex);
3447 +       buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3448 +       mutex_exit(&buf_pool->zip_free_mutex);
3449 +
3450 +       if (!have_page_hash_mutex) {
3451 +               mutex_exit(&buf_pool->LRU_list_mutex);
3452 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3453 +       }
3454  }
3455  
3456  #ifdef UNIV_MATERIALIZE
3457 diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3458 --- a/storage/innobase/include/buf0buf.h        2010-12-03 15:22:36.327954660 +0900
3459 +++ b/storage/innobase/include/buf0buf.h        2010-12-03 15:48:29.343024683 +0900
3460 @@ -132,6 +132,20 @@
3461  /*==========================*/
3462  
3463  /********************************************************************//**
3464 +Calls rw_lock_x_lock() on the page_hash_latch of every buffer pool instance. */
3465 +UNIV_INLINE
3466 +void
3467 +buf_pool_page_hash_x_lock_all(void);
3468 +/*================================*/
3469 +
3470 +/********************************************************************//**
3471 +Calls rw_lock_x_unlock() on the page_hash_latch of every buffer pool instance. */
3472 +UNIV_INLINE
3473 +void
3474 +buf_pool_page_hash_x_unlock_all(void);
3475 +/*==================================*/
3476 +
3477 +/********************************************************************//**
3478  Creates the buffer pool.
3479  @return        own: buf_pool object, NULL if not enough memory or error */
3480  UNIV_INTERN
3481 @@ -761,6 +775,15 @@
3482         const buf_page_t*       bpage)  /*!< in: pointer to control block */
3483         __attribute__((pure));
3484  
3485 +/*************************************************************************
3486 +Gets the mutex of a block and enters it, retrying until the entered mutex is
3487 +still the block's mutex. Not pure (it locks). @return entered mutex or NULL */
3488 +UNIV_INLINE
3489 +mutex_t*
3490 +buf_page_get_mutex_enter(
3491 +/*=========================*/
3492 +       const buf_page_t*       bpage); /*!< in: pointer to control block */
3493 +
3494  /*********************************************************************//**
3495  Get the flush type of a page.
3496  @return        flush type */
3497 @@ -1227,7 +1250,7 @@
3498         All these are protected by buf_pool_mutex. */
3499         /* @{ */
3500  
3501 -       UT_LIST_NODE_T(buf_page_t) list;
3502 +       /* UT_LIST_NODE_T(buf_page_t) list; */
3503                                         /*!< based on state, this is a
3504                                         list node, protected either by
3505                                         buf_pool_mutex or by
3506 @@ -1254,6 +1277,10 @@
3507                                         BUF_BLOCK_REMOVE_HASH or
3508                                         BUF_BLOCK_READY_IN_USE. */
3509  
3510 +       /* resplit for optimistic use */
3511 +       UT_LIST_NODE_T(buf_page_t) free;
3512 +       UT_LIST_NODE_T(buf_page_t) flush_list;
3513 +       UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3514  #ifdef UNIV_DEBUG
3515         ibool           in_flush_list;  /*!< TRUE if in buf_pool->flush_list;
3516                                         when flush_list_mutex is free, the
3517 @@ -1347,11 +1374,11 @@
3518                                         a block is in the unzip_LRU list
3519                                         if page.state == BUF_BLOCK_FILE_PAGE
3520                                         and page.zip.data != NULL */
3521 -#ifdef UNIV_DEBUG
3522 +//#ifdef UNIV_DEBUG
3523         ibool           in_unzip_LRU_list;/*!< TRUE if the page is in the
3524                                         decompressed LRU list;
3525                                         used in debugging */
3526 -#endif /* UNIV_DEBUG */
3527 +//#endif /* UNIV_DEBUG */
3528         mutex_t         mutex;          /*!< mutex protecting this block:
3529                                         state (also protected by the buffer
3530                                         pool mutex), io_fix, buf_fix_count,
3531 @@ -1517,6 +1544,11 @@
3532                                         pool instance, protects compressed
3533                                         only pages (of type buf_page_t, not
3534                                         buf_block_t */
3535 +       mutex_t         LRU_list_mutex;
3536 +       rw_lock_t       page_hash_latch;
3537 +       mutex_t         free_list_mutex;
3538 +       mutex_t         zip_free_mutex;
3539 +       mutex_t         zip_hash_mutex;
3540         ulint           instance_no;    /*!< Array index of this buffer
3541                                         pool instance */
3542         ulint           old_pool_size;  /*!< Old pool size in bytes */
3543 diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3544 --- a/storage/innobase/include/buf0buf.ic       2010-11-03 07:01:13.000000000 +0900
3545 +++ b/storage/innobase/include/buf0buf.ic       2010-12-03 15:48:29.345024524 +0900
3546 @@ -232,7 +232,7 @@
3547         case BUF_BLOCK_ZIP_FREE:
3548                 /* This is a free page in buf_pool->zip_free[].
3549                 Such pages should only be accessed by the buddy allocator. */
3550 -               ut_error;
3551 +               /* ut_error; */ /* optimistic */
3552                 break;
3553         case BUF_BLOCK_ZIP_PAGE:
3554         case BUF_BLOCK_ZIP_DIRTY:
3555 @@ -275,9 +275,14 @@
3556  {
3557         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3558  
3559 +       if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3560 +               /* TODO: this code is the interim. should be confirmed later. */
3561 +               return(&buf_pool->zip_mutex);
3562 +       }
3563 +
3564         switch (buf_page_get_state(bpage)) {
3565         case BUF_BLOCK_ZIP_FREE:
3566 -               ut_error;
3567 +               /* ut_error; */ /* optimistic */
3568                 return(NULL);
3569         case BUF_BLOCK_ZIP_PAGE:
3570         case BUF_BLOCK_ZIP_DIRTY:
3571 @@ -287,6 +292,28 @@
3572         }
3573  }
3574  
3575 +/*************************************************************************
3576 +Gets the mutex of a block and enters it, retrying while the block's mutex differs from the one entered. */
3577 +UNIV_INLINE
3578 +mutex_t*
3579 +buf_page_get_mutex_enter(
3580 +/*=========================*/
3581 +       const buf_page_t*       bpage)  /*!< in: pointer to control block */
3582 +{
3583 +       mutex_t*        block_mutex;
3584 +
3585 +       while(1) {
3586 +               block_mutex = buf_page_get_mutex(bpage);
3587 +               if (!block_mutex)
3588 +                       return block_mutex; /* NULL: the page has no mutex, nothing was entered */
3589 +
3590 +               mutex_enter(block_mutex);
3591 +               if (block_mutex == buf_page_get_mutex(bpage)) /* first lookup preceded mutex_enter(); look up again and compare */
3592 +                       return block_mutex; /* returned with the mutex held; the caller must exit it */
3593 +               mutex_exit(block_mutex); /* the page now maps to a different mutex: release and retry */
3594 +       }
3595 +}
3596 +
3597  /*********************************************************************//**
3598  Get the flush type of a page.
3599  @return        flush type */
3600 @@ -383,8 +410,8 @@
3601         enum buf_io_fix io_fix) /*!< in: io_fix state */
3602  {
3603  #ifdef UNIV_DEBUG
3604 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3605 -       ut_ad(buf_pool_mutex_own(buf_pool));
3606 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3607 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3608  #endif
3609         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3610  
3611 @@ -414,14 +441,14 @@
3612         const buf_page_t*       bpage)  /*!< control block being relocated */
3613  {
3614  #ifdef UNIV_DEBUG
3615 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3616 -       ut_ad(buf_pool_mutex_own(buf_pool));
3617 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3618 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3619  #endif
3620         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3621         ut_ad(buf_page_in_file(bpage));
3622 -       ut_ad(bpage->in_LRU_list);
3623 +       //ut_ad(bpage->in_LRU_list);
3624  
3625 -       return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3626 +       return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3627                && bpage->buf_fix_count == 0);
3628  }
3629  
3630 @@ -435,8 +462,8 @@
3631         const buf_page_t*       bpage)  /*!< in: control block */
3632  {
3633  #ifdef UNIV_DEBUG
3634 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3635 -       ut_ad(buf_pool_mutex_own(buf_pool));
3636 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3637 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3638  #endif
3639         ut_ad(buf_page_in_file(bpage));
3640  
3641 @@ -456,7 +483,8 @@
3642         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3643  #endif /* UNIV_DEBUG */
3644         ut_a(buf_page_in_file(bpage));
3645 -       ut_ad(buf_pool_mutex_own(buf_pool));
3646 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3647 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3648         ut_ad(bpage->in_LRU_list);
3649  
3650  #ifdef UNIV_LRU_DEBUG
3651 @@ -503,9 +531,10 @@
3652         ulint           time_ms)        /*!< in: ut_time_ms() */
3653  {
3654  #ifdef UNIV_DEBUG
3655 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3656 -       ut_ad(buf_pool_mutex_own(buf_pool));
3657 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3658 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3659  #endif
3660 +       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3661         ut_a(buf_page_in_file(bpage));
3662  
3663         if (!bpage->access_time) {
3664 @@ -719,19 +748,19 @@
3665  /*===========*/
3666         buf_block_t*    block)  /*!< in, own: block to be freed */
3667  {
3668 -       buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3669 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3670  
3671 -       buf_pool_mutex_enter(buf_pool);
3672 +       //buf_pool_mutex_enter(buf_pool);
3673  
3674         mutex_enter(&block->mutex);
3675  
3676         ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3677  
3678 -       buf_LRU_block_free_non_file_page(block);
3679 +       buf_LRU_block_free_non_file_page(block, FALSE);
3680  
3681         mutex_exit(&block->mutex);
3682  
3683 -       buf_pool_mutex_exit(buf_pool);
3684 +       //buf_pool_mutex_exit(buf_pool);
3685  }
3686  #endif /* !UNIV_HOTBACKUP */
3687  
3688 @@ -779,17 +808,17 @@
3689                                         page frame */
3690  {
3691         ib_uint64_t     lsn;
3692 -       mutex_t*        block_mutex = buf_page_get_mutex(bpage);
3693 -
3694 -       mutex_enter(block_mutex);
3695 +       mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
3696  
3697 -       if (buf_page_in_file(bpage)) {
3698 +       if (block_mutex && buf_page_in_file(bpage)) {
3699                 lsn = bpage->newest_modification;
3700         } else {
3701                 lsn = 0;
3702         }
3703  
3704 -       mutex_exit(block_mutex);
3705 +       if (block_mutex) {
3706 +               mutex_exit(block_mutex);
3707 +       }
3708  
3709         return(lsn);
3710  }
3711 @@ -807,7 +836,7 @@
3712  #ifdef UNIV_SYNC_DEBUG
3713         buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3714  
3715 -       ut_ad((buf_pool_mutex_own(buf_pool)
3716 +       ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3717                && (block->page.buf_fix_count == 0))
3718               || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3719  #endif /* UNIV_SYNC_DEBUG */
3720 @@ -962,7 +991,11 @@
3721         buf_page_t*     bpage;
3722  
3723         ut_ad(buf_pool);
3724 -       ut_ad(buf_pool_mutex_own(buf_pool));
3725 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3726 +#ifdef UNIV_SYNC_DEBUG
3727 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3728 +             || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3729 +#endif
3730         ut_ad(fold == buf_page_address_fold(space, offset));
3731  
3732         /* Look for the page in the hash table */
3733 @@ -1047,11 +1080,13 @@
3734         const buf_page_t*       bpage;
3735         buf_pool_t*             buf_pool = buf_pool_get(space, offset);
3736  
3737 -       buf_pool_mutex_enter(buf_pool);
3738 +       //buf_pool_mutex_enter(buf_pool);
3739 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
3740  
3741         bpage = buf_page_hash_get(buf_pool, space, offset);
3742  
3743 -       buf_pool_mutex_exit(buf_pool);
3744 +       //buf_pool_mutex_exit(buf_pool);
3745 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3746  
3747         return(bpage != NULL);
3748  }
3749 @@ -1179,4 +1214,38 @@
3750                 buf_pool_mutex_exit(buf_pool);
3751         }
3752  }
3753 +
3754 +/********************************************************************//**
3755 +Calls rw_lock_x_lock() on page_hash_latch of each buffer pool instance. */
3756 +UNIV_INLINE
3757 +void
3758 +buf_pool_page_hash_x_lock_all(void)
3759 +/*===============================*/
3760 +{
3761 +       ulint   i;
3762 +
3763 +       for (i = 0; i < srv_buf_pool_instances; i++) {
3764 +               buf_pool_t*     buf_pool;
3765 +
3766 +               buf_pool = buf_pool_from_array(i);
3767 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
3768 +       }
3769 +}
3770 +
3771 +/********************************************************************//**
3772 +Calls rw_lock_x_unlock() on page_hash_latch of each buffer pool instance. */
3773 +UNIV_INLINE
3774 +void
3775 +buf_pool_page_hash_x_unlock_all(void)
3776 +/*=================================*/
3777 +{
3778 +       ulint   i;
3779 +
3780 +       for (i = 0; i < srv_buf_pool_instances; i++) {
3781 +               buf_pool_t*     buf_pool;
3782 +
3783 +               buf_pool = buf_pool_from_array(i);
3784 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3785 +       }
3786 +}
3787  #endif /* !UNIV_HOTBACKUP */
3788 diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3789 --- a/storage/innobase/include/buf0lru.h        2010-11-03 07:01:13.000000000 +0900
3790 +++ b/storage/innobase/include/buf0lru.h        2010-12-03 15:48:29.349024701 +0900
3791 @@ -113,10 +113,11 @@
3792         buf_page_t*     bpage,  /*!< in: block to be freed */
3793         ibool           zip,    /*!< in: TRUE if should remove also the
3794                                 compressed page of an uncompressed page */
3795 -       ibool*          buf_pool_mutex_released);
3796 +       ibool*          buf_pool_mutex_released,
3797                                 /*!< in: pointer to a variable that will
3798                                 be assigned TRUE if buf_pool->mutex
3799                                 was temporarily released, or NULL */
3800 +       ibool           have_LRU_mutex);
3801  /******************************************************************//**
3802  Try to free a replaceable block.
3803  @return        TRUE if found and freed */
3804 @@ -163,7 +164,8 @@
3805  void
3806  buf_LRU_block_free_non_file_page(
3807  /*=============================*/
3808 -       buf_block_t*    block); /*!< in: block, must not contain a file page */
3809 +       buf_block_t*    block,  /*!< in: block, must not contain a file page */
3810 +       ibool           have_page_hash_mutex);
3811  /******************************************************************//**
3812  Adds a block to the LRU list. */
3813  UNIV_INTERN
3814 diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3815 --- a/storage/innobase/include/sync0rw.h        2010-11-03 07:01:13.000000000 +0900
3816 +++ b/storage/innobase/include/sync0rw.h        2010-12-03 15:48:29.349942993 +0900
3817 @@ -112,6 +112,7 @@
3818  extern mysql_pfs_key_t archive_lock_key;
3819  # endif /* UNIV_LOG_ARCHIVE */
3820  extern mysql_pfs_key_t btr_search_latch_key;
3821 +extern mysql_pfs_key_t buf_pool_page_hash_key;
3822  extern mysql_pfs_key_t buf_block_lock_key;
3823  # ifdef UNIV_SYNC_DEBUG
3824  extern mysql_pfs_key_t buf_block_debug_latch_key;
3825 diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3826 --- a/storage/innobase/include/sync0sync.h      2010-11-03 07:01:13.000000000 +0900
3827 +++ b/storage/innobase/include/sync0sync.h      2010-12-03 15:48:29.352024614 +0900
3828 @@ -75,6 +75,10 @@
3829  extern mysql_pfs_key_t buffer_block_mutex_key;
3830  extern mysql_pfs_key_t buf_pool_mutex_key;
3831  extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3832 +extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3833 +extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3834 +extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3835 +extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3836  extern mysql_pfs_key_t cache_last_read_mutex_key;
3837  extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3838  extern mysql_pfs_key_t dict_sys_mutex_key;
3839 @@ -661,7 +665,7 @@
3840  #define        SYNC_TRX_LOCK_HEAP      298
3841  #define SYNC_TRX_SYS_HEADER    290
3842  #define SYNC_LOG               170
3843 -#define SYNC_LOG_FLUSH_ORDER   147
3844 +#define SYNC_LOG_FLUSH_ORDER   156
3845  #define SYNC_RECV              168
3846  #define        SYNC_WORK_QUEUE         162
3847  #define        SYNC_SEARCH_SYS_CONF    161     /* for assigning btr_search_enabled */
3848 @@ -671,8 +675,13 @@
3849                                         SYNC_SEARCH_SYS, as memory allocation
3850                                         can call routines there! Otherwise
3851                                         the level is SYNC_MEM_HASH. */
3852 +#define        SYNC_BUF_LRU_LIST       158
3853 +#define        SYNC_BUF_PAGE_HASH      157
3854 +#define        SYNC_BUF_BLOCK          155     /* Block mutex */
3855 +#define        SYNC_BUF_FREE_LIST      153
3856 +#define        SYNC_BUF_ZIP_FREE       152
3857 +#define        SYNC_BUF_ZIP_HASH       151
3858  #define        SYNC_BUF_POOL           150     /* Buffer pool mutex */
3859 -#define        SYNC_BUF_BLOCK          146     /* Block mutex */
3860  #define        SYNC_BUF_FLUSH_LIST     145     /* Buffer flush list mutex */
3861  #define SYNC_DOUBLEWRITE       140
3862  #define        SYNC_ANY_LATCH          135
3863 @@ -704,7 +713,7 @@
3864                 os_fast_mutex;  /*!< We use this OS mutex in place of lock_word
3865                                 when atomic operations are not enabled */
3866  #endif
3867 -       ulint   waiters;        /*!< This ulint is set to 1 if there are (or
3868 +       volatile ulint  waiters;        /*!< This ulint is set to 1 if there are (or
3869                                 may be) threads waiting in the global wait
3870                                 array for this mutex to be released.
3871                                 Otherwise, this is 0. */
3872 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3873 --- a/storage/innobase/srv/srv0srv.c    2010-12-03 15:48:03.080956216 +0900
3874 +++ b/storage/innobase/srv/srv0srv.c    2010-12-03 15:48:29.355023766 +0900
3875 @@ -3065,7 +3065,7 @@
3876                                                                 level += log_sys->max_checkpoint_age
3877                                                                          - (lsn - oldest_modification);
3878                                                         }
3879 -                                                       bpage = UT_LIST_GET_NEXT(list, bpage);
3880 +                                                       bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3881                                                         n_blocks++;
3882                                                 }
3883  
3884 @@ -3150,7 +3150,7 @@
3885                                                         found = TRUE;
3886                                                         break;
3887                                                 }
3888 -                                               bpage = UT_LIST_GET_NEXT(list, bpage);
3889 +                                               bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3890                                                 new_blocks_num++;
3891                                         }
3892                                         if (!found) {
3893 diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3894 --- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3895 +++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
3896 @@ -265,7 +265,7 @@
3897         mutex->lock_word = 0;
3898  #endif
3899         mutex->event = os_event_create(NULL);
3900 -       mutex_set_waiters(mutex, 0);
3901 +       mutex->waiters = 0;
3902  #ifdef UNIV_DEBUG
3903         mutex->magic_n = MUTEX_MAGIC_N;
3904  #endif /* UNIV_DEBUG */
3905 @@ -444,6 +444,15 @@
3906         mutex_t*        mutex,  /*!< in: mutex */
3907         ulint           n)      /*!< in: value to set */
3908  {
3909 +#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3910 +       ut_ad(mutex);
3911 +
3912 +       if (n) {
3913 +               os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3914 +       } else {
3915 +               os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3916 +       }
3917 +#else
3918         volatile ulint* ptr;            /* declared volatile to ensure that
3919                                         the value is stored to memory */
3920         ut_ad(mutex);
3921 @@ -452,6 +461,7 @@
3922  
3923         *ptr = n;               /* Here we assume that the write of a single
3924                                 word in memory is atomic */
3925 +#endif
3926  }
3927  
3928  /******************************************************************//**
3929 @@ -1193,7 +1203,12 @@
3930                         ut_error;
3931                 }
3932                 break;
3933 +       case SYNC_BUF_LRU_LIST:
3934         case SYNC_BUF_FLUSH_LIST:
3935 +       case SYNC_BUF_PAGE_HASH:
3936 +       case SYNC_BUF_FREE_LIST:
3937 +       case SYNC_BUF_ZIP_FREE:
3938 +       case SYNC_BUF_ZIP_HASH:
3939         case SYNC_BUF_POOL:
3940                 /* We can have multiple mutexes of this type therefore we
3941                 can only check whether the greater than condition holds. */
3942 @@ -1211,7 +1226,8 @@
3943                 buffer block (block->mutex or buf_pool_zip_mutex). */
3944                 if (!sync_thread_levels_g(array, level, FALSE)) {
3945                         ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3946 -                       ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3947 +                       /* the exact rule is not settled yet; the check below is disabled for now */
3948 +                       //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
3949                 }
3950                 break;
3951         case SYNC_REC_LOCK:
This page took 4.259624 seconds and 3 git commands to generate.