git.pld-linux.org Git - packages/mysql.git/blob - innodb_split_buf_pool_mutex.patch
- not all chunks needed to patch
[packages/mysql.git] / innodb_split_buf_pool_mutex.patch
1 # name       : innodb_split_buf_pool_mutex.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
4 #
5 #!!! notice !!!
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9 --- a/storage/innobase/btr/btr0cur.c    2010-11-03 07:01:13.000000000 +0900
10 +++ b/storage/innobase/btr/btr0cur.c    2010-12-03 15:48:29.268957148 +0900
11 @@ -3935,7 +3935,8 @@
12  
13         mtr_commit(mtr);
14  
15 -       buf_pool_mutex_enter(buf_pool);
16 +       //buf_pool_mutex_enter(buf_pool);
17 +       mutex_enter(&buf_pool->LRU_list_mutex);
18         mutex_enter(&block->mutex);
19  
20         /* Only free the block if it is still allocated to
21 @@ -3946,17 +3947,22 @@
22             && buf_block_get_space(block) == space
23             && buf_block_get_page_no(block) == page_no) {
24  
25 -               if (buf_LRU_free_block(&block->page, all, NULL)
26 +               if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
27                     != BUF_LRU_FREED
28 -                   && all && block->page.zip.data) {
29 +                   && all && block->page.zip.data
30 +                   /* buf_LRU_free_block() may release the mutex temporarily, so re-check that the block still holds the same page */
31 +                   && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
32 +                   && buf_block_get_space(block) == space
33 +                   && buf_block_get_page_no(block) == page_no) {
34                         /* Attempt to deallocate the uncompressed page
35                         if the whole block cannot be deallocted. */
36  
37 -                       buf_LRU_free_block(&block->page, FALSE, NULL);
38 +                       buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
39                 }
40         }
41  
42 -       buf_pool_mutex_exit(buf_pool);
43 +       //buf_pool_mutex_exit(buf_pool);
44 +       mutex_exit(&buf_pool->LRU_list_mutex);
45         mutex_exit(&block->mutex);
46  }
47  
48 diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
49 --- a/storage/innobase/btr/btr0sea.c    2010-12-03 15:48:03.033037049 +0900
50 +++ b/storage/innobase/btr/btr0sea.c    2010-12-03 15:48:29.271024260 +0900
51 @@ -1943,7 +1943,7 @@
52         rec_offs_init(offsets_);
53  
54         rw_lock_x_lock(&btr_search_latch);
55 -       buf_pool_mutex_enter_all();
56 +       buf_pool_page_hash_x_lock_all();
57  
58         cell_count = hash_get_n_cells(btr_search_sys->hash_index);
59  
60 @@ -1951,11 +1951,11 @@
61                 /* We release btr_search_latch every once in a while to
62                 give other queries a chance to run. */
63                 if ((i != 0) && ((i % chunk_size) == 0)) {
64 -                       buf_pool_mutex_exit_all();
65 +                       buf_pool_page_hash_x_unlock_all();
66                         rw_lock_x_unlock(&btr_search_latch);
67                         os_thread_yield();
68                         rw_lock_x_lock(&btr_search_latch);
69 -                       buf_pool_mutex_enter_all();
70 +                       buf_pool_page_hash_x_lock_all();
71                 }
72  
73                 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
74 @@ -2066,11 +2066,11 @@
75                 /* We release btr_search_latch every once in a while to
76                 give other queries a chance to run. */
77                 if (i != 0) {
78 -                       buf_pool_mutex_exit_all();
79 +                       buf_pool_page_hash_x_unlock_all();
80                         rw_lock_x_unlock(&btr_search_latch);
81                         os_thread_yield();
82                         rw_lock_x_lock(&btr_search_latch);
83 -                       buf_pool_mutex_enter_all();
84 +                       buf_pool_page_hash_x_lock_all();
85                 }
86  
87                 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
88 @@ -2078,7 +2078,7 @@
89                 }
90         }
91  
92 -       buf_pool_mutex_exit_all();
93 +       buf_pool_page_hash_x_unlock_all();
94         rw_lock_x_unlock(&btr_search_latch);
95         if (UNIV_LIKELY_NULL(heap)) {
96                 mem_heap_free(heap);
97 diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
98 --- a/storage/innobase/buf/buf0buddy.c  2010-12-03 15:22:36.307986907 +0900
99 +++ b/storage/innobase/buf/buf0buddy.c  2010-12-03 15:48:29.275025723 +0900
100 @@ -73,10 +73,11 @@
101         if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
102  #endif /* UNIV_DEBUG_VALGRIND */
103  
104 -       ut_ad(buf_pool_mutex_own(buf_pool));
105 +       //ut_ad(buf_pool_mutex_own(buf_pool));
106 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
107         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
108         ut_ad(buf_pool->zip_free[i].start != bpage);
109 -       UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
110 +       UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
111  
112  #ifdef UNIV_DEBUG_VALGRIND
113         if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
114 @@ -96,8 +97,8 @@
115                                         buf_pool->zip_free[] */
116  {
117  #ifdef UNIV_DEBUG_VALGRIND
118 -       buf_page_t*     prev = UT_LIST_GET_PREV(list, bpage);
119 -       buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
120 +       buf_page_t*     prev = UT_LIST_GET_PREV(zip_list, bpage);
121 +       buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
122  
123         if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
124         if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
125 @@ -106,9 +107,10 @@
126         ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
127  #endif /* UNIV_DEBUG_VALGRIND */
128  
129 -       ut_ad(buf_pool_mutex_own(buf_pool));
130 +       //ut_ad(buf_pool_mutex_own(buf_pool));
131 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
132         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
133 -       UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
134 +       UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
135  
136  #ifdef UNIV_DEBUG_VALGRIND
137         if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
138 @@ -128,12 +130,13 @@
139  {
140         buf_page_t*     bpage;
141  
142 -       ut_ad(buf_pool_mutex_own(buf_pool));
143 +       //ut_ad(buf_pool_mutex_own(buf_pool));
144 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
145         ut_a(i < BUF_BUDDY_SIZES);
146  
147  #ifndef UNIV_DEBUG_VALGRIND
148         /* Valgrind would complain about accessing free memory. */
149 -       ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
150 +       ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
151                               ut_ad(buf_page_get_state(ut_list_node_313)
152                                     == BUF_BLOCK_ZIP_FREE)));
153  #endif /* !UNIV_DEBUG_VALGRIND */
154 @@ -177,16 +180,19 @@
155  buf_buddy_block_free(
156  /*=================*/
157         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
158 -       void*           buf)            /*!< in: buffer frame to deallocate */
159 +       void*           buf,            /*!< in: buffer frame to deallocate */
160 +       ibool           have_page_hash_mutex)
161  {
162         const ulint     fold    = BUF_POOL_ZIP_FOLD_PTR(buf);
163         buf_page_t*     bpage;
164         buf_block_t*    block;
165  
166 -       ut_ad(buf_pool_mutex_own(buf_pool));
167 +       //ut_ad(buf_pool_mutex_own(buf_pool));
168         ut_ad(!mutex_own(&buf_pool->zip_mutex));
169         ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
170  
171 +       mutex_enter(&buf_pool->zip_hash_mutex);
172 +
173         HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
174                     ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
175                           && bpage->in_zip_hash && !bpage->in_page_hash),
176 @@ -198,12 +204,14 @@
177         ut_d(bpage->in_zip_hash = FALSE);
178         HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
179  
180 +       mutex_exit(&buf_pool->zip_hash_mutex);
181 +
182         ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
183         UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
184  
185         block = (buf_block_t*) bpage;
186         mutex_enter(&block->mutex);
187 -       buf_LRU_block_free_non_file_page(block);
188 +       buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
189         mutex_exit(&block->mutex);
190  
191         ut_ad(buf_pool->buddy_n_frames > 0);
192 @@ -220,7 +228,7 @@
193  {
194         buf_pool_t*     buf_pool = buf_pool_from_block(block);
195         const ulint     fold = BUF_POOL_ZIP_FOLD(block);
196 -       ut_ad(buf_pool_mutex_own(buf_pool));
197 +       //ut_ad(buf_pool_mutex_own(buf_pool));
198         ut_ad(!mutex_own(&buf_pool->zip_mutex));
199         ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
200  
201 @@ -232,7 +240,10 @@
202         ut_ad(!block->page.in_page_hash);
203         ut_ad(!block->page.in_zip_hash);
204         ut_d(block->page.in_zip_hash = TRUE);
205 +
206 +       mutex_enter(&buf_pool->zip_hash_mutex);
207         HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
208 +       mutex_exit(&buf_pool->zip_hash_mutex);
209  
210         ut_d(buf_pool->buddy_n_frames++);
211  }
212 @@ -268,7 +279,7 @@
213                 bpage->state = BUF_BLOCK_ZIP_FREE;
214  #ifndef UNIV_DEBUG_VALGRIND
215                 /* Valgrind would complain about accessing free memory. */
216 -               ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
217 +               ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
218                                       ut_ad(buf_page_get_state(
219                                                     ut_list_node_313)
220                                             == BUF_BLOCK_ZIP_FREE)));
221 @@ -291,25 +302,29 @@
222         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
223         ulint           i,              /*!< in: index of buf_pool->zip_free[],
224                                         or BUF_BUDDY_SIZES */
225 -       ibool*          lru)            /*!< in: pointer to a variable that
226 +       ibool*          lru,            /*!< in: pointer to a variable that
227                                         will be assigned TRUE if storage was
228                                         allocated from the LRU list and
229                                         buf_pool->mutex was temporarily
230                                         released, or NULL if the LRU list
231                                         should not be used */
232 +       ibool           have_page_hash_mutex)
233  {
234         buf_block_t*    block;
235  
236 -       ut_ad(buf_pool_mutex_own(buf_pool));
237 +       //ut_ad(buf_pool_mutex_own(buf_pool));
238 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
239         ut_ad(!mutex_own(&buf_pool->zip_mutex));
240  
241         if (i < BUF_BUDDY_SIZES) {
242                 /* Try to allocate from the buddy system. */
243 +               mutex_enter(&buf_pool->zip_free_mutex);
244                 block = buf_buddy_alloc_zip(buf_pool, i);
245  
246                 if (block) {
247                         goto func_exit;
248                 }
249 +               mutex_exit(&buf_pool->zip_free_mutex);
250         }
251  
252         /* Try allocating from the buf_pool->free list. */
253 @@ -326,19 +341,30 @@
254         }
255  
256         /* Try replacing an uncompressed page in the buffer pool. */
257 -       buf_pool_mutex_exit(buf_pool);
258 +       //buf_pool_mutex_exit(buf_pool);
259 +       mutex_exit(&buf_pool->LRU_list_mutex);
260 +       if (have_page_hash_mutex) {
261 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
262 +       }
263         block = buf_LRU_get_free_block(buf_pool, 0);
264         *lru = TRUE;
265 -       buf_pool_mutex_enter(buf_pool);
266 +       //buf_pool_mutex_enter(buf_pool);
267 +       mutex_enter(&buf_pool->LRU_list_mutex);
268 +       if (have_page_hash_mutex) {
269 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
270 +       }
271  
272  alloc_big:
273         buf_buddy_block_register(block);
274  
275 +       mutex_enter(&buf_pool->zip_free_mutex);
276         block = buf_buddy_alloc_from(
277                 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
278  
279  func_exit:
280         buf_pool->buddy_stat[i].used++;
281 +       mutex_exit(&buf_pool->zip_free_mutex);
282 +
283         return(block);
284  }
285  
286 @@ -355,7 +381,10 @@
287         buf_page_t*     b;
288         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
289  
290 -       ut_ad(buf_pool_mutex_own(buf_pool));
291 +       //ut_ad(buf_pool_mutex_own(buf_pool));
292 +#ifdef UNIV_SYNC_DEBUG
293 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
294 +#endif
295  
296         switch (buf_page_get_state(bpage)) {
297         case BUF_BLOCK_ZIP_FREE:
298 @@ -364,7 +393,7 @@
299         case BUF_BLOCK_FILE_PAGE:
300         case BUF_BLOCK_MEMORY:
301         case BUF_BLOCK_REMOVE_HASH:
302 -               ut_error;
303 +               /* ut_error; */ /* optimistic */
304         case BUF_BLOCK_ZIP_DIRTY:
305                 /* Cannot relocate dirty pages. */
306                 return(FALSE);
307 @@ -374,9 +403,18 @@
308         }
309  
310         mutex_enter(&buf_pool->zip_mutex);
311 +       mutex_enter(&buf_pool->zip_free_mutex);
312  
313         if (!buf_page_can_relocate(bpage)) {
314                 mutex_exit(&buf_pool->zip_mutex);
315 +               mutex_exit(&buf_pool->zip_free_mutex);
316 +               return(FALSE);
317 +       }
318 +
319 +       if (bpage != buf_page_hash_get(buf_pool,
320 +                                      bpage->space, bpage->offset)) {
321 +               mutex_exit(&buf_pool->zip_mutex);
322 +               mutex_exit(&buf_pool->zip_free_mutex);
323                 return(FALSE);
324         }
325  
326 @@ -384,18 +422,19 @@
327         ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
328  
329         /* relocate buf_pool->zip_clean */
330 -       b = UT_LIST_GET_PREV(list, dpage);
331 -       UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
332 +       b = UT_LIST_GET_PREV(zip_list, dpage);
333 +       UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
334  
335         if (b) {
336 -               UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
337 +               UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
338         } else {
339 -               UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
340 +               UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
341         }
342  
343         UNIV_MEM_INVALID(bpage, sizeof *bpage);
344  
345         mutex_exit(&buf_pool->zip_mutex);
346 +       mutex_exit(&buf_pool->zip_free_mutex);
347         return(TRUE);
348  }
349  
350 @@ -409,14 +448,16 @@
351         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
352         void*           src,            /*!< in: block to relocate */
353         void*           dst,            /*!< in: free block to relocate to */
354 -       ulint           i)              /*!< in: index of
355 +       ulint           i,              /*!< in: index of
356                                         buf_pool->zip_free[] */
357 +       ibool           have_page_hash_mutex)
358  {
359         buf_page_t*     bpage;
360         const ulint     size    = BUF_BUDDY_LOW << i;
361         ullint          usec    = ut_time_us(NULL);
362  
363 -       ut_ad(buf_pool_mutex_own(buf_pool));
364 +       //ut_ad(buf_pool_mutex_own(buf_pool));
365 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
366         ut_ad(!mutex_own(&buf_pool->zip_mutex));
367         ut_ad(!ut_align_offset(src, size));
368         ut_ad(!ut_align_offset(dst, size));
369 @@ -437,6 +478,13 @@
370         if (size >= PAGE_ZIP_MIN_SIZE) {
371                 /* This is a compressed page. */
372                 mutex_t*        mutex;
373 +               ulint           space, page_no;
374 +
375 +               if (!have_page_hash_mutex) {
376 +                       mutex_exit(&buf_pool->zip_free_mutex);
377 +                       mutex_enter(&buf_pool->LRU_list_mutex);
378 +                       rw_lock_x_lock(&buf_pool->page_hash_latch);
379 +               }
380  
381                 /* The src block may be split into smaller blocks,
382                 some of which may be free.  Thus, the
383 @@ -446,9 +494,9 @@
384                 pool), so there is nothing wrong about this.  The
385                 mach_read_from_4() calls here will only trigger bogus
386                 Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
387 -               ulint           space   = mach_read_from_4(
388 +               space   = mach_read_from_4(
389                         (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
390 -               ulint           page_no = mach_read_from_4(
391 +               page_no = mach_read_from_4(
392                         (const byte*) src + FIL_PAGE_OFFSET);
393                 /* Suppress Valgrind warnings about conditional jump
394                 on uninitialized value. */
395 @@ -462,6 +510,11 @@
396                         added to buf_pool->page_hash yet.  Obviously,
397                         it cannot be relocated. */
398  
399 +                       if (!have_page_hash_mutex) {
400 +                               mutex_enter(&buf_pool->zip_free_mutex);
401 +                               mutex_exit(&buf_pool->LRU_list_mutex);
402 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
403 +                       }
404                         return(FALSE);
405                 }
406  
407 @@ -473,18 +526,27 @@
408                         For the sake of simplicity, give up. */
409                         ut_ad(page_zip_get_size(&bpage->zip) < size);
410  
411 +                       if (!have_page_hash_mutex) {
412 +                               mutex_enter(&buf_pool->zip_free_mutex);
413 +                               mutex_exit(&buf_pool->LRU_list_mutex);
414 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
415 +                       }
416                         return(FALSE);
417                 }
418  
419 +               /* To keep latch order */
420 +               if (have_page_hash_mutex)
421 +                       mutex_exit(&buf_pool->zip_free_mutex);
422 +
423                 /* The block must have been allocated, but it may
424                 contain uninitialized data. */
425                 UNIV_MEM_ASSERT_W(src, size);
426  
427 -               mutex = buf_page_get_mutex(bpage);
428 +               mutex = buf_page_get_mutex_enter(bpage);
429  
430 -               mutex_enter(mutex);
431 +               mutex_enter(&buf_pool->zip_free_mutex);
432  
433 -               if (buf_page_can_relocate(bpage)) {
434 +               if (mutex && buf_page_can_relocate(bpage)) {
435                         /* Relocate the compressed page. */
436                         ut_a(bpage->zip.data == src);
437                         memcpy(dst, src, size);
438 @@ -499,10 +561,22 @@
439                                 buddy_stat->relocated_usec
440                                         += ut_time_us(NULL) - usec;
441                         }
442 +
443 +                       if (!have_page_hash_mutex) {
444 +                               mutex_exit(&buf_pool->LRU_list_mutex);
445 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
446 +                       }
447                         return(TRUE);
448                 }
449  
450 -               mutex_exit(mutex);
451 +               if (!have_page_hash_mutex) {
452 +                       mutex_exit(&buf_pool->LRU_list_mutex);
453 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
454 +               }
455 +
456 +               if (mutex) {
457 +                       mutex_exit(mutex);
458 +               }
459         } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
460                 /* This must be a buf_page_t object. */
461  #if UNIV_WORD_SIZE == 4
462 @@ -511,10 +585,31 @@
463                 about uninitialized pad bytes. */
464                 UNIV_MEM_ASSERT_RW(src, size);
465  #endif
466 +
467 +               mutex_exit(&buf_pool->zip_free_mutex);
468 +
469 +               if (!have_page_hash_mutex) {
470 +                       mutex_enter(&buf_pool->LRU_list_mutex);
471 +                       rw_lock_x_lock(&buf_pool->page_hash_latch);
472 +               }
473 +
474                 if (buf_buddy_relocate_block(src, dst)) {
475 +                       mutex_enter(&buf_pool->zip_free_mutex);
476 +
477 +                       if (!have_page_hash_mutex) {
478 +                               mutex_exit(&buf_pool->LRU_list_mutex);
479 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
480 +                       }
481  
482                         goto success;
483                 }
484 +
485 +               mutex_enter(&buf_pool->zip_free_mutex);
486 +
487 +               if (!have_page_hash_mutex) {
488 +                       mutex_exit(&buf_pool->LRU_list_mutex);
489 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
490 +               }
491         }
492  
493         return(FALSE);
494 @@ -529,13 +624,15 @@
495         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
496         void*           buf,            /*!< in: block to be freed, must not be
497                                         pointed to by the buffer pool */
498 -       ulint           i)              /*!< in: index of buf_pool->zip_free[],
499 +       ulint           i,              /*!< in: index of buf_pool->zip_free[],
500                                         or BUF_BUDDY_SIZES */
501 +       ibool           have_page_hash_mutex)
502  {
503         buf_page_t*     bpage;
504         buf_page_t*     buddy;
505  
506 -       ut_ad(buf_pool_mutex_own(buf_pool));
507 +       //ut_ad(buf_pool_mutex_own(buf_pool));
508 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
509         ut_ad(!mutex_own(&buf_pool->zip_mutex));
510         ut_ad(i <= BUF_BUDDY_SIZES);
511         ut_ad(buf_pool->buddy_stat[i].used > 0);
512 @@ -546,7 +643,9 @@
513         ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
514  
515         if (i == BUF_BUDDY_SIZES) {
516 -               buf_buddy_block_free(buf_pool, buf);
517 +               mutex_exit(&buf_pool->zip_free_mutex);
518 +               buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
519 +               mutex_enter(&buf_pool->zip_free_mutex);
520                 return;
521         }
522  
523 @@ -591,7 +690,7 @@
524                 ut_a(bpage != buf);
525  
526                 {
527 -                       buf_page_t*     next = UT_LIST_GET_NEXT(list, bpage);
528 +                       buf_page_t*     next = UT_LIST_GET_NEXT(zip_list, bpage);
529                         UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
530                         bpage = next;
531                 }
532 @@ -600,13 +699,13 @@
533  #ifndef UNIV_DEBUG_VALGRIND
534  buddy_nonfree:
535         /* Valgrind would complain about accessing free memory. */
536 -       ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
537 +       ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
538                               ut_ad(buf_page_get_state(ut_list_node_313)
539                                     == BUF_BLOCK_ZIP_FREE)));
540  #endif /* UNIV_DEBUG_VALGRIND */
541  
542         /* The buddy is not free. Is there a free block of this size? */
543 -       bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
544 +       bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
545  
546         if (bpage) {
547                 /* Remove the block from the free list, because a successful
548 @@ -616,7 +715,7 @@
549                 buf_buddy_remove_from_free(buf_pool, bpage, i);
550  
551                 /* Try to relocate the buddy of buf to the free block. */
552 -               if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
553 +               if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
554  
555                         ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
556                         goto buddy_free2;
557 @@ -636,14 +735,14 @@
558  
559                 (Parts of the buddy can be free in
560                 buf_pool->zip_free[j] with j < i.) */
561 -               ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
562 +               ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
563                                       ut_ad(buf_page_get_state(
564                                                     ut_list_node_313)
565                                             == BUF_BLOCK_ZIP_FREE
566                                             && ut_list_node_313 != buddy)));
567  #endif /* !UNIV_DEBUG_VALGRIND */
568  
569 -               if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
570 +               if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
571  
572                         buf = bpage;
573                         UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
574 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
575 --- a/storage/innobase/buf/buf0buf.c    2010-12-03 15:22:36.314943336 +0900
576 +++ b/storage/innobase/buf/buf0buf.c    2010-12-03 15:48:29.282947357 +0900
577 @@ -263,6 +263,7 @@
578  #ifdef UNIV_PFS_RWLOCK
579  /* Keys to register buffer block related rwlocks and mutexes with
580  performance schema */
581 +UNIV_INTERN mysql_pfs_key_t    buf_pool_page_hash_key;
582  UNIV_INTERN mysql_pfs_key_t    buf_block_lock_key;
583  # ifdef UNIV_SYNC_DEBUG
584  UNIV_INTERN mysql_pfs_key_t    buf_block_debug_latch_key;
585 @@ -273,6 +274,10 @@
586  UNIV_INTERN mysql_pfs_key_t    buffer_block_mutex_key;
587  UNIV_INTERN mysql_pfs_key_t    buf_pool_mutex_key;
588  UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_mutex_key;
589 +UNIV_INTERN mysql_pfs_key_t    buf_pool_LRU_list_mutex_key;
590 +UNIV_INTERN mysql_pfs_key_t    buf_pool_free_list_mutex_key;
591 +UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_free_mutex_key;
592 +UNIV_INTERN mysql_pfs_key_t    buf_pool_zip_hash_mutex_key;
593  UNIV_INTERN mysql_pfs_key_t    flush_list_mutex_key;
594  #endif /* UNIV_PFS_MUTEX */
595  
596 @@ -881,9 +886,9 @@
597         block->page.in_zip_hash = FALSE;
598         block->page.in_flush_list = FALSE;
599         block->page.in_free_list = FALSE;
600 -       block->in_unzip_LRU_list = FALSE;
601  #endif /* UNIV_DEBUG */
602         block->page.in_LRU_list = FALSE;
603 +       block->in_unzip_LRU_list = FALSE;
604  #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
605         block->n_pointers = 0;
606  #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
607 @@ -981,9 +986,11 @@
608                 memset(block->frame, '\0', UNIV_PAGE_SIZE);
609  #endif
610                 /* Add the block to the free list */
611 -               UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
612 +               mutex_enter(&buf_pool->free_list_mutex);
613 +               UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
614  
615                 ut_d(block->page.in_free_list = TRUE);
616 +               mutex_exit(&buf_pool->free_list_mutex);
617                 ut_ad(buf_pool_from_block(block) == buf_pool);
618  
619                 block++;
620 @@ -1038,7 +1045,8 @@
621         buf_chunk_t*    chunk = buf_pool->chunks;
622  
623         ut_ad(buf_pool);
624 -       ut_ad(buf_pool_mutex_own(buf_pool));
625 +       //ut_ad(buf_pool_mutex_own(buf_pool));
626 +       ut_ad(mutex_own(&buf_pool->zip_free_mutex));
627         for (n = buf_pool->n_chunks; n--; chunk++) {
628  
629                 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
630 @@ -1138,7 +1146,7 @@
631         buf_block_t*            block;
632         const buf_block_t*      block_end;
633  
634 -       ut_ad(buf_pool_mutex_own(buf_pool));
635 +       //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
636  
637         block_end = chunk->blocks + chunk->size;
638  
639 @@ -1150,8 +1158,10 @@
640                 ut_ad(!block->in_unzip_LRU_list);
641                 ut_ad(!block->page.in_flush_list);
642                 /* Remove the block from the free list. */
643 +               mutex_enter(&buf_pool->free_list_mutex);
644                 ut_ad(block->page.in_free_list);
645 -               UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
646 +               UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
647 +               mutex_exit(&buf_pool->free_list_mutex);
648  
649                 /* Free the latches. */
650                 mutex_free(&block->mutex);
651 @@ -1208,9 +1218,21 @@
652         ------------------------------- */
653         mutex_create(buf_pool_mutex_key,
654                      &buf_pool->mutex, SYNC_BUF_POOL);
655 +       mutex_create(buf_pool_LRU_list_mutex_key,
656 +                    &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
657 +       rw_lock_create(buf_pool_page_hash_key,
658 +                      &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
659 +       mutex_create(buf_pool_free_list_mutex_key,
660 +                    &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
661 +       mutex_create(buf_pool_zip_free_mutex_key,
662 +                    &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
663 +       mutex_create(buf_pool_zip_hash_mutex_key,
664 +                    &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
665         mutex_create(buf_pool_zip_mutex_key,
666                      &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
667  
668 +       mutex_enter(&buf_pool->LRU_list_mutex);
669 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
670         buf_pool_mutex_enter(buf_pool);
671  
672         if (buf_pool_size > 0) {
673 @@ -1223,6 +1245,8 @@
674                         mem_free(chunk);
675                         mem_free(buf_pool);
676  
677 +                       mutex_exit(&buf_pool->LRU_list_mutex);
678 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
679                         buf_pool_mutex_exit(buf_pool);
680  
681                         return(DB_ERROR);
682 @@ -1253,6 +1277,8 @@
683  
684         /* All fields are initialized by mem_zalloc(). */
685  
686 +       mutex_exit(&buf_pool->LRU_list_mutex);
687 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
688         buf_pool_mutex_exit(buf_pool);
689  
690         return(DB_SUCCESS);
691 @@ -1467,7 +1493,11 @@
692         ulint           fold;
693         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
694  
695 -       ut_ad(buf_pool_mutex_own(buf_pool));
696 +       //ut_ad(buf_pool_mutex_own(buf_pool));
697 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
698 +#ifdef UNIV_SYNC_DEBUG
699 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
700 +#endif
701         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
702         ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
703         ut_a(bpage->buf_fix_count == 0);
704 @@ -1554,7 +1584,8 @@
705  
706  try_again:
707         btr_search_disable(); /* Empty the adaptive hash index again */
708 -       buf_pool_mutex_enter(buf_pool);
709 +       //buf_pool_mutex_enter(buf_pool);
710 +       mutex_enter(&buf_pool->LRU_list_mutex);
711  
712  shrink_again:
713         if (buf_pool->n_chunks <= 1) {
714 @@ -1625,7 +1656,7 @@
715  
716                                 buf_LRU_make_block_old(&block->page);
717                                 dirty++;
718 -                       } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
719 +                       } else if (buf_LRU_free_block(&block->page, TRUE, NULL, TRUE)
720                                    != BUF_LRU_FREED) {
721                                 nonfree++;
722                         }
723 @@ -1633,7 +1664,8 @@
724                         mutex_exit(&block->mutex);
725                 }
726  
727 -               buf_pool_mutex_exit(buf_pool);
728 +               //buf_pool_mutex_exit(buf_pool);
729 +               mutex_exit(&buf_pool->LRU_list_mutex);
730  
731                 /* Request for a flush of the chunk if it helps.
732                 Do not flush if there are non-free blocks, since
733 @@ -1683,7 +1715,8 @@
734  func_done:
735         buf_pool->old_pool_size = buf_pool->curr_pool_size;
736  func_exit:
737 -       buf_pool_mutex_exit(buf_pool);
738 +       //buf_pool_mutex_exit(buf_pool);
739 +       mutex_exit(&buf_pool->LRU_list_mutex);
740         btr_search_enable();
741  }
742  
743 @@ -1724,7 +1757,9 @@
744         hash_table_t*   zip_hash;
745         hash_table_t*   page_hash;
746  
747 -       buf_pool_mutex_enter(buf_pool);
748 +       //buf_pool_mutex_enter(buf_pool);
749 +       mutex_enter(&buf_pool->LRU_list_mutex);
750 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
751  
752         /* Free, create, and populate the hash table. */
753         hash_table_free(buf_pool->page_hash);
754 @@ -1765,8 +1800,9 @@
755         All such blocks are either in buf_pool->zip_clean or
756         in buf_pool->flush_list. */
757  
758 +       mutex_enter(&buf_pool->zip_mutex);
759         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
760 -            b = UT_LIST_GET_NEXT(list, b)) {
761 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
762                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
763                 ut_ad(!b->in_flush_list);
764                 ut_ad(b->in_LRU_list);
765 @@ -1776,10 +1812,11 @@
766                 HASH_INSERT(buf_page_t, hash, page_hash,
767                             buf_page_address_fold(b->space, b->offset), b);
768         }
769 +       mutex_exit(&buf_pool->zip_mutex);
770  
771         buf_flush_list_mutex_enter(buf_pool);
772         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
773 -            b = UT_LIST_GET_NEXT(list, b)) {
774 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
775                 ut_ad(b->in_flush_list);
776                 ut_ad(b->in_LRU_list);
777                 ut_ad(b->in_page_hash);
778 @@ -1806,7 +1843,9 @@
779         }
780  
781         buf_flush_list_mutex_exit(buf_pool);
782 -       buf_pool_mutex_exit(buf_pool);
783 +       //buf_pool_mutex_exit(buf_pool);
784 +       mutex_exit(&buf_pool->LRU_list_mutex);
785 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
786  }
787  
788  /********************************************************************
789 @@ -1853,21 +1892,32 @@
790         buf_page_t*     bpage;
791         ulint           i;
792         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
793 +       mutex_t*        block_mutex;
794  
795 -       ut_ad(buf_pool_mutex_own(buf_pool));
796 +       //ut_ad(buf_pool_mutex_own(buf_pool));
797  
798 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
799         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
800 +       if (bpage) {
801 +               block_mutex = buf_page_get_mutex_enter(bpage);
802 +               ut_a(block_mutex);
803 +       }
804  
805         if (UNIV_LIKELY_NULL(bpage)) {
806                 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
807                         /* The page was loaded meanwhile. */
808 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
809                         return(bpage);
810                 }
811                 /* Add to an existing watch. */
812                 bpage->buf_fix_count++;
813 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
814 +               mutex_exit(block_mutex);
815                 return(NULL);
816         }
817  
818 +       /* buf_pool->watch is protected by zip_mutex for now */
819 +       mutex_enter(&buf_pool->zip_mutex);
820         for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
821                 bpage = &buf_pool->watch[i];
822  
823 @@ -1891,10 +1941,12 @@
824                         bpage->space = space;
825                         bpage->offset = offset;
826                         bpage->buf_fix_count = 1;
827 -
828 +                       bpage->buf_pool_index = buf_pool_index(buf_pool);
829                         ut_d(bpage->in_page_hash = TRUE);
830                         HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
831                                     fold, bpage);
832 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
833 +                       mutex_exit(&buf_pool->zip_mutex);
834                         return(NULL);
835                 case BUF_BLOCK_ZIP_PAGE:
836                         ut_ad(bpage->in_page_hash);
837 @@ -1912,6 +1964,8 @@
838         ut_error;
839  
840         /* Fix compiler warning */
841 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
842 +       mutex_exit(&buf_pool->zip_mutex);
843         return(NULL);
844  }
845  
846 @@ -1941,6 +1995,8 @@
847         buf_chunk_t*    chunks;
848         buf_chunk_t*    chunk;
849  
850 +       mutex_enter(&buf_pool->LRU_list_mutex);
851 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
852         buf_pool_mutex_enter(buf_pool);
853         chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
854  
855 @@ -1959,6 +2015,8 @@
856                 buf_pool->n_chunks++;
857         }
858  
859 +       mutex_exit(&buf_pool->LRU_list_mutex);
860 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
861         buf_pool_mutex_exit(buf_pool);
862  }
863  
864 @@ -2046,7 +2104,11 @@
865                                         space, offset) */
866         buf_page_t*     watch)          /*!< in/out: sentinel for watch */
867  {
868 -       ut_ad(buf_pool_mutex_own(buf_pool));
869 +       //ut_ad(buf_pool_mutex_own(buf_pool));
870 +#ifdef UNIV_SYNC_DEBUG
871 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
872 +#endif
873 +       ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
874  
875         HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
876         ut_d(watch->in_page_hash = FALSE);
877 @@ -2068,28 +2130,31 @@
878         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
879         ulint           fold = buf_page_address_fold(space, offset);
880  
881 -       buf_pool_mutex_enter(buf_pool);
882 +       //buf_pool_mutex_enter(buf_pool);
883 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
884         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
885         /* The page must exist because buf_pool_watch_set()
886         increments buf_fix_count. */
887         ut_a(bpage);
888  
889         if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
890 -               mutex_t* mutex = buf_page_get_mutex(bpage);
891 +               mutex_t* mutex = buf_page_get_mutex_enter(bpage);
892  
893 -               mutex_enter(mutex);
894                 ut_a(bpage->buf_fix_count > 0);
895                 bpage->buf_fix_count--;
896                 mutex_exit(mutex);
897         } else {
898 +               mutex_enter(&buf_pool->zip_mutex);
899                 ut_a(bpage->buf_fix_count > 0);
900  
901                 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
902                         buf_pool_watch_remove(buf_pool, fold, bpage);
903                 }
904 +               mutex_exit(&buf_pool->zip_mutex);
905         }
906  
907 -       buf_pool_mutex_exit(buf_pool);
908 +       //buf_pool_mutex_exit(buf_pool);
909 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
910  }
911  
912  /****************************************************************//**
913 @@ -2109,14 +2174,16 @@
914         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
915         ulint           fold    = buf_page_address_fold(space, offset);
916  
917 -       buf_pool_mutex_enter(buf_pool);
918 +       //buf_pool_mutex_enter(buf_pool);
919 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
920  
921         bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
922         /* The page must exist because buf_pool_watch_set()
923         increments buf_fix_count. */
924         ut_a(bpage);
925         ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
926 -       buf_pool_mutex_exit(buf_pool);
927 +       //buf_pool_mutex_exit(buf_pool);
928 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
929  
930         return(ret);
931  }
932 @@ -2133,13 +2200,15 @@
933  {
934         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
935  
936 -       buf_pool_mutex_enter(buf_pool);
937 +       //buf_pool_mutex_enter(buf_pool);
938 +       mutex_enter(&buf_pool->LRU_list_mutex);
939  
940         ut_a(buf_page_in_file(bpage));
941  
942         buf_LRU_make_block_young(bpage);
943  
944 -       buf_pool_mutex_exit(buf_pool);
945 +       //buf_pool_mutex_exit(buf_pool);
946 +       mutex_exit(&buf_pool->LRU_list_mutex);
947  }
948  
949  /********************************************************************//**
950 @@ -2163,14 +2232,20 @@
951         ut_a(buf_page_in_file(bpage));
952  
953         if (buf_page_peek_if_too_old(bpage)) {
954 -               buf_pool_mutex_enter(buf_pool);
955 +               //buf_pool_mutex_enter(buf_pool);
956 +               mutex_enter(&buf_pool->LRU_list_mutex);
957                 buf_LRU_make_block_young(bpage);
958 -               buf_pool_mutex_exit(buf_pool);
959 +               //buf_pool_mutex_exit(buf_pool);
960 +               mutex_exit(&buf_pool->LRU_list_mutex);
961         } else if (!access_time) {
962                 ulint   time_ms = ut_time_ms();
963 -               buf_pool_mutex_enter(buf_pool);
964 +               mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
965 +               //buf_pool_mutex_enter(buf_pool);
966 +               if (block_mutex) {
967                 buf_page_set_accessed(bpage, time_ms);
968 -               buf_pool_mutex_exit(buf_pool);
969 +               mutex_exit(block_mutex);
970 +               }
971 +               //buf_pool_mutex_exit(buf_pool);
972         }
973  }
974  
975 @@ -2187,7 +2262,8 @@
976         buf_block_t*    block;
977         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
978  
979 -       buf_pool_mutex_enter(buf_pool);
980 +       //buf_pool_mutex_enter(buf_pool);
981 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
982  
983         block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
984  
985 @@ -2196,7 +2272,8 @@
986                 block->check_index_page_at_flush = FALSE;
987         }
988  
989 -       buf_pool_mutex_exit(buf_pool);
990 +       //buf_pool_mutex_exit(buf_pool);
991 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
992  }
993  
994  /********************************************************************//**
995 @@ -2215,7 +2292,8 @@
996         ibool           is_hashed;
997         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
998  
999 -       buf_pool_mutex_enter(buf_pool);
1000 +       //buf_pool_mutex_enter(buf_pool);
1001 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1002  
1003         block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1004  
1005 @@ -2226,7 +2304,8 @@
1006                 is_hashed = block->is_hashed;
1007         }
1008  
1009 -       buf_pool_mutex_exit(buf_pool);
1010 +       //buf_pool_mutex_exit(buf_pool);
1011 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1012  
1013         return(is_hashed);
1014  }
1015 @@ -2248,7 +2327,8 @@
1016         buf_page_t*     bpage;
1017         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1018  
1019 -       buf_pool_mutex_enter(buf_pool);
1020 +       //buf_pool_mutex_enter(buf_pool);
1021 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1022  
1023         bpage = buf_page_hash_get(buf_pool, space, offset);
1024  
1025 @@ -2257,7 +2337,8 @@
1026                 bpage->file_page_was_freed = TRUE;
1027         }
1028  
1029 -       buf_pool_mutex_exit(buf_pool);
1030 +       //buf_pool_mutex_exit(buf_pool);
1031 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1032  
1033         return(bpage);
1034  }
1035 @@ -2278,7 +2359,8 @@
1036         buf_page_t*     bpage;
1037         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1038  
1039 -       buf_pool_mutex_enter(buf_pool);
1040 +       //buf_pool_mutex_enter(buf_pool);
1041 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1042  
1043         bpage = buf_page_hash_get(buf_pool, space, offset);
1044  
1045 @@ -2287,7 +2369,8 @@
1046                 bpage->file_page_was_freed = FALSE;
1047         }
1048  
1049 -       buf_pool_mutex_exit(buf_pool);
1050 +       //buf_pool_mutex_exit(buf_pool);
1051 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1052  
1053         return(bpage);
1054  }
1055 @@ -2322,8 +2405,9 @@
1056         buf_pool->stat.n_page_gets++;
1057  
1058         for (;;) {
1059 -               buf_pool_mutex_enter(buf_pool);
1060 +               //buf_pool_mutex_enter(buf_pool);
1061  lookup:
1062 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
1063                 bpage = buf_page_hash_get(buf_pool, space, offset);
1064                 if (bpage) {
1065                         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1066 @@ -2332,7 +2416,8 @@
1067  
1068                 /* Page not in buf_pool: needs to be read from file */
1069  
1070 -               buf_pool_mutex_exit(buf_pool);
1071 +               //buf_pool_mutex_exit(buf_pool);
1072 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1073  
1074                 buf_read_page(space, zip_size, offset);
1075  
1076 @@ -2344,10 +2429,15 @@
1077         if (UNIV_UNLIKELY(!bpage->zip.data)) {
1078                 /* There is no compressed page. */
1079  err_exit:
1080 -               buf_pool_mutex_exit(buf_pool);
1081 +               //buf_pool_mutex_exit(buf_pool);
1082 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1083                 return(NULL);
1084         }
1085  
1086 +       block_mutex = buf_page_get_mutex_enter(bpage);
1087 +
1088 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1089 +
1090         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1091  
1092         switch (buf_page_get_state(bpage)) {
1093 @@ -2356,19 +2446,19 @@
1094         case BUF_BLOCK_MEMORY:
1095         case BUF_BLOCK_REMOVE_HASH:
1096         case BUF_BLOCK_ZIP_FREE:
1097 +               if (block_mutex)
1098 +                       mutex_exit(block_mutex);
1099                 break;
1100         case BUF_BLOCK_ZIP_PAGE:
1101         case BUF_BLOCK_ZIP_DIRTY:
1102 -               block_mutex = &buf_pool->zip_mutex;
1103 -               mutex_enter(block_mutex);
1104 +               ut_a(block_mutex == &buf_pool->zip_mutex);
1105                 bpage->buf_fix_count++;
1106                 goto got_block;
1107         case BUF_BLOCK_FILE_PAGE:
1108 -               block_mutex = &((buf_block_t*) bpage)->mutex;
1109 -               mutex_enter(block_mutex);
1110 +               ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1111  
1112                 /* Discard the uncompressed page frame if possible. */
1113 -               if (buf_LRU_free_block(bpage, FALSE, NULL)
1114 +               if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
1115                     == BUF_LRU_FREED) {
1116  
1117                         mutex_exit(block_mutex);
1118 @@ -2387,7 +2477,7 @@
1119         must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1120         access_time = buf_page_is_accessed(bpage);
1121  
1122 -       buf_pool_mutex_exit(buf_pool);
1123 +       //buf_pool_mutex_exit(buf_pool);
1124  
1125         mutex_exit(block_mutex);
1126  
1127 @@ -2696,7 +2786,7 @@
1128         const buf_block_t*      block)          /*!< in: pointer to block,
1129                                                 not dereferenced */
1130  {
1131 -       ut_ad(buf_pool_mutex_own(buf_pool));
1132 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1133  
1134         if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1135                 /* The pointer should be aligned. */
1136 @@ -2732,6 +2822,7 @@
1137         ulint           fix_type;
1138         ibool           must_read;
1139         ulint           retries = 0;
1140 +       mutex_t*        block_mutex = NULL;
1141         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1142  
1143         ut_ad(mtr);
1144 @@ -2754,9 +2845,11 @@
1145         fold = buf_page_address_fold(space, offset);
1146  loop:
1147         block = guess;
1148 -       buf_pool_mutex_enter(buf_pool);
1149 +       //buf_pool_mutex_enter(buf_pool);
1150  
1151         if (block) {
1152 +               block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1153 +
1154                 /* If the guess is a compressed page descriptor that
1155                 has been allocated by buf_buddy_alloc(), it may have
1156                 been invalidated by buf_buddy_relocate().  In that
1157 @@ -2765,11 +2858,15 @@
1158                 the guess may be pointing to a buffer pool chunk that
1159                 has been released when resizing the buffer pool. */
1160  
1161 -               if (!buf_block_is_uncompressed(buf_pool, block)
1162 +               if (!block_mutex) {
1163 +                       block = guess = NULL;
1164 +               } else if (!buf_block_is_uncompressed(buf_pool, block)
1165                     || offset != block->page.offset
1166                     || space != block->page.space
1167                     || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1168  
1169 +                       mutex_exit(block_mutex);
1170 +
1171                         block = guess = NULL;
1172                 } else {
1173                         ut_ad(!block->page.in_zip_hash);
1174 @@ -2778,12 +2875,19 @@
1175         }
1176  
1177         if (block == NULL) {
1178 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
1179                 block = (buf_block_t*) buf_page_hash_get_low(
1180                         buf_pool, space, offset, fold);
1181 +               if (block) {
1182 +                       block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1183 +                       ut_a(block_mutex);
1184 +               }
1185 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1186         }
1187  
1188  loop2:
1189         if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1190 +               mutex_exit(block_mutex);
1191                 block = NULL;
1192         }
1193  
1194 @@ -2795,12 +2899,14 @@
1195                                 space, offset, fold);
1196  
1197                         if (UNIV_LIKELY_NULL(block)) {
1198 -
1199 +                               block_mutex = buf_page_get_mutex((buf_page_t*)block);
1200 +                               ut_a(block_mutex);
1201 +                               ut_ad(mutex_own(block_mutex));
1202                                 goto got_block;
1203                         }
1204                 }
1205  
1206 -               buf_pool_mutex_exit(buf_pool);
1207 +               //buf_pool_mutex_exit(buf_pool);
1208  
1209                 if (mode == BUF_GET_IF_IN_POOL
1210                     || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1211 @@ -2848,7 +2954,8 @@
1212                 /* The page is being read to buffer pool,
1213                 but we cannot wait around for the read to
1214                 complete. */
1215 -               buf_pool_mutex_exit(buf_pool);
1216 +               //buf_pool_mutex_exit(buf_pool);
1217 +               mutex_exit(block_mutex);
1218  
1219                 return(NULL);
1220         }
1221 @@ -2858,38 +2965,49 @@
1222                 ibool           success;
1223  
1224         case BUF_BLOCK_FILE_PAGE:
1225 +               if (block_mutex == &buf_pool->zip_mutex) {
1226 +                       /* wrong mutex (zip_mutex) held: release it and retry */
1227 +                       mutex_exit(block_mutex);
1228 +                       goto loop;
1229 +               }
1230                 break;
1231  
1232         case BUF_BLOCK_ZIP_PAGE:
1233         case BUF_BLOCK_ZIP_DIRTY:
1234 +               ut_ad(block_mutex == &buf_pool->zip_mutex);
1235                 bpage = &block->page;
1236                 /* Protect bpage->buf_fix_count. */
1237 -               mutex_enter(&buf_pool->zip_mutex);
1238 +               //mutex_enter(&buf_pool->zip_mutex);
1239  
1240                 if (bpage->buf_fix_count
1241                     || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1242                         /* This condition often occurs when the buffer
1243                         is not buffer-fixed, but I/O-fixed by
1244                         buf_page_init_for_read(). */
1245 -                       mutex_exit(&buf_pool->zip_mutex);
1246 +                       //mutex_exit(&buf_pool->zip_mutex);
1247  wait_until_unfixed:
1248                         /* The block is buffer-fixed or I/O-fixed.
1249                         Try again later. */
1250 -                       buf_pool_mutex_exit(buf_pool);
1251 +                       //buf_pool_mutex_exit(buf_pool);
1252 +                       mutex_exit(block_mutex);
1253                         os_thread_sleep(WAIT_FOR_READ);
1254    
1255                         goto loop;
1256                 }
1257  
1258                 /* Allocate an uncompressed page. */
1259 -               buf_pool_mutex_exit(buf_pool);
1260 -               mutex_exit(&buf_pool->zip_mutex);
1261 +               //buf_pool_mutex_exit(buf_pool);
1262 +               //mutex_exit(&buf_pool->zip_mutex);
1263 +               mutex_exit(block_mutex);
1264  
1265                 block = buf_LRU_get_free_block(buf_pool, 0);
1266                 ut_a(block);
1267 +               block_mutex = &block->mutex;
1268  
1269 -               buf_pool_mutex_enter(buf_pool);
1270 -               mutex_enter(&block->mutex);
1271 +               //buf_pool_mutex_enter(buf_pool);
1272 +               mutex_enter(&buf_pool->LRU_list_mutex);
1273 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
1274 +               mutex_enter(block_mutex);
1275  
1276                 {
1277                         buf_page_t*     hash_bpage;
1278 @@ -2902,35 +3020,47 @@
1279                                 while buf_pool->mutex was released.
1280                                 Free the block that was allocated. */
1281  
1282 -                               buf_LRU_block_free_non_file_page(block);
1283 -                               mutex_exit(&block->mutex);
1284 +                               buf_LRU_block_free_non_file_page(block, TRUE);
1285 +                               mutex_exit(block_mutex);
1286  
1287                                 block = (buf_block_t*) hash_bpage;
1288 +                               if (block) {
1289 +                                       block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1290 +                                       ut_a(block_mutex);
1291 +                               }
1292 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1293 +                               mutex_exit(&buf_pool->LRU_list_mutex);
1294                                 goto loop2;
1295                         }
1296                 }
1297  
1298 +               mutex_enter(&buf_pool->zip_mutex);
1299 +
1300                 if (UNIV_UNLIKELY
1301                     (bpage->buf_fix_count
1302                      || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1303  
1304 +                       mutex_exit(&buf_pool->zip_mutex);
1305                         /* The block was buffer-fixed or I/O-fixed
1306                         while buf_pool->mutex was not held by this thread.
1307                         Free the block that was allocated and try again.
1308                         This should be extremely unlikely. */
1309  
1310 -                       buf_LRU_block_free_non_file_page(block);
1311 -                       mutex_exit(&block->mutex);
1312 +                       buf_LRU_block_free_non_file_page(block, TRUE);
1313 +                       //mutex_exit(&block->mutex);
1314  
1315 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1316 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1317                         goto wait_until_unfixed;
1318                 }
1319  
1320                 /* Move the compressed page from bpage to block,
1321                 and uncompress it. */
1322  
1323 -               mutex_enter(&buf_pool->zip_mutex);
1324 -
1325                 buf_relocate(bpage, &block->page);
1326 +
1327 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1328 +
1329                 buf_block_init_low(block);
1330                 block->lock_hash_val = lock_rec_hash(space, offset);
1331  
1332 @@ -2939,7 +3069,7 @@
1333  
1334                 if (buf_page_get_state(&block->page)
1335                     == BUF_BLOCK_ZIP_PAGE) {
1336 -                       UT_LIST_REMOVE(list, buf_pool->zip_clean,
1337 +                       UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1338                                        &block->page);
1339                         ut_ad(!block->page.in_flush_list);
1340                 } else {
1341 @@ -2956,19 +3086,24 @@
1342                 /* Insert at the front of unzip_LRU list */
1343                 buf_unzip_LRU_add_block(block, FALSE);
1344  
1345 +               mutex_exit(&buf_pool->LRU_list_mutex);
1346 +
1347                 block->page.buf_fix_count = 1;
1348                 buf_block_set_io_fix(block, BUF_IO_READ);
1349                 rw_lock_x_lock_func(&block->lock, 0, file, line);
1350  
1351                 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1352  
1353 -               mutex_exit(&block->mutex);
1354 +               mutex_exit(block_mutex);
1355                 mutex_exit(&buf_pool->zip_mutex);
1356 +
1357 +               buf_pool_mutex_enter(buf_pool);
1358                 buf_pool->n_pend_unzip++;
1359 +               buf_pool_mutex_exit(buf_pool);
1360  
1361 -               buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1362 +               buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1363  
1364 -               buf_pool_mutex_exit(buf_pool);
1365 +               //buf_pool_mutex_exit(buf_pool);
1366  
1367                 /* Decompress the page and apply buffered operations
1368                 while not holding buf_pool->mutex or block->mutex. */
1369 @@ -2981,12 +3116,15 @@
1370                 }
1371  
1372                 /* Unfix and unlatch the block. */
1373 -               buf_pool_mutex_enter(buf_pool);
1374 -               mutex_enter(&block->mutex);
1375 +               //buf_pool_mutex_enter(buf_pool);
1376 +               block_mutex = &block->mutex;
1377 +               mutex_enter(block_mutex);
1378                 block->page.buf_fix_count--;
1379                 buf_block_set_io_fix(block, BUF_IO_NONE);
1380 -               mutex_exit(&block->mutex);
1381 +
1382 +               buf_pool_mutex_enter(buf_pool);
1383                 buf_pool->n_pend_unzip--;
1384 +               buf_pool_mutex_exit(buf_pool);
1385                 rw_lock_x_unlock(&block->lock);
1386  
1387                 break;
1388 @@ -3002,7 +3140,7 @@
1389  
1390         ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1391  
1392 -       mutex_enter(&block->mutex);
1393 +       //mutex_enter(&block->mutex);
1394  #if UNIV_WORD_SIZE == 4
1395         /* On 32-bit systems, there is no padding in buf_page_t.  On
1396         other systems, Valgrind could complain about uninitialized pad
1397 @@ -3015,7 +3153,7 @@
1398                 /* Try to evict the block from the buffer pool, to use the
1399                 insert buffer (change buffer) as much as possible. */
1400  
1401 -               if (buf_LRU_free_block(&block->page, TRUE, NULL)
1402 +               if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
1403                     == BUF_LRU_FREED) {
1404                         mutex_exit(&block->mutex);
1405                         if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1406 @@ -3052,13 +3190,14 @@
1407  
1408         buf_block_buf_fix_inc(block, file, line);
1409  
1410 -       mutex_exit(&block->mutex);
1411 +       //mutex_exit(&block->mutex);
1412  
1413         /* Check if this is the first access to the page */
1414  
1415         access_time = buf_page_is_accessed(&block->page);
1416  
1417 -       buf_pool_mutex_exit(buf_pool);
1418 +       //buf_pool_mutex_exit(buf_pool);
1419 +       mutex_exit(block_mutex);
1420  
1421         buf_page_set_accessed_make_young(&block->page, access_time);
1422  
1423 @@ -3291,9 +3430,11 @@
1424         buf_pool = buf_pool_from_block(block);
1425  
1426         if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1427 -               buf_pool_mutex_enter(buf_pool);
1428 +               //buf_pool_mutex_enter(buf_pool);
1429 +               mutex_enter(&buf_pool->LRU_list_mutex);
1430                 buf_LRU_make_block_young(&block->page);
1431 -               buf_pool_mutex_exit(buf_pool);
1432 +               //buf_pool_mutex_exit(buf_pool);
1433 +               mutex_exit(&buf_pool->LRU_list_mutex);
1434         } else if (!buf_page_is_accessed(&block->page)) {
1435                 /* Above, we do a dirty read on purpose, to avoid
1436                 mutex contention.  The field buf_page_t::access_time
1437 @@ -3301,9 +3442,11 @@
1438                 field must be protected by mutex, however. */
1439                 ulint   time_ms = ut_time_ms();
1440  
1441 -               buf_pool_mutex_enter(buf_pool);
1442 +               //buf_pool_mutex_enter(buf_pool);
1443 +               mutex_enter(&block->mutex);
1444                 buf_page_set_accessed(&block->page, time_ms);
1445 -               buf_pool_mutex_exit(buf_pool);
1446 +               //buf_pool_mutex_exit(buf_pool);
1447 +               mutex_exit(&block->mutex);
1448         }
1449  
1450         ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1451 @@ -3370,18 +3513,21 @@
1452         ut_ad(mtr);
1453         ut_ad(mtr->state == MTR_ACTIVE);
1454  
1455 -       buf_pool_mutex_enter(buf_pool);
1456 +       //buf_pool_mutex_enter(buf_pool);
1457 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
1458         block = buf_block_hash_get(buf_pool, space_id, page_no);
1459  
1460         if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1461 -               buf_pool_mutex_exit(buf_pool);
1462 +               //buf_pool_mutex_exit(buf_pool);
1463 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
1464                 return(NULL);
1465         }
1466  
1467         ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1468  
1469         mutex_enter(&block->mutex);
1470 -       buf_pool_mutex_exit(buf_pool);
1471 +       //buf_pool_mutex_exit(buf_pool);
1472 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
1473  
1474  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1475         ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1476 @@ -3470,7 +3616,10 @@
1477         buf_page_t*     hash_page;
1478         buf_pool_t*     buf_pool = buf_pool_get(space, offset);
1479  
1480 -       ut_ad(buf_pool_mutex_own(buf_pool));
1481 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1482 +#ifdef UNIV_SYNC_DEBUG
1483 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1484 +#endif
1485         ut_ad(mutex_own(&(block->mutex)));
1486         ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1487  
1488 @@ -3499,11 +3648,14 @@
1489         if (UNIV_LIKELY(!hash_page)) {
1490         } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1491                 /* Preserve the reference count. */
1492 -               ulint   buf_fix_count = hash_page->buf_fix_count;
1493 +               ulint   buf_fix_count;
1494  
1495 +               mutex_enter(&buf_pool->zip_mutex);
1496 +               buf_fix_count = hash_page->buf_fix_count;
1497                 ut_a(buf_fix_count > 0);
1498                 block->page.buf_fix_count += buf_fix_count;
1499                 buf_pool_watch_remove(buf_pool, fold, hash_page);
1500 +               mutex_exit(&buf_pool->zip_mutex);
1501         } else {
1502                 fprintf(stderr,
1503                         "InnoDB: Error: page %lu %lu already found"
1504 @@ -3513,7 +3665,8 @@
1505                         (const void*) hash_page, (const void*) block);
1506  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1507                 mutex_exit(&block->mutex);
1508 -               buf_pool_mutex_exit(buf_pool);
1509 +               //buf_pool_mutex_exit(buf_pool);
1510 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1511                 buf_print();
1512                 buf_LRU_print();
1513                 buf_validate();
1514 @@ -3597,7 +3750,9 @@
1515  
1516         fold = buf_page_address_fold(space, offset);
1517  
1518 -       buf_pool_mutex_enter(buf_pool);
1519 +       //buf_pool_mutex_enter(buf_pool);
1520 +       mutex_enter(&buf_pool->LRU_list_mutex);
1521 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1522  
1523         watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1524         if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
1525 @@ -3606,9 +3761,15 @@
1526  err_exit:
1527                 if (block) {
1528                         mutex_enter(&block->mutex);
1529 -                       buf_LRU_block_free_non_file_page(block);
1530 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1531 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1532 +                       buf_LRU_block_free_non_file_page(block, FALSE);
1533                         mutex_exit(&block->mutex);
1534                 }
1535 +               else {
1536 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1537 +                       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1538 +               }
1539  
1540                 bpage = NULL;
1541                 goto func_exit;
1542 @@ -3631,6 +3792,8 @@
1543  
1544                 buf_page_init(space, offset, fold, block);
1545  
1546 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1547 +
1548                 /* The block must be put to the LRU list, to the old blocks */
1549                 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1550  
1551 @@ -3658,7 +3821,7 @@
1552                         been added to buf_pool->LRU and
1553                         buf_pool->page_hash. */
1554                         mutex_exit(&block->mutex);
1555 -                       data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1556 +                       data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1557                         mutex_enter(&block->mutex);
1558                         block->page.zip.data = data;
1559  
1560 @@ -3671,6 +3834,7 @@
1561                         buf_unzip_LRU_add_block(block, TRUE);
1562                 }
1563  
1564 +               mutex_exit(&buf_pool->LRU_list_mutex);
1565                 mutex_exit(&block->mutex);
1566         } else {
1567                 /* Defer buf_buddy_alloc() until after the block has
1568 @@ -3682,8 +3846,8 @@
1569                 control block (bpage), in order to avoid the
1570                 invocation of buf_buddy_relocate_block() on
1571                 uninitialized data. */
1572 -               data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1573 -               bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1574 +               data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1575 +               bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1576  
1577                 /* Initialize the buf_pool pointer. */
1578                 bpage->buf_pool_index = buf_pool_index(buf_pool);
1579 @@ -3702,8 +3866,11 @@
1580  
1581                                 /* The block was added by some other thread. */
1582                                 watch_page = NULL;
1583 -                               buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1584 -                               buf_buddy_free(buf_pool, data, zip_size);
1585 +                               buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1586 +                               buf_buddy_free(buf_pool, data, zip_size, TRUE);
1587 +
1588 +                               mutex_exit(&buf_pool->LRU_list_mutex);
1589 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1590  
1591                                 bpage = NULL;
1592                                 goto func_exit;
1593 @@ -3747,18 +3914,24 @@
1594                 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1595                             bpage);
1596  
1597 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1598 +
1599                 /* The block must be put to the LRU list, to the old blocks */
1600                 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1601                 buf_LRU_insert_zip_clean(bpage);
1602  
1603 +               mutex_exit(&buf_pool->LRU_list_mutex);
1604 +
1605                 buf_page_set_io_fix(bpage, BUF_IO_READ);
1606  
1607                 mutex_exit(&buf_pool->zip_mutex);
1608         }
1609  
1610 +       buf_pool_mutex_enter(buf_pool);
1611         buf_pool->n_pend_reads++;
1612 -func_exit:
1613         buf_pool_mutex_exit(buf_pool);
1614 +func_exit:
1615 +       //buf_pool_mutex_exit(buf_pool);
1616  
1617         if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1618  
1619 @@ -3800,7 +3973,9 @@
1620  
1621         fold = buf_page_address_fold(space, offset);
1622  
1623 -       buf_pool_mutex_enter(buf_pool);
1624 +       //buf_pool_mutex_enter(buf_pool);
1625 +       mutex_enter(&buf_pool->LRU_list_mutex);
1626 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1627  
1628         block = (buf_block_t*) buf_page_hash_get_low(
1629                 buf_pool, space, offset, fold);
1630 @@ -3816,7 +3991,9 @@
1631  #endif /* UNIV_DEBUG_FILE_ACCESSES */
1632  
1633                 /* Page can be found in buf_pool */
1634 -               buf_pool_mutex_exit(buf_pool);
1635 +               //buf_pool_mutex_exit(buf_pool);
1636 +               mutex_exit(&buf_pool->LRU_list_mutex);
1637 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
1638  
1639                 buf_block_free(free_block);
1640  
1641 @@ -3838,6 +4015,7 @@
1642         mutex_enter(&block->mutex);
1643  
1644         buf_page_init(space, offset, fold, block);
1645 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1646  
1647         /* The block must be put to the LRU list */
1648         buf_LRU_add_block(&block->page, FALSE);
1649 @@ -3864,7 +4042,7 @@
1650                 the reacquisition of buf_pool->mutex.  We also must
1651                 defer this operation until after the block descriptor
1652                 has been added to buf_pool->LRU and buf_pool->page_hash. */
1653 -               data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1654 +               data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1655                 mutex_enter(&block->mutex);
1656                 block->page.zip.data = data;
1657  
1658 @@ -3882,7 +4060,8 @@
1659  
1660         buf_page_set_accessed(&block->page, time_ms);
1661  
1662 -       buf_pool_mutex_exit(buf_pool);
1663 +       //buf_pool_mutex_exit(buf_pool);
1664 +       mutex_exit(&buf_pool->LRU_list_mutex);
1665  
1666         mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1667  
1668 @@ -3933,6 +4112,8 @@
1669         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1670         const ibool     uncompressed = (buf_page_get_state(bpage)
1671                                         == BUF_BLOCK_FILE_PAGE);
1672 +       ibool           have_LRU_mutex = FALSE;
1673 +       mutex_t*        block_mutex;
1674  
1675         ut_a(buf_page_in_file(bpage));
1676  
1677 @@ -4066,8 +4247,26 @@
1678                 }
1679         }
1680  
1681 +       if (io_type == BUF_IO_WRITE
1682 +           && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1683 +               || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1684 +               /* to keep consistency at buf_LRU_insert_zip_clean() */
1685 +               have_LRU_mutex = TRUE; /* optimistic */
1686 +       }
1687 +retry_mutex:
1688 +       if (have_LRU_mutex)
1689 +               mutex_enter(&buf_pool->LRU_list_mutex);
1690 +       block_mutex = buf_page_get_mutex_enter(bpage);
1691 +       ut_a(block_mutex);
1692 +       if (io_type == BUF_IO_WRITE
1693 +           && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1694 +               || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1695 +           && !have_LRU_mutex) {
1696 +               mutex_exit(block_mutex);
1697 +               have_LRU_mutex = TRUE;
1698 +               goto retry_mutex;
1699 +       }
1700         buf_pool_mutex_enter(buf_pool);
1701 -       mutex_enter(buf_page_get_mutex(bpage));
1702  
1703  #ifdef UNIV_IBUF_COUNT_DEBUG
1704         if (io_type == BUF_IO_WRITE || uncompressed) {
1705 @@ -4090,6 +4289,7 @@
1706                 the x-latch to this OS thread: do not let this confuse you in
1707                 debugging! */
1708  
1709 +               ut_a(!have_LRU_mutex);
1710                 ut_ad(buf_pool->n_pend_reads > 0);
1711                 buf_pool->n_pend_reads--;
1712                 buf_pool->stat.n_pages_read++;
1713 @@ -4107,6 +4307,9 @@
1714  
1715                 buf_flush_write_complete(bpage);
1716  
1717 +               if (have_LRU_mutex)
1718 +                       mutex_exit(&buf_pool->LRU_list_mutex);
1719 +
1720                 if (uncompressed) {
1721                         rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1722                                              BUF_IO_WRITE);
1723 @@ -4129,8 +4332,8 @@
1724         }
1725  #endif /* UNIV_DEBUG */
1726  
1727 -       mutex_exit(buf_page_get_mutex(bpage));
1728         buf_pool_mutex_exit(buf_pool);
1729 +       mutex_exit(block_mutex);
1730  }
1731  
1732  /*********************************************************************//**
1733 @@ -4147,7 +4350,9 @@
1734  
1735         ut_ad(buf_pool);
1736  
1737 -       buf_pool_mutex_enter(buf_pool);
1738 +       //buf_pool_mutex_enter(buf_pool);
1739 +       mutex_enter(&buf_pool->LRU_list_mutex);
1740 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1741  
1742         chunk = buf_pool->chunks;
1743  
1744 @@ -4164,7 +4369,9 @@
1745                 }
1746         }
1747  
1748 -       buf_pool_mutex_exit(buf_pool);
1749 +       //buf_pool_mutex_exit(buf_pool);
1750 +       mutex_exit(&buf_pool->LRU_list_mutex);
1751 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1752  
1753         return(TRUE);
1754  }
1755 @@ -4212,7 +4419,8 @@
1756                 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1757         }
1758  
1759 -       buf_pool_mutex_enter(buf_pool);
1760 +       //buf_pool_mutex_enter(buf_pool);
1761 +       mutex_enter(&buf_pool->LRU_list_mutex);
1762  
1763         ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1764         ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
1765 @@ -4225,7 +4433,8 @@
1766         memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1767         buf_refresh_io_stats(buf_pool);
1768  
1769 -       buf_pool_mutex_exit(buf_pool);
1770 +       //buf_pool_mutex_exit(buf_pool);
1771 +       mutex_exit(&buf_pool->LRU_list_mutex);
1772  }
1773  
1774  /*********************************************************************//**
1775 @@ -4267,7 +4476,10 @@
1776  
1777         ut_ad(buf_pool);
1778  
1779 -       buf_pool_mutex_enter(buf_pool);
1780 +       //buf_pool_mutex_enter(buf_pool);
1781 +       mutex_enter(&buf_pool->LRU_list_mutex);
1782 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
1783 +       /* to keep the new latch order, it cannot validate correctly... */
1784  
1785         chunk = buf_pool->chunks;
1786  
1787 @@ -4362,7 +4574,7 @@
1788         /* Check clean compressed-only blocks. */
1789  
1790         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1791 -            b = UT_LIST_GET_NEXT(list, b)) {
1792 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
1793                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1794                 switch (buf_page_get_io_fix(b)) {
1795                 case BUF_IO_NONE:
1796 @@ -4393,7 +4605,7 @@
1797  
1798         buf_flush_list_mutex_enter(buf_pool);
1799         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1800 -            b = UT_LIST_GET_NEXT(list, b)) {
1801 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
1802                 ut_ad(b->in_flush_list);
1803                 ut_a(b->oldest_modification);
1804                 n_flush++;
1805 @@ -4452,6 +4664,8 @@
1806         }
1807  
1808         ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1809 +       /* because of the latching order with block->mutex, we cannot acquire the needed mutexes before that */
1810 +/*
1811         if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1812                 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1813                         (ulong) UT_LIST_GET_LEN(buf_pool->free),
1814 @@ -4462,8 +4676,11 @@
1815         ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1816         ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1817         ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1818 +*/
1819  
1820 -       buf_pool_mutex_exit(buf_pool);
1821 +       //buf_pool_mutex_exit(buf_pool);
1822 +       mutex_exit(&buf_pool->LRU_list_mutex);
1823 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
1824  
1825         ut_a(buf_LRU_validate());
1826         ut_a(buf_flush_validate(buf_pool));
1827 @@ -4519,7 +4736,9 @@
1828         index_ids = mem_alloc(size * sizeof *index_ids);
1829         counts = mem_alloc(sizeof(ulint) * size);
1830  
1831 -       buf_pool_mutex_enter(buf_pool);
1832 +       //buf_pool_mutex_enter(buf_pool);
1833 +       mutex_enter(&buf_pool->LRU_list_mutex);
1834 +       mutex_enter(&buf_pool->free_list_mutex);
1835         buf_flush_list_mutex_enter(buf_pool);
1836  
1837         fprintf(stderr,
1838 @@ -4588,7 +4807,9 @@
1839                 }
1840         }
1841  
1842 -       buf_pool_mutex_exit(buf_pool);
1843 +       //buf_pool_mutex_exit(buf_pool);
1844 +       mutex_exit(&buf_pool->LRU_list_mutex);
1845 +       mutex_exit(&buf_pool->free_list_mutex);
1846  
1847         for (i = 0; i < n_found; i++) {
1848                 index = dict_index_get_if_in_cache(index_ids[i]);
1849 @@ -4645,7 +4866,7 @@
1850         buf_chunk_t*    chunk;
1851         ulint           fixed_pages_number = 0;
1852  
1853 -       buf_pool_mutex_enter(buf_pool);
1854 +       //buf_pool_mutex_enter(buf_pool);
1855  
1856         chunk = buf_pool->chunks;
1857  
1858 @@ -4679,7 +4900,7 @@
1859         /* Traverse the lists of clean and dirty compressed-only blocks. */
1860  
1861         for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1862 -            b = UT_LIST_GET_NEXT(list, b)) {
1863 +            b = UT_LIST_GET_NEXT(zip_list, b)) {
1864                 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1865                 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1866  
1867 @@ -4691,7 +4912,7 @@
1868  
1869         buf_flush_list_mutex_enter(buf_pool);
1870         for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1871 -            b = UT_LIST_GET_NEXT(list, b)) {
1872 +            b = UT_LIST_GET_NEXT(flush_list, b)) {
1873                 ut_ad(b->in_flush_list);
1874  
1875                 switch (buf_page_get_state(b)) {
1876 @@ -4717,7 +4938,7 @@
1877  
1878         buf_flush_list_mutex_exit(buf_pool);
1879         mutex_exit(&buf_pool->zip_mutex);
1880 -       buf_pool_mutex_exit(buf_pool);
1881 +       //buf_pool_mutex_exit(buf_pool);
1882  
1883         return(fixed_pages_number);
1884  }
1885 @@ -4873,6 +5094,8 @@
1886         /* Find appropriate pool_info to store stats for this buffer pool */
1887         pool_info = &all_pool_info[pool_id];
1888  
1889 +       mutex_enter(&buf_pool->LRU_list_mutex);
1890 +       mutex_enter(&buf_pool->free_list_mutex);
1891         buf_pool_mutex_enter(buf_pool);
1892         buf_flush_list_mutex_enter(buf_pool);
1893  
1894 @@ -4983,6 +5206,8 @@
1895         pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
1896  
1897         buf_refresh_io_stats(buf_pool);
1898 +       mutex_exit(&buf_pool->LRU_list_mutex);
1899 +       mutex_exit(&buf_pool->free_list_mutex);
1900         buf_pool_mutex_exit(buf_pool);
1901  }
1902  
1903 @@ -5224,11 +5449,13 @@
1904  {
1905         ulint   len;
1906  
1907 -       buf_pool_mutex_enter(buf_pool);
1908 +       //buf_pool_mutex_enter(buf_pool);
1909 +       mutex_enter(&buf_pool->free_list_mutex);
1910  
1911         len = UT_LIST_GET_LEN(buf_pool->free);
1912  
1913 -       buf_pool_mutex_exit(buf_pool);
1914 +       //buf_pool_mutex_exit(buf_pool);
1915 +       mutex_exit(&buf_pool->free_list_mutex);
1916  
1917         return(len);
1918  }
1919 diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1920 --- a/storage/innobase/buf/buf0flu.c    2010-12-03 15:22:36.318955693 +0900
1921 +++ b/storage/innobase/buf/buf0flu.c    2010-12-03 15:48:29.289024083 +0900
1922 @@ -307,7 +307,7 @@
1923  
1924         ut_d(block->page.in_flush_list = TRUE);
1925         block->page.oldest_modification = lsn;
1926 -       UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1927 +       UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1928  
1929  #ifdef UNIV_DEBUG_VALGRIND
1930         {
1931 @@ -401,14 +401,14 @@
1932                        > block->page.oldest_modification) {
1933                         ut_ad(b->in_flush_list);
1934                         prev_b = b;
1935 -                       b = UT_LIST_GET_NEXT(list, b);
1936 +                       b = UT_LIST_GET_NEXT(flush_list, b);
1937                 }
1938         }
1939  
1940         if (prev_b == NULL) {
1941 -               UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1942 +               UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1943         } else {
1944 -               UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1945 +               UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1946                                      prev_b, &block->page);
1947         }
1948  
1949 @@ -434,7 +434,7 @@
1950         //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
1951         //ut_ad(buf_pool_mutex_own(buf_pool));
1952  #endif
1953 -       //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1954 +       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1955         //ut_ad(bpage->in_LRU_list);
1956  
1957         if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
1958 @@ -470,14 +470,14 @@
1959         enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1960  {
1961  #ifdef UNIV_DEBUG
1962 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1963 -       ut_ad(buf_pool_mutex_own(buf_pool));
1964 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
1965 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1966  #endif
1967 -       ut_a(buf_page_in_file(bpage));
1968 +       //ut_a(buf_page_in_file(bpage));
1969         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1970         ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1971  
1972 -       if (bpage->oldest_modification != 0
1973 +       if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1974             && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1975                 ut_ad(bpage->in_flush_list);
1976  
1977 @@ -508,7 +508,7 @@
1978  {
1979         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
1980  
1981 -       ut_ad(buf_pool_mutex_own(buf_pool));
1982 +       //ut_ad(buf_pool_mutex_own(buf_pool));
1983         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1984         ut_ad(bpage->in_flush_list);
1985  
1986 @@ -526,11 +526,11 @@
1987                 return;
1988         case BUF_BLOCK_ZIP_DIRTY:
1989                 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1990 -               UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1991 +               UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1992                 buf_LRU_insert_zip_clean(bpage);
1993                 break;
1994         case BUF_BLOCK_FILE_PAGE:
1995 -               UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1996 +               UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1997                 break;
1998         }
1999  
2000 @@ -574,7 +574,7 @@
2001         buf_page_t*     prev_b = NULL;
2002         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2003  
2004 -       ut_ad(buf_pool_mutex_own(buf_pool));
2005 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2006         /* Must reside in the same buffer pool. */
2007         ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2008  
2009 @@ -603,18 +603,18 @@
2010         because we assert on in_flush_list in comparison function. */
2011         ut_d(bpage->in_flush_list = FALSE);
2012  
2013 -       prev = UT_LIST_GET_PREV(list, bpage);
2014 -       UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2015 +       prev = UT_LIST_GET_PREV(flush_list, bpage);
2016 +       UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2017  
2018         if (prev) {
2019                 ut_ad(prev->in_flush_list);
2020                 UT_LIST_INSERT_AFTER(
2021 -                       list,
2022 +                       flush_list,
2023                         buf_pool->flush_list,
2024                         prev, dpage);
2025         } else {
2026                 UT_LIST_ADD_FIRST(
2027 -                       list,
2028 +                       flush_list,
2029                         buf_pool->flush_list,
2030                         dpage);
2031         }
2032 @@ -1083,7 +1083,7 @@
2033  
2034  #ifdef UNIV_DEBUG
2035         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2036 -       ut_ad(!buf_pool_mutex_own(buf_pool));
2037 +       //ut_ad(!buf_pool_mutex_own(buf_pool));
2038  #endif
2039  
2040  #ifdef UNIV_LOG_DEBUG
2041 @@ -1097,7 +1097,8 @@
2042         io_fixed and oldest_modification != 0.  Thus, it cannot be
2043         relocated in the buffer pool or removed from flush_list or
2044         LRU_list. */
2045 -       ut_ad(!buf_pool_mutex_own(buf_pool));
2046 +       //ut_ad(!buf_pool_mutex_own(buf_pool));
2047 +       ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2048         ut_ad(!buf_flush_list_mutex_own(buf_pool));
2049         ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2050         ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
2051 @@ -1260,12 +1261,18 @@
2052         ibool           is_uncompressed;
2053  
2054         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2055 -       ut_ad(buf_pool_mutex_own(buf_pool));
2056 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2057 +#ifdef UNIV_SYNC_DEBUG
2058 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2059 +#endif
2060         ut_ad(buf_page_in_file(bpage));
2061  
2062         block_mutex = buf_page_get_mutex(bpage);
2063         ut_ad(mutex_own(block_mutex));
2064  
2065 +       buf_pool_mutex_enter(buf_pool);
2066 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
2067 +
2068         ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2069  
2070         buf_page_set_io_fix(bpage, BUF_IO_WRITE);
2071 @@ -1427,14 +1434,16 @@
2072  
2073                 buf_pool = buf_pool_get(space, i);
2074  
2075 -               buf_pool_mutex_enter(buf_pool);
2076 +               //buf_pool_mutex_enter(buf_pool);
2077 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
2078  
2079                 /* We only want to flush pages from this buffer pool. */
2080                 bpage = buf_page_hash_get(buf_pool, space, i);
2081  
2082                 if (!bpage) {
2083  
2084 -                       buf_pool_mutex_exit(buf_pool);
2085 +                       //buf_pool_mutex_exit(buf_pool);
2086 +                       rw_lock_s_unlock(&buf_pool->page_hash_latch);
2087                         continue;
2088                 }
2089  
2090 @@ -1446,11 +1455,9 @@
2091                 if (flush_type != BUF_FLUSH_LRU
2092                     || i == offset
2093                     || buf_page_is_old(bpage)) {
2094 -                       mutex_t* block_mutex = buf_page_get_mutex(bpage);
2095 -
2096 -                       mutex_enter(block_mutex);
2097 +                       mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2098  
2099 -                       if (buf_flush_ready_for_flush(bpage, flush_type)
2100 +                       if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2101                             && (i == offset || !bpage->buf_fix_count)) {
2102                                 /* We only try to flush those
2103                                 neighbors != offset where the buf fix
2104 @@ -1466,11 +1473,12 @@
2105                                 ut_ad(!buf_pool_mutex_own(buf_pool));
2106                                 count++;
2107                                 continue;
2108 -                       } else {
2109 +                       } else if (block_mutex) {
2110                                 mutex_exit(block_mutex);
2111                         }
2112                 }
2113 -               buf_pool_mutex_exit(buf_pool);
2114 +               //buf_pool_mutex_exit(buf_pool);
2115 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
2116         }
2117  
2118         return(count);
2119 @@ -1503,21 +1511,25 @@
2120         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2121  #endif /* UNIV_DEBUG */
2122  
2123 -       ut_ad(buf_pool_mutex_own(buf_pool));
2124 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2125 +       ut_ad(flush_type != BUF_FLUSH_LRU
2126 +             || mutex_own(&buf_pool->LRU_list_mutex));
2127  
2128 -       block_mutex = buf_page_get_mutex(bpage);
2129 -       mutex_enter(block_mutex);
2130 +       block_mutex = buf_page_get_mutex_enter(bpage);
2131  
2132 -       ut_a(buf_page_in_file(bpage));
2133 +       //ut_a(buf_page_in_file(bpage));
2134  
2135 -       if (buf_flush_ready_for_flush(bpage, flush_type)) {
2136 +       if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2137                 ulint           space;
2138                 ulint           offset;
2139                 buf_pool_t*     buf_pool;
2140  
2141                 buf_pool = buf_pool_from_bpage(bpage);
2142  
2143 -               buf_pool_mutex_exit(buf_pool);
2144 +               //buf_pool_mutex_exit(buf_pool);
2145 +               if (flush_type == BUF_FLUSH_LRU) {
2146 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2147 +               }
2148  
2149                 /* These fields are protected by both the
2150                 buffer pool mutex and block mutex. */
2151 @@ -1533,13 +1545,18 @@
2152                                                   *count,
2153                                                   n_to_flush);
2154  
2155 -               buf_pool_mutex_enter(buf_pool);
2156 +               //buf_pool_mutex_enter(buf_pool);
2157 +               if (flush_type == BUF_FLUSH_LRU) {
2158 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2159 +               }
2160                 flushed = TRUE;
2161 -       } else {
2162 +       } else if (block_mutex) {
2163                 mutex_exit(block_mutex);
2164         }
2165  
2166 -       ut_ad(buf_pool_mutex_own(buf_pool));
2167 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2168 +       ut_ad(flush_type != BUF_FLUSH_LRU
2169 +             || mutex_own(&buf_pool->LRU_list_mutex));
2170  
2171         return(flushed);
2172  }
2173 @@ -1560,7 +1577,8 @@
2174         buf_page_t*     bpage;
2175         ulint           count = 0;
2176  
2177 -       ut_ad(buf_pool_mutex_own(buf_pool));
2178 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2179 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2180  
2181         do {
2182                 /* Start from the end of the list looking for a
2183 @@ -1582,7 +1600,8 @@
2184         should be flushed, we factor in this value. */
2185         buf_lru_flush_page_count += count;
2186  
2187 -       ut_ad(buf_pool_mutex_own(buf_pool));
2188 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2189 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2190  
2191         return(count);
2192  }
2193 @@ -1610,9 +1629,10 @@
2194  {
2195         ulint           len;
2196         buf_page_t*     bpage;
2197 +       buf_page_t*     prev_bpage = NULL;
2198         ulint           count = 0;
2199  
2200 -       ut_ad(buf_pool_mutex_own(buf_pool));
2201 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2202  
2203         /* If we have flushed enough, leave the loop */
2204         do {
2205 @@ -1631,6 +1651,7 @@
2206  
2207                 if (bpage) {
2208                         ut_a(bpage->oldest_modification > 0);
2209 +                       prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2210                 }
2211  
2212                 if (!bpage || bpage->oldest_modification >= lsn_limit) {
2213 @@ -1672,9 +1693,17 @@
2214                                 break;
2215                         }
2216  
2217 -                       bpage = UT_LIST_GET_PREV(list, bpage);
2218 +                       bpage = UT_LIST_GET_PREV(flush_list, bpage);
2219  
2220 -                       ut_ad(!bpage || bpage->in_flush_list);
2221 +                       //ut_ad(!bpage || bpage->in_flush_list);
2222 +                       if (bpage != prev_bpage) {
2223 +                               /* the search might warp.. retrying */
2224 +                               buf_flush_list_mutex_exit(buf_pool);
2225 +                               break;
2226 +                       }
2227 +                       if (bpage) {
2228 +                               prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2229 +                       }
2230  
2231                         buf_flush_list_mutex_exit(buf_pool);
2232  
2233 @@ -1683,7 +1712,7 @@
2234  
2235         } while (count < min_n && bpage != NULL && len > 0);
2236  
2237 -       ut_ad(buf_pool_mutex_own(buf_pool));
2238 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2239  
2240         return(count);
2241  }
2242 @@ -1722,13 +1751,15 @@
2243               || sync_thread_levels_empty_gen(TRUE));
2244  #endif /* UNIV_SYNC_DEBUG */
2245  
2246 -       buf_pool_mutex_enter(buf_pool);
2247 +       //buf_pool_mutex_enter(buf_pool);
2248  
2249         /* Note: The buffer pool mutex is released and reacquired within
2250         the flush functions. */
2251         switch(flush_type) {
2252         case BUF_FLUSH_LRU:
2253 +               mutex_enter(&buf_pool->LRU_list_mutex);
2254                 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2255 +               mutex_exit(&buf_pool->LRU_list_mutex);
2256                 break;
2257         case BUF_FLUSH_LIST:
2258                 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
2259 @@ -1737,7 +1768,7 @@
2260                 ut_error;
2261         }
2262  
2263 -       buf_pool_mutex_exit(buf_pool);
2264 +       //buf_pool_mutex_exit(buf_pool);
2265  
2266         buf_flush_buffered_writes();
2267  
2268 @@ -1993,7 +2024,7 @@
2269  retry:
2270         //buf_pool_mutex_enter(buf_pool);
2271         if (have_LRU_mutex)
2272 -               buf_pool_mutex_enter(buf_pool);
2273 +               mutex_enter(&buf_pool->LRU_list_mutex);
2274  
2275         n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2276  
2277 @@ -2010,15 +2041,15 @@
2278                         bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2279                         continue;
2280                 }
2281 -               block_mutex = buf_page_get_mutex(bpage);
2282 -
2283 -               mutex_enter(block_mutex);
2284 +               block_mutex = buf_page_get_mutex_enter(bpage);
2285  
2286 -               if (buf_flush_ready_for_replace(bpage)) {
2287 +               if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2288                         n_replaceable++;
2289                 }
2290  
2291 -               mutex_exit(block_mutex);
2292 +               if (block_mutex) {
2293 +                       mutex_exit(block_mutex);
2294 +               }
2295  
2296                 distance++;
2297  
2298 @@ -2027,7 +2058,7 @@
2299  
2300         //buf_pool_mutex_exit(buf_pool);
2301         if (have_LRU_mutex)
2302 -               buf_pool_mutex_exit(buf_pool);
2303 +               mutex_exit(&buf_pool->LRU_list_mutex);
2304  
2305         if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2306  
2307 @@ -2226,7 +2257,7 @@
2308  
2309         ut_ad(buf_flush_list_mutex_own(buf_pool));
2310  
2311 -       UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2312 +       UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2313                          ut_ad(ut_list_node_313->in_flush_list));
2314  
2315         bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2316 @@ -2266,7 +2297,7 @@
2317                         rnode = rbt_next(buf_pool->flush_rbt, rnode);
2318                 }
2319  
2320 -               bpage = UT_LIST_GET_NEXT(list, bpage);
2321 +               bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2322  
2323                 ut_a(!bpage || om >= bpage->oldest_modification);
2324         }
2325 diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2326 --- a/storage/innobase/buf/buf0lru.c    2010-12-03 15:22:36.321987250 +0900
2327 +++ b/storage/innobase/buf/buf0lru.c    2010-12-03 15:48:29.293023197 +0900
2328 @@ -143,8 +143,9 @@
2329  void
2330  buf_LRU_block_free_hashed_page(
2331  /*===========================*/
2332 -       buf_block_t*    block); /*!< in: block, must contain a file page and
2333 +       buf_block_t*    block,  /*!< in: block, must contain a file page and
2334                                 be in a state where it can be freed */
2335 +       ibool           have_page_hash_mutex);
2336  
2337  /******************************************************************//**
2338  Determines if the unzip_LRU list should be used for evicting a victim
2339 @@ -154,15 +155,20 @@
2340  ibool
2341  buf_LRU_evict_from_unzip_LRU(
2342  /*=========================*/
2343 -       buf_pool_t*     buf_pool)
2344 +       buf_pool_t*     buf_pool,
2345 +       ibool           have_LRU_mutex)
2346  {
2347         ulint   io_avg;
2348         ulint   unzip_avg;
2349  
2350 -       ut_ad(buf_pool_mutex_own(buf_pool));
2351 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2352  
2353 +       if (!have_LRU_mutex)
2354 +               mutex_enter(&buf_pool->LRU_list_mutex);
2355         /* If the unzip_LRU list is empty, we can only use the LRU. */
2356         if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2357 +               if (!have_LRU_mutex)
2358 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2359                 return(FALSE);
2360         }
2361  
2362 @@ -171,14 +177,20 @@
2363         decompressed pages in the buffer pool. */
2364         if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2365             <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2366 +               if (!have_LRU_mutex)
2367 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2368                 return(FALSE);
2369         }
2370  
2371         /* If eviction hasn't started yet, we assume by default
2372         that a workload is disk bound. */
2373         if (buf_pool->freed_page_clock == 0) {
2374 +               if (!have_LRU_mutex)
2375 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2376                 return(TRUE);
2377         }
2378 +       if (!have_LRU_mutex)
2379 +               mutex_exit(&buf_pool->LRU_list_mutex);
2380  
2381         /* Calculate the average over past intervals, and add the values
2382         of the current interval. */
2383 @@ -246,19 +258,23 @@
2384         page_arr = ut_malloc(
2385                 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2386  
2387 -       buf_pool_mutex_enter(buf_pool);
2388 +       //buf_pool_mutex_enter(buf_pool);
2389 +       mutex_enter(&buf_pool->LRU_list_mutex);
2390  
2391  scan_again:
2392         num_entries = 0;
2393         bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2394  
2395         while (bpage != NULL) {
2396 -               mutex_t*        block_mutex = buf_page_get_mutex(bpage);
2397 +               mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
2398                 buf_page_t*     prev_bpage;
2399  
2400 -               mutex_enter(block_mutex);
2401                 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2402  
2403 +               if (!block_mutex) {
2404 +                       goto next_page;
2405 +               }
2406 +
2407                 ut_a(buf_page_in_file(bpage));
2408  
2409                 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2410 @@ -287,14 +303,16 @@
2411  
2412                         /* Array full. We release the buf_pool->mutex to
2413                         obey the latching order. */
2414 -                       buf_pool_mutex_exit(buf_pool);
2415 +                       //buf_pool_mutex_exit(buf_pool);
2416 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2417  
2418                         buf_LRU_drop_page_hash_batch(
2419                                 id, zip_size, page_arr, num_entries);
2420  
2421                         num_entries = 0;
2422  
2423 -                       buf_pool_mutex_enter(buf_pool);
2424 +                       //buf_pool_mutex_enter(buf_pool);
2425 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2426                 } else {
2427                         mutex_exit(block_mutex);
2428                 }
2429 @@ -319,7 +337,8 @@
2430                 }
2431         }
2432  
2433 -       buf_pool_mutex_exit(buf_pool);
2434 +       //buf_pool_mutex_exit(buf_pool);
2435 +       mutex_exit(&buf_pool->LRU_list_mutex);
2436  
2437         /* Drop any remaining batch of search hashed pages. */
2438         buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2439 @@ -341,7 +360,9 @@
2440         ibool           all_freed;
2441  
2442  scan_again:
2443 -       buf_pool_mutex_enter(buf_pool);
2444 +       //buf_pool_mutex_enter(buf_pool);
2445 +       mutex_enter(&buf_pool->LRU_list_mutex);
2446 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
2447  
2448         all_freed = TRUE;
2449  
2450 @@ -369,8 +390,16 @@
2451  
2452                         all_freed = FALSE;
2453                 } else {
2454 -                       mutex_t* block_mutex = buf_page_get_mutex(bpage);
2455 -                       mutex_enter(block_mutex);
2456 +                       mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2457 +
2458 +                       if (!block_mutex) {
2459 +                               /* This should be an impossible case...
2460 +                               Something is wrong, so we will scan_again */
2461 +
2462 +                               all_freed = FALSE;
2463 +
2464 +                               goto next_page_no_mutex;
2465 +                       }
2466  
2467                         if (bpage->buf_fix_count > 0) {
2468  
2469 @@ -429,7 +458,9 @@
2470                                 ulint   page_no;
2471                                 ulint   zip_size;
2472  
2473 -                               buf_pool_mutex_exit(buf_pool);
2474 +                               //buf_pool_mutex_exit(buf_pool);
2475 +                               mutex_exit(&buf_pool->LRU_list_mutex);
2476 +                               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2477  
2478                                 zip_size = buf_page_get_zip_size(bpage);
2479                                 page_no = buf_page_get_page_no(bpage);
2480 @@ -454,7 +485,7 @@
2481                         if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2482                             != BUF_BLOCK_ZIP_FREE) {
2483                                 buf_LRU_block_free_hashed_page((buf_block_t*)
2484 -                                                              bpage);
2485 +                                                              bpage, TRUE);
2486                         } else {
2487                                 /* The block_mutex should have been
2488                                 released by buf_LRU_block_remove_hashed_page()
2489 @@ -486,7 +517,9 @@
2490                 bpage = prev_bpage;
2491         }
2492  
2493 -       buf_pool_mutex_exit(buf_pool);
2494 +       //buf_pool_mutex_exit(buf_pool);
2495 +       mutex_exit(&buf_pool->LRU_list_mutex);
2496 +       rw_lock_x_unlock(&buf_pool->page_hash_latch);
2497  
2498         if (!all_freed) {
2499                 os_thread_sleep(20000);
2500 @@ -532,7 +565,9 @@
2501         buf_page_t*     b;
2502         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2503  
2504 -       ut_ad(buf_pool_mutex_own(buf_pool));
2505 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2506 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2507 +       ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2508         ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2509  
2510         /* Find the first successor of bpage in the LRU list
2511 @@ -540,17 +575,17 @@
2512         b = bpage;
2513         do {
2514                 b = UT_LIST_GET_NEXT(LRU, b);
2515 -       } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2516 +       } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2517  
2518         /* Insert bpage before b, i.e., after the predecessor of b. */
2519         if (b) {
2520 -               b = UT_LIST_GET_PREV(list, b);
2521 +               b = UT_LIST_GET_PREV(zip_list, b);
2522         }
2523  
2524         if (b) {
2525 -               UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2526 +               UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2527         } else {
2528 -               UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2529 +               UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2530         }
2531  }
2532  
2533 @@ -563,18 +598,19 @@
2534  buf_LRU_free_from_unzip_LRU_list(
2535  /*=============================*/
2536         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
2537 -       ulint           n_iterations)   /*!< in: how many times this has
2538 +       ulint           n_iterations,   /*!< in: how many times this has
2539                                         been called repeatedly without
2540                                         result: a high value means that
2541                                         we should search farther; we will
2542                                         search n_iterations / 5 of the
2543                                         unzip_LRU list, or nothing if
2544                                         n_iterations >= 5 */
2545 +       ibool           have_LRU_mutex)
2546  {
2547         buf_block_t*    block;
2548         ulint           distance;
2549  
2550 -       ut_ad(buf_pool_mutex_own(buf_pool));
2551 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2552  
2553         /* Theoratically it should be much easier to find a victim
2554         from unzip_LRU as we can choose even a dirty block (as we'll
2555 @@ -584,7 +620,7 @@
2556         if we have done five iterations so far. */
2557  
2558         if (UNIV_UNLIKELY(n_iterations >= 5)
2559 -           || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2560 +           || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2561  
2562                 return(FALSE);
2563         }
2564 @@ -592,18 +628,25 @@
2565         distance = 100 + (n_iterations
2566                           * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2567  
2568 +restart:
2569         for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2570              UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2571              block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2572  
2573                 enum buf_lru_free_block_status  freed;
2574  
2575 +               mutex_enter(&block->mutex);
2576 +               if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2577 +                   || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2578 +                       mutex_exit(&block->mutex);
2579 +                       goto restart;
2580 +               }
2581 +
2582                 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2583                 ut_ad(block->in_unzip_LRU_list);
2584                 ut_ad(block->page.in_LRU_list);
2585  
2586 -               mutex_enter(&block->mutex);
2587 -               freed = buf_LRU_free_block(&block->page, FALSE, NULL);
2588 +               freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
2589                 mutex_exit(&block->mutex);
2590  
2591                 switch (freed) {
2592 @@ -637,21 +680,23 @@
2593  buf_LRU_free_from_common_LRU_list(
2594  /*==============================*/
2595         buf_pool_t*     buf_pool,
2596 -       ulint           n_iterations)
2597 +       ulint           n_iterations,
2598                                 /*!< in: how many times this has been called
2599                                 repeatedly without result: a high value means
2600                                 that we should search farther; if
2601                                 n_iterations < 10, then we search
2602                                 n_iterations / 10 * buf_pool->curr_size
2603                                 pages from the end of the LRU list */
2604 +       ibool           have_LRU_mutex)
2605  {
2606         buf_page_t*     bpage;
2607         ulint           distance;
2608  
2609 -       ut_ad(buf_pool_mutex_own(buf_pool));
2610 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2611  
2612         distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2613  
2614 +restart:
2615         for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2616              UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2617              bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2618 @@ -659,14 +704,23 @@
2619                 enum buf_lru_free_block_status  freed;
2620                 unsigned                        accessed;
2621                 mutex_t*                        block_mutex
2622 -                       = buf_page_get_mutex(bpage);
2623 +                       = buf_page_get_mutex_enter(bpage);
2624 +
2625 +               if (!block_mutex) {
2626 +                       goto restart;
2627 +               }
2628 +
2629 +               if (!bpage->in_LRU_list
2630 +                   || !buf_page_in_file(bpage)) {
2631 +                       mutex_exit(block_mutex);
2632 +                       goto restart;
2633 +               }
2634  
2635                 ut_ad(buf_page_in_file(bpage));
2636                 ut_ad(bpage->in_LRU_list);
2637  
2638 -               mutex_enter(block_mutex);
2639                 accessed = buf_page_is_accessed(bpage);
2640 -               freed = buf_LRU_free_block(bpage, TRUE, NULL);
2641 +               freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
2642                 mutex_exit(block_mutex);
2643  
2644                 switch (freed) {
2645 @@ -718,16 +772,23 @@
2646                                 n_iterations / 5 of the unzip_LRU list. */
2647  {
2648         ibool   freed = FALSE;
2649 +       ibool   have_LRU_mutex = FALSE;
2650  
2651 -       buf_pool_mutex_enter(buf_pool);
2652 +       if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2653 +               have_LRU_mutex = TRUE;
2654 +
2655 +       //buf_pool_mutex_enter(buf_pool);
2656 +       if (have_LRU_mutex)
2657 +               mutex_enter(&buf_pool->LRU_list_mutex);
2658  
2659 -       freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2660 +       freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2661  
2662         if (!freed) {
2663                 freed = buf_LRU_free_from_common_LRU_list(
2664 -                       buf_pool, n_iterations);
2665 +                       buf_pool, n_iterations, have_LRU_mutex);
2666         }
2667  
2668 +       buf_pool_mutex_enter(buf_pool);
2669         if (!freed) {
2670                 buf_pool->LRU_flush_ended = 0;
2671         } else if (buf_pool->LRU_flush_ended > 0) {
2672 @@ -735,6 +796,8 @@
2673         }
2674  
2675         buf_pool_mutex_exit(buf_pool);
2676 +       if (have_LRU_mutex)
2677 +               mutex_exit(&buf_pool->LRU_list_mutex);
2678  
2679         return(freed);
2680  }
2681 @@ -795,7 +858,9 @@
2682  
2683                 buf_pool = buf_pool_from_array(i);
2684  
2685 -               buf_pool_mutex_enter(buf_pool);
2686 +               //buf_pool_mutex_enter(buf_pool);
2687 +               mutex_enter(&buf_pool->LRU_list_mutex);
2688 +               mutex_enter(&buf_pool->free_list_mutex);
2689  
2690                 if (!recv_recovery_on
2691                     && UT_LIST_GET_LEN(buf_pool->free)
2692 @@ -805,7 +870,9 @@
2693                         ret = TRUE;
2694                 }
2695  
2696 -               buf_pool_mutex_exit(buf_pool);
2697 +               //buf_pool_mutex_exit(buf_pool);
2698 +               mutex_exit(&buf_pool->LRU_list_mutex);
2699 +               mutex_exit(&buf_pool->free_list_mutex);
2700         }
2701  
2702         return(ret);
2703 @@ -823,9 +890,10 @@
2704  {
2705         buf_block_t*    block;
2706  
2707 -       ut_ad(buf_pool_mutex_own(buf_pool));
2708 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2709  
2710 -       block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2711 +       mutex_enter(&buf_pool->free_list_mutex);
2712 +       block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2713  
2714         if (block) {
2715  
2716 @@ -834,7 +902,9 @@
2717                 ut_ad(!block->page.in_flush_list);
2718                 ut_ad(!block->page.in_LRU_list);
2719                 ut_a(!buf_page_in_file(&block->page));
2720 -               UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2721 +               UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2722 +
2723 +               mutex_exit(&buf_pool->free_list_mutex);
2724  
2725                 mutex_enter(&block->mutex);
2726  
2727 @@ -844,6 +914,8 @@
2728                 ut_ad(buf_pool_from_block(block) == buf_pool);
2729  
2730                 mutex_exit(&block->mutex);
2731 +       } else {
2732 +               mutex_exit(&buf_pool->free_list_mutex);
2733         }
2734  
2735         return(block);
2736 @@ -868,7 +940,7 @@
2737         ibool           mon_value_was   = FALSE;
2738         ibool           started_monitor = FALSE;
2739  loop:
2740 -       buf_pool_mutex_enter(buf_pool);
2741 +       //buf_pool_mutex_enter(buf_pool);
2742  
2743         if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2744             + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2745 @@ -951,8 +1023,10 @@
2746                         ibool   lru;
2747                         page_zip_set_size(&block->page.zip, zip_size);
2748  
2749 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2750                         block->page.zip.data = buf_buddy_alloc(
2751 -                               buf_pool, zip_size, &lru);
2752 +                               buf_pool, zip_size, &lru, FALSE);
2753 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2754  
2755                         UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
2756                 } else {
2757 @@ -960,7 +1034,7 @@
2758                         block->page.zip.data = NULL;
2759                 }
2760  
2761 -               buf_pool_mutex_exit(buf_pool);
2762 +               //buf_pool_mutex_exit(buf_pool);
2763  
2764                 if (started_monitor) {
2765                         srv_print_innodb_monitor = mon_value_was;
2766 @@ -972,7 +1046,7 @@
2767         /* If no block was in the free list, search from the end of the LRU
2768         list and try to free a block there */
2769  
2770 -       buf_pool_mutex_exit(buf_pool);
2771 +       //buf_pool_mutex_exit(buf_pool);
2772  
2773         freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
2774  
2775 @@ -1058,7 +1132,8 @@
2776         ulint   new_len;
2777  
2778         ut_a(buf_pool->LRU_old);
2779 -       ut_ad(buf_pool_mutex_own(buf_pool));
2780 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2781 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2782         ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2783         ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2784  #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2785 @@ -1124,7 +1199,8 @@
2786  {
2787         buf_page_t*     bpage;
2788  
2789 -       ut_ad(buf_pool_mutex_own(buf_pool));
2790 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2791 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2792         ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2793  
2794         /* We first initialize all blocks in the LRU list as old and then use
2795 @@ -1159,13 +1235,14 @@
2796         ut_ad(buf_pool);
2797         ut_ad(bpage);
2798         ut_ad(buf_page_in_file(bpage));
2799 -       ut_ad(buf_pool_mutex_own(buf_pool));
2800 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2801 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2802  
2803         if (buf_page_belongs_to_unzip_LRU(bpage)) {
2804                 buf_block_t*    block = (buf_block_t*) bpage;
2805  
2806                 ut_ad(block->in_unzip_LRU_list);
2807 -               ut_d(block->in_unzip_LRU_list = FALSE);
2808 +               block->in_unzip_LRU_list = FALSE;
2809  
2810                 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2811         }
2812 @@ -1183,7 +1260,8 @@
2813  
2814         ut_ad(buf_pool);
2815         ut_ad(bpage);
2816 -       ut_ad(buf_pool_mutex_own(buf_pool));
2817 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2818 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2819  
2820         ut_a(buf_page_in_file(bpage));
2821  
2822 @@ -1260,12 +1338,13 @@
2823  
2824         ut_ad(buf_pool);
2825         ut_ad(block);
2826 -       ut_ad(buf_pool_mutex_own(buf_pool));
2827 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2828 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2829  
2830         ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2831  
2832         ut_ad(!block->in_unzip_LRU_list);
2833 -       ut_d(block->in_unzip_LRU_list = TRUE);
2834 +       block->in_unzip_LRU_list = TRUE;
2835  
2836         if (old) {
2837                 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2838 @@ -1286,7 +1365,8 @@
2839  
2840         ut_ad(buf_pool);
2841         ut_ad(bpage);
2842 -       ut_ad(buf_pool_mutex_own(buf_pool));
2843 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2844 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2845  
2846         ut_a(buf_page_in_file(bpage));
2847  
2848 @@ -1337,7 +1417,8 @@
2849  
2850         ut_ad(buf_pool);
2851         ut_ad(bpage);
2852 -       ut_ad(buf_pool_mutex_own(buf_pool));
2853 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2854 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2855  
2856         ut_a(buf_page_in_file(bpage));
2857         ut_ad(!bpage->in_LRU_list);
2858 @@ -1416,7 +1497,8 @@
2859  {
2860         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2861  
2862 -       ut_ad(buf_pool_mutex_own(buf_pool));
2863 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2864 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2865  
2866         if (bpage->old) {
2867                 buf_pool->stat.n_pages_made_young++;
2868 @@ -1458,19 +1540,20 @@
2869         buf_page_t*     bpage,  /*!< in: block to be freed */
2870         ibool           zip,    /*!< in: TRUE if should remove also the
2871                                 compressed page of an uncompressed page */
2872 -       ibool*          buf_pool_mutex_released)
2873 +       ibool*          buf_pool_mutex_released,
2874                                 /*!< in: pointer to a variable that will
2875                                 be assigned TRUE if buf_pool_mutex
2876                                 was temporarily released, or NULL */
2877 +       ibool           have_LRU_mutex)
2878  {
2879         buf_page_t*     b = NULL;
2880         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
2881         mutex_t*        block_mutex = buf_page_get_mutex(bpage);
2882  
2883 -       ut_ad(buf_pool_mutex_own(buf_pool));
2884 +       //ut_ad(buf_pool_mutex_own(buf_pool));
2885         ut_ad(mutex_own(block_mutex));
2886         ut_ad(buf_page_in_file(bpage));
2887 -       ut_ad(bpage->in_LRU_list);
2888 +       //ut_ad(bpage->in_LRU_list);
2889         ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2890  #if UNIV_WORD_SIZE == 4
2891         /* On 32-bit systems, there is no padding in buf_page_t.  On
2892 @@ -1479,7 +1562,7 @@
2893         UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2894  #endif
2895  
2896 -       if (!buf_page_can_relocate(bpage)) {
2897 +       if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2898  
2899                 /* Do not free buffer-fixed or I/O-fixed blocks. */
2900                 return(BUF_LRU_NOT_FREED);
2901 @@ -1511,15 +1594,15 @@
2902                 If it cannot be allocated (without freeing a block
2903                 from the LRU list), refuse to free bpage. */
2904  alloc:
2905 -               buf_pool_mutex_exit_forbid(buf_pool);
2906 -               b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2907 -               buf_pool_mutex_exit_allow(buf_pool);
2908 +               //buf_pool_mutex_exit_forbid(buf_pool);
2909 +               b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2910 +               //buf_pool_mutex_exit_allow(buf_pool);
2911  
2912                 if (UNIV_UNLIKELY(!b)) {
2913                         return(BUF_LRU_CANNOT_RELOCATE);
2914                 }
2915  
2916 -               memcpy(b, bpage, sizeof *b);
2917 +               //memcpy(b, bpage, sizeof *b);
2918         }
2919  
2920  #ifdef UNIV_DEBUG
2921 @@ -1530,6 +1613,39 @@
2922         }
2923  #endif /* UNIV_DEBUG */
2924  
2925 +       /* To avoid breaking the latch order, we must release and re-enter block_mutex */
2926 +       mutex_exit(block_mutex);
2927 +
2928 +       if (!have_LRU_mutex)
2929 +               mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2930 +       rw_lock_x_lock(&buf_pool->page_hash_latch);
2931 +       mutex_enter(block_mutex);
2932 +
2933 +       /* recheck states of block */
2934 +       if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2935 +           || !buf_page_can_relocate(bpage)) {
2936 +not_freed:
2937 +               if (b) {
2938 +                       buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2939 +               }
2940 +               if (!have_LRU_mutex)
2941 +                       mutex_exit(&buf_pool->LRU_list_mutex);
2942 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2943 +               return(BUF_LRU_NOT_FREED);
2944 +       } else if (zip || !bpage->zip.data) {
2945 +               if (bpage->oldest_modification)
2946 +                       goto not_freed;
2947 +       } else if (bpage->oldest_modification) {
2948 +               if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2949 +                       ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2950 +                       goto not_freed;
2951 +               }
2952 +       }
2953 +
2954 +       if (b) {
2955 +               memcpy(b, bpage, sizeof *b);
2956 +       }
2957 +
2958         if (buf_LRU_block_remove_hashed_page(bpage, zip)
2959             != BUF_BLOCK_ZIP_FREE) {
2960                 ut_a(bpage->buf_fix_count == 0);
2961 @@ -1546,6 +1662,10 @@
2962  
2963                         ut_a(!hash_b);
2964  
2965 +                       while (prev_b && !prev_b->in_LRU_list) {
2966 +                               prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2967 +                       }
2968 +
2969                         b->state = b->oldest_modification
2970                                 ? BUF_BLOCK_ZIP_DIRTY
2971                                 : BUF_BLOCK_ZIP_PAGE;
2972 @@ -1642,7 +1762,9 @@
2973                         *buf_pool_mutex_released = TRUE;
2974                 }
2975  
2976 -               buf_pool_mutex_exit(buf_pool);
2977 +               //buf_pool_mutex_exit(buf_pool);
2978 +               mutex_exit(&buf_pool->LRU_list_mutex);
2979 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
2980                 mutex_exit(block_mutex);
2981  
2982                 /* Remove possible adaptive hash index on the page.
2983 @@ -1674,7 +1796,9 @@
2984                                 : BUF_NO_CHECKSUM_MAGIC);
2985                 }
2986  
2987 -               buf_pool_mutex_enter(buf_pool);
2988 +               //buf_pool_mutex_enter(buf_pool);
2989 +               if (have_LRU_mutex)
2990 +                       mutex_enter(&buf_pool->LRU_list_mutex);
2991                 mutex_enter(block_mutex);
2992  
2993                 if (b) {
2994 @@ -1684,13 +1808,17 @@
2995                         mutex_exit(&buf_pool->zip_mutex);
2996                 }
2997  
2998 -               buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2999 +               buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
3000         } else {
3001                 /* The block_mutex should have been released by
3002                 buf_LRU_block_remove_hashed_page() when it returns
3003                 BUF_BLOCK_ZIP_FREE. */
3004                 ut_ad(block_mutex == &buf_pool->zip_mutex);
3005                 mutex_enter(block_mutex);
3006 +
3007 +               if (!have_LRU_mutex)
3008 +                       mutex_exit(&buf_pool->LRU_list_mutex);
3009 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3010         }
3011  
3012         return(BUF_LRU_FREED);
3013 @@ -1702,13 +1830,14 @@
3014  void
3015  buf_LRU_block_free_non_file_page(
3016  /*=============================*/
3017 -       buf_block_t*    block)  /*!< in: block, must not contain a file page */
3018 +       buf_block_t*    block,  /*!< in: block, must not contain a file page */
3019 +       ibool           have_page_hash_mutex)
3020  {
3021         void*           data;
3022         buf_pool_t*     buf_pool = buf_pool_from_block(block);
3023  
3024         ut_ad(block);
3025 -       ut_ad(buf_pool_mutex_own(buf_pool));
3026 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3027         ut_ad(mutex_own(&block->mutex));
3028  
3029         switch (buf_block_get_state(block)) {
3030 @@ -1742,18 +1871,21 @@
3031         if (data) {
3032                 block->page.zip.data = NULL;
3033                 mutex_exit(&block->mutex);
3034 -               buf_pool_mutex_exit_forbid(buf_pool);
3035 +               //buf_pool_mutex_exit_forbid(buf_pool);
3036  
3037                 buf_buddy_free(
3038 -                       buf_pool, data, page_zip_get_size(&block->page.zip));
3039 +                       buf_pool, data, page_zip_get_size(&block->page.zip),
3040 +                       have_page_hash_mutex);
3041  
3042 -               buf_pool_mutex_exit_allow(buf_pool);
3043 +               //buf_pool_mutex_exit_allow(buf_pool);
3044                 mutex_enter(&block->mutex);
3045                 page_zip_set_size(&block->page.zip, 0);
3046         }
3047  
3048 -       UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3049 +       mutex_enter(&buf_pool->free_list_mutex);
3050 +       UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3051         ut_d(block->page.in_free_list = TRUE);
3052 +       mutex_exit(&buf_pool->free_list_mutex);
3053  
3054         UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3055  }
3056 @@ -1783,7 +1915,11 @@
3057         buf_pool_t*             buf_pool = buf_pool_from_bpage(bpage);
3058  
3059         ut_ad(bpage);
3060 -       ut_ad(buf_pool_mutex_own(buf_pool));
3061 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3062 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3063 +#ifdef UNIV_SYNC_DEBUG
3064 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3065 +#endif
3066         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3067  
3068         ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3069 @@ -1891,7 +2027,9 @@
3070  
3071  #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3072                 mutex_exit(buf_page_get_mutex(bpage));
3073 -               buf_pool_mutex_exit(buf_pool);
3074 +               //buf_pool_mutex_exit(buf_pool);
3075 +               mutex_exit(&buf_pool->LRU_list_mutex);
3076 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3077                 buf_print();
3078                 buf_LRU_print();
3079                 buf_validate();
3080 @@ -1912,17 +2050,17 @@
3081                 ut_a(bpage->zip.data);
3082                 ut_a(buf_page_get_zip_size(bpage));
3083  
3084 -               UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3085 +               UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3086  
3087                 mutex_exit(&buf_pool->zip_mutex);
3088 -               buf_pool_mutex_exit_forbid(buf_pool);
3089 +               //buf_pool_mutex_exit_forbid(buf_pool);
3090  
3091                 buf_buddy_free(
3092                         buf_pool, bpage->zip.data,
3093 -                       page_zip_get_size(&bpage->zip));
3094 +                       page_zip_get_size(&bpage->zip), TRUE);
3095  
3096 -               buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3097 -               buf_pool_mutex_exit_allow(buf_pool);
3098 +               buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3099 +               //buf_pool_mutex_exit_allow(buf_pool);
3100  
3101                 UNIV_MEM_UNDESC(bpage);
3102                 return(BUF_BLOCK_ZIP_FREE);
3103 @@ -1945,13 +2083,13 @@
3104                         ut_ad(!bpage->in_flush_list);
3105                         ut_ad(!bpage->in_LRU_list);
3106                         mutex_exit(&((buf_block_t*) bpage)->mutex);
3107 -                       buf_pool_mutex_exit_forbid(buf_pool);
3108 +                       //buf_pool_mutex_exit_forbid(buf_pool);
3109  
3110                         buf_buddy_free(
3111                                 buf_pool, data,
3112 -                               page_zip_get_size(&bpage->zip));
3113 +                               page_zip_get_size(&bpage->zip), TRUE);
3114  
3115 -                       buf_pool_mutex_exit_allow(buf_pool);
3116 +                       //buf_pool_mutex_exit_allow(buf_pool);
3117                         mutex_enter(&((buf_block_t*) bpage)->mutex);
3118                         page_zip_set_size(&bpage->zip, 0);
3119                 }
3120 @@ -1977,18 +2115,19 @@
3121  void
3122  buf_LRU_block_free_hashed_page(
3123  /*===========================*/
3124 -       buf_block_t*    block)  /*!< in: block, must contain a file page and
3125 +       buf_block_t*    block,  /*!< in: block, must contain a file page and
3126                                 be in a state where it can be freed */
3127 +       ibool           have_page_hash_mutex) /*!< in: forwarded unchanged to buf_LRU_block_free_non_file_page() */
3128  {
3129  #ifdef UNIV_DEBUG
3130 -       buf_pool_t*     buf_pool = buf_pool_from_block(block);
3131 -       ut_ad(buf_pool_mutex_own(buf_pool));
3132 +       //buf_pool_t*   buf_pool = buf_pool_from_block(block);
3133 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3134  #endif
3135         ut_ad(mutex_own(&block->mutex));
3136  
3137         buf_block_set_state(block, BUF_BLOCK_MEMORY);
3138  
3139 -       buf_LRU_block_free_non_file_page(block);
3140 +       buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3141  }
3142  
3143  /**********************************************************************//**
3144 @@ -2015,7 +2154,8 @@
3145         }
3146  
3147         if (adjust) {
3148 -               buf_pool_mutex_enter(buf_pool);
3149 +               //buf_pool_mutex_enter(buf_pool);
3150 +               mutex_enter(&buf_pool->LRU_list_mutex);
3151  
3152                 if (ratio != buf_pool->LRU_old_ratio) {
3153                         buf_pool->LRU_old_ratio = ratio;
3154 @@ -2027,7 +2167,8 @@
3155                         }
3156                 }
3157  
3158 -               buf_pool_mutex_exit(buf_pool);
3159 +               //buf_pool_mutex_exit(buf_pool);
3160 +               mutex_exit(&buf_pool->LRU_list_mutex);
3161         } else {
3162                 buf_pool->LRU_old_ratio = ratio;
3163         }
3164 @@ -2132,7 +2273,8 @@
3165         ulint           new_len;
3166  
3167         ut_ad(buf_pool);
3168 -       buf_pool_mutex_enter(buf_pool);
3169 +       //buf_pool_mutex_enter(buf_pool);
3170 +       mutex_enter(&buf_pool->LRU_list_mutex);
3171  
3172         if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3173  
3174 @@ -2193,16 +2335,22 @@
3175  
3176         ut_a(buf_pool->LRU_old_len == old_len);
3177  
3178 -       UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3179 +       mutex_exit(&buf_pool->LRU_list_mutex);
3180 +       mutex_enter(&buf_pool->free_list_mutex);
3181 +
3182 +       UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3183                          ut_ad(ut_list_node_313->in_free_list));
3184  
3185         for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3186              bpage != NULL;
3187 -            bpage = UT_LIST_GET_NEXT(list, bpage)) {
3188 +            bpage = UT_LIST_GET_NEXT(free, bpage)) {
3189  
3190                 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3191         }
3192  
3193 +       mutex_exit(&buf_pool->free_list_mutex);
3194 +       mutex_enter(&buf_pool->LRU_list_mutex);
3195 +
3196         UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3197                          ut_ad(ut_list_node_313->in_unzip_LRU_list
3198                                && ut_list_node_313->page.in_LRU_list));
3199 @@ -2216,7 +2364,8 @@
3200                 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3201         }
3202  
3203 -       buf_pool_mutex_exit(buf_pool);
3204 +       //buf_pool_mutex_exit(buf_pool);
3205 +       mutex_exit(&buf_pool->LRU_list_mutex);
3206  }
3207  
3208  /**********************************************************************//**
3209 @@ -2252,7 +2401,8 @@
3210         const buf_page_t*       bpage;
3211  
3212         ut_ad(buf_pool);
3213 -       buf_pool_mutex_enter(buf_pool);
3214 +       //buf_pool_mutex_enter(buf_pool);
3215 +       mutex_enter(&buf_pool->LRU_list_mutex);
3216  
3217         bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3218  
3219 @@ -2309,7 +2459,8 @@
3220                 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3221         }
3222  
3223 -       buf_pool_mutex_exit(buf_pool);
3224 +       //buf_pool_mutex_exit(buf_pool);
3225 +       mutex_exit(&buf_pool->LRU_list_mutex);
3226  }
3227  
3228  /**********************************************************************//**
3229 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3230 --- a/storage/innobase/buf/buf0rea.c    2010-12-03 15:22:36.323977308 +0900
3231 +++ b/storage/innobase/buf/buf0rea.c    2010-12-03 15:48:29.296024468 +0900
3232 @@ -311,6 +311,7 @@
3233  
3234                 return(0);
3235         }
3236 +       buf_pool_mutex_exit(buf_pool);
3237  
3238         /* Check that almost all pages in the area have been accessed; if
3239         offset == low, the accesses must be in a descending order, otherwise,
3240 @@ -329,6 +330,7 @@
3241  
3242         fail_count = 0;
3243  
3244 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
3245         for (i = low; i < high; i++) {
3246                 bpage = buf_page_hash_get(buf_pool, space, i);
3247  
3248 @@ -356,7 +358,8 @@
3249  
3250                 if (fail_count > threshold) {
3251                         /* Too many failures: return */
3252 -                       buf_pool_mutex_exit(buf_pool);
3253 +                       //buf_pool_mutex_exit(buf_pool);
3254 +                       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3255                         return(0);
3256                 }
3257  
3258 @@ -371,7 +374,8 @@
3259         bpage = buf_page_hash_get(buf_pool, space, offset);
3260  
3261         if (bpage == NULL) {
3262 -               buf_pool_mutex_exit(buf_pool);
3263 +               //buf_pool_mutex_exit(buf_pool);
3264 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
3265  
3266                 return(0);
3267         }
3268 @@ -397,7 +401,8 @@
3269         pred_offset = fil_page_get_prev(frame);
3270         succ_offset = fil_page_get_next(frame);
3271  
3272 -       buf_pool_mutex_exit(buf_pool);
3273 +       //buf_pool_mutex_exit(buf_pool);
3274 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3275  
3276         if ((offset == low) && (succ_offset == offset + 1)) {
3277  
3278 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3279 --- a/storage/innobase/handler/ha_innodb.cc     2010-12-03 15:48:03.048955897 +0900
3280 +++ b/storage/innobase/handler/ha_innodb.cc     2010-12-03 15:48:29.304024564 +0900
3281 @@ -245,6 +245,10 @@
3282  #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3283         {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3284         {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3285 +       {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3286 +       {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3287 +       {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3288 +       {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3289         {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3290         {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3291         {&dict_sys_mutex_key, "dict_sys_mutex", 0},
3292 @@ -295,6 +299,7 @@
3293         {&archive_lock_key, "archive_lock", 0},
3294  #  endif /* UNIV_LOG_ARCHIVE */
3295         {&btr_search_latch_key, "btr_search_latch", 0},
3296 +       {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3297  #  ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3298         {&buf_block_lock_key, "buf_block_lock", 0},
3299  #  endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3300 diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3301 --- a/storage/innobase/handler/i_s.cc   2010-12-03 15:37:45.517105700 +0900
3302 +++ b/storage/innobase/handler/i_s.cc   2010-12-03 15:48:29.331024462 +0900
3303 @@ -1565,7 +1565,8 @@
3304  
3305                 buf_pool = buf_pool_from_array(i);
3306  
3307 -               buf_pool_mutex_enter(buf_pool);
3308 +               //buf_pool_mutex_enter(buf_pool);
3309 +               mutex_enter(&buf_pool->zip_free_mutex);
3310  
3311                 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3312                         buf_buddy_stat_t*       buddy_stat;
3313 @@ -1595,7 +1596,8 @@
3314                         }
3315                 }
3316  
3317 -               buf_pool_mutex_exit(buf_pool);
3318 +               //buf_pool_mutex_exit(buf_pool);
3319 +               mutex_exit(&buf_pool->zip_free_mutex);
3320  
3321                 if (status) {
3322                         break;
3323 diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3324 --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3325 +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
3326 @@ -3783,9 +3783,11 @@
3327                 ulint           fold = buf_page_address_fold(space, page_no);
3328                 buf_pool_t*     buf_pool = buf_pool_get(space, page_no);
3329  
3330 -               buf_pool_mutex_enter(buf_pool);
3331 +               //buf_pool_mutex_enter(buf_pool);
3332 +               rw_lock_s_lock(&buf_pool->page_hash_latch);
3333                 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3334 -               buf_pool_mutex_exit(buf_pool);
3335 +               //buf_pool_mutex_exit(buf_pool);
3336 +               rw_lock_s_unlock(&buf_pool->page_hash_latch);
3337  
3338                 if (UNIV_LIKELY_NULL(bpage)) {
3339                         /* A buffer pool watch has been set or the
3340 diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3341 --- a/storage/innobase/include/buf0buddy.h      2010-11-03 07:01:13.000000000 +0900
3342 +++ b/storage/innobase/include/buf0buddy.h      2010-12-03 15:48:29.338023826 +0900
3343 @@ -51,10 +51,11 @@
3344         buf_pool_t*     buf_pool,
3345                         /*!< buffer pool in which the block resides */
3346         ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
3347 -       ibool*  lru)    /*!< in: pointer to a variable that will be assigned
3348 +       ibool*  lru,    /*!< in: pointer to a variable that will be assigned
3349                         TRUE if storage was allocated from the LRU list
3350                         and buf_pool->mutex was temporarily released,
3351                         or NULL if the LRU list should not be used */
3352 +       ibool   have_page_hash_mutex)
3353         __attribute__((malloc));
3354  
3355  /**********************************************************************//**
3356 @@ -67,7 +68,8 @@
3357                         /*!< buffer pool in which the block resides */
3358         void*   buf,    /*!< in: block to be freed, must not be
3359                         pointed to by the buffer pool */
3360 -       ulint   size)   /*!< in: block size, up to UNIV_PAGE_SIZE */
3361 +       ulint   size,   /*!< in: block size, up to UNIV_PAGE_SIZE */
3362 +       ibool   have_page_hash_mutex)
3363         __attribute__((nonnull));
3364  
3365  #ifndef UNIV_NONINL
3366 diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3367 --- a/storage/innobase/include/buf0buddy.ic     2010-11-03 07:01:13.000000000 +0900
3368 +++ b/storage/innobase/include/buf0buddy.ic     2010-12-03 15:48:29.339040413 +0900
3369 @@ -46,10 +46,11 @@
3370                         /*!< in: buffer pool in which the page resides */
3371         ulint   i,      /*!< in: index of buf_pool->zip_free[],
3372                         or BUF_BUDDY_SIZES */
3373 -       ibool*  lru)    /*!< in: pointer to a variable that will be assigned
3374 +       ibool*  lru,    /*!< in: pointer to a variable that will be assigned
3375                         TRUE if storage was allocated from the LRU list
3376                         and buf_pool->mutex was temporarily released,
3377                         or NULL if the LRU list should not be used */
3378 +       ibool   have_page_hash_mutex)
3379         __attribute__((malloc));
3380  
3381  /**********************************************************************//**
3382 @@ -61,8 +62,9 @@
3383         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
3384         void*           buf,            /*!< in: block to be freed, must not be
3385                                         pointed to by the buffer pool */
3386 -       ulint           i)              /*!< in: index of buf_pool->zip_free[],
3387 +       ulint           i,              /*!< in: index of buf_pool->zip_free[],
3388                                         or BUF_BUDDY_SIZES */
3389 +       ibool           have_page_hash_mutex)
3390         __attribute__((nonnull));
3391  
3392  /**********************************************************************//**
3393 @@ -102,16 +104,17 @@
3394                                         the page resides */
3395         ulint           size,           /*!< in: block size, up to
3396                                         UNIV_PAGE_SIZE */
3397 -       ibool*          lru)            /*!< in: pointer to a variable
3398 +       ibool*          lru,            /*!< in: pointer to a variable
3399                                         that will be assigned TRUE if
3400                                         storage was allocated from the
3401                                         LRU list and buf_pool->mutex was
3402                                         temporarily released, or NULL if
3403                                         the LRU list should not be used */
3404 +       ibool           have_page_hash_mutex)
3405  {
3406 -       ut_ad(buf_pool_mutex_own(buf_pool));
3407 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3408  
3409 -       return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3410 +       return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3411  }
3412  
3413  /**********************************************************************//**
3414 @@ -123,12 +126,25 @@
3415         buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
3416         void*           buf,            /*!< in: block to be freed, must not be
3417                                         pointed to by the buffer pool */
3418 -       ulint           size)           /*!< in: block size, up to
3419 +       ulint           size,           /*!< in: block size, up to
3420                                         UNIV_PAGE_SIZE */
3421 +       ibool           have_page_hash_mutex)
3422  {
3423 -       ut_ad(buf_pool_mutex_own(buf_pool));
3424 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3425 +
3426 +       if (!have_page_hash_mutex) {
3427 +               mutex_enter(&buf_pool->LRU_list_mutex);
3428 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
3429 +       }
3430  
3431 -       buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3432 +       mutex_enter(&buf_pool->zip_free_mutex);
3433 +       buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3434 +       mutex_exit(&buf_pool->zip_free_mutex);
3435 +
3436 +       if (!have_page_hash_mutex) {
3437 +               mutex_exit(&buf_pool->LRU_list_mutex);
3438 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3439 +       }
3440  }
3441  
3442  #ifdef UNIV_MATERIALIZE
3443 diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3444 --- a/storage/innobase/include/buf0buf.h        2010-12-03 15:22:36.327954660 +0900
3445 +++ b/storage/innobase/include/buf0buf.h        2010-12-03 15:48:29.343024683 +0900
3446 @@ -203,6 +203,20 @@
3447  /*==========================*/
3448  
3449  /********************************************************************//**
3450 +X-locks the page_hash_latch of every buffer pool instance, in index order. */
3451 +UNIV_INLINE
3452 +void
3453 +buf_pool_page_hash_x_lock_all(void);
3454 +/*================================*/
3455 +
3456 +/********************************************************************//**
3457 +Releases the X-lock on the page_hash_latch of every buffer pool instance. */
3458 +UNIV_INLINE
3459 +void
3460 +buf_pool_page_hash_x_unlock_all(void);
3461 +/*==================================*/
3462 +
3463 +/********************************************************************//**
3464  Creates the buffer pool.
3465  @return        own: buf_pool object, NULL if not enough memory or error */
3466  UNIV_INTERN
3467 @@ -832,6 +846,15 @@
3468         const buf_page_t*       bpage)  /*!< in: pointer to control block */
3469         __attribute__((pure));
3470  
3471 +/*************************************************************************
3472 +Gets the mutex of a block and enters it. @return entered mutex, or NULL */
3473 +UNIV_INLINE
3474 +mutex_t*
3475 +buf_page_get_mutex_enter(
3476 +/*=========================*/
3477 +       const buf_page_t*       bpage); /*!< in: pointer to control block */
3478 +       /* not __attribute__((pure)): entering a mutex is a side effect */
3479 +
3480  /*********************************************************************//**
3481  Get the flush type of a page.
3482  @return        flush type */
3483 @@ -1313,7 +1336,7 @@
3484         All these are protected by buf_pool->mutex. */
3485         /* @{ */
3486  
3487 -       UT_LIST_NODE_T(buf_page_t) list;
3488 +       /* UT_LIST_NODE_T(buf_page_t) list; */
3489                                         /*!< based on state, this is a
3490                                         list node, protected either by
3491                                         buf_pool->mutex or by
3492 @@ -1341,6 +1364,10 @@
3493                                         BUF_BLOCK_REMOVE_HASH or
3494                                         BUF_BLOCK_READY_IN_USE. */
3495  
3496 +       /* resplit for optimistic use */
3497 +       UT_LIST_NODE_T(buf_page_t) free;
3498 +       UT_LIST_NODE_T(buf_page_t) flush_list;
3499 +       UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3500  #ifdef UNIV_DEBUG
3501         ibool           in_flush_list;  /*!< TRUE if in buf_pool->flush_list;
3502                                         when buf_pool->flush_list_mutex is
3503 @@ -1433,11 +1460,11 @@
3504                                         a block is in the unzip_LRU list
3505                                         if page.state == BUF_BLOCK_FILE_PAGE
3506                                         and page.zip.data != NULL */
3507 -#ifdef UNIV_DEBUG
3508 +//#ifdef UNIV_DEBUG
3509         ibool           in_unzip_LRU_list;/*!< TRUE if the page is in the
3510                                         decompressed LRU list;
3511                                         used in debugging */
3512 -#endif /* UNIV_DEBUG */
3513 +//#endif /* UNIV_DEBUG */
3514         mutex_t         mutex;          /*!< mutex protecting this block:
3515                                         state (also protected by the buffer
3516                                         pool mutex), io_fix, buf_fix_count,
3517 @@ -1612,6 +1639,11 @@
3518                                         pool instance, protects compressed
3519                                         only pages (of type buf_page_t, not
3520                                         buf_block_t */
3521 +       mutex_t         LRU_list_mutex;
3522 +       rw_lock_t       page_hash_latch;
3523 +       mutex_t         free_list_mutex;
3524 +       mutex_t         zip_free_mutex;
3525 +       mutex_t         zip_hash_mutex;
3526         ulint           instance_no;    /*!< Array index of this buffer
3527                                         pool instance */
3528         ulint           old_pool_size;  /*!< Old pool size in bytes */
3529 diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3530 --- a/storage/innobase/include/buf0buf.ic       2010-11-03 07:01:13.000000000 +0900
3531 +++ b/storage/innobase/include/buf0buf.ic       2010-12-03 15:48:29.345024524 +0900
3532 @@ -274,7 +274,7 @@
3533         case BUF_BLOCK_ZIP_FREE:
3534                 /* This is a free page in buf_pool->zip_free[].
3535                 Such pages should only be accessed by the buddy allocator. */
3536 -               ut_error;
3537 +               /* ut_error; */ /* optimistic */
3538                 break;
3539         case BUF_BLOCK_ZIP_PAGE:
3540         case BUF_BLOCK_ZIP_DIRTY:
3541 @@ -317,9 +317,14 @@
3542  {
3543         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3544  
3545 +       if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3546 +               /* TODO: this code is the interim. should be confirmed later. */
3547 +               return(&buf_pool->zip_mutex);
3548 +       }
3549 +
3550         switch (buf_page_get_state(bpage)) {
3551         case BUF_BLOCK_ZIP_FREE:
3552 -               ut_error;
3553 +               /* ut_error; */ /* optimistic */
3554                 return(NULL);
3555         case BUF_BLOCK_ZIP_PAGE:
3556         case BUF_BLOCK_ZIP_DIRTY:
3557 @@ -329,6 +334,28 @@
3558         }
3559  }
3560  
3561 +/*************************************************************************
3562 +Gets the mutex of a block and enters it. @return entered mutex, or NULL */
3563 +UNIV_INLINE
3564 +mutex_t*
3565 +buf_page_get_mutex_enter(
3566 +/*=========================*/
3567 +       const buf_page_t*       bpage)  /*!< in: pointer to control block */
3568 +{
3569 +       mutex_t*        block_mutex;
3570 +
3571 +       while(1) { /* retry until the mutex entered is still the block's mutex */
3572 +               block_mutex = buf_page_get_mutex(bpage);
3573 +               if (!block_mutex)
3574 +                       return block_mutex; /* NULL: no mutex to enter, nothing is held */
3575 +
3576 +               mutex_enter(block_mutex);
3577 +               if (block_mutex == buf_page_get_mutex(bpage)) /* re-check while holding it */
3578 +                       return block_mutex; /* caller now owns block_mutex */
3579 +               mutex_exit(block_mutex); /* block's mutex changed meanwhile: release, retry */
3580 +       }
3581 +}
3582 +
3583  /*********************************************************************//**
3584  Get the flush type of a page.
3585  @return        flush type */
3586 @@ -425,8 +452,8 @@
3587         enum buf_io_fix io_fix) /*!< in: io_fix state */
3588  {
3589  #ifdef UNIV_DEBUG
3590 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3591 -       ut_ad(buf_pool_mutex_own(buf_pool));
3592 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3593 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3594  #endif
3595         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3596  
3597 @@ -456,14 +483,14 @@
3598         const buf_page_t*       bpage)  /*!< control block being relocated */
3599  {
3600  #ifdef UNIV_DEBUG
3601 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3602 -       ut_ad(buf_pool_mutex_own(buf_pool));
3603 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3604 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3605  #endif
3606         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3607         ut_ad(buf_page_in_file(bpage));
3608 -       ut_ad(bpage->in_LRU_list);
3609 +       //ut_ad(bpage->in_LRU_list);
3610  
3611 -       return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3612 +       return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3613                && bpage->buf_fix_count == 0);
3614  }
3615  
3616 @@ -477,8 +504,8 @@
3617         const buf_page_t*       bpage)  /*!< in: control block */
3618  {
3619  #ifdef UNIV_DEBUG
3620 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3621 -       ut_ad(buf_pool_mutex_own(buf_pool));
3622 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3623 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3624  #endif
3625         ut_ad(buf_page_in_file(bpage));
3626  
3627 @@ -498,7 +525,8 @@
3628         buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3629  #endif /* UNIV_DEBUG */
3630         ut_a(buf_page_in_file(bpage));
3631 -       ut_ad(buf_pool_mutex_own(buf_pool));
3632 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3633 +       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3634         ut_ad(bpage->in_LRU_list);
3635  
3636  #ifdef UNIV_LRU_DEBUG
3637 @@ -545,9 +573,10 @@
3638         ulint           time_ms)        /*!< in: ut_time_ms() */
3639  {
3640  #ifdef UNIV_DEBUG
3641 -       buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
3642 -       ut_ad(buf_pool_mutex_own(buf_pool));
3643 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage(bpage);
3644 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3645  #endif
3646 +       ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3647         ut_a(buf_page_in_file(bpage));
3648  
3649         if (!bpage->access_time) {
3650 @@ -761,19 +790,19 @@
3651  /*===========*/
3652         buf_block_t*    block)  /*!< in, own: block to be freed */
3653  {
3654 -       buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3655 +       //buf_pool_t*   buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3656  
3657 -       buf_pool_mutex_enter(buf_pool);
3658 +       //buf_pool_mutex_enter(buf_pool);
3659  
3660         mutex_enter(&block->mutex);
3661  
3662         ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3663  
3664 -       buf_LRU_block_free_non_file_page(block);
3665 +       buf_LRU_block_free_non_file_page(block, FALSE);
3666  
3667         mutex_exit(&block->mutex);
3668  
3669 -       buf_pool_mutex_exit(buf_pool);
3670 +       //buf_pool_mutex_exit(buf_pool);
3671  }
3672  #endif /* !UNIV_HOTBACKUP */
3673  
3674 @@ -821,17 +850,17 @@
3675                                         page frame */
3676  {
3677         ib_uint64_t     lsn;
3678 -       mutex_t*        block_mutex = buf_page_get_mutex(bpage);
3679 -
3680 -       mutex_enter(block_mutex);
3681 +       mutex_t*        block_mutex = buf_page_get_mutex_enter(bpage);
3682  
3683 -       if (buf_page_in_file(bpage)) {
3684 +       if (block_mutex && buf_page_in_file(bpage)) {
3685                 lsn = bpage->newest_modification;
3686         } else {
3687                 lsn = 0;
3688         }
3689  
3690 -       mutex_exit(block_mutex);
3691 +       if (block_mutex) {
3692 +               mutex_exit(block_mutex);
3693 +       }
3694  
3695         return(lsn);
3696  }
3697 @@ -849,7 +878,7 @@
3698  #ifdef UNIV_SYNC_DEBUG
3699         buf_pool_t*     buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3700  
3701 -       ut_ad((buf_pool_mutex_own(buf_pool)
3702 +       ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3703                && (block->page.buf_fix_count == 0))
3704               || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3705  #endif /* UNIV_SYNC_DEBUG */
3706 @@ -979,7 +1008,11 @@
3707         buf_page_t*     bpage;
3708  
3709         ut_ad(buf_pool);
3710 -       ut_ad(buf_pool_mutex_own(buf_pool));
3711 +       //ut_ad(buf_pool_mutex_own(buf_pool));
3712 +#ifdef UNIV_SYNC_DEBUG
3713 +       ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3714 +             || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3715 +#endif
3716         ut_ad(fold == buf_page_address_fold(space, offset));
3717  
3718         /* Look for the page in the hash table */
3719 @@ -1064,11 +1097,13 @@
3720         const buf_page_t*       bpage;
3721         buf_pool_t*             buf_pool = buf_pool_get(space, offset);
3722  
3723 -       buf_pool_mutex_enter(buf_pool);
3724 +       //buf_pool_mutex_enter(buf_pool);
3725 +       rw_lock_s_lock(&buf_pool->page_hash_latch);
3726  
3727         bpage = buf_page_hash_get(buf_pool, space, offset);
3728  
3729 -       buf_pool_mutex_exit(buf_pool);
3730 +       //buf_pool_mutex_exit(buf_pool);
3731 +       rw_lock_s_unlock(&buf_pool->page_hash_latch);
3732  
3733         return(bpage != NULL);
3734  }
3735 @@ -1196,4 +1231,38 @@
3736                 buf_pool_mutex_exit(buf_pool);
3737         }
3738  }
3739 +
3740 +/********************************************************************//**
3741 +X-locks the page_hash_latch of every buffer pool instance, in index order. */
3742 +UNIV_INLINE
3743 +void
3744 +buf_pool_page_hash_x_lock_all(void)
3745 +/*===============================*/
3746 +{
3747 +       ulint   i;
3748 +
3749 +       for (i = 0; i < srv_buf_pool_instances; i++) {
3750 +               buf_pool_t*     buf_pool;
3751 +
3752 +               buf_pool = buf_pool_from_array(i);
3753 +               rw_lock_x_lock(&buf_pool->page_hash_latch);
3754 +       }
3755 +}
3756 +
3757 +/********************************************************************//**
3758 +X-unlocks buf_pool->page_hash_latch of every buffer pool instance, in index order. */
3759 +UNIV_INLINE
3760 +void
3761 +buf_pool_page_hash_x_unlock_all(void)
3762 +/*=================================*/
3763 +{
3764 +       ulint   i;
3765 +
3766 +       for (i = 0; i < srv_buf_pool_instances; i++) {
3767 +               buf_pool_t*     buf_pool;
3768 +
3769 +               buf_pool = buf_pool_from_array(i);
3770 +               rw_lock_x_unlock(&buf_pool->page_hash_latch);
3771 +       }
3772 +}
3773  #endif /* !UNIV_HOTBACKUP */
3774 diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3775 --- a/storage/innobase/include/buf0lru.h        2010-11-03 07:01:13.000000000 +0900
3776 +++ b/storage/innobase/include/buf0lru.h        2010-12-03 15:48:29.349024701 +0900
3777 @@ -113,10 +113,11 @@
3778         buf_page_t*     bpage,  /*!< in: block to be freed */
3779         ibool           zip,    /*!< in: TRUE if should remove also the
3780                                 compressed page of an uncompressed page */
3781 -       ibool*          buf_pool_mutex_released);
3782 +       ibool*          buf_pool_mutex_released,
3783                                 /*!< in: pointer to a variable that will
3784                                 be assigned TRUE if buf_pool->mutex
3785                                 was temporarily released, or NULL */
3786 +       ibool           have_LRU_mutex);
3787  /******************************************************************//**
3788  Try to free a replaceable block.
3789  @return        TRUE if found and freed */
3790 @@ -163,7 +164,8 @@
3791  void
3792  buf_LRU_block_free_non_file_page(
3793  /*=============================*/
3794 -       buf_block_t*    block); /*!< in: block, must not contain a file page */
3795 +       buf_block_t*    block,  /*!< in: block, must not contain a file page */
3796 +       ibool           have_page_hash_mutex);
3797  /******************************************************************//**
3798  Adds a block to the LRU list. */
3799  UNIV_INTERN
3800 diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3801 --- a/storage/innobase/include/sync0rw.h        2010-11-03 07:01:13.000000000 +0900
3802 +++ b/storage/innobase/include/sync0rw.h        2010-12-03 15:48:29.349942993 +0900
3803 @@ -112,6 +112,7 @@
3804  extern mysql_pfs_key_t archive_lock_key;
3805  # endif /* UNIV_LOG_ARCHIVE */
3806  extern mysql_pfs_key_t btr_search_latch_key;
3807 +extern mysql_pfs_key_t buf_pool_page_hash_key;
3808  extern mysql_pfs_key_t buf_block_lock_key;
3809  # ifdef UNIV_SYNC_DEBUG
3810  extern mysql_pfs_key_t buf_block_debug_latch_key;
3811 diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3812 --- a/storage/innobase/include/sync0sync.h      2010-11-03 07:01:13.000000000 +0900
3813 +++ b/storage/innobase/include/sync0sync.h      2010-12-03 15:48:29.352024614 +0900
3814 @@ -75,6 +75,10 @@
3815  extern mysql_pfs_key_t buffer_block_mutex_key;
3816  extern mysql_pfs_key_t buf_pool_mutex_key;
3817  extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3818 +extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3819 +extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3820 +extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3821 +extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3822  extern mysql_pfs_key_t cache_last_read_mutex_key;
3823  extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3824  extern mysql_pfs_key_t dict_sys_mutex_key;
3825 @@ -660,7 +664,7 @@
3826  #define        SYNC_TRX_LOCK_HEAP      298
3827  #define SYNC_TRX_SYS_HEADER    290
3828  #define SYNC_LOG               170
3829 -#define SYNC_LOG_FLUSH_ORDER   147
3830 +#define SYNC_LOG_FLUSH_ORDER   156
3831  #define SYNC_RECV              168
3832  #define        SYNC_WORK_QUEUE         162
3833  #define        SYNC_SEARCH_SYS_CONF    161     /* for assigning btr_search_enabled */
3834 @@ -670,8 +674,13 @@
3835                                         SYNC_SEARCH_SYS, as memory allocation
3836                                         can call routines there! Otherwise
3837                                         the level is SYNC_MEM_HASH. */
3838 +#define        SYNC_BUF_LRU_LIST       158
3839 +#define        SYNC_BUF_PAGE_HASH      157
3840 +#define        SYNC_BUF_BLOCK          155     /* Block mutex */
3841 +#define        SYNC_BUF_FREE_LIST      153
3842 +#define        SYNC_BUF_ZIP_FREE       152
3843 +#define        SYNC_BUF_ZIP_HASH       151
3844  #define        SYNC_BUF_POOL           150     /* Buffer pool mutex */
3845 -#define        SYNC_BUF_BLOCK          146     /* Block mutex */
3846  #define        SYNC_BUF_FLUSH_LIST     145     /* Buffer flush list mutex */
3847  #define SYNC_DOUBLEWRITE       140
3848  #define        SYNC_ANY_LATCH          135
3849 @@ -703,7 +712,7 @@
3850                 os_fast_mutex;  /*!< We use this OS mutex in place of lock_word
3851                                 when atomic operations are not enabled */
3852  #endif
3853 -       ulint   waiters;        /*!< This ulint is set to 1 if there are (or
3854 +       volatile ulint  waiters;        /*!< This ulint is set to 1 if there are (or
3855                                 may be) threads waiting in the global wait
3856                                 array for this mutex to be released.
3857                                 Otherwise, this is 0. */
3858 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3859 --- a/storage/innobase/srv/srv0srv.c    2010-12-03 15:48:03.080956216 +0900
3860 +++ b/storage/innobase/srv/srv0srv.c    2010-12-03 15:48:29.355023766 +0900
3861 @@ -3094,7 +3094,7 @@
3862                                                                 level += log_sys->max_checkpoint_age
3863                                                                          - (lsn - oldest_modification);
3864                                                         }
3865 -                                                       bpage = UT_LIST_GET_NEXT(list, bpage);
3866 +                                                       bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3867                                                         n_blocks++;
3868                                                 }
3869  
3870 @@ -3180,7 +3180,7 @@
3871                                                         found = TRUE;
3872                                                         break;
3873                                                 }
3874 -                                               bpage = UT_LIST_GET_NEXT(list, bpage);
3875 +                                               bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3876                                                 new_blocks_num++;
3877                                         }
3878                                         if (!found) {
3879 diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3880 --- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3881 +++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
3882 @@ -265,7 +265,7 @@
3883         mutex->lock_word = 0;
3884  #endif
3885         mutex->event = os_event_create(NULL);
3886 -       mutex_set_waiters(mutex, 0);
3887 +       mutex->waiters = 0;
3888  #ifdef UNIV_DEBUG
3889         mutex->magic_n = MUTEX_MAGIC_N;
3890  #endif /* UNIV_DEBUG */
3891 @@ -444,6 +444,15 @@
3892         mutex_t*        mutex,  /*!< in: mutex */
3893         ulint           n)      /*!< in: value to set */
3894  {
3895 +#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3896 +       ut_ad(mutex);
3897 +
3898 +       if (n) {
3899 +               os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3900 +       } else {
3901 +               os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3902 +       }
3903 +#else
3904         volatile ulint* ptr;            /* declared volatile to ensure that
3905                                         the value is stored to memory */
3906         ut_ad(mutex);
3907 @@ -452,6 +461,7 @@
3908  
3909         *ptr = n;               /* Here we assume that the write of a single
3910                                 word in memory is atomic */
3911 +#endif
3912  }
3913  
3914  /******************************************************************//**
3915 @@ -1193,7 +1203,12 @@
3916                         ut_error;
3917                 }
3918                 break;
3919 +       case SYNC_BUF_LRU_LIST:
3920         case SYNC_BUF_FLUSH_LIST:
3921 +       case SYNC_BUF_PAGE_HASH:
3922 +       case SYNC_BUF_FREE_LIST:
3923 +       case SYNC_BUF_ZIP_FREE:
3924 +       case SYNC_BUF_ZIP_HASH:
3925         case SYNC_BUF_POOL:
3926                 /* We can have multiple mutexes of this type therefore we
3927                 can only check whether the greater than condition holds. */
3928 @@ -1211,7 +1226,8 @@
3929                 buffer block (block->mutex or buf_pool->zip_mutex). */
3930                 if (!sync_thread_levels_g(array, level, FALSE)) {
3931                         ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3932 -                       ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3933 +                       /* The exact rule is not decided yet, so this check is disabled for now. */
3934 +                       //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
3935                 }
3936                 break;
3937         case SYNC_REC_LOCK:
This page took 0.732313 seconds and 3 git commands to generate.