git.pld-linux.org Git - packages/mysql.git/blame - innodb_split_buf_pool_mutex.patch
- up to 5.1.57
[packages/mysql.git] / innodb_split_buf_pool_mutex.patch
CommitLineData
b4e1fa2c
AM
1# name : innodb_split_buf_pool_mutex.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
8diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9--- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900
10+++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900
11822e22 11@@ -4069,7 +4069,8 @@
b4e1fa2c
AM
12
13 mtr_commit(mtr);
14
15- buf_pool_mutex_enter(buf_pool);
16+ //buf_pool_mutex_enter(buf_pool);
17+ mutex_enter(&buf_pool->LRU_list_mutex);
18 mutex_enter(&block->mutex);
19
20 /* Only free the block if it is still allocated to
11822e22 21@@ -4080,16 +4081,21 @@
b4e1fa2c
AM
22 && buf_block_get_space(block) == space
23 && buf_block_get_page_no(block) == page_no) {
24
df1b5770 25- if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED
b4e1fa2c 26- && all && block->page.zip.data) {
df1b5770 27+ if (buf_LRU_free_block(&block->page, all, TRUE) != BUF_LRU_FREED
b4e1fa2c
AM
28+ && all && block->page.zip.data
29+ /* buf_LRU_free_block() may now release the mutex temporarily, so re-check that the block is still the same page */
30+ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
31+ && buf_block_get_space(block) == space
32+ && buf_block_get_page_no(block) == page_no) {
33 /* Attempt to deallocate the uncompressed page
34 if the whole block cannot be deallocted. */
35
df1b5770
AM
36- buf_LRU_free_block(&block->page, FALSE);
37+ buf_LRU_free_block(&block->page, FALSE, TRUE);
b4e1fa2c
AM
38 }
39 }
40
41- buf_pool_mutex_exit(buf_pool);
42+ //buf_pool_mutex_exit(buf_pool);
43+ mutex_exit(&buf_pool->LRU_list_mutex);
44 mutex_exit(&block->mutex);
45 }
46
47diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
48--- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900
49+++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900
d8778560 50@@ -1943,7 +1943,7 @@
b4e1fa2c
AM
51 rec_offs_init(offsets_);
52
53 rw_lock_x_lock(&btr_search_latch);
54- buf_pool_mutex_enter_all();
55+ buf_pool_page_hash_x_lock_all();
56
57 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
58
d8778560 59@@ -1951,11 +1951,11 @@
b4e1fa2c
AM
60 /* We release btr_search_latch every once in a while to
61 give other queries a chance to run. */
62 if ((i != 0) && ((i % chunk_size) == 0)) {
63- buf_pool_mutex_exit_all();
64+ buf_pool_page_hash_x_unlock_all();
65 rw_lock_x_unlock(&btr_search_latch);
66 os_thread_yield();
67 rw_lock_x_lock(&btr_search_latch);
68- buf_pool_mutex_enter_all();
69+ buf_pool_page_hash_x_lock_all();
70 }
71
72 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
d8778560 73@@ -2066,11 +2066,11 @@
b4e1fa2c
AM
74 /* We release btr_search_latch every once in a while to
75 give other queries a chance to run. */
76 if (i != 0) {
77- buf_pool_mutex_exit_all();
78+ buf_pool_page_hash_x_unlock_all();
79 rw_lock_x_unlock(&btr_search_latch);
80 os_thread_yield();
81 rw_lock_x_lock(&btr_search_latch);
82- buf_pool_mutex_enter_all();
83+ buf_pool_page_hash_x_lock_all();
84 }
85
86 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
d8778560 87@@ -2078,7 +2078,7 @@
b4e1fa2c
AM
88 }
89 }
90
91- buf_pool_mutex_exit_all();
92+ buf_pool_page_hash_x_unlock_all();
93 rw_lock_x_unlock(&btr_search_latch);
94 if (UNIV_LIKELY_NULL(heap)) {
95 mem_heap_free(heap);
96diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
97--- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
98+++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
99@@ -73,10 +73,11 @@
100 if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
101 #endif /* UNIV_DEBUG_VALGRIND */
102
103- ut_ad(buf_pool_mutex_own(buf_pool));
104+ //ut_ad(buf_pool_mutex_own(buf_pool));
105+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
106 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
107 ut_ad(buf_pool->zip_free[i].start != bpage);
108- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
109+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
110
111 #ifdef UNIV_DEBUG_VALGRIND
112 if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
113@@ -96,8 +97,8 @@
114 buf_pool->zip_free[] */
115 {
116 #ifdef UNIV_DEBUG_VALGRIND
117- buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
118- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
119+ buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
120+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
121
122 if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
123 if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
124@@ -106,9 +107,10 @@
125 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
126 #endif /* UNIV_DEBUG_VALGRIND */
127
128- ut_ad(buf_pool_mutex_own(buf_pool));
129+ //ut_ad(buf_pool_mutex_own(buf_pool));
130+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
131 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
132- UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
133+ UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
134
135 #ifdef UNIV_DEBUG_VALGRIND
136 if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
137@@ -128,12 +130,13 @@
138 {
139 buf_page_t* bpage;
140
141- ut_ad(buf_pool_mutex_own(buf_pool));
142+ //ut_ad(buf_pool_mutex_own(buf_pool));
143+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
144 ut_a(i < BUF_BUDDY_SIZES);
145
146 #ifndef UNIV_DEBUG_VALGRIND
147 /* Valgrind would complain about accessing free memory. */
148- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
149+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
150 ut_ad(buf_page_get_state(ut_list_node_313)
151 == BUF_BLOCK_ZIP_FREE)));
152 #endif /* !UNIV_DEBUG_VALGRIND */
153@@ -177,16 +180,19 @@
154 buf_buddy_block_free(
155 /*=================*/
156 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
157- void* buf) /*!< in: buffer frame to deallocate */
158+ void* buf, /*!< in: buffer frame to deallocate */
159+ ibool have_page_hash_mutex)
160 {
161 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
162 buf_page_t* bpage;
163 buf_block_t* block;
164
165- ut_ad(buf_pool_mutex_own(buf_pool));
166+ //ut_ad(buf_pool_mutex_own(buf_pool));
167 ut_ad(!mutex_own(&buf_pool->zip_mutex));
168 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
169
170+ mutex_enter(&buf_pool->zip_hash_mutex);
171+
172 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
173 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
174 && bpage->in_zip_hash && !bpage->in_page_hash),
175@@ -198,12 +204,14 @@
176 ut_d(bpage->in_zip_hash = FALSE);
177 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
178
179+ mutex_exit(&buf_pool->zip_hash_mutex);
180+
181 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
182 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
183
184 block = (buf_block_t*) bpage;
185 mutex_enter(&block->mutex);
186- buf_LRU_block_free_non_file_page(block);
187+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
188 mutex_exit(&block->mutex);
189
190 ut_ad(buf_pool->buddy_n_frames > 0);
191@@ -220,7 +228,7 @@
192 {
193 buf_pool_t* buf_pool = buf_pool_from_block(block);
194 const ulint fold = BUF_POOL_ZIP_FOLD(block);
195- ut_ad(buf_pool_mutex_own(buf_pool));
196+ //ut_ad(buf_pool_mutex_own(buf_pool));
197 ut_ad(!mutex_own(&buf_pool->zip_mutex));
198 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
199
200@@ -232,7 +240,10 @@
201 ut_ad(!block->page.in_page_hash);
202 ut_ad(!block->page.in_zip_hash);
203 ut_d(block->page.in_zip_hash = TRUE);
204+
205+ mutex_enter(&buf_pool->zip_hash_mutex);
206 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
207+ mutex_exit(&buf_pool->zip_hash_mutex);
208
209 ut_d(buf_pool->buddy_n_frames++);
210 }
211@@ -268,7 +279,7 @@
212 bpage->state = BUF_BLOCK_ZIP_FREE;
213 #ifndef UNIV_DEBUG_VALGRIND
214 /* Valgrind would complain about accessing free memory. */
215- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
216+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
217 ut_ad(buf_page_get_state(
218 ut_list_node_313)
219 == BUF_BLOCK_ZIP_FREE)));
220@@ -291,25 +302,29 @@
221 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
222 ulint i, /*!< in: index of buf_pool->zip_free[],
223 or BUF_BUDDY_SIZES */
224- ibool* lru) /*!< in: pointer to a variable that
225+ ibool* lru, /*!< in: pointer to a variable that
226 will be assigned TRUE if storage was
227 allocated from the LRU list and
228 buf_pool->mutex was temporarily
229 released, or NULL if the LRU list
230 should not be used */
231+ ibool have_page_hash_mutex)
232 {
233 buf_block_t* block;
234
235- ut_ad(buf_pool_mutex_own(buf_pool));
236+ //ut_ad(buf_pool_mutex_own(buf_pool));
237+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
238 ut_ad(!mutex_own(&buf_pool->zip_mutex));
239
240 if (i < BUF_BUDDY_SIZES) {
241 /* Try to allocate from the buddy system. */
242+ mutex_enter(&buf_pool->zip_free_mutex);
243 block = buf_buddy_alloc_zip(buf_pool, i);
244
245 if (block) {
246 goto func_exit;
247 }
248+ mutex_exit(&buf_pool->zip_free_mutex);
249 }
250
251 /* Try allocating from the buf_pool->free list. */
252@@ -326,19 +341,30 @@
253 }
254
255 /* Try replacing an uncompressed page in the buffer pool. */
256- buf_pool_mutex_exit(buf_pool);
257+ //buf_pool_mutex_exit(buf_pool);
258+ mutex_exit(&buf_pool->LRU_list_mutex);
259+ if (have_page_hash_mutex) {
260+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
261+ }
df1b5770 262 block = buf_LRU_get_free_block(buf_pool);
b4e1fa2c
AM
263 *lru = TRUE;
264- buf_pool_mutex_enter(buf_pool);
265+ //buf_pool_mutex_enter(buf_pool);
266+ mutex_enter(&buf_pool->LRU_list_mutex);
267+ if (have_page_hash_mutex) {
268+ rw_lock_x_lock(&buf_pool->page_hash_latch);
269+ }
270
271 alloc_big:
272 buf_buddy_block_register(block);
273
274+ mutex_enter(&buf_pool->zip_free_mutex);
275 block = buf_buddy_alloc_from(
276 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
277
278 func_exit:
279 buf_pool->buddy_stat[i].used++;
280+ mutex_exit(&buf_pool->zip_free_mutex);
281+
282 return(block);
283 }
284
285@@ -355,7 +381,10 @@
286 buf_page_t* b;
287 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
288
289- ut_ad(buf_pool_mutex_own(buf_pool));
290+ //ut_ad(buf_pool_mutex_own(buf_pool));
291+#ifdef UNIV_SYNC_DEBUG
292+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
293+#endif
294
295 switch (buf_page_get_state(bpage)) {
296 case BUF_BLOCK_ZIP_FREE:
297@@ -364,7 +393,7 @@
298 case BUF_BLOCK_FILE_PAGE:
299 case BUF_BLOCK_MEMORY:
300 case BUF_BLOCK_REMOVE_HASH:
301- ut_error;
302+ /* ut_error; */ /* optimistic */
303 case BUF_BLOCK_ZIP_DIRTY:
304 /* Cannot relocate dirty pages. */
305 return(FALSE);
306@@ -374,9 +403,18 @@
307 }
308
309 mutex_enter(&buf_pool->zip_mutex);
310+ mutex_enter(&buf_pool->zip_free_mutex);
311
312 if (!buf_page_can_relocate(bpage)) {
313 mutex_exit(&buf_pool->zip_mutex);
314+ mutex_exit(&buf_pool->zip_free_mutex);
315+ return(FALSE);
316+ }
317+
318+ if (bpage != buf_page_hash_get(buf_pool,
319+ bpage->space, bpage->offset)) {
320+ mutex_exit(&buf_pool->zip_mutex);
321+ mutex_exit(&buf_pool->zip_free_mutex);
322 return(FALSE);
323 }
324
325@@ -384,18 +422,19 @@
326 ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
327
328 /* relocate buf_pool->zip_clean */
329- b = UT_LIST_GET_PREV(list, dpage);
330- UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
331+ b = UT_LIST_GET_PREV(zip_list, dpage);
332+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
333
334 if (b) {
335- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
336+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
337 } else {
338- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
339+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
340 }
341
342 UNIV_MEM_INVALID(bpage, sizeof *bpage);
343
344 mutex_exit(&buf_pool->zip_mutex);
345+ mutex_exit(&buf_pool->zip_free_mutex);
346 return(TRUE);
347 }
348
349@@ -409,14 +448,16 @@
350 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
351 void* src, /*!< in: block to relocate */
352 void* dst, /*!< in: free block to relocate to */
353- ulint i) /*!< in: index of
354+ ulint i, /*!< in: index of
355 buf_pool->zip_free[] */
356+ ibool have_page_hash_mutex)
357 {
358 buf_page_t* bpage;
359 const ulint size = BUF_BUDDY_LOW << i;
360 ullint usec = ut_time_us(NULL);
361
362- ut_ad(buf_pool_mutex_own(buf_pool));
363+ //ut_ad(buf_pool_mutex_own(buf_pool));
364+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
365 ut_ad(!mutex_own(&buf_pool->zip_mutex));
366 ut_ad(!ut_align_offset(src, size));
367 ut_ad(!ut_align_offset(dst, size));
d8778560
AM
368@@ -437,6 +478,13 @@
369 if (size >= PAGE_ZIP_MIN_SIZE) {
b4e1fa2c
AM
370 /* This is a compressed page. */
371 mutex_t* mutex;
d8778560
AM
372+ ulint space, page_no;
373+
b4e1fa2c
AM
374+ if (!have_page_hash_mutex) {
375+ mutex_exit(&buf_pool->zip_free_mutex);
376+ mutex_enter(&buf_pool->LRU_list_mutex);
377+ rw_lock_x_lock(&buf_pool->page_hash_latch);
378+ }
d8778560 379
b4e1fa2c
AM
380 /* The src block may be split into smaller blocks,
381 some of which may be free. Thus, the
d8778560
AM
382@@ -446,9 +494,9 @@
383 pool), so there is nothing wrong about this. The
384 mach_read_from_4() calls here will only trigger bogus
385 Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
386- ulint space = mach_read_from_4(
387+ space = mach_read_from_4(
388 (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
389- ulint page_no = mach_read_from_4(
390+ page_no = mach_read_from_4(
391 (const byte*) src + FIL_PAGE_OFFSET);
392 /* Suppress Valgrind warnings about conditional jump
393 on uninitialized value. */
394@@ -462,6 +510,11 @@
b4e1fa2c
AM
395 added to buf_pool->page_hash yet. Obviously,
396 it cannot be relocated. */
397
398+ if (!have_page_hash_mutex) {
399+ mutex_enter(&buf_pool->zip_free_mutex);
400+ mutex_exit(&buf_pool->LRU_list_mutex);
401+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
402+ }
403 return(FALSE);
404 }
405
d8778560 406@@ -473,18 +526,27 @@
b4e1fa2c
AM
407 For the sake of simplicity, give up. */
408 ut_ad(page_zip_get_size(&bpage->zip) < size);
409
410+ if (!have_page_hash_mutex) {
411+ mutex_enter(&buf_pool->zip_free_mutex);
412+ mutex_exit(&buf_pool->LRU_list_mutex);
413+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
414+ }
415 return(FALSE);
416 }
417
418+ /* To keep latch order */
419+ if (have_page_hash_mutex)
420+ mutex_exit(&buf_pool->zip_free_mutex);
421+
422 /* The block must have been allocated, but it may
423 contain uninitialized data. */
424 UNIV_MEM_ASSERT_W(src, size);
425
426- mutex = buf_page_get_mutex(bpage);
427+ mutex = buf_page_get_mutex_enter(bpage);
428
429- mutex_enter(mutex);
430+ mutex_enter(&buf_pool->zip_free_mutex);
431
432- if (buf_page_can_relocate(bpage)) {
433+ if (mutex && buf_page_can_relocate(bpage)) {
434 /* Relocate the compressed page. */
435 ut_a(bpage->zip.data == src);
436 memcpy(dst, src, size);
d8778560 437@@ -499,10 +561,22 @@
b4e1fa2c
AM
438 buddy_stat->relocated_usec
439 += ut_time_us(NULL) - usec;
440 }
441+
442+ if (!have_page_hash_mutex) {
443+ mutex_exit(&buf_pool->LRU_list_mutex);
444+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
445+ }
446 return(TRUE);
447 }
448
449- mutex_exit(mutex);
450+ if (!have_page_hash_mutex) {
451+ mutex_exit(&buf_pool->LRU_list_mutex);
452+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
453+ }
454+
455+ if (mutex) {
456+ mutex_exit(mutex);
457+ }
458 } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
459 /* This must be a buf_page_t object. */
460 #if UNIV_WORD_SIZE == 4
d8778560 461@@ -511,10 +585,31 @@
b4e1fa2c
AM
462 about uninitialized pad bytes. */
463 UNIV_MEM_ASSERT_RW(src, size);
464 #endif
465+
466+ mutex_exit(&buf_pool->zip_free_mutex);
467+
468+ if (!have_page_hash_mutex) {
469+ mutex_enter(&buf_pool->LRU_list_mutex);
470+ rw_lock_x_lock(&buf_pool->page_hash_latch);
471+ }
472+
473 if (buf_buddy_relocate_block(src, dst)) {
474+ mutex_enter(&buf_pool->zip_free_mutex);
475+
476+ if (!have_page_hash_mutex) {
477+ mutex_exit(&buf_pool->LRU_list_mutex);
478+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
479+ }
480
481 goto success;
482 }
483+
484+ mutex_enter(&buf_pool->zip_free_mutex);
485+
486+ if (!have_page_hash_mutex) {
487+ mutex_exit(&buf_pool->LRU_list_mutex);
488+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
489+ }
490 }
491
492 return(FALSE);
d8778560 493@@ -529,13 +624,15 @@
b4e1fa2c
AM
494 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
495 void* buf, /*!< in: block to be freed, must not be
496 pointed to by the buffer pool */
497- ulint i) /*!< in: index of buf_pool->zip_free[],
498+ ulint i, /*!< in: index of buf_pool->zip_free[],
499 or BUF_BUDDY_SIZES */
500+ ibool have_page_hash_mutex)
501 {
502 buf_page_t* bpage;
503 buf_page_t* buddy;
504
505- ut_ad(buf_pool_mutex_own(buf_pool));
506+ //ut_ad(buf_pool_mutex_own(buf_pool));
507+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
508 ut_ad(!mutex_own(&buf_pool->zip_mutex));
509 ut_ad(i <= BUF_BUDDY_SIZES);
510 ut_ad(buf_pool->buddy_stat[i].used > 0);
d8778560 511@@ -546,7 +643,9 @@
b4e1fa2c
AM
512 ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
513
514 if (i == BUF_BUDDY_SIZES) {
515- buf_buddy_block_free(buf_pool, buf);
516+ mutex_exit(&buf_pool->zip_free_mutex);
517+ buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
518+ mutex_enter(&buf_pool->zip_free_mutex);
519 return;
520 }
521
d8778560 522@@ -591,7 +690,7 @@
b4e1fa2c
AM
523 ut_a(bpage != buf);
524
525 {
526- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
527+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
528 UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
529 bpage = next;
530 }
d8778560 531@@ -600,13 +699,13 @@
b4e1fa2c
AM
532 #ifndef UNIV_DEBUG_VALGRIND
533 buddy_nonfree:
534 /* Valgrind would complain about accessing free memory. */
535- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
536+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
537 ut_ad(buf_page_get_state(ut_list_node_313)
538 == BUF_BLOCK_ZIP_FREE)));
539 #endif /* UNIV_DEBUG_VALGRIND */
540
541 /* The buddy is not free. Is there a free block of this size? */
542- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
543+ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
544
545 if (bpage) {
546 /* Remove the block from the free list, because a successful
d8778560 547@@ -616,7 +715,7 @@
b4e1fa2c
AM
548 buf_buddy_remove_from_free(buf_pool, bpage, i);
549
550 /* Try to relocate the buddy of buf to the free block. */
551- if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
552+ if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
553
554 ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
555 goto buddy_free2;
d8778560 556@@ -636,14 +735,14 @@
b4e1fa2c
AM
557
558 (Parts of the buddy can be free in
559 buf_pool->zip_free[j] with j < i.) */
560- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
561+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
562 ut_ad(buf_page_get_state(
563 ut_list_node_313)
564 == BUF_BLOCK_ZIP_FREE
565 && ut_list_node_313 != buddy)));
566 #endif /* !UNIV_DEBUG_VALGRIND */
567
568- if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
569+ if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
570
571 buf = bpage;
572 UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
573diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
574--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900
575+++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900
576@@ -263,6 +263,7 @@
577 #ifdef UNIV_PFS_RWLOCK
578 /* Keys to register buffer block related rwlocks and mutexes with
579 performance schema */
580+UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
581 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
582 # ifdef UNIV_SYNC_DEBUG
583 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
584@@ -273,6 +274,10 @@
585 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
586 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
587 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
588+UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
589+UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
590+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
591+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
592 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
593 #endif /* UNIV_PFS_MUTEX */
594
595@@ -881,9 +886,9 @@
596 block->page.in_zip_hash = FALSE;
597 block->page.in_flush_list = FALSE;
598 block->page.in_free_list = FALSE;
599- block->in_unzip_LRU_list = FALSE;
600 #endif /* UNIV_DEBUG */
601 block->page.in_LRU_list = FALSE;
602+ block->in_unzip_LRU_list = FALSE;
603 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
604 block->n_pointers = 0;
605 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
606@@ -981,9 +986,11 @@
607 memset(block->frame, '\0', UNIV_PAGE_SIZE);
608 #endif
609 /* Add the block to the free list */
610- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
611+ mutex_enter(&buf_pool->free_list_mutex);
612+ UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
613
614 ut_d(block->page.in_free_list = TRUE);
615+ mutex_exit(&buf_pool->free_list_mutex);
616 ut_ad(buf_pool_from_block(block) == buf_pool);
617
618 block++;
619@@ -1038,7 +1045,8 @@
620 buf_chunk_t* chunk = buf_pool->chunks;
621
622 ut_ad(buf_pool);
623- ut_ad(buf_pool_mutex_own(buf_pool));
624+ //ut_ad(buf_pool_mutex_own(buf_pool));
625+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
626 for (n = buf_pool->n_chunks; n--; chunk++) {
627
628 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
629@@ -1138,7 +1146,7 @@
630 buf_block_t* block;
631 const buf_block_t* block_end;
632
633- ut_ad(buf_pool_mutex_own(buf_pool));
634+ //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
635
636 block_end = chunk->blocks + chunk->size;
637
638@@ -1150,8 +1158,10 @@
639 ut_ad(!block->in_unzip_LRU_list);
640 ut_ad(!block->page.in_flush_list);
641 /* Remove the block from the free list. */
642+ mutex_enter(&buf_pool->free_list_mutex);
643 ut_ad(block->page.in_free_list);
644- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
645+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
646+ mutex_exit(&buf_pool->free_list_mutex);
647
648 /* Free the latches. */
649 mutex_free(&block->mutex);
650@@ -1208,9 +1218,21 @@
651 ------------------------------- */
652 mutex_create(buf_pool_mutex_key,
653 &buf_pool->mutex, SYNC_BUF_POOL);
654+ mutex_create(buf_pool_LRU_list_mutex_key,
655+ &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
656+ rw_lock_create(buf_pool_page_hash_key,
657+ &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
658+ mutex_create(buf_pool_free_list_mutex_key,
659+ &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
660+ mutex_create(buf_pool_zip_free_mutex_key,
661+ &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
662+ mutex_create(buf_pool_zip_hash_mutex_key,
663+ &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
664 mutex_create(buf_pool_zip_mutex_key,
665 &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
666
667+ mutex_enter(&buf_pool->LRU_list_mutex);
668+ rw_lock_x_lock(&buf_pool->page_hash_latch);
669 buf_pool_mutex_enter(buf_pool);
670
671 if (buf_pool_size > 0) {
672@@ -1223,6 +1245,8 @@
673 mem_free(chunk);
674 mem_free(buf_pool);
675
676+ mutex_exit(&buf_pool->LRU_list_mutex);
677+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
678 buf_pool_mutex_exit(buf_pool);
679
680 return(DB_ERROR);
681@@ -1253,6 +1277,8 @@
682
683 /* All fields are initialized by mem_zalloc(). */
684
685+ mutex_exit(&buf_pool->LRU_list_mutex);
686+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
687 buf_pool_mutex_exit(buf_pool);
688
689 return(DB_SUCCESS);
690@@ -1467,7 +1493,11 @@
691 ulint fold;
692 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
693
694- ut_ad(buf_pool_mutex_own(buf_pool));
695+ //ut_ad(buf_pool_mutex_own(buf_pool));
696+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
697+#ifdef UNIV_SYNC_DEBUG
698+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
699+#endif
700 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
701 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
702 ut_a(bpage->buf_fix_count == 0);
703@@ -1554,7 +1584,8 @@
704
705 try_again:
706 btr_search_disable(); /* Empty the adaptive hash index again */
707- buf_pool_mutex_enter(buf_pool);
708+ //buf_pool_mutex_enter(buf_pool);
709+ mutex_enter(&buf_pool->LRU_list_mutex);
710
711 shrink_again:
712 if (buf_pool->n_chunks <= 1) {
713@@ -1625,7 +1656,7 @@
714
715 buf_LRU_make_block_old(&block->page);
716 dirty++;
df1b5770
AM
717- } else if (buf_LRU_free_block(&block->page, TRUE)
718+ } else if (buf_LRU_free_block(&block->page, TRUE, TRUE)
b4e1fa2c
AM
719 != BUF_LRU_FREED) {
720 nonfree++;
721 }
722@@ -1633,7 +1664,8 @@
723 mutex_exit(&block->mutex);
724 }
725
726- buf_pool_mutex_exit(buf_pool);
727+ //buf_pool_mutex_exit(buf_pool);
728+ mutex_exit(&buf_pool->LRU_list_mutex);
729
730 /* Request for a flush of the chunk if it helps.
731 Do not flush if there are non-free blocks, since
732@@ -1683,7 +1715,8 @@
733 func_done:
734 buf_pool->old_pool_size = buf_pool->curr_pool_size;
735 func_exit:
736- buf_pool_mutex_exit(buf_pool);
737+ //buf_pool_mutex_exit(buf_pool);
738+ mutex_exit(&buf_pool->LRU_list_mutex);
739 btr_search_enable();
740 }
741
742@@ -1724,7 +1757,9 @@
743 hash_table_t* zip_hash;
744 hash_table_t* page_hash;
745
746- buf_pool_mutex_enter(buf_pool);
747+ //buf_pool_mutex_enter(buf_pool);
748+ mutex_enter(&buf_pool->LRU_list_mutex);
749+ rw_lock_x_lock(&buf_pool->page_hash_latch);
750
751 /* Free, create, and populate the hash table. */
752 hash_table_free(buf_pool->page_hash);
753@@ -1765,8 +1800,9 @@
754 All such blocks are either in buf_pool->zip_clean or
755 in buf_pool->flush_list. */
756
757+ mutex_enter(&buf_pool->zip_mutex);
758 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
759- b = UT_LIST_GET_NEXT(list, b)) {
760+ b = UT_LIST_GET_NEXT(zip_list, b)) {
761 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
762 ut_ad(!b->in_flush_list);
763 ut_ad(b->in_LRU_list);
764@@ -1776,10 +1812,11 @@
765 HASH_INSERT(buf_page_t, hash, page_hash,
766 buf_page_address_fold(b->space, b->offset), b);
767 }
768+ mutex_exit(&buf_pool->zip_mutex);
769
770 buf_flush_list_mutex_enter(buf_pool);
771 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
772- b = UT_LIST_GET_NEXT(list, b)) {
773+ b = UT_LIST_GET_NEXT(flush_list, b)) {
774 ut_ad(b->in_flush_list);
775 ut_ad(b->in_LRU_list);
776 ut_ad(b->in_page_hash);
777@@ -1806,7 +1843,9 @@
778 }
779
780 buf_flush_list_mutex_exit(buf_pool);
781- buf_pool_mutex_exit(buf_pool);
782+ //buf_pool_mutex_exit(buf_pool);
783+ mutex_exit(&buf_pool->LRU_list_mutex);
784+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
785 }
786
787 /********************************************************************
788@@ -1853,21 +1892,32 @@
789 buf_page_t* bpage;
790 ulint i;
791 buf_pool_t* buf_pool = buf_pool_get(space, offset);
792+ mutex_t* block_mutex;
793
794- ut_ad(buf_pool_mutex_own(buf_pool));
795+ //ut_ad(buf_pool_mutex_own(buf_pool));
796
797+ rw_lock_x_lock(&buf_pool->page_hash_latch);
798 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
799+ if (bpage) {
800+ block_mutex = buf_page_get_mutex_enter(bpage);
801+ ut_a(block_mutex);
802+ }
803
804 if (UNIV_LIKELY_NULL(bpage)) {
805 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
806 /* The page was loaded meanwhile. */
807+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
808 return(bpage);
809 }
810 /* Add to an existing watch. */
811 bpage->buf_fix_count++;
812+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
813+ mutex_exit(block_mutex);
814 return(NULL);
815 }
816
817+ /* buf_pool->watch is protected by zip_mutex for now */
818+ mutex_enter(&buf_pool->zip_mutex);
819 for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
820 bpage = &buf_pool->watch[i];
821
822@@ -1891,10 +1941,12 @@
823 bpage->space = space;
824 bpage->offset = offset;
825 bpage->buf_fix_count = 1;
826-
827+ bpage->buf_pool_index = buf_pool_index(buf_pool);
828 ut_d(bpage->in_page_hash = TRUE);
829 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
830 fold, bpage);
831+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
832+ mutex_exit(&buf_pool->zip_mutex);
833 return(NULL);
834 case BUF_BLOCK_ZIP_PAGE:
835 ut_ad(bpage->in_page_hash);
836@@ -1912,6 +1964,8 @@
837 ut_error;
838
839 /* Fix compiler warning */
840+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
841+ mutex_exit(&buf_pool->zip_mutex);
842 return(NULL);
843 }
844
845@@ -1941,6 +1995,8 @@
846 buf_chunk_t* chunks;
847 buf_chunk_t* chunk;
848
849+ mutex_enter(&buf_pool->LRU_list_mutex);
850+ rw_lock_x_lock(&buf_pool->page_hash_latch);
851 buf_pool_mutex_enter(buf_pool);
852 chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
853
854@@ -1959,6 +2015,8 @@
855 buf_pool->n_chunks++;
856 }
857
858+ mutex_exit(&buf_pool->LRU_list_mutex);
859+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
860 buf_pool_mutex_exit(buf_pool);
861 }
862
863@@ -2046,7 +2104,11 @@
864 space, offset) */
865 buf_page_t* watch) /*!< in/out: sentinel for watch */
866 {
867- ut_ad(buf_pool_mutex_own(buf_pool));
868+ //ut_ad(buf_pool_mutex_own(buf_pool));
869+#ifdef UNIV_SYNC_DEBUG
870+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
871+#endif
872+ ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
873
874 HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
875 ut_d(watch->in_page_hash = FALSE);
876@@ -2068,28 +2130,31 @@
877 buf_pool_t* buf_pool = buf_pool_get(space, offset);
878 ulint fold = buf_page_address_fold(space, offset);
879
880- buf_pool_mutex_enter(buf_pool);
881+ //buf_pool_mutex_enter(buf_pool);
882+ rw_lock_x_lock(&buf_pool->page_hash_latch);
883 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
884 /* The page must exist because buf_pool_watch_set()
885 increments buf_fix_count. */
886 ut_a(bpage);
887
888 if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
889- mutex_t* mutex = buf_page_get_mutex(bpage);
890+ mutex_t* mutex = buf_page_get_mutex_enter(bpage);
891
892- mutex_enter(mutex);
893 ut_a(bpage->buf_fix_count > 0);
894 bpage->buf_fix_count--;
895 mutex_exit(mutex);
896 } else {
897+ mutex_enter(&buf_pool->zip_mutex);
898 ut_a(bpage->buf_fix_count > 0);
899
900 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
901 buf_pool_watch_remove(buf_pool, fold, bpage);
902 }
903+ mutex_exit(&buf_pool->zip_mutex);
904 }
905
906- buf_pool_mutex_exit(buf_pool);
907+ //buf_pool_mutex_exit(buf_pool);
908+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
909 }
910
911 /****************************************************************//**
912@@ -2109,14 +2174,16 @@
913 buf_pool_t* buf_pool = buf_pool_get(space, offset);
914 ulint fold = buf_page_address_fold(space, offset);
915
916- buf_pool_mutex_enter(buf_pool);
917+ //buf_pool_mutex_enter(buf_pool);
918+ rw_lock_s_lock(&buf_pool->page_hash_latch);
919
920 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
921 /* The page must exist because buf_pool_watch_set()
922 increments buf_fix_count. */
923 ut_a(bpage);
924 ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
925- buf_pool_mutex_exit(buf_pool);
926+ //buf_pool_mutex_exit(buf_pool);
927+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
928
929 return(ret);
930 }
931@@ -2133,13 +2200,15 @@
932 {
933 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
934
935- buf_pool_mutex_enter(buf_pool);
936+ //buf_pool_mutex_enter(buf_pool);
937+ mutex_enter(&buf_pool->LRU_list_mutex);
938
939 ut_a(buf_page_in_file(bpage));
940
941 buf_LRU_make_block_young(bpage);
942
943- buf_pool_mutex_exit(buf_pool);
944+ //buf_pool_mutex_exit(buf_pool);
945+ mutex_exit(&buf_pool->LRU_list_mutex);
946 }
947
948 /********************************************************************//**
949@@ -2163,14 +2232,20 @@
950 ut_a(buf_page_in_file(bpage));
951
952 if (buf_page_peek_if_too_old(bpage)) {
953- buf_pool_mutex_enter(buf_pool);
954+ //buf_pool_mutex_enter(buf_pool);
955+ mutex_enter(&buf_pool->LRU_list_mutex);
956 buf_LRU_make_block_young(bpage);
957- buf_pool_mutex_exit(buf_pool);
958+ //buf_pool_mutex_exit(buf_pool);
959+ mutex_exit(&buf_pool->LRU_list_mutex);
960 } else if (!access_time) {
961 ulint time_ms = ut_time_ms();
962- buf_pool_mutex_enter(buf_pool);
963+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
964+ //buf_pool_mutex_enter(buf_pool);
965+ if (block_mutex) {
966 buf_page_set_accessed(bpage, time_ms);
967- buf_pool_mutex_exit(buf_pool);
968+ mutex_exit(block_mutex);
969+ }
970+ //buf_pool_mutex_exit(buf_pool);
971 }
972 }
973
974@@ -2187,7 +2262,8 @@
975 buf_block_t* block;
976 buf_pool_t* buf_pool = buf_pool_get(space, offset);
977
978- buf_pool_mutex_enter(buf_pool);
979+ //buf_pool_mutex_enter(buf_pool);
980+ rw_lock_s_lock(&buf_pool->page_hash_latch);
981
982 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
983
984@@ -2196,7 +2272,8 @@
985 block->check_index_page_at_flush = FALSE;
986 }
987
988- buf_pool_mutex_exit(buf_pool);
989+ //buf_pool_mutex_exit(buf_pool);
990+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
991 }
992
993 /********************************************************************//**
994@@ -2215,7 +2292,8 @@
995 ibool is_hashed;
996 buf_pool_t* buf_pool = buf_pool_get(space, offset);
997
998- buf_pool_mutex_enter(buf_pool);
999+ //buf_pool_mutex_enter(buf_pool);
1000+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1001
1002 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1003
1004@@ -2226,7 +2304,8 @@
1005 is_hashed = block->is_hashed;
1006 }
1007
1008- buf_pool_mutex_exit(buf_pool);
1009+ //buf_pool_mutex_exit(buf_pool);
1010+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1011
1012 return(is_hashed);
1013 }
1014@@ -2248,7 +2327,8 @@
1015 buf_page_t* bpage;
1016 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1017
1018- buf_pool_mutex_enter(buf_pool);
1019+ //buf_pool_mutex_enter(buf_pool);
1020+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1021
1022 bpage = buf_page_hash_get(buf_pool, space, offset);
1023
df1b5770 1024@@ -2259,7 +2339,8 @@
b4e1fa2c
AM
1025 bpage->file_page_was_freed = TRUE;
1026 }
1027
1028- buf_pool_mutex_exit(buf_pool);
1029+ //buf_pool_mutex_exit(buf_pool);
1030+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1031
1032 return(bpage);
1033 }
df1b5770 1034@@ -2280,7 +2361,8 @@
b4e1fa2c
AM
1035 buf_page_t* bpage;
1036 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1037
1038- buf_pool_mutex_enter(buf_pool);
1039+ //buf_pool_mutex_enter(buf_pool);
1040+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1041
1042 bpage = buf_page_hash_get(buf_pool, space, offset);
1043
df1b5770 1044@@ -2289,7 +2371,8 @@
b4e1fa2c
AM
1045 bpage->file_page_was_freed = FALSE;
1046 }
1047
1048- buf_pool_mutex_exit(buf_pool);
1049+ //buf_pool_mutex_exit(buf_pool);
1050+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1051
1052 return(bpage);
1053 }
df1b5770 1054@@ -2324,8 +2407,9 @@
b4e1fa2c
AM
1055 buf_pool->stat.n_page_gets++;
1056
1057 for (;;) {
1058- buf_pool_mutex_enter(buf_pool);
1059+ //buf_pool_mutex_enter(buf_pool);
1060 lookup:
1061+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1062 bpage = buf_page_hash_get(buf_pool, space, offset);
1063 if (bpage) {
1064 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
df1b5770 1065@@ -2334,7 +2418,8 @@
b4e1fa2c
AM
1066
1067 /* Page not in buf_pool: needs to be read from file */
1068
1069- buf_pool_mutex_exit(buf_pool);
1070+ //buf_pool_mutex_exit(buf_pool);
1071+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1072
1073 buf_read_page(space, zip_size, offset);
1074
df1b5770 1075@@ -2346,10 +2431,15 @@
b4e1fa2c
AM
1076 if (UNIV_UNLIKELY(!bpage->zip.data)) {
1077 /* There is no compressed page. */
1078 err_exit:
1079- buf_pool_mutex_exit(buf_pool);
1080+ //buf_pool_mutex_exit(buf_pool);
1081+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1082 return(NULL);
1083 }
1084
1085+ block_mutex = buf_page_get_mutex_enter(bpage);
1086+
1087+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1088+
1089 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1090
1091 switch (buf_page_get_state(bpage)) {
df1b5770 1092@@ -2358,19 +2448,19 @@
b4e1fa2c
AM
1093 case BUF_BLOCK_MEMORY:
1094 case BUF_BLOCK_REMOVE_HASH:
1095 case BUF_BLOCK_ZIP_FREE:
1096+ if (block_mutex)
1097+ mutex_exit(block_mutex);
1098 break;
1099 case BUF_BLOCK_ZIP_PAGE:
1100 case BUF_BLOCK_ZIP_DIRTY:
1101- block_mutex = &buf_pool->zip_mutex;
1102- mutex_enter(block_mutex);
1103+ ut_a(block_mutex == &buf_pool->zip_mutex);
1104 bpage->buf_fix_count++;
1105 goto got_block;
1106 case BUF_BLOCK_FILE_PAGE:
1107- block_mutex = &((buf_block_t*) bpage)->mutex;
1108- mutex_enter(block_mutex);
1109+ ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1110
1111 /* Discard the uncompressed page frame if possible. */
df1b5770
AM
1112- if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) {
1113+ if (buf_LRU_free_block(bpage, FALSE, FALSE) == BUF_LRU_FREED) {
b4e1fa2c
AM
1114
1115 mutex_exit(block_mutex);
df1b5770
AM
1116 goto lookup;
1117@@ -2388,7 +2478,7 @@
b4e1fa2c
AM
1118 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1119 access_time = buf_page_is_accessed(bpage);
1120
1121- buf_pool_mutex_exit(buf_pool);
1122+ //buf_pool_mutex_exit(buf_pool);
1123
1124 mutex_exit(block_mutex);
1125
df1b5770 1126@@ -2697,7 +2787,7 @@
b4e1fa2c
AM
1127 const buf_block_t* block) /*!< in: pointer to block,
1128 not dereferenced */
1129 {
1130- ut_ad(buf_pool_mutex_own(buf_pool));
1131+ //ut_ad(buf_pool_mutex_own(buf_pool));
1132
1133 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1134 /* The pointer should be aligned. */
df1b5770 1135@@ -2733,6 +2823,7 @@
b4e1fa2c
AM
1136 ulint fix_type;
1137 ibool must_read;
1138 ulint retries = 0;
1139+ mutex_t* block_mutex = NULL;
1140 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1141
1142 ut_ad(mtr);
df1b5770 1143@@ -2755,9 +2846,11 @@
b4e1fa2c
AM
1144 fold = buf_page_address_fold(space, offset);
1145 loop:
1146 block = guess;
1147- buf_pool_mutex_enter(buf_pool);
1148+ //buf_pool_mutex_enter(buf_pool);
1149
1150 if (block) {
1151+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1152+
1153 /* If the guess is a compressed page descriptor that
1154 has been allocated by buf_buddy_alloc(), it may have
1155 been invalidated by buf_buddy_relocate(). In that
df1b5770 1156@@ -2766,11 +2859,15 @@
b4e1fa2c
AM
1157 the guess may be pointing to a buffer pool chunk that
1158 has been released when resizing the buffer pool. */
1159
1160- if (!buf_block_is_uncompressed(buf_pool, block)
1161+ if (!block_mutex) {
1162+ block = guess = NULL;
1163+ } else if (!buf_block_is_uncompressed(buf_pool, block)
1164 || offset != block->page.offset
1165 || space != block->page.space
1166 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1167
1168+ mutex_exit(block_mutex);
1169+
1170 block = guess = NULL;
1171 } else {
1172 ut_ad(!block->page.in_zip_hash);
df1b5770 1173@@ -2779,12 +2876,19 @@
b4e1fa2c
AM
1174 }
1175
1176 if (block == NULL) {
1177+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1178 block = (buf_block_t*) buf_page_hash_get_low(
1179 buf_pool, space, offset, fold);
1180+ if (block) {
1181+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1182+ ut_a(block_mutex);
1183+ }
1184+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1185 }
1186
1187 loop2:
1188 if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1189+ mutex_exit(block_mutex);
1190 block = NULL;
1191 }
1192
df1b5770 1193@@ -2796,12 +2900,14 @@
b4e1fa2c
AM
1194 space, offset, fold);
1195
1196 if (UNIV_LIKELY_NULL(block)) {
1197-
1198+ block_mutex = buf_page_get_mutex((buf_page_t*)block);
1199+ ut_a(block_mutex);
1200+ ut_ad(mutex_own(block_mutex));
1201 goto got_block;
1202 }
1203 }
1204
1205- buf_pool_mutex_exit(buf_pool);
1206+ //buf_pool_mutex_exit(buf_pool);
1207
1208 if (mode == BUF_GET_IF_IN_POOL
1209 || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
df1b5770 1210@@ -2849,7 +2955,8 @@
b4e1fa2c
AM
1211 /* The page is being read to buffer pool,
1212 but we cannot wait around for the read to
1213 complete. */
1214- buf_pool_mutex_exit(buf_pool);
1215+ //buf_pool_mutex_exit(buf_pool);
1216+ mutex_exit(block_mutex);
1217
1218 return(NULL);
1219 }
df1b5770 1220@@ -2859,38 +2966,49 @@
b4e1fa2c
AM
1221 ibool success;
1222
1223 case BUF_BLOCK_FILE_PAGE:
1224+ if (block_mutex == &buf_pool->zip_mutex) {
1225+ /* this is the wrong mutex for a file page; release it and retry */
1226+ mutex_exit(block_mutex);
1227+ goto loop;
1228+ }
1229 break;
1230
1231 case BUF_BLOCK_ZIP_PAGE:
1232 case BUF_BLOCK_ZIP_DIRTY:
1233+ ut_ad(block_mutex == &buf_pool->zip_mutex);
1234 bpage = &block->page;
1235 /* Protect bpage->buf_fix_count. */
1236- mutex_enter(&buf_pool->zip_mutex);
1237+ //mutex_enter(&buf_pool->zip_mutex);
1238
1239 if (bpage->buf_fix_count
1240 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1241 /* This condition often occurs when the buffer
1242 is not buffer-fixed, but I/O-fixed by
1243 buf_page_init_for_read(). */
1244- mutex_exit(&buf_pool->zip_mutex);
1245+ //mutex_exit(&buf_pool->zip_mutex);
1246 wait_until_unfixed:
1247 /* The block is buffer-fixed or I/O-fixed.
1248 Try again later. */
1249- buf_pool_mutex_exit(buf_pool);
1250+ //buf_pool_mutex_exit(buf_pool);
1251+ mutex_exit(block_mutex);
1252 os_thread_sleep(WAIT_FOR_READ);
1253
1254 goto loop;
1255 }
1256
1257 /* Allocate an uncompressed page. */
1258- buf_pool_mutex_exit(buf_pool);
1259- mutex_exit(&buf_pool->zip_mutex);
1260+ //buf_pool_mutex_exit(buf_pool);
1261+ //mutex_exit(&buf_pool->zip_mutex);
1262+ mutex_exit(block_mutex);
1263
df1b5770 1264 block = buf_LRU_get_free_block(buf_pool);
b4e1fa2c
AM
1265 ut_a(block);
1266+ block_mutex = &block->mutex;
1267
1268- buf_pool_mutex_enter(buf_pool);
1269- mutex_enter(&block->mutex);
1270+ //buf_pool_mutex_enter(buf_pool);
1271+ mutex_enter(&buf_pool->LRU_list_mutex);
1272+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1273+ mutex_enter(block_mutex);
1274
1275 {
1276 buf_page_t* hash_bpage;
df1b5770 1277@@ -2903,35 +3021,47 @@
b4e1fa2c
AM
1278 while buf_pool->mutex was released.
1279 Free the block that was allocated. */
1280
1281- buf_LRU_block_free_non_file_page(block);
1282- mutex_exit(&block->mutex);
1283+ buf_LRU_block_free_non_file_page(block, TRUE);
1284+ mutex_exit(block_mutex);
1285
1286 block = (buf_block_t*) hash_bpage;
1287+ if (block) {
1288+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1289+ ut_a(block_mutex);
1290+ }
1291+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1292+ mutex_exit(&buf_pool->LRU_list_mutex);
1293 goto loop2;
1294 }
1295 }
1296
1297+ mutex_enter(&buf_pool->zip_mutex);
1298+
1299 if (UNIV_UNLIKELY
1300 (bpage->buf_fix_count
1301 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1302
1303+ mutex_exit(&buf_pool->zip_mutex);
1304 /* The block was buffer-fixed or I/O-fixed
1305 while buf_pool->mutex was not held by this thread.
1306 Free the block that was allocated and try again.
1307 This should be extremely unlikely. */
1308
1309- buf_LRU_block_free_non_file_page(block);
1310- mutex_exit(&block->mutex);
1311+ buf_LRU_block_free_non_file_page(block, TRUE);
1312+ //mutex_exit(&block->mutex);
1313
1314+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1315+ mutex_exit(&buf_pool->LRU_list_mutex);
1316 goto wait_until_unfixed;
1317 }
1318
1319 /* Move the compressed page from bpage to block,
1320 and uncompress it. */
1321
1322- mutex_enter(&buf_pool->zip_mutex);
1323-
1324 buf_relocate(bpage, &block->page);
1325+
1326+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1327+
1328 buf_block_init_low(block);
1329 block->lock_hash_val = lock_rec_hash(space, offset);
1330
df1b5770 1331@@ -2940,7 +3070,7 @@
b4e1fa2c
AM
1332
1333 if (buf_page_get_state(&block->page)
1334 == BUF_BLOCK_ZIP_PAGE) {
1335- UT_LIST_REMOVE(list, buf_pool->zip_clean,
1336+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1337 &block->page);
1338 ut_ad(!block->page.in_flush_list);
1339 } else {
df1b5770 1340@@ -2957,19 +3087,24 @@
b4e1fa2c
AM
1341 /* Insert at the front of unzip_LRU list */
1342 buf_unzip_LRU_add_block(block, FALSE);
1343
1344+ mutex_exit(&buf_pool->LRU_list_mutex);
1345+
1346 block->page.buf_fix_count = 1;
1347 buf_block_set_io_fix(block, BUF_IO_READ);
1348 rw_lock_x_lock_func(&block->lock, 0, file, line);
1349
1350 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1351
1352- mutex_exit(&block->mutex);
1353+ mutex_exit(block_mutex);
1354 mutex_exit(&buf_pool->zip_mutex);
1355+
1356+ buf_pool_mutex_enter(buf_pool);
1357 buf_pool->n_pend_unzip++;
1358+ buf_pool_mutex_exit(buf_pool);
1359
1360- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1361+ buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1362
1363- buf_pool_mutex_exit(buf_pool);
1364+ //buf_pool_mutex_exit(buf_pool);
1365
1366 /* Decompress the page and apply buffered operations
1367 while not holding buf_pool->mutex or block->mutex. */
df1b5770 1368@@ -2982,12 +3117,15 @@
b4e1fa2c
AM
1369 }
1370
1371 /* Unfix and unlatch the block. */
1372- buf_pool_mutex_enter(buf_pool);
1373- mutex_enter(&block->mutex);
1374+ //buf_pool_mutex_enter(buf_pool);
1375+ block_mutex = &block->mutex;
1376+ mutex_enter(block_mutex);
1377 block->page.buf_fix_count--;
1378 buf_block_set_io_fix(block, BUF_IO_NONE);
1379- mutex_exit(&block->mutex);
1380+
1381+ buf_pool_mutex_enter(buf_pool);
1382 buf_pool->n_pend_unzip--;
1383+ buf_pool_mutex_exit(buf_pool);
1384 rw_lock_x_unlock(&block->lock);
1385
1386 break;
df1b5770 1387@@ -3003,7 +3141,7 @@
b4e1fa2c
AM
1388
1389 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1390
1391- mutex_enter(&block->mutex);
1392+ //mutex_enter(&block->mutex);
1393 #if UNIV_WORD_SIZE == 4
1394 /* On 32-bit systems, there is no padding in buf_page_t. On
1395 other systems, Valgrind could complain about uninitialized pad
11822e22 1396@@ -3016,8 +3154,8 @@
b4e1fa2c
AM
1397 /* Try to evict the block from the buffer pool, to use the
1398 insert buffer (change buffer) as much as possible. */
1399
df1b5770 1400- if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) {
11822e22 1401- mutex_exit(&block->mutex);
df1b5770 1402+ if (buf_LRU_free_block(&block->page, TRUE, FALSE) == BUF_LRU_FREED) {
11822e22 1403+ mutex_exit(block_mutex);
b4e1fa2c 1404 if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
df1b5770 1405 /* Set the watch, as it would have
11822e22
AM
1406 been set if the page were not in the
1407@@ -3026,6 +3164,9 @@
1408 space, offset, fold);
1409
1410 if (UNIV_LIKELY_NULL(block)) {
1411+ block_mutex = buf_page_get_mutex((buf_page_t*)block);
1412+ ut_a(block_mutex);
1413+ ut_ad(mutex_own(block_mutex));
1414
1415 /* The page entered the buffer
1416 pool for some reason. Try to
1417@@ -3033,7 +3174,7 @@
1418 goto got_block;
1419 }
1420 }
1421- buf_pool_mutex_exit(buf_pool);
1422+ //buf_pool_mutex_exit(buf_pool);
1423 fprintf(stderr,
1424 "innodb_change_buffering_debug evict %u %u\n",
1425 (unsigned) space, (unsigned) offset);
1426@@ -3052,13 +3193,14 @@
b4e1fa2c
AM
1427
1428 buf_block_buf_fix_inc(block, file, line);
1429
1430- mutex_exit(&block->mutex);
1431+ //mutex_exit(&block->mutex);
1432
1433 /* Check if this is the first access to the page */
1434
1435 access_time = buf_page_is_accessed(&block->page);
1436
1437- buf_pool_mutex_exit(buf_pool);
1438+ //buf_pool_mutex_exit(buf_pool);
1439+ mutex_exit(block_mutex);
1440
1441 buf_page_set_accessed_make_young(&block->page, access_time);
1442
11822e22 1443@@ -3291,9 +3433,11 @@
b4e1fa2c
AM
1444 buf_pool = buf_pool_from_block(block);
1445
1446 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1447- buf_pool_mutex_enter(buf_pool);
1448+ //buf_pool_mutex_enter(buf_pool);
1449+ mutex_enter(&buf_pool->LRU_list_mutex);
1450 buf_LRU_make_block_young(&block->page);
1451- buf_pool_mutex_exit(buf_pool);
1452+ //buf_pool_mutex_exit(buf_pool);
1453+ mutex_exit(&buf_pool->LRU_list_mutex);
1454 } else if (!buf_page_is_accessed(&block->page)) {
1455 /* Above, we do a dirty read on purpose, to avoid
1456 mutex contention. The field buf_page_t::access_time
11822e22 1457@@ -3301,9 +3445,11 @@
b4e1fa2c
AM
1458 field must be protected by mutex, however. */
1459 ulint time_ms = ut_time_ms();
1460
1461- buf_pool_mutex_enter(buf_pool);
1462+ //buf_pool_mutex_enter(buf_pool);
1463+ mutex_enter(&block->mutex);
1464 buf_page_set_accessed(&block->page, time_ms);
1465- buf_pool_mutex_exit(buf_pool);
1466+ //buf_pool_mutex_exit(buf_pool);
1467+ mutex_exit(&block->mutex);
1468 }
1469
1470 ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
11822e22 1471@@ -3370,18 +3516,21 @@
b4e1fa2c
AM
1472 ut_ad(mtr);
1473 ut_ad(mtr->state == MTR_ACTIVE);
1474
1475- buf_pool_mutex_enter(buf_pool);
1476+ //buf_pool_mutex_enter(buf_pool);
1477+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1478 block = buf_block_hash_get(buf_pool, space_id, page_no);
1479
1480 if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1481- buf_pool_mutex_exit(buf_pool);
1482+ //buf_pool_mutex_exit(buf_pool);
1483+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1484 return(NULL);
1485 }
1486
1487 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1488
1489 mutex_enter(&block->mutex);
1490- buf_pool_mutex_exit(buf_pool);
1491+ //buf_pool_mutex_exit(buf_pool);
1492+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1493
1494 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1495 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
11822e22 1496@@ -3470,7 +3619,10 @@
b4e1fa2c
AM
1497 buf_page_t* hash_page;
1498 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1499
1500- ut_ad(buf_pool_mutex_own(buf_pool));
1501+ //ut_ad(buf_pool_mutex_own(buf_pool));
1502+#ifdef UNIV_SYNC_DEBUG
1503+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1504+#endif
1505 ut_ad(mutex_own(&(block->mutex)));
1506 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1507
11822e22 1508@@ -3499,11 +3651,14 @@
b4e1fa2c
AM
1509 if (UNIV_LIKELY(!hash_page)) {
1510 } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1511 /* Preserve the reference count. */
1512- ulint buf_fix_count = hash_page->buf_fix_count;
1513+ ulint buf_fix_count;
1514
1515+ mutex_enter(&buf_pool->zip_mutex);
1516+ buf_fix_count = hash_page->buf_fix_count;
1517 ut_a(buf_fix_count > 0);
1518 block->page.buf_fix_count += buf_fix_count;
1519 buf_pool_watch_remove(buf_pool, fold, hash_page);
1520+ mutex_exit(&buf_pool->zip_mutex);
1521 } else {
1522 fprintf(stderr,
1523 "InnoDB: Error: page %lu %lu already found"
11822e22 1524@@ -3513,7 +3668,8 @@
b4e1fa2c
AM
1525 (const void*) hash_page, (const void*) block);
1526 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1527 mutex_exit(&block->mutex);
1528- buf_pool_mutex_exit(buf_pool);
1529+ //buf_pool_mutex_exit(buf_pool);
1530+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1531 buf_print();
1532 buf_LRU_print();
1533 buf_validate();
11822e22 1534@@ -3597,7 +3753,9 @@
b4e1fa2c
AM
1535
1536 fold = buf_page_address_fold(space, offset);
1537
1538- buf_pool_mutex_enter(buf_pool);
1539+ //buf_pool_mutex_enter(buf_pool);
1540+ mutex_enter(&buf_pool->LRU_list_mutex);
1541+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1542
1543 watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1544 if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
11822e22 1545@@ -3606,9 +3764,15 @@
b4e1fa2c
AM
1546 err_exit:
1547 if (block) {
1548 mutex_enter(&block->mutex);
1549- buf_LRU_block_free_non_file_page(block);
1550+ mutex_exit(&buf_pool->LRU_list_mutex);
1551+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1552+ buf_LRU_block_free_non_file_page(block, FALSE);
1553 mutex_exit(&block->mutex);
1554 }
1555+ else {
1556+ mutex_exit(&buf_pool->LRU_list_mutex);
1557+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1558+ }
1559
1560 bpage = NULL;
1561 goto func_exit;
11822e22 1562@@ -3631,6 +3795,8 @@
b4e1fa2c
AM
1563
1564 buf_page_init(space, offset, fold, block);
1565
1566+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1567+
1568 /* The block must be put to the LRU list, to the old blocks */
1569 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1570
11822e22 1571@@ -3658,7 +3824,7 @@
b4e1fa2c
AM
1572 been added to buf_pool->LRU and
1573 buf_pool->page_hash. */
1574 mutex_exit(&block->mutex);
1575- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1576+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1577 mutex_enter(&block->mutex);
1578 block->page.zip.data = data;
1579
11822e22 1580@@ -3671,6 +3837,7 @@
b4e1fa2c
AM
1581 buf_unzip_LRU_add_block(block, TRUE);
1582 }
1583
1584+ mutex_exit(&buf_pool->LRU_list_mutex);
1585 mutex_exit(&block->mutex);
1586 } else {
1587 /* Defer buf_buddy_alloc() until after the block has
11822e22 1588@@ -3682,8 +3849,8 @@
b4e1fa2c
AM
1589 control block (bpage), in order to avoid the
1590 invocation of buf_buddy_relocate_block() on
1591 uninitialized data. */
1592- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1593- bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1594+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1595+ bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1596
1597 /* Initialize the buf_pool pointer. */
1598 bpage->buf_pool_index = buf_pool_index(buf_pool);
11822e22 1599@@ -3702,8 +3869,11 @@
b4e1fa2c
AM
1600
1601 /* The block was added by some other thread. */
1602 watch_page = NULL;
1603- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1604- buf_buddy_free(buf_pool, data, zip_size);
1605+ buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1606+ buf_buddy_free(buf_pool, data, zip_size, TRUE);
1607+
1608+ mutex_exit(&buf_pool->LRU_list_mutex);
1609+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1610
1611 bpage = NULL;
1612 goto func_exit;
11822e22 1613@@ -3747,18 +3917,24 @@
b4e1fa2c
AM
1614 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1615 bpage);
1616
1617+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1618+
1619 /* The block must be put to the LRU list, to the old blocks */
1620 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1621 buf_LRU_insert_zip_clean(bpage);
1622
1623+ mutex_exit(&buf_pool->LRU_list_mutex);
1624+
1625 buf_page_set_io_fix(bpage, BUF_IO_READ);
1626
1627 mutex_exit(&buf_pool->zip_mutex);
1628 }
1629
1630+ buf_pool_mutex_enter(buf_pool);
1631 buf_pool->n_pend_reads++;
1632-func_exit:
1633 buf_pool_mutex_exit(buf_pool);
1634+func_exit:
1635+ //buf_pool_mutex_exit(buf_pool);
1636
1637 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1638
11822e22 1639@@ -3800,7 +3976,9 @@
b4e1fa2c
AM
1640
1641 fold = buf_page_address_fold(space, offset);
1642
1643- buf_pool_mutex_enter(buf_pool);
1644+ //buf_pool_mutex_enter(buf_pool);
1645+ mutex_enter(&buf_pool->LRU_list_mutex);
1646+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1647
1648 block = (buf_block_t*) buf_page_hash_get_low(
1649 buf_pool, space, offset, fold);
11822e22 1650@@ -3816,7 +3994,9 @@
df1b5770 1651 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
b4e1fa2c
AM
1652
1653 /* Page can be found in buf_pool */
1654- buf_pool_mutex_exit(buf_pool);
1655+ //buf_pool_mutex_exit(buf_pool);
1656+ mutex_exit(&buf_pool->LRU_list_mutex);
1657+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1658
1659 buf_block_free(free_block);
1660
11822e22 1661@@ -3838,6 +4018,7 @@
b4e1fa2c
AM
1662 mutex_enter(&block->mutex);
1663
1664 buf_page_init(space, offset, fold, block);
1665+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1666
1667 /* The block must be put to the LRU list */
1668 buf_LRU_add_block(&block->page, FALSE);
11822e22 1669@@ -3864,7 +4045,7 @@
b4e1fa2c
AM
1670 the reacquisition of buf_pool->mutex. We also must
1671 defer this operation until after the block descriptor
1672 has been added to buf_pool->LRU and buf_pool->page_hash. */
1673- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1674+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1675 mutex_enter(&block->mutex);
1676 block->page.zip.data = data;
1677
11822e22 1678@@ -3882,7 +4063,8 @@
b4e1fa2c
AM
1679
1680 buf_page_set_accessed(&block->page, time_ms);
1681
1682- buf_pool_mutex_exit(buf_pool);
1683+ //buf_pool_mutex_exit(buf_pool);
1684+ mutex_exit(&buf_pool->LRU_list_mutex);
1685
1686 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1687
11822e22 1688@@ -3933,6 +4115,8 @@
b4e1fa2c
AM
1689 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1690 const ibool uncompressed = (buf_page_get_state(bpage)
1691 == BUF_BLOCK_FILE_PAGE);
1692+ ibool have_LRU_mutex = FALSE;
1693+ mutex_t* block_mutex;
1694
1695 ut_a(buf_page_in_file(bpage));
1696
11822e22 1697@@ -4066,8 +4250,26 @@
b4e1fa2c
AM
1698 }
1699 }
1700
1701+ if (io_type == BUF_IO_WRITE
1702+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1703+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1704+ /* to keep consistency at buf_LRU_insert_zip_clean() */
1705+ have_LRU_mutex = TRUE; /* optimistic */
1706+ }
1707+retry_mutex:
1708+ if (have_LRU_mutex)
1709+ mutex_enter(&buf_pool->LRU_list_mutex);
1710+ block_mutex = buf_page_get_mutex_enter(bpage);
1711+ ut_a(block_mutex);
1712+ if (io_type == BUF_IO_WRITE
1713+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1714+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1715+ && !have_LRU_mutex) {
1716+ mutex_exit(block_mutex);
1717+ have_LRU_mutex = TRUE;
1718+ goto retry_mutex;
1719+ }
1720 buf_pool_mutex_enter(buf_pool);
1721- mutex_enter(buf_page_get_mutex(bpage));
1722
1723 #ifdef UNIV_IBUF_COUNT_DEBUG
1724 if (io_type == BUF_IO_WRITE || uncompressed) {
11822e22 1725@@ -4090,6 +4292,7 @@
b4e1fa2c
AM
1726 the x-latch to this OS thread: do not let this confuse you in
1727 debugging! */
1728
1729+ ut_a(!have_LRU_mutex);
1730 ut_ad(buf_pool->n_pend_reads > 0);
1731 buf_pool->n_pend_reads--;
1732 buf_pool->stat.n_pages_read++;
11822e22 1733@@ -4107,6 +4310,9 @@
b4e1fa2c
AM
1734
1735 buf_flush_write_complete(bpage);
1736
1737+ if (have_LRU_mutex)
1738+ mutex_exit(&buf_pool->LRU_list_mutex);
1739+
1740 if (uncompressed) {
1741 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1742 BUF_IO_WRITE);
11822e22 1743@@ -4129,8 +4335,8 @@
b4e1fa2c
AM
1744 }
1745 #endif /* UNIV_DEBUG */
1746
1747- mutex_exit(buf_page_get_mutex(bpage));
1748 buf_pool_mutex_exit(buf_pool);
1749+ mutex_exit(block_mutex);
1750 }
1751
1752 /*********************************************************************//**
11822e22 1753@@ -4147,7 +4353,9 @@
b4e1fa2c
AM
1754
1755 ut_ad(buf_pool);
1756
1757- buf_pool_mutex_enter(buf_pool);
1758+ //buf_pool_mutex_enter(buf_pool);
1759+ mutex_enter(&buf_pool->LRU_list_mutex);
1760+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1761
1762 chunk = buf_pool->chunks;
1763
11822e22 1764@@ -4164,7 +4372,9 @@
b4e1fa2c
AM
1765 }
1766 }
1767
1768- buf_pool_mutex_exit(buf_pool);
1769+ //buf_pool_mutex_exit(buf_pool);
1770+ mutex_exit(&buf_pool->LRU_list_mutex);
1771+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1772
1773 return(TRUE);
1774 }
11822e22 1775@@ -4212,7 +4422,8 @@
b4e1fa2c
AM
1776 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1777 }
1778
1779- buf_pool_mutex_enter(buf_pool);
1780+ //buf_pool_mutex_enter(buf_pool);
1781+ mutex_enter(&buf_pool->LRU_list_mutex);
1782
1783 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1784 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
11822e22 1785@@ -4225,7 +4436,8 @@
b4e1fa2c
AM
1786 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1787 buf_refresh_io_stats(buf_pool);
1788
1789- buf_pool_mutex_exit(buf_pool);
1790+ //buf_pool_mutex_exit(buf_pool);
1791+ mutex_exit(&buf_pool->LRU_list_mutex);
1792 }
1793
1794 /*********************************************************************//**
11822e22 1795@@ -4267,7 +4479,10 @@
b4e1fa2c
AM
1796
1797 ut_ad(buf_pool);
1798
1799- buf_pool_mutex_enter(buf_pool);
1800+ //buf_pool_mutex_enter(buf_pool);
1801+ mutex_enter(&buf_pool->LRU_list_mutex);
1802+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1803+ /* to keep the new latch order, we cannot validate correctly here... */
1804
1805 chunk = buf_pool->chunks;
1806
11822e22 1807@@ -4362,7 +4577,7 @@
b4e1fa2c
AM
1808 /* Check clean compressed-only blocks. */
1809
1810 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1811- b = UT_LIST_GET_NEXT(list, b)) {
1812+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1813 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1814 switch (buf_page_get_io_fix(b)) {
1815 case BUF_IO_NONE:
11822e22 1816@@ -4393,7 +4608,7 @@
b4e1fa2c
AM
1817
1818 buf_flush_list_mutex_enter(buf_pool);
1819 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1820- b = UT_LIST_GET_NEXT(list, b)) {
1821+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1822 ut_ad(b->in_flush_list);
1823 ut_a(b->oldest_modification);
1824 n_flush++;
11822e22 1825@@ -4452,6 +4667,8 @@
b4e1fa2c
AM
1826 }
1827
1828 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1829+ /* because of the latching order with block->mutex, we cannot acquire the needed mutexes before that */
1830+/*
1831 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1832 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1833 (ulong) UT_LIST_GET_LEN(buf_pool->free),
11822e22 1834@@ -4462,8 +4679,11 @@
b4e1fa2c
AM
1835 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1836 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1837 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1838+*/
1839
1840- buf_pool_mutex_exit(buf_pool);
1841+ //buf_pool_mutex_exit(buf_pool);
1842+ mutex_exit(&buf_pool->LRU_list_mutex);
1843+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1844
1845 ut_a(buf_LRU_validate());
1846 ut_a(buf_flush_validate(buf_pool));
11822e22 1847@@ -4519,7 +4739,9 @@
b4e1fa2c
AM
1848 index_ids = mem_alloc(size * sizeof *index_ids);
1849 counts = mem_alloc(sizeof(ulint) * size);
1850
1851- buf_pool_mutex_enter(buf_pool);
1852+ //buf_pool_mutex_enter(buf_pool);
1853+ mutex_enter(&buf_pool->LRU_list_mutex);
1854+ mutex_enter(&buf_pool->free_list_mutex);
1855 buf_flush_list_mutex_enter(buf_pool);
1856
1857 fprintf(stderr,
11822e22 1858@@ -4588,7 +4810,9 @@
b4e1fa2c
AM
1859 }
1860 }
1861
1862- buf_pool_mutex_exit(buf_pool);
1863+ //buf_pool_mutex_exit(buf_pool);
1864+ mutex_exit(&buf_pool->LRU_list_mutex);
1865+ mutex_exit(&buf_pool->free_list_mutex);
1866
1867 for (i = 0; i < n_found; i++) {
1868 index = dict_index_get_if_in_cache(index_ids[i]);
11822e22 1869@@ -4645,7 +4869,7 @@
b4e1fa2c
AM
1870 buf_chunk_t* chunk;
1871 ulint fixed_pages_number = 0;
1872
1873- buf_pool_mutex_enter(buf_pool);
1874+ //buf_pool_mutex_enter(buf_pool);
1875
1876 chunk = buf_pool->chunks;
1877
11822e22 1878@@ -4679,7 +4903,7 @@
b4e1fa2c
AM
1879 /* Traverse the lists of clean and dirty compressed-only blocks. */
1880
1881 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1882- b = UT_LIST_GET_NEXT(list, b)) {
1883+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1884 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1885 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1886
11822e22 1887@@ -4691,7 +4915,7 @@
b4e1fa2c
AM
1888
1889 buf_flush_list_mutex_enter(buf_pool);
1890 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1891- b = UT_LIST_GET_NEXT(list, b)) {
1892+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1893 ut_ad(b->in_flush_list);
1894
1895 switch (buf_page_get_state(b)) {
11822e22 1896@@ -4717,7 +4941,7 @@
b4e1fa2c
AM
1897
1898 buf_flush_list_mutex_exit(buf_pool);
1899 mutex_exit(&buf_pool->zip_mutex);
1900- buf_pool_mutex_exit(buf_pool);
1901+ //buf_pool_mutex_exit(buf_pool);
1902
1903 return(fixed_pages_number);
1904 }
11822e22 1905@@ -4873,6 +5097,8 @@
d8778560
AM
1906 /* Find appropriate pool_info to store stats for this buffer pool */
1907 pool_info = &all_pool_info[pool_id];
b4e1fa2c
AM
1908
1909+ mutex_enter(&buf_pool->LRU_list_mutex);
1910+ mutex_enter(&buf_pool->free_list_mutex);
1911 buf_pool_mutex_enter(buf_pool);
1912 buf_flush_list_mutex_enter(buf_pool);
1913
11822e22 1914@@ -4983,6 +5209,8 @@
d8778560 1915 pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
b4e1fa2c
AM
1916
1917 buf_refresh_io_stats(buf_pool);
1918+ mutex_exit(&buf_pool->LRU_list_mutex);
1919+ mutex_exit(&buf_pool->free_list_mutex);
1920 buf_pool_mutex_exit(buf_pool);
1921 }
1922
11822e22 1923@@ -5224,11 +5452,13 @@
b4e1fa2c
AM
1924 {
1925 ulint len;
1926
1927- buf_pool_mutex_enter(buf_pool);
1928+ //buf_pool_mutex_enter(buf_pool);
1929+ mutex_enter(&buf_pool->free_list_mutex);
1930
1931 len = UT_LIST_GET_LEN(buf_pool->free);
1932
1933- buf_pool_mutex_exit(buf_pool);
1934+ //buf_pool_mutex_exit(buf_pool);
1935+ mutex_exit(&buf_pool->free_list_mutex);
1936
1937 return(len);
1938 }
1939diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1940--- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 +0900
1941+++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900
d8778560 1942@@ -307,7 +307,7 @@
b4e1fa2c
AM
1943
1944 ut_d(block->page.in_flush_list = TRUE);
1945 block->page.oldest_modification = lsn;
1946- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1947+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1948
1949 #ifdef UNIV_DEBUG_VALGRIND
1950 {
d8778560 1951@@ -401,14 +401,14 @@
b4e1fa2c
AM
1952 > block->page.oldest_modification) {
1953 ut_ad(b->in_flush_list);
1954 prev_b = b;
1955- b = UT_LIST_GET_NEXT(list, b);
1956+ b = UT_LIST_GET_NEXT(flush_list, b);
1957 }
1958 }
1959
1960 if (prev_b == NULL) {
1961- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1962+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1963 } else {
1964- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1965+ UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1966 prev_b, &block->page);
1967 }
1968
d8778560 1969@@ -434,7 +434,7 @@
b4e1fa2c
AM
1970 //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1971 //ut_ad(buf_pool_mutex_own(buf_pool));
1972 #endif
1973- //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1974+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1975 //ut_ad(bpage->in_LRU_list);
1976
1977 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
d8778560 1978@@ -470,14 +470,14 @@
b4e1fa2c
AM
1979 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1980 {
1981 #ifdef UNIV_DEBUG
1982- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1983- ut_ad(buf_pool_mutex_own(buf_pool));
1984+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1985+ //ut_ad(buf_pool_mutex_own(buf_pool));
1986 #endif
1987- ut_a(buf_page_in_file(bpage));
1988+ //ut_a(buf_page_in_file(bpage));
1989 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1990 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1991
1992- if (bpage->oldest_modification != 0
1993+ if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1994 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1995 ut_ad(bpage->in_flush_list);
1996
d8778560 1997@@ -508,7 +508,7 @@
b4e1fa2c
AM
1998 {
1999 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2000
2001- ut_ad(buf_pool_mutex_own(buf_pool));
2002+ //ut_ad(buf_pool_mutex_own(buf_pool));
2003 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
2004 ut_ad(bpage->in_flush_list);
2005
d8778560 2006@@ -526,11 +526,11 @@
b4e1fa2c
AM
2007 return;
2008 case BUF_BLOCK_ZIP_DIRTY:
2009 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
2010- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2011+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2012 buf_LRU_insert_zip_clean(bpage);
2013 break;
2014 case BUF_BLOCK_FILE_PAGE:
2015- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2016+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2017 break;
2018 }
2019
d8778560 2020@@ -574,7 +574,7 @@
b4e1fa2c
AM
2021 buf_page_t* prev_b = NULL;
2022 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2023
2024- ut_ad(buf_pool_mutex_own(buf_pool));
2025+ //ut_ad(buf_pool_mutex_own(buf_pool));
2026 /* Must reside in the same buffer pool. */
2027 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2028
d8778560 2029@@ -603,18 +603,18 @@
b4e1fa2c
AM
2030 because we assert on in_flush_list in comparison function. */
2031 ut_d(bpage->in_flush_list = FALSE);
2032
2033- prev = UT_LIST_GET_PREV(list, bpage);
2034- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2035+ prev = UT_LIST_GET_PREV(flush_list, bpage);
2036+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2037
2038 if (prev) {
2039 ut_ad(prev->in_flush_list);
2040 UT_LIST_INSERT_AFTER(
2041- list,
2042+ flush_list,
2043 buf_pool->flush_list,
2044 prev, dpage);
2045 } else {
2046 UT_LIST_ADD_FIRST(
2047- list,
2048+ flush_list,
2049 buf_pool->flush_list,
2050 dpage);
2051 }
d8778560 2052@@ -1083,7 +1083,7 @@
b4e1fa2c
AM
2053
2054 #ifdef UNIV_DEBUG
2055 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2056- ut_ad(!buf_pool_mutex_own(buf_pool));
2057+ //ut_ad(!buf_pool_mutex_own(buf_pool));
2058 #endif
2059
2060 #ifdef UNIV_LOG_DEBUG
d8778560 2061@@ -1097,7 +1097,8 @@
b4e1fa2c
AM
2062 io_fixed and oldest_modification != 0. Thus, it cannot be
2063 relocated in the buffer pool or removed from flush_list or
2064 LRU_list. */
2065- ut_ad(!buf_pool_mutex_own(buf_pool));
2066+ //ut_ad(!buf_pool_mutex_own(buf_pool));
2067+ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2068 ut_ad(!buf_flush_list_mutex_own(buf_pool));
2069 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2070 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
11822e22
AM
2071@@ -1177,7 +1178,7 @@
2072 buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
2073 buf_block_t* block) /*!< in/out: buffer control block */
2074 {
2075- ut_ad(buf_pool_mutex_own(buf_pool));
2076+ //ut_ad(buf_pool_mutex_own(buf_pool));
2077 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2078 ut_ad(mutex_own(&block->mutex));
2079
2080@@ -1185,8 +1186,11 @@
2081 return(FALSE);
2082 }
2083
2084+ buf_pool_mutex_enter(buf_pool);
2085+
2086 if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
2087 || buf_pool->init_flush[BUF_FLUSH_LRU]) {
2088+ buf_pool_mutex_exit(buf_pool);
2089 /* There is already a flush batch of the same type running */
2090 return(FALSE);
2091 }
2092@@ -1260,12 +1264,18 @@
b4e1fa2c
AM
2093 ibool is_uncompressed;
2094
2095 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2096- ut_ad(buf_pool_mutex_own(buf_pool));
2097+ //ut_ad(buf_pool_mutex_own(buf_pool));
2098+#ifdef UNIV_SYNC_DEBUG
2099+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2100+#endif
2101 ut_ad(buf_page_in_file(bpage));
2102
2103 block_mutex = buf_page_get_mutex(bpage);
2104 ut_ad(mutex_own(block_mutex));
2105
2106+ buf_pool_mutex_enter(buf_pool);
2107+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2108+
2109 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2110
2111 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
11822e22 2112@@ -1427,14 +1437,16 @@
b4e1fa2c
AM
2113
2114 buf_pool = buf_pool_get(space, i);
2115
2116- buf_pool_mutex_enter(buf_pool);
2117+ //buf_pool_mutex_enter(buf_pool);
2118+ rw_lock_s_lock(&buf_pool->page_hash_latch);
2119
2120 /* We only want to flush pages from this buffer pool. */
2121 bpage = buf_page_hash_get(buf_pool, space, i);
2122
2123 if (!bpage) {
2124
2125- buf_pool_mutex_exit(buf_pool);
2126+ //buf_pool_mutex_exit(buf_pool);
2127+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2128 continue;
2129 }
2130
11822e22 2131@@ -1446,11 +1458,9 @@
b4e1fa2c
AM
2132 if (flush_type != BUF_FLUSH_LRU
2133 || i == offset
2134 || buf_page_is_old(bpage)) {
2135- mutex_t* block_mutex = buf_page_get_mutex(bpage);
b4e1fa2c
AM
2136+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2137
11822e22
AM
2138- mutex_enter(block_mutex);
2139-
b4e1fa2c
AM
2140- if (buf_flush_ready_for_flush(bpage, flush_type)
2141+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2142 && (i == offset || !bpage->buf_fix_count)) {
2143 /* We only try to flush those
2144 neighbors != offset where the buf fix
11822e22 2145@@ -1466,11 +1476,12 @@
b4e1fa2c
AM
2146 ut_ad(!buf_pool_mutex_own(buf_pool));
2147 count++;
2148 continue;
2149- } else {
2150+ } else if (block_mutex) {
2151 mutex_exit(block_mutex);
2152 }
2153 }
2154- buf_pool_mutex_exit(buf_pool);
2155+ //buf_pool_mutex_exit(buf_pool);
2156+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2157 }
2158
2159 return(count);
11822e22 2160@@ -1503,21 +1514,25 @@
b4e1fa2c
AM
2161 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2162 #endif /* UNIV_DEBUG */
2163
2164- ut_ad(buf_pool_mutex_own(buf_pool));
2165+ //ut_ad(buf_pool_mutex_own(buf_pool));
2166+ ut_ad(flush_type != BUF_FLUSH_LRU
2167+ || mutex_own(&buf_pool->LRU_list_mutex));
2168
2169- block_mutex = buf_page_get_mutex(bpage);
2170- mutex_enter(block_mutex);
2171+ block_mutex = buf_page_get_mutex_enter(bpage);
2172
2173- ut_a(buf_page_in_file(bpage));
2174+ //ut_a(buf_page_in_file(bpage));
2175
2176- if (buf_flush_ready_for_flush(bpage, flush_type)) {
2177+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2178 ulint space;
2179 ulint offset;
2180 buf_pool_t* buf_pool;
2181
2182 buf_pool = buf_pool_from_bpage(bpage);
2183
2184- buf_pool_mutex_exit(buf_pool);
2185+ //buf_pool_mutex_exit(buf_pool);
2186+ if (flush_type == BUF_FLUSH_LRU) {
2187+ mutex_exit(&buf_pool->LRU_list_mutex);
2188+ }
2189
2190 /* These fields are protected by both the
2191 buffer pool mutex and block mutex. */
11822e22 2192@@ -1533,13 +1548,18 @@
b4e1fa2c
AM
2193 *count,
2194 n_to_flush);
2195
2196- buf_pool_mutex_enter(buf_pool);
2197+ //buf_pool_mutex_enter(buf_pool);
2198+ if (flush_type == BUF_FLUSH_LRU) {
2199+ mutex_enter(&buf_pool->LRU_list_mutex);
2200+ }
2201 flushed = TRUE;
2202- } else {
2203+ } else if (block_mutex) {
2204 mutex_exit(block_mutex);
2205 }
2206
2207- ut_ad(buf_pool_mutex_own(buf_pool));
2208+ //ut_ad(buf_pool_mutex_own(buf_pool));
2209+ ut_ad(flush_type != BUF_FLUSH_LRU
2210+ || mutex_own(&buf_pool->LRU_list_mutex));
2211
2212 return(flushed);
2213 }
11822e22 2214@@ -1560,7 +1580,8 @@
b4e1fa2c
AM
2215 buf_page_t* bpage;
2216 ulint count = 0;
2217
2218- ut_ad(buf_pool_mutex_own(buf_pool));
2219+ //ut_ad(buf_pool_mutex_own(buf_pool));
2220+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2221
2222 do {
2223 /* Start from the end of the list looking for a
11822e22 2224@@ -1582,7 +1603,8 @@
b4e1fa2c
AM
2225 should be flushed, we factor in this value. */
2226 buf_lru_flush_page_count += count;
2227
2228- ut_ad(buf_pool_mutex_own(buf_pool));
2229+ //ut_ad(buf_pool_mutex_own(buf_pool));
2230+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2231
2232 return(count);
2233 }
11822e22 2234@@ -1610,9 +1632,10 @@
b4e1fa2c
AM
2235 {
2236 ulint len;
2237 buf_page_t* bpage;
2238+ buf_page_t* prev_bpage = NULL;
2239 ulint count = 0;
2240
2241- ut_ad(buf_pool_mutex_own(buf_pool));
2242+ //ut_ad(buf_pool_mutex_own(buf_pool));
2243
2244 /* If we have flushed enough, leave the loop */
2245 do {
11822e22 2246@@ -1631,6 +1654,7 @@
b4e1fa2c
AM
2247
2248 if (bpage) {
2249 ut_a(bpage->oldest_modification > 0);
2250+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2251 }
2252
2253 if (!bpage || bpage->oldest_modification >= lsn_limit) {
11822e22 2254@@ -1672,9 +1696,17 @@
b4e1fa2c
AM
2255 break;
2256 }
2257
2258- bpage = UT_LIST_GET_PREV(list, bpage);
2259+ bpage = UT_LIST_GET_PREV(flush_list, bpage);
2260
2261- ut_ad(!bpage || bpage->in_flush_list);
2262+ //ut_ad(!bpage || bpage->in_flush_list);
2263+ if (bpage != prev_bpage) {
2264+ /* the flush list changed while we were flushing (the predecessor differs from the one saved earlier); stop here and retry the search */
2265+ buf_flush_list_mutex_exit(buf_pool);
2266+ break;
2267+ }
2268+ if (bpage) {
2269+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2270+ }
2271
2272 buf_flush_list_mutex_exit(buf_pool);
2273
11822e22 2274@@ -1683,7 +1715,7 @@
b4e1fa2c
AM
2275
2276 } while (count < min_n && bpage != NULL && len > 0);
2277
2278- ut_ad(buf_pool_mutex_own(buf_pool));
2279+ //ut_ad(buf_pool_mutex_own(buf_pool));
2280
2281 return(count);
2282 }
11822e22 2283@@ -1722,13 +1754,15 @@
b4e1fa2c
AM
2284 || sync_thread_levels_empty_gen(TRUE));
2285 #endif /* UNIV_SYNC_DEBUG */
2286
2287- buf_pool_mutex_enter(buf_pool);
2288+ //buf_pool_mutex_enter(buf_pool);
2289
2290 /* Note: The buffer pool mutex is released and reacquired within
2291 the flush functions. */
2292 switch(flush_type) {
2293 case BUF_FLUSH_LRU:
2294+ mutex_enter(&buf_pool->LRU_list_mutex);
2295 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2296+ mutex_exit(&buf_pool->LRU_list_mutex);
2297 break;
2298 case BUF_FLUSH_LIST:
2299 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
11822e22 2300@@ -1737,7 +1771,7 @@
b4e1fa2c
AM
2301 ut_error;
2302 }
2303
2304- buf_pool_mutex_exit(buf_pool);
2305+ //buf_pool_mutex_exit(buf_pool);
2306
2307 buf_flush_buffered_writes();
2308
11822e22 2309@@ -1993,7 +2027,7 @@
b4e1fa2c
AM
2310 retry:
2311 //buf_pool_mutex_enter(buf_pool);
2312 if (have_LRU_mutex)
2313- buf_pool_mutex_enter(buf_pool);
2314+ mutex_enter(&buf_pool->LRU_list_mutex);
2315
2316 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2317
11822e22 2318@@ -2010,15 +2044,15 @@
b4e1fa2c
AM
2319 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2320 continue;
2321 }
2322- block_mutex = buf_page_get_mutex(bpage);
2323-
2324- mutex_enter(block_mutex);
2325+ block_mutex = buf_page_get_mutex_enter(bpage);
2326
2327- if (buf_flush_ready_for_replace(bpage)) {
2328+ if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2329 n_replaceable++;
2330 }
2331
2332- mutex_exit(block_mutex);
2333+ if (block_mutex) {
2334+ mutex_exit(block_mutex);
2335+ }
2336
2337 distance++;
2338
11822e22 2339@@ -2027,7 +2061,7 @@
b4e1fa2c
AM
2340
2341 //buf_pool_mutex_exit(buf_pool);
2342 if (have_LRU_mutex)
2343- buf_pool_mutex_exit(buf_pool);
2344+ mutex_exit(&buf_pool->LRU_list_mutex);
2345
2346 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2347
11822e22 2348@@ -2226,7 +2260,7 @@
b4e1fa2c
AM
2349
2350 ut_ad(buf_flush_list_mutex_own(buf_pool));
2351
2352- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2353+ UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2354 ut_ad(ut_list_node_313->in_flush_list));
2355
2356 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
11822e22 2357@@ -2266,7 +2300,7 @@
b4e1fa2c
AM
2358 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2359 }
2360
2361- bpage = UT_LIST_GET_NEXT(list, bpage);
2362+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2363
2364 ut_a(!bpage || om >= bpage->oldest_modification);
2365 }
2366diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2367--- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900
2368+++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900
2369@@ -143,8 +143,9 @@
2370 void
2371 buf_LRU_block_free_hashed_page(
2372 /*===========================*/
2373- buf_block_t* block); /*!< in: block, must contain a file page and
2374+ buf_block_t* block, /*!< in: block, must contain a file page and
2375 be in a state where it can be freed */
2376+ ibool have_page_hash_mutex);
2377
2378 /******************************************************************//**
2379 Determines if the unzip_LRU list should be used for evicting a victim
2380@@ -154,15 +155,20 @@
2381 ibool
2382 buf_LRU_evict_from_unzip_LRU(
2383 /*=========================*/
2384- buf_pool_t* buf_pool)
2385+ buf_pool_t* buf_pool,
2386+ ibool have_LRU_mutex)
2387 {
2388 ulint io_avg;
2389 ulint unzip_avg;
2390
2391- ut_ad(buf_pool_mutex_own(buf_pool));
2392+ //ut_ad(buf_pool_mutex_own(buf_pool));
2393
2394+ if (!have_LRU_mutex)
2395+ mutex_enter(&buf_pool->LRU_list_mutex);
2396 /* If the unzip_LRU list is empty, we can only use the LRU. */
2397 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2398+ if (!have_LRU_mutex)
2399+ mutex_exit(&buf_pool->LRU_list_mutex);
2400 return(FALSE);
2401 }
2402
2403@@ -171,14 +177,20 @@
2404 decompressed pages in the buffer pool. */
2405 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2406 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2407+ if (!have_LRU_mutex)
2408+ mutex_exit(&buf_pool->LRU_list_mutex);
2409 return(FALSE);
2410 }
2411
2412 /* If eviction hasn't started yet, we assume by default
2413 that a workload is disk bound. */
2414 if (buf_pool->freed_page_clock == 0) {
2415+ if (!have_LRU_mutex)
2416+ mutex_exit(&buf_pool->LRU_list_mutex);
2417 return(TRUE);
2418 }
2419+ if (!have_LRU_mutex)
2420+ mutex_exit(&buf_pool->LRU_list_mutex);
2421
2422 /* Calculate the average over past intervals, and add the values
2423 of the current interval. */
2424@@ -246,19 +258,23 @@
2425 page_arr = ut_malloc(
2426 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2427
2428- buf_pool_mutex_enter(buf_pool);
2429+ //buf_pool_mutex_enter(buf_pool);
2430+ mutex_enter(&buf_pool->LRU_list_mutex);
2431
2432 scan_again:
2433 num_entries = 0;
2434 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2435
2436 while (bpage != NULL) {
2437- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2438+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2439 buf_page_t* prev_bpage;
2440
2441- mutex_enter(block_mutex);
2442 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2443
2444+ if (!block_mutex) {
2445+ goto next_page;
2446+ }
2447+
2448 ut_a(buf_page_in_file(bpage));
2449
2450 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2451@@ -287,14 +303,16 @@
2452
2453 /* Array full. We release the buf_pool->mutex to
2454 obey the latching order. */
2455- buf_pool_mutex_exit(buf_pool);
2456+ //buf_pool_mutex_exit(buf_pool);
2457+ mutex_exit(&buf_pool->LRU_list_mutex);
2458
2459 buf_LRU_drop_page_hash_batch(
2460 id, zip_size, page_arr, num_entries);
2461
2462 num_entries = 0;
2463
2464- buf_pool_mutex_enter(buf_pool);
2465+ //buf_pool_mutex_enter(buf_pool);
2466+ mutex_enter(&buf_pool->LRU_list_mutex);
2467 } else {
2468 mutex_exit(block_mutex);
2469 }
2470@@ -319,7 +337,8 @@
2471 }
2472 }
2473
2474- buf_pool_mutex_exit(buf_pool);
2475+ //buf_pool_mutex_exit(buf_pool);
2476+ mutex_exit(&buf_pool->LRU_list_mutex);
2477
2478 /* Drop any remaining batch of search hashed pages. */
2479 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2480@@ -341,7 +360,9 @@
2481 ibool all_freed;
2482
2483 scan_again:
2484- buf_pool_mutex_enter(buf_pool);
2485+ //buf_pool_mutex_enter(buf_pool);
2486+ mutex_enter(&buf_pool->LRU_list_mutex);
2487+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2488
2489 all_freed = TRUE;
2490
2491@@ -369,8 +390,16 @@
2492
2493 all_freed = FALSE;
2494 } else {
2495- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2496- mutex_enter(block_mutex);
2497+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2498+
2499+ if (!block_mutex) {
2500+ /* This case should be impossible.
2501+ If it does happen, something is wrong, so request another scan (scan_again) */
2502+
2503+ all_freed = FALSE;
2504+
2505+ goto next_page_no_mutex;
2506+ }
2507
2508 if (bpage->buf_fix_count > 0) {
2509
2510@@ -429,7 +458,9 @@
2511 ulint page_no;
2512 ulint zip_size;
2513
2514- buf_pool_mutex_exit(buf_pool);
2515+ //buf_pool_mutex_exit(buf_pool);
2516+ mutex_exit(&buf_pool->LRU_list_mutex);
2517+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2518
2519 zip_size = buf_page_get_zip_size(bpage);
2520 page_no = buf_page_get_page_no(bpage);
2521@@ -454,7 +485,7 @@
2522 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2523 != BUF_BLOCK_ZIP_FREE) {
2524 buf_LRU_block_free_hashed_page((buf_block_t*)
2525- bpage);
2526+ bpage, TRUE);
2527 } else {
2528 /* The block_mutex should have been
2529 released by buf_LRU_block_remove_hashed_page()
2530@@ -486,7 +517,9 @@
2531 bpage = prev_bpage;
2532 }
2533
2534- buf_pool_mutex_exit(buf_pool);
2535+ //buf_pool_mutex_exit(buf_pool);
2536+ mutex_exit(&buf_pool->LRU_list_mutex);
2537+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2538
2539 if (!all_freed) {
2540 os_thread_sleep(20000);
2541@@ -532,7 +565,9 @@
2542 buf_page_t* b;
2543 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2544
2545- ut_ad(buf_pool_mutex_own(buf_pool));
2546+ //ut_ad(buf_pool_mutex_own(buf_pool));
2547+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
11822e22 2548+ ut_ad(mutex_own(&buf_pool->zip_mutex));
b4e1fa2c
AM
2549 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2550
2551 /* Find the first successor of bpage in the LRU list
2552@@ -540,17 +575,17 @@
2553 b = bpage;
2554 do {
2555 b = UT_LIST_GET_NEXT(LRU, b);
2556- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2557+ } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2558
2559 /* Insert bpage before b, i.e., after the predecessor of b. */
2560 if (b) {
2561- b = UT_LIST_GET_PREV(list, b);
2562+ b = UT_LIST_GET_PREV(zip_list, b);
2563 }
2564
2565 if (b) {
2566- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2567+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2568 } else {
2569- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2570+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2571 }
2572 }
2573
2574@@ -563,18 +598,19 @@
2575 buf_LRU_free_from_unzip_LRU_list(
2576 /*=============================*/
2577 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2578- ulint n_iterations) /*!< in: how many times this has
2579+ ulint n_iterations, /*!< in: how many times this has
2580 been called repeatedly without
2581 result: a high value means that
2582 we should search farther; we will
2583 search n_iterations / 5 of the
2584 unzip_LRU list, or nothing if
2585 n_iterations >= 5 */
2586+ ibool have_LRU_mutex)
2587 {
2588 buf_block_t* block;
2589 ulint distance;
2590
2591- ut_ad(buf_pool_mutex_own(buf_pool));
2592+ //ut_ad(buf_pool_mutex_own(buf_pool));
2593
2594 /* Theoratically it should be much easier to find a victim
2595 from unzip_LRU as we can choose even a dirty block (as we'll
2596@@ -584,7 +620,7 @@
2597 if we have done five iterations so far. */
2598
2599 if (UNIV_UNLIKELY(n_iterations >= 5)
2600- || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2601+ || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2602
2603 return(FALSE);
2604 }
2605@@ -592,18 +628,25 @@
2606 distance = 100 + (n_iterations
2607 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2608
2609+restart:
2610 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2611 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2612 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2613
2614 enum buf_lru_free_block_status freed;
2615
2616+ mutex_enter(&block->mutex);
2617+ if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2618+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2619+ mutex_exit(&block->mutex);
2620+ goto restart;
2621+ }
2622+
2623 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2624 ut_ad(block->in_unzip_LRU_list);
2625 ut_ad(block->page.in_LRU_list);
2626
2627- mutex_enter(&block->mutex);
df1b5770
AM
2628- freed = buf_LRU_free_block(&block->page, FALSE);
2629+ freed = buf_LRU_free_block(&block->page, FALSE, have_LRU_mutex);
b4e1fa2c
AM
2630 mutex_exit(&block->mutex);
2631
2632 switch (freed) {
2633@@ -637,21 +680,23 @@
2634 buf_LRU_free_from_common_LRU_list(
2635 /*==============================*/
2636 buf_pool_t* buf_pool,
2637- ulint n_iterations)
2638+ ulint n_iterations,
2639 /*!< in: how many times this has been called
2640 repeatedly without result: a high value means
2641 that we should search farther; if
2642 n_iterations < 10, then we search
2643 n_iterations / 10 * buf_pool->curr_size
2644 pages from the end of the LRU list */
2645+ ibool have_LRU_mutex)
2646 {
2647 buf_page_t* bpage;
2648 ulint distance;
2649
2650- ut_ad(buf_pool_mutex_own(buf_pool));
2651+ //ut_ad(buf_pool_mutex_own(buf_pool));
2652
2653 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2654
2655+restart:
2656 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2657 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2658 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2659@@ -659,14 +704,23 @@
2660 enum buf_lru_free_block_status freed;
2661 unsigned accessed;
2662 mutex_t* block_mutex
2663- = buf_page_get_mutex(bpage);
2664+ = buf_page_get_mutex_enter(bpage);
2665+
2666+ if (!block_mutex) {
2667+ goto restart;
2668+ }
2669+
2670+ if (!bpage->in_LRU_list
2671+ || !buf_page_in_file(bpage)) {
2672+ mutex_exit(block_mutex);
2673+ goto restart;
2674+ }
2675
2676 ut_ad(buf_page_in_file(bpage));
2677 ut_ad(bpage->in_LRU_list);
2678
2679- mutex_enter(block_mutex);
2680 accessed = buf_page_is_accessed(bpage);
df1b5770
AM
2681- freed = buf_LRU_free_block(bpage, TRUE);
2682+ freed = buf_LRU_free_block(bpage, TRUE, have_LRU_mutex);
b4e1fa2c
AM
2683 mutex_exit(block_mutex);
2684
2685 switch (freed) {
2686@@ -718,16 +772,23 @@
2687 n_iterations / 5 of the unzip_LRU list. */
2688 {
2689 ibool freed = FALSE;
2690+ ibool have_LRU_mutex = FALSE;
2691
2692- buf_pool_mutex_enter(buf_pool);
2693+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2694+ have_LRU_mutex = TRUE;
2695+
2696+ //buf_pool_mutex_enter(buf_pool);
2697+ if (have_LRU_mutex)
2698+ mutex_enter(&buf_pool->LRU_list_mutex);
2699
2700- freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2701+ freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2702
2703 if (!freed) {
2704 freed = buf_LRU_free_from_common_LRU_list(
2705- buf_pool, n_iterations);
2706+ buf_pool, n_iterations, have_LRU_mutex);
2707 }
2708
2709+ buf_pool_mutex_enter(buf_pool);
2710 if (!freed) {
2711 buf_pool->LRU_flush_ended = 0;
2712 } else if (buf_pool->LRU_flush_ended > 0) {
2713@@ -735,6 +796,8 @@
2714 }
2715
2716 buf_pool_mutex_exit(buf_pool);
2717+ if (have_LRU_mutex)
2718+ mutex_exit(&buf_pool->LRU_list_mutex);
2719
2720 return(freed);
2721 }
2722@@ -795,7 +858,9 @@
2723
2724 buf_pool = buf_pool_from_array(i);
2725
2726- buf_pool_mutex_enter(buf_pool);
2727+ //buf_pool_mutex_enter(buf_pool);
2728+ mutex_enter(&buf_pool->LRU_list_mutex);
2729+ mutex_enter(&buf_pool->free_list_mutex);
2730
2731 if (!recv_recovery_on
2732 && UT_LIST_GET_LEN(buf_pool->free)
2733@@ -805,7 +870,9 @@
2734 ret = TRUE;
2735 }
2736
2737- buf_pool_mutex_exit(buf_pool);
2738+ //buf_pool_mutex_exit(buf_pool);
2739+ mutex_exit(&buf_pool->LRU_list_mutex);
2740+ mutex_exit(&buf_pool->free_list_mutex);
2741 }
2742
2743 return(ret);
2744@@ -823,9 +890,10 @@
2745 {
2746 buf_block_t* block;
2747
2748- ut_ad(buf_pool_mutex_own(buf_pool));
2749+ //ut_ad(buf_pool_mutex_own(buf_pool));
2750
2751- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2752+ mutex_enter(&buf_pool->free_list_mutex);
2753+ block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2754
2755 if (block) {
2756
2757@@ -834,7 +902,9 @@
2758 ut_ad(!block->page.in_flush_list);
2759 ut_ad(!block->page.in_LRU_list);
2760 ut_a(!buf_page_in_file(&block->page));
2761- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2762+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2763+
2764+ mutex_exit(&buf_pool->free_list_mutex);
2765
2766 mutex_enter(&block->mutex);
2767
2768@@ -844,6 +914,8 @@
2769 ut_ad(buf_pool_from_block(block) == buf_pool);
2770
2771 mutex_exit(&block->mutex);
2772+ } else {
2773+ mutex_exit(&buf_pool->free_list_mutex);
2774 }
2775
2776 return(block);
df1b5770 2777@@ -866,7 +938,7 @@
b4e1fa2c
AM
2778 ibool mon_value_was = FALSE;
2779 ibool started_monitor = FALSE;
2780 loop:
2781- buf_pool_mutex_enter(buf_pool);
2782+ //buf_pool_mutex_enter(buf_pool);
2783
2784 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2785 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
df1b5770 2786@@ -934,7 +1006,7 @@
b4e1fa2c 2787
df1b5770
AM
2788 /* If there is a block in the free list, take it */
2789 block = buf_LRU_get_free_only(buf_pool);
b4e1fa2c
AM
2790- buf_pool_mutex_exit(buf_pool);
2791+ //buf_pool_mutex_exit(buf_pool);
2792
df1b5770
AM
2793 if (block) {
2794 ut_ad(buf_pool_from_block(block) == buf_pool);
2795@@ -1034,7 +1106,8 @@
b4e1fa2c
AM
2796 ulint new_len;
2797
2798 ut_a(buf_pool->LRU_old);
2799- ut_ad(buf_pool_mutex_own(buf_pool));
2800+ //ut_ad(buf_pool_mutex_own(buf_pool));
2801+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2802 ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2803 ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2804 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
df1b5770 2805@@ -1100,7 +1173,8 @@
b4e1fa2c
AM
2806 {
2807 buf_page_t* bpage;
2808
2809- ut_ad(buf_pool_mutex_own(buf_pool));
2810+ //ut_ad(buf_pool_mutex_own(buf_pool));
2811+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2812 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2813
2814 /* We first initialize all blocks in the LRU list as old and then use
df1b5770 2815@@ -1135,13 +1209,14 @@
b4e1fa2c
AM
2816 ut_ad(buf_pool);
2817 ut_ad(bpage);
2818 ut_ad(buf_page_in_file(bpage));
2819- ut_ad(buf_pool_mutex_own(buf_pool));
2820+ //ut_ad(buf_pool_mutex_own(buf_pool));
2821+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2822
2823 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2824 buf_block_t* block = (buf_block_t*) bpage;
2825
2826 ut_ad(block->in_unzip_LRU_list);
2827- ut_d(block->in_unzip_LRU_list = FALSE);
2828+ block->in_unzip_LRU_list = FALSE;
2829
2830 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2831 }
df1b5770 2832@@ -1159,7 +1234,8 @@
b4e1fa2c
AM
2833
2834 ut_ad(buf_pool);
2835 ut_ad(bpage);
2836- ut_ad(buf_pool_mutex_own(buf_pool));
2837+ //ut_ad(buf_pool_mutex_own(buf_pool));
2838+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2839
2840 ut_a(buf_page_in_file(bpage));
2841
df1b5770 2842@@ -1236,12 +1312,13 @@
b4e1fa2c
AM
2843
2844 ut_ad(buf_pool);
2845 ut_ad(block);
2846- ut_ad(buf_pool_mutex_own(buf_pool));
2847+ //ut_ad(buf_pool_mutex_own(buf_pool));
2848+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2849
2850 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2851
2852 ut_ad(!block->in_unzip_LRU_list);
2853- ut_d(block->in_unzip_LRU_list = TRUE);
2854+ block->in_unzip_LRU_list = TRUE;
2855
2856 if (old) {
2857 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
df1b5770 2858@@ -1262,7 +1339,8 @@
b4e1fa2c
AM
2859
2860 ut_ad(buf_pool);
2861 ut_ad(bpage);
2862- ut_ad(buf_pool_mutex_own(buf_pool));
2863+ //ut_ad(buf_pool_mutex_own(buf_pool));
2864+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2865
2866 ut_a(buf_page_in_file(bpage));
2867
df1b5770 2868@@ -1313,7 +1391,8 @@
b4e1fa2c
AM
2869
2870 ut_ad(buf_pool);
2871 ut_ad(bpage);
2872- ut_ad(buf_pool_mutex_own(buf_pool));
2873+ //ut_ad(buf_pool_mutex_own(buf_pool));
2874+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2875
2876 ut_a(buf_page_in_file(bpage));
2877 ut_ad(!bpage->in_LRU_list);
df1b5770 2878@@ -1392,7 +1471,8 @@
b4e1fa2c
AM
2879 {
2880 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2881
2882- ut_ad(buf_pool_mutex_own(buf_pool));
2883+ //ut_ad(buf_pool_mutex_own(buf_pool));
2884+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2885
2886 if (bpage->old) {
2887 buf_pool->stat.n_pages_made_young++;
df1b5770
AM
2888@@ -1432,17 +1512,18 @@
2889 buf_LRU_free_block(
2890 /*===============*/
b4e1fa2c 2891 buf_page_t* bpage, /*!< in: block to be freed */
df1b5770
AM
2892- ibool zip) /*!< in: TRUE if should remove also the
2893+ ibool zip, /*!< in: TRUE if should remove also the
b4e1fa2c 2894 compressed page of an uncompressed page */
b4e1fa2c
AM
2895+ ibool have_LRU_mutex)
2896 {
2897 buf_page_t* b = NULL;
2898 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2899 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2900
2901- ut_ad(buf_pool_mutex_own(buf_pool));
2902+ //ut_ad(buf_pool_mutex_own(buf_pool));
2903 ut_ad(mutex_own(block_mutex));
2904 ut_ad(buf_page_in_file(bpage));
2905- ut_ad(bpage->in_LRU_list);
2906+ //ut_ad(bpage->in_LRU_list);
2907 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2908 #if UNIV_WORD_SIZE == 4
2909 /* On 32-bit systems, there is no padding in buf_page_t. On
df1b5770 2910@@ -1451,7 +1532,7 @@
b4e1fa2c
AM
2911 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2912 #endif
2913
2914- if (!buf_page_can_relocate(bpage)) {
2915+ if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2916
2917 /* Do not free buffer-fixed or I/O-fixed blocks. */
2918 return(BUF_LRU_NOT_FREED);
df1b5770 2919@@ -1483,15 +1564,15 @@
b4e1fa2c
AM
2920 If it cannot be allocated (without freeing a block
2921 from the LRU list), refuse to free bpage. */
2922 alloc:
2923- buf_pool_mutex_exit_forbid(buf_pool);
2924- b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2925- buf_pool_mutex_exit_allow(buf_pool);
2926+ //buf_pool_mutex_exit_forbid(buf_pool);
2927+ b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2928+ //buf_pool_mutex_exit_allow(buf_pool);
2929
2930 if (UNIV_UNLIKELY(!b)) {
2931 return(BUF_LRU_CANNOT_RELOCATE);
2932 }
2933
2934- memcpy(b, bpage, sizeof *b);
2935+ //memcpy(b, bpage, sizeof *b);
2936 }
2937
2938 #ifdef UNIV_DEBUG
df1b5770 2939@@ -1502,6 +1583,39 @@
b4e1fa2c
AM
2940 }
2941 #endif /* UNIV_DEBUG */
2942
2943+ /* to keep the latch order, block_mutex must be released and re-entered */
2944+ mutex_exit(block_mutex);
2945+
2946+ if (!have_LRU_mutex)
2947+ mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2948+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2949+ mutex_enter(block_mutex);
2950+
2951+ /* recheck states of block */
2952+ if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2953+ || !buf_page_can_relocate(bpage)) {
2954+not_freed:
2955+ if (b) {
2956+ buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2957+ }
2958+ if (!have_LRU_mutex)
2959+ mutex_exit(&buf_pool->LRU_list_mutex);
2960+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2961+ return(BUF_LRU_NOT_FREED);
2962+ } else if (zip || !bpage->zip.data) {
2963+ if (bpage->oldest_modification)
2964+ goto not_freed;
2965+ } else if (bpage->oldest_modification) {
2966+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2967+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2968+ goto not_freed;
2969+ }
2970+ }
2971+
2972+ if (b) {
2973+ memcpy(b, bpage, sizeof *b);
2974+ }
2975+
2976 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2977 != BUF_BLOCK_ZIP_FREE) {
2978 ut_a(bpage->buf_fix_count == 0);
df1b5770 2979@@ -1518,6 +1632,10 @@
b4e1fa2c
AM
2980
2981 ut_a(!hash_b);
2982
2983+ while (prev_b && !prev_b->in_LRU_list) {
2984+ prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2985+ }
2986+
2987 b->state = b->oldest_modification
2988 ? BUF_BLOCK_ZIP_DIRTY
2989 : BUF_BLOCK_ZIP_PAGE;
df1b5770
AM
2990@@ -1610,7 +1728,9 @@
2991 b->io_fix = BUF_IO_READ;
b4e1fa2c
AM
2992 }
2993
2994- buf_pool_mutex_exit(buf_pool);
2995+ //buf_pool_mutex_exit(buf_pool);
2996+ mutex_exit(&buf_pool->LRU_list_mutex);
2997+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2998 mutex_exit(block_mutex);
2999
3000 /* Remove possible adaptive hash index on the page.
df1b5770 3001@@ -1642,7 +1762,9 @@
b4e1fa2c
AM
3002 : BUF_NO_CHECKSUM_MAGIC);
3003 }
3004
3005- buf_pool_mutex_enter(buf_pool);
3006+ //buf_pool_mutex_enter(buf_pool);
3007+ if (have_LRU_mutex)
3008+ mutex_enter(&buf_pool->LRU_list_mutex);
3009 mutex_enter(block_mutex);
3010
3011 if (b) {
df1b5770 3012@@ -1652,13 +1774,17 @@
b4e1fa2c
AM
3013 mutex_exit(&buf_pool->zip_mutex);
3014 }
3015
3016- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
3017+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
3018 } else {
3019 /* The block_mutex should have been released by
3020 buf_LRU_block_remove_hashed_page() when it returns
3021 BUF_BLOCK_ZIP_FREE. */
3022 ut_ad(block_mutex == &buf_pool->zip_mutex);
3023 mutex_enter(block_mutex);
3024+
3025+ if (!have_LRU_mutex)
3026+ mutex_exit(&buf_pool->LRU_list_mutex);
3027+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3028 }
3029
3030 return(BUF_LRU_FREED);
df1b5770 3031@@ -1670,13 +1796,14 @@
b4e1fa2c
AM
3032 void
3033 buf_LRU_block_free_non_file_page(
3034 /*=============================*/
3035- buf_block_t* block) /*!< in: block, must not contain a file page */
3036+ buf_block_t* block, /*!< in: block, must not contain a file page */
3037+ ibool have_page_hash_mutex)
3038 {
3039 void* data;
3040 buf_pool_t* buf_pool = buf_pool_from_block(block);
3041
3042 ut_ad(block);
3043- ut_ad(buf_pool_mutex_own(buf_pool));
3044+ //ut_ad(buf_pool_mutex_own(buf_pool));
3045 ut_ad(mutex_own(&block->mutex));
3046
3047 switch (buf_block_get_state(block)) {
df1b5770 3048@@ -1710,18 +1837,21 @@
b4e1fa2c
AM
3049 if (data) {
3050 block->page.zip.data = NULL;
3051 mutex_exit(&block->mutex);
3052- buf_pool_mutex_exit_forbid(buf_pool);
3053+ //buf_pool_mutex_exit_forbid(buf_pool);
3054
3055 buf_buddy_free(
3056- buf_pool, data, page_zip_get_size(&block->page.zip));
3057+ buf_pool, data, page_zip_get_size(&block->page.zip),
3058+ have_page_hash_mutex);
3059
3060- buf_pool_mutex_exit_allow(buf_pool);
3061+ //buf_pool_mutex_exit_allow(buf_pool);
3062 mutex_enter(&block->mutex);
3063 page_zip_set_size(&block->page.zip, 0);
3064 }
3065
3066- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3067+ mutex_enter(&buf_pool->free_list_mutex);
3068+ UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3069 ut_d(block->page.in_free_list = TRUE);
3070+ mutex_exit(&buf_pool->free_list_mutex);
3071
3072 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3073 }
df1b5770 3074@@ -1751,7 +1881,11 @@
b4e1fa2c
AM
3075 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3076
3077 ut_ad(bpage);
3078- ut_ad(buf_pool_mutex_own(buf_pool));
3079+ //ut_ad(buf_pool_mutex_own(buf_pool));
3080+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3081+#ifdef UNIV_SYNC_DEBUG
3082+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3083+#endif
3084 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3085
3086 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
df1b5770 3087@@ -1859,7 +1993,9 @@
b4e1fa2c
AM
3088
3089 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3090 mutex_exit(buf_page_get_mutex(bpage));
3091- buf_pool_mutex_exit(buf_pool);
3092+ //buf_pool_mutex_exit(buf_pool);
3093+ mutex_exit(&buf_pool->LRU_list_mutex);
3094+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3095 buf_print();
3096 buf_LRU_print();
3097 buf_validate();
df1b5770 3098@@ -1880,17 +2016,17 @@
b4e1fa2c
AM
3099 ut_a(bpage->zip.data);
3100 ut_a(buf_page_get_zip_size(bpage));
3101
3102- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3103+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3104
3105 mutex_exit(&buf_pool->zip_mutex);
3106- buf_pool_mutex_exit_forbid(buf_pool);
3107+ //buf_pool_mutex_exit_forbid(buf_pool);
3108
3109 buf_buddy_free(
3110 buf_pool, bpage->zip.data,
3111- page_zip_get_size(&bpage->zip));
3112+ page_zip_get_size(&bpage->zip), TRUE);
3113
3114- buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3115- buf_pool_mutex_exit_allow(buf_pool);
3116+ buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3117+ //buf_pool_mutex_exit_allow(buf_pool);
3118
3119 UNIV_MEM_UNDESC(bpage);
3120 return(BUF_BLOCK_ZIP_FREE);
df1b5770 3121@@ -1913,13 +2049,13 @@
b4e1fa2c
AM
3122 ut_ad(!bpage->in_flush_list);
3123 ut_ad(!bpage->in_LRU_list);
3124 mutex_exit(&((buf_block_t*) bpage)->mutex);
3125- buf_pool_mutex_exit_forbid(buf_pool);
3126+ //buf_pool_mutex_exit_forbid(buf_pool);
3127
3128 buf_buddy_free(
3129 buf_pool, data,
3130- page_zip_get_size(&bpage->zip));
3131+ page_zip_get_size(&bpage->zip), TRUE);
3132
3133- buf_pool_mutex_exit_allow(buf_pool);
3134+ //buf_pool_mutex_exit_allow(buf_pool);
3135 mutex_enter(&((buf_block_t*) bpage)->mutex);
3136 page_zip_set_size(&bpage->zip, 0);
3137 }
df1b5770 3138@@ -1945,18 +2081,19 @@
b4e1fa2c
AM
3139 void
3140 buf_LRU_block_free_hashed_page(
3141 /*===========================*/
3142- buf_block_t* block) /*!< in: block, must contain a file page and
3143+ buf_block_t* block, /*!< in: block, must contain a file page and
3144 be in a state where it can be freed */
3145+ ibool have_page_hash_mutex)
3146 {
3147 #ifdef UNIV_DEBUG
3148- buf_pool_t* buf_pool = buf_pool_from_block(block);
3149- ut_ad(buf_pool_mutex_own(buf_pool));
3150+ //buf_pool_t* buf_pool = buf_pool_from_block(block);
3151+ //ut_ad(buf_pool_mutex_own(buf_pool));
3152 #endif
3153 ut_ad(mutex_own(&block->mutex));
3154
3155 buf_block_set_state(block, BUF_BLOCK_MEMORY);
3156
3157- buf_LRU_block_free_non_file_page(block);
3158+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3159 }
3160
3161 /**********************************************************************//**
df1b5770 3162@@ -1983,7 +2120,8 @@
b4e1fa2c
AM
3163 }
3164
3165 if (adjust) {
3166- buf_pool_mutex_enter(buf_pool);
3167+ //buf_pool_mutex_enter(buf_pool);
3168+ mutex_enter(&buf_pool->LRU_list_mutex);
3169
3170 if (ratio != buf_pool->LRU_old_ratio) {
3171 buf_pool->LRU_old_ratio = ratio;
df1b5770 3172@@ -1995,7 +2133,8 @@
b4e1fa2c
AM
3173 }
3174 }
3175
3176- buf_pool_mutex_exit(buf_pool);
3177+ //buf_pool_mutex_exit(buf_pool);
3178+ mutex_exit(&buf_pool->LRU_list_mutex);
3179 } else {
3180 buf_pool->LRU_old_ratio = ratio;
3181 }
df1b5770 3182@@ -2100,7 +2239,8 @@
b4e1fa2c
AM
3183 ulint new_len;
3184
3185 ut_ad(buf_pool);
3186- buf_pool_mutex_enter(buf_pool);
3187+ //buf_pool_mutex_enter(buf_pool);
3188+ mutex_enter(&buf_pool->LRU_list_mutex);
3189
3190 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3191
df1b5770 3192@@ -2161,16 +2301,22 @@
b4e1fa2c
AM
3193
3194 ut_a(buf_pool->LRU_old_len == old_len);
3195
3196- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3197+ mutex_exit(&buf_pool->LRU_list_mutex);
3198+ mutex_enter(&buf_pool->free_list_mutex);
3199+
3200+ UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3201 ut_ad(ut_list_node_313->in_free_list));
3202
3203 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3204 bpage != NULL;
3205- bpage = UT_LIST_GET_NEXT(list, bpage)) {
3206+ bpage = UT_LIST_GET_NEXT(free, bpage)) {
3207
3208 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3209 }
3210
3211+ mutex_exit(&buf_pool->free_list_mutex);
3212+ mutex_enter(&buf_pool->LRU_list_mutex);
3213+
3214 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3215 ut_ad(ut_list_node_313->in_unzip_LRU_list
3216 && ut_list_node_313->page.in_LRU_list));
df1b5770 3217@@ -2184,7 +2330,8 @@
b4e1fa2c
AM
3218 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3219 }
3220
3221- buf_pool_mutex_exit(buf_pool);
3222+ //buf_pool_mutex_exit(buf_pool);
3223+ mutex_exit(&buf_pool->LRU_list_mutex);
3224 }
3225
3226 /**********************************************************************//**
df1b5770 3227@@ -2220,7 +2367,8 @@
b4e1fa2c
AM
3228 const buf_page_t* bpage;
3229
3230 ut_ad(buf_pool);
3231- buf_pool_mutex_enter(buf_pool);
3232+ //buf_pool_mutex_enter(buf_pool);
3233+ mutex_enter(&buf_pool->LRU_list_mutex);
3234
3235 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3236
df1b5770 3237@@ -2277,7 +2425,8 @@
b4e1fa2c
AM
3238 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3239 }
3240
3241- buf_pool_mutex_exit(buf_pool);
3242+ //buf_pool_mutex_exit(buf_pool);
3243+ mutex_exit(&buf_pool->LRU_list_mutex);
3244 }
3245
3246 /**********************************************************************//**
3247diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3248--- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900
3249+++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900
3250@@ -311,6 +311,7 @@
3251
3252 return(0);
3253 }
3254+ buf_pool_mutex_exit(buf_pool);
3255
3256 /* Check that almost all pages in the area have been accessed; if
3257 offset == low, the accesses must be in a descending order, otherwise,
3258@@ -329,6 +330,7 @@
3259
3260 fail_count = 0;
3261
3262+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3263 for (i = low; i < high; i++) {
3264 bpage = buf_page_hash_get(buf_pool, space, i);
3265
3266@@ -356,7 +358,8 @@
3267
3268 if (fail_count > threshold) {
3269 /* Too many failures: return */
3270- buf_pool_mutex_exit(buf_pool);
3271+ //buf_pool_mutex_exit(buf_pool);
3272+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3273 return(0);
3274 }
3275
3276@@ -371,7 +374,8 @@
3277 bpage = buf_page_hash_get(buf_pool, space, offset);
3278
3279 if (bpage == NULL) {
3280- buf_pool_mutex_exit(buf_pool);
3281+ //buf_pool_mutex_exit(buf_pool);
3282+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3283
3284 return(0);
3285 }
3286@@ -397,7 +401,8 @@
3287 pred_offset = fil_page_get_prev(frame);
3288 succ_offset = fil_page_get_next(frame);
3289
3290- buf_pool_mutex_exit(buf_pool);
3291+ //buf_pool_mutex_exit(buf_pool);
3292+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3293
3294 if ((offset == low) && (succ_offset == offset + 1)) {
3295
3296diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3297--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900
3298+++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900
df1b5770 3299@@ -264,6 +264,10 @@
b4e1fa2c
AM
3300 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3301 {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3302 {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3303+ {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3304+ {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3305+ {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3306+ {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3307 {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3308 {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3309 {&dict_sys_mutex_key, "dict_sys_mutex", 0},
df1b5770 3310@@ -314,6 +318,7 @@
b4e1fa2c
AM
3311 {&archive_lock_key, "archive_lock", 0},
3312 # endif /* UNIV_LOG_ARCHIVE */
3313 {&btr_search_latch_key, "btr_search_latch", 0},
3314+ {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3315 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3316 {&buf_block_lock_key, "buf_block_lock", 0},
3317 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3318diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3319--- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900
3320+++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900
d8778560 3321@@ -1565,7 +1565,8 @@
b4e1fa2c
AM
3322
3323 buf_pool = buf_pool_from_array(i);
3324
3325- buf_pool_mutex_enter(buf_pool);
3326+ //buf_pool_mutex_enter(buf_pool);
3327+ mutex_enter(&buf_pool->zip_free_mutex);
3328
3329 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3330 buf_buddy_stat_t* buddy_stat;
d8778560 3331@@ -1595,7 +1596,8 @@
b4e1fa2c
AM
3332 }
3333 }
3334
3335- buf_pool_mutex_exit(buf_pool);
3336+ //buf_pool_mutex_exit(buf_pool);
3337+ mutex_exit(&buf_pool->zip_free_mutex);
3338
3339 if (status) {
3340 break;
3341diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3342--- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3343+++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
df1b5770 3344@@ -3766,9 +3766,11 @@
b4e1fa2c
AM
3345 ulint fold = buf_page_address_fold(space, page_no);
3346 buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3347
3348- buf_pool_mutex_enter(buf_pool);
3349+ //buf_pool_mutex_enter(buf_pool);
3350+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3351 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3352- buf_pool_mutex_exit(buf_pool);
3353+ //buf_pool_mutex_exit(buf_pool);
3354+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3355
3356 if (UNIV_LIKELY_NULL(bpage)) {
3357 /* A buffer pool watch has been set or the
3358diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3359--- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900
3360+++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900
3361@@ -51,10 +51,11 @@
3362 buf_pool_t* buf_pool,
3363 /*!< buffer pool in which the block resides */
3364 ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3365- ibool* lru) /*!< in: pointer to a variable that will be assigned
3366+ ibool* lru, /*!< in: pointer to a variable that will be assigned
3367 TRUE if storage was allocated from the LRU list
3368 and buf_pool->mutex was temporarily released,
3369 or NULL if the LRU list should not be used */
3370+ ibool have_page_hash_mutex)
3371 __attribute__((malloc));
3372
3373 /**********************************************************************//**
3374@@ -67,7 +68,8 @@
3375 /*!< buffer pool in which the block resides */
3376 void* buf, /*!< in: block to be freed, must not be
3377 pointed to by the buffer pool */
3378- ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
3379+ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3380+ ibool have_page_hash_mutex)
3381 __attribute__((nonnull));
3382
3383 #ifndef UNIV_NONINL
3384diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3385--- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900
3386+++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900
3387@@ -46,10 +46,11 @@
3388 /*!< in: buffer pool in which the page resides */
3389 ulint i, /*!< in: index of buf_pool->zip_free[],
3390 or BUF_BUDDY_SIZES */
3391- ibool* lru) /*!< in: pointer to a variable that will be assigned
3392+ ibool* lru, /*!< in: pointer to a variable that will be assigned
3393 TRUE if storage was allocated from the LRU list
3394 and buf_pool->mutex was temporarily released,
3395 or NULL if the LRU list should not be used */
3396+ ibool have_page_hash_mutex)
3397 __attribute__((malloc));
3398
3399 /**********************************************************************//**
3400@@ -61,8 +62,9 @@
3401 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3402 void* buf, /*!< in: block to be freed, must not be
3403 pointed to by the buffer pool */
3404- ulint i) /*!< in: index of buf_pool->zip_free[],
3405+ ulint i, /*!< in: index of buf_pool->zip_free[],
3406 or BUF_BUDDY_SIZES */
3407+ ibool have_page_hash_mutex)
3408 __attribute__((nonnull));
3409
3410 /**********************************************************************//**
3411@@ -102,16 +104,17 @@
3412 the page resides */
3413 ulint size, /*!< in: block size, up to
3414 UNIV_PAGE_SIZE */
3415- ibool* lru) /*!< in: pointer to a variable
3416+ ibool* lru, /*!< in: pointer to a variable
3417 that will be assigned TRUE if
3418 storage was allocated from the
3419 LRU list and buf_pool->mutex was
3420 temporarily released, or NULL if
3421 the LRU list should not be used */
3422+ ibool have_page_hash_mutex)
3423 {
3424- ut_ad(buf_pool_mutex_own(buf_pool));
3425+ //ut_ad(buf_pool_mutex_own(buf_pool));
3426
3427- return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3428+ return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3429 }
3430
3431 /**********************************************************************//**
3432@@ -123,12 +126,25 @@
3433 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3434 void* buf, /*!< in: block to be freed, must not be
3435 pointed to by the buffer pool */
3436- ulint size) /*!< in: block size, up to
3437+ ulint size, /*!< in: block size, up to
3438 UNIV_PAGE_SIZE */
3439+ ibool have_page_hash_mutex)
3440 {
3441- ut_ad(buf_pool_mutex_own(buf_pool));
3442+ //ut_ad(buf_pool_mutex_own(buf_pool));
3443+
3444+ if (!have_page_hash_mutex) {
3445+ mutex_enter(&buf_pool->LRU_list_mutex);
3446+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3447+ }
3448
3449- buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3450+ mutex_enter(&buf_pool->zip_free_mutex);
3451+ buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3452+ mutex_exit(&buf_pool->zip_free_mutex);
3453+
3454+ if (!have_page_hash_mutex) {
3455+ mutex_exit(&buf_pool->LRU_list_mutex);
3456+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3457+ }
3458 }
3459
3460 #ifdef UNIV_MATERIALIZE
3461diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3462--- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900
3463+++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900
d8778560 3464@@ -203,6 +203,20 @@
b4e1fa2c
AM
3465 /*==========================*/
3466
3467 /********************************************************************//**
3468+Acquires the page_hash_latch of every buffer pool instance in exclusive mode. */
3469+UNIV_INLINE
3470+void
3471+buf_pool_page_hash_x_lock_all(void);
3472+/*================================*/
3473+
3474+/********************************************************************//**
3475+Releases the exclusive page_hash_latch of every buffer pool instance. */
3476+UNIV_INLINE
3477+void
3478+buf_pool_page_hash_x_unlock_all(void);
3479+/*==================================*/
3480+
3481+/********************************************************************//**
3482 Creates the buffer pool.
3483 @return own: buf_pool object, NULL if not enough memory or error */
3484 UNIV_INTERN
d8778560 3485@@ -832,6 +846,15 @@
b4e1fa2c
AM
3486 const buf_page_t* bpage) /*!< in: pointer to control block */
3487 __attribute__((pure));
3488
3489+/*************************************************************************
3490+Gets the mutex of a block and enters it, retrying if the block's mutex changed meanwhile. */
3491+UNIV_INLINE
3492+mutex_t*
3493+buf_page_get_mutex_enter(
3494+/*=========================*/
3495+ const buf_page_t* bpage) /*!< in: pointer to control block */
3496+ __attribute__((pure));
3497+
3498 /*********************************************************************//**
3499 Get the flush type of a page.
3500 @return flush type */
d8778560 3501@@ -1313,7 +1336,7 @@
b4e1fa2c
AM
3502 All these are protected by buf_pool->mutex. */
3503 /* @{ */
3504
3505- UT_LIST_NODE_T(buf_page_t) list;
3506+ /* UT_LIST_NODE_T(buf_page_t) list; */
3507 /*!< based on state, this is a
3508 list node, protected either by
3509 buf_pool->mutex or by
d8778560 3510@@ -1341,6 +1364,10 @@
b4e1fa2c
AM
3511 BUF_BLOCK_REMOVE_HASH or
3512 BUF_BLOCK_READY_IN_USE. */
3513
3514+ /* resplit for optimistic use */
3515+ UT_LIST_NODE_T(buf_page_t) free;
3516+ UT_LIST_NODE_T(buf_page_t) flush_list;
3517+ UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3518 #ifdef UNIV_DEBUG
3519 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3520 when buf_pool->flush_list_mutex is
d8778560 3521@@ -1433,11 +1460,11 @@
b4e1fa2c
AM
3522 a block is in the unzip_LRU list
3523 if page.state == BUF_BLOCK_FILE_PAGE
3524 and page.zip.data != NULL */
3525-#ifdef UNIV_DEBUG
3526+//#ifdef UNIV_DEBUG
3527 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3528 decompressed LRU list;
3529 used in debugging */
3530-#endif /* UNIV_DEBUG */
3531+//#endif /* UNIV_DEBUG */
3532 mutex_t mutex; /*!< mutex protecting this block:
3533 state (also protected by the buffer
3534 pool mutex), io_fix, buf_fix_count,
d8778560 3535@@ -1612,6 +1639,11 @@
b4e1fa2c
AM
3536 pool instance, protects compressed
3537 only pages (of type buf_page_t, not
3538 buf_block_t */
3539+ mutex_t LRU_list_mutex;
3540+ rw_lock_t page_hash_latch;
3541+ mutex_t free_list_mutex;
3542+ mutex_t zip_free_mutex;
3543+ mutex_t zip_hash_mutex;
3544 ulint instance_no; /*!< Array index of this buffer
3545 pool instance */
3546 ulint old_pool_size; /*!< Old pool size in bytes */
11822e22
AM
3547@@ -1763,8 +1795,8 @@
3548 /** Test if a buffer pool mutex is owned. */
3549 #define buf_pool_mutex_own(b) mutex_own(&b->mutex)
3550 /** Acquire a buffer pool mutex. */
3551+/* the latch order of buf_pool_mutex has been changed */
3552 #define buf_pool_mutex_enter(b) do { \
3553- ut_ad(!mutex_own(&b->zip_mutex)); \
3554 mutex_enter(&b->mutex); \
3555 } while (0)
3556
b4e1fa2c
AM
3557diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3558--- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900
3559+++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900
3560@@ -274,7 +274,7 @@
3561 case BUF_BLOCK_ZIP_FREE:
3562 /* This is a free page in buf_pool->zip_free[].
3563 Such pages should only be accessed by the buddy allocator. */
3564- ut_error;
3565+ /* ut_error; */ /* optimistic */
3566 break;
3567 case BUF_BLOCK_ZIP_PAGE:
3568 case BUF_BLOCK_ZIP_DIRTY:
11822e22 3569@@ -317,9 +317,16 @@
b4e1fa2c
AM
3570 {
3571 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3572
11822e22
AM
3573+ if (/*equivalent to buf_pool_watch_is_sentinel(buf_pool, bpage)*/
3574+ bpage >= &buf_pool->watch[0]
3575+ && bpage < &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
b4e1fa2c
AM
3576+ /* TODO: this code is interim and should be confirmed later. */
3577+ return(&buf_pool->zip_mutex);
3578+ }
3579+
3580 switch (buf_page_get_state(bpage)) {
3581 case BUF_BLOCK_ZIP_FREE:
3582- ut_error;
3583+ /* ut_error; */ /* optimistic */
3584 return(NULL);
3585 case BUF_BLOCK_ZIP_PAGE:
3586 case BUF_BLOCK_ZIP_DIRTY:
11822e22 3587@@ -329,6 +336,28 @@
b4e1fa2c
AM
3588 }
3589 }
3590
3591+/*************************************************************************
3592+Gets the mutex of a block and enters it, retrying if the block's mutex changed meanwhile. */
3593+UNIV_INLINE
3594+mutex_t*
3595+buf_page_get_mutex_enter(
3596+/*=========================*/
3597+ const buf_page_t* bpage) /*!< in: pointer to control block */
3598+{
3599+ mutex_t* block_mutex;
3600+
3601+ while(1) {
3602+ block_mutex = buf_page_get_mutex(bpage);
3603+ if (!block_mutex)
3604+ return block_mutex;
3605+
3606+ mutex_enter(block_mutex);
3607+ if (block_mutex == buf_page_get_mutex(bpage))
3608+ return block_mutex;
3609+ mutex_exit(block_mutex);
3610+ }
3611+}
3612+
3613 /*********************************************************************//**
3614 Get the flush type of a page.
3615 @return flush type */
11822e22 3616@@ -425,8 +454,8 @@
b4e1fa2c
AM
3617 enum buf_io_fix io_fix) /*!< in: io_fix state */
3618 {
3619 #ifdef UNIV_DEBUG
3620- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3621- ut_ad(buf_pool_mutex_own(buf_pool));
3622+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3623+ //ut_ad(buf_pool_mutex_own(buf_pool));
3624 #endif
3625 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3626
11822e22 3627@@ -456,14 +485,14 @@
b4e1fa2c
AM
3628 const buf_page_t* bpage) /*!< control block being relocated */
3629 {
3630 #ifdef UNIV_DEBUG
3631- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3632- ut_ad(buf_pool_mutex_own(buf_pool));
3633+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3634+ //ut_ad(buf_pool_mutex_own(buf_pool));
3635 #endif
3636 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3637 ut_ad(buf_page_in_file(bpage));
3638- ut_ad(bpage->in_LRU_list);
3639+ //ut_ad(bpage->in_LRU_list);
3640
3641- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3642+ return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3643 && bpage->buf_fix_count == 0);
3644 }
3645
11822e22 3646@@ -477,8 +506,8 @@
b4e1fa2c
AM
3647 const buf_page_t* bpage) /*!< in: control block */
3648 {
3649 #ifdef UNIV_DEBUG
3650- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3651- ut_ad(buf_pool_mutex_own(buf_pool));
3652+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3653+ //ut_ad(buf_pool_mutex_own(buf_pool));
3654 #endif
3655 ut_ad(buf_page_in_file(bpage));
3656
11822e22 3657@@ -498,7 +527,8 @@
b4e1fa2c
AM
3658 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3659 #endif /* UNIV_DEBUG */
3660 ut_a(buf_page_in_file(bpage));
3661- ut_ad(buf_pool_mutex_own(buf_pool));
3662+ //ut_ad(buf_pool_mutex_own(buf_pool));
3663+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3664 ut_ad(bpage->in_LRU_list);
3665
3666 #ifdef UNIV_LRU_DEBUG
11822e22 3667@@ -545,9 +575,10 @@
b4e1fa2c
AM
3668 ulint time_ms) /*!< in: ut_time_ms() */
3669 {
3670 #ifdef UNIV_DEBUG
3671- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3672- ut_ad(buf_pool_mutex_own(buf_pool));
3673+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3674+ //ut_ad(buf_pool_mutex_own(buf_pool));
3675 #endif
3676+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3677 ut_a(buf_page_in_file(bpage));
3678
3679 if (!bpage->access_time) {
11822e22 3680@@ -761,19 +792,19 @@
b4e1fa2c
AM
3681 /*===========*/
3682 buf_block_t* block) /*!< in, own: block to be freed */
3683 {
3684- buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3685+ //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3686
3687- buf_pool_mutex_enter(buf_pool);
3688+ //buf_pool_mutex_enter(buf_pool);
3689
3690 mutex_enter(&block->mutex);
3691
3692 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3693
3694- buf_LRU_block_free_non_file_page(block);
3695+ buf_LRU_block_free_non_file_page(block, FALSE);
3696
3697 mutex_exit(&block->mutex);
3698
3699- buf_pool_mutex_exit(buf_pool);
3700+ //buf_pool_mutex_exit(buf_pool);
3701 }
3702 #endif /* !UNIV_HOTBACKUP */
3703
11822e22 3704@@ -821,17 +852,17 @@
b4e1fa2c
AM
3705 page frame */
3706 {
3707 ib_uint64_t lsn;
3708- mutex_t* block_mutex = buf_page_get_mutex(bpage);
3709-
3710- mutex_enter(block_mutex);
3711+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3712
3713- if (buf_page_in_file(bpage)) {
3714+ if (block_mutex && buf_page_in_file(bpage)) {
3715 lsn = bpage->newest_modification;
3716 } else {
3717 lsn = 0;
3718 }
3719
3720- mutex_exit(block_mutex);
3721+ if (block_mutex) {
3722+ mutex_exit(block_mutex);
3723+ }
3724
3725 return(lsn);
3726 }
11822e22 3727@@ -849,7 +880,7 @@
b4e1fa2c
AM
3728 #ifdef UNIV_SYNC_DEBUG
3729 buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3730
3731- ut_ad((buf_pool_mutex_own(buf_pool)
3732+ ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3733 && (block->page.buf_fix_count == 0))
3734 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3735 #endif /* UNIV_SYNC_DEBUG */
11822e22 3736@@ -979,7 +1010,11 @@
b4e1fa2c
AM
3737 buf_page_t* bpage;
3738
3739 ut_ad(buf_pool);
3740- ut_ad(buf_pool_mutex_own(buf_pool));
3741+ //ut_ad(buf_pool_mutex_own(buf_pool));
3742+#ifdef UNIV_SYNC_DEBUG
3743+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3744+ || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3745+#endif
3746 ut_ad(fold == buf_page_address_fold(space, offset));
3747
3748 /* Look for the page in the hash table */
11822e22 3749@@ -1064,11 +1099,13 @@
b4e1fa2c
AM
3750 const buf_page_t* bpage;
3751 buf_pool_t* buf_pool = buf_pool_get(space, offset);
3752
3753- buf_pool_mutex_enter(buf_pool);
3754+ //buf_pool_mutex_enter(buf_pool);
3755+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3756
3757 bpage = buf_page_hash_get(buf_pool, space, offset);
3758
3759- buf_pool_mutex_exit(buf_pool);
3760+ //buf_pool_mutex_exit(buf_pool);
3761+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3762
3763 return(bpage != NULL);
3764 }
11822e22 3765@@ -1196,4 +1233,38 @@
b4e1fa2c
AM
3766 buf_pool_mutex_exit(buf_pool);
3767 }
3768 }
3769+
3770+/********************************************************************//**
3771+Acquires the page_hash_latch of every buffer pool instance in exclusive mode. */
3772+UNIV_INLINE
3773+void
3774+buf_pool_page_hash_x_lock_all(void)
3775+/*===============================*/
3776+{
3777+ ulint i;
3778+
3779+ for (i = 0; i < srv_buf_pool_instances; i++) {
3780+ buf_pool_t* buf_pool;
3781+
3782+ buf_pool = buf_pool_from_array(i);
3783+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3784+ }
3785+}
3786+
3787+/********************************************************************//**
3788+Releases the exclusive page_hash_latch of every buffer pool instance. */
3789+UNIV_INLINE
3790+void
3791+buf_pool_page_hash_x_unlock_all(void)
3792+/*=================================*/
3793+{
3794+ ulint i;
3795+
3796+ for (i = 0; i < srv_buf_pool_instances; i++) {
3797+ buf_pool_t* buf_pool;
3798+
3799+ buf_pool = buf_pool_from_array(i);
3800+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3801+ }
3802+}
3803 #endif /* !UNIV_HOTBACKUP */
3804diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3805--- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900
3806+++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900
df1b5770
AM
3807@@ -111,8 +111,9 @@
3808 buf_LRU_free_block(
3809 /*===============*/
b4e1fa2c 3810 buf_page_t* bpage, /*!< in: block to be freed */
df1b5770
AM
3811- ibool zip) /*!< in: TRUE if should remove also the
3812+ ibool zip, /*!< in: TRUE if should remove also the
b4e1fa2c 3813 compressed page of an uncompressed page */
df1b5770
AM
3814+ ibool have_LRU_mutex)
3815 __attribute__((nonnull));
b4e1fa2c
AM
3816 /******************************************************************//**
3817 Try to free a replaceable block.
df1b5770 3818@@ -159,7 +160,8 @@
b4e1fa2c
AM
3819 void
3820 buf_LRU_block_free_non_file_page(
3821 /*=============================*/
3822- buf_block_t* block); /*!< in: block, must not contain a file page */
3823+ buf_block_t* block, /*!< in: block, must not contain a file page */
3824+ ibool have_page_hash_mutex);
3825 /******************************************************************//**
3826 Adds a block to the LRU list. */
3827 UNIV_INTERN
3828diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3829--- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900
3830+++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900
3831@@ -112,6 +112,7 @@
3832 extern mysql_pfs_key_t archive_lock_key;
3833 # endif /* UNIV_LOG_ARCHIVE */
3834 extern mysql_pfs_key_t btr_search_latch_key;
3835+extern mysql_pfs_key_t buf_pool_page_hash_key;
3836 extern mysql_pfs_key_t buf_block_lock_key;
3837 # ifdef UNIV_SYNC_DEBUG
3838 extern mysql_pfs_key_t buf_block_debug_latch_key;
3839diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3840--- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900
3841+++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900
3842@@ -75,6 +75,10 @@
3843 extern mysql_pfs_key_t buffer_block_mutex_key;
3844 extern mysql_pfs_key_t buf_pool_mutex_key;
3845 extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3846+extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3847+extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3848+extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3849+extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3850 extern mysql_pfs_key_t cache_last_read_mutex_key;
3851 extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3852 extern mysql_pfs_key_t dict_sys_mutex_key;
3853@@ -660,7 +664,7 @@
b4e1fa2c 3854 #define SYNC_TRX_SYS_HEADER 290
11822e22 3855 #define SYNC_PURGE_QUEUE 200
b4e1fa2c
AM
3856 #define SYNC_LOG 170
3857-#define SYNC_LOG_FLUSH_ORDER 147
3858+#define SYNC_LOG_FLUSH_ORDER 156
3859 #define SYNC_RECV 168
3860 #define SYNC_WORK_QUEUE 162
3861 #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
3862@@ -670,8 +674,13 @@
3863 SYNC_SEARCH_SYS, as memory allocation
3864 can call routines there! Otherwise
3865 the level is SYNC_MEM_HASH. */
3866+#define SYNC_BUF_LRU_LIST 158
3867+#define SYNC_BUF_PAGE_HASH 157
3868+#define SYNC_BUF_BLOCK 155 /* Block mutex */
3869+#define SYNC_BUF_FREE_LIST 153
3870+#define SYNC_BUF_ZIP_FREE 152
3871+#define SYNC_BUF_ZIP_HASH 151
3872 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
3873-#define SYNC_BUF_BLOCK 146 /* Block mutex */
3874 #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
3875 #define SYNC_DOUBLEWRITE 140
3876 #define SYNC_ANY_LATCH 135
3877@@ -703,7 +712,7 @@
3878 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3879 when atomic operations are not enabled */
3880 #endif
3881- ulint waiters; /*!< This ulint is set to 1 if there are (or
3882+ volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3883 may be) threads waiting in the global wait
3884 array for this mutex to be released.
3885 Otherwise, this is 0. */
3886diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3887--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900
3888+++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900
11822e22 3889@@ -3101,7 +3101,7 @@
b4e1fa2c
AM
3890 level += log_sys->max_checkpoint_age
3891 - (lsn - oldest_modification);
3892 }
3893- bpage = UT_LIST_GET_NEXT(list, bpage);
3894+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3895 n_blocks++;
3896 }
3897
11822e22 3898@@ -3187,7 +3187,7 @@
b4e1fa2c
AM
3899 found = TRUE;
3900 break;
3901 }
3902- bpage = UT_LIST_GET_NEXT(list, bpage);
3903+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3904 new_blocks_num++;
3905 }
3906 if (!found) {
3907diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3908--- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3909+++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
df1b5770 3910@@ -284,7 +284,7 @@
b4e1fa2c
AM
3911 mutex->lock_word = 0;
3912 #endif
3913 mutex->event = os_event_create(NULL);
3914- mutex_set_waiters(mutex, 0);
3915+ mutex->waiters = 0;
3916 #ifdef UNIV_DEBUG
3917 mutex->magic_n = MUTEX_MAGIC_N;
3918 #endif /* UNIV_DEBUG */
df1b5770 3919@@ -463,6 +463,15 @@
b4e1fa2c
AM
3920 mutex_t* mutex, /*!< in: mutex */
3921 ulint n) /*!< in: value to set */
3922 {
3923+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3924+ ut_ad(mutex);
3925+
3926+ if (n) {
3927+ os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3928+ } else {
3929+ os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3930+ }
3931+#else
3932 volatile ulint* ptr; /* declared volatile to ensure that
3933 the value is stored to memory */
3934 ut_ad(mutex);
df1b5770 3935@@ -471,6 +480,7 @@
b4e1fa2c
AM
3936
3937 *ptr = n; /* Here we assume that the write of a single
3938 word in memory is atomic */
3939+#endif
3940 }
3941
3942 /******************************************************************//**
df1b5770 3943@@ -1185,7 +1195,12 @@
b4e1fa2c
AM
3944 ut_error;
3945 }
3946 break;
3947+ case SYNC_BUF_LRU_LIST:
3948 case SYNC_BUF_FLUSH_LIST:
3949+ case SYNC_BUF_PAGE_HASH:
3950+ case SYNC_BUF_FREE_LIST:
3951+ case SYNC_BUF_ZIP_FREE:
3952+ case SYNC_BUF_ZIP_HASH:
3953 case SYNC_BUF_POOL:
3954 /* We can have multiple mutexes of this type therefore we
3955 can only check whether the greater than condition holds. */
df1b5770 3956@@ -1203,7 +1218,8 @@
b4e1fa2c
AM
3957 buffer block (block->mutex or buf_pool->zip_mutex). */
3958 if (!sync_thread_levels_g(array, level, FALSE)) {
3959 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3960- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3961+ /* The exact rule for this check is not settled yet, so the assertion below is disabled for now. */
3962+ //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
3963 }
3964 break;
3965 case SYNC_REC_LOCK:
This page took 0.717803 seconds and 4 git commands to generate.