git.pld-linux.org Git - packages/mysql.git/blame - innodb_split_buf_pool_mutex.patch
- fixed todo list
[packages/mysql.git] / innodb_split_buf_pool_mutex.patch
CommitLineData
b4e1fa2c
AM
1# name : innodb_split_buf_pool_mutex.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
8diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9--- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900
10+++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900
df1b5770 11@@ -4066,7 +4066,8 @@
b4e1fa2c
AM
12
13 mtr_commit(mtr);
14
15- buf_pool_mutex_enter(buf_pool);
16+ //buf_pool_mutex_enter(buf_pool);
17+ mutex_enter(&buf_pool->LRU_list_mutex);
18 mutex_enter(&block->mutex);
19
20 /* Only free the block if it is still allocated to
df1b5770 21@@ -4077,16 +4078,21 @@
b4e1fa2c
AM
22 && buf_block_get_space(block) == space
23 && buf_block_get_page_no(block) == page_no) {
24
df1b5770 25- if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED
b4e1fa2c 26- && all && block->page.zip.data) {
df1b5770 27+ if (buf_LRU_free_block(&block->page, all, TRUE) != BUF_LRU_FREED
b4e1fa2c
AM
28+ && all && block->page.zip.data
29+ /* Now, buf_LRU_free_block() may release mutex temporarily */
30+ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
31+ && buf_block_get_space(block) == space
32+ && buf_block_get_page_no(block) == page_no) {
33 /* Attempt to deallocate the uncompressed page
34 if the whole block cannot be deallocted. */
35
df1b5770
AM
36- buf_LRU_free_block(&block->page, FALSE);
37+ buf_LRU_free_block(&block->page, FALSE, TRUE);
b4e1fa2c
AM
38 }
39 }
40
41- buf_pool_mutex_exit(buf_pool);
42+ //buf_pool_mutex_exit(buf_pool);
43+ mutex_exit(&buf_pool->LRU_list_mutex);
44 mutex_exit(&block->mutex);
45 }
46
47diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
48--- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900
49+++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900
d8778560 50@@ -1943,7 +1943,7 @@
b4e1fa2c
AM
51 rec_offs_init(offsets_);
52
53 rw_lock_x_lock(&btr_search_latch);
54- buf_pool_mutex_enter_all();
55+ buf_pool_page_hash_x_lock_all();
56
57 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
58
d8778560 59@@ -1951,11 +1951,11 @@
b4e1fa2c
AM
60 /* We release btr_search_latch every once in a while to
61 give other queries a chance to run. */
62 if ((i != 0) && ((i % chunk_size) == 0)) {
63- buf_pool_mutex_exit_all();
64+ buf_pool_page_hash_x_unlock_all();
65 rw_lock_x_unlock(&btr_search_latch);
66 os_thread_yield();
67 rw_lock_x_lock(&btr_search_latch);
68- buf_pool_mutex_enter_all();
69+ buf_pool_page_hash_x_lock_all();
70 }
71
72 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
d8778560 73@@ -2066,11 +2066,11 @@
b4e1fa2c
AM
74 /* We release btr_search_latch every once in a while to
75 give other queries a chance to run. */
76 if (i != 0) {
77- buf_pool_mutex_exit_all();
78+ buf_pool_page_hash_x_unlock_all();
79 rw_lock_x_unlock(&btr_search_latch);
80 os_thread_yield();
81 rw_lock_x_lock(&btr_search_latch);
82- buf_pool_mutex_enter_all();
83+ buf_pool_page_hash_x_lock_all();
84 }
85
86 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
d8778560 87@@ -2078,7 +2078,7 @@
b4e1fa2c
AM
88 }
89 }
90
91- buf_pool_mutex_exit_all();
92+ buf_pool_page_hash_x_unlock_all();
93 rw_lock_x_unlock(&btr_search_latch);
94 if (UNIV_LIKELY_NULL(heap)) {
95 mem_heap_free(heap);
96diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
97--- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
98+++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
99@@ -73,10 +73,11 @@
100 if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
101 #endif /* UNIV_DEBUG_VALGRIND */
102
103- ut_ad(buf_pool_mutex_own(buf_pool));
104+ //ut_ad(buf_pool_mutex_own(buf_pool));
105+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
106 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
107 ut_ad(buf_pool->zip_free[i].start != bpage);
108- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
109+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
110
111 #ifdef UNIV_DEBUG_VALGRIND
112 if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
113@@ -96,8 +97,8 @@
114 buf_pool->zip_free[] */
115 {
116 #ifdef UNIV_DEBUG_VALGRIND
117- buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
118- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
119+ buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
120+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
121
122 if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
123 if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
124@@ -106,9 +107,10 @@
125 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
126 #endif /* UNIV_DEBUG_VALGRIND */
127
128- ut_ad(buf_pool_mutex_own(buf_pool));
129+ //ut_ad(buf_pool_mutex_own(buf_pool));
130+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
131 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
132- UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
133+ UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
134
135 #ifdef UNIV_DEBUG_VALGRIND
136 if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
137@@ -128,12 +130,13 @@
138 {
139 buf_page_t* bpage;
140
141- ut_ad(buf_pool_mutex_own(buf_pool));
142+ //ut_ad(buf_pool_mutex_own(buf_pool));
143+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
144 ut_a(i < BUF_BUDDY_SIZES);
145
146 #ifndef UNIV_DEBUG_VALGRIND
147 /* Valgrind would complain about accessing free memory. */
148- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
149+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
150 ut_ad(buf_page_get_state(ut_list_node_313)
151 == BUF_BLOCK_ZIP_FREE)));
152 #endif /* !UNIV_DEBUG_VALGRIND */
153@@ -177,16 +180,19 @@
154 buf_buddy_block_free(
155 /*=================*/
156 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
157- void* buf) /*!< in: buffer frame to deallocate */
158+ void* buf, /*!< in: buffer frame to deallocate */
159+ ibool have_page_hash_mutex)
160 {
161 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
162 buf_page_t* bpage;
163 buf_block_t* block;
164
165- ut_ad(buf_pool_mutex_own(buf_pool));
166+ //ut_ad(buf_pool_mutex_own(buf_pool));
167 ut_ad(!mutex_own(&buf_pool->zip_mutex));
168 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
169
170+ mutex_enter(&buf_pool->zip_hash_mutex);
171+
172 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
173 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
174 && bpage->in_zip_hash && !bpage->in_page_hash),
175@@ -198,12 +204,14 @@
176 ut_d(bpage->in_zip_hash = FALSE);
177 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
178
179+ mutex_exit(&buf_pool->zip_hash_mutex);
180+
181 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
182 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
183
184 block = (buf_block_t*) bpage;
185 mutex_enter(&block->mutex);
186- buf_LRU_block_free_non_file_page(block);
187+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
188 mutex_exit(&block->mutex);
189
190 ut_ad(buf_pool->buddy_n_frames > 0);
191@@ -220,7 +228,7 @@
192 {
193 buf_pool_t* buf_pool = buf_pool_from_block(block);
194 const ulint fold = BUF_POOL_ZIP_FOLD(block);
195- ut_ad(buf_pool_mutex_own(buf_pool));
196+ //ut_ad(buf_pool_mutex_own(buf_pool));
197 ut_ad(!mutex_own(&buf_pool->zip_mutex));
198 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
199
200@@ -232,7 +240,10 @@
201 ut_ad(!block->page.in_page_hash);
202 ut_ad(!block->page.in_zip_hash);
203 ut_d(block->page.in_zip_hash = TRUE);
204+
205+ mutex_enter(&buf_pool->zip_hash_mutex);
206 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
207+ mutex_exit(&buf_pool->zip_hash_mutex);
208
209 ut_d(buf_pool->buddy_n_frames++);
210 }
211@@ -268,7 +279,7 @@
212 bpage->state = BUF_BLOCK_ZIP_FREE;
213 #ifndef UNIV_DEBUG_VALGRIND
214 /* Valgrind would complain about accessing free memory. */
215- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
216+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
217 ut_ad(buf_page_get_state(
218 ut_list_node_313)
219 == BUF_BLOCK_ZIP_FREE)));
220@@ -291,25 +302,29 @@
221 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
222 ulint i, /*!< in: index of buf_pool->zip_free[],
223 or BUF_BUDDY_SIZES */
224- ibool* lru) /*!< in: pointer to a variable that
225+ ibool* lru, /*!< in: pointer to a variable that
226 will be assigned TRUE if storage was
227 allocated from the LRU list and
228 buf_pool->mutex was temporarily
229 released, or NULL if the LRU list
230 should not be used */
231+ ibool have_page_hash_mutex)
232 {
233 buf_block_t* block;
234
235- ut_ad(buf_pool_mutex_own(buf_pool));
236+ //ut_ad(buf_pool_mutex_own(buf_pool));
237+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
238 ut_ad(!mutex_own(&buf_pool->zip_mutex));
239
240 if (i < BUF_BUDDY_SIZES) {
241 /* Try to allocate from the buddy system. */
242+ mutex_enter(&buf_pool->zip_free_mutex);
243 block = buf_buddy_alloc_zip(buf_pool, i);
244
245 if (block) {
246 goto func_exit;
247 }
248+ mutex_exit(&buf_pool->zip_free_mutex);
249 }
250
251 /* Try allocating from the buf_pool->free list. */
252@@ -326,19 +341,30 @@
253 }
254
255 /* Try replacing an uncompressed page in the buffer pool. */
256- buf_pool_mutex_exit(buf_pool);
257+ //buf_pool_mutex_exit(buf_pool);
258+ mutex_exit(&buf_pool->LRU_list_mutex);
259+ if (have_page_hash_mutex) {
260+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
261+ }
df1b5770 262 block = buf_LRU_get_free_block(buf_pool);
b4e1fa2c
AM
263 *lru = TRUE;
264- buf_pool_mutex_enter(buf_pool);
265+ //buf_pool_mutex_enter(buf_pool);
266+ mutex_enter(&buf_pool->LRU_list_mutex);
267+ if (have_page_hash_mutex) {
268+ rw_lock_x_lock(&buf_pool->page_hash_latch);
269+ }
270
271 alloc_big:
272 buf_buddy_block_register(block);
273
274+ mutex_enter(&buf_pool->zip_free_mutex);
275 block = buf_buddy_alloc_from(
276 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
277
278 func_exit:
279 buf_pool->buddy_stat[i].used++;
280+ mutex_exit(&buf_pool->zip_free_mutex);
281+
282 return(block);
283 }
284
285@@ -355,7 +381,10 @@
286 buf_page_t* b;
287 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
288
289- ut_ad(buf_pool_mutex_own(buf_pool));
290+ //ut_ad(buf_pool_mutex_own(buf_pool));
291+#ifdef UNIV_SYNC_DEBUG
292+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
293+#endif
294
295 switch (buf_page_get_state(bpage)) {
296 case BUF_BLOCK_ZIP_FREE:
297@@ -364,7 +393,7 @@
298 case BUF_BLOCK_FILE_PAGE:
299 case BUF_BLOCK_MEMORY:
300 case BUF_BLOCK_REMOVE_HASH:
301- ut_error;
302+ /* ut_error; */ /* optimistic */
303 case BUF_BLOCK_ZIP_DIRTY:
304 /* Cannot relocate dirty pages. */
305 return(FALSE);
306@@ -374,9 +403,18 @@
307 }
308
309 mutex_enter(&buf_pool->zip_mutex);
310+ mutex_enter(&buf_pool->zip_free_mutex);
311
312 if (!buf_page_can_relocate(bpage)) {
313 mutex_exit(&buf_pool->zip_mutex);
314+ mutex_exit(&buf_pool->zip_free_mutex);
315+ return(FALSE);
316+ }
317+
318+ if (bpage != buf_page_hash_get(buf_pool,
319+ bpage->space, bpage->offset)) {
320+ mutex_exit(&buf_pool->zip_mutex);
321+ mutex_exit(&buf_pool->zip_free_mutex);
322 return(FALSE);
323 }
324
325@@ -384,18 +422,19 @@
326 ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
327
328 /* relocate buf_pool->zip_clean */
329- b = UT_LIST_GET_PREV(list, dpage);
330- UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
331+ b = UT_LIST_GET_PREV(zip_list, dpage);
332+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
333
334 if (b) {
335- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
336+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
337 } else {
338- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
339+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
340 }
341
342 UNIV_MEM_INVALID(bpage, sizeof *bpage);
343
344 mutex_exit(&buf_pool->zip_mutex);
345+ mutex_exit(&buf_pool->zip_free_mutex);
346 return(TRUE);
347 }
348
349@@ -409,14 +448,16 @@
350 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
351 void* src, /*!< in: block to relocate */
352 void* dst, /*!< in: free block to relocate to */
353- ulint i) /*!< in: index of
354+ ulint i, /*!< in: index of
355 buf_pool->zip_free[] */
356+ ibool have_page_hash_mutex)
357 {
358 buf_page_t* bpage;
359 const ulint size = BUF_BUDDY_LOW << i;
360 ullint usec = ut_time_us(NULL);
361
362- ut_ad(buf_pool_mutex_own(buf_pool));
363+ //ut_ad(buf_pool_mutex_own(buf_pool));
364+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
365 ut_ad(!mutex_own(&buf_pool->zip_mutex));
366 ut_ad(!ut_align_offset(src, size));
367 ut_ad(!ut_align_offset(dst, size));
d8778560
AM
368@@ -437,6 +478,13 @@
369 if (size >= PAGE_ZIP_MIN_SIZE) {
b4e1fa2c
AM
370 /* This is a compressed page. */
371 mutex_t* mutex;
d8778560
AM
372+ ulint space, page_no;
373+
b4e1fa2c
AM
374+ if (!have_page_hash_mutex) {
375+ mutex_exit(&buf_pool->zip_free_mutex);
376+ mutex_enter(&buf_pool->LRU_list_mutex);
377+ rw_lock_x_lock(&buf_pool->page_hash_latch);
378+ }
d8778560 379
b4e1fa2c
AM
380 /* The src block may be split into smaller blocks,
381 some of which may be free. Thus, the
d8778560
AM
382@@ -446,9 +494,9 @@
383 pool), so there is nothing wrong about this. The
384 mach_read_from_4() calls here will only trigger bogus
385 Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
386- ulint space = mach_read_from_4(
387+ space = mach_read_from_4(
388 (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
389- ulint page_no = mach_read_from_4(
390+ page_no = mach_read_from_4(
391 (const byte*) src + FIL_PAGE_OFFSET);
392 /* Suppress Valgrind warnings about conditional jump
393 on uninitialized value. */
394@@ -462,6 +510,11 @@
b4e1fa2c
AM
395 added to buf_pool->page_hash yet. Obviously,
396 it cannot be relocated. */
397
398+ if (!have_page_hash_mutex) {
399+ mutex_enter(&buf_pool->zip_free_mutex);
400+ mutex_exit(&buf_pool->LRU_list_mutex);
401+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
402+ }
403 return(FALSE);
404 }
405
d8778560 406@@ -473,18 +526,27 @@
b4e1fa2c
AM
407 For the sake of simplicity, give up. */
408 ut_ad(page_zip_get_size(&bpage->zip) < size);
409
410+ if (!have_page_hash_mutex) {
411+ mutex_enter(&buf_pool->zip_free_mutex);
412+ mutex_exit(&buf_pool->LRU_list_mutex);
413+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
414+ }
415 return(FALSE);
416 }
417
418+ /* To keep latch order */
419+ if (have_page_hash_mutex)
420+ mutex_exit(&buf_pool->zip_free_mutex);
421+
422 /* The block must have been allocated, but it may
423 contain uninitialized data. */
424 UNIV_MEM_ASSERT_W(src, size);
425
426- mutex = buf_page_get_mutex(bpage);
427+ mutex = buf_page_get_mutex_enter(bpage);
428
429- mutex_enter(mutex);
430+ mutex_enter(&buf_pool->zip_free_mutex);
431
432- if (buf_page_can_relocate(bpage)) {
433+ if (mutex && buf_page_can_relocate(bpage)) {
434 /* Relocate the compressed page. */
435 ut_a(bpage->zip.data == src);
436 memcpy(dst, src, size);
d8778560 437@@ -499,10 +561,22 @@
b4e1fa2c
AM
438 buddy_stat->relocated_usec
439 += ut_time_us(NULL) - usec;
440 }
441+
442+ if (!have_page_hash_mutex) {
443+ mutex_exit(&buf_pool->LRU_list_mutex);
444+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
445+ }
446 return(TRUE);
447 }
448
449- mutex_exit(mutex);
450+ if (!have_page_hash_mutex) {
451+ mutex_exit(&buf_pool->LRU_list_mutex);
452+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
453+ }
454+
455+ if (mutex) {
456+ mutex_exit(mutex);
457+ }
458 } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
459 /* This must be a buf_page_t object. */
460 #if UNIV_WORD_SIZE == 4
d8778560 461@@ -511,10 +585,31 @@
b4e1fa2c
AM
462 about uninitialized pad bytes. */
463 UNIV_MEM_ASSERT_RW(src, size);
464 #endif
465+
466+ mutex_exit(&buf_pool->zip_free_mutex);
467+
468+ if (!have_page_hash_mutex) {
469+ mutex_enter(&buf_pool->LRU_list_mutex);
470+ rw_lock_x_lock(&buf_pool->page_hash_latch);
471+ }
472+
473 if (buf_buddy_relocate_block(src, dst)) {
474+ mutex_enter(&buf_pool->zip_free_mutex);
475+
476+ if (!have_page_hash_mutex) {
477+ mutex_exit(&buf_pool->LRU_list_mutex);
478+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
479+ }
480
481 goto success;
482 }
483+
484+ mutex_enter(&buf_pool->zip_free_mutex);
485+
486+ if (!have_page_hash_mutex) {
487+ mutex_exit(&buf_pool->LRU_list_mutex);
488+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
489+ }
490 }
491
492 return(FALSE);
d8778560 493@@ -529,13 +624,15 @@
b4e1fa2c
AM
494 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
495 void* buf, /*!< in: block to be freed, must not be
496 pointed to by the buffer pool */
497- ulint i) /*!< in: index of buf_pool->zip_free[],
498+ ulint i, /*!< in: index of buf_pool->zip_free[],
499 or BUF_BUDDY_SIZES */
500+ ibool have_page_hash_mutex)
501 {
502 buf_page_t* bpage;
503 buf_page_t* buddy;
504
505- ut_ad(buf_pool_mutex_own(buf_pool));
506+ //ut_ad(buf_pool_mutex_own(buf_pool));
507+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
508 ut_ad(!mutex_own(&buf_pool->zip_mutex));
509 ut_ad(i <= BUF_BUDDY_SIZES);
510 ut_ad(buf_pool->buddy_stat[i].used > 0);
d8778560 511@@ -546,7 +643,9 @@
b4e1fa2c
AM
512 ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
513
514 if (i == BUF_BUDDY_SIZES) {
515- buf_buddy_block_free(buf_pool, buf);
516+ mutex_exit(&buf_pool->zip_free_mutex);
517+ buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
518+ mutex_enter(&buf_pool->zip_free_mutex);
519 return;
520 }
521
d8778560 522@@ -591,7 +690,7 @@
b4e1fa2c
AM
523 ut_a(bpage != buf);
524
525 {
526- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
527+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
528 UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
529 bpage = next;
530 }
d8778560 531@@ -600,13 +699,13 @@
b4e1fa2c
AM
532 #ifndef UNIV_DEBUG_VALGRIND
533 buddy_nonfree:
534 /* Valgrind would complain about accessing free memory. */
535- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
536+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
537 ut_ad(buf_page_get_state(ut_list_node_313)
538 == BUF_BLOCK_ZIP_FREE)));
539 #endif /* UNIV_DEBUG_VALGRIND */
540
541 /* The buddy is not free. Is there a free block of this size? */
542- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
543+ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
544
545 if (bpage) {
546 /* Remove the block from the free list, because a successful
d8778560 547@@ -616,7 +715,7 @@
b4e1fa2c
AM
548 buf_buddy_remove_from_free(buf_pool, bpage, i);
549
550 /* Try to relocate the buddy of buf to the free block. */
551- if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
552+ if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
553
554 ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
555 goto buddy_free2;
d8778560 556@@ -636,14 +735,14 @@
b4e1fa2c
AM
557
558 (Parts of the buddy can be free in
559 buf_pool->zip_free[j] with j < i.) */
560- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
561+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
562 ut_ad(buf_page_get_state(
563 ut_list_node_313)
564 == BUF_BLOCK_ZIP_FREE
565 && ut_list_node_313 != buddy)));
566 #endif /* !UNIV_DEBUG_VALGRIND */
567
568- if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
569+ if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
570
571 buf = bpage;
572 UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
573diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
574--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900
575+++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900
576@@ -263,6 +263,7 @@
577 #ifdef UNIV_PFS_RWLOCK
578 /* Keys to register buffer block related rwlocks and mutexes with
579 performance schema */
580+UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
581 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
582 # ifdef UNIV_SYNC_DEBUG
583 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
584@@ -273,6 +274,10 @@
585 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
586 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
587 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
588+UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
589+UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
590+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
591+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
592 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
593 #endif /* UNIV_PFS_MUTEX */
594
595@@ -881,9 +886,9 @@
596 block->page.in_zip_hash = FALSE;
597 block->page.in_flush_list = FALSE;
598 block->page.in_free_list = FALSE;
599- block->in_unzip_LRU_list = FALSE;
600 #endif /* UNIV_DEBUG */
601 block->page.in_LRU_list = FALSE;
602+ block->in_unzip_LRU_list = FALSE;
603 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
604 block->n_pointers = 0;
605 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
606@@ -981,9 +986,11 @@
607 memset(block->frame, '\0', UNIV_PAGE_SIZE);
608 #endif
609 /* Add the block to the free list */
610- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
611+ mutex_enter(&buf_pool->free_list_mutex);
612+ UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
613
614 ut_d(block->page.in_free_list = TRUE);
615+ mutex_exit(&buf_pool->free_list_mutex);
616 ut_ad(buf_pool_from_block(block) == buf_pool);
617
618 block++;
619@@ -1038,7 +1045,8 @@
620 buf_chunk_t* chunk = buf_pool->chunks;
621
622 ut_ad(buf_pool);
623- ut_ad(buf_pool_mutex_own(buf_pool));
624+ //ut_ad(buf_pool_mutex_own(buf_pool));
625+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
626 for (n = buf_pool->n_chunks; n--; chunk++) {
627
628 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
629@@ -1138,7 +1146,7 @@
630 buf_block_t* block;
631 const buf_block_t* block_end;
632
633- ut_ad(buf_pool_mutex_own(buf_pool));
634+ //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutexes here */
635
636 block_end = chunk->blocks + chunk->size;
637
638@@ -1150,8 +1158,10 @@
639 ut_ad(!block->in_unzip_LRU_list);
640 ut_ad(!block->page.in_flush_list);
641 /* Remove the block from the free list. */
642+ mutex_enter(&buf_pool->free_list_mutex);
643 ut_ad(block->page.in_free_list);
644- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
645+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
646+ mutex_exit(&buf_pool->free_list_mutex);
647
648 /* Free the latches. */
649 mutex_free(&block->mutex);
650@@ -1208,9 +1218,21 @@
651 ------------------------------- */
652 mutex_create(buf_pool_mutex_key,
653 &buf_pool->mutex, SYNC_BUF_POOL);
654+ mutex_create(buf_pool_LRU_list_mutex_key,
655+ &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
656+ rw_lock_create(buf_pool_page_hash_key,
657+ &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
658+ mutex_create(buf_pool_free_list_mutex_key,
659+ &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
660+ mutex_create(buf_pool_zip_free_mutex_key,
661+ &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
662+ mutex_create(buf_pool_zip_hash_mutex_key,
663+ &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
664 mutex_create(buf_pool_zip_mutex_key,
665 &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
666
667+ mutex_enter(&buf_pool->LRU_list_mutex);
668+ rw_lock_x_lock(&buf_pool->page_hash_latch);
669 buf_pool_mutex_enter(buf_pool);
670
671 if (buf_pool_size > 0) {
672@@ -1223,6 +1245,8 @@
673 mem_free(chunk);
674 mem_free(buf_pool);
675
676+ mutex_exit(&buf_pool->LRU_list_mutex);
677+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
678 buf_pool_mutex_exit(buf_pool);
679
680 return(DB_ERROR);
681@@ -1253,6 +1277,8 @@
682
683 /* All fields are initialized by mem_zalloc(). */
684
685+ mutex_exit(&buf_pool->LRU_list_mutex);
686+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
687 buf_pool_mutex_exit(buf_pool);
688
689 return(DB_SUCCESS);
690@@ -1467,7 +1493,11 @@
691 ulint fold;
692 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
693
694- ut_ad(buf_pool_mutex_own(buf_pool));
695+ //ut_ad(buf_pool_mutex_own(buf_pool));
696+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
697+#ifdef UNIV_SYNC_DEBUG
698+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
699+#endif
700 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
701 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
702 ut_a(bpage->buf_fix_count == 0);
703@@ -1554,7 +1584,8 @@
704
705 try_again:
706 btr_search_disable(); /* Empty the adaptive hash index again */
707- buf_pool_mutex_enter(buf_pool);
708+ //buf_pool_mutex_enter(buf_pool);
709+ mutex_enter(&buf_pool->LRU_list_mutex);
710
711 shrink_again:
712 if (buf_pool->n_chunks <= 1) {
713@@ -1625,7 +1656,7 @@
714
715 buf_LRU_make_block_old(&block->page);
716 dirty++;
df1b5770
AM
717- } else if (buf_LRU_free_block(&block->page, TRUE)
718+ } else if (buf_LRU_free_block(&block->page, TRUE, TRUE)
b4e1fa2c
AM
719 != BUF_LRU_FREED) {
720 nonfree++;
721 }
722@@ -1633,7 +1664,8 @@
723 mutex_exit(&block->mutex);
724 }
725
726- buf_pool_mutex_exit(buf_pool);
727+ //buf_pool_mutex_exit(buf_pool);
728+ mutex_exit(&buf_pool->LRU_list_mutex);
729
730 /* Request for a flush of the chunk if it helps.
731 Do not flush if there are non-free blocks, since
732@@ -1683,7 +1715,8 @@
733 func_done:
734 buf_pool->old_pool_size = buf_pool->curr_pool_size;
735 func_exit:
736- buf_pool_mutex_exit(buf_pool);
737+ //buf_pool_mutex_exit(buf_pool);
738+ mutex_exit(&buf_pool->LRU_list_mutex);
739 btr_search_enable();
740 }
741
742@@ -1724,7 +1757,9 @@
743 hash_table_t* zip_hash;
744 hash_table_t* page_hash;
745
746- buf_pool_mutex_enter(buf_pool);
747+ //buf_pool_mutex_enter(buf_pool);
748+ mutex_enter(&buf_pool->LRU_list_mutex);
749+ rw_lock_x_lock(&buf_pool->page_hash_latch);
750
751 /* Free, create, and populate the hash table. */
752 hash_table_free(buf_pool->page_hash);
753@@ -1765,8 +1800,9 @@
754 All such blocks are either in buf_pool->zip_clean or
755 in buf_pool->flush_list. */
756
757+ mutex_enter(&buf_pool->zip_mutex);
758 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
759- b = UT_LIST_GET_NEXT(list, b)) {
760+ b = UT_LIST_GET_NEXT(zip_list, b)) {
761 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
762 ut_ad(!b->in_flush_list);
763 ut_ad(b->in_LRU_list);
764@@ -1776,10 +1812,11 @@
765 HASH_INSERT(buf_page_t, hash, page_hash,
766 buf_page_address_fold(b->space, b->offset), b);
767 }
768+ mutex_exit(&buf_pool->zip_mutex);
769
770 buf_flush_list_mutex_enter(buf_pool);
771 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
772- b = UT_LIST_GET_NEXT(list, b)) {
773+ b = UT_LIST_GET_NEXT(flush_list, b)) {
774 ut_ad(b->in_flush_list);
775 ut_ad(b->in_LRU_list);
776 ut_ad(b->in_page_hash);
777@@ -1806,7 +1843,9 @@
778 }
779
780 buf_flush_list_mutex_exit(buf_pool);
781- buf_pool_mutex_exit(buf_pool);
782+ //buf_pool_mutex_exit(buf_pool);
783+ mutex_exit(&buf_pool->LRU_list_mutex);
784+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
785 }
786
787 /********************************************************************
788@@ -1853,21 +1892,32 @@
789 buf_page_t* bpage;
790 ulint i;
791 buf_pool_t* buf_pool = buf_pool_get(space, offset);
792+ mutex_t* block_mutex;
793
794- ut_ad(buf_pool_mutex_own(buf_pool));
795+ //ut_ad(buf_pool_mutex_own(buf_pool));
796
797+ rw_lock_x_lock(&buf_pool->page_hash_latch);
798 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
799+ if (bpage) {
800+ block_mutex = buf_page_get_mutex_enter(bpage);
801+ ut_a(block_mutex);
802+ }
803
804 if (UNIV_LIKELY_NULL(bpage)) {
805 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
806 /* The page was loaded meanwhile. */
807+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
808 return(bpage);
809 }
810 /* Add to an existing watch. */
811 bpage->buf_fix_count++;
812+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
813+ mutex_exit(block_mutex);
814 return(NULL);
815 }
816
817+ /* buf_pool->watch is protected by zip_mutex for now */
818+ mutex_enter(&buf_pool->zip_mutex);
819 for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
820 bpage = &buf_pool->watch[i];
821
822@@ -1891,10 +1941,12 @@
823 bpage->space = space;
824 bpage->offset = offset;
825 bpage->buf_fix_count = 1;
826-
827+ bpage->buf_pool_index = buf_pool_index(buf_pool);
828 ut_d(bpage->in_page_hash = TRUE);
829 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
830 fold, bpage);
831+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
832+ mutex_exit(&buf_pool->zip_mutex);
833 return(NULL);
834 case BUF_BLOCK_ZIP_PAGE:
835 ut_ad(bpage->in_page_hash);
836@@ -1912,6 +1964,8 @@
837 ut_error;
838
839 /* Fix compiler warning */
840+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
841+ mutex_exit(&buf_pool->zip_mutex);
842 return(NULL);
843 }
844
845@@ -1941,6 +1995,8 @@
846 buf_chunk_t* chunks;
847 buf_chunk_t* chunk;
848
849+ mutex_enter(&buf_pool->LRU_list_mutex);
850+ rw_lock_x_lock(&buf_pool->page_hash_latch);
851 buf_pool_mutex_enter(buf_pool);
852 chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
853
854@@ -1959,6 +2015,8 @@
855 buf_pool->n_chunks++;
856 }
857
858+ mutex_exit(&buf_pool->LRU_list_mutex);
859+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
860 buf_pool_mutex_exit(buf_pool);
861 }
862
863@@ -2046,7 +2104,11 @@
864 space, offset) */
865 buf_page_t* watch) /*!< in/out: sentinel for watch */
866 {
867- ut_ad(buf_pool_mutex_own(buf_pool));
868+ //ut_ad(buf_pool_mutex_own(buf_pool));
869+#ifdef UNIV_SYNC_DEBUG
870+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
871+#endif
872+ ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
873
874 HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
875 ut_d(watch->in_page_hash = FALSE);
876@@ -2068,28 +2130,31 @@
877 buf_pool_t* buf_pool = buf_pool_get(space, offset);
878 ulint fold = buf_page_address_fold(space, offset);
879
880- buf_pool_mutex_enter(buf_pool);
881+ //buf_pool_mutex_enter(buf_pool);
882+ rw_lock_x_lock(&buf_pool->page_hash_latch);
883 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
884 /* The page must exist because buf_pool_watch_set()
885 increments buf_fix_count. */
886 ut_a(bpage);
887
888 if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
889- mutex_t* mutex = buf_page_get_mutex(bpage);
890+ mutex_t* mutex = buf_page_get_mutex_enter(bpage);
891
892- mutex_enter(mutex);
893 ut_a(bpage->buf_fix_count > 0);
894 bpage->buf_fix_count--;
895 mutex_exit(mutex);
896 } else {
897+ mutex_enter(&buf_pool->zip_mutex);
898 ut_a(bpage->buf_fix_count > 0);
899
900 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
901 buf_pool_watch_remove(buf_pool, fold, bpage);
902 }
903+ mutex_exit(&buf_pool->zip_mutex);
904 }
905
906- buf_pool_mutex_exit(buf_pool);
907+ //buf_pool_mutex_exit(buf_pool);
908+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
909 }
910
911 /****************************************************************//**
912@@ -2109,14 +2174,16 @@
913 buf_pool_t* buf_pool = buf_pool_get(space, offset);
914 ulint fold = buf_page_address_fold(space, offset);
915
916- buf_pool_mutex_enter(buf_pool);
917+ //buf_pool_mutex_enter(buf_pool);
918+ rw_lock_s_lock(&buf_pool->page_hash_latch);
919
920 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
921 /* The page must exist because buf_pool_watch_set()
922 increments buf_fix_count. */
923 ut_a(bpage);
924 ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
925- buf_pool_mutex_exit(buf_pool);
926+ //buf_pool_mutex_exit(buf_pool);
927+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
928
929 return(ret);
930 }
931@@ -2133,13 +2200,15 @@
932 {
933 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
934
935- buf_pool_mutex_enter(buf_pool);
936+ //buf_pool_mutex_enter(buf_pool);
937+ mutex_enter(&buf_pool->LRU_list_mutex);
938
939 ut_a(buf_page_in_file(bpage));
940
941 buf_LRU_make_block_young(bpage);
942
943- buf_pool_mutex_exit(buf_pool);
944+ //buf_pool_mutex_exit(buf_pool);
945+ mutex_exit(&buf_pool->LRU_list_mutex);
946 }
947
948 /********************************************************************//**
949@@ -2163,14 +2232,20 @@
950 ut_a(buf_page_in_file(bpage));
951
952 if (buf_page_peek_if_too_old(bpage)) {
953- buf_pool_mutex_enter(buf_pool);
954+ //buf_pool_mutex_enter(buf_pool);
955+ mutex_enter(&buf_pool->LRU_list_mutex);
956 buf_LRU_make_block_young(bpage);
957- buf_pool_mutex_exit(buf_pool);
958+ //buf_pool_mutex_exit(buf_pool);
959+ mutex_exit(&buf_pool->LRU_list_mutex);
960 } else if (!access_time) {
961 ulint time_ms = ut_time_ms();
962- buf_pool_mutex_enter(buf_pool);
963+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
964+ //buf_pool_mutex_enter(buf_pool);
965+ if (block_mutex) {
966 buf_page_set_accessed(bpage, time_ms);
967- buf_pool_mutex_exit(buf_pool);
968+ mutex_exit(block_mutex);
969+ }
970+ //buf_pool_mutex_exit(buf_pool);
971 }
972 }
973
974@@ -2187,7 +2262,8 @@
975 buf_block_t* block;
976 buf_pool_t* buf_pool = buf_pool_get(space, offset);
977
978- buf_pool_mutex_enter(buf_pool);
979+ //buf_pool_mutex_enter(buf_pool);
980+ rw_lock_s_lock(&buf_pool->page_hash_latch);
981
982 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
983
984@@ -2196,7 +2272,8 @@
985 block->check_index_page_at_flush = FALSE;
986 }
987
988- buf_pool_mutex_exit(buf_pool);
989+ //buf_pool_mutex_exit(buf_pool);
990+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
991 }
992
993 /********************************************************************//**
994@@ -2215,7 +2292,8 @@
995 ibool is_hashed;
996 buf_pool_t* buf_pool = buf_pool_get(space, offset);
997
998- buf_pool_mutex_enter(buf_pool);
999+ //buf_pool_mutex_enter(buf_pool);
1000+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1001
1002 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1003
1004@@ -2226,7 +2304,8 @@
1005 is_hashed = block->is_hashed;
1006 }
1007
1008- buf_pool_mutex_exit(buf_pool);
1009+ //buf_pool_mutex_exit(buf_pool);
1010+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1011
1012 return(is_hashed);
1013 }
1014@@ -2248,7 +2327,8 @@
1015 buf_page_t* bpage;
1016 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1017
1018- buf_pool_mutex_enter(buf_pool);
1019+ //buf_pool_mutex_enter(buf_pool);
1020+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1021
1022 bpage = buf_page_hash_get(buf_pool, space, offset);
1023
df1b5770 1024@@ -2259,7 +2339,8 @@
b4e1fa2c
AM
1025 bpage->file_page_was_freed = TRUE;
1026 }
1027
1028- buf_pool_mutex_exit(buf_pool);
1029+ //buf_pool_mutex_exit(buf_pool);
1030+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1031
1032 return(bpage);
1033 }
df1b5770 1034@@ -2280,7 +2361,8 @@
b4e1fa2c
AM
1035 buf_page_t* bpage;
1036 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1037
1038- buf_pool_mutex_enter(buf_pool);
1039+ //buf_pool_mutex_enter(buf_pool);
1040+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1041
1042 bpage = buf_page_hash_get(buf_pool, space, offset);
1043
df1b5770 1044@@ -2289,7 +2371,8 @@
b4e1fa2c
AM
1045 bpage->file_page_was_freed = FALSE;
1046 }
1047
1048- buf_pool_mutex_exit(buf_pool);
1049+ //buf_pool_mutex_exit(buf_pool);
1050+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1051
1052 return(bpage);
1053 }
df1b5770 1054@@ -2324,8 +2407,9 @@
b4e1fa2c
AM
1055 buf_pool->stat.n_page_gets++;
1056
1057 for (;;) {
1058- buf_pool_mutex_enter(buf_pool);
1059+ //buf_pool_mutex_enter(buf_pool);
1060 lookup:
1061+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1062 bpage = buf_page_hash_get(buf_pool, space, offset);
1063 if (bpage) {
1064 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
df1b5770 1065@@ -2334,7 +2418,8 @@
b4e1fa2c
AM
1066
1067 /* Page not in buf_pool: needs to be read from file */
1068
1069- buf_pool_mutex_exit(buf_pool);
1070+ //buf_pool_mutex_exit(buf_pool);
1071+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1072
1073 buf_read_page(space, zip_size, offset);
1074
df1b5770 1075@@ -2346,10 +2431,15 @@
b4e1fa2c
AM
1076 if (UNIV_UNLIKELY(!bpage->zip.data)) {
1077 /* There is no compressed page. */
1078 err_exit:
1079- buf_pool_mutex_exit(buf_pool);
1080+ //buf_pool_mutex_exit(buf_pool);
1081+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1082 return(NULL);
1083 }
1084
1085+ block_mutex = buf_page_get_mutex_enter(bpage);
1086+
1087+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1088+
1089 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1090
1091 switch (buf_page_get_state(bpage)) {
df1b5770 1092@@ -2358,19 +2448,19 @@
b4e1fa2c
AM
1093 case BUF_BLOCK_MEMORY:
1094 case BUF_BLOCK_REMOVE_HASH:
1095 case BUF_BLOCK_ZIP_FREE:
1096+ if (block_mutex)
1097+ mutex_exit(block_mutex);
1098 break;
1099 case BUF_BLOCK_ZIP_PAGE:
1100 case BUF_BLOCK_ZIP_DIRTY:
1101- block_mutex = &buf_pool->zip_mutex;
1102- mutex_enter(block_mutex);
1103+ ut_a(block_mutex == &buf_pool->zip_mutex);
1104 bpage->buf_fix_count++;
1105 goto got_block;
1106 case BUF_BLOCK_FILE_PAGE:
1107- block_mutex = &((buf_block_t*) bpage)->mutex;
1108- mutex_enter(block_mutex);
1109+ ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1110
1111 /* Discard the uncompressed page frame if possible. */
df1b5770
AM
1112- if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) {
1113+ if (buf_LRU_free_block(bpage, FALSE, FALSE) == BUF_LRU_FREED) {
b4e1fa2c
AM
1114
1115 mutex_exit(block_mutex);
df1b5770
AM
1116 goto lookup;
1117@@ -2388,7 +2478,7 @@
b4e1fa2c
AM
1118 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1119 access_time = buf_page_is_accessed(bpage);
1120
1121- buf_pool_mutex_exit(buf_pool);
1122+ //buf_pool_mutex_exit(buf_pool);
1123
1124 mutex_exit(block_mutex);
1125
df1b5770 1126@@ -2697,7 +2787,7 @@
b4e1fa2c
AM
1127 const buf_block_t* block) /*!< in: pointer to block,
1128 not dereferenced */
1129 {
1130- ut_ad(buf_pool_mutex_own(buf_pool));
1131+ //ut_ad(buf_pool_mutex_own(buf_pool));
1132
1133 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1134 /* The pointer should be aligned. */
df1b5770 1135@@ -2733,6 +2823,7 @@
b4e1fa2c
AM
1136 ulint fix_type;
1137 ibool must_read;
1138 ulint retries = 0;
1139+ mutex_t* block_mutex = NULL;
1140 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1141
1142 ut_ad(mtr);
df1b5770 1143@@ -2755,9 +2846,11 @@
b4e1fa2c
AM
1144 fold = buf_page_address_fold(space, offset);
1145 loop:
1146 block = guess;
1147- buf_pool_mutex_enter(buf_pool);
1148+ //buf_pool_mutex_enter(buf_pool);
1149
1150 if (block) {
1151+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1152+
1153 /* If the guess is a compressed page descriptor that
1154 has been allocated by buf_buddy_alloc(), it may have
1155 been invalidated by buf_buddy_relocate(). In that
df1b5770 1156@@ -2766,11 +2859,15 @@
b4e1fa2c
AM
1157 the guess may be pointing to a buffer pool chunk that
1158 has been released when resizing the buffer pool. */
1159
1160- if (!buf_block_is_uncompressed(buf_pool, block)
1161+ if (!block_mutex) {
1162+ block = guess = NULL;
1163+ } else if (!buf_block_is_uncompressed(buf_pool, block)
1164 || offset != block->page.offset
1165 || space != block->page.space
1166 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1167
1168+ mutex_exit(block_mutex);
1169+
1170 block = guess = NULL;
1171 } else {
1172 ut_ad(!block->page.in_zip_hash);
df1b5770 1173@@ -2779,12 +2876,19 @@
b4e1fa2c
AM
1174 }
1175
1176 if (block == NULL) {
1177+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1178 block = (buf_block_t*) buf_page_hash_get_low(
1179 buf_pool, space, offset, fold);
1180+ if (block) {
1181+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1182+ ut_a(block_mutex);
1183+ }
1184+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1185 }
1186
1187 loop2:
1188 if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1189+ mutex_exit(block_mutex);
1190 block = NULL;
1191 }
1192
df1b5770 1193@@ -2796,12 +2900,14 @@
b4e1fa2c
AM
1194 space, offset, fold);
1195
1196 if (UNIV_LIKELY_NULL(block)) {
1197-
1198+ block_mutex = buf_page_get_mutex((buf_page_t*)block);
1199+ ut_a(block_mutex);
1200+ ut_ad(mutex_own(block_mutex));
1201 goto got_block;
1202 }
1203 }
1204
1205- buf_pool_mutex_exit(buf_pool);
1206+ //buf_pool_mutex_exit(buf_pool);
1207
1208 if (mode == BUF_GET_IF_IN_POOL
1209 || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
df1b5770 1210@@ -2849,7 +2955,8 @@
b4e1fa2c
AM
1211 /* The page is being read to buffer pool,
1212 but we cannot wait around for the read to
1213 complete. */
1214- buf_pool_mutex_exit(buf_pool);
1215+ //buf_pool_mutex_exit(buf_pool);
1216+ mutex_exit(block_mutex);
1217
1218 return(NULL);
1219 }
df1b5770 1220@@ -2859,38 +2966,49 @@
b4e1fa2c
AM
1221 ibool success;
1222
1223 case BUF_BLOCK_FILE_PAGE:
1224+ if (block_mutex == &buf_pool->zip_mutex) {
1225+ /* it is wrong mutex... */
1226+ mutex_exit(block_mutex);
1227+ goto loop;
1228+ }
1229 break;
1230
1231 case BUF_BLOCK_ZIP_PAGE:
1232 case BUF_BLOCK_ZIP_DIRTY:
1233+ ut_ad(block_mutex == &buf_pool->zip_mutex);
1234 bpage = &block->page;
1235 /* Protect bpage->buf_fix_count. */
1236- mutex_enter(&buf_pool->zip_mutex);
1237+ //mutex_enter(&buf_pool->zip_mutex);
1238
1239 if (bpage->buf_fix_count
1240 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1241 /* This condition often occurs when the buffer
1242 is not buffer-fixed, but I/O-fixed by
1243 buf_page_init_for_read(). */
1244- mutex_exit(&buf_pool->zip_mutex);
1245+ //mutex_exit(&buf_pool->zip_mutex);
1246 wait_until_unfixed:
1247 /* The block is buffer-fixed or I/O-fixed.
1248 Try again later. */
1249- buf_pool_mutex_exit(buf_pool);
1250+ //buf_pool_mutex_exit(buf_pool);
1251+ mutex_exit(block_mutex);
1252 os_thread_sleep(WAIT_FOR_READ);
1253
1254 goto loop;
1255 }
1256
1257 /* Allocate an uncompressed page. */
1258- buf_pool_mutex_exit(buf_pool);
1259- mutex_exit(&buf_pool->zip_mutex);
1260+ //buf_pool_mutex_exit(buf_pool);
1261+ //mutex_exit(&buf_pool->zip_mutex);
1262+ mutex_exit(block_mutex);
1263
df1b5770 1264 block = buf_LRU_get_free_block(buf_pool);
b4e1fa2c
AM
1265 ut_a(block);
1266+ block_mutex = &block->mutex;
1267
1268- buf_pool_mutex_enter(buf_pool);
1269- mutex_enter(&block->mutex);
1270+ //buf_pool_mutex_enter(buf_pool);
1271+ mutex_enter(&buf_pool->LRU_list_mutex);
1272+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1273+ mutex_enter(block_mutex);
1274
1275 {
1276 buf_page_t* hash_bpage;
df1b5770 1277@@ -2903,35 +3021,47 @@
b4e1fa2c
AM
1278 while buf_pool->mutex was released.
1279 Free the block that was allocated. */
1280
1281- buf_LRU_block_free_non_file_page(block);
1282- mutex_exit(&block->mutex);
1283+ buf_LRU_block_free_non_file_page(block, TRUE);
1284+ mutex_exit(block_mutex);
1285
1286 block = (buf_block_t*) hash_bpage;
1287+ if (block) {
1288+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1289+ ut_a(block_mutex);
1290+ }
1291+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1292+ mutex_exit(&buf_pool->LRU_list_mutex);
1293 goto loop2;
1294 }
1295 }
1296
1297+ mutex_enter(&buf_pool->zip_mutex);
1298+
1299 if (UNIV_UNLIKELY
1300 (bpage->buf_fix_count
1301 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1302
1303+ mutex_exit(&buf_pool->zip_mutex);
1304 /* The block was buffer-fixed or I/O-fixed
1305 while buf_pool->mutex was not held by this thread.
1306 Free the block that was allocated and try again.
1307 This should be extremely unlikely. */
1308
1309- buf_LRU_block_free_non_file_page(block);
1310- mutex_exit(&block->mutex);
1311+ buf_LRU_block_free_non_file_page(block, TRUE);
1312+ //mutex_exit(&block->mutex);
1313
1314+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1315+ mutex_exit(&buf_pool->LRU_list_mutex);
1316 goto wait_until_unfixed;
1317 }
1318
1319 /* Move the compressed page from bpage to block,
1320 and uncompress it. */
1321
1322- mutex_enter(&buf_pool->zip_mutex);
1323-
1324 buf_relocate(bpage, &block->page);
1325+
1326+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1327+
1328 buf_block_init_low(block);
1329 block->lock_hash_val = lock_rec_hash(space, offset);
1330
df1b5770 1331@@ -2940,7 +3070,7 @@
b4e1fa2c
AM
1332
1333 if (buf_page_get_state(&block->page)
1334 == BUF_BLOCK_ZIP_PAGE) {
1335- UT_LIST_REMOVE(list, buf_pool->zip_clean,
1336+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1337 &block->page);
1338 ut_ad(!block->page.in_flush_list);
1339 } else {
df1b5770 1340@@ -2957,19 +3087,24 @@
b4e1fa2c
AM
1341 /* Insert at the front of unzip_LRU list */
1342 buf_unzip_LRU_add_block(block, FALSE);
1343
1344+ mutex_exit(&buf_pool->LRU_list_mutex);
1345+
1346 block->page.buf_fix_count = 1;
1347 buf_block_set_io_fix(block, BUF_IO_READ);
1348 rw_lock_x_lock_func(&block->lock, 0, file, line);
1349
1350 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1351
1352- mutex_exit(&block->mutex);
1353+ mutex_exit(block_mutex);
1354 mutex_exit(&buf_pool->zip_mutex);
1355+
1356+ buf_pool_mutex_enter(buf_pool);
1357 buf_pool->n_pend_unzip++;
1358+ buf_pool_mutex_exit(buf_pool);
1359
1360- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1361+ buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1362
1363- buf_pool_mutex_exit(buf_pool);
1364+ //buf_pool_mutex_exit(buf_pool);
1365
1366 /* Decompress the page and apply buffered operations
1367 while not holding buf_pool->mutex or block->mutex. */
df1b5770 1368@@ -2982,12 +3117,15 @@
b4e1fa2c
AM
1369 }
1370
1371 /* Unfix and unlatch the block. */
1372- buf_pool_mutex_enter(buf_pool);
1373- mutex_enter(&block->mutex);
1374+ //buf_pool_mutex_enter(buf_pool);
1375+ block_mutex = &block->mutex;
1376+ mutex_enter(block_mutex);
1377 block->page.buf_fix_count--;
1378 buf_block_set_io_fix(block, BUF_IO_NONE);
1379- mutex_exit(&block->mutex);
1380+
1381+ buf_pool_mutex_enter(buf_pool);
1382 buf_pool->n_pend_unzip--;
1383+ buf_pool_mutex_exit(buf_pool);
1384 rw_lock_x_unlock(&block->lock);
1385
1386 break;
df1b5770 1387@@ -3003,7 +3141,7 @@
b4e1fa2c
AM
1388
1389 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1390
1391- mutex_enter(&block->mutex);
1392+ //mutex_enter(&block->mutex);
1393 #if UNIV_WORD_SIZE == 4
1394 /* On 32-bit systems, there is no padding in buf_page_t. On
1395 other systems, Valgrind could complain about uninitialized pad
df1b5770 1396@@ -3016,7 +3154,7 @@
b4e1fa2c
AM
1397 /* Try to evict the block from the buffer pool, to use the
1398 insert buffer (change buffer) as much as possible. */
1399
df1b5770
AM
1400- if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) {
1401+ if (buf_LRU_free_block(&block->page, TRUE, FALSE) == BUF_LRU_FREED) {
b4e1fa2c
AM
1402 mutex_exit(&block->mutex);
1403 if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
df1b5770 1404 /* Set the watch, as it would have
d8778560 1405@@ -3052,13 +3190,14 @@
b4e1fa2c
AM
1406
1407 buf_block_buf_fix_inc(block, file, line);
1408
1409- mutex_exit(&block->mutex);
1410+ //mutex_exit(&block->mutex);
1411
1412 /* Check if this is the first access to the page */
1413
1414 access_time = buf_page_is_accessed(&block->page);
1415
1416- buf_pool_mutex_exit(buf_pool);
1417+ //buf_pool_mutex_exit(buf_pool);
1418+ mutex_exit(block_mutex);
1419
1420 buf_page_set_accessed_make_young(&block->page, access_time);
1421
d8778560 1422@@ -3291,9 +3430,11 @@
b4e1fa2c
AM
1423 buf_pool = buf_pool_from_block(block);
1424
1425 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1426- buf_pool_mutex_enter(buf_pool);
1427+ //buf_pool_mutex_enter(buf_pool);
1428+ mutex_enter(&buf_pool->LRU_list_mutex);
1429 buf_LRU_make_block_young(&block->page);
1430- buf_pool_mutex_exit(buf_pool);
1431+ //buf_pool_mutex_exit(buf_pool);
1432+ mutex_exit(&buf_pool->LRU_list_mutex);
1433 } else if (!buf_page_is_accessed(&block->page)) {
1434 /* Above, we do a dirty read on purpose, to avoid
1435 mutex contention. The field buf_page_t::access_time
d8778560 1436@@ -3301,9 +3442,11 @@
b4e1fa2c
AM
1437 field must be protected by mutex, however. */
1438 ulint time_ms = ut_time_ms();
1439
1440- buf_pool_mutex_enter(buf_pool);
1441+ //buf_pool_mutex_enter(buf_pool);
1442+ mutex_enter(&block->mutex);
1443 buf_page_set_accessed(&block->page, time_ms);
1444- buf_pool_mutex_exit(buf_pool);
1445+ //buf_pool_mutex_exit(buf_pool);
1446+ mutex_exit(&block->mutex);
1447 }
1448
1449 ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
d8778560 1450@@ -3370,18 +3513,21 @@
b4e1fa2c
AM
1451 ut_ad(mtr);
1452 ut_ad(mtr->state == MTR_ACTIVE);
1453
1454- buf_pool_mutex_enter(buf_pool);
1455+ //buf_pool_mutex_enter(buf_pool);
1456+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1457 block = buf_block_hash_get(buf_pool, space_id, page_no);
1458
1459 if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1460- buf_pool_mutex_exit(buf_pool);
1461+ //buf_pool_mutex_exit(buf_pool);
1462+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1463 return(NULL);
1464 }
1465
1466 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1467
1468 mutex_enter(&block->mutex);
1469- buf_pool_mutex_exit(buf_pool);
1470+ //buf_pool_mutex_exit(buf_pool);
1471+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1472
1473 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1474 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
d8778560 1475@@ -3470,7 +3616,10 @@
b4e1fa2c
AM
1476 buf_page_t* hash_page;
1477 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1478
1479- ut_ad(buf_pool_mutex_own(buf_pool));
1480+ //ut_ad(buf_pool_mutex_own(buf_pool));
1481+#ifdef UNIV_SYNC_DEBUG
1482+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1483+#endif
1484 ut_ad(mutex_own(&(block->mutex)));
1485 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1486
d8778560 1487@@ -3499,11 +3648,14 @@
b4e1fa2c
AM
1488 if (UNIV_LIKELY(!hash_page)) {
1489 } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1490 /* Preserve the reference count. */
1491- ulint buf_fix_count = hash_page->buf_fix_count;
1492+ ulint buf_fix_count;
1493
1494+ mutex_enter(&buf_pool->zip_mutex);
1495+ buf_fix_count = hash_page->buf_fix_count;
1496 ut_a(buf_fix_count > 0);
1497 block->page.buf_fix_count += buf_fix_count;
1498 buf_pool_watch_remove(buf_pool, fold, hash_page);
1499+ mutex_exit(&buf_pool->zip_mutex);
1500 } else {
1501 fprintf(stderr,
1502 "InnoDB: Error: page %lu %lu already found"
d8778560 1503@@ -3513,7 +3665,8 @@
b4e1fa2c
AM
1504 (const void*) hash_page, (const void*) block);
1505 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1506 mutex_exit(&block->mutex);
1507- buf_pool_mutex_exit(buf_pool);
1508+ //buf_pool_mutex_exit(buf_pool);
1509+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1510 buf_print();
1511 buf_LRU_print();
1512 buf_validate();
d8778560 1513@@ -3597,7 +3750,9 @@
b4e1fa2c
AM
1514
1515 fold = buf_page_address_fold(space, offset);
1516
1517- buf_pool_mutex_enter(buf_pool);
1518+ //buf_pool_mutex_enter(buf_pool);
1519+ mutex_enter(&buf_pool->LRU_list_mutex);
1520+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1521
1522 watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1523 if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
d8778560 1524@@ -3606,9 +3761,15 @@
b4e1fa2c
AM
1525 err_exit:
1526 if (block) {
1527 mutex_enter(&block->mutex);
1528- buf_LRU_block_free_non_file_page(block);
1529+ mutex_exit(&buf_pool->LRU_list_mutex);
1530+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1531+ buf_LRU_block_free_non_file_page(block, FALSE);
1532 mutex_exit(&block->mutex);
1533 }
1534+ else {
1535+ mutex_exit(&buf_pool->LRU_list_mutex);
1536+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1537+ }
1538
1539 bpage = NULL;
1540 goto func_exit;
d8778560 1541@@ -3631,6 +3792,8 @@
b4e1fa2c
AM
1542
1543 buf_page_init(space, offset, fold, block);
1544
1545+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1546+
1547 /* The block must be put to the LRU list, to the old blocks */
1548 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1549
d8778560 1550@@ -3658,7 +3821,7 @@
b4e1fa2c
AM
1551 been added to buf_pool->LRU and
1552 buf_pool->page_hash. */
1553 mutex_exit(&block->mutex);
1554- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1555+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1556 mutex_enter(&block->mutex);
1557 block->page.zip.data = data;
1558
d8778560 1559@@ -3671,6 +3834,7 @@
b4e1fa2c
AM
1560 buf_unzip_LRU_add_block(block, TRUE);
1561 }
1562
1563+ mutex_exit(&buf_pool->LRU_list_mutex);
1564 mutex_exit(&block->mutex);
1565 } else {
1566 /* Defer buf_buddy_alloc() until after the block has
d8778560 1567@@ -3682,8 +3846,8 @@
b4e1fa2c
AM
1568 control block (bpage), in order to avoid the
1569 invocation of buf_buddy_relocate_block() on
1570 uninitialized data. */
1571- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1572- bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1573+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1574+ bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1575
1576 /* Initialize the buf_pool pointer. */
1577 bpage->buf_pool_index = buf_pool_index(buf_pool);
d8778560 1578@@ -3702,8 +3866,11 @@
b4e1fa2c
AM
1579
1580 /* The block was added by some other thread. */
1581 watch_page = NULL;
1582- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1583- buf_buddy_free(buf_pool, data, zip_size);
1584+ buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1585+ buf_buddy_free(buf_pool, data, zip_size, TRUE);
1586+
1587+ mutex_exit(&buf_pool->LRU_list_mutex);
1588+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1589
1590 bpage = NULL;
1591 goto func_exit;
d8778560 1592@@ -3747,18 +3914,24 @@
b4e1fa2c
AM
1593 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1594 bpage);
1595
1596+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1597+
1598 /* The block must be put to the LRU list, to the old blocks */
1599 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1600 buf_LRU_insert_zip_clean(bpage);
1601
1602+ mutex_exit(&buf_pool->LRU_list_mutex);
1603+
1604 buf_page_set_io_fix(bpage, BUF_IO_READ);
1605
1606 mutex_exit(&buf_pool->zip_mutex);
1607 }
1608
1609+ buf_pool_mutex_enter(buf_pool);
1610 buf_pool->n_pend_reads++;
1611-func_exit:
1612 buf_pool_mutex_exit(buf_pool);
1613+func_exit:
1614+ //buf_pool_mutex_exit(buf_pool);
1615
1616 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1617
d8778560 1618@@ -3800,7 +3973,9 @@
b4e1fa2c
AM
1619
1620 fold = buf_page_address_fold(space, offset);
1621
1622- buf_pool_mutex_enter(buf_pool);
1623+ //buf_pool_mutex_enter(buf_pool);
1624+ mutex_enter(&buf_pool->LRU_list_mutex);
1625+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1626
1627 block = (buf_block_t*) buf_page_hash_get_low(
1628 buf_pool, space, offset, fold);
d8778560 1629@@ -3816,7 +3991,9 @@
df1b5770 1630 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
b4e1fa2c
AM
1631
1632 /* Page can be found in buf_pool */
1633- buf_pool_mutex_exit(buf_pool);
1634+ //buf_pool_mutex_exit(buf_pool);
1635+ mutex_exit(&buf_pool->LRU_list_mutex);
1636+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1637
1638 buf_block_free(free_block);
1639
d8778560 1640@@ -3838,6 +4015,7 @@
b4e1fa2c
AM
1641 mutex_enter(&block->mutex);
1642
1643 buf_page_init(space, offset, fold, block);
1644+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1645
1646 /* The block must be put to the LRU list */
1647 buf_LRU_add_block(&block->page, FALSE);
d8778560 1648@@ -3864,7 +4042,7 @@
b4e1fa2c
AM
1649 the reacquisition of buf_pool->mutex. We also must
1650 defer this operation until after the block descriptor
1651 has been added to buf_pool->LRU and buf_pool->page_hash. */
1652- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1653+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1654 mutex_enter(&block->mutex);
1655 block->page.zip.data = data;
1656
d8778560 1657@@ -3882,7 +4060,8 @@
b4e1fa2c
AM
1658
1659 buf_page_set_accessed(&block->page, time_ms);
1660
1661- buf_pool_mutex_exit(buf_pool);
1662+ //buf_pool_mutex_exit(buf_pool);
1663+ mutex_exit(&buf_pool->LRU_list_mutex);
1664
1665 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1666
d8778560 1667@@ -3933,6 +4112,8 @@
b4e1fa2c
AM
1668 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1669 const ibool uncompressed = (buf_page_get_state(bpage)
1670 == BUF_BLOCK_FILE_PAGE);
1671+ ibool have_LRU_mutex = FALSE;
1672+ mutex_t* block_mutex;
1673
1674 ut_a(buf_page_in_file(bpage));
1675
d8778560 1676@@ -4066,8 +4247,26 @@
b4e1fa2c
AM
1677 }
1678 }
1679
1680+ if (io_type == BUF_IO_WRITE
1681+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1682+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1683+ /* to keep consistency at buf_LRU_insert_zip_clean() */
1684+ have_LRU_mutex = TRUE; /* optimistic */
1685+ }
1686+retry_mutex:
1687+ if (have_LRU_mutex)
1688+ mutex_enter(&buf_pool->LRU_list_mutex);
1689+ block_mutex = buf_page_get_mutex_enter(bpage);
1690+ ut_a(block_mutex);
1691+ if (io_type == BUF_IO_WRITE
1692+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1693+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1694+ && !have_LRU_mutex) {
1695+ mutex_exit(block_mutex);
1696+ have_LRU_mutex = TRUE;
1697+ goto retry_mutex;
1698+ }
1699 buf_pool_mutex_enter(buf_pool);
1700- mutex_enter(buf_page_get_mutex(bpage));
1701
1702 #ifdef UNIV_IBUF_COUNT_DEBUG
1703 if (io_type == BUF_IO_WRITE || uncompressed) {
d8778560 1704@@ -4090,6 +4289,7 @@
b4e1fa2c
AM
1705 the x-latch to this OS thread: do not let this confuse you in
1706 debugging! */
1707
1708+ ut_a(!have_LRU_mutex);
1709 ut_ad(buf_pool->n_pend_reads > 0);
1710 buf_pool->n_pend_reads--;
1711 buf_pool->stat.n_pages_read++;
d8778560 1712@@ -4107,6 +4307,9 @@
b4e1fa2c
AM
1713
1714 buf_flush_write_complete(bpage);
1715
1716+ if (have_LRU_mutex)
1717+ mutex_exit(&buf_pool->LRU_list_mutex);
1718+
1719 if (uncompressed) {
1720 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1721 BUF_IO_WRITE);
d8778560 1722@@ -4129,8 +4332,8 @@
b4e1fa2c
AM
1723 }
1724 #endif /* UNIV_DEBUG */
1725
1726- mutex_exit(buf_page_get_mutex(bpage));
1727 buf_pool_mutex_exit(buf_pool);
1728+ mutex_exit(block_mutex);
1729 }
1730
1731 /*********************************************************************//**
d8778560 1732@@ -4147,7 +4350,9 @@
b4e1fa2c
AM
1733
1734 ut_ad(buf_pool);
1735
1736- buf_pool_mutex_enter(buf_pool);
1737+ //buf_pool_mutex_enter(buf_pool);
1738+ mutex_enter(&buf_pool->LRU_list_mutex);
1739+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1740
1741 chunk = buf_pool->chunks;
1742
d8778560 1743@@ -4164,7 +4369,9 @@
b4e1fa2c
AM
1744 }
1745 }
1746
1747- buf_pool_mutex_exit(buf_pool);
1748+ //buf_pool_mutex_exit(buf_pool);
1749+ mutex_exit(&buf_pool->LRU_list_mutex);
1750+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1751
1752 return(TRUE);
1753 }
d8778560 1754@@ -4212,7 +4419,8 @@
b4e1fa2c
AM
1755 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1756 }
1757
1758- buf_pool_mutex_enter(buf_pool);
1759+ //buf_pool_mutex_enter(buf_pool);
1760+ mutex_enter(&buf_pool->LRU_list_mutex);
1761
1762 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1763 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
d8778560 1764@@ -4225,7 +4433,8 @@
b4e1fa2c
AM
1765 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1766 buf_refresh_io_stats(buf_pool);
1767
1768- buf_pool_mutex_exit(buf_pool);
1769+ //buf_pool_mutex_exit(buf_pool);
1770+ mutex_exit(&buf_pool->LRU_list_mutex);
1771 }
1772
1773 /*********************************************************************//**
d8778560 1774@@ -4267,7 +4476,10 @@
b4e1fa2c
AM
1775
1776 ut_ad(buf_pool);
1777
1778- buf_pool_mutex_enter(buf_pool);
1779+ //buf_pool_mutex_enter(buf_pool);
1780+ mutex_enter(&buf_pool->LRU_list_mutex);
1781+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1782+ /* for keep the new latch order, it cannot validate correctly... */
1783
1784 chunk = buf_pool->chunks;
1785
d8778560 1786@@ -4362,7 +4574,7 @@
b4e1fa2c
AM
1787 /* Check clean compressed-only blocks. */
1788
1789 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1790- b = UT_LIST_GET_NEXT(list, b)) {
1791+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1792 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1793 switch (buf_page_get_io_fix(b)) {
1794 case BUF_IO_NONE:
d8778560 1795@@ -4393,7 +4605,7 @@
b4e1fa2c
AM
1796
1797 buf_flush_list_mutex_enter(buf_pool);
1798 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1799- b = UT_LIST_GET_NEXT(list, b)) {
1800+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1801 ut_ad(b->in_flush_list);
1802 ut_a(b->oldest_modification);
1803 n_flush++;
d8778560 1804@@ -4452,6 +4664,8 @@
b4e1fa2c
AM
1805 }
1806
1807 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1808+ /* because of latching order with block->mutex, we cannot get needed mutexes before that */
1809+/*
1810 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1811 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1812 (ulong) UT_LIST_GET_LEN(buf_pool->free),
d8778560 1813@@ -4462,8 +4676,11 @@
b4e1fa2c
AM
1814 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1815 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1816 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1817+*/
1818
1819- buf_pool_mutex_exit(buf_pool);
1820+ //buf_pool_mutex_exit(buf_pool);
1821+ mutex_exit(&buf_pool->LRU_list_mutex);
1822+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1823
1824 ut_a(buf_LRU_validate());
1825 ut_a(buf_flush_validate(buf_pool));
d8778560 1826@@ -4519,7 +4736,9 @@
b4e1fa2c
AM
1827 index_ids = mem_alloc(size * sizeof *index_ids);
1828 counts = mem_alloc(sizeof(ulint) * size);
1829
1830- buf_pool_mutex_enter(buf_pool);
1831+ //buf_pool_mutex_enter(buf_pool);
1832+ mutex_enter(&buf_pool->LRU_list_mutex);
1833+ mutex_enter(&buf_pool->free_list_mutex);
1834 buf_flush_list_mutex_enter(buf_pool);
1835
1836 fprintf(stderr,
d8778560 1837@@ -4588,7 +4807,9 @@
b4e1fa2c
AM
1838 }
1839 }
1840
1841- buf_pool_mutex_exit(buf_pool);
1842+ //buf_pool_mutex_exit(buf_pool);
1843+ mutex_exit(&buf_pool->LRU_list_mutex);
1844+ mutex_exit(&buf_pool->free_list_mutex);
1845
1846 for (i = 0; i < n_found; i++) {
1847 index = dict_index_get_if_in_cache(index_ids[i]);
d8778560 1848@@ -4645,7 +4866,7 @@
b4e1fa2c
AM
1849 buf_chunk_t* chunk;
1850 ulint fixed_pages_number = 0;
1851
1852- buf_pool_mutex_enter(buf_pool);
1853+ //buf_pool_mutex_enter(buf_pool);
1854
1855 chunk = buf_pool->chunks;
1856
d8778560 1857@@ -4679,7 +4900,7 @@
b4e1fa2c
AM
1858 /* Traverse the lists of clean and dirty compressed-only blocks. */
1859
1860 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1861- b = UT_LIST_GET_NEXT(list, b)) {
1862+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1863 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1864 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1865
d8778560 1866@@ -4691,7 +4912,7 @@
b4e1fa2c
AM
1867
1868 buf_flush_list_mutex_enter(buf_pool);
1869 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1870- b = UT_LIST_GET_NEXT(list, b)) {
1871+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1872 ut_ad(b->in_flush_list);
1873
1874 switch (buf_page_get_state(b)) {
d8778560 1875@@ -4717,7 +4938,7 @@
b4e1fa2c
AM
1876
1877 buf_flush_list_mutex_exit(buf_pool);
1878 mutex_exit(&buf_pool->zip_mutex);
1879- buf_pool_mutex_exit(buf_pool);
1880+ //buf_pool_mutex_exit(buf_pool);
1881
1882 return(fixed_pages_number);
1883 }
d8778560
AM
1884@@ -4873,6 +5094,8 @@
1885 /* Find appropriate pool_info to store stats for this buffer pool */
1886 pool_info = &all_pool_info[pool_id];
b4e1fa2c
AM
1887
1888+ mutex_enter(&buf_pool->LRU_list_mutex);
1889+ mutex_enter(&buf_pool->free_list_mutex);
1890 buf_pool_mutex_enter(buf_pool);
1891 buf_flush_list_mutex_enter(buf_pool);
1892
d8778560
AM
1893@@ -4983,6 +5206,8 @@
1894 pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
b4e1fa2c
AM
1895
1896 buf_refresh_io_stats(buf_pool);
1897+ mutex_exit(&buf_pool->LRU_list_mutex);
1898+ mutex_exit(&buf_pool->free_list_mutex);
1899 buf_pool_mutex_exit(buf_pool);
1900 }
1901
d8778560 1902@@ -5224,11 +5449,13 @@
b4e1fa2c
AM
1903 {
1904 ulint len;
1905
1906- buf_pool_mutex_enter(buf_pool);
1907+ //buf_pool_mutex_enter(buf_pool);
1908+ mutex_enter(&buf_pool->free_list_mutex);
1909
1910 len = UT_LIST_GET_LEN(buf_pool->free);
1911
1912- buf_pool_mutex_exit(buf_pool);
1913+ //buf_pool_mutex_exit(buf_pool);
1914+ mutex_exit(&buf_pool->free_list_mutex);
1915
1916 return(len);
1917 }
1918diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1919--- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 +0900
1920+++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900
d8778560 1921@@ -307,7 +307,7 @@
b4e1fa2c
AM
1922
1923 ut_d(block->page.in_flush_list = TRUE);
1924 block->page.oldest_modification = lsn;
1925- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1926+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1927
1928 #ifdef UNIV_DEBUG_VALGRIND
1929 {
d8778560 1930@@ -401,14 +401,14 @@
b4e1fa2c
AM
1931 > block->page.oldest_modification) {
1932 ut_ad(b->in_flush_list);
1933 prev_b = b;
1934- b = UT_LIST_GET_NEXT(list, b);
1935+ b = UT_LIST_GET_NEXT(flush_list, b);
1936 }
1937 }
1938
1939 if (prev_b == NULL) {
1940- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1941+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1942 } else {
1943- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1944+ UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1945 prev_b, &block->page);
1946 }
1947
d8778560 1948@@ -434,7 +434,7 @@
b4e1fa2c
AM
1949 //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1950 //ut_ad(buf_pool_mutex_own(buf_pool));
1951 #endif
1952- //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1953+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1954 //ut_ad(bpage->in_LRU_list);
1955
1956 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
d8778560 1957@@ -470,14 +470,14 @@
b4e1fa2c
AM
1958 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1959 {
1960 #ifdef UNIV_DEBUG
1961- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1962- ut_ad(buf_pool_mutex_own(buf_pool));
1963+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1964+ //ut_ad(buf_pool_mutex_own(buf_pool));
1965 #endif
1966- ut_a(buf_page_in_file(bpage));
1967+ //ut_a(buf_page_in_file(bpage));
1968 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1969 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1970
1971- if (bpage->oldest_modification != 0
1972+ if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1973 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1974 ut_ad(bpage->in_flush_list);
1975
d8778560 1976@@ -508,7 +508,7 @@
b4e1fa2c
AM
1977 {
1978 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1979
1980- ut_ad(buf_pool_mutex_own(buf_pool));
1981+ //ut_ad(buf_pool_mutex_own(buf_pool));
1982 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1983 ut_ad(bpage->in_flush_list);
1984
d8778560 1985@@ -526,11 +526,11 @@
b4e1fa2c
AM
1986 return;
1987 case BUF_BLOCK_ZIP_DIRTY:
1988 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1989- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1990+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1991 buf_LRU_insert_zip_clean(bpage);
1992 break;
1993 case BUF_BLOCK_FILE_PAGE:
1994- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1995+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1996 break;
1997 }
1998
d8778560 1999@@ -574,7 +574,7 @@
b4e1fa2c
AM
2000 buf_page_t* prev_b = NULL;
2001 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2002
2003- ut_ad(buf_pool_mutex_own(buf_pool));
2004+ //ut_ad(buf_pool_mutex_own(buf_pool));
2005 /* Must reside in the same buffer pool. */
2006 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2007
d8778560 2008@@ -603,18 +603,18 @@
b4e1fa2c
AM
2009 because we assert on in_flush_list in comparison function. */
2010 ut_d(bpage->in_flush_list = FALSE);
2011
2012- prev = UT_LIST_GET_PREV(list, bpage);
2013- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2014+ prev = UT_LIST_GET_PREV(flush_list, bpage);
2015+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2016
2017 if (prev) {
2018 ut_ad(prev->in_flush_list);
2019 UT_LIST_INSERT_AFTER(
2020- list,
2021+ flush_list,
2022 buf_pool->flush_list,
2023 prev, dpage);
2024 } else {
2025 UT_LIST_ADD_FIRST(
2026- list,
2027+ flush_list,
2028 buf_pool->flush_list,
2029 dpage);
2030 }
d8778560 2031@@ -1083,7 +1083,7 @@
b4e1fa2c
AM
2032
2033 #ifdef UNIV_DEBUG
2034 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2035- ut_ad(!buf_pool_mutex_own(buf_pool));
2036+ //ut_ad(!buf_pool_mutex_own(buf_pool));
2037 #endif
2038
2039 #ifdef UNIV_LOG_DEBUG
d8778560 2040@@ -1097,7 +1097,8 @@
b4e1fa2c
AM
2041 io_fixed and oldest_modification != 0. Thus, it cannot be
2042 relocated in the buffer pool or removed from flush_list or
2043 LRU_list. */
2044- ut_ad(!buf_pool_mutex_own(buf_pool));
2045+ //ut_ad(!buf_pool_mutex_own(buf_pool));
2046+ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2047 ut_ad(!buf_flush_list_mutex_own(buf_pool));
2048 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2049 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
d8778560 2050@@ -1260,12 +1261,18 @@
b4e1fa2c
AM
2051 ibool is_uncompressed;
2052
2053 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2054- ut_ad(buf_pool_mutex_own(buf_pool));
2055+ //ut_ad(buf_pool_mutex_own(buf_pool));
2056+#ifdef UNIV_SYNC_DEBUG
2057+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2058+#endif
2059 ut_ad(buf_page_in_file(bpage));
2060
2061 block_mutex = buf_page_get_mutex(bpage);
2062 ut_ad(mutex_own(block_mutex));
2063
2064+ buf_pool_mutex_enter(buf_pool);
2065+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2066+
2067 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2068
2069 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
d8778560 2070@@ -1427,14 +1434,16 @@
b4e1fa2c
AM
2071
2072 buf_pool = buf_pool_get(space, i);
2073
2074- buf_pool_mutex_enter(buf_pool);
2075+ //buf_pool_mutex_enter(buf_pool);
2076+ rw_lock_s_lock(&buf_pool->page_hash_latch);
2077
2078 /* We only want to flush pages from this buffer pool. */
2079 bpage = buf_page_hash_get(buf_pool, space, i);
2080
2081 if (!bpage) {
2082
2083- buf_pool_mutex_exit(buf_pool);
2084+ //buf_pool_mutex_exit(buf_pool);
2085+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2086 continue;
2087 }
2088
d8778560 2089@@ -1446,11 +1455,9 @@
b4e1fa2c
AM
2090 if (flush_type != BUF_FLUSH_LRU
2091 || i == offset
2092 || buf_page_is_old(bpage)) {
2093- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2094-
2095- mutex_enter(block_mutex);
2096+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2097
2098- if (buf_flush_ready_for_flush(bpage, flush_type)
2099+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2100 && (i == offset || !bpage->buf_fix_count)) {
2101 /* We only try to flush those
2102 neighbors != offset where the buf fix
d8778560 2103@@ -1466,11 +1473,12 @@
b4e1fa2c
AM
2104 ut_ad(!buf_pool_mutex_own(buf_pool));
2105 count++;
2106 continue;
2107- } else {
2108+ } else if (block_mutex) {
2109 mutex_exit(block_mutex);
2110 }
2111 }
2112- buf_pool_mutex_exit(buf_pool);
2113+ //buf_pool_mutex_exit(buf_pool);
2114+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
2115 }
2116
2117 return(count);
d8778560 2118@@ -1503,21 +1511,25 @@
b4e1fa2c
AM
2119 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2120 #endif /* UNIV_DEBUG */
2121
2122- ut_ad(buf_pool_mutex_own(buf_pool));
2123+ //ut_ad(buf_pool_mutex_own(buf_pool));
2124+ ut_ad(flush_type != BUF_FLUSH_LRU
2125+ || mutex_own(&buf_pool->LRU_list_mutex));
2126
2127- block_mutex = buf_page_get_mutex(bpage);
2128- mutex_enter(block_mutex);
2129+ block_mutex = buf_page_get_mutex_enter(bpage);
2130
2131- ut_a(buf_page_in_file(bpage));
2132+ //ut_a(buf_page_in_file(bpage));
2133
2134- if (buf_flush_ready_for_flush(bpage, flush_type)) {
2135+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2136 ulint space;
2137 ulint offset;
2138 buf_pool_t* buf_pool;
2139
2140 buf_pool = buf_pool_from_bpage(bpage);
2141
2142- buf_pool_mutex_exit(buf_pool);
2143+ //buf_pool_mutex_exit(buf_pool);
2144+ if (flush_type == BUF_FLUSH_LRU) {
2145+ mutex_exit(&buf_pool->LRU_list_mutex);
2146+ }
2147
2148 /* These fields are protected by both the
2149 buffer pool mutex and block mutex. */
d8778560 2150@@ -1533,13 +1545,18 @@
b4e1fa2c
AM
2151 *count,
2152 n_to_flush);
2153
2154- buf_pool_mutex_enter(buf_pool);
2155+ //buf_pool_mutex_enter(buf_pool);
2156+ if (flush_type == BUF_FLUSH_LRU) {
2157+ mutex_enter(&buf_pool->LRU_list_mutex);
2158+ }
2159 flushed = TRUE;
2160- } else {
2161+ } else if (block_mutex) {
2162 mutex_exit(block_mutex);
2163 }
2164
2165- ut_ad(buf_pool_mutex_own(buf_pool));
2166+ //ut_ad(buf_pool_mutex_own(buf_pool));
2167+ ut_ad(flush_type != BUF_FLUSH_LRU
2168+ || mutex_own(&buf_pool->LRU_list_mutex));
2169
2170 return(flushed);
2171 }
d8778560 2172@@ -1560,7 +1577,8 @@
b4e1fa2c
AM
2173 buf_page_t* bpage;
2174 ulint count = 0;
2175
2176- ut_ad(buf_pool_mutex_own(buf_pool));
2177+ //ut_ad(buf_pool_mutex_own(buf_pool));
2178+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2179
2180 do {
2181 /* Start from the end of the list looking for a
d8778560 2182@@ -1582,7 +1600,8 @@
b4e1fa2c
AM
2183 should be flushed, we factor in this value. */
2184 buf_lru_flush_page_count += count;
2185
2186- ut_ad(buf_pool_mutex_own(buf_pool));
2187+ //ut_ad(buf_pool_mutex_own(buf_pool));
2188+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2189
2190 return(count);
2191 }
d8778560 2192@@ -1610,9 +1629,10 @@
b4e1fa2c
AM
2193 {
2194 ulint len;
2195 buf_page_t* bpage;
2196+ buf_page_t* prev_bpage = NULL;
2197 ulint count = 0;
2198
2199- ut_ad(buf_pool_mutex_own(buf_pool));
2200+ //ut_ad(buf_pool_mutex_own(buf_pool));
2201
2202 /* If we have flushed enough, leave the loop */
2203 do {
d8778560 2204@@ -1631,6 +1651,7 @@
b4e1fa2c
AM
2205
2206 if (bpage) {
2207 ut_a(bpage->oldest_modification > 0);
2208+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2209 }
2210
2211 if (!bpage || bpage->oldest_modification >= lsn_limit) {
d8778560 2212@@ -1672,9 +1693,17 @@
b4e1fa2c
AM
2213 break;
2214 }
2215
2216- bpage = UT_LIST_GET_PREV(list, bpage);
2217+ bpage = UT_LIST_GET_PREV(flush_list, bpage);
2218
2219- ut_ad(!bpage || bpage->in_flush_list);
2220+ //ut_ad(!bpage || bpage->in_flush_list);
2221+ if (bpage != prev_bpage) {
2222+ /* The predecessor changed, so the scan may have jumped elsewhere in the list; retry. */
2223+ buf_flush_list_mutex_exit(buf_pool);
2224+ break;
2225+ }
2226+ if (bpage) {
2227+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2228+ }
2229
2230 buf_flush_list_mutex_exit(buf_pool);
2231
d8778560 2232@@ -1683,7 +1712,7 @@
b4e1fa2c
AM
2233
2234 } while (count < min_n && bpage != NULL && len > 0);
2235
2236- ut_ad(buf_pool_mutex_own(buf_pool));
2237+ //ut_ad(buf_pool_mutex_own(buf_pool));
2238
2239 return(count);
2240 }
d8778560 2241@@ -1722,13 +1751,15 @@
b4e1fa2c
AM
2242 || sync_thread_levels_empty_gen(TRUE));
2243 #endif /* UNIV_SYNC_DEBUG */
2244
2245- buf_pool_mutex_enter(buf_pool);
2246+ //buf_pool_mutex_enter(buf_pool);
2247
2248 /* Note: The buffer pool mutex is released and reacquired within
2249 the flush functions. */
2250 switch(flush_type) {
2251 case BUF_FLUSH_LRU:
2252+ mutex_enter(&buf_pool->LRU_list_mutex);
2253 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2254+ mutex_exit(&buf_pool->LRU_list_mutex);
2255 break;
2256 case BUF_FLUSH_LIST:
2257 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
d8778560 2258@@ -1737,7 +1768,7 @@
b4e1fa2c
AM
2259 ut_error;
2260 }
2261
2262- buf_pool_mutex_exit(buf_pool);
2263+ //buf_pool_mutex_exit(buf_pool);
2264
2265 buf_flush_buffered_writes();
2266
d8778560 2267@@ -1993,7 +2024,7 @@
b4e1fa2c
AM
2268 retry:
2269 //buf_pool_mutex_enter(buf_pool);
2270 if (have_LRU_mutex)
2271- buf_pool_mutex_enter(buf_pool);
2272+ mutex_enter(&buf_pool->LRU_list_mutex);
2273
2274 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2275
d8778560 2276@@ -2010,15 +2041,15 @@
b4e1fa2c
AM
2277 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2278 continue;
2279 }
2280- block_mutex = buf_page_get_mutex(bpage);
2281-
2282- mutex_enter(block_mutex);
2283+ block_mutex = buf_page_get_mutex_enter(bpage);
2284
2285- if (buf_flush_ready_for_replace(bpage)) {
2286+ if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2287 n_replaceable++;
2288 }
2289
2290- mutex_exit(block_mutex);
2291+ if (block_mutex) {
2292+ mutex_exit(block_mutex);
2293+ }
2294
2295 distance++;
2296
d8778560 2297@@ -2027,7 +2058,7 @@
b4e1fa2c
AM
2298
2299 //buf_pool_mutex_exit(buf_pool);
2300 if (have_LRU_mutex)
2301- buf_pool_mutex_exit(buf_pool);
2302+ mutex_exit(&buf_pool->LRU_list_mutex);
2303
2304 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2305
d8778560 2306@@ -2226,7 +2257,7 @@
b4e1fa2c
AM
2307
2308 ut_ad(buf_flush_list_mutex_own(buf_pool));
2309
2310- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2311+ UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2312 ut_ad(ut_list_node_313->in_flush_list));
2313
2314 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
d8778560 2315@@ -2266,7 +2297,7 @@
b4e1fa2c
AM
2316 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2317 }
2318
2319- bpage = UT_LIST_GET_NEXT(list, bpage);
2320+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2321
2322 ut_a(!bpage || om >= bpage->oldest_modification);
2323 }
2324diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2325--- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900
2326+++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900
2327@@ -143,8 +143,9 @@
2328 void
2329 buf_LRU_block_free_hashed_page(
2330 /*===========================*/
2331- buf_block_t* block); /*!< in: block, must contain a file page and
2332+ buf_block_t* block, /*!< in: block, must contain a file page and
2333 be in a state where it can be freed */
2334+ ibool have_page_hash_mutex);
2335
2336 /******************************************************************//**
2337 Determines if the unzip_LRU list should be used for evicting a victim
2338@@ -154,15 +155,20 @@
2339 ibool
2340 buf_LRU_evict_from_unzip_LRU(
2341 /*=========================*/
2342- buf_pool_t* buf_pool)
2343+ buf_pool_t* buf_pool,
2344+ ibool have_LRU_mutex)
2345 {
2346 ulint io_avg;
2347 ulint unzip_avg;
2348
2349- ut_ad(buf_pool_mutex_own(buf_pool));
2350+ //ut_ad(buf_pool_mutex_own(buf_pool));
2351
2352+ if (!have_LRU_mutex)
2353+ mutex_enter(&buf_pool->LRU_list_mutex);
2354 /* If the unzip_LRU list is empty, we can only use the LRU. */
2355 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2356+ if (!have_LRU_mutex)
2357+ mutex_exit(&buf_pool->LRU_list_mutex);
2358 return(FALSE);
2359 }
2360
2361@@ -171,14 +177,20 @@
2362 decompressed pages in the buffer pool. */
2363 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2364 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2365+ if (!have_LRU_mutex)
2366+ mutex_exit(&buf_pool->LRU_list_mutex);
2367 return(FALSE);
2368 }
2369
2370 /* If eviction hasn't started yet, we assume by default
2371 that a workload is disk bound. */
2372 if (buf_pool->freed_page_clock == 0) {
2373+ if (!have_LRU_mutex)
2374+ mutex_exit(&buf_pool->LRU_list_mutex);
2375 return(TRUE);
2376 }
2377+ if (!have_LRU_mutex)
2378+ mutex_exit(&buf_pool->LRU_list_mutex);
2379
2380 /* Calculate the average over past intervals, and add the values
2381 of the current interval. */
2382@@ -246,19 +258,23 @@
2383 page_arr = ut_malloc(
2384 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2385
2386- buf_pool_mutex_enter(buf_pool);
2387+ //buf_pool_mutex_enter(buf_pool);
2388+ mutex_enter(&buf_pool->LRU_list_mutex);
2389
2390 scan_again:
2391 num_entries = 0;
2392 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2393
2394 while (bpage != NULL) {
2395- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2396+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2397 buf_page_t* prev_bpage;
2398
2399- mutex_enter(block_mutex);
2400 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2401
2402+ if (!block_mutex) {
2403+ goto next_page;
2404+ }
2405+
2406 ut_a(buf_page_in_file(bpage));
2407
2408 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2409@@ -287,14 +303,16 @@
2410
2411 /* Array full. We release the buf_pool->mutex to
2412 obey the latching order. */
2413- buf_pool_mutex_exit(buf_pool);
2414+ //buf_pool_mutex_exit(buf_pool);
2415+ mutex_exit(&buf_pool->LRU_list_mutex);
2416
2417 buf_LRU_drop_page_hash_batch(
2418 id, zip_size, page_arr, num_entries);
2419
2420 num_entries = 0;
2421
2422- buf_pool_mutex_enter(buf_pool);
2423+ //buf_pool_mutex_enter(buf_pool);
2424+ mutex_enter(&buf_pool->LRU_list_mutex);
2425 } else {
2426 mutex_exit(block_mutex);
2427 }
2428@@ -319,7 +337,8 @@
2429 }
2430 }
2431
2432- buf_pool_mutex_exit(buf_pool);
2433+ //buf_pool_mutex_exit(buf_pool);
2434+ mutex_exit(&buf_pool->LRU_list_mutex);
2435
2436 /* Drop any remaining batch of search hashed pages. */
2437 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2438@@ -341,7 +360,9 @@
2439 ibool all_freed;
2440
2441 scan_again:
2442- buf_pool_mutex_enter(buf_pool);
2443+ //buf_pool_mutex_enter(buf_pool);
2444+ mutex_enter(&buf_pool->LRU_list_mutex);
2445+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2446
2447 all_freed = TRUE;
2448
2449@@ -369,8 +390,16 @@
2450
2451 all_freed = FALSE;
2452 } else {
2453- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2454- mutex_enter(block_mutex);
2455+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2456+
2457+ if (!block_mutex) {
2458+ /* This case should be impossible.
2459+ If it happens, something is wrong, so request another pass (scan_again). */
2460+
2461+ all_freed = FALSE;
2462+
2463+ goto next_page_no_mutex;
2464+ }
2465
2466 if (bpage->buf_fix_count > 0) {
2467
2468@@ -429,7 +458,9 @@
2469 ulint page_no;
2470 ulint zip_size;
2471
2472- buf_pool_mutex_exit(buf_pool);
2473+ //buf_pool_mutex_exit(buf_pool);
2474+ mutex_exit(&buf_pool->LRU_list_mutex);
2475+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2476
2477 zip_size = buf_page_get_zip_size(bpage);
2478 page_no = buf_page_get_page_no(bpage);
2479@@ -454,7 +485,7 @@
2480 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2481 != BUF_BLOCK_ZIP_FREE) {
2482 buf_LRU_block_free_hashed_page((buf_block_t*)
2483- bpage);
2484+ bpage, TRUE);
2485 } else {
2486 /* The block_mutex should have been
2487 released by buf_LRU_block_remove_hashed_page()
2488@@ -486,7 +517,9 @@
2489 bpage = prev_bpage;
2490 }
2491
2492- buf_pool_mutex_exit(buf_pool);
2493+ //buf_pool_mutex_exit(buf_pool);
2494+ mutex_exit(&buf_pool->LRU_list_mutex);
2495+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2496
2497 if (!all_freed) {
2498 os_thread_sleep(20000);
2499@@ -532,7 +565,9 @@
2500 buf_page_t* b;
2501 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2502
2503- ut_ad(buf_pool_mutex_own(buf_pool));
2504+ //ut_ad(buf_pool_mutex_own(buf_pool));
2505+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2506+ ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2507 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2508
2509 /* Find the first successor of bpage in the LRU list
2510@@ -540,17 +575,17 @@
2511 b = bpage;
2512 do {
2513 b = UT_LIST_GET_NEXT(LRU, b);
2514- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2515+ } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2516
2517 /* Insert bpage before b, i.e., after the predecessor of b. */
2518 if (b) {
2519- b = UT_LIST_GET_PREV(list, b);
2520+ b = UT_LIST_GET_PREV(zip_list, b);
2521 }
2522
2523 if (b) {
2524- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2525+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2526 } else {
2527- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2528+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2529 }
2530 }
2531
2532@@ -563,18 +598,19 @@
2533 buf_LRU_free_from_unzip_LRU_list(
2534 /*=============================*/
2535 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2536- ulint n_iterations) /*!< in: how many times this has
2537+ ulint n_iterations, /*!< in: how many times this has
2538 been called repeatedly without
2539 result: a high value means that
2540 we should search farther; we will
2541 search n_iterations / 5 of the
2542 unzip_LRU list, or nothing if
2543 n_iterations >= 5 */
2544+ ibool have_LRU_mutex)
2545 {
2546 buf_block_t* block;
2547 ulint distance;
2548
2549- ut_ad(buf_pool_mutex_own(buf_pool));
2550+ //ut_ad(buf_pool_mutex_own(buf_pool));
2551
2552 /* Theoratically it should be much easier to find a victim
2553 from unzip_LRU as we can choose even a dirty block (as we'll
2554@@ -584,7 +620,7 @@
2555 if we have done five iterations so far. */
2556
2557 if (UNIV_UNLIKELY(n_iterations >= 5)
2558- || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2559+ || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2560
2561 return(FALSE);
2562 }
2563@@ -592,18 +628,25 @@
2564 distance = 100 + (n_iterations
2565 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2566
2567+restart:
2568 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2569 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2570 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2571
2572 enum buf_lru_free_block_status freed;
2573
2574+ mutex_enter(&block->mutex);
2575+ if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2576+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2577+ mutex_exit(&block->mutex);
2578+ goto restart;
2579+ }
2580+
2581 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2582 ut_ad(block->in_unzip_LRU_list);
2583 ut_ad(block->page.in_LRU_list);
2584
2585- mutex_enter(&block->mutex);
df1b5770
AM
2586- freed = buf_LRU_free_block(&block->page, FALSE);
2587+ freed = buf_LRU_free_block(&block->page, FALSE, have_LRU_mutex);
b4e1fa2c
AM
2588 mutex_exit(&block->mutex);
2589
2590 switch (freed) {
2591@@ -637,21 +680,23 @@
2592 buf_LRU_free_from_common_LRU_list(
2593 /*==============================*/
2594 buf_pool_t* buf_pool,
2595- ulint n_iterations)
2596+ ulint n_iterations,
2597 /*!< in: how many times this has been called
2598 repeatedly without result: a high value means
2599 that we should search farther; if
2600 n_iterations < 10, then we search
2601 n_iterations / 10 * buf_pool->curr_size
2602 pages from the end of the LRU list */
2603+ ibool have_LRU_mutex)
2604 {
2605 buf_page_t* bpage;
2606 ulint distance;
2607
2608- ut_ad(buf_pool_mutex_own(buf_pool));
2609+ //ut_ad(buf_pool_mutex_own(buf_pool));
2610
2611 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2612
2613+restart:
2614 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2615 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2616 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2617@@ -659,14 +704,23 @@
2618 enum buf_lru_free_block_status freed;
2619 unsigned accessed;
2620 mutex_t* block_mutex
2621- = buf_page_get_mutex(bpage);
2622+ = buf_page_get_mutex_enter(bpage);
2623+
2624+ if (!block_mutex) {
2625+ goto restart;
2626+ }
2627+
2628+ if (!bpage->in_LRU_list
2629+ || !buf_page_in_file(bpage)) {
2630+ mutex_exit(block_mutex);
2631+ goto restart;
2632+ }
2633
2634 ut_ad(buf_page_in_file(bpage));
2635 ut_ad(bpage->in_LRU_list);
2636
2637- mutex_enter(block_mutex);
2638 accessed = buf_page_is_accessed(bpage);
df1b5770
AM
2639- freed = buf_LRU_free_block(bpage, TRUE);
2640+ freed = buf_LRU_free_block(bpage, TRUE, have_LRU_mutex);
b4e1fa2c
AM
2641 mutex_exit(block_mutex);
2642
2643 switch (freed) {
2644@@ -718,16 +772,23 @@
2645 n_iterations / 5 of the unzip_LRU list. */
2646 {
2647 ibool freed = FALSE;
2648+ ibool have_LRU_mutex = FALSE;
2649
2650- buf_pool_mutex_enter(buf_pool);
2651+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2652+ have_LRU_mutex = TRUE;
2653+
2654+ //buf_pool_mutex_enter(buf_pool);
2655+ if (have_LRU_mutex)
2656+ mutex_enter(&buf_pool->LRU_list_mutex);
2657
2658- freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2659+ freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2660
2661 if (!freed) {
2662 freed = buf_LRU_free_from_common_LRU_list(
2663- buf_pool, n_iterations);
2664+ buf_pool, n_iterations, have_LRU_mutex);
2665 }
2666
2667+ buf_pool_mutex_enter(buf_pool);
2668 if (!freed) {
2669 buf_pool->LRU_flush_ended = 0;
2670 } else if (buf_pool->LRU_flush_ended > 0) {
2671@@ -735,6 +796,8 @@
2672 }
2673
2674 buf_pool_mutex_exit(buf_pool);
2675+ if (have_LRU_mutex)
2676+ mutex_exit(&buf_pool->LRU_list_mutex);
2677
2678 return(freed);
2679 }
2680@@ -795,7 +858,9 @@
2681
2682 buf_pool = buf_pool_from_array(i);
2683
2684- buf_pool_mutex_enter(buf_pool);
2685+ //buf_pool_mutex_enter(buf_pool);
2686+ mutex_enter(&buf_pool->LRU_list_mutex);
2687+ mutex_enter(&buf_pool->free_list_mutex);
2688
2689 if (!recv_recovery_on
2690 && UT_LIST_GET_LEN(buf_pool->free)
2691@@ -805,7 +870,9 @@
2692 ret = TRUE;
2693 }
2694
2695- buf_pool_mutex_exit(buf_pool);
2696+ //buf_pool_mutex_exit(buf_pool);
2697+ mutex_exit(&buf_pool->LRU_list_mutex);
2698+ mutex_exit(&buf_pool->free_list_mutex);
2699 }
2700
2701 return(ret);
2702@@ -823,9 +890,10 @@
2703 {
2704 buf_block_t* block;
2705
2706- ut_ad(buf_pool_mutex_own(buf_pool));
2707+ //ut_ad(buf_pool_mutex_own(buf_pool));
2708
2709- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2710+ mutex_enter(&buf_pool->free_list_mutex);
2711+ block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2712
2713 if (block) {
2714
2715@@ -834,7 +902,9 @@
2716 ut_ad(!block->page.in_flush_list);
2717 ut_ad(!block->page.in_LRU_list);
2718 ut_a(!buf_page_in_file(&block->page));
2719- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2720+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2721+
2722+ mutex_exit(&buf_pool->free_list_mutex);
2723
2724 mutex_enter(&block->mutex);
2725
2726@@ -844,6 +914,8 @@
2727 ut_ad(buf_pool_from_block(block) == buf_pool);
2728
2729 mutex_exit(&block->mutex);
2730+ } else {
2731+ mutex_exit(&buf_pool->free_list_mutex);
2732 }
2733
2734 return(block);
df1b5770 2735@@ -866,7 +938,7 @@
b4e1fa2c
AM
2736 ibool mon_value_was = FALSE;
2737 ibool started_monitor = FALSE;
2738 loop:
2739- buf_pool_mutex_enter(buf_pool);
2740+ //buf_pool_mutex_enter(buf_pool);
2741
2742 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2743 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
df1b5770 2744@@ -934,7 +1006,7 @@
b4e1fa2c 2745
df1b5770
AM
2746 /* If there is a block in the free list, take it */
2747 block = buf_LRU_get_free_only(buf_pool);
b4e1fa2c
AM
2748- buf_pool_mutex_exit(buf_pool);
2749+ //buf_pool_mutex_exit(buf_pool);
2750
df1b5770
AM
2751 if (block) {
2752 ut_ad(buf_pool_from_block(block) == buf_pool);
2753@@ -1034,7 +1106,8 @@
b4e1fa2c
AM
2754 ulint new_len;
2755
2756 ut_a(buf_pool->LRU_old);
2757- ut_ad(buf_pool_mutex_own(buf_pool));
2758+ //ut_ad(buf_pool_mutex_own(buf_pool));
2759+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2760 ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2761 ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2762 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
df1b5770 2763@@ -1100,7 +1173,8 @@
b4e1fa2c
AM
2764 {
2765 buf_page_t* bpage;
2766
2767- ut_ad(buf_pool_mutex_own(buf_pool));
2768+ //ut_ad(buf_pool_mutex_own(buf_pool));
2769+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2770 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2771
2772 /* We first initialize all blocks in the LRU list as old and then use
df1b5770 2773@@ -1135,13 +1209,14 @@
b4e1fa2c
AM
2774 ut_ad(buf_pool);
2775 ut_ad(bpage);
2776 ut_ad(buf_page_in_file(bpage));
2777- ut_ad(buf_pool_mutex_own(buf_pool));
2778+ //ut_ad(buf_pool_mutex_own(buf_pool));
2779+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2780
2781 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2782 buf_block_t* block = (buf_block_t*) bpage;
2783
2784 ut_ad(block->in_unzip_LRU_list);
2785- ut_d(block->in_unzip_LRU_list = FALSE);
2786+ block->in_unzip_LRU_list = FALSE;
2787
2788 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2789 }
df1b5770 2790@@ -1159,7 +1234,8 @@
b4e1fa2c
AM
2791
2792 ut_ad(buf_pool);
2793 ut_ad(bpage);
2794- ut_ad(buf_pool_mutex_own(buf_pool));
2795+ //ut_ad(buf_pool_mutex_own(buf_pool));
2796+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2797
2798 ut_a(buf_page_in_file(bpage));
2799
df1b5770 2800@@ -1236,12 +1312,13 @@
b4e1fa2c
AM
2801
2802 ut_ad(buf_pool);
2803 ut_ad(block);
2804- ut_ad(buf_pool_mutex_own(buf_pool));
2805+ //ut_ad(buf_pool_mutex_own(buf_pool));
2806+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2807
2808 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2809
2810 ut_ad(!block->in_unzip_LRU_list);
2811- ut_d(block->in_unzip_LRU_list = TRUE);
2812+ block->in_unzip_LRU_list = TRUE;
2813
2814 if (old) {
2815 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
df1b5770 2816@@ -1262,7 +1339,8 @@
b4e1fa2c
AM
2817
2818 ut_ad(buf_pool);
2819 ut_ad(bpage);
2820- ut_ad(buf_pool_mutex_own(buf_pool));
2821+ //ut_ad(buf_pool_mutex_own(buf_pool));
2822+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2823
2824 ut_a(buf_page_in_file(bpage));
2825
df1b5770 2826@@ -1313,7 +1391,8 @@
b4e1fa2c
AM
2827
2828 ut_ad(buf_pool);
2829 ut_ad(bpage);
2830- ut_ad(buf_pool_mutex_own(buf_pool));
2831+ //ut_ad(buf_pool_mutex_own(buf_pool));
2832+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2833
2834 ut_a(buf_page_in_file(bpage));
2835 ut_ad(!bpage->in_LRU_list);
df1b5770 2836@@ -1392,7 +1471,8 @@
b4e1fa2c
AM
2837 {
2838 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2839
2840- ut_ad(buf_pool_mutex_own(buf_pool));
2841+ //ut_ad(buf_pool_mutex_own(buf_pool));
2842+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2843
2844 if (bpage->old) {
2845 buf_pool->stat.n_pages_made_young++;
df1b5770
AM
2846@@ -1432,17 +1512,18 @@
2847 buf_LRU_free_block(
2848 /*===============*/
b4e1fa2c 2849 buf_page_t* bpage, /*!< in: block to be freed */
df1b5770
AM
2850- ibool zip) /*!< in: TRUE if should remove also the
2851+ ibool zip, /*!< in: TRUE if should remove also the
b4e1fa2c 2852 compressed page of an uncompressed page */
b4e1fa2c
AM
2853+ ibool have_LRU_mutex)
2854 {
2855 buf_page_t* b = NULL;
2856 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2857 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2858
2859- ut_ad(buf_pool_mutex_own(buf_pool));
2860+ //ut_ad(buf_pool_mutex_own(buf_pool));
2861 ut_ad(mutex_own(block_mutex));
2862 ut_ad(buf_page_in_file(bpage));
2863- ut_ad(bpage->in_LRU_list);
2864+ //ut_ad(bpage->in_LRU_list);
2865 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2866 #if UNIV_WORD_SIZE == 4
2867 /* On 32-bit systems, there is no padding in buf_page_t. On
df1b5770 2868@@ -1451,7 +1532,7 @@
b4e1fa2c
AM
2869 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2870 #endif
2871
2872- if (!buf_page_can_relocate(bpage)) {
2873+ if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2874
2875 /* Do not free buffer-fixed or I/O-fixed blocks. */
2876 return(BUF_LRU_NOT_FREED);
df1b5770 2877@@ -1483,15 +1564,15 @@
b4e1fa2c
AM
2878 If it cannot be allocated (without freeing a block
2879 from the LRU list), refuse to free bpage. */
2880 alloc:
2881- buf_pool_mutex_exit_forbid(buf_pool);
2882- b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2883- buf_pool_mutex_exit_allow(buf_pool);
2884+ //buf_pool_mutex_exit_forbid(buf_pool);
2885+ b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2886+ //buf_pool_mutex_exit_allow(buf_pool);
2887
2888 if (UNIV_UNLIKELY(!b)) {
2889 return(BUF_LRU_CANNOT_RELOCATE);
2890 }
2891
2892- memcpy(b, bpage, sizeof *b);
2893+ //memcpy(b, bpage, sizeof *b);
2894 }
2895
2896 #ifdef UNIV_DEBUG
df1b5770 2897@@ -1502,6 +1583,39 @@
b4e1fa2c
AM
2898 }
2899 #endif /* UNIV_DEBUG */
2900
2901+ /* To keep the latch order, release block_mutex and re-acquire it after the other latches. */
2902+ mutex_exit(block_mutex);
2903+
2904+ if (!have_LRU_mutex)
2905+ mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2906+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2907+ mutex_enter(block_mutex);
2908+
2909+ /* Re-check the state of the block now that the latches are held again. */
2910+ if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2911+ || !buf_page_can_relocate(bpage)) {
2912+not_freed:
2913+ if (b) {
2914+ buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2915+ }
2916+ if (!have_LRU_mutex)
2917+ mutex_exit(&buf_pool->LRU_list_mutex);
2918+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2919+ return(BUF_LRU_NOT_FREED);
2920+ } else if (zip || !bpage->zip.data) {
2921+ if (bpage->oldest_modification)
2922+ goto not_freed;
2923+ } else if (bpage->oldest_modification) {
2924+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2925+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2926+ goto not_freed;
2927+ }
2928+ }
2929+
2930+ if (b) {
2931+ memcpy(b, bpage, sizeof *b);
2932+ }
2933+
2934 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2935 != BUF_BLOCK_ZIP_FREE) {
2936 ut_a(bpage->buf_fix_count == 0);
df1b5770 2937@@ -1518,6 +1632,10 @@
b4e1fa2c
AM
2938
2939 ut_a(!hash_b);
2940
2941+ while (prev_b && !prev_b->in_LRU_list) {
2942+ prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2943+ }
2944+
2945 b->state = b->oldest_modification
2946 ? BUF_BLOCK_ZIP_DIRTY
2947 : BUF_BLOCK_ZIP_PAGE;
df1b5770
AM
2948@@ -1610,7 +1728,9 @@
2949 b->io_fix = BUF_IO_READ;
b4e1fa2c
AM
2950 }
2951
2952- buf_pool_mutex_exit(buf_pool);
2953+ //buf_pool_mutex_exit(buf_pool);
2954+ mutex_exit(&buf_pool->LRU_list_mutex);
2955+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2956 mutex_exit(block_mutex);
2957
2958 /* Remove possible adaptive hash index on the page.
df1b5770 2959@@ -1642,7 +1762,9 @@
b4e1fa2c
AM
2960 : BUF_NO_CHECKSUM_MAGIC);
2961 }
2962
2963- buf_pool_mutex_enter(buf_pool);
2964+ //buf_pool_mutex_enter(buf_pool);
2965+ if (have_LRU_mutex)
2966+ mutex_enter(&buf_pool->LRU_list_mutex);
2967 mutex_enter(block_mutex);
2968
2969 if (b) {
df1b5770 2970@@ -1652,13 +1774,17 @@
b4e1fa2c
AM
2971 mutex_exit(&buf_pool->zip_mutex);
2972 }
2973
2974- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2975+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
2976 } else {
2977 /* The block_mutex should have been released by
2978 buf_LRU_block_remove_hashed_page() when it returns
2979 BUF_BLOCK_ZIP_FREE. */
2980 ut_ad(block_mutex == &buf_pool->zip_mutex);
2981 mutex_enter(block_mutex);
2982+
2983+ if (!have_LRU_mutex)
2984+ mutex_exit(&buf_pool->LRU_list_mutex);
2985+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2986 }
2987
2988 return(BUF_LRU_FREED);
df1b5770 2989@@ -1670,13 +1796,14 @@
b4e1fa2c
AM
2990 void
2991 buf_LRU_block_free_non_file_page(
2992 /*=============================*/
2993- buf_block_t* block) /*!< in: block, must not contain a file page */
2994+ buf_block_t* block, /*!< in: block, must not contain a file page */
2995+ ibool have_page_hash_mutex)
2996 {
2997 void* data;
2998 buf_pool_t* buf_pool = buf_pool_from_block(block);
2999
3000 ut_ad(block);
3001- ut_ad(buf_pool_mutex_own(buf_pool));
3002+ //ut_ad(buf_pool_mutex_own(buf_pool));
3003 ut_ad(mutex_own(&block->mutex));
3004
3005 switch (buf_block_get_state(block)) {
df1b5770 3006@@ -1710,18 +1837,21 @@
b4e1fa2c
AM
3007 if (data) {
3008 block->page.zip.data = NULL;
3009 mutex_exit(&block->mutex);
3010- buf_pool_mutex_exit_forbid(buf_pool);
3011+ //buf_pool_mutex_exit_forbid(buf_pool);
3012
3013 buf_buddy_free(
3014- buf_pool, data, page_zip_get_size(&block->page.zip));
3015+ buf_pool, data, page_zip_get_size(&block->page.zip),
3016+ have_page_hash_mutex);
3017
3018- buf_pool_mutex_exit_allow(buf_pool);
3019+ //buf_pool_mutex_exit_allow(buf_pool);
3020 mutex_enter(&block->mutex);
3021 page_zip_set_size(&block->page.zip, 0);
3022 }
3023
3024- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3025+ mutex_enter(&buf_pool->free_list_mutex);
3026+ UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3027 ut_d(block->page.in_free_list = TRUE);
3028+ mutex_exit(&buf_pool->free_list_mutex);
3029
3030 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3031 }
df1b5770 3032@@ -1751,7 +1881,11 @@
b4e1fa2c
AM
3033 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3034
3035 ut_ad(bpage);
3036- ut_ad(buf_pool_mutex_own(buf_pool));
3037+ //ut_ad(buf_pool_mutex_own(buf_pool));
3038+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3039+#ifdef UNIV_SYNC_DEBUG
3040+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3041+#endif
3042 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3043
3044 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
df1b5770 3045@@ -1859,7 +1993,9 @@
b4e1fa2c
AM
3046
3047 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3048 mutex_exit(buf_page_get_mutex(bpage));
3049- buf_pool_mutex_exit(buf_pool);
3050+ //buf_pool_mutex_exit(buf_pool);
3051+ mutex_exit(&buf_pool->LRU_list_mutex);
3052+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3053 buf_print();
3054 buf_LRU_print();
3055 buf_validate();
df1b5770 3056@@ -1880,17 +2016,17 @@
b4e1fa2c
AM
3057 ut_a(bpage->zip.data);
3058 ut_a(buf_page_get_zip_size(bpage));
3059
3060- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3061+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3062
3063 mutex_exit(&buf_pool->zip_mutex);
3064- buf_pool_mutex_exit_forbid(buf_pool);
3065+ //buf_pool_mutex_exit_forbid(buf_pool);
3066
3067 buf_buddy_free(
3068 buf_pool, bpage->zip.data,
3069- page_zip_get_size(&bpage->zip));
3070+ page_zip_get_size(&bpage->zip), TRUE);
3071
3072- buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3073- buf_pool_mutex_exit_allow(buf_pool);
3074+ buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3075+ //buf_pool_mutex_exit_allow(buf_pool);
3076
3077 UNIV_MEM_UNDESC(bpage);
3078 return(BUF_BLOCK_ZIP_FREE);
df1b5770 3079@@ -1913,13 +2049,13 @@
b4e1fa2c
AM
3080 ut_ad(!bpage->in_flush_list);
3081 ut_ad(!bpage->in_LRU_list);
3082 mutex_exit(&((buf_block_t*) bpage)->mutex);
3083- buf_pool_mutex_exit_forbid(buf_pool);
3084+ //buf_pool_mutex_exit_forbid(buf_pool);
3085
3086 buf_buddy_free(
3087 buf_pool, data,
3088- page_zip_get_size(&bpage->zip));
3089+ page_zip_get_size(&bpage->zip), TRUE);
3090
3091- buf_pool_mutex_exit_allow(buf_pool);
3092+ //buf_pool_mutex_exit_allow(buf_pool);
3093 mutex_enter(&((buf_block_t*) bpage)->mutex);
3094 page_zip_set_size(&bpage->zip, 0);
3095 }
df1b5770 3096@@ -1945,18 +2081,19 @@
b4e1fa2c
AM
3097 void
3098 buf_LRU_block_free_hashed_page(
3099 /*===========================*/
3100- buf_block_t* block) /*!< in: block, must contain a file page and
3101+ buf_block_t* block, /*!< in: block, must contain a file page and
3102 be in a state where it can be freed */
3103+ ibool have_page_hash_mutex)
3104 {
3105 #ifdef UNIV_DEBUG
3106- buf_pool_t* buf_pool = buf_pool_from_block(block);
3107- ut_ad(buf_pool_mutex_own(buf_pool));
3108+ //buf_pool_t* buf_pool = buf_pool_from_block(block);
3109+ //ut_ad(buf_pool_mutex_own(buf_pool));
3110 #endif
3111 ut_ad(mutex_own(&block->mutex));
3112
3113 buf_block_set_state(block, BUF_BLOCK_MEMORY);
3114
3115- buf_LRU_block_free_non_file_page(block);
3116+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3117 }
3118
3119 /**********************************************************************//**
df1b5770 3120@@ -1983,7 +2120,8 @@
b4e1fa2c
AM
3121 }
3122
3123 if (adjust) {
3124- buf_pool_mutex_enter(buf_pool);
3125+ //buf_pool_mutex_enter(buf_pool);
3126+ mutex_enter(&buf_pool->LRU_list_mutex);
3127
3128 if (ratio != buf_pool->LRU_old_ratio) {
3129 buf_pool->LRU_old_ratio = ratio;
df1b5770 3130@@ -1995,7 +2133,8 @@
b4e1fa2c
AM
3131 }
3132 }
3133
3134- buf_pool_mutex_exit(buf_pool);
3135+ //buf_pool_mutex_exit(buf_pool);
3136+ mutex_exit(&buf_pool->LRU_list_mutex);
3137 } else {
3138 buf_pool->LRU_old_ratio = ratio;
3139 }
df1b5770 3140@@ -2100,7 +2239,8 @@
b4e1fa2c
AM
3141 ulint new_len;
3142
3143 ut_ad(buf_pool);
3144- buf_pool_mutex_enter(buf_pool);
3145+ //buf_pool_mutex_enter(buf_pool);
3146+ mutex_enter(&buf_pool->LRU_list_mutex);
3147
3148 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3149
df1b5770 3150@@ -2161,16 +2301,22 @@
b4e1fa2c
AM
3151
3152 ut_a(buf_pool->LRU_old_len == old_len);
3153
3154- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3155+ mutex_exit(&buf_pool->LRU_list_mutex);
3156+ mutex_enter(&buf_pool->free_list_mutex);
3157+
3158+ UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3159 ut_ad(ut_list_node_313->in_free_list));
3160
3161 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3162 bpage != NULL;
3163- bpage = UT_LIST_GET_NEXT(list, bpage)) {
3164+ bpage = UT_LIST_GET_NEXT(free, bpage)) {
3165
3166 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3167 }
3168
3169+ mutex_exit(&buf_pool->free_list_mutex);
3170+ mutex_enter(&buf_pool->LRU_list_mutex);
3171+
3172 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3173 ut_ad(ut_list_node_313->in_unzip_LRU_list
3174 && ut_list_node_313->page.in_LRU_list));
df1b5770 3175@@ -2184,7 +2330,8 @@
b4e1fa2c
AM
3176 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3177 }
3178
3179- buf_pool_mutex_exit(buf_pool);
3180+ //buf_pool_mutex_exit(buf_pool);
3181+ mutex_exit(&buf_pool->LRU_list_mutex);
3182 }
3183
3184 /**********************************************************************//**
df1b5770 3185@@ -2220,7 +2367,8 @@
b4e1fa2c
AM
3186 const buf_page_t* bpage;
3187
3188 ut_ad(buf_pool);
3189- buf_pool_mutex_enter(buf_pool);
3190+ //buf_pool_mutex_enter(buf_pool);
3191+ mutex_enter(&buf_pool->LRU_list_mutex);
3192
3193 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3194
df1b5770 3195@@ -2277,7 +2425,8 @@
b4e1fa2c
AM
3196 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3197 }
3198
3199- buf_pool_mutex_exit(buf_pool);
3200+ //buf_pool_mutex_exit(buf_pool);
3201+ mutex_exit(&buf_pool->LRU_list_mutex);
3202 }
3203
3204 /**********************************************************************//**
3205diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3206--- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900
3207+++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900
3208@@ -311,6 +311,7 @@
3209
3210 return(0);
3211 }
3212+ buf_pool_mutex_exit(buf_pool);
3213
3214 /* Check that almost all pages in the area have been accessed; if
3215 offset == low, the accesses must be in a descending order, otherwise,
3216@@ -329,6 +330,7 @@
3217
3218 fail_count = 0;
3219
3220+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3221 for (i = low; i < high; i++) {
3222 bpage = buf_page_hash_get(buf_pool, space, i);
3223
3224@@ -356,7 +358,8 @@
3225
3226 if (fail_count > threshold) {
3227 /* Too many failures: return */
3228- buf_pool_mutex_exit(buf_pool);
3229+ //buf_pool_mutex_exit(buf_pool);
3230+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3231 return(0);
3232 }
3233
3234@@ -371,7 +374,8 @@
3235 bpage = buf_page_hash_get(buf_pool, space, offset);
3236
3237 if (bpage == NULL) {
3238- buf_pool_mutex_exit(buf_pool);
3239+ //buf_pool_mutex_exit(buf_pool);
3240+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3241
3242 return(0);
3243 }
3244@@ -397,7 +401,8 @@
3245 pred_offset = fil_page_get_prev(frame);
3246 succ_offset = fil_page_get_next(frame);
3247
3248- buf_pool_mutex_exit(buf_pool);
3249+ //buf_pool_mutex_exit(buf_pool);
3250+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3251
3252 if ((offset == low) && (succ_offset == offset + 1)) {
3253
3254diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3255--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900
3256+++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900
df1b5770 3257@@ -264,6 +264,10 @@
b4e1fa2c
AM
3258 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3259 {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3260 {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3261+ {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3262+ {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3263+ {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3264+ {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3265 {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3266 {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3267 {&dict_sys_mutex_key, "dict_sys_mutex", 0},
df1b5770 3268@@ -314,6 +318,7 @@
b4e1fa2c
AM
3269 {&archive_lock_key, "archive_lock", 0},
3270 # endif /* UNIV_LOG_ARCHIVE */
3271 {&btr_search_latch_key, "btr_search_latch", 0},
3272+ {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3273 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3274 {&buf_block_lock_key, "buf_block_lock", 0},
3275 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3276diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3277--- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900
3278+++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900
d8778560 3279@@ -1565,7 +1565,8 @@
b4e1fa2c
AM
3280
3281 buf_pool = buf_pool_from_array(i);
3282
3283- buf_pool_mutex_enter(buf_pool);
3284+ //buf_pool_mutex_enter(buf_pool);
3285+ mutex_enter(&buf_pool->zip_free_mutex);
3286
3287 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3288 buf_buddy_stat_t* buddy_stat;
d8778560 3289@@ -1595,7 +1596,8 @@
b4e1fa2c
AM
3290 }
3291 }
3292
3293- buf_pool_mutex_exit(buf_pool);
3294+ //buf_pool_mutex_exit(buf_pool);
3295+ mutex_exit(&buf_pool->zip_free_mutex);
3296
3297 if (status) {
3298 break;
3299diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3300--- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3301+++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
df1b5770 3302@@ -3766,9 +3766,11 @@
b4e1fa2c
AM
3303 ulint fold = buf_page_address_fold(space, page_no);
3304 buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3305
3306- buf_pool_mutex_enter(buf_pool);
3307+ //buf_pool_mutex_enter(buf_pool);
3308+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3309 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3310- buf_pool_mutex_exit(buf_pool);
3311+ //buf_pool_mutex_exit(buf_pool);
3312+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3313
3314 if (UNIV_LIKELY_NULL(bpage)) {
3315 /* A buffer pool watch has been set or the
3316diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3317--- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900
3318+++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900
3319@@ -51,10 +51,11 @@
3320 buf_pool_t* buf_pool,
3321 /*!< buffer pool in which the block resides */
3322 ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3323- ibool* lru) /*!< in: pointer to a variable that will be assigned
3324+ ibool* lru, /*!< in: pointer to a variable that will be assigned
3325 TRUE if storage was allocated from the LRU list
3326 and buf_pool->mutex was temporarily released,
3327 or NULL if the LRU list should not be used */
3328+ ibool have_page_hash_mutex)
3329 __attribute__((malloc));
3330
3331 /**********************************************************************//**
3332@@ -67,7 +68,8 @@
3333 /*!< buffer pool in which the block resides */
3334 void* buf, /*!< in: block to be freed, must not be
3335 pointed to by the buffer pool */
3336- ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
3337+ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3338+ ibool have_page_hash_mutex)
3339 __attribute__((nonnull));
3340
3341 #ifndef UNIV_NONINL
3342diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3343--- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900
3344+++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900
3345@@ -46,10 +46,11 @@
3346 /*!< in: buffer pool in which the page resides */
3347 ulint i, /*!< in: index of buf_pool->zip_free[],
3348 or BUF_BUDDY_SIZES */
3349- ibool* lru) /*!< in: pointer to a variable that will be assigned
3350+ ibool* lru, /*!< in: pointer to a variable that will be assigned
3351 TRUE if storage was allocated from the LRU list
3352 and buf_pool->mutex was temporarily released,
3353 or NULL if the LRU list should not be used */
3354+ ibool have_page_hash_mutex)
3355 __attribute__((malloc));
3356
3357 /**********************************************************************//**
3358@@ -61,8 +62,9 @@
3359 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3360 void* buf, /*!< in: block to be freed, must not be
3361 pointed to by the buffer pool */
3362- ulint i) /*!< in: index of buf_pool->zip_free[],
3363+ ulint i, /*!< in: index of buf_pool->zip_free[],
3364 or BUF_BUDDY_SIZES */
3365+ ibool have_page_hash_mutex)
3366 __attribute__((nonnull));
3367
3368 /**********************************************************************//**
3369@@ -102,16 +104,17 @@
3370 the page resides */
3371 ulint size, /*!< in: block size, up to
3372 UNIV_PAGE_SIZE */
3373- ibool* lru) /*!< in: pointer to a variable
3374+ ibool* lru, /*!< in: pointer to a variable
3375 that will be assigned TRUE if
3376 storage was allocated from the
3377 LRU list and buf_pool->mutex was
3378 temporarily released, or NULL if
3379 the LRU list should not be used */
3380+ ibool have_page_hash_mutex)
3381 {
3382- ut_ad(buf_pool_mutex_own(buf_pool));
3383+ //ut_ad(buf_pool_mutex_own(buf_pool));
3384
3385- return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3386+ return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3387 }
3388
3389 /**********************************************************************//**
3390@@ -123,12 +126,25 @@
3391 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3392 void* buf, /*!< in: block to be freed, must not be
3393 pointed to by the buffer pool */
3394- ulint size) /*!< in: block size, up to
3395+ ulint size, /*!< in: block size, up to
3396 UNIV_PAGE_SIZE */
3397+ ibool have_page_hash_mutex)
3398 {
3399- ut_ad(buf_pool_mutex_own(buf_pool));
3400+ //ut_ad(buf_pool_mutex_own(buf_pool));
3401+
3402+ if (!have_page_hash_mutex) {
3403+ mutex_enter(&buf_pool->LRU_list_mutex);
3404+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3405+ }
3406
3407- buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3408+ mutex_enter(&buf_pool->zip_free_mutex);
3409+ buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3410+ mutex_exit(&buf_pool->zip_free_mutex);
3411+
3412+ if (!have_page_hash_mutex) {
3413+ mutex_exit(&buf_pool->LRU_list_mutex);
3414+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3415+ }
3416 }
3417
3418 #ifdef UNIV_MATERIALIZE
3419diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3420--- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900
3421+++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900
d8778560 3422@@ -203,6 +203,20 @@
b4e1fa2c
AM
3423 /*==========================*/
3424
3425 /********************************************************************//**
3426+Acquires page_hash_latch in exclusive (X) mode on every buffer pool instance. */
3427+UNIV_INLINE
3428+void
3429+buf_pool_page_hash_x_lock_all(void);
3430+/*================================*/
3431+
3432+/********************************************************************//**
3433+Releases the exclusive (X) lock on page_hash_latch of every buffer pool instance. */
3434+UNIV_INLINE
3435+void
3436+buf_pool_page_hash_x_unlock_all(void);
3437+/*==================================*/
3438+
3439+/********************************************************************//**
3440 Creates the buffer pool.
3441 @return own: buf_pool object, NULL if not enough memory or error */
3442 UNIV_INTERN
d8778560 3443@@ -832,6 +846,15 @@
b4e1fa2c
AM
3444 const buf_page_t* bpage) /*!< in: pointer to control block */
3445 __attribute__((pure));
3446
3447+/*************************************************************************
3448+Gets the mutex of a block and enters it, rechecking after entry that it is still the block's mutex. */
3449+UNIV_INLINE
3450+mutex_t*
3451+buf_page_get_mutex_enter(
3452+/*=========================*/
3453+ const buf_page_t* bpage) /*!< in: pointer to control block */
3454+ __attribute__((pure));
3455+
3456 /*********************************************************************//**
3457 Get the flush type of a page.
3458 @return flush type */
d8778560 3459@@ -1313,7 +1336,7 @@
b4e1fa2c
AM
3460 All these are protected by buf_pool->mutex. */
3461 /* @{ */
3462
3463- UT_LIST_NODE_T(buf_page_t) list;
3464+ /* UT_LIST_NODE_T(buf_page_t) list; */
3465 /*!< based on state, this is a
3466 list node, protected either by
3467 buf_pool->mutex or by
d8778560 3468@@ -1341,6 +1364,10 @@
b4e1fa2c
AM
3469 BUF_BLOCK_REMOVE_HASH or
3470 BUF_BLOCK_READY_IN_USE. */
3471
3472+ /* resplit for optimistic use */
3473+ UT_LIST_NODE_T(buf_page_t) free;
3474+ UT_LIST_NODE_T(buf_page_t) flush_list;
3475+ UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3476 #ifdef UNIV_DEBUG
3477 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3478 when buf_pool->flush_list_mutex is
d8778560 3479@@ -1433,11 +1460,11 @@
b4e1fa2c
AM
3480 a block is in the unzip_LRU list
3481 if page.state == BUF_BLOCK_FILE_PAGE
3482 and page.zip.data != NULL */
3483-#ifdef UNIV_DEBUG
3484+//#ifdef UNIV_DEBUG
3485 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3486 decompressed LRU list;
3487 used in debugging */
3488-#endif /* UNIV_DEBUG */
3489+//#endif /* UNIV_DEBUG */
3490 mutex_t mutex; /*!< mutex protecting this block:
3491 state (also protected by the buffer
3492 pool mutex), io_fix, buf_fix_count,
d8778560 3493@@ -1612,6 +1639,11 @@
b4e1fa2c
AM
3494 pool instance, protects compressed
3495 only pages (of type buf_page_t, not
3496 buf_block_t */
3497+ mutex_t LRU_list_mutex;
3498+ rw_lock_t page_hash_latch;
3499+ mutex_t free_list_mutex;
3500+ mutex_t zip_free_mutex;
3501+ mutex_t zip_hash_mutex;
3502 ulint instance_no; /*!< Array index of this buffer
3503 pool instance */
3504 ulint old_pool_size; /*!< Old pool size in bytes */
3505diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3506--- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900
3507+++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900
3508@@ -274,7 +274,7 @@
3509 case BUF_BLOCK_ZIP_FREE:
3510 /* This is a free page in buf_pool->zip_free[].
3511 Such pages should only be accessed by the buddy allocator. */
3512- ut_error;
3513+ /* ut_error; */ /* optimistic */
3514 break;
3515 case BUF_BLOCK_ZIP_PAGE:
3516 case BUF_BLOCK_ZIP_DIRTY:
3517@@ -317,9 +317,14 @@
3518 {
3519 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3520
3521+ if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3522+ /* TODO: this is interim code; it should be confirmed later. */
3523+ return(&buf_pool->zip_mutex);
3524+ }
3525+
3526 switch (buf_page_get_state(bpage)) {
3527 case BUF_BLOCK_ZIP_FREE:
3528- ut_error;
3529+ /* ut_error; */ /* optimistic */
3530 return(NULL);
3531 case BUF_BLOCK_ZIP_PAGE:
3532 case BUF_BLOCK_ZIP_DIRTY:
3533@@ -329,6 +334,28 @@
3534 }
3535 }
3536
3537+/*************************************************************************
3538+Gets the mutex of a block and enters it, rechecking after entry that it is still the block's mutex. */
3539+UNIV_INLINE
3540+mutex_t*
3541+buf_page_get_mutex_enter(
3542+/*=========================*/
3543+ const buf_page_t* bpage) /*!< in: pointer to control block */
3544+{
3545+ mutex_t* block_mutex;
3546+
3547+ while(1) {
3548+ block_mutex = buf_page_get_mutex(bpage);
3549+ if (!block_mutex)
3550+ return block_mutex;
3551+
3552+ mutex_enter(block_mutex);
3553+ if (block_mutex == buf_page_get_mutex(bpage))
3554+ return block_mutex;
3555+ mutex_exit(block_mutex);
3556+ }
3557+}
3558+
3559 /*********************************************************************//**
3560 Get the flush type of a page.
3561 @return flush type */
3562@@ -425,8 +452,8 @@
3563 enum buf_io_fix io_fix) /*!< in: io_fix state */
3564 {
3565 #ifdef UNIV_DEBUG
3566- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3567- ut_ad(buf_pool_mutex_own(buf_pool));
3568+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3569+ //ut_ad(buf_pool_mutex_own(buf_pool));
3570 #endif
3571 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3572
3573@@ -456,14 +483,14 @@
3574 const buf_page_t* bpage) /*!< control block being relocated */
3575 {
3576 #ifdef UNIV_DEBUG
3577- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3578- ut_ad(buf_pool_mutex_own(buf_pool));
3579+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3580+ //ut_ad(buf_pool_mutex_own(buf_pool));
3581 #endif
3582 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3583 ut_ad(buf_page_in_file(bpage));
3584- ut_ad(bpage->in_LRU_list);
3585+ //ut_ad(bpage->in_LRU_list);
3586
3587- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3588+ return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3589 && bpage->buf_fix_count == 0);
3590 }
3591
3592@@ -477,8 +504,8 @@
3593 const buf_page_t* bpage) /*!< in: control block */
3594 {
3595 #ifdef UNIV_DEBUG
3596- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3597- ut_ad(buf_pool_mutex_own(buf_pool));
3598+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3599+ //ut_ad(buf_pool_mutex_own(buf_pool));
3600 #endif
3601 ut_ad(buf_page_in_file(bpage));
3602
3603@@ -498,7 +525,8 @@
3604 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3605 #endif /* UNIV_DEBUG */
3606 ut_a(buf_page_in_file(bpage));
3607- ut_ad(buf_pool_mutex_own(buf_pool));
3608+ //ut_ad(buf_pool_mutex_own(buf_pool));
3609+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3610 ut_ad(bpage->in_LRU_list);
3611
3612 #ifdef UNIV_LRU_DEBUG
3613@@ -545,9 +573,10 @@
3614 ulint time_ms) /*!< in: ut_time_ms() */
3615 {
3616 #ifdef UNIV_DEBUG
3617- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3618- ut_ad(buf_pool_mutex_own(buf_pool));
3619+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3620+ //ut_ad(buf_pool_mutex_own(buf_pool));
3621 #endif
3622+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3623 ut_a(buf_page_in_file(bpage));
3624
3625 if (!bpage->access_time) {
3626@@ -761,19 +790,19 @@
3627 /*===========*/
3628 buf_block_t* block) /*!< in, own: block to be freed */
3629 {
3630- buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3631+ //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3632
3633- buf_pool_mutex_enter(buf_pool);
3634+ //buf_pool_mutex_enter(buf_pool);
3635
3636 mutex_enter(&block->mutex);
3637
3638 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3639
3640- buf_LRU_block_free_non_file_page(block);
3641+ buf_LRU_block_free_non_file_page(block, FALSE);
3642
3643 mutex_exit(&block->mutex);
3644
3645- buf_pool_mutex_exit(buf_pool);
3646+ //buf_pool_mutex_exit(buf_pool);
3647 }
3648 #endif /* !UNIV_HOTBACKUP */
3649
3650@@ -821,17 +850,17 @@
3651 page frame */
3652 {
3653 ib_uint64_t lsn;
3654- mutex_t* block_mutex = buf_page_get_mutex(bpage);
3655-
3656- mutex_enter(block_mutex);
3657+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3658
3659- if (buf_page_in_file(bpage)) {
3660+ if (block_mutex && buf_page_in_file(bpage)) {
3661 lsn = bpage->newest_modification;
3662 } else {
3663 lsn = 0;
3664 }
3665
3666- mutex_exit(block_mutex);
3667+ if (block_mutex) {
3668+ mutex_exit(block_mutex);
3669+ }
3670
3671 return(lsn);
3672 }
3673@@ -849,7 +878,7 @@
3674 #ifdef UNIV_SYNC_DEBUG
3675 buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3676
3677- ut_ad((buf_pool_mutex_own(buf_pool)
3678+ ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3679 && (block->page.buf_fix_count == 0))
3680 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3681 #endif /* UNIV_SYNC_DEBUG */
3682@@ -979,7 +1008,11 @@
3683 buf_page_t* bpage;
3684
3685 ut_ad(buf_pool);
3686- ut_ad(buf_pool_mutex_own(buf_pool));
3687+ //ut_ad(buf_pool_mutex_own(buf_pool));
3688+#ifdef UNIV_SYNC_DEBUG
3689+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3690+ || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3691+#endif
3692 ut_ad(fold == buf_page_address_fold(space, offset));
3693
3694 /* Look for the page in the hash table */
3695@@ -1064,11 +1097,13 @@
3696 const buf_page_t* bpage;
3697 buf_pool_t* buf_pool = buf_pool_get(space, offset);
3698
3699- buf_pool_mutex_enter(buf_pool);
3700+ //buf_pool_mutex_enter(buf_pool);
3701+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3702
3703 bpage = buf_page_hash_get(buf_pool, space, offset);
3704
3705- buf_pool_mutex_exit(buf_pool);
3706+ //buf_pool_mutex_exit(buf_pool);
3707+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3708
3709 return(bpage != NULL);
3710 }
3711@@ -1196,4 +1231,38 @@
3712 buf_pool_mutex_exit(buf_pool);
3713 }
3714 }
3715+
3716+/********************************************************************//**
3717+Acquires page_hash_latch in exclusive (X) mode on every buffer pool instance. */
3718+UNIV_INLINE
3719+void
3720+buf_pool_page_hash_x_lock_all(void)
3721+/*===============================*/
3722+{
3723+ ulint i;
3724+
3725+ for (i = 0; i < srv_buf_pool_instances; i++) {
3726+ buf_pool_t* buf_pool;
3727+
3728+ buf_pool = buf_pool_from_array(i);
3729+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3730+ }
3731+}
3732+
3733+/********************************************************************//**
3734+Releases the exclusive (X) lock on page_hash_latch of every buffer pool instance. */
3735+UNIV_INLINE
3736+void
3737+buf_pool_page_hash_x_unlock_all(void)
3738+/*=================================*/
3739+{
3740+ ulint i;
3741+
3742+ for (i = 0; i < srv_buf_pool_instances; i++) {
3743+ buf_pool_t* buf_pool;
3744+
3745+ buf_pool = buf_pool_from_array(i);
3746+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3747+ }
3748+}
3749 #endif /* !UNIV_HOTBACKUP */
3750diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3751--- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900
3752+++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900
df1b5770
AM
3753@@ -111,8 +111,9 @@
3754 buf_LRU_free_block(
3755 /*===============*/
b4e1fa2c 3756 buf_page_t* bpage, /*!< in: block to be freed */
df1b5770
AM
3757- ibool zip) /*!< in: TRUE if should remove also the
3758+ ibool zip, /*!< in: TRUE if should remove also the
b4e1fa2c 3759 compressed page of an uncompressed page */
df1b5770
AM
3760+ ibool have_LRU_mutex)
3761 __attribute__((nonnull));
b4e1fa2c
AM
3762 /******************************************************************//**
3763 Try to free a replaceable block.
df1b5770 3764@@ -159,7 +160,8 @@
b4e1fa2c
AM
3765 void
3766 buf_LRU_block_free_non_file_page(
3767 /*=============================*/
3768- buf_block_t* block); /*!< in: block, must not contain a file page */
3769+ buf_block_t* block, /*!< in: block, must not contain a file page */
3770+ ibool have_page_hash_mutex);
3771 /******************************************************************//**
3772 Adds a block to the LRU list. */
3773 UNIV_INTERN
3774diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3775--- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900
3776+++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900
3777@@ -112,6 +112,7 @@
3778 extern mysql_pfs_key_t archive_lock_key;
3779 # endif /* UNIV_LOG_ARCHIVE */
3780 extern mysql_pfs_key_t btr_search_latch_key;
3781+extern mysql_pfs_key_t buf_pool_page_hash_key;
3782 extern mysql_pfs_key_t buf_block_lock_key;
3783 # ifdef UNIV_SYNC_DEBUG
3784 extern mysql_pfs_key_t buf_block_debug_latch_key;
3785diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3786--- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900
3787+++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900
3788@@ -75,6 +75,10 @@
3789 extern mysql_pfs_key_t buffer_block_mutex_key;
3790 extern mysql_pfs_key_t buf_pool_mutex_key;
3791 extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3792+extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3793+extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3794+extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3795+extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3796 extern mysql_pfs_key_t cache_last_read_mutex_key;
3797 extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3798 extern mysql_pfs_key_t dict_sys_mutex_key;
3799@@ -660,7 +664,7 @@
3800 #define SYNC_TRX_LOCK_HEAP 298
3801 #define SYNC_TRX_SYS_HEADER 290
3802 #define SYNC_LOG 170
3803-#define SYNC_LOG_FLUSH_ORDER 147
3804+#define SYNC_LOG_FLUSH_ORDER 156
3805 #define SYNC_RECV 168
3806 #define SYNC_WORK_QUEUE 162
3807 #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
3808@@ -670,8 +674,13 @@
3809 SYNC_SEARCH_SYS, as memory allocation
3810 can call routines there! Otherwise
3811 the level is SYNC_MEM_HASH. */
3812+#define SYNC_BUF_LRU_LIST 158
3813+#define SYNC_BUF_PAGE_HASH 157
3814+#define SYNC_BUF_BLOCK 155 /* Block mutex */
3815+#define SYNC_BUF_FREE_LIST 153
3816+#define SYNC_BUF_ZIP_FREE 152
3817+#define SYNC_BUF_ZIP_HASH 151
3818 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
3819-#define SYNC_BUF_BLOCK 146 /* Block mutex */
3820 #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
3821 #define SYNC_DOUBLEWRITE 140
3822 #define SYNC_ANY_LATCH 135
3823@@ -703,7 +712,7 @@
3824 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3825 when atomic operations are not enabled */
3826 #endif
3827- ulint waiters; /*!< This ulint is set to 1 if there are (or
3828+ volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3829 may be) threads waiting in the global wait
3830 array for this mutex to be released.
3831 Otherwise, this is 0. */
3832diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3833--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900
3834+++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900
df1b5770 3835@@ -3099,7 +3099,7 @@
b4e1fa2c
AM
3836 level += log_sys->max_checkpoint_age
3837 - (lsn - oldest_modification);
3838 }
3839- bpage = UT_LIST_GET_NEXT(list, bpage);
3840+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3841 n_blocks++;
3842 }
3843
df1b5770 3844@@ -3185,7 +3185,7 @@
b4e1fa2c
AM
3845 found = TRUE;
3846 break;
3847 }
3848- bpage = UT_LIST_GET_NEXT(list, bpage);
3849+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3850 new_blocks_num++;
3851 }
3852 if (!found) {
3853diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3854--- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3855+++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
df1b5770 3856@@ -284,7 +284,7 @@
b4e1fa2c
AM
3857 mutex->lock_word = 0;
3858 #endif
3859 mutex->event = os_event_create(NULL);
3860- mutex_set_waiters(mutex, 0);
3861+ mutex->waiters = 0;
3862 #ifdef UNIV_DEBUG
3863 mutex->magic_n = MUTEX_MAGIC_N;
3864 #endif /* UNIV_DEBUG */
df1b5770 3865@@ -463,6 +463,15 @@
b4e1fa2c
AM
3866 mutex_t* mutex, /*!< in: mutex */
3867 ulint n) /*!< in: value to set */
3868 {
3869+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3870+ ut_ad(mutex);
3871+
3872+ if (n) {
3873+ os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3874+ } else {
3875+ os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3876+ }
3877+#else
3878 volatile ulint* ptr; /* declared volatile to ensure that
3879 the value is stored to memory */
3880 ut_ad(mutex);
df1b5770 3881@@ -471,6 +480,7 @@
b4e1fa2c
AM
3882
3883 *ptr = n; /* Here we assume that the write of a single
3884 word in memory is atomic */
3885+#endif
3886 }
3887
3888 /******************************************************************//**
df1b5770 3889@@ -1185,7 +1195,12 @@
b4e1fa2c
AM
3890 ut_error;
3891 }
3892 break;
3893+ case SYNC_BUF_LRU_LIST:
3894 case SYNC_BUF_FLUSH_LIST:
3895+ case SYNC_BUF_PAGE_HASH:
3896+ case SYNC_BUF_FREE_LIST:
3897+ case SYNC_BUF_ZIP_FREE:
3898+ case SYNC_BUF_ZIP_HASH:
3899 case SYNC_BUF_POOL:
3900 /* We can have multiple mutexes of this type therefore we
3901 can only check whether the greater than condition holds. */
df1b5770 3902@@ -1203,7 +1218,8 @@
b4e1fa2c
AM
3903 buffer block (block->mutex or buf_pool->zip_mutex). */
3904 if (!sync_thread_levels_g(array, level, FALSE)) {
3905 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3906- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3907+ /* The exact rule is not settled yet, so the check below is disabled for now. */
3908+ //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
3909 }
3910 break;
3911 case SYNC_REC_LOCK:
This page took 0.525879 seconds and 4 git commands to generate.