]> git.pld-linux.org Git - packages/mysql.git/blame - innodb_split_buf_pool_mutex.patch
- rel. 2
[packages/mysql.git] / innodb_split_buf_pool_mutex.patch
CommitLineData
b4e1fa2c
AM
1# name : innodb_split_buf_pool_mutex.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
db82db79
AM
8--- a/storage/innobase/btr/btr0cur.c
9+++ b/storage/innobase/btr/btr0cur.c
1bfc1981 10@@ -4070,7 +4070,8 @@
b4e1fa2c
AM
11
12 mtr_commit(mtr);
13
14- buf_pool_mutex_enter(buf_pool);
15+ //buf_pool_mutex_enter(buf_pool);
16+ mutex_enter(&buf_pool->LRU_list_mutex);
17 mutex_enter(&block->mutex);
18
19 /* Only free the block if it is still allocated to
1bfc1981 20@@ -4081,16 +4082,21 @@
b4e1fa2c
AM
21 && buf_block_get_space(block) == space
22 && buf_block_get_page_no(block) == page_no) {
23
db82db79 24- if (!buf_LRU_free_block(&block->page, all)
b4e1fa2c 25- && all && block->page.zip.data) {
db82db79 26+ if (!buf_LRU_free_block(&block->page, all, TRUE)
b4e1fa2c
AM
27+ && all && block->page.zip.data
28+ /* Now, buf_LRU_free_block() may release mutex temporarily */
29+ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
30+ && buf_block_get_space(block) == space
31+ && buf_block_get_page_no(block) == page_no) {
32 /* Attempt to deallocate the uncompressed page
33 if the whole block cannot be deallocted. */
34
df1b5770
AM
35- buf_LRU_free_block(&block->page, FALSE);
36+ buf_LRU_free_block(&block->page, FALSE, TRUE);
b4e1fa2c
AM
37 }
38 }
39
40- buf_pool_mutex_exit(buf_pool);
41+ //buf_pool_mutex_exit(buf_pool);
42+ mutex_exit(&buf_pool->LRU_list_mutex);
43 mutex_exit(&block->mutex);
44 }
45
db82db79
AM
46--- a/storage/innobase/btr/btr0sea.c
47+++ b/storage/innobase/btr/btr0sea.c
13ceb006 48@@ -1972,7 +1972,7 @@
b4e1fa2c
AM
49 rec_offs_init(offsets_);
50
51 rw_lock_x_lock(&btr_search_latch);
52- buf_pool_mutex_enter_all();
53+ buf_pool_page_hash_x_lock_all();
54
55 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
56
13ceb006 57@@ -1980,11 +1980,11 @@
b4e1fa2c
AM
58 /* We release btr_search_latch every once in a while to
59 give other queries a chance to run. */
60 if ((i != 0) && ((i % chunk_size) == 0)) {
61- buf_pool_mutex_exit_all();
62+ buf_pool_page_hash_x_unlock_all();
63 rw_lock_x_unlock(&btr_search_latch);
64 os_thread_yield();
65 rw_lock_x_lock(&btr_search_latch);
66- buf_pool_mutex_enter_all();
67+ buf_pool_page_hash_x_lock_all();
68 }
69
70 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
13ceb006 71@@ -2093,11 +2093,11 @@
b4e1fa2c
AM
72 /* We release btr_search_latch every once in a while to
73 give other queries a chance to run. */
74 if (i != 0) {
75- buf_pool_mutex_exit_all();
76+ buf_pool_page_hash_x_unlock_all();
77 rw_lock_x_unlock(&btr_search_latch);
78 os_thread_yield();
79 rw_lock_x_lock(&btr_search_latch);
80- buf_pool_mutex_enter_all();
81+ buf_pool_page_hash_x_lock_all();
82 }
83
84 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
13ceb006 85@@ -2105,7 +2105,7 @@
b4e1fa2c
AM
86 }
87 }
88
89- buf_pool_mutex_exit_all();
90+ buf_pool_page_hash_x_unlock_all();
91 rw_lock_x_unlock(&btr_search_latch);
92 if (UNIV_LIKELY_NULL(heap)) {
93 mem_heap_free(heap);
db82db79
AM
94--- a/storage/innobase/buf/buf0buddy.c
95+++ b/storage/innobase/buf/buf0buddy.c
96@@ -58,7 +58,7 @@
97
98 /** Validate a given zip_free list. */
99 #define BUF_BUDDY_LIST_VALIDATE(b, i) \
100- UT_LIST_VALIDATE(list, buf_page_t, \
101+ UT_LIST_VALIDATE(zip_list, buf_page_t, \
102 b->zip_free[i], \
103 ut_ad(buf_page_get_state( \
104 ut_list_node_313) \
105@@ -75,10 +75,11 @@
106 ulint i) /*!< in: index of
107 buf_pool->zip_free[] */
108 {
b4e1fa2c
AM
109- ut_ad(buf_pool_mutex_own(buf_pool));
110+ //ut_ad(buf_pool_mutex_own(buf_pool));
111+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
112 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
113 ut_ad(buf_pool->zip_free[i].start != bpage);
114- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
115+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
db82db79 116 }
b4e1fa2c 117
db82db79
AM
118 /**********************************************************************//**
119@@ -93,16 +94,17 @@
b4e1fa2c
AM
120 buf_pool->zip_free[] */
121 {
db82db79 122 #ifdef UNIV_DEBUG
b4e1fa2c
AM
123- buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
124- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
125+ buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
126+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
127
db82db79 128 ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
b4e1fa2c 129 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
db82db79 130 #endif /* UNIV_DEBUG */
b4e1fa2c
AM
131
132- ut_ad(buf_pool_mutex_own(buf_pool));
133+ //ut_ad(buf_pool_mutex_own(buf_pool));
134+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
135 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
136- UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
137+ UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
db82db79 138 }
b4e1fa2c 139
db82db79
AM
140 /**********************************************************************//**
141@@ -117,7 +119,8 @@
b4e1fa2c
AM
142 {
143 buf_page_t* bpage;
144
145- ut_ad(buf_pool_mutex_own(buf_pool));
146+ //ut_ad(buf_pool_mutex_own(buf_pool));
147+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
148 ut_a(i < BUF_BUDDY_SIZES);
db82db79 149 ut_a(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
b4e1fa2c 150
db82db79 151@@ -159,16 +162,19 @@
b4e1fa2c
AM
152 buf_buddy_block_free(
153 /*=================*/
154 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
155- void* buf) /*!< in: buffer frame to deallocate */
156+ void* buf, /*!< in: buffer frame to deallocate */
157+ ibool have_page_hash_mutex)
158 {
159 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
160 buf_page_t* bpage;
161 buf_block_t* block;
162
163- ut_ad(buf_pool_mutex_own(buf_pool));
164+ //ut_ad(buf_pool_mutex_own(buf_pool));
165 ut_ad(!mutex_own(&buf_pool->zip_mutex));
166 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
167
168+ mutex_enter(&buf_pool->zip_hash_mutex);
169+
170 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
171 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
172 && bpage->in_zip_hash && !bpage->in_page_hash),
db82db79 173@@ -180,12 +186,14 @@
b4e1fa2c
AM
174 ut_d(bpage->in_zip_hash = FALSE);
175 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
176
177+ mutex_exit(&buf_pool->zip_hash_mutex);
178+
179 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
180 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
181
182 block = (buf_block_t*) bpage;
183 mutex_enter(&block->mutex);
184- buf_LRU_block_free_non_file_page(block);
185+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
186 mutex_exit(&block->mutex);
187
188 ut_ad(buf_pool->buddy_n_frames > 0);
db82db79 189@@ -202,7 +210,7 @@
b4e1fa2c
AM
190 {
191 buf_pool_t* buf_pool = buf_pool_from_block(block);
192 const ulint fold = BUF_POOL_ZIP_FOLD(block);
193- ut_ad(buf_pool_mutex_own(buf_pool));
194+ //ut_ad(buf_pool_mutex_own(buf_pool));
195 ut_ad(!mutex_own(&buf_pool->zip_mutex));
196 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
197
db82db79 198@@ -214,7 +222,10 @@
b4e1fa2c
AM
199 ut_ad(!block->page.in_page_hash);
200 ut_ad(!block->page.in_zip_hash);
201 ut_d(block->page.in_zip_hash = TRUE);
202+
203+ mutex_enter(&buf_pool->zip_hash_mutex);
204 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
205+ mutex_exit(&buf_pool->zip_hash_mutex);
206
207 ut_d(buf_pool->buddy_n_frames++);
208 }
db82db79
AM
209@@ -268,26 +279,30 @@
210 buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
b4e1fa2c
AM
211 ulint i, /*!< in: index of buf_pool->zip_free[],
212 or BUF_BUDDY_SIZES */
213- ibool* lru) /*!< in: pointer to a variable that
214+ ibool* lru, /*!< in: pointer to a variable that
215 will be assigned TRUE if storage was
216 allocated from the LRU list and
217 buf_pool->mutex was temporarily
db82db79 218 released */
b4e1fa2c
AM
219+ ibool have_page_hash_mutex)
220 {
221 buf_block_t* block;
222
db82db79 223 ut_ad(lru);
b4e1fa2c
AM
224- ut_ad(buf_pool_mutex_own(buf_pool));
225+ //ut_ad(buf_pool_mutex_own(buf_pool));
226+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
227 ut_ad(!mutex_own(&buf_pool->zip_mutex));
db82db79 228 ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
b4e1fa2c
AM
229
230 if (i < BUF_BUDDY_SIZES) {
231 /* Try to allocate from the buddy system. */
232+ mutex_enter(&buf_pool->zip_free_mutex);
233 block = buf_buddy_alloc_zip(buf_pool, i);
234
235 if (block) {
236 goto func_exit;
237 }
238+ mutex_exit(&buf_pool->zip_free_mutex);
239 }
240
241 /* Try allocating from the buf_pool->free list. */
db82db79 242@@ -299,19 +314,30 @@
b4e1fa2c
AM
243 }
244
245 /* Try replacing an uncompressed page in the buffer pool. */
246- buf_pool_mutex_exit(buf_pool);
247+ //buf_pool_mutex_exit(buf_pool);
248+ mutex_exit(&buf_pool->LRU_list_mutex);
249+ if (have_page_hash_mutex) {
250+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
251+ }
df1b5770 252 block = buf_LRU_get_free_block(buf_pool);
b4e1fa2c
AM
253 *lru = TRUE;
254- buf_pool_mutex_enter(buf_pool);
255+ //buf_pool_mutex_enter(buf_pool);
256+ mutex_enter(&buf_pool->LRU_list_mutex);
257+ if (have_page_hash_mutex) {
258+ rw_lock_x_lock(&buf_pool->page_hash_latch);
259+ }
260
261 alloc_big:
262 buf_buddy_block_register(block);
263
264+ mutex_enter(&buf_pool->zip_free_mutex);
265 block = buf_buddy_alloc_from(
266 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
267
268 func_exit:
269 buf_pool->buddy_stat[i].used++;
270+ mutex_exit(&buf_pool->zip_free_mutex);
271+
272 return(block);
273 }
274
db82db79 275@@ -325,8 +351,9 @@
b4e1fa2c
AM
276 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
277 void* src, /*!< in: block to relocate */
278 void* dst, /*!< in: free block to relocate to */
279- ulint i) /*!< in: index of
280+ ulint i, /*!< in: index of
281 buf_pool->zip_free[] */
282+ ibool have_page_hash_mutex)
283 {
284 buf_page_t* bpage;
285 const ulint size = BUF_BUDDY_LOW << i;
734d6226 286@@ -334,13 +361,20 @@
db82db79
AM
287 ulint space;
288 ulint page_no;
b4e1fa2c
AM
289
290- ut_ad(buf_pool_mutex_own(buf_pool));
291+ //ut_ad(buf_pool_mutex_own(buf_pool));
292+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
293 ut_ad(!mutex_own(&buf_pool->zip_mutex));
294 ut_ad(!ut_align_offset(src, size));
295 ut_ad(!ut_align_offset(dst, size));
db82db79
AM
296 ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
297 UNIV_MEM_ASSERT_W(dst, size);
b4e1fa2c 298
db82db79
AM
299+ if (!have_page_hash_mutex) {
300+ mutex_exit(&buf_pool->zip_free_mutex);
301+ mutex_enter(&buf_pool->LRU_list_mutex);
302+ rw_lock_x_lock(&buf_pool->page_hash_latch);
303+ }
b4e1fa2c 304+
db82db79
AM
305 /* We assume that all memory from buf_buddy_alloc()
306 is used for compressed page frames. */
b4e1fa2c 307
734d6226 308@@ -374,6 +408,11 @@
db82db79
AM
309 added to buf_pool->page_hash yet. Obviously,
310 it cannot be relocated. */
b4e1fa2c 311
db82db79
AM
312+ if (!have_page_hash_mutex) {
313+ mutex_enter(&buf_pool->zip_free_mutex);
314+ mutex_exit(&buf_pool->LRU_list_mutex);
315+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
316+ }
317 return(FALSE);
318 }
b4e1fa2c 319
734d6226 320@@ -383,18 +422,27 @@
db82db79
AM
321 For the sake of simplicity, give up. */
322 ut_ad(page_zip_get_size(&bpage->zip) < size);
b4e1fa2c 323
b4e1fa2c 324+ if (!have_page_hash_mutex) {
db82db79 325+ mutex_enter(&buf_pool->zip_free_mutex);
b4e1fa2c
AM
326+ mutex_exit(&buf_pool->LRU_list_mutex);
327+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
328+ }
db82db79
AM
329 return(FALSE);
330 }
331
332+ /* To keep latch order */
333+ if (have_page_hash_mutex)
b4e1fa2c
AM
334+ mutex_exit(&buf_pool->zip_free_mutex);
335+
db82db79
AM
336 /* The block must have been allocated, but it may
337 contain uninitialized data. */
338 UNIV_MEM_ASSERT_W(src, size);
339
340- mutex = buf_page_get_mutex(bpage);
341+ mutex = buf_page_get_mutex_enter(bpage);
342
343- mutex_enter(mutex);
344+ mutex_enter(&buf_pool->zip_free_mutex);
b4e1fa2c 345
db82db79
AM
346- if (buf_page_can_relocate(bpage)) {
347+ if (mutex && buf_page_can_relocate(bpage)) {
348 /* Relocate the compressed page. */
734d6226 349 ullint usec = ut_time_us(NULL);
db82db79 350 ut_a(bpage->zip.data == src);
db82db79
AM
351@@ -409,10 +457,22 @@
352 buddy_stat->relocated_usec
353 += ut_time_us(NULL) - usec;
b4e1fa2c
AM
354 }
355+
b4e1fa2c
AM
356+ if (!have_page_hash_mutex) {
357+ mutex_exit(&buf_pool->LRU_list_mutex);
358+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
359+ }
db82db79 360 return(TRUE);
b4e1fa2c
AM
361 }
362
db82db79
AM
363- mutex_exit(mutex);
364+ if (!have_page_hash_mutex) {
365+ mutex_exit(&buf_pool->LRU_list_mutex);
366+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
367+ }
368+
369+ if (mutex) {
370+ mutex_exit(mutex);
371+ }
b4e1fa2c 372 return(FALSE);
db82db79
AM
373 }
374
375@@ -425,13 +485,15 @@
b4e1fa2c
AM
376 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
377 void* buf, /*!< in: block to be freed, must not be
378 pointed to by the buffer pool */
379- ulint i) /*!< in: index of buf_pool->zip_free[],
380+ ulint i, /*!< in: index of buf_pool->zip_free[],
381 or BUF_BUDDY_SIZES */
382+ ibool have_page_hash_mutex)
383 {
384 buf_page_t* bpage;
385 buf_page_t* buddy;
386
387- ut_ad(buf_pool_mutex_own(buf_pool));
388+ //ut_ad(buf_pool_mutex_own(buf_pool));
389+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
390 ut_ad(!mutex_own(&buf_pool->zip_mutex));
391 ut_ad(i <= BUF_BUDDY_SIZES);
db82db79
AM
392 ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
393@@ -443,7 +505,9 @@
394 ((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE;
b4e1fa2c
AM
395
396 if (i == BUF_BUDDY_SIZES) {
397- buf_buddy_block_free(buf_pool, buf);
398+ mutex_exit(&buf_pool->zip_free_mutex);
399+ buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
400+ mutex_enter(&buf_pool->zip_free_mutex);
401 return;
402 }
403
db82db79
AM
404@@ -491,7 +555,7 @@
405
b4e1fa2c 406 ut_a(bpage != buf);
db82db79
AM
407 UNIV_MEM_ASSERT_W(bpage, BUF_BUDDY_LOW << i);
408- bpage = UT_LIST_GET_NEXT(list, bpage);
409+ bpage = UT_LIST_GET_NEXT(zip_list, bpage);
410 }
b4e1fa2c 411
b4e1fa2c 412 #ifndef UNIV_DEBUG_VALGRIND
db82db79
AM
413@@ -501,7 +565,7 @@
414 ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
b4e1fa2c
AM
415
416 /* The buddy is not free. Is there a free block of this size? */
417- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
418+ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
419
420 if (bpage) {
db82db79
AM
421
422@@ -510,7 +574,7 @@
b4e1fa2c
AM
423 buf_buddy_remove_from_free(buf_pool, bpage, i);
424
425 /* Try to relocate the buddy of buf to the free block. */
426- if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
427+ if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
428
db82db79
AM
429 buddy->state = BUF_BLOCK_ZIP_FREE;
430 goto buddy_is_free;
431--- a/storage/innobase/buf/buf0buf.c
432+++ b/storage/innobase/buf/buf0buf.c
b4e1fa2c
AM
433@@ -263,6 +263,7 @@
434 #ifdef UNIV_PFS_RWLOCK
435 /* Keys to register buffer block related rwlocks and mutexes with
436 performance schema */
437+UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
438 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
439 # ifdef UNIV_SYNC_DEBUG
440 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
441@@ -273,6 +274,10 @@
442 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
443 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
444 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
445+UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
446+UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
447+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
448+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
449 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
450 #endif /* UNIV_PFS_MUTEX */
451
1bfc1981 452@@ -890,9 +895,13 @@
b4e1fa2c
AM
453 block->page.in_zip_hash = FALSE;
454 block->page.in_flush_list = FALSE;
455 block->page.in_free_list = FALSE;
456- block->in_unzip_LRU_list = FALSE;
457 #endif /* UNIV_DEBUG */
adf0fb13
AM
458+ block->page.flush_list.prev = NULL;
459+ block->page.flush_list.next = NULL;
460+ block->page.zip_list.prev = NULL;
461+ block->page.zip_list.next = NULL;
b4e1fa2c
AM
462 block->page.in_LRU_list = FALSE;
463+ block->in_unzip_LRU_list = FALSE;
464 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
465 block->n_pointers = 0;
466 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
1bfc1981 467@@ -997,9 +1006,11 @@
b4e1fa2c
AM
468 memset(block->frame, '\0', UNIV_PAGE_SIZE);
469 #endif
470 /* Add the block to the free list */
471- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
472+ mutex_enter(&buf_pool->free_list_mutex);
473+ UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
474
475 ut_d(block->page.in_free_list = TRUE);
476+ mutex_exit(&buf_pool->free_list_mutex);
477 ut_ad(buf_pool_from_block(block) == buf_pool);
478
479 block++;
1bfc1981 480@@ -1054,7 +1065,8 @@
b4e1fa2c
AM
481 buf_chunk_t* chunk = buf_pool->chunks;
482
483 ut_ad(buf_pool);
484- ut_ad(buf_pool_mutex_own(buf_pool));
485+ //ut_ad(buf_pool_mutex_own(buf_pool));
486+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
487 for (n = buf_pool->n_chunks; n--; chunk++) {
488
489 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
1bfc1981 490@@ -1160,9 +1172,21 @@
b4e1fa2c
AM
491 ------------------------------- */
492 mutex_create(buf_pool_mutex_key,
493 &buf_pool->mutex, SYNC_BUF_POOL);
494+ mutex_create(buf_pool_LRU_list_mutex_key,
495+ &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
496+ rw_lock_create(buf_pool_page_hash_key,
497+ &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
498+ mutex_create(buf_pool_free_list_mutex_key,
499+ &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
500+ mutex_create(buf_pool_zip_free_mutex_key,
501+ &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
502+ mutex_create(buf_pool_zip_hash_mutex_key,
503+ &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
504 mutex_create(buf_pool_zip_mutex_key,
505 &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
506
507+ mutex_enter(&buf_pool->LRU_list_mutex);
508+ rw_lock_x_lock(&buf_pool->page_hash_latch);
509 buf_pool_mutex_enter(buf_pool);
510
511 if (buf_pool_size > 0) {
1bfc1981 512@@ -1175,6 +1199,8 @@
b4e1fa2c
AM
513 mem_free(chunk);
514 mem_free(buf_pool);
515
516+ mutex_exit(&buf_pool->LRU_list_mutex);
517+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
518 buf_pool_mutex_exit(buf_pool);
519
520 return(DB_ERROR);
1bfc1981 521@@ -1205,6 +1231,8 @@
b4e1fa2c
AM
522
523 /* All fields are initialized by mem_zalloc(). */
524
525+ mutex_exit(&buf_pool->LRU_list_mutex);
526+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
527 buf_pool_mutex_exit(buf_pool);
528
529 return(DB_SUCCESS);
29ffd636 530@@ -1376,7 +1404,11 @@
b4e1fa2c
AM
531 ulint fold;
532 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
533
534- ut_ad(buf_pool_mutex_own(buf_pool));
535+ //ut_ad(buf_pool_mutex_own(buf_pool));
536+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
537+#ifdef UNIV_SYNC_DEBUG
538+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
539+#endif
540 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
541 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
542 ut_a(bpage->buf_fix_count == 0);
29ffd636 543@@ -1487,21 +1519,32 @@
b4e1fa2c
AM
544 buf_page_t* bpage;
545 ulint i;
546 buf_pool_t* buf_pool = buf_pool_get(space, offset);
547+ mutex_t* block_mutex;
548
549- ut_ad(buf_pool_mutex_own(buf_pool));
550+ //ut_ad(buf_pool_mutex_own(buf_pool));
551
552+ rw_lock_x_lock(&buf_pool->page_hash_latch);
553 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
b4e1fa2c
AM
554
555 if (UNIV_LIKELY_NULL(bpage)) {
3d3ecf24
AM
556+
557+ block_mutex = buf_page_get_mutex_enter(bpage);
558+ ut_a(block_mutex);
559+
b4e1fa2c
AM
560 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
561 /* The page was loaded meanwhile. */
562+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
563 return(bpage);
564 }
565 /* Add to an existing watch. */
566 bpage->buf_fix_count++;
567+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
568+ mutex_exit(block_mutex);
569 return(NULL);
570 }
571
572+ /* buf_pool->watch is protected by zip_mutex for now */
573+ mutex_enter(&buf_pool->zip_mutex);
574 for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
575 bpage = &buf_pool->watch[i];
576
29ffd636 577@@ -1525,10 +1568,12 @@
b4e1fa2c
AM
578 bpage->space = space;
579 bpage->offset = offset;
580 bpage->buf_fix_count = 1;
581-
582+ bpage->buf_pool_index = buf_pool_index(buf_pool);
583 ut_d(bpage->in_page_hash = TRUE);
584 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
585 fold, bpage);
586+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
587+ mutex_exit(&buf_pool->zip_mutex);
588 return(NULL);
589 case BUF_BLOCK_ZIP_PAGE:
590 ut_ad(bpage->in_page_hash);
29ffd636 591@@ -1546,6 +1591,8 @@
b4e1fa2c
AM
592 ut_error;
593
594 /* Fix compiler warning */
595+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
596+ mutex_exit(&buf_pool->zip_mutex);
597 return(NULL);
598 }
599
29ffd636 600@@ -1563,7 +1610,11 @@
b4e1fa2c
AM
601 space, offset) */
602 buf_page_t* watch) /*!< in/out: sentinel for watch */
603 {
604- ut_ad(buf_pool_mutex_own(buf_pool));
605+ //ut_ad(buf_pool_mutex_own(buf_pool));
606+#ifdef UNIV_SYNC_DEBUG
607+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
608+#endif
609+ ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
610
611 HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
612 ut_d(watch->in_page_hash = FALSE);
29ffd636 613@@ -1585,28 +1636,31 @@
b4e1fa2c
AM
614 buf_pool_t* buf_pool = buf_pool_get(space, offset);
615 ulint fold = buf_page_address_fold(space, offset);
616
617- buf_pool_mutex_enter(buf_pool);
618+ //buf_pool_mutex_enter(buf_pool);
619+ rw_lock_x_lock(&buf_pool->page_hash_latch);
620 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
621 /* The page must exist because buf_pool_watch_set()
622 increments buf_fix_count. */
623 ut_a(bpage);
624
625 if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
626- mutex_t* mutex = buf_page_get_mutex(bpage);
627+ mutex_t* mutex = buf_page_get_mutex_enter(bpage);
628
629- mutex_enter(mutex);
630 ut_a(bpage->buf_fix_count > 0);
631 bpage->buf_fix_count--;
632 mutex_exit(mutex);
633 } else {
634+ mutex_enter(&buf_pool->zip_mutex);
635 ut_a(bpage->buf_fix_count > 0);
636
637 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
638 buf_pool_watch_remove(buf_pool, fold, bpage);
639 }
640+ mutex_exit(&buf_pool->zip_mutex);
641 }
642
643- buf_pool_mutex_exit(buf_pool);
644+ //buf_pool_mutex_exit(buf_pool);
645+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
646 }
647
648 /****************************************************************//**
29ffd636 649@@ -1626,14 +1680,16 @@
b4e1fa2c
AM
650 buf_pool_t* buf_pool = buf_pool_get(space, offset);
651 ulint fold = buf_page_address_fold(space, offset);
652
653- buf_pool_mutex_enter(buf_pool);
654+ //buf_pool_mutex_enter(buf_pool);
655+ rw_lock_s_lock(&buf_pool->page_hash_latch);
656
657 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
658 /* The page must exist because buf_pool_watch_set()
659 increments buf_fix_count. */
660 ut_a(bpage);
661 ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
662- buf_pool_mutex_exit(buf_pool);
663+ //buf_pool_mutex_exit(buf_pool);
664+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
665
666 return(ret);
667 }
29ffd636 668@@ -1650,13 +1706,15 @@
b4e1fa2c
AM
669 {
670 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
671
672- buf_pool_mutex_enter(buf_pool);
673+ //buf_pool_mutex_enter(buf_pool);
674+ mutex_enter(&buf_pool->LRU_list_mutex);
675
676 ut_a(buf_page_in_file(bpage));
677
678 buf_LRU_make_block_young(bpage);
679
680- buf_pool_mutex_exit(buf_pool);
681+ //buf_pool_mutex_exit(buf_pool);
682+ mutex_exit(&buf_pool->LRU_list_mutex);
683 }
684
685 /********************************************************************//**
29ffd636 686@@ -1680,14 +1738,20 @@
b4e1fa2c
AM
687 ut_a(buf_page_in_file(bpage));
688
689 if (buf_page_peek_if_too_old(bpage)) {
690- buf_pool_mutex_enter(buf_pool);
691+ //buf_pool_mutex_enter(buf_pool);
692+ mutex_enter(&buf_pool->LRU_list_mutex);
693 buf_LRU_make_block_young(bpage);
694- buf_pool_mutex_exit(buf_pool);
695+ //buf_pool_mutex_exit(buf_pool);
696+ mutex_exit(&buf_pool->LRU_list_mutex);
697 } else if (!access_time) {
698 ulint time_ms = ut_time_ms();
699- buf_pool_mutex_enter(buf_pool);
700+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
701+ //buf_pool_mutex_enter(buf_pool);
702+ if (block_mutex) {
703 buf_page_set_accessed(bpage, time_ms);
704- buf_pool_mutex_exit(buf_pool);
705+ mutex_exit(block_mutex);
706+ }
707+ //buf_pool_mutex_exit(buf_pool);
708 }
709 }
710
29ffd636 711@@ -1704,7 +1768,8 @@
b4e1fa2c
AM
712 buf_block_t* block;
713 buf_pool_t* buf_pool = buf_pool_get(space, offset);
714
715- buf_pool_mutex_enter(buf_pool);
716+ //buf_pool_mutex_enter(buf_pool);
717+ rw_lock_s_lock(&buf_pool->page_hash_latch);
718
719 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
720
29ffd636 721@@ -1713,7 +1778,8 @@
b4e1fa2c
AM
722 block->check_index_page_at_flush = FALSE;
723 }
724
725- buf_pool_mutex_exit(buf_pool);
726+ //buf_pool_mutex_exit(buf_pool);
727+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
728 }
729
13ceb006 730 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
29ffd636 731@@ -1733,7 +1799,8 @@
b4e1fa2c
AM
732 buf_page_t* bpage;
733 buf_pool_t* buf_pool = buf_pool_get(space, offset);
734
735- buf_pool_mutex_enter(buf_pool);
736+ //buf_pool_mutex_enter(buf_pool);
737+ rw_lock_s_lock(&buf_pool->page_hash_latch);
738
739 bpage = buf_page_hash_get(buf_pool, space, offset);
740
29ffd636 741@@ -1744,7 +1811,8 @@
b4e1fa2c
AM
742 bpage->file_page_was_freed = TRUE;
743 }
744
745- buf_pool_mutex_exit(buf_pool);
746+ //buf_pool_mutex_exit(buf_pool);
747+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
748
749 return(bpage);
750 }
29ffd636 751@@ -1765,7 +1833,8 @@
b4e1fa2c
AM
752 buf_page_t* bpage;
753 buf_pool_t* buf_pool = buf_pool_get(space, offset);
754
755- buf_pool_mutex_enter(buf_pool);
756+ //buf_pool_mutex_enter(buf_pool);
757+ rw_lock_s_lock(&buf_pool->page_hash_latch);
758
759 bpage = buf_page_hash_get(buf_pool, space, offset);
760
29ffd636 761@@ -1774,7 +1843,8 @@
b4e1fa2c
AM
762 bpage->file_page_was_freed = FALSE;
763 }
764
765- buf_pool_mutex_exit(buf_pool);
766+ //buf_pool_mutex_exit(buf_pool);
767+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
768
769 return(bpage);
770 }
29ffd636 771@@ -1806,8 +1876,9 @@
b4e1fa2c
AM
772 buf_pool->stat.n_page_gets++;
773
774 for (;;) {
775- buf_pool_mutex_enter(buf_pool);
776+ //buf_pool_mutex_enter(buf_pool);
777 lookup:
778+ rw_lock_s_lock(&buf_pool->page_hash_latch);
779 bpage = buf_page_hash_get(buf_pool, space, offset);
780 if (bpage) {
781 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
29ffd636 782@@ -1816,7 +1887,8 @@
b4e1fa2c
AM
783
784 /* Page not in buf_pool: needs to be read from file */
785
786- buf_pool_mutex_exit(buf_pool);
787+ //buf_pool_mutex_exit(buf_pool);
788+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
789
790 buf_read_page(space, zip_size, offset);
791
29ffd636 792@@ -1828,10 +1900,15 @@
b4e1fa2c
AM
793 if (UNIV_UNLIKELY(!bpage->zip.data)) {
794 /* There is no compressed page. */
795 err_exit:
796- buf_pool_mutex_exit(buf_pool);
797+ //buf_pool_mutex_exit(buf_pool);
798+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
799 return(NULL);
800 }
801
802+ block_mutex = buf_page_get_mutex_enter(bpage);
803+
804+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
805+
806 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
807
808 switch (buf_page_get_state(bpage)) {
29ffd636 809@@ -1840,24 +1917,43 @@
b4e1fa2c
AM
810 case BUF_BLOCK_MEMORY:
811 case BUF_BLOCK_REMOVE_HASH:
812 case BUF_BLOCK_ZIP_FREE:
813+ if (block_mutex)
814+ mutex_exit(block_mutex);
815 break;
816 case BUF_BLOCK_ZIP_PAGE:
817 case BUF_BLOCK_ZIP_DIRTY:
818- block_mutex = &buf_pool->zip_mutex;
819- mutex_enter(block_mutex);
820+ ut_a(block_mutex == &buf_pool->zip_mutex);
821 bpage->buf_fix_count++;
822 goto got_block;
823 case BUF_BLOCK_FILE_PAGE:
824- block_mutex = &((buf_block_t*) bpage)->mutex;
b4e1fa2c 825+ ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
f7ab7acd
AM
826+
827+ /* release the block mutex to obey the latch order */
828+ mutex_exit(block_mutex);
829+
830+ /* get LRU_list_mutex for buf_LRU_free_block() */
831+ mutex_enter(&buf_pool->LRU_list_mutex);
832 mutex_enter(block_mutex);
b4e1fa2c 833
f7ab7acd 834- /* Discard the uncompressed page frame if possible. */
db82db79 835- if (buf_LRU_free_block(bpage, FALSE)) {
f7ab7acd
AM
836+ if (UNIV_UNLIKELY(bpage->space != space
837+ || bpage->offset != offset
838+ || !bpage->in_LRU_list
839+ || !bpage->zip.data)) {
840+ /* the page changed while the block mutex was released; retry */
841+ mutex_exit(&buf_pool->LRU_list_mutex);
842+ mutex_exit(block_mutex);
843+ goto lookup;
844+ }
b4e1fa2c 845
f7ab7acd 846+ /* Discard the uncompressed page frame if possible. */
db82db79 847+ if (buf_LRU_free_block(bpage, FALSE, TRUE)) {
f7ab7acd 848+ mutex_exit(&buf_pool->LRU_list_mutex);
b4e1fa2c 849 mutex_exit(block_mutex);
df1b5770 850 goto lookup;
f7ab7acd
AM
851 }
852
853+ mutex_exit(&buf_pool->LRU_list_mutex);
854+
855 buf_block_buf_fix_inc((buf_block_t*) bpage,
856 __FILE__, __LINE__);
857 goto got_block;
29ffd636 858@@ -1870,7 +1966,7 @@
b4e1fa2c
AM
859 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
860 access_time = buf_page_is_accessed(bpage);
861
862- buf_pool_mutex_exit(buf_pool);
863+ //buf_pool_mutex_exit(buf_pool);
864
865 mutex_exit(block_mutex);
866
29ffd636 867@@ -2181,7 +2277,7 @@
b4e1fa2c
AM
868 const buf_block_t* block) /*!< in: pointer to block,
869 not dereferenced */
870 {
871- ut_ad(buf_pool_mutex_own(buf_pool));
872+ //ut_ad(buf_pool_mutex_own(buf_pool));
873
874 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
875 /* The pointer should be aligned. */
29ffd636 876@@ -2217,6 +2313,7 @@
b4e1fa2c
AM
877 ulint fix_type;
878 ibool must_read;
879 ulint retries = 0;
880+ mutex_t* block_mutex = NULL;
881 buf_pool_t* buf_pool = buf_pool_get(space, offset);
882
883 ut_ad(mtr);
29ffd636 884@@ -2250,18 +2347,24 @@
b4e1fa2c
AM
885 fold = buf_page_address_fold(space, offset);
886 loop:
887 block = guess;
888- buf_pool_mutex_enter(buf_pool);
889+ //buf_pool_mutex_enter(buf_pool);
890
891 if (block) {
892+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
893+
894 /* If the guess is a compressed page descriptor that
db82db79
AM
895 has been allocated by buf_page_alloc_descriptor(),
896 it may have been freed by buf_relocate(). */
b4e1fa2c
AM
897
898- if (!buf_block_is_uncompressed(buf_pool, block)
899+ if (!block_mutex) {
900+ block = guess = NULL;
901+ } else if (!buf_block_is_uncompressed(buf_pool, block)
902 || offset != block->page.offset
903 || space != block->page.space
904 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
905
906+ mutex_exit(block_mutex);
907+
908 block = guess = NULL;
909 } else {
910 ut_ad(!block->page.in_zip_hash);
29ffd636 911@@ -2270,12 +2373,19 @@
b4e1fa2c
AM
912 }
913
914 if (block == NULL) {
915+ rw_lock_s_lock(&buf_pool->page_hash_latch);
916 block = (buf_block_t*) buf_page_hash_get_low(
917 buf_pool, space, offset, fold);
918+ if (block) {
919+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
920+ ut_a(block_mutex);
921+ }
922+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
923 }
924
925 loop2:
926 if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
927+ mutex_exit(block_mutex);
928 block = NULL;
929 }
930
29ffd636 931@@ -2287,12 +2397,14 @@
b4e1fa2c
AM
932 space, offset, fold);
933
934 if (UNIV_LIKELY_NULL(block)) {
935-
936+ block_mutex = buf_page_get_mutex((buf_page_t*)block);
937+ ut_a(block_mutex);
938+ ut_ad(mutex_own(block_mutex));
939 goto got_block;
940 }
941 }
942
943- buf_pool_mutex_exit(buf_pool);
944+ //buf_pool_mutex_exit(buf_pool);
945
946 if (mode == BUF_GET_IF_IN_POOL
adf0fb13 947 || mode == BUF_PEEK_IF_IN_POOL
29ffd636 948@@ -2345,7 +2457,8 @@
b4e1fa2c
AM
949 /* The page is being read to buffer pool,
950 but we cannot wait around for the read to
951 complete. */
952- buf_pool_mutex_exit(buf_pool);
953+ //buf_pool_mutex_exit(buf_pool);
954+ mutex_exit(block_mutex);
955
956 return(NULL);
957 }
29ffd636 958@@ -2355,38 +2468,49 @@
b4e1fa2c
AM
959 ibool success;
960
961 case BUF_BLOCK_FILE_PAGE:
962+ if (block_mutex == &buf_pool->zip_mutex) {
963+ /* We hold zip_mutex, which is the wrong mutex for a file page: release it and retry. */
964+ mutex_exit(block_mutex);
965+ goto loop;
966+ }
967 break;
968
969 case BUF_BLOCK_ZIP_PAGE:
970 case BUF_BLOCK_ZIP_DIRTY:
971+ ut_ad(block_mutex == &buf_pool->zip_mutex);
972 bpage = &block->page;
973 /* Protect bpage->buf_fix_count. */
974- mutex_enter(&buf_pool->zip_mutex);
975+ //mutex_enter(&buf_pool->zip_mutex);
976
977 if (bpage->buf_fix_count
978 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
979 /* This condition often occurs when the buffer
980 is not buffer-fixed, but I/O-fixed by
981 buf_page_init_for_read(). */
982- mutex_exit(&buf_pool->zip_mutex);
983+ //mutex_exit(&buf_pool->zip_mutex);
984 wait_until_unfixed:
985 /* The block is buffer-fixed or I/O-fixed.
986 Try again later. */
987- buf_pool_mutex_exit(buf_pool);
988+ //buf_pool_mutex_exit(buf_pool);
989+ mutex_exit(block_mutex);
990 os_thread_sleep(WAIT_FOR_READ);
adf0fb13 991
b4e1fa2c
AM
992 goto loop;
993 }
994
995 /* Allocate an uncompressed page. */
996- buf_pool_mutex_exit(buf_pool);
997- mutex_exit(&buf_pool->zip_mutex);
998+ //buf_pool_mutex_exit(buf_pool);
999+ //mutex_exit(&buf_pool->zip_mutex);
1000+ mutex_exit(block_mutex);
1001
df1b5770 1002 block = buf_LRU_get_free_block(buf_pool);
b4e1fa2c
AM
1003 ut_a(block);
1004+ block_mutex = &block->mutex;
1005
1006- buf_pool_mutex_enter(buf_pool);
1007- mutex_enter(&block->mutex);
1008+ //buf_pool_mutex_enter(buf_pool);
1009+ mutex_enter(&buf_pool->LRU_list_mutex);
1010+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1011+ mutex_enter(block_mutex);
1012
1013 {
1014 buf_page_t* hash_bpage;
29ffd636 1015@@ -2399,35 +2523,47 @@
b4e1fa2c
AM
1016 while buf_pool->mutex was released.
1017 Free the block that was allocated. */
1018
1019- buf_LRU_block_free_non_file_page(block);
1020- mutex_exit(&block->mutex);
1021+ buf_LRU_block_free_non_file_page(block, TRUE);
1022+ mutex_exit(block_mutex);
1023
1024 block = (buf_block_t*) hash_bpage;
1025+ if (block) {
1026+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1027+ ut_a(block_mutex);
1028+ }
1029+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1030+ mutex_exit(&buf_pool->LRU_list_mutex);
1031 goto loop2;
1032 }
1033 }
1034
1035+ mutex_enter(&buf_pool->zip_mutex);
1036+
1037 if (UNIV_UNLIKELY
1038 (bpage->buf_fix_count
1039 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1040
1041+ mutex_exit(&buf_pool->zip_mutex);
1042 /* The block was buffer-fixed or I/O-fixed
1043 while buf_pool->mutex was not held by this thread.
1044 Free the block that was allocated and try again.
1045 This should be extremely unlikely. */
1046
1047- buf_LRU_block_free_non_file_page(block);
1048- mutex_exit(&block->mutex);
1049+ buf_LRU_block_free_non_file_page(block, TRUE);
1050+ //mutex_exit(&block->mutex);
1051
1052+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1053+ mutex_exit(&buf_pool->LRU_list_mutex);
1054 goto wait_until_unfixed;
1055 }
1056
1057 /* Move the compressed page from bpage to block,
1058 and uncompress it. */
1059
1060- mutex_enter(&buf_pool->zip_mutex);
1061-
1062 buf_relocate(bpage, &block->page);
1063+
1064+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1065+
1066 buf_block_init_low(block);
1067 block->lock_hash_val = lock_rec_hash(space, offset);
1068
29ffd636 1069@@ -2437,7 +2573,7 @@
b4e1fa2c
AM
1070 if (buf_page_get_state(&block->page)
1071 == BUF_BLOCK_ZIP_PAGE) {
db82db79 1072 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
b4e1fa2c
AM
1073- UT_LIST_REMOVE(list, buf_pool->zip_clean,
1074+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1075 &block->page);
db82db79 1076 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
b4e1fa2c 1077 ut_ad(!block->page.in_flush_list);
29ffd636 1078@@ -2455,18 +2591,23 @@
b4e1fa2c
AM
1079 /* Insert at the front of unzip_LRU list */
1080 buf_unzip_LRU_add_block(block, FALSE);
1081
1082+ mutex_exit(&buf_pool->LRU_list_mutex);
1083+
1084 block->page.buf_fix_count = 1;
1085 buf_block_set_io_fix(block, BUF_IO_READ);
1086 rw_lock_x_lock_func(&block->lock, 0, file, line);
1087
1088 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1089
1090- mutex_exit(&block->mutex);
1091+ mutex_exit(block_mutex);
1092 mutex_exit(&buf_pool->zip_mutex);
db82db79 1093- buf_pool->n_pend_unzip++;
b4e1fa2c 1094
db82db79
AM
1095+ buf_pool_mutex_enter(buf_pool);
1096+ buf_pool->n_pend_unzip++;
1097 buf_pool_mutex_exit(buf_pool);
b4e1fa2c 1098
b4e1fa2c 1099+ //buf_pool_mutex_exit(buf_pool);
db82db79
AM
1100+
1101 buf_page_free_descriptor(bpage);
b4e1fa2c
AM
1102
1103 /* Decompress the page and apply buffered operations
29ffd636 1104@@ -2480,12 +2621,15 @@
b4e1fa2c
AM
1105 }
1106
1107 /* Unfix and unlatch the block. */
1108- buf_pool_mutex_enter(buf_pool);
1109- mutex_enter(&block->mutex);
1110+ //buf_pool_mutex_enter(buf_pool);
1111+ block_mutex = &block->mutex;
1112+ mutex_enter(block_mutex);
1113 block->page.buf_fix_count--;
1114 buf_block_set_io_fix(block, BUF_IO_NONE);
1115- mutex_exit(&block->mutex);
1116+
1117+ buf_pool_mutex_enter(buf_pool);
1118 buf_pool->n_pend_unzip--;
1119+ buf_pool_mutex_exit(buf_pool);
1120 rw_lock_x_unlock(&block->lock);
1121
1122 break;
29ffd636 1123@@ -2501,7 +2645,7 @@
b4e1fa2c
AM
1124
1125 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1126
1127- mutex_enter(&block->mutex);
1128+ //mutex_enter(&block->mutex);
1129 #if UNIV_WORD_SIZE == 4
1130 /* On 32-bit systems, there is no padding in buf_page_t. On
1131 other systems, Valgrind could complain about uninitialized pad
29ffd636 1132@@ -2514,8 +2658,8 @@
b4e1fa2c
AM
1133 /* Try to evict the block from the buffer pool, to use the
1134 insert buffer (change buffer) as much as possible. */
1135
db82db79 1136- if (buf_LRU_free_block(&block->page, TRUE)) {
11822e22 1137- mutex_exit(&block->mutex);
db82db79 1138+ if (buf_LRU_free_block(&block->page, TRUE, FALSE)) {
11822e22 1139+ mutex_exit(block_mutex);
b4e1fa2c 1140 if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
df1b5770 1141 /* Set the watch, as it would have
11822e22 1142 been set if the page were not in the
29ffd636 1143@@ -2524,6 +2668,9 @@
11822e22
AM
1144 space, offset, fold);
1145
1146 if (UNIV_LIKELY_NULL(block)) {
1147+ block_mutex = buf_page_get_mutex((buf_page_t*)block);
1148+ ut_a(block_mutex);
1149+ ut_ad(mutex_own(block_mutex));
1150
1151 /* The page entered the buffer
1152 pool for some reason. Try to
29ffd636 1153@@ -2531,7 +2678,7 @@
11822e22
AM
1154 goto got_block;
1155 }
1156 }
1157- buf_pool_mutex_exit(buf_pool);
1158+ //buf_pool_mutex_exit(buf_pool);
1159 fprintf(stderr,
1160 "innodb_change_buffering_debug evict %u %u\n",
1161 (unsigned) space, (unsigned) offset);
29ffd636 1162@@ -2553,13 +2700,14 @@
db82db79
AM
1163 ut_a(mode == BUF_GET_POSSIBLY_FREED
1164 || !block->page.file_page_was_freed);
1165 #endif
b4e1fa2c
AM
1166- mutex_exit(&block->mutex);
1167+ //mutex_exit(&block->mutex);
1168
1169 /* Check if this is the first access to the page */
1170
1171 access_time = buf_page_is_accessed(&block->page);
1172
1173- buf_pool_mutex_exit(buf_pool);
1174+ //buf_pool_mutex_exit(buf_pool);
1175+ mutex_exit(block_mutex);
1176
adf0fb13
AM
1177 if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
1178 buf_page_set_accessed_make_young(&block->page, access_time);
29ffd636 1179@@ -2792,9 +2940,11 @@
b4e1fa2c
AM
1180 buf_pool = buf_pool_from_block(block);
1181
1182 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1183- buf_pool_mutex_enter(buf_pool);
1184+ //buf_pool_mutex_enter(buf_pool);
1185+ mutex_enter(&buf_pool->LRU_list_mutex);
1186 buf_LRU_make_block_young(&block->page);
1187- buf_pool_mutex_exit(buf_pool);
1188+ //buf_pool_mutex_exit(buf_pool);
1189+ mutex_exit(&buf_pool->LRU_list_mutex);
1190 } else if (!buf_page_is_accessed(&block->page)) {
1191 /* Above, we do a dirty read on purpose, to avoid
1192 mutex contention. The field buf_page_t::access_time
29ffd636 1193@@ -2802,9 +2952,11 @@
b4e1fa2c
AM
1194 field must be protected by mutex, however. */
1195 ulint time_ms = ut_time_ms();
1196
1197- buf_pool_mutex_enter(buf_pool);
1198+ //buf_pool_mutex_enter(buf_pool);
1199+ mutex_enter(&block->mutex);
1200 buf_page_set_accessed(&block->page, time_ms);
1201- buf_pool_mutex_exit(buf_pool);
1202+ //buf_pool_mutex_exit(buf_pool);
1203+ mutex_exit(&block->mutex);
1204 }
1205
adf0fb13 1206 ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
29ffd636 1207@@ -2871,18 +3023,21 @@
b4e1fa2c
AM
1208 ut_ad(mtr);
1209 ut_ad(mtr->state == MTR_ACTIVE);
1210
1211- buf_pool_mutex_enter(buf_pool);
1212+ //buf_pool_mutex_enter(buf_pool);
1213+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1214 block = buf_block_hash_get(buf_pool, space_id, page_no);
1215
1216 if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1217- buf_pool_mutex_exit(buf_pool);
1218+ //buf_pool_mutex_exit(buf_pool);
1219+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1220 return(NULL);
1221 }
1222
1223 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1224
1225 mutex_enter(&block->mutex);
1226- buf_pool_mutex_exit(buf_pool);
1227+ //buf_pool_mutex_exit(buf_pool);
1228+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1229
1230 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1231 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
29ffd636 1232@@ -2972,7 +3127,10 @@
b4e1fa2c 1233 buf_page_t* hash_page;
b4e1fa2c 1234
734d6226 1235 ut_ad(buf_pool == buf_pool_get(space, offset));
b4e1fa2c
AM
1236- ut_ad(buf_pool_mutex_own(buf_pool));
1237+ //ut_ad(buf_pool_mutex_own(buf_pool));
1238+#ifdef UNIV_SYNC_DEBUG
1239+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1240+#endif
1241 ut_ad(mutex_own(&(block->mutex)));
1242 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1243
29ffd636 1244@@ -3001,11 +3159,14 @@
b4e1fa2c
AM
1245 if (UNIV_LIKELY(!hash_page)) {
1246 } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1247 /* Preserve the reference count. */
1248- ulint buf_fix_count = hash_page->buf_fix_count;
1249+ ulint buf_fix_count;
1250
1251+ mutex_enter(&buf_pool->zip_mutex);
1252+ buf_fix_count = hash_page->buf_fix_count;
1253 ut_a(buf_fix_count > 0);
1254 block->page.buf_fix_count += buf_fix_count;
1255 buf_pool_watch_remove(buf_pool, fold, hash_page);
1256+ mutex_exit(&buf_pool->zip_mutex);
1257 } else {
1258 fprintf(stderr,
1259 "InnoDB: Error: page %lu %lu already found"
29ffd636 1260@@ -3015,7 +3176,8 @@
b4e1fa2c
AM
1261 (const void*) hash_page, (const void*) block);
1262 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1263 mutex_exit(&block->mutex);
1264- buf_pool_mutex_exit(buf_pool);
1265+ //buf_pool_mutex_exit(buf_pool);
1266+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1267 buf_print();
1268 buf_LRU_print();
1269 buf_validate();
29ffd636 1270@@ -3098,7 +3260,9 @@
b4e1fa2c
AM
1271
1272 fold = buf_page_address_fold(space, offset);
1273
1274- buf_pool_mutex_enter(buf_pool);
1275+ //buf_pool_mutex_enter(buf_pool);
1276+ mutex_enter(&buf_pool->LRU_list_mutex);
1277+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1278
1279 watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1280 if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
29ffd636 1281@@ -3107,9 +3271,15 @@
b4e1fa2c
AM
1282 err_exit:
1283 if (block) {
1284 mutex_enter(&block->mutex);
1285- buf_LRU_block_free_non_file_page(block);
1286+ mutex_exit(&buf_pool->LRU_list_mutex);
1287+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1288+ buf_LRU_block_free_non_file_page(block, FALSE);
1289 mutex_exit(&block->mutex);
1290 }
1291+ else {
1292+ mutex_exit(&buf_pool->LRU_list_mutex);
1293+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1294+ }
1295
1296 bpage = NULL;
1297 goto func_exit;
29ffd636 1298@@ -3132,6 +3302,8 @@
b4e1fa2c 1299
734d6226 1300 buf_page_init(buf_pool, space, offset, fold, block);
b4e1fa2c
AM
1301
1302+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1303+
1304 /* The block must be put to the LRU list, to the old blocks */
1305 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1306
29ffd636 1307@@ -3159,7 +3331,7 @@
b4e1fa2c
AM
1308 been added to buf_pool->LRU and
1309 buf_pool->page_hash. */
1310 mutex_exit(&block->mutex);
1311- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1312+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1313 mutex_enter(&block->mutex);
1314 block->page.zip.data = data;
1315
29ffd636 1316@@ -3172,13 +3344,14 @@
b4e1fa2c
AM
1317 buf_unzip_LRU_add_block(block, TRUE);
1318 }
1319
1320+ mutex_exit(&buf_pool->LRU_list_mutex);
1321 mutex_exit(&block->mutex);
1322 } else {
db82db79 1323 /* The compressed page must be allocated before the
b4e1fa2c
AM
1324 control block (bpage), in order to avoid the
1325 invocation of buf_buddy_relocate_block() on
1326 uninitialized data. */
1327- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
b4e1fa2c 1328+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
b4e1fa2c 1329
db82db79
AM
1330 /* If buf_buddy_alloc() allocated storage from the LRU list,
1331 it released and reacquired buf_pool->mutex. Thus, we must
29ffd636 1332@@ -3194,7 +3367,10 @@
db82db79 1333
b4e1fa2c
AM
1334 /* The block was added by some other thread. */
1335 watch_page = NULL;
b4e1fa2c 1336- buf_buddy_free(buf_pool, data, zip_size);
b4e1fa2c
AM
1337+ buf_buddy_free(buf_pool, data, zip_size, TRUE);
1338+
1339+ mutex_exit(&buf_pool->LRU_list_mutex);
1340+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1341
1342 bpage = NULL;
1343 goto func_exit;
29ffd636 1344@@ -3242,20 +3418,26 @@
b4e1fa2c
AM
1345 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1346 bpage);
1347
1348+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1349+
1350 /* The block must be put to the LRU list, to the old blocks */
1351 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
db82db79 1352 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
b4e1fa2c 1353 buf_LRU_insert_zip_clean(bpage);
db82db79 1354 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
b4e1fa2c
AM
1355
1356+ mutex_exit(&buf_pool->LRU_list_mutex);
1357+
1358 buf_page_set_io_fix(bpage, BUF_IO_READ);
1359
1360 mutex_exit(&buf_pool->zip_mutex);
1361 }
1362
1363+ buf_pool_mutex_enter(buf_pool);
1364 buf_pool->n_pend_reads++;
1365-func_exit:
1366 buf_pool_mutex_exit(buf_pool);
1367+func_exit:
1368+ //buf_pool_mutex_exit(buf_pool);
1369
1370 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1371
29ffd636 1372@@ -3297,7 +3479,9 @@
b4e1fa2c
AM
1373
1374 fold = buf_page_address_fold(space, offset);
1375
1376- buf_pool_mutex_enter(buf_pool);
1377+ //buf_pool_mutex_enter(buf_pool);
1378+ mutex_enter(&buf_pool->LRU_list_mutex);
1379+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1380
1381 block = (buf_block_t*) buf_page_hash_get_low(
1382 buf_pool, space, offset, fold);
29ffd636 1383@@ -3313,7 +3497,9 @@
df1b5770 1384 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
b4e1fa2c
AM
1385
1386 /* Page can be found in buf_pool */
1387- buf_pool_mutex_exit(buf_pool);
1388+ //buf_pool_mutex_exit(buf_pool);
1389+ mutex_exit(&buf_pool->LRU_list_mutex);
1390+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1391
1392 buf_block_free(free_block);
1393
29ffd636 1394@@ -3335,6 +3521,7 @@
b4e1fa2c
AM
1395 mutex_enter(&block->mutex);
1396
734d6226 1397 buf_page_init(buf_pool, space, offset, fold, block);
b4e1fa2c
AM
1398+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1399
1400 /* The block must be put to the LRU list */
1401 buf_LRU_add_block(&block->page, FALSE);
29ffd636 1402@@ -3361,7 +3548,7 @@
b4e1fa2c
AM
1403 the reacquisition of buf_pool->mutex. We also must
1404 defer this operation until after the block descriptor
1405 has been added to buf_pool->LRU and buf_pool->page_hash. */
1406- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1407+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1408 mutex_enter(&block->mutex);
1409 block->page.zip.data = data;
1410
29ffd636 1411@@ -3379,7 +3566,8 @@
b4e1fa2c
AM
1412
1413 buf_page_set_accessed(&block->page, time_ms);
1414
1415- buf_pool_mutex_exit(buf_pool);
1416+ //buf_pool_mutex_exit(buf_pool);
1417+ mutex_exit(&buf_pool->LRU_list_mutex);
1418
1419 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1420
29ffd636 1421@@ -3434,7 +3622,9 @@
734d6226
AM
1422 ibool ret = TRUE;
1423
1424 /* First unfix and release lock on the bpage */
1425- buf_pool_mutex_enter(buf_pool);
1426+ //buf_pool_mutex_enter(buf_pool);
1427+ mutex_enter(&buf_pool->LRU_list_mutex);
1428+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1429 mutex_enter(buf_page_get_mutex(bpage));
1430 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
1431 ut_ad(bpage->buf_fix_count == 0);
29ffd636 1432@@ -3455,11 +3645,15 @@
734d6226
AM
1433 ret = FALSE;
1434 }
1435
1436+ buf_pool_mutex_enter(buf_pool);
1437 ut_ad(buf_pool->n_pend_reads > 0);
1438 buf_pool->n_pend_reads--;
1439+ buf_pool_mutex_exit(buf_pool);
1440
1441 mutex_exit(buf_page_get_mutex(bpage));
1442- buf_pool_mutex_exit(buf_pool);
1443+ //buf_pool_mutex_exit(buf_pool);
1444+ mutex_exit(&buf_pool->LRU_list_mutex);
1445+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1446
1447 return(ret);
1448 }
29ffd636 1449@@ -3477,6 +3671,8 @@
b4e1fa2c
AM
1450 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1451 const ibool uncompressed = (buf_page_get_state(bpage)
1452 == BUF_BLOCK_FILE_PAGE);
1453+ ibool have_LRU_mutex = FALSE;
1454+ mutex_t* block_mutex;
1455
1456 ut_a(buf_page_in_file(bpage));
1457
29ffd636 1458@@ -3619,8 +3815,26 @@
b4e1fa2c
AM
1459 }
1460 }
1461
1462+ if (io_type == BUF_IO_WRITE
1463+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1464+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1465+ /* to keep consistency at buf_LRU_insert_zip_clean() */
1466+ have_LRU_mutex = TRUE; /* optimistic */
1467+ }
1468+retry_mutex:
1469+ if (have_LRU_mutex)
1470+ mutex_enter(&buf_pool->LRU_list_mutex);
1471+ block_mutex = buf_page_get_mutex_enter(bpage);
1472+ ut_a(block_mutex);
1473+ if (io_type == BUF_IO_WRITE
1474+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1475+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1476+ && !have_LRU_mutex) {
1477+ mutex_exit(block_mutex);
1478+ have_LRU_mutex = TRUE;
1479+ goto retry_mutex;
1480+ }
1481 buf_pool_mutex_enter(buf_pool);
1482- mutex_enter(buf_page_get_mutex(bpage));
1483
1484 #ifdef UNIV_IBUF_COUNT_DEBUG
1485 if (io_type == BUF_IO_WRITE || uncompressed) {
29ffd636 1486@@ -3643,6 +3857,7 @@
b4e1fa2c
AM
1487 the x-latch to this OS thread: do not let this confuse you in
1488 debugging! */
1489
1490+ ut_a(!have_LRU_mutex);
1491 ut_ad(buf_pool->n_pend_reads > 0);
1492 buf_pool->n_pend_reads--;
1493 buf_pool->stat.n_pages_read++;
29ffd636 1494@@ -3660,6 +3875,9 @@
b4e1fa2c
AM
1495
1496 buf_flush_write_complete(bpage);
1497
1498+ if (have_LRU_mutex)
1499+ mutex_exit(&buf_pool->LRU_list_mutex);
1500+
1501 if (uncompressed) {
1502 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1503 BUF_IO_WRITE);
29ffd636 1504@@ -3682,8 +3900,8 @@
b4e1fa2c
AM
1505 }
1506 #endif /* UNIV_DEBUG */
1507
1508- mutex_exit(buf_page_get_mutex(bpage));
1509 buf_pool_mutex_exit(buf_pool);
1510+ mutex_exit(block_mutex);
1511 }
1512
1513 /*********************************************************************//**
29ffd636 1514@@ -3700,7 +3918,9 @@
b4e1fa2c
AM
1515
1516 ut_ad(buf_pool);
1517
1518- buf_pool_mutex_enter(buf_pool);
1519+ //buf_pool_mutex_enter(buf_pool);
1520+ mutex_enter(&buf_pool->LRU_list_mutex);
1521+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1522
1523 chunk = buf_pool->chunks;
1524
29ffd636 1525@@ -3717,7 +3937,9 @@
b4e1fa2c
AM
1526 }
1527 }
1528
1529- buf_pool_mutex_exit(buf_pool);
1530+ //buf_pool_mutex_exit(buf_pool);
1531+ mutex_exit(&buf_pool->LRU_list_mutex);
1532+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1533
1534 return(TRUE);
1535 }
29ffd636 1536@@ -3765,7 +3987,8 @@
b4e1fa2c
AM
1537 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1538 }
1539
1540- buf_pool_mutex_enter(buf_pool);
1541+ //buf_pool_mutex_enter(buf_pool);
1542+ mutex_enter(&buf_pool->LRU_list_mutex);
1543
1544 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1545 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
29ffd636 1546@@ -3778,7 +4001,8 @@
b4e1fa2c
AM
1547 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1548 buf_refresh_io_stats(buf_pool);
1549
1550- buf_pool_mutex_exit(buf_pool);
1551+ //buf_pool_mutex_exit(buf_pool);
1552+ mutex_exit(&buf_pool->LRU_list_mutex);
1553 }
1554
1555 /*********************************************************************//**
29ffd636 1556@@ -3820,7 +4044,10 @@
b4e1fa2c
AM
1557
1558 ut_ad(buf_pool);
1559
1560- buf_pool_mutex_enter(buf_pool);
1561+ //buf_pool_mutex_enter(buf_pool);
1562+ mutex_enter(&buf_pool->LRU_list_mutex);
1563+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1564+ /* To keep the new latch order, we cannot validate everything correctly here... */
1565
1566 chunk = buf_pool->chunks;
1567
29ffd636 1568@@ -3918,7 +4145,7 @@
b4e1fa2c
AM
1569 /* Check clean compressed-only blocks. */
1570
1571 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1572- b = UT_LIST_GET_NEXT(list, b)) {
1573+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1574 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1575 switch (buf_page_get_io_fix(b)) {
1576 case BUF_IO_NONE:
29ffd636 1577@@ -3950,7 +4177,7 @@
b4e1fa2c
AM
1578
1579 buf_flush_list_mutex_enter(buf_pool);
1580 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1581- b = UT_LIST_GET_NEXT(list, b)) {
1582+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1583 ut_ad(b->in_flush_list);
1584 ut_a(b->oldest_modification);
1585 n_flush++;
29ffd636 1586@@ -4010,6 +4237,8 @@
b4e1fa2c
AM
1587 }
1588
1589 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1590+ /* Because of the latching order with block->mutex, we cannot acquire the needed mutexes before this point, so the checks below are commented out. */
1591+/*
1592 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1593 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1594 (ulong) UT_LIST_GET_LEN(buf_pool->free),
29ffd636 1595@@ -4020,8 +4249,11 @@
b4e1fa2c
AM
1596 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1597 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1598 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1599+*/
1600
1601- buf_pool_mutex_exit(buf_pool);
1602+ //buf_pool_mutex_exit(buf_pool);
1603+ mutex_exit(&buf_pool->LRU_list_mutex);
1604+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1605
1606 ut_a(buf_LRU_validate());
1607 ut_a(buf_flush_validate(buf_pool));
29ffd636 1608@@ -4077,7 +4309,9 @@
b4e1fa2c
AM
1609 index_ids = mem_alloc(size * sizeof *index_ids);
1610 counts = mem_alloc(sizeof(ulint) * size);
1611
1612- buf_pool_mutex_enter(buf_pool);
1613+ //buf_pool_mutex_enter(buf_pool);
1614+ mutex_enter(&buf_pool->LRU_list_mutex);
1615+ mutex_enter(&buf_pool->free_list_mutex);
1616 buf_flush_list_mutex_enter(buf_pool);
1617
1618 fprintf(stderr,
29ffd636 1619@@ -4146,7 +4380,9 @@
b4e1fa2c
AM
1620 }
1621 }
1622
1623- buf_pool_mutex_exit(buf_pool);
1624+ //buf_pool_mutex_exit(buf_pool);
1625+ mutex_exit(&buf_pool->LRU_list_mutex);
1626+ mutex_exit(&buf_pool->free_list_mutex);
1627
1628 for (i = 0; i < n_found; i++) {
1629 index = dict_index_get_if_in_cache(index_ids[i]);
29ffd636 1630@@ -4203,7 +4439,7 @@
b4e1fa2c
AM
1631 buf_chunk_t* chunk;
1632 ulint fixed_pages_number = 0;
1633
1634- buf_pool_mutex_enter(buf_pool);
1635+ //buf_pool_mutex_enter(buf_pool);
1636
1637 chunk = buf_pool->chunks;
1638
29ffd636 1639@@ -4237,7 +4473,7 @@
b4e1fa2c
AM
1640 /* Traverse the lists of clean and dirty compressed-only blocks. */
1641
1642 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1643- b = UT_LIST_GET_NEXT(list, b)) {
1644+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1645 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1646 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1647
29ffd636 1648@@ -4249,7 +4485,7 @@
b4e1fa2c
AM
1649
1650 buf_flush_list_mutex_enter(buf_pool);
1651 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1652- b = UT_LIST_GET_NEXT(list, b)) {
1653+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1654 ut_ad(b->in_flush_list);
1655
1656 switch (buf_page_get_state(b)) {
29ffd636 1657@@ -4275,7 +4511,7 @@
b4e1fa2c
AM
1658
1659 buf_flush_list_mutex_exit(buf_pool);
1660 mutex_exit(&buf_pool->zip_mutex);
1661- buf_pool_mutex_exit(buf_pool);
1662+ //buf_pool_mutex_exit(buf_pool);
1663
1664 return(fixed_pages_number);
1665 }
29ffd636 1666@@ -4433,6 +4669,8 @@
d8778560
AM
1667 /* Find appropriate pool_info to store stats for this buffer pool */
1668 pool_info = &all_pool_info[pool_id];
b4e1fa2c
AM
1669
1670+ mutex_enter(&buf_pool->LRU_list_mutex);
1671+ mutex_enter(&buf_pool->free_list_mutex);
1672 buf_pool_mutex_enter(buf_pool);
1673 buf_flush_list_mutex_enter(buf_pool);
1674
29ffd636 1675@@ -4548,6 +4786,8 @@
d8778560 1676 pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
b4e1fa2c
AM
1677
1678 buf_refresh_io_stats(buf_pool);
1679+ mutex_exit(&buf_pool->LRU_list_mutex);
1680+ mutex_exit(&buf_pool->free_list_mutex);
1681 buf_pool_mutex_exit(buf_pool);
1682 }
1683
29ffd636 1684@@ -4792,11 +5032,13 @@
b4e1fa2c
AM
1685 {
1686 ulint len;
1687
1688- buf_pool_mutex_enter(buf_pool);
1689+ //buf_pool_mutex_enter(buf_pool);
1690+ mutex_enter(&buf_pool->free_list_mutex);
1691
1692 len = UT_LIST_GET_LEN(buf_pool->free);
1693
1694- buf_pool_mutex_exit(buf_pool);
1695+ //buf_pool_mutex_exit(buf_pool);
1696+ mutex_exit(&buf_pool->free_list_mutex);
1697
1698 return(len);
1699 }
db82db79
AM
1700--- a/storage/innobase/buf/buf0flu.c
1701+++ b/storage/innobase/buf/buf0flu.c
d8778560 1702@@ -307,7 +307,7 @@
b4e1fa2c
AM
1703
1704 ut_d(block->page.in_flush_list = TRUE);
1705 block->page.oldest_modification = lsn;
1706- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1707+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1708
1709 #ifdef UNIV_DEBUG_VALGRIND
1710 {
d8778560 1711@@ -401,14 +401,14 @@
b4e1fa2c
AM
1712 > block->page.oldest_modification) {
1713 ut_ad(b->in_flush_list);
1714 prev_b = b;
1715- b = UT_LIST_GET_NEXT(list, b);
1716+ b = UT_LIST_GET_NEXT(flush_list, b);
1717 }
1718 }
1719
1720 if (prev_b == NULL) {
1721- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1722+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1723 } else {
1724- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1725+ UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1726 prev_b, &block->page);
1727 }
1728
d8778560 1729@@ -434,7 +434,7 @@
b4e1fa2c
AM
1730 //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1731 //ut_ad(buf_pool_mutex_own(buf_pool));
1732 #endif
1733- //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1734+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1735 //ut_ad(bpage->in_LRU_list);
1736
1737 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
d8778560 1738@@ -470,14 +470,14 @@
b4e1fa2c
AM
1739 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1740 {
1741 #ifdef UNIV_DEBUG
1742- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1743- ut_ad(buf_pool_mutex_own(buf_pool));
1744+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1745+ //ut_ad(buf_pool_mutex_own(buf_pool));
1746 #endif
1747- ut_a(buf_page_in_file(bpage));
1748+ //ut_a(buf_page_in_file(bpage));
1749 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1750 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1751
1752- if (bpage->oldest_modification != 0
1753+ if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1754 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1755 ut_ad(bpage->in_flush_list);
1756
d8778560 1757@@ -508,7 +508,7 @@
b4e1fa2c
AM
1758 {
1759 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1760
1761- ut_ad(buf_pool_mutex_own(buf_pool));
1762+ //ut_ad(buf_pool_mutex_own(buf_pool));
1763 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1764 ut_ad(bpage->in_flush_list);
1765
db82db79 1766@@ -526,13 +526,13 @@
b4e1fa2c
AM
1767 return;
1768 case BUF_BLOCK_ZIP_DIRTY:
1769 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1770- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1771+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
db82db79 1772 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
b4e1fa2c 1773 buf_LRU_insert_zip_clean(bpage);
db82db79 1774 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
b4e1fa2c
AM
1775 break;
1776 case BUF_BLOCK_FILE_PAGE:
1777- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1778+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1779 break;
1780 }
1781
db82db79 1782@@ -576,7 +576,7 @@
b4e1fa2c
AM
1783 buf_page_t* prev_b = NULL;
1784 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1785
1786- ut_ad(buf_pool_mutex_own(buf_pool));
1787+ //ut_ad(buf_pool_mutex_own(buf_pool));
1788 /* Must reside in the same buffer pool. */
1789 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
1790
db82db79 1791@@ -605,18 +605,18 @@
b4e1fa2c
AM
1792 because we assert on in_flush_list in comparison function. */
1793 ut_d(bpage->in_flush_list = FALSE);
1794
1795- prev = UT_LIST_GET_PREV(list, bpage);
1796- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1797+ prev = UT_LIST_GET_PREV(flush_list, bpage);
1798+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1799
1800 if (prev) {
1801 ut_ad(prev->in_flush_list);
1802 UT_LIST_INSERT_AFTER(
1803- list,
1804+ flush_list,
1805 buf_pool->flush_list,
1806 prev, dpage);
1807 } else {
1808 UT_LIST_ADD_FIRST(
1809- list,
1810+ flush_list,
1811 buf_pool->flush_list,
1812 dpage);
1813 }
db82db79 1814@@ -1085,7 +1085,7 @@
b4e1fa2c
AM
1815
1816 #ifdef UNIV_DEBUG
1817 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1818- ut_ad(!buf_pool_mutex_own(buf_pool));
1819+ //ut_ad(!buf_pool_mutex_own(buf_pool));
1820 #endif
1821
1822 #ifdef UNIV_LOG_DEBUG
db82db79 1823@@ -1099,7 +1099,8 @@
b4e1fa2c
AM
1824 io_fixed and oldest_modification != 0. Thus, it cannot be
1825 relocated in the buffer pool or removed from flush_list or
1826 LRU_list. */
1827- ut_ad(!buf_pool_mutex_own(buf_pool));
1828+ //ut_ad(!buf_pool_mutex_own(buf_pool));
1829+ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
1830 ut_ad(!buf_flush_list_mutex_own(buf_pool));
1831 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
1832 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
db82db79 1833@@ -1179,7 +1180,7 @@
11822e22
AM
1834 buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
1835 buf_block_t* block) /*!< in/out: buffer control block */
1836 {
1837- ut_ad(buf_pool_mutex_own(buf_pool));
1838+ //ut_ad(buf_pool_mutex_own(buf_pool));
1839 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1840 ut_ad(mutex_own(&block->mutex));
1841
db82db79 1842@@ -1187,8 +1188,11 @@
11822e22
AM
1843 return(FALSE);
1844 }
1845
1846+ buf_pool_mutex_enter(buf_pool);
1847+
1848 if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
1849 || buf_pool->init_flush[BUF_FLUSH_LRU]) {
1850+ buf_pool_mutex_exit(buf_pool);
1851 /* There is already a flush batch of the same type running */
1852 return(FALSE);
1853 }
db82db79 1854@@ -1262,12 +1266,18 @@
b4e1fa2c
AM
1855 ibool is_uncompressed;
1856
1857 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1858- ut_ad(buf_pool_mutex_own(buf_pool));
1859+ //ut_ad(buf_pool_mutex_own(buf_pool));
1860+#ifdef UNIV_SYNC_DEBUG
1861+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
1862+#endif
1863 ut_ad(buf_page_in_file(bpage));
1864
1865 block_mutex = buf_page_get_mutex(bpage);
1866 ut_ad(mutex_own(block_mutex));
1867
1868+ buf_pool_mutex_enter(buf_pool);
1869+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1870+
1871 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
1872
1873 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
3d3ecf24 1874@@ -1455,14 +1465,16 @@
b4e1fa2c
AM
1875
1876 buf_pool = buf_pool_get(space, i);
1877
1878- buf_pool_mutex_enter(buf_pool);
1879+ //buf_pool_mutex_enter(buf_pool);
1880+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1881
1882 /* We only want to flush pages from this buffer pool. */
1883 bpage = buf_page_hash_get(buf_pool, space, i);
1884
1885 if (!bpage) {
1886
1887- buf_pool_mutex_exit(buf_pool);
1888+ //buf_pool_mutex_exit(buf_pool);
1889+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1bfc1981 1890 if (srv_flush_neighbor_pages == 2) {
b4e1fa2c 1891
1bfc1981 1892 /* This is contiguous neighbor page flush and
3d3ecf24 1893@@ -1480,11 +1492,9 @@
b4e1fa2c
AM
1894 if (flush_type != BUF_FLUSH_LRU
1895 || i == offset
1896 || buf_page_is_old(bpage)) {
1897- mutex_t* block_mutex = buf_page_get_mutex(bpage);
b4e1fa2c
AM
1898+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
1899
11822e22
AM
1900- mutex_enter(block_mutex);
1901-
b4e1fa2c
AM
1902- if (buf_flush_ready_for_flush(bpage, flush_type)
1903+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
1904 && (i == offset || !bpage->buf_fix_count)) {
1905 /* We only try to flush those
1906 neighbors != offset where the buf fix
3d3ecf24 1907@@ -1500,11 +1510,12 @@
b4e1fa2c
AM
1908 ut_ad(!buf_pool_mutex_own(buf_pool));
1909 count++;
1910 continue;
1911- } else {
1912+ } else if (block_mutex) {
1913 mutex_exit(block_mutex);
1914 }
1915 }
1916- buf_pool_mutex_exit(buf_pool);
1917+ //buf_pool_mutex_exit(buf_pool);
1918+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
b4e1fa2c 1919
3d3ecf24 1920 if (srv_flush_neighbor_pages == 2) {
1bfc1981 1921
3d3ecf24 1922@@ -1553,21 +1564,25 @@
b4e1fa2c
AM
1923 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1924 #endif /* UNIV_DEBUG */
1925
1926- ut_ad(buf_pool_mutex_own(buf_pool));
1927+ //ut_ad(buf_pool_mutex_own(buf_pool));
1928+ ut_ad(flush_type != BUF_FLUSH_LRU
1929+ || mutex_own(&buf_pool->LRU_list_mutex));
1930
1931- block_mutex = buf_page_get_mutex(bpage);
1932- mutex_enter(block_mutex);
1933+ block_mutex = buf_page_get_mutex_enter(bpage);
1934
1935- ut_a(buf_page_in_file(bpage));
1936+ //ut_a(buf_page_in_file(bpage));
1937
1938- if (buf_flush_ready_for_flush(bpage, flush_type)) {
1939+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
1940 ulint space;
1941 ulint offset;
1942 buf_pool_t* buf_pool;
1943
1944 buf_pool = buf_pool_from_bpage(bpage);
1945
1946- buf_pool_mutex_exit(buf_pool);
1947+ //buf_pool_mutex_exit(buf_pool);
1948+ if (flush_type == BUF_FLUSH_LRU) {
1949+ mutex_exit(&buf_pool->LRU_list_mutex);
1950+ }
1951
1952 /* These fields are protected by both the
1953 buffer pool mutex and block mutex. */
3d3ecf24 1954@@ -1583,13 +1598,18 @@
b4e1fa2c
AM
1955 *count,
1956 n_to_flush);
1957
1958- buf_pool_mutex_enter(buf_pool);
1959+ //buf_pool_mutex_enter(buf_pool);
1960+ if (flush_type == BUF_FLUSH_LRU) {
1961+ mutex_enter(&buf_pool->LRU_list_mutex);
1962+ }
1963 flushed = TRUE;
1964- } else {
1965+ } else if (block_mutex) {
1966 mutex_exit(block_mutex);
1967 }
1968
1969- ut_ad(buf_pool_mutex_own(buf_pool));
1970+ //ut_ad(buf_pool_mutex_own(buf_pool));
1971+ ut_ad(flush_type != BUF_FLUSH_LRU
1972+ || mutex_own(&buf_pool->LRU_list_mutex));
1973
1974 return(flushed);
1975 }
3d3ecf24 1976@@ -1610,7 +1630,8 @@
b4e1fa2c
AM
1977 buf_page_t* bpage;
1978 ulint count = 0;
1979
1980- ut_ad(buf_pool_mutex_own(buf_pool));
1981+ //ut_ad(buf_pool_mutex_own(buf_pool));
1982+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
1983
1984 do {
1985 /* Start from the end of the list looking for a
3d3ecf24 1986@@ -1632,7 +1653,8 @@
b4e1fa2c
AM
1987 should be flushed, we factor in this value. */
1988 buf_lru_flush_page_count += count;
1989
1990- ut_ad(buf_pool_mutex_own(buf_pool));
1991+ //ut_ad(buf_pool_mutex_own(buf_pool));
1992+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
1993
1994 return(count);
1995 }
3d3ecf24 1996@@ -1660,9 +1682,10 @@
b4e1fa2c
AM
1997 {
1998 ulint len;
1999 buf_page_t* bpage;
2000+ buf_page_t* prev_bpage = NULL;
2001 ulint count = 0;
2002
2003- ut_ad(buf_pool_mutex_own(buf_pool));
2004+ //ut_ad(buf_pool_mutex_own(buf_pool));
2005
2006 /* If we have flushed enough, leave the loop */
2007 do {
3d3ecf24 2008@@ -1681,6 +1704,7 @@
b4e1fa2c
AM
2009
2010 if (bpage) {
2011 ut_a(bpage->oldest_modification > 0);
2012+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2013 }
2014
2015 if (!bpage || bpage->oldest_modification >= lsn_limit) {
3d3ecf24 2016@@ -1722,9 +1746,17 @@
b4e1fa2c
AM
2017 break;
2018 }
2019
2020- bpage = UT_LIST_GET_PREV(list, bpage);
2021+ bpage = UT_LIST_GET_PREV(flush_list, bpage);
2022
2023- ut_ad(!bpage || bpage->in_flush_list);
2024+ //ut_ad(!bpage || bpage->in_flush_list);
2025+ if (bpage != prev_bpage) {
2026+ /* the predecessor is not the one recorded earlier, so the list was modified; stop and retry */
2027+ buf_flush_list_mutex_exit(buf_pool);
2028+ break;
2029+ }
2030+ if (bpage) {
2031+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2032+ }
2033
2034 buf_flush_list_mutex_exit(buf_pool);
2035
3d3ecf24 2036@@ -1733,7 +1765,7 @@
b4e1fa2c
AM
2037
2038 } while (count < min_n && bpage != NULL && len > 0);
2039
2040- ut_ad(buf_pool_mutex_own(buf_pool));
2041+ //ut_ad(buf_pool_mutex_own(buf_pool));
2042
2043 return(count);
2044 }
3d3ecf24 2045@@ -1772,13 +1804,15 @@
adf0fb13 2046 || sync_thread_levels_empty_except_dict());
b4e1fa2c
AM
2047 #endif /* UNIV_SYNC_DEBUG */
2048
2049- buf_pool_mutex_enter(buf_pool);
2050+ //buf_pool_mutex_enter(buf_pool);
2051
2052 /* Note: The buffer pool mutex is released and reacquired within
2053 the flush functions. */
2054 switch(flush_type) {
2055 case BUF_FLUSH_LRU:
2056+ mutex_enter(&buf_pool->LRU_list_mutex);
2057 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2058+ mutex_exit(&buf_pool->LRU_list_mutex);
2059 break;
2060 case BUF_FLUSH_LIST:
2061 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
3d3ecf24 2062@@ -1787,7 +1821,7 @@
b4e1fa2c
AM
2063 ut_error;
2064 }
2065
2066- buf_pool_mutex_exit(buf_pool);
2067+ //buf_pool_mutex_exit(buf_pool);
2068
2069 buf_flush_buffered_writes();
2070
3d3ecf24 2071@@ -2059,7 +2093,7 @@
b4e1fa2c
AM
2072 retry:
2073 //buf_pool_mutex_enter(buf_pool);
2074 if (have_LRU_mutex)
2075- buf_pool_mutex_enter(buf_pool);
2076+ mutex_enter(&buf_pool->LRU_list_mutex);
2077
2078 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2079
3d3ecf24 2080@@ -2076,15 +2110,15 @@
b4e1fa2c
AM
2081 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2082 continue;
2083 }
2084- block_mutex = buf_page_get_mutex(bpage);
2085-
2086- mutex_enter(block_mutex);
2087+ block_mutex = buf_page_get_mutex_enter(bpage);
2088
2089- if (buf_flush_ready_for_replace(bpage)) {
2090+ if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2091 n_replaceable++;
2092 }
2093
2094- mutex_exit(block_mutex);
2095+ if (block_mutex) {
2096+ mutex_exit(block_mutex);
2097+ }
2098
2099 distance++;
2100
3d3ecf24 2101@@ -2093,7 +2127,7 @@
b4e1fa2c
AM
2102
2103 //buf_pool_mutex_exit(buf_pool);
2104 if (have_LRU_mutex)
2105- buf_pool_mutex_exit(buf_pool);
2106+ mutex_exit(&buf_pool->LRU_list_mutex);
2107
2108 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2109
3d3ecf24 2110@@ -2292,7 +2326,7 @@
b4e1fa2c
AM
2111
2112 ut_ad(buf_flush_list_mutex_own(buf_pool));
2113
2114- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2115+ UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2116 ut_ad(ut_list_node_313->in_flush_list));
2117
2118 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
3d3ecf24 2119@@ -2332,7 +2366,7 @@
b4e1fa2c
AM
2120 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2121 }
2122
2123- bpage = UT_LIST_GET_NEXT(list, bpage);
2124+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2125
2126 ut_a(!bpage || om >= bpage->oldest_modification);
2127 }
db82db79
AM
2128--- a/storage/innobase/buf/buf0lru.c
2129+++ b/storage/innobase/buf/buf0lru.c
29ffd636 2130@@ -147,8 +147,9 @@
b4e1fa2c
AM
2131 void
2132 buf_LRU_block_free_hashed_page(
2133 /*===========================*/
2134- buf_block_t* block); /*!< in: block, must contain a file page and
2135+ buf_block_t* block, /*!< in: block, must contain a file page and
2136 be in a state where it can be freed */
2137+ ibool have_page_hash_mutex);
2138
2139 /******************************************************************//**
2140 Determines if the unzip_LRU list should be used for evicting a victim
29ffd636 2141@@ -158,15 +159,20 @@
b4e1fa2c
AM
2142 ibool
2143 buf_LRU_evict_from_unzip_LRU(
2144 /*=========================*/
2145- buf_pool_t* buf_pool)
2146+ buf_pool_t* buf_pool,
2147+ ibool have_LRU_mutex)
2148 {
2149 ulint io_avg;
2150 ulint unzip_avg;
2151
2152- ut_ad(buf_pool_mutex_own(buf_pool));
2153+ //ut_ad(buf_pool_mutex_own(buf_pool));
2154
2155+ if (!have_LRU_mutex)
2156+ mutex_enter(&buf_pool->LRU_list_mutex);
2157 /* If the unzip_LRU list is empty, we can only use the LRU. */
2158 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2159+ if (!have_LRU_mutex)
2160+ mutex_exit(&buf_pool->LRU_list_mutex);
2161 return(FALSE);
2162 }
2163
29ffd636 2164@@ -175,14 +181,20 @@
b4e1fa2c
AM
2165 decompressed pages in the buffer pool. */
2166 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2167 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2168+ if (!have_LRU_mutex)
2169+ mutex_exit(&buf_pool->LRU_list_mutex);
2170 return(FALSE);
2171 }
2172
2173 /* If eviction hasn't started yet, we assume by default
2174 that a workload is disk bound. */
2175 if (buf_pool->freed_page_clock == 0) {
2176+ if (!have_LRU_mutex)
2177+ mutex_exit(&buf_pool->LRU_list_mutex);
2178 return(TRUE);
2179 }
2180+ if (!have_LRU_mutex)
2181+ mutex_exit(&buf_pool->LRU_list_mutex);
2182
2183 /* Calculate the average over past intervals, and add the values
2184 of the current interval. */
29ffd636 2185@@ -250,18 +262,25 @@
b4e1fa2c 2186 page_arr = ut_malloc(
29ffd636 2187 sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE);
b4e1fa2c
AM
2188
2189- buf_pool_mutex_enter(buf_pool);
2190+ //buf_pool_mutex_enter(buf_pool);
2191+ mutex_enter(&buf_pool->LRU_list_mutex);
adf0fb13 2192 num_entries = 0;
b4e1fa2c
AM
2193
2194 scan_again:
b4e1fa2c
AM
2195 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2196
2197 while (bpage != NULL) {
adf0fb13 2198+ /* In XtraDB, bpage->state, space, io_fix and buf_fix_count are protected by block_mutex */
b4e1fa2c
AM
2199+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2200 buf_page_t* prev_bpage;
adf0fb13 2201 ibool is_fixed;
b4e1fa2c 2202
b4e1fa2c
AM
2203 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2204
adf0fb13 2205+ if (UNIV_UNLIKELY(!block_mutex)) {
b4e1fa2c
AM
2206+ goto next_page;
2207+ }
2208+
2209 ut_a(buf_page_in_file(bpage));
2210
2211 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
29ffd636 2212@@ -270,24 +289,30 @@
adf0fb13
AM
2213 /* Compressed pages are never hashed.
2214 Skip blocks of other tablespaces.
2215 Skip I/O-fixed blocks (to be dealt with later). */
2216+ mutex_exit(block_mutex);
2217 next_page:
2218 bpage = prev_bpage;
2219 continue;
2220 }
b4e1fa2c 2221
adf0fb13
AM
2222- mutex_enter(&((buf_block_t*) bpage)->mutex);
2223+ //mutex_enter(&((buf_block_t*) bpage)->mutex);
2224 is_fixed = bpage->buf_fix_count > 0
13ceb006 2225 || !((buf_block_t*) bpage)->index;
adf0fb13
AM
2226- mutex_exit(&((buf_block_t*) bpage)->mutex);
2227+ //mutex_exit(&((buf_block_t*) bpage)->mutex);
b4e1fa2c 2228
adf0fb13
AM
2229 if (is_fixed) {
2230+ mutex_exit(block_mutex);
2231 goto next_page;
2232 }
b4e1fa2c 2233
adf0fb13
AM
2234 /* Store the page number so that we can drop the hash
2235 index in a batch later. */
2236 page_arr[num_entries] = bpage->offset;
29ffd636 2237+
adf0fb13
AM
2238+ mutex_exit(block_mutex);
2239+
29ffd636
AM
2240 ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE);
2241+
adf0fb13 2242 ++num_entries;
b4e1fa2c 2243
29ffd636
AM
2244 if (num_entries < BUF_LRU_DROP_SEARCH_SIZE) {
2245@@ -296,14 +321,16 @@
adf0fb13
AM
2246
2247 /* Array full. We release the buf_pool->mutex to obey
2248 the latching order. */
2249- buf_pool_mutex_exit(buf_pool);
2250+ //buf_pool_mutex_exit(buf_pool);
2251+ mutex_exit(&buf_pool->LRU_list_mutex);
2252
2253 buf_LRU_drop_page_hash_batch(
2254 id, zip_size, page_arr, num_entries);
2255
2256 num_entries = 0;
2257
2258- buf_pool_mutex_enter(buf_pool);
2259+ //buf_pool_mutex_enter(buf_pool);
2260+ mutex_enter(&buf_pool->LRU_list_mutex);
2261
2262 /* Note that we released the buf_pool mutex above
2263 after reading the prev_bpage during processing of a
29ffd636 2264@@ -321,13 +348,23 @@
adf0fb13
AM
2265 /* If, however, bpage has been removed from LRU list
2266 to the free list then we should restart the scan.
2267 bpage->state is protected by buf_pool mutex. */
2268+
2269+ /* re-acquire block_mutex to avoid a race on bpage->state */
2270+ block_mutex = buf_page_get_mutex_enter(bpage);
2271+ if (!block_mutex) {
2272+ goto scan_again;
2273+ }
2274+
2275 if (bpage
2276 && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2277+ mutex_exit(block_mutex);
2278 goto scan_again;
b4e1fa2c 2279 }
adf0fb13 2280+ mutex_exit(block_mutex);
b4e1fa2c
AM
2281 }
2282
2283- buf_pool_mutex_exit(buf_pool);
2284+ //buf_pool_mutex_exit(buf_pool);
2285+ mutex_exit(&buf_pool->LRU_list_mutex);
2286
2287 /* Drop any remaining batch of search hashed pages. */
2288 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
29ffd636
AM
2289@@ -351,7 +388,9 @@
2290 ulint i;
b4e1fa2c
AM
2291
2292 scan_again:
2293- buf_pool_mutex_enter(buf_pool);
2294+ //buf_pool_mutex_enter(buf_pool);
2295+ mutex_enter(&buf_pool->LRU_list_mutex);
2296+ rw_lock_x_lock(&buf_pool->page_hash_latch);
29ffd636 2297 buf_flush_list_mutex_enter(buf_pool);
b4e1fa2c
AM
2298
2299 all_freed = TRUE;
29ffd636 2300@@ -364,7 +403,7 @@
b4e1fa2c 2301
29ffd636 2302 ut_a(buf_page_in_file(bpage));
b4e1fa2c 2303
29ffd636
AM
2304- prev_bpage = UT_LIST_GET_PREV(list, bpage);
2305+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
b4e1fa2c 2306
29ffd636
AM
2307 /* bpage->space and bpage->io_fix are protected by
2308 buf_pool->mutex and block_mutex. It is safe to check
2309@@ -388,8 +427,14 @@
2310 will stay in the flush_list because buf_flush_remove()
2311 needs buf_pool->mutex as well. */
2312 buf_flush_list_mutex_exit(buf_pool);
2313- block_mutex = buf_page_get_mutex(bpage);
2314- mutex_enter(block_mutex);
2315+ block_mutex = buf_page_get_mutex_enter(bpage);
2316+
2317+ if (!block_mutex) {
2318+ /* This case should be impossible.
2319+ Something is wrong, so request another pass (scan_again). */
2320+ all_freed = FALSE;
2321+ goto next_page;
2322+ }
b4e1fa2c 2323
29ffd636
AM
2324 if (bpage->buf_fix_count > 0) {
2325 mutex_exit(block_mutex);
2326@@ -440,9 +485,15 @@
2327 mutex_exit(block_mutex);
db82db79 2328
29ffd636
AM
2329 /* Now it is safe to release the buf_pool->mutex. */
2330- buf_pool_mutex_exit(buf_pool);
2331+ //buf_pool_mutex_exit(buf_pool);
2332+ mutex_exit(&buf_pool->LRU_list_mutex);
2333+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2334+
2335 os_thread_yield();
2336- buf_pool_mutex_enter(buf_pool);
2337+ //buf_pool_mutex_enter(buf_pool);
2338+ mutex_enter(&buf_pool->LRU_list_mutex);
2339+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2340+
b4e1fa2c 2341
29ffd636
AM
2342 mutex_enter(block_mutex);
2343 buf_page_unset_sticky(bpage);
2344@@ -454,7 +505,9 @@
2345 i = 0;
b4e1fa2c
AM
2346 }
2347
2348- buf_pool_mutex_exit(buf_pool);
29ffd636 2349+// buf_pool_mutex_exit(buf_pool);
b4e1fa2c
AM
2350+ mutex_exit(&buf_pool->LRU_list_mutex);
2351+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
29ffd636 2352 buf_flush_list_mutex_exit(buf_pool);
b4e1fa2c 2353
29ffd636
AM
2354 ut_ad(buf_flush_validate(buf_pool));
2355@@ -504,7 +557,9 @@
b4e1fa2c
AM
2356 buf_page_t* b;
2357 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2358
2359- ut_ad(buf_pool_mutex_own(buf_pool));
2360+ //ut_ad(buf_pool_mutex_own(buf_pool));
2361+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
11822e22 2362+ ut_ad(mutex_own(&buf_pool->zip_mutex));
b4e1fa2c
AM
2363 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2364
2365 /* Find the first successor of bpage in the LRU list
29ffd636 2366@@ -512,17 +567,17 @@
b4e1fa2c
AM
2367 b = bpage;
2368 do {
2369 b = UT_LIST_GET_NEXT(LRU, b);
2370- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2371+ } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2372
2373 /* Insert bpage before b, i.e., after the predecessor of b. */
2374 if (b) {
2375- b = UT_LIST_GET_PREV(list, b);
2376+ b = UT_LIST_GET_PREV(zip_list, b);
2377 }
2378
2379 if (b) {
2380- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2381+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2382 } else {
2383- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2384+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2385 }
2386 }
db82db79 2387 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
29ffd636 2388@@ -536,18 +591,19 @@
b4e1fa2c
AM
2389 buf_LRU_free_from_unzip_LRU_list(
2390 /*=============================*/
2391 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2392- ulint n_iterations) /*!< in: how many times this has
2393+ ulint n_iterations, /*!< in: how many times this has
2394 been called repeatedly without
2395 result: a high value means that
2396 we should search farther; we will
2397 search n_iterations / 5 of the
2398 unzip_LRU list, or nothing if
2399 n_iterations >= 5 */
2400+ ibool have_LRU_mutex)
2401 {
2402 buf_block_t* block;
2403 ulint distance;
2404
2405- ut_ad(buf_pool_mutex_own(buf_pool));
2406+ //ut_ad(buf_pool_mutex_own(buf_pool));
2407
2408 /* Theoratically it should be much easier to find a victim
2409 from unzip_LRU as we can choose even a dirty block (as we'll
29ffd636 2410@@ -557,7 +613,7 @@
b4e1fa2c
AM
2411 if we have done five iterations so far. */
2412
2413 if (UNIV_UNLIKELY(n_iterations >= 5)
2414- || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2415+ || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2416
2417 return(FALSE);
2418 }
29ffd636 2419@@ -565,18 +621,25 @@
b4e1fa2c
AM
2420 distance = 100 + (n_iterations
2421 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2422
2423+restart:
2424 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2425 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2426 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2427
db82db79 2428 ibool freed;
b4e1fa2c
AM
2429
2430+ mutex_enter(&block->mutex);
2431+ if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2432+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2433+ mutex_exit(&block->mutex);
2434+ goto restart;
2435+ }
2436+
2437 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2438 ut_ad(block->in_unzip_LRU_list);
2439 ut_ad(block->page.in_LRU_list);
2440
2441- mutex_enter(&block->mutex);
df1b5770
AM
2442- freed = buf_LRU_free_block(&block->page, FALSE);
2443+ freed = buf_LRU_free_block(&block->page, FALSE, have_LRU_mutex);
b4e1fa2c
AM
2444 mutex_exit(&block->mutex);
2445
db82db79 2446 if (freed) {
29ffd636 2447@@ -595,35 +658,46 @@
b4e1fa2c
AM
2448 buf_LRU_free_from_common_LRU_list(
2449 /*==============================*/
2450 buf_pool_t* buf_pool,
2451- ulint n_iterations)
2452+ ulint n_iterations,
2453 /*!< in: how many times this has been called
2454 repeatedly without result: a high value means
2455 that we should search farther; if
2456 n_iterations < 10, then we search
2457 n_iterations / 10 * buf_pool->curr_size
2458 pages from the end of the LRU list */
2459+ ibool have_LRU_mutex)
2460 {
2461 buf_page_t* bpage;
2462 ulint distance;
2463
2464- ut_ad(buf_pool_mutex_own(buf_pool));
2465+ //ut_ad(buf_pool_mutex_own(buf_pool));
2466
2467 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2468
2469+restart:
2470 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2471 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2472 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
db82db79
AM
2473
2474 ibool freed;
2475 unsigned accessed;
2476- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2477+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
b4e1fa2c
AM
2478+
2479+ if (!block_mutex) {
2480+ goto restart;
2481+ }
2482+
2483+ if (!bpage->in_LRU_list
2484+ || !buf_page_in_file(bpage)) {
2485+ mutex_exit(block_mutex);
2486+ goto restart;
2487+ }
2488
2489 ut_ad(buf_page_in_file(bpage));
2490 ut_ad(bpage->in_LRU_list);
2491
2492- mutex_enter(block_mutex);
2493 accessed = buf_page_is_accessed(bpage);
df1b5770
AM
2494- freed = buf_LRU_free_block(bpage, TRUE);
2495+ freed = buf_LRU_free_block(bpage, TRUE, have_LRU_mutex);
b4e1fa2c
AM
2496 mutex_exit(block_mutex);
2497
db82db79 2498 if (freed) {
29ffd636 2499@@ -660,16 +734,23 @@
b4e1fa2c
AM
2500 n_iterations / 5 of the unzip_LRU list. */
2501 {
2502 ibool freed = FALSE;
2503+ ibool have_LRU_mutex = FALSE;
2504
2505- buf_pool_mutex_enter(buf_pool);
2506+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2507+ have_LRU_mutex = TRUE;
29ffd636 2508+
b4e1fa2c
AM
2509+ //buf_pool_mutex_enter(buf_pool);
2510+ if (have_LRU_mutex)
2511+ mutex_enter(&buf_pool->LRU_list_mutex);
29ffd636
AM
2512
2513- freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
b4e1fa2c
AM
2514+ freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2515
2516 if (!freed) {
2517 freed = buf_LRU_free_from_common_LRU_list(
2518- buf_pool, n_iterations);
2519+ buf_pool, n_iterations, have_LRU_mutex);
2520 }
2521
2522+ buf_pool_mutex_enter(buf_pool);
2523 if (!freed) {
2524 buf_pool->LRU_flush_ended = 0;
2525 } else if (buf_pool->LRU_flush_ended > 0) {
29ffd636 2526@@ -677,6 +758,8 @@
b4e1fa2c
AM
2527 }
2528
2529 buf_pool_mutex_exit(buf_pool);
2530+ if (have_LRU_mutex)
2531+ mutex_exit(&buf_pool->LRU_list_mutex);
2532
2533 return(freed);
2534 }
29ffd636 2535@@ -737,7 +820,9 @@
b4e1fa2c
AM
2536
2537 buf_pool = buf_pool_from_array(i);
2538
2539- buf_pool_mutex_enter(buf_pool);
2540+ //buf_pool_mutex_enter(buf_pool);
2541+ mutex_enter(&buf_pool->LRU_list_mutex);
2542+ mutex_enter(&buf_pool->free_list_mutex);
2543
2544 if (!recv_recovery_on
2545 && UT_LIST_GET_LEN(buf_pool->free)
29ffd636 2546@@ -747,7 +832,9 @@
b4e1fa2c
AM
2547 ret = TRUE;
2548 }
2549
2550- buf_pool_mutex_exit(buf_pool);
2551+ //buf_pool_mutex_exit(buf_pool);
2552+ mutex_exit(&buf_pool->LRU_list_mutex);
2553+ mutex_exit(&buf_pool->free_list_mutex);
2554 }
2555
2556 return(ret);
29ffd636 2557@@ -765,9 +852,10 @@
b4e1fa2c
AM
2558 {
2559 buf_block_t* block;
2560
2561- ut_ad(buf_pool_mutex_own(buf_pool));
2562+ //ut_ad(buf_pool_mutex_own(buf_pool));
2563
2564- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2565+ mutex_enter(&buf_pool->free_list_mutex);
2566+ block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2567
2568 if (block) {
2569
29ffd636 2570@@ -776,7 +864,9 @@
b4e1fa2c
AM
2571 ut_ad(!block->page.in_flush_list);
2572 ut_ad(!block->page.in_LRU_list);
2573 ut_a(!buf_page_in_file(&block->page));
2574- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2575+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2576+
2577+ mutex_exit(&buf_pool->free_list_mutex);
2578
2579 mutex_enter(&block->mutex);
2580
29ffd636 2581@@ -786,6 +876,8 @@
b4e1fa2c
AM
2582 ut_ad(buf_pool_from_block(block) == buf_pool);
2583
2584 mutex_exit(&block->mutex);
2585+ } else {
2586+ mutex_exit(&buf_pool->free_list_mutex);
2587 }
2588
2589 return(block);
29ffd636 2590@@ -808,7 +900,7 @@
b4e1fa2c
AM
2591 ibool mon_value_was = FALSE;
2592 ibool started_monitor = FALSE;
2593 loop:
2594- buf_pool_mutex_enter(buf_pool);
2595+ //buf_pool_mutex_enter(buf_pool);
2596
2597 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2598 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
29ffd636 2599@@ -876,7 +968,7 @@
b4e1fa2c 2600
df1b5770
AM
2601 /* If there is a block in the free list, take it */
2602 block = buf_LRU_get_free_only(buf_pool);
b4e1fa2c
AM
2603- buf_pool_mutex_exit(buf_pool);
2604+ //buf_pool_mutex_exit(buf_pool);
2605
df1b5770
AM
2606 if (block) {
2607 ut_ad(buf_pool_from_block(block) == buf_pool);
29ffd636 2608@@ -976,7 +1068,8 @@
b4e1fa2c
AM
2609 ulint new_len;
2610
2611 ut_a(buf_pool->LRU_old);
2612- ut_ad(buf_pool_mutex_own(buf_pool));
2613+ //ut_ad(buf_pool_mutex_own(buf_pool));
2614+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2615 ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2616 ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2617 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
29ffd636 2618@@ -1042,7 +1135,8 @@
b4e1fa2c
AM
2619 {
2620 buf_page_t* bpage;
2621
2622- ut_ad(buf_pool_mutex_own(buf_pool));
2623+ //ut_ad(buf_pool_mutex_own(buf_pool));
2624+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2625 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2626
2627 /* We first initialize all blocks in the LRU list as old and then use
29ffd636 2628@@ -1077,13 +1171,14 @@
b4e1fa2c
AM
2629 ut_ad(buf_pool);
2630 ut_ad(bpage);
2631 ut_ad(buf_page_in_file(bpage));
2632- ut_ad(buf_pool_mutex_own(buf_pool));
2633+ //ut_ad(buf_pool_mutex_own(buf_pool));
2634+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2635
2636 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2637 buf_block_t* block = (buf_block_t*) bpage;
2638
2639 ut_ad(block->in_unzip_LRU_list);
2640- ut_d(block->in_unzip_LRU_list = FALSE);
2641+ block->in_unzip_LRU_list = FALSE;
2642
2643 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2644 }
29ffd636 2645@@ -1101,7 +1196,8 @@
b4e1fa2c
AM
2646
2647 ut_ad(buf_pool);
2648 ut_ad(bpage);
2649- ut_ad(buf_pool_mutex_own(buf_pool));
2650+ //ut_ad(buf_pool_mutex_own(buf_pool));
2651+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2652
2653 ut_a(buf_page_in_file(bpage));
2654
29ffd636 2655@@ -1178,12 +1274,13 @@
b4e1fa2c
AM
2656
2657 ut_ad(buf_pool);
2658 ut_ad(block);
2659- ut_ad(buf_pool_mutex_own(buf_pool));
2660+ //ut_ad(buf_pool_mutex_own(buf_pool));
2661+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2662
2663 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2664
2665 ut_ad(!block->in_unzip_LRU_list);
2666- ut_d(block->in_unzip_LRU_list = TRUE);
2667+ block->in_unzip_LRU_list = TRUE;
2668
2669 if (old) {
2670 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
29ffd636 2671@@ -1204,7 +1301,8 @@
b4e1fa2c
AM
2672
2673 ut_ad(buf_pool);
2674 ut_ad(bpage);
2675- ut_ad(buf_pool_mutex_own(buf_pool));
2676+ //ut_ad(buf_pool_mutex_own(buf_pool));
2677+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2678
2679 ut_a(buf_page_in_file(bpage));
2680
29ffd636 2681@@ -1255,7 +1353,8 @@
b4e1fa2c
AM
2682
2683 ut_ad(buf_pool);
2684 ut_ad(bpage);
2685- ut_ad(buf_pool_mutex_own(buf_pool));
2686+ //ut_ad(buf_pool_mutex_own(buf_pool));
2687+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2688
2689 ut_a(buf_page_in_file(bpage));
2690 ut_ad(!bpage->in_LRU_list);
29ffd636 2691@@ -1334,7 +1433,8 @@
b4e1fa2c
AM
2692 {
2693 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2694
2695- ut_ad(buf_pool_mutex_own(buf_pool));
2696+ //ut_ad(buf_pool_mutex_own(buf_pool));
2697+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2698
2699 if (bpage->old) {
2700 buf_pool->stat.n_pages_made_young++;
29ffd636 2701@@ -1373,17 +1473,18 @@
df1b5770
AM
2702 buf_LRU_free_block(
2703 /*===============*/
b4e1fa2c 2704 buf_page_t* bpage, /*!< in: block to be freed */
df1b5770
AM
2705- ibool zip) /*!< in: TRUE if should remove also the
2706+ ibool zip, /*!< in: TRUE if should remove also the
b4e1fa2c 2707 compressed page of an uncompressed page */
b4e1fa2c
AM
2708+ ibool have_LRU_mutex)
2709 {
2710 buf_page_t* b = NULL;
2711 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2712 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2713
2714- ut_ad(buf_pool_mutex_own(buf_pool));
2715+ //ut_ad(buf_pool_mutex_own(buf_pool));
2716 ut_ad(mutex_own(block_mutex));
2717 ut_ad(buf_page_in_file(bpage));
2718- ut_ad(bpage->in_LRU_list);
2719+ //ut_ad(bpage->in_LRU_list);
2720 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2721 #if UNIV_WORD_SIZE == 4
2722 /* On 32-bit systems, there is no padding in buf_page_t. On
29ffd636 2723@@ -1392,7 +1493,7 @@
b4e1fa2c
AM
2724 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2725 #endif
2726
2727- if (!buf_page_can_relocate(bpage)) {
2728+ if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2729
2730 /* Do not free buffer-fixed or I/O-fixed blocks. */
db82db79 2731 return(FALSE);
29ffd636 2732@@ -1426,7 +1527,7 @@
b4e1fa2c 2733 alloc:
db82db79
AM
2734 b = buf_page_alloc_descriptor();
2735 ut_a(b);
b4e1fa2c
AM
2736- memcpy(b, bpage, sizeof *b);
2737+ //memcpy(b, bpage, sizeof *b);
2738 }
2739
2740 #ifdef UNIV_DEBUG
29ffd636 2741@@ -1437,6 +1538,39 @@
b4e1fa2c
AM
2742 }
2743 #endif /* UNIV_DEBUG */
2744
2745+ /* to keep the latching order, release block_mutex and re-acquire it after LRU_list_mutex and page_hash_latch */
2746+ mutex_exit(block_mutex);
2747+
2748+ if (!have_LRU_mutex)
2749+ mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2750+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2751+ mutex_enter(block_mutex);
2752+
2753+ /* recheck states of block */
2754+ if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2755+ || !buf_page_can_relocate(bpage)) {
2756+not_freed:
2757+ if (b) {
2758+ buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2759+ }
2760+ if (!have_LRU_mutex)
2761+ mutex_exit(&buf_pool->LRU_list_mutex);
2762+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
db82db79 2763+ return(FALSE);
b4e1fa2c
AM
2764+ } else if (zip || !bpage->zip.data) {
2765+ if (bpage->oldest_modification)
2766+ goto not_freed;
2767+ } else if (bpage->oldest_modification) {
2768+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2769+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2770+ goto not_freed;
2771+ }
2772+ }
2773+
2774+ if (b) {
2775+ memcpy(b, bpage, sizeof *b);
2776+ }
2777+
2778 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2779 != BUF_BLOCK_ZIP_FREE) {
2780 ut_a(bpage->buf_fix_count == 0);
29ffd636 2781@@ -1453,6 +1587,10 @@
b4e1fa2c
AM
2782
2783 ut_a(!hash_b);
2784
2785+ while (prev_b && !prev_b->in_LRU_list) {
2786+ prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2787+ }
2788+
2789 b->state = b->oldest_modification
2790 ? BUF_BLOCK_ZIP_DIRTY
2791 : BUF_BLOCK_ZIP_PAGE;
29ffd636 2792@@ -1528,6 +1666,7 @@
adf0fb13
AM
2793 buf_LRU_add_block_low(b, buf_page_is_old(b));
2794 }
2795
2796+ mutex_enter(&buf_pool->zip_mutex);
2797 if (b->state == BUF_BLOCK_ZIP_PAGE) {
db82db79 2798 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
adf0fb13 2799 buf_LRU_insert_zip_clean(b);
29ffd636
AM
2800@@ -1543,12 +1682,13 @@
2801 /* Prevent buf_page_get_gen() from
2802 decompressing the block while we release
adf0fb13 2803 buf_pool->mutex and block_mutex. */
29ffd636
AM
2804- mutex_enter(&buf_pool->zip_mutex);
2805 buf_page_set_sticky(b);
2806 mutex_exit(&buf_pool->zip_mutex);
b4e1fa2c
AM
2807 }
2808
2809- buf_pool_mutex_exit(buf_pool);
2810+ //buf_pool_mutex_exit(buf_pool);
2811+ mutex_exit(&buf_pool->LRU_list_mutex);
2812+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2813 mutex_exit(block_mutex);
2814
2815 /* Remove possible adaptive hash index on the page.
29ffd636 2816@@ -1580,7 +1720,9 @@
b4e1fa2c
AM
2817 : BUF_NO_CHECKSUM_MAGIC);
2818 }
2819
2820- buf_pool_mutex_enter(buf_pool);
2821+ //buf_pool_mutex_enter(buf_pool);
2822+ if (have_LRU_mutex)
2823+ mutex_enter(&buf_pool->LRU_list_mutex);
2824 mutex_enter(block_mutex);
2825
2826 if (b) {
29ffd636 2827@@ -1589,13 +1731,17 @@
b4e1fa2c
AM
2828 mutex_exit(&buf_pool->zip_mutex);
2829 }
2830
2831- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2832+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
2833 } else {
2834 /* The block_mutex should have been released by
2835 buf_LRU_block_remove_hashed_page() when it returns
2836 BUF_BLOCK_ZIP_FREE. */
2837 ut_ad(block_mutex == &buf_pool->zip_mutex);
2838 mutex_enter(block_mutex);
2839+
2840+ if (!have_LRU_mutex)
2841+ mutex_exit(&buf_pool->LRU_list_mutex);
2842+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2843 }
2844
db82db79 2845 return(TRUE);
29ffd636 2846@@ -1607,13 +1753,14 @@
b4e1fa2c
AM
2847 void
2848 buf_LRU_block_free_non_file_page(
2849 /*=============================*/
2850- buf_block_t* block) /*!< in: block, must not contain a file page */
2851+ buf_block_t* block, /*!< in: block, must not contain a file page */
2852+ ibool have_page_hash_mutex)
2853 {
2854 void* data;
2855 buf_pool_t* buf_pool = buf_pool_from_block(block);
2856
2857 ut_ad(block);
2858- ut_ad(buf_pool_mutex_own(buf_pool));
2859+ //ut_ad(buf_pool_mutex_own(buf_pool));
2860 ut_ad(mutex_own(&block->mutex));
2861
2862 switch (buf_block_get_state(block)) {
29ffd636 2863@@ -1647,18 +1794,21 @@
b4e1fa2c
AM
2864 if (data) {
2865 block->page.zip.data = NULL;
2866 mutex_exit(&block->mutex);
2867- buf_pool_mutex_exit_forbid(buf_pool);
2868+ //buf_pool_mutex_exit_forbid(buf_pool);
2869
2870 buf_buddy_free(
2871- buf_pool, data, page_zip_get_size(&block->page.zip));
2872+ buf_pool, data, page_zip_get_size(&block->page.zip),
2873+ have_page_hash_mutex);
2874
2875- buf_pool_mutex_exit_allow(buf_pool);
2876+ //buf_pool_mutex_exit_allow(buf_pool);
2877 mutex_enter(&block->mutex);
2878 page_zip_set_size(&block->page.zip, 0);
2879 }
2880
2881- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
2882+ mutex_enter(&buf_pool->free_list_mutex);
2883+ UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
2884 ut_d(block->page.in_free_list = TRUE);
2885+ mutex_exit(&buf_pool->free_list_mutex);
2886
2887 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
2888 }
29ffd636 2889@@ -1688,7 +1838,11 @@
b4e1fa2c
AM
2890 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2891
2892 ut_ad(bpage);
2893- ut_ad(buf_pool_mutex_own(buf_pool));
2894+ //ut_ad(buf_pool_mutex_own(buf_pool));
2895+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2896+#ifdef UNIV_SYNC_DEBUG
2897+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
2898+#endif
2899 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
2900
2901 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
29ffd636 2902@@ -1796,7 +1950,9 @@
b4e1fa2c
AM
2903
2904 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2905 mutex_exit(buf_page_get_mutex(bpage));
2906- buf_pool_mutex_exit(buf_pool);
2907+ //buf_pool_mutex_exit(buf_pool);
2908+ mutex_exit(&buf_pool->LRU_list_mutex);
2909+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2910 buf_print();
2911 buf_LRU_print();
2912 buf_validate();
29ffd636 2913@@ -1818,17 +1974,17 @@
b4e1fa2c
AM
2914 ut_a(buf_page_get_zip_size(bpage));
2915
db82db79 2916 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
b4e1fa2c
AM
2917- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
2918+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
db82db79 2919 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
b4e1fa2c
AM
2920
2921 mutex_exit(&buf_pool->zip_mutex);
2922- buf_pool_mutex_exit_forbid(buf_pool);
2923+ //buf_pool_mutex_exit_forbid(buf_pool);
2924
2925 buf_buddy_free(
2926 buf_pool, bpage->zip.data,
2927- page_zip_get_size(&bpage->zip));
2928+ page_zip_get_size(&bpage->zip), TRUE);
2929
b4e1fa2c 2930- buf_pool_mutex_exit_allow(buf_pool);
b4e1fa2c 2931+ //buf_pool_mutex_exit_allow(buf_pool);
db82db79 2932 buf_page_free_descriptor(bpage);
b4e1fa2c 2933 return(BUF_BLOCK_ZIP_FREE);
db82db79 2934
29ffd636 2935@@ -1850,13 +2006,13 @@
b4e1fa2c
AM
2936 ut_ad(!bpage->in_flush_list);
2937 ut_ad(!bpage->in_LRU_list);
2938 mutex_exit(&((buf_block_t*) bpage)->mutex);
2939- buf_pool_mutex_exit_forbid(buf_pool);
2940+ //buf_pool_mutex_exit_forbid(buf_pool);
2941
2942 buf_buddy_free(
2943 buf_pool, data,
2944- page_zip_get_size(&bpage->zip));
2945+ page_zip_get_size(&bpage->zip), TRUE);
2946
2947- buf_pool_mutex_exit_allow(buf_pool);
2948+ //buf_pool_mutex_exit_allow(buf_pool);
2949 mutex_enter(&((buf_block_t*) bpage)->mutex);
2950 page_zip_set_size(&bpage->zip, 0);
2951 }
29ffd636 2952@@ -1882,18 +2038,19 @@
b4e1fa2c
AM
2953 void
2954 buf_LRU_block_free_hashed_page(
2955 /*===========================*/
2956- buf_block_t* block) /*!< in: block, must contain a file page and
2957+ buf_block_t* block, /*!< in: block, must contain a file page and
2958 be in a state where it can be freed */
2959+ ibool have_page_hash_mutex)
2960 {
2961 #ifdef UNIV_DEBUG
2962- buf_pool_t* buf_pool = buf_pool_from_block(block);
2963- ut_ad(buf_pool_mutex_own(buf_pool));
2964+ //buf_pool_t* buf_pool = buf_pool_from_block(block);
2965+ //ut_ad(buf_pool_mutex_own(buf_pool));
2966 #endif
2967 ut_ad(mutex_own(&block->mutex));
2968
2969 buf_block_set_state(block, BUF_BLOCK_MEMORY);
2970
2971- buf_LRU_block_free_non_file_page(block);
2972+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
2973 }
2974
734d6226 2975 /******************************************************************//**
29ffd636 2976@@ -1908,7 +2065,7 @@
734d6226
AM
2977 {
2978 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2979 != BUF_BLOCK_ZIP_FREE) {
2980- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2981+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, TRUE);
2982 }
2983 }
2984
29ffd636 2985@@ -1936,7 +2093,8 @@
b4e1fa2c
AM
2986 }
2987
2988 if (adjust) {
2989- buf_pool_mutex_enter(buf_pool);
2990+ //buf_pool_mutex_enter(buf_pool);
2991+ mutex_enter(&buf_pool->LRU_list_mutex);
2992
2993 if (ratio != buf_pool->LRU_old_ratio) {
2994 buf_pool->LRU_old_ratio = ratio;
29ffd636 2995@@ -1948,7 +2106,8 @@
b4e1fa2c
AM
2996 }
2997 }
2998
2999- buf_pool_mutex_exit(buf_pool);
3000+ //buf_pool_mutex_exit(buf_pool);
3001+ mutex_exit(&buf_pool->LRU_list_mutex);
3002 } else {
3003 buf_pool->LRU_old_ratio = ratio;
3004 }
29ffd636 3005@@ -2053,7 +2212,8 @@
b4e1fa2c
AM
3006 ulint new_len;
3007
3008 ut_ad(buf_pool);
3009- buf_pool_mutex_enter(buf_pool);
3010+ //buf_pool_mutex_enter(buf_pool);
3011+ mutex_enter(&buf_pool->LRU_list_mutex);
3012
3013 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3014
29ffd636 3015@@ -2114,16 +2274,22 @@
b4e1fa2c
AM
3016
3017 ut_a(buf_pool->LRU_old_len == old_len);
3018
3019- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3020+ mutex_exit(&buf_pool->LRU_list_mutex);
3021+ mutex_enter(&buf_pool->free_list_mutex);
3022+
3023+ UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3024 ut_ad(ut_list_node_313->in_free_list));
3025
3026 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3027 bpage != NULL;
3028- bpage = UT_LIST_GET_NEXT(list, bpage)) {
3029+ bpage = UT_LIST_GET_NEXT(free, bpage)) {
3030
3031 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3032 }
3033
3034+ mutex_exit(&buf_pool->free_list_mutex);
3035+ mutex_enter(&buf_pool->LRU_list_mutex);
3036+
3037 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3038 ut_ad(ut_list_node_313->in_unzip_LRU_list
3039 && ut_list_node_313->page.in_LRU_list));
29ffd636 3040@@ -2137,7 +2303,8 @@
b4e1fa2c
AM
3041 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3042 }
3043
3044- buf_pool_mutex_exit(buf_pool);
3045+ //buf_pool_mutex_exit(buf_pool);
3046+ mutex_exit(&buf_pool->LRU_list_mutex);
3047 }
3048
3049 /**********************************************************************//**
29ffd636 3050@@ -2173,7 +2340,8 @@
b4e1fa2c
AM
3051 const buf_page_t* bpage;
3052
3053 ut_ad(buf_pool);
3054- buf_pool_mutex_enter(buf_pool);
3055+ //buf_pool_mutex_enter(buf_pool);
3056+ mutex_enter(&buf_pool->LRU_list_mutex);
3057
3058 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3059
29ffd636 3060@@ -2230,7 +2398,8 @@
b4e1fa2c
AM
3061 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3062 }
3063
3064- buf_pool_mutex_exit(buf_pool);
3065+ //buf_pool_mutex_exit(buf_pool);
3066+ mutex_exit(&buf_pool->LRU_list_mutex);
3067 }
3068
3069 /**********************************************************************//**
db82db79
AM
3070--- a/storage/innobase/buf/buf0rea.c
3071+++ b/storage/innobase/buf/buf0rea.c
734d6226 3072@@ -478,6 +478,7 @@
b4e1fa2c
AM
3073
3074 return(0);
3075 }
3076+ buf_pool_mutex_exit(buf_pool);
3077
3078 /* Check that almost all pages in the area have been accessed; if
3079 offset == low, the accesses must be in a descending order, otherwise,
734d6226 3080@@ -496,6 +497,7 @@
b4e1fa2c
AM
3081
3082 fail_count = 0;
3083
3084+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3085 for (i = low; i < high; i++) {
3086 bpage = buf_page_hash_get(buf_pool, space, i);
3087
734d6226 3088@@ -523,7 +525,8 @@
b4e1fa2c
AM
3089
3090 if (fail_count > threshold) {
3091 /* Too many failures: return */
3092- buf_pool_mutex_exit(buf_pool);
3093+ //buf_pool_mutex_exit(buf_pool);
3094+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3095 return(0);
3096 }
3097
734d6226 3098@@ -538,7 +541,8 @@
b4e1fa2c
AM
3099 bpage = buf_page_hash_get(buf_pool, space, offset);
3100
3101 if (bpage == NULL) {
3102- buf_pool_mutex_exit(buf_pool);
3103+ //buf_pool_mutex_exit(buf_pool);
3104+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3105
3106 return(0);
3107 }
734d6226 3108@@ -564,7 +568,8 @@
b4e1fa2c
AM
3109 pred_offset = fil_page_get_prev(frame);
3110 succ_offset = fil_page_get_next(frame);
3111
3112- buf_pool_mutex_exit(buf_pool);
3113+ //buf_pool_mutex_exit(buf_pool);
3114+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3115
3116 if ((offset == low) && (succ_offset == offset + 1)) {
3117
db82db79
AM
3118--- a/storage/innobase/handler/ha_innodb.cc
3119+++ b/storage/innobase/handler/ha_innodb.cc
3120@@ -265,6 +265,10 @@
b4e1fa2c
AM
3121 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3122 {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3123 {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3124+ {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3125+ {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3126+ {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3127+ {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3128 {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3129 {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3130 {&dict_sys_mutex_key, "dict_sys_mutex", 0},
db82db79 3131@@ -314,6 +318,7 @@
b4e1fa2c
AM
3132 {&archive_lock_key, "archive_lock", 0},
3133 # endif /* UNIV_LOG_ARCHIVE */
3134 {&btr_search_latch_key, "btr_search_latch", 0},
3135+ {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3136 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3137 {&buf_block_lock_key, "buf_block_lock", 0},
3138 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
db82db79
AM
3139--- a/storage/innobase/handler/i_s.cc
3140+++ b/storage/innobase/handler/i_s.cc
734d6226 3141@@ -1583,7 +1583,8 @@
b4e1fa2c
AM
3142
3143 buf_pool = buf_pool_from_array(i);
3144
3145- buf_pool_mutex_enter(buf_pool);
3146+ //buf_pool_mutex_enter(buf_pool);
3147+ mutex_enter(&buf_pool->zip_free_mutex);
3148
3149 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3150 buf_buddy_stat_t* buddy_stat;
734d6226 3151@@ -1613,7 +1614,8 @@
b4e1fa2c
AM
3152 }
3153 }
3154
3155- buf_pool_mutex_exit(buf_pool);
3156+ //buf_pool_mutex_exit(buf_pool);
3157+ mutex_exit(&buf_pool->zip_free_mutex);
3158
3159 if (status) {
3160 break;
db82db79
AM
3161--- a/storage/innobase/ibuf/ibuf0ibuf.c
3162+++ b/storage/innobase/ibuf/ibuf0ibuf.c
1bfc1981 3163@@ -3760,9 +3760,11 @@
b4e1fa2c
AM
3164 ulint fold = buf_page_address_fold(space, page_no);
3165 buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3166
3167- buf_pool_mutex_enter(buf_pool);
3168+ //buf_pool_mutex_enter(buf_pool);
3169+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3170 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3171- buf_pool_mutex_exit(buf_pool);
3172+ //buf_pool_mutex_exit(buf_pool);
3173+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3174
3175 if (UNIV_LIKELY_NULL(bpage)) {
3176 /* A buffer pool watch has been set or the
db82db79
AM
3177--- a/storage/innobase/include/buf0buddy.h
3178+++ b/storage/innobase/include/buf0buddy.h
3179@@ -49,11 +49,12 @@
3180 ulint size, /*!< in: compressed page size
3181 (between PAGE_ZIP_MIN_SIZE and
3182 UNIV_PAGE_SIZE) */
3183- ibool* lru) /*!< in: pointer to a variable
3184+ ibool* lru, /*!< in: pointer to a variable
3185 that will be assigned TRUE if
3186 storage was allocated from the
3187 LRU list and buf_pool->mutex was
3188 temporarily released */
3189+ ibool have_page_hash_mutex)
3190 __attribute__((malloc, nonnull));
b4e1fa2c
AM
3191
3192 /**********************************************************************//**
db82db79
AM
3193@@ -66,8 +67,9 @@
3194 the block resides */
3195 void* buf, /*!< in: block to be freed, must not
3196 be pointed to by the buffer pool */
3197- ulint size) /*!< in: block size,
3198+ ulint size, /*!< in: block size,
3199 up to UNIV_PAGE_SIZE */
3200+ ibool have_page_hash_mutex)
b4e1fa2c
AM
3201 __attribute__((nonnull));
3202
3203 #ifndef UNIV_NONINL
db82db79
AM
3204--- a/storage/innobase/include/buf0buddy.ic
3205+++ b/storage/innobase/include/buf0buddy.ic
3206@@ -45,11 +45,12 @@
3207 buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
3208 ulint i, /*!< in: index of buf_pool->zip_free[],
3209 or BUF_BUDDY_SIZES */
3210- ibool* lru) /*!< in: pointer to a variable that
3211+ ibool* lru, /*!< in: pointer to a variable that
3212 will be assigned TRUE if storage was
3213 allocated from the LRU list and
3214 buf_pool->mutex was temporarily
3215 released */
3216+ ibool have_page_hash_mutex)
3217 __attribute__((malloc, nonnull));
b4e1fa2c
AM
3218
3219 /**********************************************************************//**
3220@@ -61,8 +62,9 @@
3221 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3222 void* buf, /*!< in: block to be freed, must not be
3223 pointed to by the buffer pool */
3224- ulint i) /*!< in: index of buf_pool->zip_free[],
3225+ ulint i, /*!< in: index of buf_pool->zip_free[],
3226 or BUF_BUDDY_SIZES */
3227+ ibool have_page_hash_mutex)
3228 __attribute__((nonnull));
3229
3230 /**********************************************************************//**
db82db79
AM
3231@@ -101,19 +103,20 @@
3232 ulint size, /*!< in: compressed page size
3233 (between PAGE_ZIP_MIN_SIZE and
3234 UNIV_PAGE_SIZE) */
b4e1fa2c
AM
3235- ibool* lru) /*!< in: pointer to a variable
3236+ ibool* lru, /*!< in: pointer to a variable
3237 that will be assigned TRUE if
3238 storage was allocated from the
3239 LRU list and buf_pool->mutex was
db82db79 3240 temporarily released */
b4e1fa2c
AM
3241+ ibool have_page_hash_mutex)
3242 {
3243- ut_ad(buf_pool_mutex_own(buf_pool));
3244+ //ut_ad(buf_pool_mutex_own(buf_pool));
db82db79
AM
3245 ut_ad(ut_is_2pow(size));
3246 ut_ad(size >= PAGE_ZIP_MIN_SIZE);
3247 ut_ad(size <= UNIV_PAGE_SIZE);
b4e1fa2c 3248
db82db79
AM
3249 return((byte*) buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size),
3250- lru));
3251+ lru, have_page_hash_mutex));
b4e1fa2c
AM
3252 }
3253
3254 /**********************************************************************//**
db82db79
AM
3255@@ -126,15 +129,28 @@
3256 the block resides */
3257 void* buf, /*!< in: block to be freed, must not
3258 be pointed to by the buffer pool */
3259- ulint size) /*!< in: block size,
3260+ ulint size, /*!< in: block size,
3261 up to UNIV_PAGE_SIZE */
b4e1fa2c
AM
3262+ ibool have_page_hash_mutex)
3263 {
3264- ut_ad(buf_pool_mutex_own(buf_pool));
3265+ //ut_ad(buf_pool_mutex_own(buf_pool));
db82db79
AM
3266 ut_ad(ut_is_2pow(size));
3267 ut_ad(size >= PAGE_ZIP_MIN_SIZE);
3268 ut_ad(size <= UNIV_PAGE_SIZE);
3269
3270- buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
b4e1fa2c
AM
3271+ if (!have_page_hash_mutex) {
3272+ mutex_enter(&buf_pool->LRU_list_mutex);
3273+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3274+ }
db82db79 3275+
b4e1fa2c
AM
3276+ mutex_enter(&buf_pool->zip_free_mutex);
3277+ buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3278+ mutex_exit(&buf_pool->zip_free_mutex);
3279+
3280+ if (!have_page_hash_mutex) {
3281+ mutex_exit(&buf_pool->LRU_list_mutex);
3282+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3283+ }
3284 }
3285
3286 #ifdef UNIV_MATERIALIZE
db82db79
AM
3287--- a/storage/innobase/include/buf0buf.h
3288+++ b/storage/innobase/include/buf0buf.h
734d6226 3289@@ -212,6 +212,20 @@
b4e1fa2c
AM
3290 /*==========================*/
3291
3292 /********************************************************************//**
3293+Acquires page_hash_latch with rw_lock_x_lock() for every buffer pool instance. */
3294+UNIV_INLINE
3295+void
3296+buf_pool_page_hash_x_lock_all(void);
3297+/*================================*/
3298+
3299+/********************************************************************//**
3300+Releases page_hash_latch with rw_lock_x_unlock() for every buffer pool instance. */
3301+UNIV_INLINE
3302+void
3303+buf_pool_page_hash_x_unlock_all(void);
3304+/*==================================*/
3305+
3306+/********************************************************************//**
3307 Creates the buffer pool.
3308 @return own: buf_pool object, NULL if not enough memory or error */
3309 UNIV_INTERN
13ceb006 3310@@ -851,6 +865,15 @@
b4e1fa2c
AM
3311 const buf_page_t* bpage) /*!< in: pointer to control block */
3312 __attribute__((pure));
3313
3314+/*************************************************************************
3315+Gets the mutex of a block and enters it. @return the entered mutex, or NULL (nothing entered) if buf_page_get_mutex() gives NULL */
3316+UNIV_INLINE
3317+mutex_t*
3318+buf_page_get_mutex_enter(
3319+/*=========================*/
3320+ const buf_page_t* bpage) /*!< in: pointer to control block */
3321+ __attribute__((nonnull)); /* not "pure": entering the mutex is a side effect */
3322+
3323 /*********************************************************************//**
3324 Get the flush type of a page.
3325 @return flush type */
29ffd636 3326@@ -1352,7 +1375,7 @@
b4e1fa2c
AM
3327 All these are protected by buf_pool->mutex. */
3328 /* @{ */
3329
3330- UT_LIST_NODE_T(buf_page_t) list;
3331+ /* UT_LIST_NODE_T(buf_page_t) list; */
3332 /*!< based on state, this is a
3333 list node, protected either by
3334 buf_pool->mutex or by
29ffd636 3335@@ -1380,6 +1403,10 @@
b4e1fa2c
AM
3336 BUF_BLOCK_REMOVE_HASH or
3337 BUF_BLOCK_READY_IN_USE. */
3338
3339+ /* resplit for optimistic use */
3340+ UT_LIST_NODE_T(buf_page_t) free;
3341+ UT_LIST_NODE_T(buf_page_t) flush_list;
3342+ UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3343 #ifdef UNIV_DEBUG
3344 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3345 when buf_pool->flush_list_mutex is
29ffd636 3346@@ -1472,11 +1499,11 @@
b4e1fa2c
AM
3347 a block is in the unzip_LRU list
3348 if page.state == BUF_BLOCK_FILE_PAGE
3349 and page.zip.data != NULL */
3350-#ifdef UNIV_DEBUG
3351+//#ifdef UNIV_DEBUG
3352 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3353 decompressed LRU list;
3354 used in debugging */
3355-#endif /* UNIV_DEBUG */
3356+//#endif /* UNIV_DEBUG */
3357 mutex_t mutex; /*!< mutex protecting this block:
3358 state (also protected by the buffer
3359 pool mutex), io_fix, buf_fix_count,
29ffd636 3360@@ -1656,6 +1683,11 @@
b4e1fa2c
AM
3361 pool instance, protects compressed
3362 only pages (of type buf_page_t, not
3363 buf_block_t */
3364+ mutex_t LRU_list_mutex;
3365+ rw_lock_t page_hash_latch;
3366+ mutex_t free_list_mutex;
3367+ mutex_t zip_free_mutex;
3368+ mutex_t zip_hash_mutex;
3369 ulint instance_no; /*!< Array index of this buffer
3370 pool instance */
3371 ulint old_pool_size; /*!< Old pool size in bytes */
29ffd636 3372@@ -1809,8 +1841,8 @@
11822e22
AM
3373 /** Test if a buffer pool mutex is owned. */
3374 #define buf_pool_mutex_own(b) mutex_own(&b->mutex)
3375 /** Acquire a buffer pool mutex. */
3376+/* the latch order of buf_pool_mutex has changed; the zip_mutex assertion is dropped */
3377 #define buf_pool_mutex_enter(b) do { \
3378- ut_ad(!mutex_own(&b->zip_mutex)); \
3379 mutex_enter(&b->mutex); \
3380 } while (0)
3381
db82db79
AM
3382--- a/storage/innobase/include/buf0buf.ic
3383+++ b/storage/innobase/include/buf0buf.ic
734d6226 3384@@ -292,7 +292,7 @@
b4e1fa2c
AM
3385 case BUF_BLOCK_ZIP_FREE:
3386 /* This is a free page in buf_pool->zip_free[].
3387 Such pages should only be accessed by the buddy allocator. */
3388- ut_error;
3389+ /* ut_error; */ /* optimistic */
3390 break;
3391 case BUF_BLOCK_ZIP_PAGE:
3392 case BUF_BLOCK_ZIP_DIRTY:
734d6226 3393@@ -335,9 +335,16 @@
b4e1fa2c
AM
3394 {
3395 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3396
11822e22
AM
3397+ if (/*equivalent to buf_pool_watch_is_sentinel(buf_pool, bpage)*/
3398+ bpage >= &buf_pool->watch[0]
3399+ && bpage < &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
b4e1fa2c
AM
3400+ /* TODO: this code is interim; it should be confirmed later. */
3401+ return(&buf_pool->zip_mutex);
3402+ }
3403+
3404 switch (buf_page_get_state(bpage)) {
3405 case BUF_BLOCK_ZIP_FREE:
3406- ut_error;
3407+ /* ut_error; */ /* optimistic */
3408 return(NULL);
3409 case BUF_BLOCK_ZIP_PAGE:
3410 case BUF_BLOCK_ZIP_DIRTY:
734d6226 3411@@ -347,6 +354,28 @@
b4e1fa2c
AM
3412 }
3413 }
3414
3415+/*************************************************************************
3416+Gets the mutex of a block and enters it, retrying until the entered mutex is still the one buf_page_get_mutex() reports for the block. @return the entered mutex, or NULL if buf_page_get_mutex() returns NULL */
3417+UNIV_INLINE
3418+mutex_t*
3419+buf_page_get_mutex_enter(
3420+/*=========================*/
3421+ const buf_page_t* bpage) /*!< in: pointer to control block */
3422+{
3423+ mutex_t* block_mutex;
3424+
3425+ while(1) {
3426+ block_mutex = buf_page_get_mutex(bpage);
3427+ if (!block_mutex)
3428+ return block_mutex; /* no mutex reported: nothing was entered */
3429+
3430+ mutex_enter(block_mutex);
3431+ if (block_mutex == buf_page_get_mutex(bpage))
3432+ return block_mutex; /* unchanged after entering: return holding it */
3433+ mutex_exit(block_mutex); /* the reported mutex changed meanwhile: retry */
3434+ }
3435+}
3436+
3437 /*********************************************************************//**
3438 Get the flush type of a page.
3439 @return flush type */
29ffd636 3440@@ -444,8 +473,8 @@
b4e1fa2c
AM
3441 enum buf_io_fix io_fix) /*!< in: io_fix state */
3442 {
3443 #ifdef UNIV_DEBUG
3444- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3445- ut_ad(buf_pool_mutex_own(buf_pool));
3446+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3447+ //ut_ad(buf_pool_mutex_own(buf_pool));
3448 #endif
3449 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3450
29ffd636
AM
3451@@ -482,7 +511,7 @@
3452 {
3453 #ifdef UNIV_DEBUG
3454 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3455- ut_ad(buf_pool_mutex_own(buf_pool));
3456+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3457 #endif
3458 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3459 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3460@@ -500,7 +529,7 @@
3461 {
3462 #ifdef UNIV_DEBUG
3463 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3464- ut_ad(buf_pool_mutex_own(buf_pool));
3465+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3466 #endif
3467 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3468 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN);
3469@@ -518,14 +547,14 @@
b4e1fa2c
AM
3470 const buf_page_t* bpage) /*!< control block being relocated */
3471 {
3472 #ifdef UNIV_DEBUG
3473- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3474- ut_ad(buf_pool_mutex_own(buf_pool));
3475+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3476+ //ut_ad(buf_pool_mutex_own(buf_pool));
3477 #endif
3478 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3479 ut_ad(buf_page_in_file(bpage));
3480- ut_ad(bpage->in_LRU_list);
3481+ //ut_ad(bpage->in_LRU_list);
3482
3483- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3484+ return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3485 && bpage->buf_fix_count == 0);
3486 }
3487
29ffd636 3488@@ -539,8 +568,8 @@
b4e1fa2c
AM
3489 const buf_page_t* bpage) /*!< in: control block */
3490 {
3491 #ifdef UNIV_DEBUG
3492- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3493- ut_ad(buf_pool_mutex_own(buf_pool));
3494+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3495+ //ut_ad(buf_pool_mutex_own(buf_pool));
3496 #endif
3497 ut_ad(buf_page_in_file(bpage));
3498
29ffd636 3499@@ -560,7 +589,8 @@
b4e1fa2c
AM
3500 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3501 #endif /* UNIV_DEBUG */
3502 ut_a(buf_page_in_file(bpage));
3503- ut_ad(buf_pool_mutex_own(buf_pool));
3504+ //ut_ad(buf_pool_mutex_own(buf_pool));
3505+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3506 ut_ad(bpage->in_LRU_list);
3507
3508 #ifdef UNIV_LRU_DEBUG
29ffd636 3509@@ -607,9 +637,10 @@
b4e1fa2c
AM
3510 ulint time_ms) /*!< in: ut_time_ms() */
3511 {
3512 #ifdef UNIV_DEBUG
3513- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3514- ut_ad(buf_pool_mutex_own(buf_pool));
3515+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3516+ //ut_ad(buf_pool_mutex_own(buf_pool));
3517 #endif
3518+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3519 ut_a(buf_page_in_file(bpage));
3520
3521 if (!bpage->access_time) {
29ffd636 3522@@ -852,19 +883,19 @@
b4e1fa2c
AM
3523 /*===========*/
3524 buf_block_t* block) /*!< in, own: block to be freed */
3525 {
3526- buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3527+ //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3528
3529- buf_pool_mutex_enter(buf_pool);
3530+ //buf_pool_mutex_enter(buf_pool);
3531
3532 mutex_enter(&block->mutex);
3533
3534 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3535
3536- buf_LRU_block_free_non_file_page(block);
3537+ buf_LRU_block_free_non_file_page(block, FALSE);
3538
3539 mutex_exit(&block->mutex);
3540
3541- buf_pool_mutex_exit(buf_pool);
3542+ //buf_pool_mutex_exit(buf_pool);
3543 }
3544 #endif /* !UNIV_HOTBACKUP */
3545
29ffd636 3546@@ -912,17 +943,17 @@
b4e1fa2c
AM
3547 page frame */
3548 {
3549 ib_uint64_t lsn;
3550- mutex_t* block_mutex = buf_page_get_mutex(bpage);
3551-
3552- mutex_enter(block_mutex);
3553+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3554
3555- if (buf_page_in_file(bpage)) {
3556+ if (block_mutex && buf_page_in_file(bpage)) {
3557 lsn = bpage->newest_modification;
3558 } else {
3559 lsn = 0;
3560 }
3561
3562- mutex_exit(block_mutex);
3563+ if (block_mutex) {
3564+ mutex_exit(block_mutex);
3565+ }
3566
3567 return(lsn);
3568 }
29ffd636 3569@@ -940,7 +971,7 @@
b4e1fa2c
AM
3570 #ifdef UNIV_SYNC_DEBUG
3571 buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3572
3573- ut_ad((buf_pool_mutex_own(buf_pool)
3574+ ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3575 && (block->page.buf_fix_count == 0))
3576 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3577 #endif /* UNIV_SYNC_DEBUG */
29ffd636 3578@@ -1070,7 +1101,11 @@
b4e1fa2c
AM
3579 buf_page_t* bpage;
3580
3581 ut_ad(buf_pool);
3582- ut_ad(buf_pool_mutex_own(buf_pool));
3583+ //ut_ad(buf_pool_mutex_own(buf_pool));
3584+#ifdef UNIV_SYNC_DEBUG
3585+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3586+ || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3587+#endif
3588 ut_ad(fold == buf_page_address_fold(space, offset));
3589
3590 /* Look for the page in the hash table */
29ffd636 3591@@ -1155,11 +1190,13 @@
b4e1fa2c
AM
3592 const buf_page_t* bpage;
3593 buf_pool_t* buf_pool = buf_pool_get(space, offset);
3594
3595- buf_pool_mutex_enter(buf_pool);
3596+ //buf_pool_mutex_enter(buf_pool);
3597+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3598
3599 bpage = buf_page_hash_get(buf_pool, space, offset);
3600
3601- buf_pool_mutex_exit(buf_pool);
3602+ //buf_pool_mutex_exit(buf_pool);
3603+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3604
3605 return(bpage != NULL);
3606 }
29ffd636 3607@@ -1287,4 +1324,38 @@
b4e1fa2c
AM
3608 buf_pool_mutex_exit(buf_pool);
3609 }
3610 }
3611+
3612+/********************************************************************//**
3613+Acquires page_hash_latch with rw_lock_x_lock() for every buffer pool instance, in array order. */
3614+UNIV_INLINE
3615+void
3616+buf_pool_page_hash_x_lock_all(void)
3617+/*===============================*/
3618+{
3619+ ulint i;
3620+
3621+ for (i = 0; i < srv_buf_pool_instances; i++) {
3622+ buf_pool_t* buf_pool;
3623+
3624+ buf_pool = buf_pool_from_array(i);
3625+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3626+ }
3627+}
3628+
3629+/********************************************************************//**
3630+Releases page_hash_latch with rw_lock_x_unlock() for every buffer pool instance, in array order. */
3631+UNIV_INLINE
3632+void
3633+buf_pool_page_hash_x_unlock_all(void)
3634+/*=================================*/
3635+{
3636+ ulint i;
3637+
3638+ for (i = 0; i < srv_buf_pool_instances; i++) {
3639+ buf_pool_t* buf_pool;
3640+
3641+ buf_pool = buf_pool_from_array(i);
3642+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3643+ }
3644+}
3645 #endif /* !UNIV_HOTBACKUP */
db82db79
AM
3646--- a/storage/innobase/include/buf0lru.h
3647+++ b/storage/innobase/include/buf0lru.h
3648@@ -100,8 +100,9 @@
df1b5770
AM
3649 buf_LRU_free_block(
3650 /*===============*/
b4e1fa2c 3651 buf_page_t* bpage, /*!< in: block to be freed */
df1b5770
AM
3652- ibool zip) /*!< in: TRUE if should remove also the
3653+ ibool zip, /*!< in: TRUE if should remove also the
b4e1fa2c 3654 compressed page of an uncompressed page */
df1b5770
AM
3655+ ibool have_LRU_mutex)
3656 __attribute__((nonnull));
b4e1fa2c
AM
3657 /******************************************************************//**
3658 Try to free a replaceable block.
db82db79 3659@@ -148,7 +149,8 @@
b4e1fa2c
AM
3660 void
3661 buf_LRU_block_free_non_file_page(
3662 /*=============================*/
3663- buf_block_t* block); /*!< in: block, must not contain a file page */
3664+ buf_block_t* block, /*!< in: block, must not contain a file page */
3665+ ibool have_page_hash_mutex);
3666 /******************************************************************//**
3667 Adds a block to the LRU list. */
3668 UNIV_INTERN
db82db79
AM
3669--- a/storage/innobase/include/sync0rw.h
3670+++ b/storage/innobase/include/sync0rw.h
b4e1fa2c
AM
3671@@ -112,6 +112,7 @@
3672 extern mysql_pfs_key_t archive_lock_key;
3673 # endif /* UNIV_LOG_ARCHIVE */
3674 extern mysql_pfs_key_t btr_search_latch_key;
3675+extern mysql_pfs_key_t buf_pool_page_hash_key;
3676 extern mysql_pfs_key_t buf_block_lock_key;
3677 # ifdef UNIV_SYNC_DEBUG
3678 extern mysql_pfs_key_t buf_block_debug_latch_key;
db82db79
AM
3679--- a/storage/innobase/include/sync0sync.h
3680+++ b/storage/innobase/include/sync0sync.h
b4e1fa2c
AM
3681@@ -75,6 +75,10 @@
3682 extern mysql_pfs_key_t buffer_block_mutex_key;
3683 extern mysql_pfs_key_t buf_pool_mutex_key;
3684 extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3685+extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3686+extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3687+extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3688+extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3689 extern mysql_pfs_key_t cache_last_read_mutex_key;
3690 extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3691 extern mysql_pfs_key_t dict_sys_mutex_key;
734d6226 3692@@ -667,7 +671,7 @@
b4e1fa2c 3693 #define SYNC_TRX_SYS_HEADER 290
11822e22 3694 #define SYNC_PURGE_QUEUE 200
b4e1fa2c
AM
3695 #define SYNC_LOG 170
3696-#define SYNC_LOG_FLUSH_ORDER 147
3697+#define SYNC_LOG_FLUSH_ORDER 156
3698 #define SYNC_RECV 168
3699 #define SYNC_WORK_QUEUE 162
13ceb006
AM
3700 #define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory
3701@@ -676,8 +680,13 @@
b4e1fa2c
AM
3702 SYNC_SEARCH_SYS, as memory allocation
3703 can call routines there! Otherwise
3704 the level is SYNC_MEM_HASH. */
3705+#define SYNC_BUF_LRU_LIST 158
3706+#define SYNC_BUF_PAGE_HASH 157
3707+#define SYNC_BUF_BLOCK 155 /* Block mutex */
3708+#define SYNC_BUF_FREE_LIST 153
3709+#define SYNC_BUF_ZIP_FREE 152
3710+#define SYNC_BUF_ZIP_HASH 151
3711 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
3712-#define SYNC_BUF_BLOCK 146 /* Block mutex */
3713 #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
3714 #define SYNC_DOUBLEWRITE 140
3715 #define SYNC_ANY_LATCH 135
13ceb006 3716@@ -708,7 +717,7 @@
b4e1fa2c
AM
3717 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3718 when atomic operations are not enabled */
3719 #endif
3720- ulint waiters; /*!< This ulint is set to 1 if there are (or
3721+ volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3722 may be) threads waiting in the global wait
3723 array for this mutex to be released.
3724 Otherwise, this is 0. */
db82db79
AM
3725--- a/storage/innobase/srv/srv0srv.c
3726+++ b/storage/innobase/srv/srv0srv.c
3d3ecf24 3727@@ -3105,7 +3105,7 @@
b4e1fa2c
AM
3728 level += log_sys->max_checkpoint_age
3729 - (lsn - oldest_modification);
3730 }
3731- bpage = UT_LIST_GET_NEXT(list, bpage);
3732+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3733 n_blocks++;
3734 }
3735
3d3ecf24 3736@@ -3191,7 +3191,7 @@
b4e1fa2c
AM
3737 found = TRUE;
3738 break;
3739 }
3740- bpage = UT_LIST_GET_NEXT(list, bpage);
3741+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3742 new_blocks_num++;
3743 }
3744 if (!found) {
db82db79
AM
3745--- a/storage/innobase/sync/sync0sync.c
3746+++ b/storage/innobase/sync/sync0sync.c
adf0fb13 3747@@ -285,7 +285,7 @@
b4e1fa2c
AM
3748 mutex->lock_word = 0;
3749 #endif
3750 mutex->event = os_event_create(NULL);
3751- mutex_set_waiters(mutex, 0);
3752+ mutex->waiters = 0;
3753 #ifdef UNIV_DEBUG
3754 mutex->magic_n = MUTEX_MAGIC_N;
3755 #endif /* UNIV_DEBUG */
adf0fb13 3756@@ -464,6 +464,15 @@
b4e1fa2c
AM
3757 mutex_t* mutex, /*!< in: mutex */
3758 ulint n) /*!< in: value to set */
3759 {
3760+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3761+ ut_ad(mutex);
3762+
3763+ if (n) {
3764+ os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3765+ } else {
3766+ os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3767+ }
3768+#else
3769 volatile ulint* ptr; /* declared volatile to ensure that
3770 the value is stored to memory */
3771 ut_ad(mutex);
adf0fb13 3772@@ -472,6 +481,7 @@
b4e1fa2c
AM
3773
3774 *ptr = n; /* Here we assume that the write of a single
3775 word in memory is atomic */
3776+#endif
3777 }
3778
3779 /******************************************************************//**
13ceb006 3780@@ -1233,7 +1243,12 @@
b4e1fa2c
AM
3781 ut_error;
3782 }
3783 break;
3784+ case SYNC_BUF_LRU_LIST:
3785 case SYNC_BUF_FLUSH_LIST:
3786+ case SYNC_BUF_PAGE_HASH:
3787+ case SYNC_BUF_FREE_LIST:
3788+ case SYNC_BUF_ZIP_FREE:
3789+ case SYNC_BUF_ZIP_HASH:
3790 case SYNC_BUF_POOL:
3791 /* We can have multiple mutexes of this type therefore we
3792 can only check whether the greater than condition holds. */
13ceb006 3793@@ -1251,7 +1266,8 @@
b4e1fa2c
AM
3794 buffer block (block->mutex or buf_pool->zip_mutex). */
3795 if (!sync_thread_levels_g(array, level, FALSE)) {
3796 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3797- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3798+ /* the exact latching rule is not settled yet; assertion disabled for now */
3799+ //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
3800 }
3801 break;
3802 case SYNC_REC_LOCK:
This page took 0.924464 seconds and 4 git commands to generate.