]> git.pld-linux.org Git - packages/mysql.git/blame - innodb_split_buf_pool_mutex.patch
- mention innodb_file_per_table
[packages/mysql.git] / innodb_split_buf_pool_mutex.patch
CommitLineData
b4e1fa2c
AM
1# name : innodb_split_buf_pool_mutex.patch
2# introduced : 11 or before
3# maintainer : Yasufumi
4#
5#!!! notice !!!
6# Any small change to this file in the main branch
7# should be done or reviewed by the maintainer!
db82db79
AM
8--- a/storage/innobase/btr/btr0cur.c
9+++ b/storage/innobase/btr/btr0cur.c
10@@ -4142,7 +4142,8 @@
b4e1fa2c
AM
11
12 mtr_commit(mtr);
13
14- buf_pool_mutex_enter(buf_pool);
15+ //buf_pool_mutex_enter(buf_pool);
16+ mutex_enter(&buf_pool->LRU_list_mutex);
17 mutex_enter(&block->mutex);
18
19 /* Only free the block if it is still allocated to
db82db79 20@@ -4153,16 +4154,21 @@
b4e1fa2c
AM
21 && buf_block_get_space(block) == space
22 && buf_block_get_page_no(block) == page_no) {
23
db82db79 24- if (!buf_LRU_free_block(&block->page, all)
b4e1fa2c 25- && all && block->page.zip.data) {
db82db79 26+ if (!buf_LRU_free_block(&block->page, all, TRUE)
b4e1fa2c
AM
27+ && all && block->page.zip.data
28+ /* buf_LRU_free_block() may have released the mutex temporarily, so re-check that the block is still the same page */
29+ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
30+ && buf_block_get_space(block) == space
31+ && buf_block_get_page_no(block) == page_no) {
32 /* Attempt to deallocate the uncompressed page
33 if the whole block cannot be deallocted. */
34
df1b5770
AM
35- buf_LRU_free_block(&block->page, FALSE);
36+ buf_LRU_free_block(&block->page, FALSE, TRUE);
b4e1fa2c
AM
37 }
38 }
39
40- buf_pool_mutex_exit(buf_pool);
41+ //buf_pool_mutex_exit(buf_pool);
42+ mutex_exit(&buf_pool->LRU_list_mutex);
43 mutex_exit(&block->mutex);
44 }
45
db82db79
AM
46--- a/storage/innobase/btr/btr0sea.c
47+++ b/storage/innobase/btr/btr0sea.c
d8778560 48@@ -1943,7 +1943,7 @@
b4e1fa2c
AM
49 rec_offs_init(offsets_);
50
51 rw_lock_x_lock(&btr_search_latch);
52- buf_pool_mutex_enter_all();
53+ buf_pool_page_hash_x_lock_all();
54
55 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
56
d8778560 57@@ -1951,11 +1951,11 @@
b4e1fa2c
AM
58 /* We release btr_search_latch every once in a while to
59 give other queries a chance to run. */
60 if ((i != 0) && ((i % chunk_size) == 0)) {
61- buf_pool_mutex_exit_all();
62+ buf_pool_page_hash_x_unlock_all();
63 rw_lock_x_unlock(&btr_search_latch);
64 os_thread_yield();
65 rw_lock_x_lock(&btr_search_latch);
66- buf_pool_mutex_enter_all();
67+ buf_pool_page_hash_x_lock_all();
68 }
69
70 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
d8778560 71@@ -2066,11 +2066,11 @@
b4e1fa2c
AM
72 /* We release btr_search_latch every once in a while to
73 give other queries a chance to run. */
74 if (i != 0) {
75- buf_pool_mutex_exit_all();
76+ buf_pool_page_hash_x_unlock_all();
77 rw_lock_x_unlock(&btr_search_latch);
78 os_thread_yield();
79 rw_lock_x_lock(&btr_search_latch);
80- buf_pool_mutex_enter_all();
81+ buf_pool_page_hash_x_lock_all();
82 }
83
84 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
d8778560 85@@ -2078,7 +2078,7 @@
b4e1fa2c
AM
86 }
87 }
88
89- buf_pool_mutex_exit_all();
90+ buf_pool_page_hash_x_unlock_all();
91 rw_lock_x_unlock(&btr_search_latch);
92 if (UNIV_LIKELY_NULL(heap)) {
93 mem_heap_free(heap);
db82db79
AM
94--- a/storage/innobase/buf/buf0buddy.c
95+++ b/storage/innobase/buf/buf0buddy.c
96@@ -58,7 +58,7 @@
97
98 /** Validate a given zip_free list. */
99 #define BUF_BUDDY_LIST_VALIDATE(b, i) \
100- UT_LIST_VALIDATE(list, buf_page_t, \
101+ UT_LIST_VALIDATE(zip_list, buf_page_t, \
102 b->zip_free[i], \
103 ut_ad(buf_page_get_state( \
104 ut_list_node_313) \
105@@ -75,10 +75,11 @@
106 ulint i) /*!< in: index of
107 buf_pool->zip_free[] */
108 {
b4e1fa2c
AM
109- ut_ad(buf_pool_mutex_own(buf_pool));
110+ //ut_ad(buf_pool_mutex_own(buf_pool));
111+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
112 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
113 ut_ad(buf_pool->zip_free[i].start != bpage);
114- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
115+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
db82db79 116 }
b4e1fa2c 117
db82db79
AM
118 /**********************************************************************//**
119@@ -93,16 +94,17 @@
b4e1fa2c
AM
120 buf_pool->zip_free[] */
121 {
db82db79 122 #ifdef UNIV_DEBUG
b4e1fa2c
AM
123- buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
124- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
125+ buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
126+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
127
db82db79 128 ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
b4e1fa2c 129 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
db82db79 130 #endif /* UNIV_DEBUG */
b4e1fa2c
AM
131
132- ut_ad(buf_pool_mutex_own(buf_pool));
133+ //ut_ad(buf_pool_mutex_own(buf_pool));
134+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
135 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
136- UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
137+ UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
db82db79 138 }
b4e1fa2c 139
db82db79
AM
140 /**********************************************************************//**
141@@ -117,7 +119,8 @@
b4e1fa2c
AM
142 {
143 buf_page_t* bpage;
144
145- ut_ad(buf_pool_mutex_own(buf_pool));
146+ //ut_ad(buf_pool_mutex_own(buf_pool));
147+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
148 ut_a(i < BUF_BUDDY_SIZES);
db82db79 149 ut_a(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
b4e1fa2c 150
db82db79 151@@ -159,16 +162,19 @@
b4e1fa2c
AM
152 buf_buddy_block_free(
153 /*=================*/
154 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
155- void* buf) /*!< in: buffer frame to deallocate */
156+ void* buf, /*!< in: buffer frame to deallocate */
157+ ibool have_page_hash_mutex)
158 {
159 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
160 buf_page_t* bpage;
161 buf_block_t* block;
162
163- ut_ad(buf_pool_mutex_own(buf_pool));
164+ //ut_ad(buf_pool_mutex_own(buf_pool));
165 ut_ad(!mutex_own(&buf_pool->zip_mutex));
166 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
167
168+ mutex_enter(&buf_pool->zip_hash_mutex);
169+
170 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
171 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
172 && bpage->in_zip_hash && !bpage->in_page_hash),
db82db79 173@@ -180,12 +186,14 @@
b4e1fa2c
AM
174 ut_d(bpage->in_zip_hash = FALSE);
175 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
176
177+ mutex_exit(&buf_pool->zip_hash_mutex);
178+
179 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
180 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
181
182 block = (buf_block_t*) bpage;
183 mutex_enter(&block->mutex);
184- buf_LRU_block_free_non_file_page(block);
185+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
186 mutex_exit(&block->mutex);
187
188 ut_ad(buf_pool->buddy_n_frames > 0);
db82db79 189@@ -202,7 +210,7 @@
b4e1fa2c
AM
190 {
191 buf_pool_t* buf_pool = buf_pool_from_block(block);
192 const ulint fold = BUF_POOL_ZIP_FOLD(block);
193- ut_ad(buf_pool_mutex_own(buf_pool));
194+ //ut_ad(buf_pool_mutex_own(buf_pool));
195 ut_ad(!mutex_own(&buf_pool->zip_mutex));
196 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
197
db82db79 198@@ -214,7 +222,10 @@
b4e1fa2c
AM
199 ut_ad(!block->page.in_page_hash);
200 ut_ad(!block->page.in_zip_hash);
201 ut_d(block->page.in_zip_hash = TRUE);
202+
203+ mutex_enter(&buf_pool->zip_hash_mutex);
204 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
205+ mutex_exit(&buf_pool->zip_hash_mutex);
206
207 ut_d(buf_pool->buddy_n_frames++);
208 }
db82db79
AM
209@@ -268,26 +279,30 @@
210 buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
b4e1fa2c
AM
211 ulint i, /*!< in: index of buf_pool->zip_free[],
212 or BUF_BUDDY_SIZES */
213- ibool* lru) /*!< in: pointer to a variable that
214+ ibool* lru, /*!< in: pointer to a variable that
215 will be assigned TRUE if storage was
216 allocated from the LRU list and
217 buf_pool->mutex was temporarily
db82db79 218 released */
b4e1fa2c
AM
219+ ibool have_page_hash_mutex)
220 {
221 buf_block_t* block;
222
db82db79 223 ut_ad(lru);
b4e1fa2c
AM
224- ut_ad(buf_pool_mutex_own(buf_pool));
225+ //ut_ad(buf_pool_mutex_own(buf_pool));
226+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
227 ut_ad(!mutex_own(&buf_pool->zip_mutex));
db82db79 228 ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
b4e1fa2c
AM
229
230 if (i < BUF_BUDDY_SIZES) {
231 /* Try to allocate from the buddy system. */
232+ mutex_enter(&buf_pool->zip_free_mutex);
233 block = buf_buddy_alloc_zip(buf_pool, i);
234
235 if (block) {
236 goto func_exit;
237 }
238+ mutex_exit(&buf_pool->zip_free_mutex);
239 }
240
241 /* Try allocating from the buf_pool->free list. */
db82db79 242@@ -299,19 +314,30 @@
b4e1fa2c
AM
243 }
244
245 /* Try replacing an uncompressed page in the buffer pool. */
246- buf_pool_mutex_exit(buf_pool);
247+ //buf_pool_mutex_exit(buf_pool);
248+ mutex_exit(&buf_pool->LRU_list_mutex);
249+ if (have_page_hash_mutex) {
250+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
251+ }
df1b5770 252 block = buf_LRU_get_free_block(buf_pool);
b4e1fa2c
AM
253 *lru = TRUE;
254- buf_pool_mutex_enter(buf_pool);
255+ //buf_pool_mutex_enter(buf_pool);
256+ mutex_enter(&buf_pool->LRU_list_mutex);
257+ if (have_page_hash_mutex) {
258+ rw_lock_x_lock(&buf_pool->page_hash_latch);
259+ }
260
261 alloc_big:
262 buf_buddy_block_register(block);
263
264+ mutex_enter(&buf_pool->zip_free_mutex);
265 block = buf_buddy_alloc_from(
266 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
267
268 func_exit:
269 buf_pool->buddy_stat[i].used++;
270+ mutex_exit(&buf_pool->zip_free_mutex);
271+
272 return(block);
273 }
274
db82db79 275@@ -325,8 +351,9 @@
b4e1fa2c
AM
276 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
277 void* src, /*!< in: block to relocate */
278 void* dst, /*!< in: free block to relocate to */
279- ulint i) /*!< in: index of
280+ ulint i, /*!< in: index of
281 buf_pool->zip_free[] */
282+ ibool have_page_hash_mutex)
283 {
284 buf_page_t* bpage;
285 const ulint size = BUF_BUDDY_LOW << i;
db82db79
AM
286@@ -335,13 +362,20 @@
287 ulint space;
288 ulint page_no;
b4e1fa2c
AM
289
290- ut_ad(buf_pool_mutex_own(buf_pool));
291+ //ut_ad(buf_pool_mutex_own(buf_pool));
292+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
293 ut_ad(!mutex_own(&buf_pool->zip_mutex));
294 ut_ad(!ut_align_offset(src, size));
295 ut_ad(!ut_align_offset(dst, size));
db82db79
AM
296 ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
297 UNIV_MEM_ASSERT_W(dst, size);
b4e1fa2c 298
db82db79
AM
299+ if (!have_page_hash_mutex) {
300+ mutex_exit(&buf_pool->zip_free_mutex);
301+ mutex_enter(&buf_pool->LRU_list_mutex);
302+ rw_lock_x_lock(&buf_pool->page_hash_latch);
303+ }
b4e1fa2c 304+
db82db79
AM
305 /* We assume that all memory from buf_buddy_alloc()
306 is used for compressed page frames. */
b4e1fa2c 307
db82db79
AM
308@@ -375,6 +409,11 @@
309 added to buf_pool->page_hash yet. Obviously,
310 it cannot be relocated. */
b4e1fa2c 311
db82db79
AM
312+ if (!have_page_hash_mutex) {
313+ mutex_enter(&buf_pool->zip_free_mutex);
314+ mutex_exit(&buf_pool->LRU_list_mutex);
315+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
316+ }
317 return(FALSE);
318 }
b4e1fa2c 319
db82db79
AM
320@@ -384,18 +423,27 @@
321 For the sake of simplicity, give up. */
322 ut_ad(page_zip_get_size(&bpage->zip) < size);
b4e1fa2c 323
b4e1fa2c 324+ if (!have_page_hash_mutex) {
db82db79 325+ mutex_enter(&buf_pool->zip_free_mutex);
b4e1fa2c
AM
326+ mutex_exit(&buf_pool->LRU_list_mutex);
327+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
328+ }
db82db79
AM
329 return(FALSE);
330 }
331
332+ /* To keep the latch order, release zip_free_mutex before taking the page mutex; it is re-acquired below */
333+ if (have_page_hash_mutex)
b4e1fa2c
AM
334+ mutex_exit(&buf_pool->zip_free_mutex);
335+
db82db79
AM
336 /* The block must have been allocated, but it may
337 contain uninitialized data. */
338 UNIV_MEM_ASSERT_W(src, size);
339
340- mutex = buf_page_get_mutex(bpage);
341+ mutex = buf_page_get_mutex_enter(bpage);
342
343- mutex_enter(mutex);
344+ mutex_enter(&buf_pool->zip_free_mutex);
b4e1fa2c 345
db82db79
AM
346- if (buf_page_can_relocate(bpage)) {
347+ if (mutex && buf_page_can_relocate(bpage)) {
348 /* Relocate the compressed page. */
349 ut_a(bpage->zip.data == src);
350 memcpy(dst, src, size);
351@@ -409,10 +457,22 @@
352 buddy_stat->relocated_usec
353 += ut_time_us(NULL) - usec;
b4e1fa2c
AM
354 }
355+
b4e1fa2c
AM
356+ if (!have_page_hash_mutex) {
357+ mutex_exit(&buf_pool->LRU_list_mutex);
358+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
359+ }
db82db79 360 return(TRUE);
b4e1fa2c
AM
361 }
362
db82db79
AM
363- mutex_exit(mutex);
364+ if (!have_page_hash_mutex) {
365+ mutex_exit(&buf_pool->LRU_list_mutex);
366+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
367+ }
368+
369+ if (mutex) {
370+ mutex_exit(mutex);
371+ }
b4e1fa2c 372 return(FALSE);
db82db79
AM
373 }
374
375@@ -425,13 +485,15 @@
b4e1fa2c
AM
376 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
377 void* buf, /*!< in: block to be freed, must not be
378 pointed to by the buffer pool */
379- ulint i) /*!< in: index of buf_pool->zip_free[],
380+ ulint i, /*!< in: index of buf_pool->zip_free[],
381 or BUF_BUDDY_SIZES */
382+ ibool have_page_hash_mutex)
383 {
384 buf_page_t* bpage;
385 buf_page_t* buddy;
386
387- ut_ad(buf_pool_mutex_own(buf_pool));
388+ //ut_ad(buf_pool_mutex_own(buf_pool));
389+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
390 ut_ad(!mutex_own(&buf_pool->zip_mutex));
391 ut_ad(i <= BUF_BUDDY_SIZES);
db82db79
AM
392 ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
393@@ -443,7 +505,9 @@
394 ((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE;
b4e1fa2c
AM
395
396 if (i == BUF_BUDDY_SIZES) {
397- buf_buddy_block_free(buf_pool, buf);
398+ mutex_exit(&buf_pool->zip_free_mutex);
399+ buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
400+ mutex_enter(&buf_pool->zip_free_mutex);
401 return;
402 }
403
db82db79
AM
404@@ -491,7 +555,7 @@
405
b4e1fa2c 406 ut_a(bpage != buf);
db82db79
AM
407 UNIV_MEM_ASSERT_W(bpage, BUF_BUDDY_LOW << i);
408- bpage = UT_LIST_GET_NEXT(list, bpage);
409+ bpage = UT_LIST_GET_NEXT(zip_list, bpage);
410 }
b4e1fa2c 411
b4e1fa2c 412 #ifndef UNIV_DEBUG_VALGRIND
db82db79
AM
413@@ -501,7 +565,7 @@
414 ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
b4e1fa2c
AM
415
416 /* The buddy is not free. Is there a free block of this size? */
417- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
418+ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
419
420 if (bpage) {
db82db79
AM
421
422@@ -510,7 +574,7 @@
b4e1fa2c
AM
423 buf_buddy_remove_from_free(buf_pool, bpage, i);
424
425 /* Try to relocate the buddy of buf to the free block. */
426- if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
427+ if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
428
db82db79
AM
429 buddy->state = BUF_BLOCK_ZIP_FREE;
430 goto buddy_is_free;
431--- a/storage/innobase/buf/buf0buf.c
432+++ b/storage/innobase/buf/buf0buf.c
b4e1fa2c
AM
433@@ -263,6 +263,7 @@
434 #ifdef UNIV_PFS_RWLOCK
435 /* Keys to register buffer block related rwlocks and mutexes with
436 performance schema */
437+UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
438 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
439 # ifdef UNIV_SYNC_DEBUG
440 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
441@@ -273,6 +274,10 @@
442 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
443 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
444 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
445+UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
446+UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
447+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
448+UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
449 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
450 #endif /* UNIV_PFS_MUTEX */
451
adf0fb13 452@@ -881,9 +886,13 @@
b4e1fa2c
AM
453 block->page.in_zip_hash = FALSE;
454 block->page.in_flush_list = FALSE;
455 block->page.in_free_list = FALSE;
456- block->in_unzip_LRU_list = FALSE;
457 #endif /* UNIV_DEBUG */
adf0fb13
AM
458+ block->page.flush_list.prev = NULL;
459+ block->page.flush_list.next = NULL;
460+ block->page.zip_list.prev = NULL;
461+ block->page.zip_list.next = NULL;
b4e1fa2c
AM
462 block->page.in_LRU_list = FALSE;
463+ block->in_unzip_LRU_list = FALSE;
464 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
465 block->n_pointers = 0;
466 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
adf0fb13 467@@ -981,9 +990,11 @@
b4e1fa2c
AM
468 memset(block->frame, '\0', UNIV_PAGE_SIZE);
469 #endif
470 /* Add the block to the free list */
471- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
472+ mutex_enter(&buf_pool->free_list_mutex);
473+ UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
474
475 ut_d(block->page.in_free_list = TRUE);
476+ mutex_exit(&buf_pool->free_list_mutex);
477 ut_ad(buf_pool_from_block(block) == buf_pool);
478
479 block++;
adf0fb13 480@@ -1038,7 +1049,8 @@
b4e1fa2c
AM
481 buf_chunk_t* chunk = buf_pool->chunks;
482
483 ut_ad(buf_pool);
484- ut_ad(buf_pool_mutex_own(buf_pool));
485+ //ut_ad(buf_pool_mutex_own(buf_pool));
486+ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
487 for (n = buf_pool->n_chunks; n--; chunk++) {
488
489 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
db82db79 490@@ -1144,9 +1156,21 @@
b4e1fa2c
AM
491 ------------------------------- */
492 mutex_create(buf_pool_mutex_key,
493 &buf_pool->mutex, SYNC_BUF_POOL);
494+ mutex_create(buf_pool_LRU_list_mutex_key,
495+ &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
496+ rw_lock_create(buf_pool_page_hash_key,
497+ &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
498+ mutex_create(buf_pool_free_list_mutex_key,
499+ &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
500+ mutex_create(buf_pool_zip_free_mutex_key,
501+ &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
502+ mutex_create(buf_pool_zip_hash_mutex_key,
503+ &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
504 mutex_create(buf_pool_zip_mutex_key,
505 &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
506
507+ mutex_enter(&buf_pool->LRU_list_mutex);
508+ rw_lock_x_lock(&buf_pool->page_hash_latch);
509 buf_pool_mutex_enter(buf_pool);
510
511 if (buf_pool_size > 0) {
db82db79 512@@ -1159,6 +1183,8 @@
b4e1fa2c
AM
513 mem_free(chunk);
514 mem_free(buf_pool);
515
516+ mutex_exit(&buf_pool->LRU_list_mutex);
517+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
518 buf_pool_mutex_exit(buf_pool);
519
520 return(DB_ERROR);
db82db79 521@@ -1189,6 +1215,8 @@
b4e1fa2c
AM
522
523 /* All fields are initialized by mem_zalloc(). */
524
525+ mutex_exit(&buf_pool->LRU_list_mutex);
526+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
527 buf_pool_mutex_exit(buf_pool);
528
529 return(DB_SUCCESS);
db82db79 530@@ -1401,7 +1429,11 @@
b4e1fa2c
AM
531 ulint fold;
532 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
533
534- ut_ad(buf_pool_mutex_own(buf_pool));
535+ //ut_ad(buf_pool_mutex_own(buf_pool));
536+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
537+#ifdef UNIV_SYNC_DEBUG
538+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
539+#endif
540 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
541 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
542 ut_a(bpage->buf_fix_count == 0);
db82db79 543@@ -1512,21 +1544,32 @@
b4e1fa2c
AM
544 buf_page_t* bpage;
545 ulint i;
546 buf_pool_t* buf_pool = buf_pool_get(space, offset);
547+ mutex_t* block_mutex;
548
549- ut_ad(buf_pool_mutex_own(buf_pool));
550+ //ut_ad(buf_pool_mutex_own(buf_pool));
551
552+ rw_lock_x_lock(&buf_pool->page_hash_latch);
553 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
554+ if (bpage) {
555+ block_mutex = buf_page_get_mutex_enter(bpage);
556+ ut_a(block_mutex);
557+ }
558
559 if (UNIV_LIKELY_NULL(bpage)) {
560 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
561 /* The page was loaded meanwhile. */
562+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
563 return(bpage);
564 }
565 /* Add to an existing watch. */
566 bpage->buf_fix_count++;
567+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
568+ mutex_exit(block_mutex);
569 return(NULL);
570 }
571
572+ /* buf_pool->watch is protected by zip_mutex for now */
573+ mutex_enter(&buf_pool->zip_mutex);
574 for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
575 bpage = &buf_pool->watch[i];
576
db82db79 577@@ -1550,10 +1593,12 @@
b4e1fa2c
AM
578 bpage->space = space;
579 bpage->offset = offset;
580 bpage->buf_fix_count = 1;
581-
582+ bpage->buf_pool_index = buf_pool_index(buf_pool);
583 ut_d(bpage->in_page_hash = TRUE);
584 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
585 fold, bpage);
586+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
587+ mutex_exit(&buf_pool->zip_mutex);
588 return(NULL);
589 case BUF_BLOCK_ZIP_PAGE:
590 ut_ad(bpage->in_page_hash);
db82db79 591@@ -1571,6 +1616,8 @@
b4e1fa2c
AM
592 ut_error;
593
594 /* Fix compiler warning */
595+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
596+ mutex_exit(&buf_pool->zip_mutex);
597 return(NULL);
598 }
599
db82db79 600@@ -1588,7 +1635,11 @@
b4e1fa2c
AM
601 space, offset) */
602 buf_page_t* watch) /*!< in/out: sentinel for watch */
603 {
604- ut_ad(buf_pool_mutex_own(buf_pool));
605+ //ut_ad(buf_pool_mutex_own(buf_pool));
606+#ifdef UNIV_SYNC_DEBUG
607+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
608+#endif
609+ ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
610
611 HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
612 ut_d(watch->in_page_hash = FALSE);
db82db79 613@@ -1610,28 +1661,31 @@
b4e1fa2c
AM
614 buf_pool_t* buf_pool = buf_pool_get(space, offset);
615 ulint fold = buf_page_address_fold(space, offset);
616
617- buf_pool_mutex_enter(buf_pool);
618+ //buf_pool_mutex_enter(buf_pool);
619+ rw_lock_x_lock(&buf_pool->page_hash_latch);
620 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
621 /* The page must exist because buf_pool_watch_set()
622 increments buf_fix_count. */
623 ut_a(bpage);
624
625 if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
626- mutex_t* mutex = buf_page_get_mutex(bpage);
627+ mutex_t* mutex = buf_page_get_mutex_enter(bpage);
628
629- mutex_enter(mutex);
630 ut_a(bpage->buf_fix_count > 0);
631 bpage->buf_fix_count--;
632 mutex_exit(mutex);
633 } else {
634+ mutex_enter(&buf_pool->zip_mutex);
635 ut_a(bpage->buf_fix_count > 0);
636
637 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
638 buf_pool_watch_remove(buf_pool, fold, bpage);
639 }
640+ mutex_exit(&buf_pool->zip_mutex);
641 }
642
643- buf_pool_mutex_exit(buf_pool);
644+ //buf_pool_mutex_exit(buf_pool);
645+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
646 }
647
648 /****************************************************************//**
db82db79 649@@ -1651,14 +1705,16 @@
b4e1fa2c
AM
650 buf_pool_t* buf_pool = buf_pool_get(space, offset);
651 ulint fold = buf_page_address_fold(space, offset);
652
653- buf_pool_mutex_enter(buf_pool);
654+ //buf_pool_mutex_enter(buf_pool);
655+ rw_lock_s_lock(&buf_pool->page_hash_latch);
656
657 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
658 /* The page must exist because buf_pool_watch_set()
659 increments buf_fix_count. */
660 ut_a(bpage);
661 ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
662- buf_pool_mutex_exit(buf_pool);
663+ //buf_pool_mutex_exit(buf_pool);
664+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
665
666 return(ret);
667 }
db82db79 668@@ -1675,13 +1731,15 @@
b4e1fa2c
AM
669 {
670 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
671
672- buf_pool_mutex_enter(buf_pool);
673+ //buf_pool_mutex_enter(buf_pool);
674+ mutex_enter(&buf_pool->LRU_list_mutex);
675
676 ut_a(buf_page_in_file(bpage));
677
678 buf_LRU_make_block_young(bpage);
679
680- buf_pool_mutex_exit(buf_pool);
681+ //buf_pool_mutex_exit(buf_pool);
682+ mutex_exit(&buf_pool->LRU_list_mutex);
683 }
684
685 /********************************************************************//**
db82db79 686@@ -1705,14 +1763,20 @@
b4e1fa2c
AM
687 ut_a(buf_page_in_file(bpage));
688
689 if (buf_page_peek_if_too_old(bpage)) {
690- buf_pool_mutex_enter(buf_pool);
691+ //buf_pool_mutex_enter(buf_pool);
692+ mutex_enter(&buf_pool->LRU_list_mutex);
693 buf_LRU_make_block_young(bpage);
694- buf_pool_mutex_exit(buf_pool);
695+ //buf_pool_mutex_exit(buf_pool);
696+ mutex_exit(&buf_pool->LRU_list_mutex);
697 } else if (!access_time) {
698 ulint time_ms = ut_time_ms();
699- buf_pool_mutex_enter(buf_pool);
700+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
701+ //buf_pool_mutex_enter(buf_pool);
702+ if (block_mutex) {
703 buf_page_set_accessed(bpage, time_ms);
704- buf_pool_mutex_exit(buf_pool);
705+ mutex_exit(block_mutex);
706+ }
707+ //buf_pool_mutex_exit(buf_pool);
708 }
709 }
710
db82db79 711@@ -1729,7 +1793,8 @@
b4e1fa2c
AM
712 buf_block_t* block;
713 buf_pool_t* buf_pool = buf_pool_get(space, offset);
714
715- buf_pool_mutex_enter(buf_pool);
716+ //buf_pool_mutex_enter(buf_pool);
717+ rw_lock_s_lock(&buf_pool->page_hash_latch);
718
719 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
720
db82db79 721@@ -1738,7 +1803,8 @@
b4e1fa2c
AM
722 block->check_index_page_at_flush = FALSE;
723 }
724
725- buf_pool_mutex_exit(buf_pool);
726+ //buf_pool_mutex_exit(buf_pool);
727+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
728 }
729
730 /********************************************************************//**
db82db79 731@@ -1757,7 +1823,8 @@
b4e1fa2c
AM
732 ibool is_hashed;
733 buf_pool_t* buf_pool = buf_pool_get(space, offset);
734
735- buf_pool_mutex_enter(buf_pool);
736+ //buf_pool_mutex_enter(buf_pool);
737+ rw_lock_s_lock(&buf_pool->page_hash_latch);
738
739 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
740
db82db79 741@@ -1768,7 +1835,8 @@
b4e1fa2c
AM
742 is_hashed = block->is_hashed;
743 }
744
745- buf_pool_mutex_exit(buf_pool);
746+ //buf_pool_mutex_exit(buf_pool);
747+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
748
749 return(is_hashed);
750 }
db82db79 751@@ -1790,7 +1858,8 @@
b4e1fa2c
AM
752 buf_page_t* bpage;
753 buf_pool_t* buf_pool = buf_pool_get(space, offset);
754
755- buf_pool_mutex_enter(buf_pool);
756+ //buf_pool_mutex_enter(buf_pool);
757+ rw_lock_s_lock(&buf_pool->page_hash_latch);
758
759 bpage = buf_page_hash_get(buf_pool, space, offset);
760
db82db79 761@@ -1801,7 +1870,8 @@
b4e1fa2c
AM
762 bpage->file_page_was_freed = TRUE;
763 }
764
765- buf_pool_mutex_exit(buf_pool);
766+ //buf_pool_mutex_exit(buf_pool);
767+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
768
769 return(bpage);
770 }
db82db79 771@@ -1822,7 +1892,8 @@
b4e1fa2c
AM
772 buf_page_t* bpage;
773 buf_pool_t* buf_pool = buf_pool_get(space, offset);
774
775- buf_pool_mutex_enter(buf_pool);
776+ //buf_pool_mutex_enter(buf_pool);
777+ rw_lock_s_lock(&buf_pool->page_hash_latch);
778
779 bpage = buf_page_hash_get(buf_pool, space, offset);
780
db82db79 781@@ -1831,7 +1902,8 @@
b4e1fa2c
AM
782 bpage->file_page_was_freed = FALSE;
783 }
784
785- buf_pool_mutex_exit(buf_pool);
786+ //buf_pool_mutex_exit(buf_pool);
787+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
788
789 return(bpage);
790 }
db82db79 791@@ -1863,8 +1935,9 @@
b4e1fa2c
AM
792 buf_pool->stat.n_page_gets++;
793
794 for (;;) {
795- buf_pool_mutex_enter(buf_pool);
796+ //buf_pool_mutex_enter(buf_pool);
797 lookup:
798+ rw_lock_s_lock(&buf_pool->page_hash_latch);
799 bpage = buf_page_hash_get(buf_pool, space, offset);
800 if (bpage) {
801 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
db82db79 802@@ -1873,7 +1946,8 @@
b4e1fa2c
AM
803
804 /* Page not in buf_pool: needs to be read from file */
805
806- buf_pool_mutex_exit(buf_pool);
807+ //buf_pool_mutex_exit(buf_pool);
808+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
809
810 buf_read_page(space, zip_size, offset);
811
db82db79 812@@ -1885,10 +1959,15 @@
b4e1fa2c
AM
813 if (UNIV_UNLIKELY(!bpage->zip.data)) {
814 /* There is no compressed page. */
815 err_exit:
816- buf_pool_mutex_exit(buf_pool);
817+ //buf_pool_mutex_exit(buf_pool);
818+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
819 return(NULL);
820 }
821
822+ block_mutex = buf_page_get_mutex_enter(bpage);
823+
824+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
825+
826 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
827
828 switch (buf_page_get_state(bpage)) {
db82db79 829@@ -1897,24 +1976,43 @@
b4e1fa2c
AM
830 case BUF_BLOCK_MEMORY:
831 case BUF_BLOCK_REMOVE_HASH:
832 case BUF_BLOCK_ZIP_FREE:
833+ if (block_mutex)
834+ mutex_exit(block_mutex);
835 break;
836 case BUF_BLOCK_ZIP_PAGE:
837 case BUF_BLOCK_ZIP_DIRTY:
838- block_mutex = &buf_pool->zip_mutex;
839- mutex_enter(block_mutex);
840+ ut_a(block_mutex == &buf_pool->zip_mutex);
841 bpage->buf_fix_count++;
842 goto got_block;
843 case BUF_BLOCK_FILE_PAGE:
844- block_mutex = &((buf_block_t*) bpage)->mutex;
b4e1fa2c 845+ ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
f7ab7acd
AM
846+
847+ /* release the block mutex to obey the latch order */
848+ mutex_exit(block_mutex);
849+
850+ /* get LRU_list_mutex for buf_LRU_free_block() */
851+ mutex_enter(&buf_pool->LRU_list_mutex);
852 mutex_enter(block_mutex);
b4e1fa2c 853
f7ab7acd 854- /* Discard the uncompressed page frame if possible. */
db82db79 855- if (buf_LRU_free_block(bpage, FALSE)) {
f7ab7acd
AM
856+ if (UNIV_UNLIKELY(bpage->space != space
857+ || bpage->offset != offset
858+ || !bpage->in_LRU_list
859+ || !bpage->zip.data)) {
860+ /* the page changed while the block mutex was released; retry */
861+ mutex_exit(&buf_pool->LRU_list_mutex);
862+ mutex_exit(block_mutex);
863+ goto lookup;
864+ }
b4e1fa2c 865
f7ab7acd 866+ /* Discard the uncompressed page frame if possible. */
db82db79 867+ if (buf_LRU_free_block(bpage, FALSE, TRUE)) {
f7ab7acd 868+ mutex_exit(&buf_pool->LRU_list_mutex);
b4e1fa2c 869 mutex_exit(block_mutex);
df1b5770 870 goto lookup;
f7ab7acd
AM
871 }
872
873+ mutex_exit(&buf_pool->LRU_list_mutex);
874+
875 buf_block_buf_fix_inc((buf_block_t*) bpage,
876 __FILE__, __LINE__);
877 goto got_block;
db82db79 878@@ -1927,7 +2025,7 @@
b4e1fa2c
AM
879 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
880 access_time = buf_page_is_accessed(bpage);
881
882- buf_pool_mutex_exit(buf_pool);
883+ //buf_pool_mutex_exit(buf_pool);
884
885 mutex_exit(block_mutex);
886
db82db79 887@@ -2239,7 +2337,7 @@
b4e1fa2c
AM
888 const buf_block_t* block) /*!< in: pointer to block,
889 not dereferenced */
890 {
891- ut_ad(buf_pool_mutex_own(buf_pool));
892+ //ut_ad(buf_pool_mutex_own(buf_pool));
893
894 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
895 /* The pointer should be aligned. */
db82db79 896@@ -2275,6 +2373,7 @@
b4e1fa2c
AM
897 ulint fix_type;
898 ibool must_read;
899 ulint retries = 0;
900+ mutex_t* block_mutex = NULL;
901 buf_pool_t* buf_pool = buf_pool_get(space, offset);
902
903 ut_ad(mtr);
db82db79 904@@ -2308,18 +2407,24 @@
b4e1fa2c
AM
905 fold = buf_page_address_fold(space, offset);
906 loop:
907 block = guess;
908- buf_pool_mutex_enter(buf_pool);
909+ //buf_pool_mutex_enter(buf_pool);
910
911 if (block) {
912+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
913+
914 /* If the guess is a compressed page descriptor that
db82db79
AM
915 has been allocated by buf_page_alloc_descriptor(),
916 it may have been freed by buf_relocate(). */
b4e1fa2c
AM
917
918- if (!buf_block_is_uncompressed(buf_pool, block)
919+ if (!block_mutex) {
920+ block = guess = NULL;
921+ } else if (!buf_block_is_uncompressed(buf_pool, block)
922 || offset != block->page.offset
923 || space != block->page.space
924 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
925
926+ mutex_exit(block_mutex);
927+
928 block = guess = NULL;
929 } else {
930 ut_ad(!block->page.in_zip_hash);
db82db79 931@@ -2328,12 +2433,19 @@
b4e1fa2c
AM
932 }
933
934 if (block == NULL) {
935+ rw_lock_s_lock(&buf_pool->page_hash_latch);
936 block = (buf_block_t*) buf_page_hash_get_low(
937 buf_pool, space, offset, fold);
938+ if (block) {
939+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
940+ ut_a(block_mutex);
941+ }
942+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
943 }
944
945 loop2:
946 if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
947+ mutex_exit(block_mutex);
948 block = NULL;
949 }
950
db82db79 951@@ -2345,12 +2457,14 @@
b4e1fa2c
AM
952 space, offset, fold);
953
954 if (UNIV_LIKELY_NULL(block)) {
955-
956+ block_mutex = buf_page_get_mutex((buf_page_t*)block);
957+ ut_a(block_mutex);
958+ ut_ad(mutex_own(block_mutex));
959 goto got_block;
960 }
961 }
962
963- buf_pool_mutex_exit(buf_pool);
964+ //buf_pool_mutex_exit(buf_pool);
965
966 if (mode == BUF_GET_IF_IN_POOL
adf0fb13 967 || mode == BUF_PEEK_IF_IN_POOL
db82db79 968@@ -2400,7 +2514,8 @@
b4e1fa2c
AM
969 /* The page is being read to buffer pool,
970 but we cannot wait around for the read to
971 complete. */
972- buf_pool_mutex_exit(buf_pool);
973+ //buf_pool_mutex_exit(buf_pool);
974+ mutex_exit(block_mutex);
975
976 return(NULL);
977 }
db82db79 978@@ -2410,38 +2525,49 @@
b4e1fa2c
AM
979 ibool success;
980
981 case BUF_BLOCK_FILE_PAGE:
982+ if (block_mutex == &buf_pool->zip_mutex) {
983+ /* zip_mutex is the wrong mutex for a file page: release it and retry */
984+ mutex_exit(block_mutex);
985+ goto loop;
986+ }
987 break;
988
989 case BUF_BLOCK_ZIP_PAGE:
990 case BUF_BLOCK_ZIP_DIRTY:
991+ ut_ad(block_mutex == &buf_pool->zip_mutex);
992 bpage = &block->page;
993 /* Protect bpage->buf_fix_count. */
994- mutex_enter(&buf_pool->zip_mutex);
995+ //mutex_enter(&buf_pool->zip_mutex);
996
997 if (bpage->buf_fix_count
998 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
999 /* This condition often occurs when the buffer
1000 is not buffer-fixed, but I/O-fixed by
1001 buf_page_init_for_read(). */
1002- mutex_exit(&buf_pool->zip_mutex);
1003+ //mutex_exit(&buf_pool->zip_mutex);
1004 wait_until_unfixed:
1005 /* The block is buffer-fixed or I/O-fixed.
1006 Try again later. */
1007- buf_pool_mutex_exit(buf_pool);
1008+ //buf_pool_mutex_exit(buf_pool);
1009+ mutex_exit(block_mutex);
1010 os_thread_sleep(WAIT_FOR_READ);
adf0fb13 1011
b4e1fa2c
AM
1012 goto loop;
1013 }
1014
1015 /* Allocate an uncompressed page. */
1016- buf_pool_mutex_exit(buf_pool);
1017- mutex_exit(&buf_pool->zip_mutex);
1018+ //buf_pool_mutex_exit(buf_pool);
1019+ //mutex_exit(&buf_pool->zip_mutex);
1020+ mutex_exit(block_mutex);
1021
df1b5770 1022 block = buf_LRU_get_free_block(buf_pool);
b4e1fa2c
AM
1023 ut_a(block);
1024+ block_mutex = &block->mutex;
1025
1026- buf_pool_mutex_enter(buf_pool);
1027- mutex_enter(&block->mutex);
1028+ //buf_pool_mutex_enter(buf_pool);
1029+ mutex_enter(&buf_pool->LRU_list_mutex);
1030+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1031+ mutex_enter(block_mutex);
1032
1033 {
1034 buf_page_t* hash_bpage;
db82db79 1035@@ -2454,35 +2580,47 @@
b4e1fa2c
AM
1036 while buf_pool->mutex was released.
1037 Free the block that was allocated. */
1038
1039- buf_LRU_block_free_non_file_page(block);
1040- mutex_exit(&block->mutex);
1041+ buf_LRU_block_free_non_file_page(block, TRUE);
1042+ mutex_exit(block_mutex);
1043
1044 block = (buf_block_t*) hash_bpage;
1045+ if (block) {
1046+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1047+ ut_a(block_mutex);
1048+ }
1049+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1050+ mutex_exit(&buf_pool->LRU_list_mutex);
1051 goto loop2;
1052 }
1053 }
1054
1055+ mutex_enter(&buf_pool->zip_mutex);
1056+
1057 if (UNIV_UNLIKELY
1058 (bpage->buf_fix_count
1059 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1060
1061+ mutex_exit(&buf_pool->zip_mutex);
1062 /* The block was buffer-fixed or I/O-fixed
1063 while buf_pool->mutex was not held by this thread.
1064 Free the block that was allocated and try again.
1065 This should be extremely unlikely. */
1066
1067- buf_LRU_block_free_non_file_page(block);
1068- mutex_exit(&block->mutex);
1069+ buf_LRU_block_free_non_file_page(block, TRUE);
1070+ //mutex_exit(&block->mutex);
1071
1072+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1073+ mutex_exit(&buf_pool->LRU_list_mutex);
1074 goto wait_until_unfixed;
1075 }
1076
1077 /* Move the compressed page from bpage to block,
1078 and uncompress it. */
1079
1080- mutex_enter(&buf_pool->zip_mutex);
1081-
1082 buf_relocate(bpage, &block->page);
1083+
1084+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1085+
1086 buf_block_init_low(block);
1087 block->lock_hash_val = lock_rec_hash(space, offset);
1088
db82db79 1089@@ -2492,7 +2630,7 @@
b4e1fa2c
AM
1090 if (buf_page_get_state(&block->page)
1091 == BUF_BLOCK_ZIP_PAGE) {
db82db79 1092 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
b4e1fa2c
AM
1093- UT_LIST_REMOVE(list, buf_pool->zip_clean,
1094+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1095 &block->page);
db82db79 1096 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
b4e1fa2c 1097 ut_ad(!block->page.in_flush_list);
db82db79 1098@@ -2510,18 +2648,23 @@
b4e1fa2c
AM
1099 /* Insert at the front of unzip_LRU list */
1100 buf_unzip_LRU_add_block(block, FALSE);
1101
1102+ mutex_exit(&buf_pool->LRU_list_mutex);
1103+
1104 block->page.buf_fix_count = 1;
1105 buf_block_set_io_fix(block, BUF_IO_READ);
1106 rw_lock_x_lock_func(&block->lock, 0, file, line);
1107
1108 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1109
1110- mutex_exit(&block->mutex);
1111+ mutex_exit(block_mutex);
1112 mutex_exit(&buf_pool->zip_mutex);
db82db79 1113- buf_pool->n_pend_unzip++;
b4e1fa2c 1114
db82db79
AM
1115+ buf_pool_mutex_enter(buf_pool);
1116+ buf_pool->n_pend_unzip++;
1117 buf_pool_mutex_exit(buf_pool);
b4e1fa2c 1118
b4e1fa2c 1119+ //buf_pool_mutex_exit(buf_pool);
db82db79
AM
1120+
1121 buf_page_free_descriptor(bpage);
b4e1fa2c
AM
1122
1123 /* Decompress the page and apply buffered operations
db82db79 1124@@ -2535,12 +2678,15 @@
b4e1fa2c
AM
1125 }
1126
1127 /* Unfix and unlatch the block. */
1128- buf_pool_mutex_enter(buf_pool);
1129- mutex_enter(&block->mutex);
1130+ //buf_pool_mutex_enter(buf_pool);
1131+ block_mutex = &block->mutex;
1132+ mutex_enter(block_mutex);
1133 block->page.buf_fix_count--;
1134 buf_block_set_io_fix(block, BUF_IO_NONE);
1135- mutex_exit(&block->mutex);
1136+
1137+ buf_pool_mutex_enter(buf_pool);
1138 buf_pool->n_pend_unzip--;
1139+ buf_pool_mutex_exit(buf_pool);
1140 rw_lock_x_unlock(&block->lock);
1141
1142 break;
db82db79 1143@@ -2556,7 +2702,7 @@
b4e1fa2c
AM
1144
1145 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1146
1147- mutex_enter(&block->mutex);
1148+ //mutex_enter(&block->mutex);
1149 #if UNIV_WORD_SIZE == 4
1150 /* On 32-bit systems, there is no padding in buf_page_t. On
1151 other systems, Valgrind could complain about uninitialized pad
db82db79 1152@@ -2569,8 +2715,8 @@
b4e1fa2c
AM
1153 /* Try to evict the block from the buffer pool, to use the
1154 insert buffer (change buffer) as much as possible. */
1155
db82db79 1156- if (buf_LRU_free_block(&block->page, TRUE)) {
11822e22 1157- mutex_exit(&block->mutex);
db82db79 1158+ if (buf_LRU_free_block(&block->page, TRUE, FALSE)) {
11822e22 1159+ mutex_exit(block_mutex);
b4e1fa2c 1160 if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
df1b5770 1161 /* Set the watch, as it would have
11822e22 1162 been set if the page were not in the
db82db79 1163@@ -2579,6 +2725,9 @@
11822e22
AM
1164 space, offset, fold);
1165
1166 if (UNIV_LIKELY_NULL(block)) {
1167+ block_mutex = buf_page_get_mutex((buf_page_t*)block);
1168+ ut_a(block_mutex);
1169+ ut_ad(mutex_own(block_mutex));
1170
1171 /* The page entered the buffer
1172 pool for some reason. Try to
db82db79 1173@@ -2586,7 +2735,7 @@
11822e22
AM
1174 goto got_block;
1175 }
1176 }
1177- buf_pool_mutex_exit(buf_pool);
1178+ //buf_pool_mutex_exit(buf_pool);
1179 fprintf(stderr,
1180 "innodb_change_buffering_debug evict %u %u\n",
1181 (unsigned) space, (unsigned) offset);
db82db79
AM
1182@@ -2608,13 +2757,14 @@
1183 ut_a(mode == BUF_GET_POSSIBLY_FREED
1184 || !block->page.file_page_was_freed);
1185 #endif
b4e1fa2c
AM
1186- mutex_exit(&block->mutex);
1187+ //mutex_exit(&block->mutex);
1188
1189 /* Check if this is the first access to the page */
1190
1191 access_time = buf_page_is_accessed(&block->page);
1192
1193- buf_pool_mutex_exit(buf_pool);
1194+ //buf_pool_mutex_exit(buf_pool);
1195+ mutex_exit(block_mutex);
1196
adf0fb13
AM
1197 if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
1198 buf_page_set_accessed_make_young(&block->page, access_time);
db82db79 1199@@ -2847,9 +2997,11 @@
b4e1fa2c
AM
1200 buf_pool = buf_pool_from_block(block);
1201
1202 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1203- buf_pool_mutex_enter(buf_pool);
1204+ //buf_pool_mutex_enter(buf_pool);
1205+ mutex_enter(&buf_pool->LRU_list_mutex);
1206 buf_LRU_make_block_young(&block->page);
1207- buf_pool_mutex_exit(buf_pool);
1208+ //buf_pool_mutex_exit(buf_pool);
1209+ mutex_exit(&buf_pool->LRU_list_mutex);
1210 } else if (!buf_page_is_accessed(&block->page)) {
1211 /* Above, we do a dirty read on purpose, to avoid
1212 mutex contention. The field buf_page_t::access_time
db82db79 1213@@ -2857,9 +3009,11 @@
b4e1fa2c
AM
1214 field must be protected by mutex, however. */
1215 ulint time_ms = ut_time_ms();
1216
1217- buf_pool_mutex_enter(buf_pool);
1218+ //buf_pool_mutex_enter(buf_pool);
1219+ mutex_enter(&block->mutex);
1220 buf_page_set_accessed(&block->page, time_ms);
1221- buf_pool_mutex_exit(buf_pool);
1222+ //buf_pool_mutex_exit(buf_pool);
1223+ mutex_exit(&block->mutex);
1224 }
1225
adf0fb13 1226 ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
db82db79 1227@@ -2926,18 +3080,21 @@
b4e1fa2c
AM
1228 ut_ad(mtr);
1229 ut_ad(mtr->state == MTR_ACTIVE);
1230
1231- buf_pool_mutex_enter(buf_pool);
1232+ //buf_pool_mutex_enter(buf_pool);
1233+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1234 block = buf_block_hash_get(buf_pool, space_id, page_no);
1235
1236 if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1237- buf_pool_mutex_exit(buf_pool);
1238+ //buf_pool_mutex_exit(buf_pool);
1239+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1240 return(NULL);
1241 }
1242
1243 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1244
1245 mutex_enter(&block->mutex);
1246- buf_pool_mutex_exit(buf_pool);
1247+ //buf_pool_mutex_exit(buf_pool);
1248+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1249
1250 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1251 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
db82db79 1252@@ -3026,7 +3183,10 @@
b4e1fa2c
AM
1253 buf_page_t* hash_page;
1254 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1255
1256- ut_ad(buf_pool_mutex_own(buf_pool));
1257+ //ut_ad(buf_pool_mutex_own(buf_pool));
1258+#ifdef UNIV_SYNC_DEBUG
1259+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1260+#endif
1261 ut_ad(mutex_own(&(block->mutex)));
1262 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1263
db82db79 1264@@ -3055,11 +3215,14 @@
b4e1fa2c
AM
1265 if (UNIV_LIKELY(!hash_page)) {
1266 } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1267 /* Preserve the reference count. */
1268- ulint buf_fix_count = hash_page->buf_fix_count;
1269+ ulint buf_fix_count;
1270
1271+ mutex_enter(&buf_pool->zip_mutex);
1272+ buf_fix_count = hash_page->buf_fix_count;
1273 ut_a(buf_fix_count > 0);
1274 block->page.buf_fix_count += buf_fix_count;
1275 buf_pool_watch_remove(buf_pool, fold, hash_page);
1276+ mutex_exit(&buf_pool->zip_mutex);
1277 } else {
1278 fprintf(stderr,
1279 "InnoDB: Error: page %lu %lu already found"
db82db79 1280@@ -3069,7 +3232,8 @@
b4e1fa2c
AM
1281 (const void*) hash_page, (const void*) block);
1282 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1283 mutex_exit(&block->mutex);
1284- buf_pool_mutex_exit(buf_pool);
1285+ //buf_pool_mutex_exit(buf_pool);
1286+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1287 buf_print();
1288 buf_LRU_print();
1289 buf_validate();
db82db79 1290@@ -3152,7 +3316,9 @@
b4e1fa2c
AM
1291
1292 fold = buf_page_address_fold(space, offset);
1293
1294- buf_pool_mutex_enter(buf_pool);
1295+ //buf_pool_mutex_enter(buf_pool);
1296+ mutex_enter(&buf_pool->LRU_list_mutex);
1297+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1298
1299 watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1300 if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
db82db79 1301@@ -3161,9 +3327,15 @@
b4e1fa2c
AM
1302 err_exit:
1303 if (block) {
1304 mutex_enter(&block->mutex);
1305- buf_LRU_block_free_non_file_page(block);
1306+ mutex_exit(&buf_pool->LRU_list_mutex);
1307+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1308+ buf_LRU_block_free_non_file_page(block, FALSE);
1309 mutex_exit(&block->mutex);
1310 }
1311+ else {
1312+ mutex_exit(&buf_pool->LRU_list_mutex);
1313+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1314+ }
1315
1316 bpage = NULL;
1317 goto func_exit;
db82db79 1318@@ -3186,6 +3358,8 @@
b4e1fa2c
AM
1319
1320 buf_page_init(space, offset, fold, block);
1321
1322+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1323+
1324 /* The block must be put to the LRU list, to the old blocks */
1325 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1326
db82db79 1327@@ -3213,7 +3387,7 @@
b4e1fa2c
AM
1328 been added to buf_pool->LRU and
1329 buf_pool->page_hash. */
1330 mutex_exit(&block->mutex);
1331- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1332+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1333 mutex_enter(&block->mutex);
1334 block->page.zip.data = data;
1335
db82db79 1336@@ -3226,13 +3400,14 @@
b4e1fa2c
AM
1337 buf_unzip_LRU_add_block(block, TRUE);
1338 }
1339
1340+ mutex_exit(&buf_pool->LRU_list_mutex);
1341 mutex_exit(&block->mutex);
1342 } else {
db82db79 1343 /* The compressed page must be allocated before the
b4e1fa2c
AM
1344 control block (bpage), in order to avoid the
1345 invocation of buf_buddy_relocate_block() on
1346 uninitialized data. */
1347- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
b4e1fa2c 1348+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
b4e1fa2c 1349
db82db79
AM
1350 /* If buf_buddy_alloc() allocated storage from the LRU list,
1351 it released and reacquired buf_pool->mutex. Thus, we must
1352@@ -3248,7 +3423,10 @@
1353
b4e1fa2c
AM
1354 /* The block was added by some other thread. */
1355 watch_page = NULL;
b4e1fa2c 1356- buf_buddy_free(buf_pool, data, zip_size);
b4e1fa2c
AM
1357+ buf_buddy_free(buf_pool, data, zip_size, TRUE);
1358+
1359+ mutex_exit(&buf_pool->LRU_list_mutex);
1360+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1361
1362 bpage = NULL;
1363 goto func_exit;
db82db79 1364@@ -3296,20 +3474,26 @@
b4e1fa2c
AM
1365 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1366 bpage);
1367
1368+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1369+
1370 /* The block must be put to the LRU list, to the old blocks */
1371 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
db82db79 1372 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
b4e1fa2c 1373 buf_LRU_insert_zip_clean(bpage);
db82db79 1374 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
b4e1fa2c
AM
1375
1376+ mutex_exit(&buf_pool->LRU_list_mutex);
1377+
1378 buf_page_set_io_fix(bpage, BUF_IO_READ);
1379
1380 mutex_exit(&buf_pool->zip_mutex);
1381 }
1382
1383+ buf_pool_mutex_enter(buf_pool);
1384 buf_pool->n_pend_reads++;
1385-func_exit:
1386 buf_pool_mutex_exit(buf_pool);
1387+func_exit:
1388+ //buf_pool_mutex_exit(buf_pool);
1389
1390 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1391
db82db79 1392@@ -3351,7 +3535,9 @@
b4e1fa2c
AM
1393
1394 fold = buf_page_address_fold(space, offset);
1395
1396- buf_pool_mutex_enter(buf_pool);
1397+ //buf_pool_mutex_enter(buf_pool);
1398+ mutex_enter(&buf_pool->LRU_list_mutex);
1399+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1400
1401 block = (buf_block_t*) buf_page_hash_get_low(
1402 buf_pool, space, offset, fold);
db82db79 1403@@ -3367,7 +3553,9 @@
df1b5770 1404 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
b4e1fa2c
AM
1405
1406 /* Page can be found in buf_pool */
1407- buf_pool_mutex_exit(buf_pool);
1408+ //buf_pool_mutex_exit(buf_pool);
1409+ mutex_exit(&buf_pool->LRU_list_mutex);
1410+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1411
1412 buf_block_free(free_block);
1413
db82db79 1414@@ -3389,6 +3577,7 @@
b4e1fa2c
AM
1415 mutex_enter(&block->mutex);
1416
1417 buf_page_init(space, offset, fold, block);
1418+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1419
1420 /* The block must be put to the LRU list */
1421 buf_LRU_add_block(&block->page, FALSE);
db82db79 1422@@ -3415,7 +3604,7 @@
b4e1fa2c
AM
1423 the reacquisition of buf_pool->mutex. We also must
1424 defer this operation until after the block descriptor
1425 has been added to buf_pool->LRU and buf_pool->page_hash. */
1426- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1427+ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1428 mutex_enter(&block->mutex);
1429 block->page.zip.data = data;
1430
db82db79 1431@@ -3433,7 +3622,8 @@
b4e1fa2c
AM
1432
1433 buf_page_set_accessed(&block->page, time_ms);
1434
1435- buf_pool_mutex_exit(buf_pool);
1436+ //buf_pool_mutex_exit(buf_pool);
1437+ mutex_exit(&buf_pool->LRU_list_mutex);
1438
1439 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1440
db82db79 1441@@ -3484,6 +3674,8 @@
b4e1fa2c
AM
1442 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1443 const ibool uncompressed = (buf_page_get_state(bpage)
1444 == BUF_BLOCK_FILE_PAGE);
1445+ ibool have_LRU_mutex = FALSE;
1446+ mutex_t* block_mutex;
1447
1448 ut_a(buf_page_in_file(bpage));
1449
db82db79 1450@@ -3617,8 +3809,26 @@
b4e1fa2c
AM
1451 }
1452 }
1453
1454+ if (io_type == BUF_IO_WRITE
1455+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1456+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1457+ /* Hold LRU_list_mutex to keep buf_LRU_insert_zip_clean() consistent. */
1458+ have_LRU_mutex = TRUE; /* optimistic */
1459+ }
1460+retry_mutex:
1461+ if (have_LRU_mutex)
1462+ mutex_enter(&buf_pool->LRU_list_mutex);
1463+ block_mutex = buf_page_get_mutex_enter(bpage);
1464+ ut_a(block_mutex);
1465+ if (io_type == BUF_IO_WRITE
1466+ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1467+ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1468+ && !have_LRU_mutex) {
1469+ mutex_exit(block_mutex);
1470+ have_LRU_mutex = TRUE;
1471+ goto retry_mutex;
1472+ }
1473 buf_pool_mutex_enter(buf_pool);
1474- mutex_enter(buf_page_get_mutex(bpage));
1475
1476 #ifdef UNIV_IBUF_COUNT_DEBUG
1477 if (io_type == BUF_IO_WRITE || uncompressed) {
db82db79 1478@@ -3641,6 +3851,7 @@
b4e1fa2c
AM
1479 the x-latch to this OS thread: do not let this confuse you in
1480 debugging! */
1481
1482+ ut_a(!have_LRU_mutex);
1483 ut_ad(buf_pool->n_pend_reads > 0);
1484 buf_pool->n_pend_reads--;
1485 buf_pool->stat.n_pages_read++;
db82db79 1486@@ -3658,6 +3869,9 @@
b4e1fa2c
AM
1487
1488 buf_flush_write_complete(bpage);
1489
1490+ if (have_LRU_mutex)
1491+ mutex_exit(&buf_pool->LRU_list_mutex);
1492+
1493 if (uncompressed) {
1494 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1495 BUF_IO_WRITE);
db82db79 1496@@ -3680,8 +3894,8 @@
b4e1fa2c
AM
1497 }
1498 #endif /* UNIV_DEBUG */
1499
1500- mutex_exit(buf_page_get_mutex(bpage));
1501 buf_pool_mutex_exit(buf_pool);
1502+ mutex_exit(block_mutex);
1503 }
1504
1505 /*********************************************************************//**
db82db79 1506@@ -3698,7 +3912,9 @@
b4e1fa2c
AM
1507
1508 ut_ad(buf_pool);
1509
1510- buf_pool_mutex_enter(buf_pool);
1511+ //buf_pool_mutex_enter(buf_pool);
1512+ mutex_enter(&buf_pool->LRU_list_mutex);
1513+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1514
1515 chunk = buf_pool->chunks;
1516
db82db79 1517@@ -3715,7 +3931,9 @@
b4e1fa2c
AM
1518 }
1519 }
1520
1521- buf_pool_mutex_exit(buf_pool);
1522+ //buf_pool_mutex_exit(buf_pool);
1523+ mutex_exit(&buf_pool->LRU_list_mutex);
1524+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1525
1526 return(TRUE);
1527 }
db82db79 1528@@ -3763,7 +3981,8 @@
b4e1fa2c
AM
1529 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1530 }
1531
1532- buf_pool_mutex_enter(buf_pool);
1533+ //buf_pool_mutex_enter(buf_pool);
1534+ mutex_enter(&buf_pool->LRU_list_mutex);
1535
1536 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1537 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
db82db79 1538@@ -3776,7 +3995,8 @@
b4e1fa2c
AM
1539 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1540 buf_refresh_io_stats(buf_pool);
1541
1542- buf_pool_mutex_exit(buf_pool);
1543+ //buf_pool_mutex_exit(buf_pool);
1544+ mutex_exit(&buf_pool->LRU_list_mutex);
1545 }
1546
1547 /*********************************************************************//**
db82db79 1548@@ -3818,7 +4038,10 @@
b4e1fa2c
AM
1549
1550 ut_ad(buf_pool);
1551
1552- buf_pool_mutex_enter(buf_pool);
1553+ //buf_pool_mutex_enter(buf_pool);
1554+ mutex_enter(&buf_pool->LRU_list_mutex);
1555+ rw_lock_x_lock(&buf_pool->page_hash_latch);
1556+ /* To keep the new latch order, this validation cannot be fully correct... */
1557
1558 chunk = buf_pool->chunks;
1559
db82db79 1560@@ -3913,7 +4136,7 @@
b4e1fa2c
AM
1561 /* Check clean compressed-only blocks. */
1562
1563 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1564- b = UT_LIST_GET_NEXT(list, b)) {
1565+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1566 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1567 switch (buf_page_get_io_fix(b)) {
1568 case BUF_IO_NONE:
db82db79 1569@@ -3944,7 +4167,7 @@
b4e1fa2c
AM
1570
1571 buf_flush_list_mutex_enter(buf_pool);
1572 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1573- b = UT_LIST_GET_NEXT(list, b)) {
1574+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1575 ut_ad(b->in_flush_list);
1576 ut_a(b->oldest_modification);
1577 n_flush++;
db82db79 1578@@ -4003,6 +4226,8 @@
b4e1fa2c
AM
1579 }
1580
1581 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1582+ /* Because of the latching order with block->mutex, we cannot acquire the needed mutexes beforehand, so the checks below are disabled. */
1583+/*
1584 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1585 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1586 (ulong) UT_LIST_GET_LEN(buf_pool->free),
db82db79 1587@@ -4013,8 +4238,11 @@
b4e1fa2c
AM
1588 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1589 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1590 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1591+*/
1592
1593- buf_pool_mutex_exit(buf_pool);
1594+ //buf_pool_mutex_exit(buf_pool);
1595+ mutex_exit(&buf_pool->LRU_list_mutex);
1596+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
1597
1598 ut_a(buf_LRU_validate());
1599 ut_a(buf_flush_validate(buf_pool));
db82db79 1600@@ -4070,7 +4298,9 @@
b4e1fa2c
AM
1601 index_ids = mem_alloc(size * sizeof *index_ids);
1602 counts = mem_alloc(sizeof(ulint) * size);
1603
1604- buf_pool_mutex_enter(buf_pool);
1605+ //buf_pool_mutex_enter(buf_pool);
1606+ mutex_enter(&buf_pool->LRU_list_mutex);
1607+ mutex_enter(&buf_pool->free_list_mutex);
1608 buf_flush_list_mutex_enter(buf_pool);
1609
1610 fprintf(stderr,
db82db79 1611@@ -4139,7 +4369,9 @@
b4e1fa2c
AM
1612 }
1613 }
1614
1615- buf_pool_mutex_exit(buf_pool);
1616+ //buf_pool_mutex_exit(buf_pool);
1617+ mutex_exit(&buf_pool->LRU_list_mutex);
1618+ mutex_exit(&buf_pool->free_list_mutex);
1619
1620 for (i = 0; i < n_found; i++) {
1621 index = dict_index_get_if_in_cache(index_ids[i]);
db82db79 1622@@ -4196,7 +4428,7 @@
b4e1fa2c
AM
1623 buf_chunk_t* chunk;
1624 ulint fixed_pages_number = 0;
1625
1626- buf_pool_mutex_enter(buf_pool);
1627+ //buf_pool_mutex_enter(buf_pool);
1628
1629 chunk = buf_pool->chunks;
1630
db82db79 1631@@ -4230,7 +4462,7 @@
b4e1fa2c
AM
1632 /* Traverse the lists of clean and dirty compressed-only blocks. */
1633
1634 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1635- b = UT_LIST_GET_NEXT(list, b)) {
1636+ b = UT_LIST_GET_NEXT(zip_list, b)) {
1637 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1638 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1639
db82db79 1640@@ -4242,7 +4474,7 @@
b4e1fa2c
AM
1641
1642 buf_flush_list_mutex_enter(buf_pool);
1643 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1644- b = UT_LIST_GET_NEXT(list, b)) {
1645+ b = UT_LIST_GET_NEXT(flush_list, b)) {
1646 ut_ad(b->in_flush_list);
1647
1648 switch (buf_page_get_state(b)) {
db82db79 1649@@ -4268,7 +4500,7 @@
b4e1fa2c
AM
1650
1651 buf_flush_list_mutex_exit(buf_pool);
1652 mutex_exit(&buf_pool->zip_mutex);
1653- buf_pool_mutex_exit(buf_pool);
1654+ //buf_pool_mutex_exit(buf_pool);
1655
1656 return(fixed_pages_number);
1657 }
db82db79 1658@@ -4424,6 +4656,8 @@
d8778560
AM
1659 /* Find appropriate pool_info to store stats for this buffer pool */
1660 pool_info = &all_pool_info[pool_id];
b4e1fa2c
AM
1661
1662+ mutex_enter(&buf_pool->LRU_list_mutex);
1663+ mutex_enter(&buf_pool->free_list_mutex);
1664 buf_pool_mutex_enter(buf_pool);
1665 buf_flush_list_mutex_enter(buf_pool);
1666
db82db79 1667@@ -4534,6 +4768,8 @@
d8778560 1668 pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
b4e1fa2c
AM
1669
1670 buf_refresh_io_stats(buf_pool);
1671+ mutex_exit(&buf_pool->LRU_list_mutex);
1672+ mutex_exit(&buf_pool->free_list_mutex);
1673 buf_pool_mutex_exit(buf_pool);
1674 }
1675
db82db79 1676@@ -4775,11 +5011,13 @@
b4e1fa2c
AM
1677 {
1678 ulint len;
1679
1680- buf_pool_mutex_enter(buf_pool);
1681+ //buf_pool_mutex_enter(buf_pool);
1682+ mutex_enter(&buf_pool->free_list_mutex);
1683
1684 len = UT_LIST_GET_LEN(buf_pool->free);
1685
1686- buf_pool_mutex_exit(buf_pool);
1687+ //buf_pool_mutex_exit(buf_pool);
1688+ mutex_exit(&buf_pool->free_list_mutex);
1689
1690 return(len);
1691 }
db82db79
AM
1692--- a/storage/innobase/buf/buf0flu.c
1693+++ b/storage/innobase/buf/buf0flu.c
d8778560 1694@@ -307,7 +307,7 @@
b4e1fa2c
AM
1695
1696 ut_d(block->page.in_flush_list = TRUE);
1697 block->page.oldest_modification = lsn;
1698- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1699+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1700
1701 #ifdef UNIV_DEBUG_VALGRIND
1702 {
d8778560 1703@@ -401,14 +401,14 @@
b4e1fa2c
AM
1704 > block->page.oldest_modification) {
1705 ut_ad(b->in_flush_list);
1706 prev_b = b;
1707- b = UT_LIST_GET_NEXT(list, b);
1708+ b = UT_LIST_GET_NEXT(flush_list, b);
1709 }
1710 }
1711
1712 if (prev_b == NULL) {
1713- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1714+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1715 } else {
1716- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1717+ UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1718 prev_b, &block->page);
1719 }
1720
d8778560 1721@@ -434,7 +434,7 @@
b4e1fa2c
AM
1722 //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1723 //ut_ad(buf_pool_mutex_own(buf_pool));
1724 #endif
1725- //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1726+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1727 //ut_ad(bpage->in_LRU_list);
1728
1729 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
d8778560 1730@@ -470,14 +470,14 @@
b4e1fa2c
AM
1731 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1732 {
1733 #ifdef UNIV_DEBUG
1734- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1735- ut_ad(buf_pool_mutex_own(buf_pool));
1736+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1737+ //ut_ad(buf_pool_mutex_own(buf_pool));
1738 #endif
1739- ut_a(buf_page_in_file(bpage));
1740+ //ut_a(buf_page_in_file(bpage));
1741 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1742 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1743
1744- if (bpage->oldest_modification != 0
1745+ if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1746 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1747 ut_ad(bpage->in_flush_list);
1748
d8778560 1749@@ -508,7 +508,7 @@
b4e1fa2c
AM
1750 {
1751 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1752
1753- ut_ad(buf_pool_mutex_own(buf_pool));
1754+ //ut_ad(buf_pool_mutex_own(buf_pool));
1755 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1756 ut_ad(bpage->in_flush_list);
1757
db82db79 1758@@ -526,13 +526,13 @@
b4e1fa2c
AM
1759 return;
1760 case BUF_BLOCK_ZIP_DIRTY:
1761 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1762- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1763+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
db82db79 1764 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
b4e1fa2c 1765 buf_LRU_insert_zip_clean(bpage);
db82db79 1766 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
b4e1fa2c
AM
1767 break;
1768 case BUF_BLOCK_FILE_PAGE:
1769- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1770+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1771 break;
1772 }
1773
db82db79 1774@@ -576,7 +576,7 @@
b4e1fa2c
AM
1775 buf_page_t* prev_b = NULL;
1776 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1777
1778- ut_ad(buf_pool_mutex_own(buf_pool));
1779+ //ut_ad(buf_pool_mutex_own(buf_pool));
1780 /* Must reside in the same buffer pool. */
1781 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
1782
db82db79 1783@@ -605,18 +605,18 @@
b4e1fa2c
AM
1784 because we assert on in_flush_list in comparison function. */
1785 ut_d(bpage->in_flush_list = FALSE);
1786
1787- prev = UT_LIST_GET_PREV(list, bpage);
1788- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1789+ prev = UT_LIST_GET_PREV(flush_list, bpage);
1790+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1791
1792 if (prev) {
1793 ut_ad(prev->in_flush_list);
1794 UT_LIST_INSERT_AFTER(
1795- list,
1796+ flush_list,
1797 buf_pool->flush_list,
1798 prev, dpage);
1799 } else {
1800 UT_LIST_ADD_FIRST(
1801- list,
1802+ flush_list,
1803 buf_pool->flush_list,
1804 dpage);
1805 }
db82db79 1806@@ -1085,7 +1085,7 @@
b4e1fa2c
AM
1807
1808 #ifdef UNIV_DEBUG
1809 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1810- ut_ad(!buf_pool_mutex_own(buf_pool));
1811+ //ut_ad(!buf_pool_mutex_own(buf_pool));
1812 #endif
1813
1814 #ifdef UNIV_LOG_DEBUG
db82db79 1815@@ -1099,7 +1099,8 @@
b4e1fa2c
AM
1816 io_fixed and oldest_modification != 0. Thus, it cannot be
1817 relocated in the buffer pool or removed from flush_list or
1818 LRU_list. */
1819- ut_ad(!buf_pool_mutex_own(buf_pool));
1820+ //ut_ad(!buf_pool_mutex_own(buf_pool));
1821+ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
1822 ut_ad(!buf_flush_list_mutex_own(buf_pool));
1823 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
1824 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
db82db79 1825@@ -1179,7 +1180,7 @@
11822e22
AM
1826 buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
1827 buf_block_t* block) /*!< in/out: buffer control block */
1828 {
1829- ut_ad(buf_pool_mutex_own(buf_pool));
1830+ //ut_ad(buf_pool_mutex_own(buf_pool));
1831 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1832 ut_ad(mutex_own(&block->mutex));
1833
db82db79 1834@@ -1187,8 +1188,11 @@
11822e22
AM
1835 return(FALSE);
1836 }
1837
1838+ buf_pool_mutex_enter(buf_pool);
1839+
1840 if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
1841 || buf_pool->init_flush[BUF_FLUSH_LRU]) {
1842+ buf_pool_mutex_exit(buf_pool);
1843 /* There is already a flush batch of the same type running */
1844 return(FALSE);
1845 }
db82db79 1846@@ -1262,12 +1266,18 @@
b4e1fa2c
AM
1847 ibool is_uncompressed;
1848
1849 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1850- ut_ad(buf_pool_mutex_own(buf_pool));
1851+ //ut_ad(buf_pool_mutex_own(buf_pool));
1852+#ifdef UNIV_SYNC_DEBUG
1853+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
1854+#endif
1855 ut_ad(buf_page_in_file(bpage));
1856
1857 block_mutex = buf_page_get_mutex(bpage);
1858 ut_ad(mutex_own(block_mutex));
1859
1860+ buf_pool_mutex_enter(buf_pool);
1861+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1862+
1863 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
1864
1865 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
db82db79 1866@@ -1429,14 +1439,16 @@
b4e1fa2c
AM
1867
1868 buf_pool = buf_pool_get(space, i);
1869
1870- buf_pool_mutex_enter(buf_pool);
1871+ //buf_pool_mutex_enter(buf_pool);
1872+ rw_lock_s_lock(&buf_pool->page_hash_latch);
1873
1874 /* We only want to flush pages from this buffer pool. */
1875 bpage = buf_page_hash_get(buf_pool, space, i);
1876
1877 if (!bpage) {
1878
1879- buf_pool_mutex_exit(buf_pool);
1880+ //buf_pool_mutex_exit(buf_pool);
1881+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1882 continue;
1883 }
1884
db82db79 1885@@ -1448,11 +1460,9 @@
b4e1fa2c
AM
1886 if (flush_type != BUF_FLUSH_LRU
1887 || i == offset
1888 || buf_page_is_old(bpage)) {
1889- mutex_t* block_mutex = buf_page_get_mutex(bpage);
b4e1fa2c
AM
1890+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
1891
11822e22
AM
1892- mutex_enter(block_mutex);
1893-
b4e1fa2c
AM
1894- if (buf_flush_ready_for_flush(bpage, flush_type)
1895+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
1896 && (i == offset || !bpage->buf_fix_count)) {
1897 /* We only try to flush those
1898 neighbors != offset where the buf fix
db82db79 1899@@ -1468,11 +1478,12 @@
b4e1fa2c
AM
1900 ut_ad(!buf_pool_mutex_own(buf_pool));
1901 count++;
1902 continue;
1903- } else {
1904+ } else if (block_mutex) {
1905 mutex_exit(block_mutex);
1906 }
1907 }
1908- buf_pool_mutex_exit(buf_pool);
1909+ //buf_pool_mutex_exit(buf_pool);
1910+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
1911 }
1912
1913 return(count);
db82db79 1914@@ -1505,21 +1516,25 @@
b4e1fa2c
AM
1915 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1916 #endif /* UNIV_DEBUG */
1917
1918- ut_ad(buf_pool_mutex_own(buf_pool));
1919+ //ut_ad(buf_pool_mutex_own(buf_pool));
1920+ ut_ad(flush_type != BUF_FLUSH_LRU
1921+ || mutex_own(&buf_pool->LRU_list_mutex));
1922
1923- block_mutex = buf_page_get_mutex(bpage);
1924- mutex_enter(block_mutex);
1925+ block_mutex = buf_page_get_mutex_enter(bpage);
1926
1927- ut_a(buf_page_in_file(bpage));
1928+ //ut_a(buf_page_in_file(bpage));
1929
1930- if (buf_flush_ready_for_flush(bpage, flush_type)) {
1931+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
1932 ulint space;
1933 ulint offset;
1934 buf_pool_t* buf_pool;
1935
1936 buf_pool = buf_pool_from_bpage(bpage);
1937
1938- buf_pool_mutex_exit(buf_pool);
1939+ //buf_pool_mutex_exit(buf_pool);
1940+ if (flush_type == BUF_FLUSH_LRU) {
1941+ mutex_exit(&buf_pool->LRU_list_mutex);
1942+ }
1943
1944 /* These fields are protected by both the
1945 buffer pool mutex and block mutex. */
db82db79 1946@@ -1535,13 +1550,18 @@
b4e1fa2c
AM
1947 *count,
1948 n_to_flush);
1949
1950- buf_pool_mutex_enter(buf_pool);
1951+ //buf_pool_mutex_enter(buf_pool);
1952+ if (flush_type == BUF_FLUSH_LRU) {
1953+ mutex_enter(&buf_pool->LRU_list_mutex);
1954+ }
1955 flushed = TRUE;
1956- } else {
1957+ } else if (block_mutex) {
1958 mutex_exit(block_mutex);
1959 }
1960
1961- ut_ad(buf_pool_mutex_own(buf_pool));
1962+ //ut_ad(buf_pool_mutex_own(buf_pool));
1963+ ut_ad(flush_type != BUF_FLUSH_LRU
1964+ || mutex_own(&buf_pool->LRU_list_mutex));
1965
1966 return(flushed);
1967 }
db82db79 1968@@ -1562,7 +1582,8 @@
b4e1fa2c
AM
1969 buf_page_t* bpage;
1970 ulint count = 0;
1971
1972- ut_ad(buf_pool_mutex_own(buf_pool));
1973+ //ut_ad(buf_pool_mutex_own(buf_pool));
1974+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
1975
1976 do {
1977 /* Start from the end of the list looking for a
db82db79 1978@@ -1584,7 +1605,8 @@
b4e1fa2c
AM
1979 should be flushed, we factor in this value. */
1980 buf_lru_flush_page_count += count;
1981
1982- ut_ad(buf_pool_mutex_own(buf_pool));
1983+ //ut_ad(buf_pool_mutex_own(buf_pool));
1984+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
1985
1986 return(count);
1987 }
db82db79 1988@@ -1612,9 +1634,10 @@
b4e1fa2c
AM
1989 {
1990 ulint len;
1991 buf_page_t* bpage;
1992+ buf_page_t* prev_bpage = NULL;
1993 ulint count = 0;
1994
1995- ut_ad(buf_pool_mutex_own(buf_pool));
1996+ //ut_ad(buf_pool_mutex_own(buf_pool));
1997
1998 /* If we have flushed enough, leave the loop */
1999 do {
db82db79 2000@@ -1633,6 +1656,7 @@
b4e1fa2c
AM
2001
2002 if (bpage) {
2003 ut_a(bpage->oldest_modification > 0);
2004+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2005 }
2006
2007 if (!bpage || bpage->oldest_modification >= lsn_limit) {
db82db79 2008@@ -1674,9 +1698,17 @@
b4e1fa2c
AM
2009 break;
2010 }
2011
2012- bpage = UT_LIST_GET_PREV(list, bpage);
2013+ bpage = UT_LIST_GET_PREV(flush_list, bpage);
2014
2015- ut_ad(!bpage || bpage->in_flush_list);
2016+ //ut_ad(!bpage || bpage->in_flush_list);
2017+ if (bpage != prev_bpage) {
2018+ /* the search might warp.. retrying */
2019+ buf_flush_list_mutex_exit(buf_pool);
2020+ break;
2021+ }
2022+ if (bpage) {
2023+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2024+ }
2025
2026 buf_flush_list_mutex_exit(buf_pool);
2027
db82db79 2028@@ -1685,7 +1717,7 @@
b4e1fa2c
AM
2029
2030 } while (count < min_n && bpage != NULL && len > 0);
2031
2032- ut_ad(buf_pool_mutex_own(buf_pool));
2033+ //ut_ad(buf_pool_mutex_own(buf_pool));
2034
2035 return(count);
2036 }
db82db79 2037@@ -1724,13 +1756,15 @@
adf0fb13 2038 || sync_thread_levels_empty_except_dict());
b4e1fa2c
AM
2039 #endif /* UNIV_SYNC_DEBUG */
2040
2041- buf_pool_mutex_enter(buf_pool);
2042+ //buf_pool_mutex_enter(buf_pool);
2043
2044 /* Note: The buffer pool mutex is released and reacquired within
2045 the flush functions. */
2046 switch(flush_type) {
2047 case BUF_FLUSH_LRU:
2048+ mutex_enter(&buf_pool->LRU_list_mutex);
2049 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2050+ mutex_exit(&buf_pool->LRU_list_mutex);
2051 break;
2052 case BUF_FLUSH_LIST:
2053 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
db82db79 2054@@ -1739,7 +1773,7 @@
b4e1fa2c
AM
2055 ut_error;
2056 }
2057
2058- buf_pool_mutex_exit(buf_pool);
2059+ //buf_pool_mutex_exit(buf_pool);
2060
2061 buf_flush_buffered_writes();
2062
db82db79 2063@@ -1995,7 +2029,7 @@
b4e1fa2c
AM
2064 retry:
2065 //buf_pool_mutex_enter(buf_pool);
2066 if (have_LRU_mutex)
2067- buf_pool_mutex_enter(buf_pool);
2068+ mutex_enter(&buf_pool->LRU_list_mutex);
2069
2070 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2071
db82db79 2072@@ -2012,15 +2046,15 @@
b4e1fa2c
AM
2073 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2074 continue;
2075 }
2076- block_mutex = buf_page_get_mutex(bpage);
2077-
2078- mutex_enter(block_mutex);
2079+ block_mutex = buf_page_get_mutex_enter(bpage);
2080
2081- if (buf_flush_ready_for_replace(bpage)) {
2082+ if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2083 n_replaceable++;
2084 }
2085
2086- mutex_exit(block_mutex);
2087+ if (block_mutex) {
2088+ mutex_exit(block_mutex);
2089+ }
2090
2091 distance++;
2092
db82db79 2093@@ -2029,7 +2063,7 @@
b4e1fa2c
AM
2094
2095 //buf_pool_mutex_exit(buf_pool);
2096 if (have_LRU_mutex)
2097- buf_pool_mutex_exit(buf_pool);
2098+ mutex_exit(&buf_pool->LRU_list_mutex);
2099
2100 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2101
db82db79 2102@@ -2228,7 +2262,7 @@
b4e1fa2c
AM
2103
2104 ut_ad(buf_flush_list_mutex_own(buf_pool));
2105
2106- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2107+ UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2108 ut_ad(ut_list_node_313->in_flush_list));
2109
2110 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
db82db79 2111@@ -2268,7 +2302,7 @@
b4e1fa2c
AM
2112 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2113 }
2114
2115- bpage = UT_LIST_GET_NEXT(list, bpage);
2116+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2117
2118 ut_a(!bpage || om >= bpage->oldest_modification);
2119 }
db82db79
AM
2120--- a/storage/innobase/buf/buf0lru.c
2121+++ b/storage/innobase/buf/buf0lru.c
b4e1fa2c
AM
2122@@ -143,8 +143,9 @@
2123 void
2124 buf_LRU_block_free_hashed_page(
2125 /*===========================*/
2126- buf_block_t* block); /*!< in: block, must contain a file page and
2127+ buf_block_t* block, /*!< in: block, must contain a file page and
2128 be in a state where it can be freed */
2129+ ibool have_page_hash_mutex);
2130
2131 /******************************************************************//**
2132 Determines if the unzip_LRU list should be used for evicting a victim
2133@@ -154,15 +155,20 @@
2134 ibool
2135 buf_LRU_evict_from_unzip_LRU(
2136 /*=========================*/
2137- buf_pool_t* buf_pool)
2138+ buf_pool_t* buf_pool,
2139+ ibool have_LRU_mutex)
2140 {
2141 ulint io_avg;
2142 ulint unzip_avg;
2143
2144- ut_ad(buf_pool_mutex_own(buf_pool));
2145+ //ut_ad(buf_pool_mutex_own(buf_pool));
2146
2147+ if (!have_LRU_mutex)
2148+ mutex_enter(&buf_pool->LRU_list_mutex);
2149 /* If the unzip_LRU list is empty, we can only use the LRU. */
2150 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2151+ if (!have_LRU_mutex)
2152+ mutex_exit(&buf_pool->LRU_list_mutex);
2153 return(FALSE);
2154 }
2155
2156@@ -171,14 +177,20 @@
2157 decompressed pages in the buffer pool. */
2158 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2159 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2160+ if (!have_LRU_mutex)
2161+ mutex_exit(&buf_pool->LRU_list_mutex);
2162 return(FALSE);
2163 }
2164
2165 /* If eviction hasn't started yet, we assume by default
2166 that a workload is disk bound. */
2167 if (buf_pool->freed_page_clock == 0) {
2168+ if (!have_LRU_mutex)
2169+ mutex_exit(&buf_pool->LRU_list_mutex);
2170 return(TRUE);
2171 }
2172+ if (!have_LRU_mutex)
2173+ mutex_exit(&buf_pool->LRU_list_mutex);
2174
2175 /* Calculate the average over past intervals, and add the values
2176 of the current interval. */
adf0fb13 2177@@ -246,18 +258,25 @@
b4e1fa2c
AM
2178 page_arr = ut_malloc(
2179 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2180
2181- buf_pool_mutex_enter(buf_pool);
2182+ //buf_pool_mutex_enter(buf_pool);
2183+ mutex_enter(&buf_pool->LRU_list_mutex);
adf0fb13 2184 num_entries = 0;
b4e1fa2c
AM
2185
2186 scan_again:
b4e1fa2c
AM
2187 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2188
2189 while (bpage != NULL) {
adf0fb13 2190+ /* bpage->state,space,io_fix,buf_fix_count are protected by block_mutex at XtraDB */
b4e1fa2c
AM
2191+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2192 buf_page_t* prev_bpage;
adf0fb13 2193 ibool is_fixed;
b4e1fa2c 2194
b4e1fa2c
AM
2195 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2196
adf0fb13 2197+ if (UNIV_UNLIKELY(!block_mutex)) {
b4e1fa2c
AM
2198+ goto next_page;
2199+ }
2200+
2201 ut_a(buf_page_in_file(bpage));
2202
2203 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
adf0fb13
AM
2204@@ -266,23 +285,27 @@
2205 /* Compressed pages are never hashed.
2206 Skip blocks of other tablespaces.
2207 Skip I/O-fixed blocks (to be dealt with later). */
2208+ mutex_exit(block_mutex);
2209 next_page:
2210 bpage = prev_bpage;
2211 continue;
2212 }
b4e1fa2c 2213
adf0fb13
AM
2214- mutex_enter(&((buf_block_t*) bpage)->mutex);
2215+ //mutex_enter(&((buf_block_t*) bpage)->mutex);
2216 is_fixed = bpage->buf_fix_count > 0
2217 || !((buf_block_t*) bpage)->is_hashed;
2218- mutex_exit(&((buf_block_t*) bpage)->mutex);
2219+ //mutex_exit(&((buf_block_t*) bpage)->mutex);
b4e1fa2c 2220
adf0fb13
AM
2221 if (is_fixed) {
2222+ mutex_exit(block_mutex);
2223 goto next_page;
2224 }
b4e1fa2c 2225
adf0fb13
AM
2226 /* Store the page number so that we can drop the hash
2227 index in a batch later. */
2228 page_arr[num_entries] = bpage->offset;
2229+ mutex_exit(block_mutex);
2230+
2231 ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
2232 ++num_entries;
b4e1fa2c 2233
adf0fb13
AM
2234@@ -292,14 +315,16 @@
2235
2236 /* Array full. We release the buf_pool->mutex to obey
2237 the latching order. */
2238- buf_pool_mutex_exit(buf_pool);
2239+ //buf_pool_mutex_exit(buf_pool);
2240+ mutex_exit(&buf_pool->LRU_list_mutex);
2241
2242 buf_LRU_drop_page_hash_batch(
2243 id, zip_size, page_arr, num_entries);
2244
2245 num_entries = 0;
2246
2247- buf_pool_mutex_enter(buf_pool);
2248+ //buf_pool_mutex_enter(buf_pool);
2249+ mutex_enter(&buf_pool->LRU_list_mutex);
2250
2251 /* Note that we released the buf_pool mutex above
2252 after reading the prev_bpage during processing of a
2253@@ -317,13 +342,23 @@
2254 /* If, however, bpage has been removed from LRU list
2255 to the free list then we should restart the scan.
2256 bpage->state is protected by buf_pool mutex. */
2257+
2258+ /* obtain block_mutex again to avoid race condition of bpage->state */
2259+ block_mutex = buf_page_get_mutex_enter(bpage);
2260+ if (!block_mutex) {
2261+ goto scan_again;
2262+ }
2263+
2264 if (bpage
2265 && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2266+ mutex_exit(block_mutex);
2267 goto scan_again;
b4e1fa2c 2268 }
adf0fb13 2269+ mutex_exit(block_mutex);
b4e1fa2c
AM
2270 }
2271
2272- buf_pool_mutex_exit(buf_pool);
2273+ //buf_pool_mutex_exit(buf_pool);
2274+ mutex_exit(&buf_pool->LRU_list_mutex);
2275
2276 /* Drop any remaining batch of search hashed pages. */
2277 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
adf0fb13 2278@@ -345,7 +380,9 @@
b4e1fa2c
AM
2279 ibool all_freed;
2280
2281 scan_again:
2282- buf_pool_mutex_enter(buf_pool);
2283+ //buf_pool_mutex_enter(buf_pool);
2284+ mutex_enter(&buf_pool->LRU_list_mutex);
2285+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2286
2287 all_freed = TRUE;
2288
db82db79 2289@@ -375,8 +412,15 @@
b4e1fa2c 2290 all_freed = FALSE;
db82db79 2291 goto next_page;
b4e1fa2c 2292 } else {
db82db79 2293- block_mutex = buf_page_get_mutex(bpage);
b4e1fa2c 2294- mutex_enter(block_mutex);
db82db79 2295+ block_mutex = buf_page_get_mutex_enter(bpage);
b4e1fa2c
AM
2296+
2297+ if (!block_mutex) {
2298+ /* It may be impossible case...
2299+ Something wrong, so will be scan_again */
2300+
2301+ all_freed = FALSE;
db82db79 2302+ goto next_page;
b4e1fa2c
AM
2303+ }
2304
2305 if (bpage->buf_fix_count > 0) {
2306
db82db79
AM
2307@@ -409,7 +453,9 @@
2308 ulint page_no;
2309 ulint zip_size;
b4e1fa2c 2310
db82db79
AM
2311- buf_pool_mutex_exit(buf_pool);
2312+ //buf_pool_mutex_exit(buf_pool);
2313+ mutex_exit(&buf_pool->LRU_list_mutex);
2314+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2315
2316 zip_size = buf_page_get_zip_size(bpage);
2317 page_no = buf_page_get_page_no(bpage);
2318@@ -433,7 +479,7 @@
b4e1fa2c 2319
db82db79
AM
2320 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2321 != BUF_BLOCK_ZIP_FREE) {
2322- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2323+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, TRUE);
2324 mutex_exit(block_mutex);
2325 } else {
2326 /* The block_mutex should have been released
2327@@ -446,7 +492,9 @@
b4e1fa2c
AM
2328 bpage = prev_bpage;
2329 }
2330
2331- buf_pool_mutex_exit(buf_pool);
2332+ //buf_pool_mutex_exit(buf_pool);
2333+ mutex_exit(&buf_pool->LRU_list_mutex);
2334+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2335
2336 if (!all_freed) {
2337 os_thread_sleep(20000);
db82db79 2338@@ -493,7 +541,9 @@
b4e1fa2c
AM
2339 buf_page_t* b;
2340 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2341
2342- ut_ad(buf_pool_mutex_own(buf_pool));
2343+ //ut_ad(buf_pool_mutex_own(buf_pool));
2344+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
11822e22 2345+ ut_ad(mutex_own(&buf_pool->zip_mutex));
b4e1fa2c
AM
2346 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2347
2348 /* Find the first successor of bpage in the LRU list
db82db79 2349@@ -501,17 +551,17 @@
b4e1fa2c
AM
2350 b = bpage;
2351 do {
2352 b = UT_LIST_GET_NEXT(LRU, b);
2353- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2354+ } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2355
2356 /* Insert bpage before b, i.e., after the predecessor of b. */
2357 if (b) {
2358- b = UT_LIST_GET_PREV(list, b);
2359+ b = UT_LIST_GET_PREV(zip_list, b);
2360 }
2361
2362 if (b) {
2363- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2364+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2365 } else {
2366- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2367+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2368 }
2369 }
db82db79
AM
2370 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2371@@ -525,18 +575,19 @@
b4e1fa2c
AM
2372 buf_LRU_free_from_unzip_LRU_list(
2373 /*=============================*/
2374 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2375- ulint n_iterations) /*!< in: how many times this has
2376+ ulint n_iterations, /*!< in: how many times this has
2377 been called repeatedly without
2378 result: a high value means that
2379 we should search farther; we will
2380 search n_iterations / 5 of the
2381 unzip_LRU list, or nothing if
2382 n_iterations >= 5 */
2383+ ibool have_LRU_mutex)
2384 {
2385 buf_block_t* block;
2386 ulint distance;
2387
2388- ut_ad(buf_pool_mutex_own(buf_pool));
2389+ //ut_ad(buf_pool_mutex_own(buf_pool));
2390
2391 /* Theoratically it should be much easier to find a victim
2392 from unzip_LRU as we can choose even a dirty block (as we'll
db82db79 2393@@ -546,7 +597,7 @@
b4e1fa2c
AM
2394 if we have done five iterations so far. */
2395
2396 if (UNIV_UNLIKELY(n_iterations >= 5)
2397- || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2398+ || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2399
2400 return(FALSE);
2401 }
db82db79 2402@@ -554,18 +605,25 @@
b4e1fa2c
AM
2403 distance = 100 + (n_iterations
2404 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2405
2406+restart:
2407 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2408 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2409 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2410
db82db79 2411 ibool freed;
b4e1fa2c
AM
2412
2413+ mutex_enter(&block->mutex);
2414+ if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2415+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2416+ mutex_exit(&block->mutex);
2417+ goto restart;
2418+ }
2419+
2420 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2421 ut_ad(block->in_unzip_LRU_list);
2422 ut_ad(block->page.in_LRU_list);
2423
2424- mutex_enter(&block->mutex);
df1b5770
AM
2425- freed = buf_LRU_free_block(&block->page, FALSE);
2426+ freed = buf_LRU_free_block(&block->page, FALSE, have_LRU_mutex);
b4e1fa2c
AM
2427 mutex_exit(&block->mutex);
2428
db82db79
AM
2429 if (freed) {
2430@@ -584,35 +642,46 @@
b4e1fa2c
AM
2431 buf_LRU_free_from_common_LRU_list(
2432 /*==============================*/
2433 buf_pool_t* buf_pool,
2434- ulint n_iterations)
2435+ ulint n_iterations,
2436 /*!< in: how many times this has been called
2437 repeatedly without result: a high value means
2438 that we should search farther; if
2439 n_iterations < 10, then we search
2440 n_iterations / 10 * buf_pool->curr_size
2441 pages from the end of the LRU list */
2442+ ibool have_LRU_mutex)
2443 {
2444 buf_page_t* bpage;
2445 ulint distance;
2446
2447- ut_ad(buf_pool_mutex_own(buf_pool));
2448+ //ut_ad(buf_pool_mutex_own(buf_pool));
2449
2450 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2451
2452+restart:
2453 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2454 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2455 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
db82db79
AM
2456
2457 ibool freed;
2458 unsigned accessed;
2459- mutex_t* block_mutex = buf_page_get_mutex(bpage);
2460+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
b4e1fa2c
AM
2461+
2462+ if (!block_mutex) {
2463+ goto restart;
2464+ }
2465+
2466+ if (!bpage->in_LRU_list
2467+ || !buf_page_in_file(bpage)) {
2468+ mutex_exit(block_mutex);
2469+ goto restart;
2470+ }
2471
2472 ut_ad(buf_page_in_file(bpage));
2473 ut_ad(bpage->in_LRU_list);
2474
2475- mutex_enter(block_mutex);
2476 accessed = buf_page_is_accessed(bpage);
df1b5770
AM
2477- freed = buf_LRU_free_block(bpage, TRUE);
2478+ freed = buf_LRU_free_block(bpage, TRUE, have_LRU_mutex);
b4e1fa2c
AM
2479 mutex_exit(block_mutex);
2480
db82db79
AM
2481 if (freed) {
2482@@ -649,16 +718,23 @@
b4e1fa2c
AM
2483 n_iterations / 5 of the unzip_LRU list. */
2484 {
2485 ibool freed = FALSE;
2486+ ibool have_LRU_mutex = FALSE;
2487
2488- buf_pool_mutex_enter(buf_pool);
2489+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2490+ have_LRU_mutex = TRUE;
db82db79
AM
2491
2492- freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
b4e1fa2c
AM
2493+ //buf_pool_mutex_enter(buf_pool);
2494+ if (have_LRU_mutex)
2495+ mutex_enter(&buf_pool->LRU_list_mutex);
db82db79 2496+
b4e1fa2c
AM
2497+ freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2498
2499 if (!freed) {
2500 freed = buf_LRU_free_from_common_LRU_list(
2501- buf_pool, n_iterations);
2502+ buf_pool, n_iterations, have_LRU_mutex);
2503 }
2504
2505+ buf_pool_mutex_enter(buf_pool);
2506 if (!freed) {
2507 buf_pool->LRU_flush_ended = 0;
2508 } else if (buf_pool->LRU_flush_ended > 0) {
db82db79 2509@@ -666,6 +742,8 @@
b4e1fa2c
AM
2510 }
2511
2512 buf_pool_mutex_exit(buf_pool);
2513+ if (have_LRU_mutex)
2514+ mutex_exit(&buf_pool->LRU_list_mutex);
2515
2516 return(freed);
2517 }
db82db79 2518@@ -726,7 +804,9 @@
b4e1fa2c
AM
2519
2520 buf_pool = buf_pool_from_array(i);
2521
2522- buf_pool_mutex_enter(buf_pool);
2523+ //buf_pool_mutex_enter(buf_pool);
2524+ mutex_enter(&buf_pool->LRU_list_mutex);
2525+ mutex_enter(&buf_pool->free_list_mutex);
2526
2527 if (!recv_recovery_on
2528 && UT_LIST_GET_LEN(buf_pool->free)
db82db79 2529@@ -736,7 +816,9 @@
b4e1fa2c
AM
2530 ret = TRUE;
2531 }
2532
2533- buf_pool_mutex_exit(buf_pool);
2534+ //buf_pool_mutex_exit(buf_pool);
2535+ mutex_exit(&buf_pool->LRU_list_mutex);
2536+ mutex_exit(&buf_pool->free_list_mutex);
2537 }
2538
2539 return(ret);
db82db79 2540@@ -754,9 +836,10 @@
b4e1fa2c
AM
2541 {
2542 buf_block_t* block;
2543
2544- ut_ad(buf_pool_mutex_own(buf_pool));
2545+ //ut_ad(buf_pool_mutex_own(buf_pool));
2546
2547- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2548+ mutex_enter(&buf_pool->free_list_mutex);
2549+ block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2550
2551 if (block) {
2552
db82db79 2553@@ -765,7 +848,9 @@
b4e1fa2c
AM
2554 ut_ad(!block->page.in_flush_list);
2555 ut_ad(!block->page.in_LRU_list);
2556 ut_a(!buf_page_in_file(&block->page));
2557- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2558+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2559+
2560+ mutex_exit(&buf_pool->free_list_mutex);
2561
2562 mutex_enter(&block->mutex);
2563
db82db79 2564@@ -775,6 +860,8 @@
b4e1fa2c
AM
2565 ut_ad(buf_pool_from_block(block) == buf_pool);
2566
2567 mutex_exit(&block->mutex);
2568+ } else {
2569+ mutex_exit(&buf_pool->free_list_mutex);
2570 }
2571
2572 return(block);
db82db79 2573@@ -797,7 +884,7 @@
b4e1fa2c
AM
2574 ibool mon_value_was = FALSE;
2575 ibool started_monitor = FALSE;
2576 loop:
2577- buf_pool_mutex_enter(buf_pool);
2578+ //buf_pool_mutex_enter(buf_pool);
2579
2580 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2581 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
db82db79 2582@@ -865,7 +952,7 @@
b4e1fa2c 2583
df1b5770
AM
2584 /* If there is a block in the free list, take it */
2585 block = buf_LRU_get_free_only(buf_pool);
b4e1fa2c
AM
2586- buf_pool_mutex_exit(buf_pool);
2587+ //buf_pool_mutex_exit(buf_pool);
2588
df1b5770
AM
2589 if (block) {
2590 ut_ad(buf_pool_from_block(block) == buf_pool);
db82db79 2591@@ -965,7 +1052,8 @@
b4e1fa2c
AM
2592 ulint new_len;
2593
2594 ut_a(buf_pool->LRU_old);
2595- ut_ad(buf_pool_mutex_own(buf_pool));
2596+ //ut_ad(buf_pool_mutex_own(buf_pool));
2597+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2598 ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2599 ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2600 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
db82db79 2601@@ -1031,7 +1119,8 @@
b4e1fa2c
AM
2602 {
2603 buf_page_t* bpage;
2604
2605- ut_ad(buf_pool_mutex_own(buf_pool));
2606+ //ut_ad(buf_pool_mutex_own(buf_pool));
2607+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2608 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2609
2610 /* We first initialize all blocks in the LRU list as old and then use
db82db79 2611@@ -1066,13 +1155,14 @@
b4e1fa2c
AM
2612 ut_ad(buf_pool);
2613 ut_ad(bpage);
2614 ut_ad(buf_page_in_file(bpage));
2615- ut_ad(buf_pool_mutex_own(buf_pool));
2616+ //ut_ad(buf_pool_mutex_own(buf_pool));
2617+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2618
2619 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2620 buf_block_t* block = (buf_block_t*) bpage;
2621
2622 ut_ad(block->in_unzip_LRU_list);
2623- ut_d(block->in_unzip_LRU_list = FALSE);
2624+ block->in_unzip_LRU_list = FALSE;
2625
2626 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2627 }
db82db79 2628@@ -1090,7 +1180,8 @@
b4e1fa2c
AM
2629
2630 ut_ad(buf_pool);
2631 ut_ad(bpage);
2632- ut_ad(buf_pool_mutex_own(buf_pool));
2633+ //ut_ad(buf_pool_mutex_own(buf_pool));
2634+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2635
2636 ut_a(buf_page_in_file(bpage));
2637
db82db79 2638@@ -1167,12 +1258,13 @@
b4e1fa2c
AM
2639
2640 ut_ad(buf_pool);
2641 ut_ad(block);
2642- ut_ad(buf_pool_mutex_own(buf_pool));
2643+ //ut_ad(buf_pool_mutex_own(buf_pool));
2644+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2645
2646 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2647
2648 ut_ad(!block->in_unzip_LRU_list);
2649- ut_d(block->in_unzip_LRU_list = TRUE);
2650+ block->in_unzip_LRU_list = TRUE;
2651
2652 if (old) {
2653 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
db82db79 2654@@ -1193,7 +1285,8 @@
b4e1fa2c
AM
2655
2656 ut_ad(buf_pool);
2657 ut_ad(bpage);
2658- ut_ad(buf_pool_mutex_own(buf_pool));
2659+ //ut_ad(buf_pool_mutex_own(buf_pool));
2660+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2661
2662 ut_a(buf_page_in_file(bpage));
2663
db82db79 2664@@ -1244,7 +1337,8 @@
b4e1fa2c
AM
2665
2666 ut_ad(buf_pool);
2667 ut_ad(bpage);
2668- ut_ad(buf_pool_mutex_own(buf_pool));
2669+ //ut_ad(buf_pool_mutex_own(buf_pool));
2670+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2671
2672 ut_a(buf_page_in_file(bpage));
2673 ut_ad(!bpage->in_LRU_list);
db82db79 2674@@ -1323,7 +1417,8 @@
b4e1fa2c
AM
2675 {
2676 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2677
2678- ut_ad(buf_pool_mutex_own(buf_pool));
2679+ //ut_ad(buf_pool_mutex_own(buf_pool));
2680+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2681
2682 if (bpage->old) {
2683 buf_pool->stat.n_pages_made_young++;
db82db79 2684@@ -1362,17 +1457,18 @@
df1b5770
AM
2685 buf_LRU_free_block(
2686 /*===============*/
b4e1fa2c 2687 buf_page_t* bpage, /*!< in: block to be freed */
df1b5770
AM
2688- ibool zip) /*!< in: TRUE if should remove also the
2689+ ibool zip, /*!< in: TRUE if should remove also the
b4e1fa2c 2690 compressed page of an uncompressed page */
b4e1fa2c
AM
2691+ ibool have_LRU_mutex)
2692 {
2693 buf_page_t* b = NULL;
2694 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2695 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2696
2697- ut_ad(buf_pool_mutex_own(buf_pool));
2698+ //ut_ad(buf_pool_mutex_own(buf_pool));
2699 ut_ad(mutex_own(block_mutex));
2700 ut_ad(buf_page_in_file(bpage));
2701- ut_ad(bpage->in_LRU_list);
2702+ //ut_ad(bpage->in_LRU_list);
2703 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2704 #if UNIV_WORD_SIZE == 4
2705 /* On 32-bit systems, there is no padding in buf_page_t. On
db82db79 2706@@ -1381,7 +1477,7 @@
b4e1fa2c
AM
2707 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2708 #endif
2709
2710- if (!buf_page_can_relocate(bpage)) {
2711+ if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2712
2713 /* Do not free buffer-fixed or I/O-fixed blocks. */
db82db79
AM
2714 return(FALSE);
2715@@ -1415,7 +1511,7 @@
b4e1fa2c 2716 alloc:
db82db79
AM
2717 b = buf_page_alloc_descriptor();
2718 ut_a(b);
b4e1fa2c
AM
2719- memcpy(b, bpage, sizeof *b);
2720+ //memcpy(b, bpage, sizeof *b);
2721 }
2722
2723 #ifdef UNIV_DEBUG
db82db79 2724@@ -1426,6 +1522,39 @@
b4e1fa2c
AM
2725 }
2726 #endif /* UNIV_DEBUG */
2727
2728+ /* not to break latch order, must re-enter block_mutex */
2729+ mutex_exit(block_mutex);
2730+
2731+ if (!have_LRU_mutex)
2732+ mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2733+ rw_lock_x_lock(&buf_pool->page_hash_latch);
2734+ mutex_enter(block_mutex);
2735+
2736+ /* recheck states of block */
2737+ if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2738+ || !buf_page_can_relocate(bpage)) {
2739+not_freed:
2740+ if (b) {
2741+ buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2742+ }
2743+ if (!have_LRU_mutex)
2744+ mutex_exit(&buf_pool->LRU_list_mutex);
2745+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
db82db79 2746+ return(FALSE);
b4e1fa2c
AM
2747+ } else if (zip || !bpage->zip.data) {
2748+ if (bpage->oldest_modification)
2749+ goto not_freed;
2750+ } else if (bpage->oldest_modification) {
2751+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2752+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2753+ goto not_freed;
2754+ }
2755+ }
2756+
2757+ if (b) {
2758+ memcpy(b, bpage, sizeof *b);
2759+ }
2760+
2761 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2762 != BUF_BLOCK_ZIP_FREE) {
2763 ut_a(bpage->buf_fix_count == 0);
db82db79 2764@@ -1442,6 +1571,10 @@
b4e1fa2c
AM
2765
2766 ut_a(!hash_b);
2767
2768+ while (prev_b && !prev_b->in_LRU_list) {
2769+ prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2770+ }
2771+
2772 b->state = b->oldest_modification
2773 ? BUF_BLOCK_ZIP_DIRTY
2774 : BUF_BLOCK_ZIP_PAGE;
db82db79 2775@@ -1517,6 +1650,7 @@
adf0fb13
AM
2776 buf_LRU_add_block_low(b, buf_page_is_old(b));
2777 }
2778
2779+ mutex_enter(&buf_pool->zip_mutex);
2780 if (b->state == BUF_BLOCK_ZIP_PAGE) {
db82db79 2781 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
adf0fb13 2782 buf_LRU_insert_zip_clean(b);
db82db79 2783@@ -1534,9 +1668,12 @@
adf0fb13
AM
2784 buf_pool->mutex and block_mutex. */
2785 b->buf_fix_count++;
df1b5770 2786 b->io_fix = BUF_IO_READ;
adf0fb13 2787+ mutex_exit(&buf_pool->zip_mutex);
b4e1fa2c
AM
2788 }
2789
2790- buf_pool_mutex_exit(buf_pool);
2791+ //buf_pool_mutex_exit(buf_pool);
2792+ mutex_exit(&buf_pool->LRU_list_mutex);
2793+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2794 mutex_exit(block_mutex);
2795
2796 /* Remove possible adaptive hash index on the page.
db82db79 2797@@ -1568,7 +1705,9 @@
b4e1fa2c
AM
2798 : BUF_NO_CHECKSUM_MAGIC);
2799 }
2800
2801- buf_pool_mutex_enter(buf_pool);
2802+ //buf_pool_mutex_enter(buf_pool);
2803+ if (have_LRU_mutex)
2804+ mutex_enter(&buf_pool->LRU_list_mutex);
2805 mutex_enter(block_mutex);
2806
2807 if (b) {
db82db79 2808@@ -1578,13 +1717,17 @@
b4e1fa2c
AM
2809 mutex_exit(&buf_pool->zip_mutex);
2810 }
2811
2812- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2813+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
2814 } else {
2815 /* The block_mutex should have been released by
2816 buf_LRU_block_remove_hashed_page() when it returns
2817 BUF_BLOCK_ZIP_FREE. */
2818 ut_ad(block_mutex == &buf_pool->zip_mutex);
2819 mutex_enter(block_mutex);
2820+
2821+ if (!have_LRU_mutex)
2822+ mutex_exit(&buf_pool->LRU_list_mutex);
2823+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2824 }
2825
db82db79
AM
2826 return(TRUE);
2827@@ -1596,13 +1739,14 @@
b4e1fa2c
AM
2828 void
2829 buf_LRU_block_free_non_file_page(
2830 /*=============================*/
2831- buf_block_t* block) /*!< in: block, must not contain a file page */
2832+ buf_block_t* block, /*!< in: block, must not contain a file page */
2833+ ibool have_page_hash_mutex)
2834 {
2835 void* data;
2836 buf_pool_t* buf_pool = buf_pool_from_block(block);
2837
2838 ut_ad(block);
2839- ut_ad(buf_pool_mutex_own(buf_pool));
2840+ //ut_ad(buf_pool_mutex_own(buf_pool));
2841 ut_ad(mutex_own(&block->mutex));
2842
2843 switch (buf_block_get_state(block)) {
db82db79 2844@@ -1636,18 +1780,21 @@
b4e1fa2c
AM
2845 if (data) {
2846 block->page.zip.data = NULL;
2847 mutex_exit(&block->mutex);
2848- buf_pool_mutex_exit_forbid(buf_pool);
2849+ //buf_pool_mutex_exit_forbid(buf_pool);
2850
2851 buf_buddy_free(
2852- buf_pool, data, page_zip_get_size(&block->page.zip));
2853+ buf_pool, data, page_zip_get_size(&block->page.zip),
2854+ have_page_hash_mutex);
2855
2856- buf_pool_mutex_exit_allow(buf_pool);
2857+ //buf_pool_mutex_exit_allow(buf_pool);
2858 mutex_enter(&block->mutex);
2859 page_zip_set_size(&block->page.zip, 0);
2860 }
2861
2862- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
2863+ mutex_enter(&buf_pool->free_list_mutex);
2864+ UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
2865 ut_d(block->page.in_free_list = TRUE);
2866+ mutex_exit(&buf_pool->free_list_mutex);
2867
2868 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
2869 }
db82db79 2870@@ -1677,7 +1824,11 @@
b4e1fa2c
AM
2871 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2872
2873 ut_ad(bpage);
2874- ut_ad(buf_pool_mutex_own(buf_pool));
2875+ //ut_ad(buf_pool_mutex_own(buf_pool));
2876+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2877+#ifdef UNIV_SYNC_DEBUG
2878+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
2879+#endif
2880 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
2881
2882 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
db82db79 2883@@ -1785,7 +1936,9 @@
b4e1fa2c
AM
2884
2885 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2886 mutex_exit(buf_page_get_mutex(bpage));
2887- buf_pool_mutex_exit(buf_pool);
2888+ //buf_pool_mutex_exit(buf_pool);
2889+ mutex_exit(&buf_pool->LRU_list_mutex);
2890+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
2891 buf_print();
2892 buf_LRU_print();
2893 buf_validate();
db82db79 2894@@ -1807,17 +1960,17 @@
b4e1fa2c
AM
2895 ut_a(buf_page_get_zip_size(bpage));
2896
db82db79 2897 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
b4e1fa2c
AM
2898- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
2899+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
db82db79 2900 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
b4e1fa2c
AM
2901
2902 mutex_exit(&buf_pool->zip_mutex);
2903- buf_pool_mutex_exit_forbid(buf_pool);
2904+ //buf_pool_mutex_exit_forbid(buf_pool);
2905
2906 buf_buddy_free(
2907 buf_pool, bpage->zip.data,
2908- page_zip_get_size(&bpage->zip));
2909+ page_zip_get_size(&bpage->zip), TRUE);
2910
b4e1fa2c 2911- buf_pool_mutex_exit_allow(buf_pool);
b4e1fa2c 2912+ //buf_pool_mutex_exit_allow(buf_pool);
db82db79 2913 buf_page_free_descriptor(bpage);
b4e1fa2c 2914 return(BUF_BLOCK_ZIP_FREE);
db82db79
AM
2915
2916@@ -1839,13 +1992,13 @@
b4e1fa2c
AM
2917 ut_ad(!bpage->in_flush_list);
2918 ut_ad(!bpage->in_LRU_list);
2919 mutex_exit(&((buf_block_t*) bpage)->mutex);
2920- buf_pool_mutex_exit_forbid(buf_pool);
2921+ //buf_pool_mutex_exit_forbid(buf_pool);
2922
2923 buf_buddy_free(
2924 buf_pool, data,
2925- page_zip_get_size(&bpage->zip));
2926+ page_zip_get_size(&bpage->zip), TRUE);
2927
2928- buf_pool_mutex_exit_allow(buf_pool);
2929+ //buf_pool_mutex_exit_allow(buf_pool);
2930 mutex_enter(&((buf_block_t*) bpage)->mutex);
2931 page_zip_set_size(&bpage->zip, 0);
2932 }
db82db79 2933@@ -1871,18 +2024,19 @@
b4e1fa2c
AM
2934 void
2935 buf_LRU_block_free_hashed_page(
2936 /*===========================*/
2937- buf_block_t* block) /*!< in: block, must contain a file page and
2938+ buf_block_t* block, /*!< in: block, must contain a file page and
2939 be in a state where it can be freed */
2940+ ibool have_page_hash_mutex)
2941 {
2942 #ifdef UNIV_DEBUG
2943- buf_pool_t* buf_pool = buf_pool_from_block(block);
2944- ut_ad(buf_pool_mutex_own(buf_pool));
2945+ //buf_pool_t* buf_pool = buf_pool_from_block(block);
2946+ //ut_ad(buf_pool_mutex_own(buf_pool));
2947 #endif
2948 ut_ad(mutex_own(&block->mutex));
2949
2950 buf_block_set_state(block, BUF_BLOCK_MEMORY);
2951
2952- buf_LRU_block_free_non_file_page(block);
2953+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
2954 }
2955
2956 /**********************************************************************//**
db82db79 2957@@ -1909,7 +2063,8 @@
b4e1fa2c
AM
2958 }
2959
2960 if (adjust) {
2961- buf_pool_mutex_enter(buf_pool);
2962+ //buf_pool_mutex_enter(buf_pool);
2963+ mutex_enter(&buf_pool->LRU_list_mutex);
2964
2965 if (ratio != buf_pool->LRU_old_ratio) {
2966 buf_pool->LRU_old_ratio = ratio;
db82db79 2967@@ -1921,7 +2076,8 @@
b4e1fa2c
AM
2968 }
2969 }
2970
2971- buf_pool_mutex_exit(buf_pool);
2972+ //buf_pool_mutex_exit(buf_pool);
2973+ mutex_exit(&buf_pool->LRU_list_mutex);
2974 } else {
2975 buf_pool->LRU_old_ratio = ratio;
2976 }
db82db79 2977@@ -2026,7 +2182,8 @@
b4e1fa2c
AM
2978 ulint new_len;
2979
2980 ut_ad(buf_pool);
2981- buf_pool_mutex_enter(buf_pool);
2982+ //buf_pool_mutex_enter(buf_pool);
2983+ mutex_enter(&buf_pool->LRU_list_mutex);
2984
2985 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
2986
db82db79 2987@@ -2087,16 +2244,22 @@
b4e1fa2c
AM
2988
2989 ut_a(buf_pool->LRU_old_len == old_len);
2990
2991- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
2992+ mutex_exit(&buf_pool->LRU_list_mutex);
2993+ mutex_enter(&buf_pool->free_list_mutex);
2994+
2995+ UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
2996 ut_ad(ut_list_node_313->in_free_list));
2997
2998 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
2999 bpage != NULL;
3000- bpage = UT_LIST_GET_NEXT(list, bpage)) {
3001+ bpage = UT_LIST_GET_NEXT(free, bpage)) {
3002
3003 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3004 }
3005
3006+ mutex_exit(&buf_pool->free_list_mutex);
3007+ mutex_enter(&buf_pool->LRU_list_mutex);
3008+
3009 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3010 ut_ad(ut_list_node_313->in_unzip_LRU_list
3011 && ut_list_node_313->page.in_LRU_list));
db82db79 3012@@ -2110,7 +2273,8 @@
b4e1fa2c
AM
3013 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3014 }
3015
3016- buf_pool_mutex_exit(buf_pool);
3017+ //buf_pool_mutex_exit(buf_pool);
3018+ mutex_exit(&buf_pool->LRU_list_mutex);
3019 }
3020
3021 /**********************************************************************//**
db82db79 3022@@ -2146,7 +2310,8 @@
b4e1fa2c
AM
3023 const buf_page_t* bpage;
3024
3025 ut_ad(buf_pool);
3026- buf_pool_mutex_enter(buf_pool);
3027+ //buf_pool_mutex_enter(buf_pool);
3028+ mutex_enter(&buf_pool->LRU_list_mutex);
3029
3030 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3031
db82db79 3032@@ -2203,7 +2368,8 @@
b4e1fa2c
AM
3033 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3034 }
3035
3036- buf_pool_mutex_exit(buf_pool);
3037+ //buf_pool_mutex_exit(buf_pool);
3038+ mutex_exit(&buf_pool->LRU_list_mutex);
3039 }
3040
3041 /**********************************************************************//**
db82db79
AM
3042--- a/storage/innobase/buf/buf0rea.c
3043+++ b/storage/innobase/buf/buf0rea.c
b4e1fa2c
AM
3044@@ -311,6 +311,7 @@
3045
3046 return(0);
3047 }
3048+ buf_pool_mutex_exit(buf_pool);
3049
3050 /* Check that almost all pages in the area have been accessed; if
3051 offset == low, the accesses must be in a descending order, otherwise,
3052@@ -329,6 +330,7 @@
3053
3054 fail_count = 0;
3055
3056+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3057 for (i = low; i < high; i++) {
3058 bpage = buf_page_hash_get(buf_pool, space, i);
3059
3060@@ -356,7 +358,8 @@
3061
3062 if (fail_count > threshold) {
3063 /* Too many failures: return */
3064- buf_pool_mutex_exit(buf_pool);
3065+ //buf_pool_mutex_exit(buf_pool);
3066+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3067 return(0);
3068 }
3069
3070@@ -371,7 +374,8 @@
3071 bpage = buf_page_hash_get(buf_pool, space, offset);
3072
3073 if (bpage == NULL) {
3074- buf_pool_mutex_exit(buf_pool);
3075+ //buf_pool_mutex_exit(buf_pool);
3076+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3077
3078 return(0);
3079 }
3080@@ -397,7 +401,8 @@
3081 pred_offset = fil_page_get_prev(frame);
3082 succ_offset = fil_page_get_next(frame);
3083
3084- buf_pool_mutex_exit(buf_pool);
3085+ //buf_pool_mutex_exit(buf_pool);
3086+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3087
3088 if ((offset == low) && (succ_offset == offset + 1)) {
3089
db82db79
AM
3090--- a/storage/innobase/handler/ha_innodb.cc
3091+++ b/storage/innobase/handler/ha_innodb.cc
3092@@ -265,6 +265,10 @@
b4e1fa2c
AM
3093 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3094 {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3095 {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3096+ {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3097+ {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3098+ {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3099+ {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3100 {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3101 {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3102 {&dict_sys_mutex_key, "dict_sys_mutex", 0},
db82db79 3103@@ -314,6 +318,7 @@
b4e1fa2c
AM
3104 {&archive_lock_key, "archive_lock", 0},
3105 # endif /* UNIV_LOG_ARCHIVE */
3106 {&btr_search_latch_key, "btr_search_latch", 0},
3107+ {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3108 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3109 {&buf_block_lock_key, "buf_block_lock", 0},
3110 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
db82db79
AM
3111--- a/storage/innobase/handler/i_s.cc
3112+++ b/storage/innobase/handler/i_s.cc
adf0fb13 3113@@ -1563,7 +1563,8 @@
b4e1fa2c
AM
3114
3115 buf_pool = buf_pool_from_array(i);
3116
3117- buf_pool_mutex_enter(buf_pool);
3118+ //buf_pool_mutex_enter(buf_pool);
3119+ mutex_enter(&buf_pool->zip_free_mutex);
3120
3121 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3122 buf_buddy_stat_t* buddy_stat;
adf0fb13 3123@@ -1593,7 +1594,8 @@
b4e1fa2c
AM
3124 }
3125 }
3126
3127- buf_pool_mutex_exit(buf_pool);
3128+ //buf_pool_mutex_exit(buf_pool);
3129+ mutex_exit(&buf_pool->zip_free_mutex);
3130
3131 if (status) {
3132 break;
db82db79
AM
3133--- a/storage/innobase/ibuf/ibuf0ibuf.c
3134+++ b/storage/innobase/ibuf/ibuf0ibuf.c
adf0fb13 3135@@ -3821,9 +3821,11 @@
b4e1fa2c
AM
3136 ulint fold = buf_page_address_fold(space, page_no);
3137 buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3138
3139- buf_pool_mutex_enter(buf_pool);
3140+ //buf_pool_mutex_enter(buf_pool);
3141+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3142 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3143- buf_pool_mutex_exit(buf_pool);
3144+ //buf_pool_mutex_exit(buf_pool);
3145+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3146
3147 if (UNIV_LIKELY_NULL(bpage)) {
3148 /* A buffer pool watch has been set or the
db82db79
AM
3149--- a/storage/innobase/include/buf0buddy.h
3150+++ b/storage/innobase/include/buf0buddy.h
3151@@ -49,11 +49,12 @@
3152 ulint size, /*!< in: compressed page size
3153 (between PAGE_ZIP_MIN_SIZE and
3154 UNIV_PAGE_SIZE) */
3155- ibool* lru) /*!< in: pointer to a variable
3156+ ibool* lru, /*!< in: pointer to a variable
3157 that will be assigned TRUE if
3158 storage was allocated from the
3159 LRU list and buf_pool->mutex was
3160 temporarily released */
3161+ ibool have_page_hash_mutex)
3162 __attribute__((malloc, nonnull));
b4e1fa2c
AM
3163
3164 /**********************************************************************//**
db82db79
AM
3165@@ -66,8 +67,9 @@
3166 the block resides */
3167 void* buf, /*!< in: block to be freed, must not
3168 be pointed to by the buffer pool */
3169- ulint size) /*!< in: block size,
3170+ ulint size, /*!< in: block size,
3171 up to UNIV_PAGE_SIZE */
3172+ ibool have_page_hash_mutex)
b4e1fa2c
AM
3173 __attribute__((nonnull));
3174
3175 #ifndef UNIV_NONINL
db82db79
AM
3176--- a/storage/innobase/include/buf0buddy.ic
3177+++ b/storage/innobase/include/buf0buddy.ic
3178@@ -45,11 +45,12 @@
3179 buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
3180 ulint i, /*!< in: index of buf_pool->zip_free[],
3181 or BUF_BUDDY_SIZES */
3182- ibool* lru) /*!< in: pointer to a variable that
3183+ ibool* lru, /*!< in: pointer to a variable that
3184 will be assigned TRUE if storage was
3185 allocated from the LRU list and
3186 buf_pool->mutex was temporarily
3187 released */
3188+ ibool have_page_hash_mutex)
3189 __attribute__((malloc, nonnull));
b4e1fa2c
AM
3190
3191 /**********************************************************************//**
3192@@ -61,8 +62,9 @@
3193 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3194 void* buf, /*!< in: block to be freed, must not be
3195 pointed to by the buffer pool */
3196- ulint i) /*!< in: index of buf_pool->zip_free[],
3197+ ulint i, /*!< in: index of buf_pool->zip_free[],
3198 or BUF_BUDDY_SIZES */
3199+ ibool have_page_hash_mutex)
3200 __attribute__((nonnull));
3201
3202 /**********************************************************************//**
db82db79
AM
3203@@ -101,19 +103,20 @@
3204 ulint size, /*!< in: compressed page size
3205 (between PAGE_ZIP_MIN_SIZE and
3206 UNIV_PAGE_SIZE) */
b4e1fa2c
AM
3207- ibool* lru) /*!< in: pointer to a variable
3208+ ibool* lru, /*!< in: pointer to a variable
3209 that will be assigned TRUE if
3210 storage was allocated from the
3211 LRU list and buf_pool->mutex was
db82db79 3212 temporarily released */
b4e1fa2c
AM
3213+ ibool have_page_hash_mutex)
3214 {
3215- ut_ad(buf_pool_mutex_own(buf_pool));
3216+ //ut_ad(buf_pool_mutex_own(buf_pool));
db82db79
AM
3217 ut_ad(ut_is_2pow(size));
3218 ut_ad(size >= PAGE_ZIP_MIN_SIZE);
3219 ut_ad(size <= UNIV_PAGE_SIZE);
b4e1fa2c 3220
db82db79
AM
3221 return((byte*) buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size),
3222- lru));
3223+ lru, have_page_hash_mutex));
b4e1fa2c
AM
3224 }
3225
3226 /**********************************************************************//**
db82db79
AM
3227@@ -126,15 +129,28 @@
3228 the block resides */
3229 void* buf, /*!< in: block to be freed, must not
3230 be pointed to by the buffer pool */
3231- ulint size) /*!< in: block size,
3232+ ulint size, /*!< in: block size,
3233 up to UNIV_PAGE_SIZE */
b4e1fa2c
AM
3234+ ibool have_page_hash_mutex)
3235 {
3236- ut_ad(buf_pool_mutex_own(buf_pool));
3237+ //ut_ad(buf_pool_mutex_own(buf_pool));
db82db79
AM
3238 ut_ad(ut_is_2pow(size));
3239 ut_ad(size >= PAGE_ZIP_MIN_SIZE);
3240 ut_ad(size <= UNIV_PAGE_SIZE);
3241
3242- buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
b4e1fa2c
AM
3243+ if (!have_page_hash_mutex) {
3244+ mutex_enter(&buf_pool->LRU_list_mutex);
3245+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3246+ }
db82db79 3247+
b4e1fa2c
AM
3248+ mutex_enter(&buf_pool->zip_free_mutex);
3249+ buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3250+ mutex_exit(&buf_pool->zip_free_mutex);
3251+
3252+ if (!have_page_hash_mutex) {
3253+ mutex_exit(&buf_pool->LRU_list_mutex);
3254+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3255+ }
3256 }
3257
3258 #ifdef UNIV_MATERIALIZE
db82db79
AM
3259--- a/storage/innobase/include/buf0buf.h
3260+++ b/storage/innobase/include/buf0buf.h
3261@@ -208,6 +208,20 @@
b4e1fa2c
AM
3262 /*==========================*/
3263
3264 /********************************************************************//**
3265+Acquires the page_hash_latch of every buffer pool instance in exclusive mode. */
3266+UNIV_INLINE
3267+void
3268+buf_pool_page_hash_x_lock_all(void);
3269+/*================================*/
3270+
3271+/********************************************************************//**
3272+Releases the exclusive page_hash_latch of every buffer pool instance. */
3273+UNIV_INLINE
3274+void
3275+buf_pool_page_hash_x_unlock_all(void);
3276+/*==================================*/
3277+
3278+/********************************************************************//**
3279 Creates the buffer pool.
3280 @return own: buf_pool object, NULL if not enough memory or error */
3281 UNIV_INTERN
db82db79 3282@@ -873,6 +887,15 @@
b4e1fa2c
AM
3283 const buf_page_t* bpage) /*!< in: pointer to control block */
3284 __attribute__((pure));
3285
3286+/*************************************************************************
3287+Gets the mutex of a block and enters it, retrying if the block's mutex changed meanwhile. */
3288+UNIV_INLINE
3289+mutex_t*
3290+buf_page_get_mutex_enter(
3291+/*=========================*/
3292+ const buf_page_t* bpage) /*!< in: pointer to control block */
3293+ __attribute__((nonnull)); /* not pure: this function enters a mutex */
3294+
3295 /*********************************************************************//**
3296 Get the flush type of a page.
3297 @return flush type */
db82db79 3298@@ -1354,7 +1377,7 @@
b4e1fa2c
AM
3299 All these are protected by buf_pool->mutex. */
3300 /* @{ */
3301
3302- UT_LIST_NODE_T(buf_page_t) list;
3303+ /* UT_LIST_NODE_T(buf_page_t) list; */
3304 /*!< based on state, this is a
3305 list node, protected either by
3306 buf_pool->mutex or by
db82db79 3307@@ -1382,6 +1405,10 @@
b4e1fa2c
AM
3308 BUF_BLOCK_REMOVE_HASH or
3309 BUF_BLOCK_READY_IN_USE. */
3310
3311+ /* the former shared "list" node, split into one node per list for optimistic use */
3312+ UT_LIST_NODE_T(buf_page_t) free;
3313+ UT_LIST_NODE_T(buf_page_t) flush_list;
3314+ UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3315 #ifdef UNIV_DEBUG
3316 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3317 when buf_pool->flush_list_mutex is
db82db79 3318@@ -1474,11 +1501,11 @@
b4e1fa2c
AM
3319 a block is in the unzip_LRU list
3320 if page.state == BUF_BLOCK_FILE_PAGE
3321 and page.zip.data != NULL */
3322-#ifdef UNIV_DEBUG
3323+//#ifdef UNIV_DEBUG
3324 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3325 decompressed LRU list;
3326 used in debugging */
3327-#endif /* UNIV_DEBUG */
3328+//#endif /* UNIV_DEBUG */
3329 mutex_t mutex; /*!< mutex protecting this block:
3330 state (also protected by the buffer
3331 pool mutex), io_fix, buf_fix_count,
db82db79 3332@@ -1653,6 +1680,11 @@
b4e1fa2c
AM
3333 pool instance, protects compressed
3334 only pages (of type buf_page_t, not
3335 buf_block_t */
3336+ mutex_t LRU_list_mutex;
3337+ rw_lock_t page_hash_latch;
3338+ mutex_t free_list_mutex;
3339+ mutex_t zip_free_mutex;
3340+ mutex_t zip_hash_mutex;
3341 ulint instance_no; /*!< Array index of this buffer
3342 pool instance */
3343 ulint old_pool_size; /*!< Old pool size in bytes */
db82db79 3344@@ -1806,8 +1838,8 @@
11822e22
AM
3345 /** Test if a buffer pool mutex is owned. */
3346 #define buf_pool_mutex_own(b) mutex_own(&b->mutex)
3347 /** Acquire a buffer pool mutex. */
3348+/* The position of buf_pool->mutex in the latch order has changed. */
3349 #define buf_pool_mutex_enter(b) do { \
3350- ut_ad(!mutex_own(&b->zip_mutex)); \
3351 mutex_enter(&b->mutex); \
3352 } while (0)
3353
db82db79
AM
3354--- a/storage/innobase/include/buf0buf.ic
3355+++ b/storage/innobase/include/buf0buf.ic
b4e1fa2c
AM
3356@@ -274,7 +274,7 @@
3357 case BUF_BLOCK_ZIP_FREE:
3358 /* This is a free page in buf_pool->zip_free[].
3359 Such pages should only be accessed by the buddy allocator. */
3360- ut_error;
3361+ /* ut_error; */ /* optimistic */
3362 break;
3363 case BUF_BLOCK_ZIP_PAGE:
3364 case BUF_BLOCK_ZIP_DIRTY:
11822e22 3365@@ -317,9 +317,16 @@
b4e1fa2c
AM
3366 {
3367 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3368
11822e22
AM
3369+ if (/*equivalent to buf_pool_watch_is_sentinel(buf_pool, bpage)*/
3370+ bpage >= &buf_pool->watch[0]
3371+ && bpage < &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
b4e1fa2c
AM
3372+ /* TODO: this is interim code; it should be confirmed later. */
3373+ return(&buf_pool->zip_mutex);
3374+ }
3375+
3376 switch (buf_page_get_state(bpage)) {
3377 case BUF_BLOCK_ZIP_FREE:
3378- ut_error;
3379+ /* ut_error; */ /* optimistic */
3380 return(NULL);
3381 case BUF_BLOCK_ZIP_PAGE:
3382 case BUF_BLOCK_ZIP_DIRTY:
11822e22 3383@@ -329,6 +336,28 @@
b4e1fa2c
AM
3384 }
3385 }
3386
3387+/*************************************************************************
3388+Gets the mutex of a block and enters it, retrying if the block's mutex changed meanwhile. */
3389+UNIV_INLINE
3390+mutex_t*
3391+buf_page_get_mutex_enter(
3392+/*=========================*/
3393+ const buf_page_t* bpage) /*!< in: pointer to control block */
3394+{
3395+ mutex_t* block_mutex;
3396+
3397+ while(1) {
3398+ block_mutex = buf_page_get_mutex(bpage);
3399+ if (!block_mutex)
3400+ return block_mutex;
3401+
3402+ mutex_enter(block_mutex);
3403+ if (block_mutex == buf_page_get_mutex(bpage))
3404+ return block_mutex;
3405+ mutex_exit(block_mutex);
3406+ }
3407+}
3408+
3409 /*********************************************************************//**
3410 Get the flush type of a page.
3411 @return flush type */
11822e22 3412@@ -425,8 +454,8 @@
b4e1fa2c
AM
3413 enum buf_io_fix io_fix) /*!< in: io_fix state */
3414 {
3415 #ifdef UNIV_DEBUG
3416- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3417- ut_ad(buf_pool_mutex_own(buf_pool));
3418+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3419+ //ut_ad(buf_pool_mutex_own(buf_pool));
3420 #endif
3421 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3422
11822e22 3423@@ -456,14 +485,14 @@
b4e1fa2c
AM
3424 const buf_page_t* bpage) /*!< control block being relocated */
3425 {
3426 #ifdef UNIV_DEBUG
3427- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3428- ut_ad(buf_pool_mutex_own(buf_pool));
3429+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3430+ //ut_ad(buf_pool_mutex_own(buf_pool));
3431 #endif
3432 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3433 ut_ad(buf_page_in_file(bpage));
3434- ut_ad(bpage->in_LRU_list);
3435+ //ut_ad(bpage->in_LRU_list);
3436
3437- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3438+ return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3439 && bpage->buf_fix_count == 0);
3440 }
3441
11822e22 3442@@ -477,8 +506,8 @@
b4e1fa2c
AM
3443 const buf_page_t* bpage) /*!< in: control block */
3444 {
3445 #ifdef UNIV_DEBUG
3446- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3447- ut_ad(buf_pool_mutex_own(buf_pool));
3448+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3449+ //ut_ad(buf_pool_mutex_own(buf_pool));
3450 #endif
3451 ut_ad(buf_page_in_file(bpage));
3452
11822e22 3453@@ -498,7 +527,8 @@
b4e1fa2c
AM
3454 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3455 #endif /* UNIV_DEBUG */
3456 ut_a(buf_page_in_file(bpage));
3457- ut_ad(buf_pool_mutex_own(buf_pool));
3458+ //ut_ad(buf_pool_mutex_own(buf_pool));
3459+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3460 ut_ad(bpage->in_LRU_list);
3461
3462 #ifdef UNIV_LRU_DEBUG
11822e22 3463@@ -545,9 +575,10 @@
b4e1fa2c
AM
3464 ulint time_ms) /*!< in: ut_time_ms() */
3465 {
3466 #ifdef UNIV_DEBUG
3467- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3468- ut_ad(buf_pool_mutex_own(buf_pool));
3469+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3470+ //ut_ad(buf_pool_mutex_own(buf_pool));
3471 #endif
3472+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3473 ut_a(buf_page_in_file(bpage));
3474
3475 if (!bpage->access_time) {
db82db79 3476@@ -790,19 +821,19 @@
b4e1fa2c
AM
3477 /*===========*/
3478 buf_block_t* block) /*!< in, own: block to be freed */
3479 {
3480- buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3481+ //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3482
3483- buf_pool_mutex_enter(buf_pool);
3484+ //buf_pool_mutex_enter(buf_pool);
3485
3486 mutex_enter(&block->mutex);
3487
3488 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3489
3490- buf_LRU_block_free_non_file_page(block);
3491+ buf_LRU_block_free_non_file_page(block, FALSE);
3492
3493 mutex_exit(&block->mutex);
3494
3495- buf_pool_mutex_exit(buf_pool);
3496+ //buf_pool_mutex_exit(buf_pool);
3497 }
3498 #endif /* !UNIV_HOTBACKUP */
3499
db82db79 3500@@ -850,17 +881,17 @@
b4e1fa2c
AM
3501 page frame */
3502 {
3503 ib_uint64_t lsn;
3504- mutex_t* block_mutex = buf_page_get_mutex(bpage);
3505-
3506- mutex_enter(block_mutex);
3507+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3508
3509- if (buf_page_in_file(bpage)) {
3510+ if (block_mutex && buf_page_in_file(bpage)) {
3511 lsn = bpage->newest_modification;
3512 } else {
3513 lsn = 0;
3514 }
3515
3516- mutex_exit(block_mutex);
3517+ if (block_mutex) {
3518+ mutex_exit(block_mutex);
3519+ }
3520
3521 return(lsn);
3522 }
db82db79 3523@@ -878,7 +909,7 @@
b4e1fa2c
AM
3524 #ifdef UNIV_SYNC_DEBUG
3525 buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3526
3527- ut_ad((buf_pool_mutex_own(buf_pool)
3528+ ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3529 && (block->page.buf_fix_count == 0))
3530 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3531 #endif /* UNIV_SYNC_DEBUG */
db82db79 3532@@ -995,7 +1026,11 @@
b4e1fa2c
AM
3533 buf_page_t* bpage;
3534
3535 ut_ad(buf_pool);
3536- ut_ad(buf_pool_mutex_own(buf_pool));
3537+ //ut_ad(buf_pool_mutex_own(buf_pool));
3538+#ifdef UNIV_SYNC_DEBUG
3539+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3540+ || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3541+#endif
3542 ut_ad(fold == buf_page_address_fold(space, offset));
3543
3544 /* Look for the page in the hash table */
db82db79 3545@@ -1080,11 +1115,13 @@
b4e1fa2c
AM
3546 const buf_page_t* bpage;
3547 buf_pool_t* buf_pool = buf_pool_get(space, offset);
3548
3549- buf_pool_mutex_enter(buf_pool);
3550+ //buf_pool_mutex_enter(buf_pool);
3551+ rw_lock_s_lock(&buf_pool->page_hash_latch);
3552
3553 bpage = buf_page_hash_get(buf_pool, space, offset);
3554
3555- buf_pool_mutex_exit(buf_pool);
3556+ //buf_pool_mutex_exit(buf_pool);
3557+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
3558
3559 return(bpage != NULL);
3560 }
db82db79 3561@@ -1212,4 +1249,38 @@
b4e1fa2c
AM
3562 buf_pool_mutex_exit(buf_pool);
3563 }
3564 }
3565+
3566+/********************************************************************//**
3567+Acquires the page_hash_latch of every buffer pool instance in exclusive mode. */
3568+UNIV_INLINE
3569+void
3570+buf_pool_page_hash_x_lock_all(void)
3571+/*===============================*/
3572+{
3573+ ulint i;
3574+
3575+ for (i = 0; i < srv_buf_pool_instances; i++) {
3576+ buf_pool_t* buf_pool;
3577+
3578+ buf_pool = buf_pool_from_array(i);
3579+ rw_lock_x_lock(&buf_pool->page_hash_latch);
3580+ }
3581+}
3582+
3583+/********************************************************************//**
3584+Releases the exclusive page_hash_latch of every buffer pool instance. */
3585+UNIV_INLINE
3586+void
3587+buf_pool_page_hash_x_unlock_all(void)
3588+/*=================================*/
3589+{
3590+ ulint i;
3591+
3592+ for (i = 0; i < srv_buf_pool_instances; i++) {
3593+ buf_pool_t* buf_pool;
3594+
3595+ buf_pool = buf_pool_from_array(i);
3596+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
3597+ }
3598+}
3599 #endif /* !UNIV_HOTBACKUP */
db82db79
AM
3600--- a/storage/innobase/include/buf0lru.h
3601+++ b/storage/innobase/include/buf0lru.h
3602@@ -100,8 +100,9 @@
df1b5770
AM
3603 buf_LRU_free_block(
3604 /*===============*/
b4e1fa2c 3605 buf_page_t* bpage, /*!< in: block to be freed */
df1b5770
AM
3606- ibool zip) /*!< in: TRUE if should remove also the
3607+ ibool zip, /*!< in: TRUE if should remove also the
b4e1fa2c 3608 compressed page of an uncompressed page */
df1b5770
AM
3609+ ibool have_LRU_mutex)
3610 __attribute__((nonnull));
b4e1fa2c
AM
3611 /******************************************************************//**
3612 Try to free a replaceable block.
db82db79 3613@@ -148,7 +149,8 @@
b4e1fa2c
AM
3614 void
3615 buf_LRU_block_free_non_file_page(
3616 /*=============================*/
3617- buf_block_t* block); /*!< in: block, must not contain a file page */
3618+ buf_block_t* block, /*!< in: block, must not contain a file page */
3619+ ibool have_page_hash_mutex);
3620 /******************************************************************//**
3621 Adds a block to the LRU list. */
3622 UNIV_INTERN
db82db79
AM
3623--- a/storage/innobase/include/sync0rw.h
3624+++ b/storage/innobase/include/sync0rw.h
b4e1fa2c
AM
3625@@ -112,6 +112,7 @@
3626 extern mysql_pfs_key_t archive_lock_key;
3627 # endif /* UNIV_LOG_ARCHIVE */
3628 extern mysql_pfs_key_t btr_search_latch_key;
3629+extern mysql_pfs_key_t buf_pool_page_hash_key;
3630 extern mysql_pfs_key_t buf_block_lock_key;
3631 # ifdef UNIV_SYNC_DEBUG
3632 extern mysql_pfs_key_t buf_block_debug_latch_key;
db82db79
AM
3633--- a/storage/innobase/include/sync0sync.h
3634+++ b/storage/innobase/include/sync0sync.h
b4e1fa2c
AM
3635@@ -75,6 +75,10 @@
3636 extern mysql_pfs_key_t buffer_block_mutex_key;
3637 extern mysql_pfs_key_t buf_pool_mutex_key;
3638 extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3639+extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3640+extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3641+extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3642+extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3643 extern mysql_pfs_key_t cache_last_read_mutex_key;
3644 extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3645 extern mysql_pfs_key_t dict_sys_mutex_key;
db82db79 3646@@ -670,7 +674,7 @@
b4e1fa2c 3647 #define SYNC_TRX_SYS_HEADER 290
11822e22 3648 #define SYNC_PURGE_QUEUE 200
b4e1fa2c
AM
3649 #define SYNC_LOG 170
3650-#define SYNC_LOG_FLUSH_ORDER 147
3651+#define SYNC_LOG_FLUSH_ORDER 156
3652 #define SYNC_RECV 168
3653 #define SYNC_WORK_QUEUE 162
3654 #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
db82db79 3655@@ -680,8 +684,13 @@
b4e1fa2c
AM
3656 SYNC_SEARCH_SYS, as memory allocation
3657 can call routines there! Otherwise
3658 the level is SYNC_MEM_HASH. */
3659+#define SYNC_BUF_LRU_LIST 158
3660+#define SYNC_BUF_PAGE_HASH 157
3661+#define SYNC_BUF_BLOCK 155 /* Block mutex */
3662+#define SYNC_BUF_FREE_LIST 153
3663+#define SYNC_BUF_ZIP_FREE 152
3664+#define SYNC_BUF_ZIP_HASH 151
3665 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
3666-#define SYNC_BUF_BLOCK 146 /* Block mutex */
3667 #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
3668 #define SYNC_DOUBLEWRITE 140
3669 #define SYNC_ANY_LATCH 135
db82db79 3670@@ -713,7 +722,7 @@
b4e1fa2c
AM
3671 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3672 when atomic operations are not enabled */
3673 #endif
3674- ulint waiters; /*!< This ulint is set to 1 if there are (or
3675+ volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3676 may be) threads waiting in the global wait
3677 array for this mutex to be released.
3678 Otherwise, this is 0. */
db82db79
AM
3679--- a/storage/innobase/srv/srv0srv.c
3680+++ b/storage/innobase/srv/srv0srv.c
adf0fb13 3681@@ -3098,7 +3098,7 @@
b4e1fa2c
AM
3682 level += log_sys->max_checkpoint_age
3683 - (lsn - oldest_modification);
3684 }
3685- bpage = UT_LIST_GET_NEXT(list, bpage);
3686+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3687 n_blocks++;
3688 }
3689
adf0fb13 3690@@ -3184,7 +3184,7 @@
b4e1fa2c
AM
3691 found = TRUE;
3692 break;
3693 }
3694- bpage = UT_LIST_GET_NEXT(list, bpage);
3695+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3696 new_blocks_num++;
3697 }
3698 if (!found) {
db82db79
AM
3699--- a/storage/innobase/sync/sync0sync.c
3700+++ b/storage/innobase/sync/sync0sync.c
adf0fb13 3701@@ -285,7 +285,7 @@
b4e1fa2c
AM
3702 mutex->lock_word = 0;
3703 #endif
3704 mutex->event = os_event_create(NULL);
3705- mutex_set_waiters(mutex, 0);
3706+ mutex->waiters = 0;
3707 #ifdef UNIV_DEBUG
3708 mutex->magic_n = MUTEX_MAGIC_N;
3709 #endif /* UNIV_DEBUG */
adf0fb13 3710@@ -464,6 +464,15 @@
b4e1fa2c
AM
3711 mutex_t* mutex, /*!< in: mutex */
3712 ulint n) /*!< in: value to set */
3713 {
3714+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3715+ ut_ad(mutex);
3716+
3717+ if (n) {
3718+ os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3719+ } else {
3720+ os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3721+ }
3722+#else
3723 volatile ulint* ptr; /* declared volatile to ensure that
3724 the value is stored to memory */
3725 ut_ad(mutex);
adf0fb13 3726@@ -472,6 +481,7 @@
b4e1fa2c
AM
3727
3728 *ptr = n; /* Here we assume that the write of a single
3729 word in memory is atomic */
3730+#endif
3731 }
3732
3733 /******************************************************************//**
db82db79 3734@@ -1239,7 +1249,12 @@
b4e1fa2c
AM
3735 ut_error;
3736 }
3737 break;
3738+ case SYNC_BUF_LRU_LIST:
3739 case SYNC_BUF_FLUSH_LIST:
3740+ case SYNC_BUF_PAGE_HASH:
3741+ case SYNC_BUF_FREE_LIST:
3742+ case SYNC_BUF_ZIP_FREE:
3743+ case SYNC_BUF_ZIP_HASH:
3744 case SYNC_BUF_POOL:
3745 /* We can have multiple mutexes of this type therefore we
3746 can only check whether the greater than condition holds. */
db82db79 3747@@ -1257,7 +1272,8 @@
b4e1fa2c
AM
3748 buffer block (block->mutex or buf_pool->zip_mutex). */
3749 if (!sync_thread_levels_g(array, level, FALSE)) {
3750 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3751- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3752+ /* The exact latching rule is not settled yet, so this check is disabled for now. */
3753+ //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
3754 }
3755 break;
3756 case SYNC_REC_LOCK:
This page took 0.78213 seconds and 4 git commands to generate.