git.pld-linux.org Git - packages/mysql.git/blame - mysql-split_buf_pool_mutex_fixed_optimistic_safe.patch
- up to 5.1.44
[packages/mysql.git] / mysql-split_buf_pool_mutex_fixed_optimistic_safe.patch
CommitLineData
31696e2e
AM
1diff -ruN mysql-5.1.29-rc_orig/storage/innobase/buf/buf0buf.c mysql-5.1.29-rc/storage/innobase/buf/buf0buf.c
2--- mysql-5.1.29-rc_orig/storage/innobase/buf/buf0buf.c 2008-10-12 06:54:12.000000000 +0900
3+++ mysql-5.1.29-rc/storage/innobase/buf/buf0buf.c 2008-11-18 15:44:00.000000000 +0900
4@@ -596,6 +596,15 @@
5 ---------------------------- */
6 mutex_create(&buf_pool->mutex, SYNC_BUF_POOL);
7
8+ mutex_create(&(buf_pool->flush_list_mutex), SYNC_NO_ORDER_CHECK);
9+ mutex_create(&(buf_pool->LRU_mutex), SYNC_NO_ORDER_CHECK);
10+ mutex_create(&(buf_pool->free_mutex), SYNC_NO_ORDER_CHECK);
11+ mutex_create(&(buf_pool->hash_mutex), SYNC_NO_ORDER_CHECK);
12+
13+ mutex_enter(&(buf_pool->LRU_mutex));
14+ mutex_enter(&(buf_pool->flush_list_mutex));
15+ mutex_enter(&(buf_pool->free_mutex));
16+ mutex_enter(&(buf_pool->hash_mutex));
17 mutex_enter(&(buf_pool->mutex));
18
19 if (srv_use_awe) {
20@@ -773,6 +782,10 @@
21 block->in_free_list = TRUE;
22 }
23
24+ mutex_exit(&(buf_pool->LRU_mutex));
25+ mutex_exit(&(buf_pool->flush_list_mutex));
26+ mutex_exit(&(buf_pool->free_mutex));
27+ mutex_exit(&(buf_pool->hash_mutex));
28 mutex_exit(&(buf_pool->mutex));
29
30 if (srv_use_adaptive_hash_indexes) {
31@@ -905,12 +918,12 @@
32
33 if (buf_block_peek_if_too_old(block)) {
34
35- mutex_enter(&buf_pool->mutex);
36+ mutex_enter(&(buf_pool->LRU_mutex));
37 /* There has been freeing activity in the LRU list:
38 best to move to the head of the LRU list */
39
40 buf_LRU_make_block_young(block);
41- mutex_exit(&buf_pool->mutex);
42+ mutex_exit(&(buf_pool->LRU_mutex));
43 }
44 }
45
46@@ -926,7 +939,7 @@
47 {
48 buf_block_t* block;
49
50- mutex_enter(&(buf_pool->mutex));
51+ mutex_enter(&(buf_pool->LRU_mutex));
52
53 block = buf_block_align(frame);
54
55@@ -934,7 +947,7 @@
56
57 buf_LRU_make_block_young(block);
58
59- mutex_exit(&(buf_pool->mutex));
60+ mutex_exit(&(buf_pool->LRU_mutex));
61 }
62
63 /************************************************************************
64@@ -945,7 +958,7 @@
65 /*===========*/
66 buf_block_t* block) /* in, own: block to be freed */
67 {
68- mutex_enter(&(buf_pool->mutex));
69+ mutex_enter(&(buf_pool->free_mutex));
70
71 mutex_enter(&block->mutex);
72
73@@ -955,7 +968,7 @@
74
75 mutex_exit(&block->mutex);
76
77- mutex_exit(&(buf_pool->mutex));
78+ mutex_exit(&(buf_pool->free_mutex));
79 }
80
81 /*************************************************************************
82@@ -996,11 +1009,11 @@
83 {
84 buf_block_t* block;
85
86- mutex_enter_fast(&(buf_pool->mutex));
87+ mutex_enter_fast(&(buf_pool->hash_mutex));
88
89 block = buf_page_hash_get(space, offset);
90
91- mutex_exit(&(buf_pool->mutex));
92+ mutex_exit(&(buf_pool->hash_mutex));
93
94 return(block);
95 }
96@@ -1017,7 +1030,7 @@
97 {
98 buf_block_t* block;
99
100- mutex_enter_fast(&(buf_pool->mutex));
101+ mutex_enter_fast(&(buf_pool->hash_mutex));
102
103 block = buf_page_hash_get(space, offset);
104
105@@ -1025,7 +1038,7 @@
106 block->check_index_page_at_flush = FALSE;
107 }
108
109- mutex_exit(&(buf_pool->mutex));
110+ mutex_exit(&(buf_pool->hash_mutex));
111 }
112
113 /************************************************************************
114@@ -1044,7 +1057,7 @@
115 buf_block_t* block;
116 ibool is_hashed;
117
118- mutex_enter_fast(&(buf_pool->mutex));
119+ mutex_enter_fast(&(buf_pool->hash_mutex));
120
121 block = buf_page_hash_get(space, offset);
122
123@@ -1054,7 +1067,7 @@
124 is_hashed = block->is_hashed;
125 }
126
127- mutex_exit(&(buf_pool->mutex));
128+ mutex_exit(&(buf_pool->hash_mutex));
129
130 return(is_hashed);
131 }
132@@ -1096,7 +1109,7 @@
133 {
134 buf_block_t* block;
135
136- mutex_enter_fast(&(buf_pool->mutex));
137+ mutex_enter_fast(&(buf_pool->hash_mutex));
138
139 block = buf_page_hash_get(space, offset);
140
141@@ -1104,7 +1117,7 @@
142 block->file_page_was_freed = TRUE;
143 }
144
145- mutex_exit(&(buf_pool->mutex));
146+ mutex_exit(&(buf_pool->hash_mutex));
147
148 return(block);
149 }
150@@ -1125,7 +1138,7 @@
151 {
152 buf_block_t* block;
153
154- mutex_enter_fast(&(buf_pool->mutex));
155+ mutex_enter_fast(&(buf_pool->hash_mutex));
156
157 block = buf_page_hash_get(space, offset);
158
159@@ -1133,7 +1146,7 @@
160 block->file_page_was_freed = FALSE;
161 }
162
163- mutex_exit(&(buf_pool->mutex));
164+ mutex_exit(&(buf_pool->hash_mutex));
165
166 return(block);
167 }
168@@ -1174,26 +1187,33 @@
169 buf_pool->n_page_gets++;
170 loop:
171 block = NULL;
172- mutex_enter_fast(&(buf_pool->mutex));
173+ // mutex_enter_fast(&(buf_pool->mutex));
174
175 if (guess) {
176 block = buf_block_align(guess);
177
178+ mutex_enter(&block->mutex);
179 if ((offset != block->offset) || (space != block->space)
180 || (block->state != BUF_BLOCK_FILE_PAGE)) {
181
182+ mutex_exit(&block->mutex);
183 block = NULL;
184 }
185 }
186
187 if (block == NULL) {
188+ mutex_enter_fast(&(buf_pool->hash_mutex));
189 block = buf_page_hash_get(space, offset);
190+ if(block) {
191+ mutex_enter(&block->mutex);
192+ }
193+ mutex_exit(&(buf_pool->hash_mutex));
194 }
195
196 if (block == NULL) {
197 /* Page not in buf_pool: needs to be read from file */
198
199- mutex_exit(&(buf_pool->mutex));
200+ // mutex_exit(&(buf_pool->mutex));
201
202 if (mode == BUF_GET_IF_IN_POOL) {
203
204@@ -1212,7 +1232,7 @@
205 goto loop;
206 }
207
208- mutex_enter(&block->mutex);
209+ // mutex_enter(&block->mutex);
210
211 ut_a(block->state == BUF_BLOCK_FILE_PAGE);
212
213@@ -1224,7 +1244,7 @@
214
215 if (mode == BUF_GET_IF_IN_POOL) {
216 /* The page is only being read to buffer */
217- mutex_exit(&buf_pool->mutex);
218+ // mutex_exit(&buf_pool->mutex);
219 mutex_exit(&block->mutex);
220
221 return(NULL);
222@@ -1241,7 +1261,9 @@
223 LRU list and we must put it to awe_LRU_free_mapped list once
224 mapped to a frame */
225
226+ mutex_enter_fast(&(buf_pool->mutex));
227 buf_awe_map_page_to_frame(block, TRUE);
228+ mutex_exit(&buf_pool->mutex);
229 }
230
231 #ifdef UNIV_SYNC_DEBUG
232@@ -1249,7 +1271,7 @@
233 #else
234 buf_block_buf_fix_inc(block);
235 #endif
236- mutex_exit(&buf_pool->mutex);
237+ // mutex_exit(&buf_pool->mutex);
238
239 /* Check if this is the first access to the page */
240
241@@ -1747,7 +1769,8 @@
242
243 ut_a(block);
244
245- mutex_enter(&(buf_pool->mutex));
246+ mutex_enter(&(buf_pool->LRU_mutex));
247+ mutex_enter(&(buf_pool->hash_mutex));
248 mutex_enter(&block->mutex);
249
250 if (fil_tablespace_deleted_or_being_deleted_in_mem(
251@@ -1763,7 +1786,8 @@
252 already in buf_pool, return */
253
254 mutex_exit(&block->mutex);
255- mutex_exit(&(buf_pool->mutex));
256+ mutex_exit(&(buf_pool->LRU_mutex));
257+ mutex_exit(&(buf_pool->hash_mutex));
258
259 buf_block_free(block);
260
261@@ -1778,10 +1802,14 @@
262 ut_ad(block);
263
264 buf_page_init(space, offset, block);
265+ mutex_exit(&(buf_pool->hash_mutex));
266
267 /* The block must be put to the LRU list, to the old blocks */
268
269 buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */
270+ mutex_exit(&(buf_pool->LRU_mutex));
271+
272+ mutex_enter(&(buf_pool->mutex)); /* for consistency about aio */
273
274 block->io_fix = BUF_IO_READ;
275
276@@ -1830,7 +1858,8 @@
277
278 free_block = buf_LRU_get_free_block();
279
280- mutex_enter(&(buf_pool->mutex));
281+ mutex_enter(&(buf_pool->LRU_mutex));
282+ mutex_enter(&(buf_pool->hash_mutex));
283
284 block = buf_page_hash_get(space, offset);
285
286@@ -1841,7 +1870,8 @@
287 block->file_page_was_freed = FALSE;
288
289 /* Page can be found in buf_pool */
290- mutex_exit(&(buf_pool->mutex));
291+ mutex_exit(&(buf_pool->LRU_mutex));
292+ mutex_exit(&(buf_pool->hash_mutex));
293
294 buf_block_free(free_block);
295
296@@ -1864,6 +1894,7 @@
297 mutex_enter(&block->mutex);
298
299 buf_page_init(space, offset, block);
300+ mutex_exit(&(buf_pool->hash_mutex));
301
302 /* The block must be put to the LRU list */
303 buf_LRU_add_block(block, FALSE);
304@@ -1875,7 +1906,7 @@
305 #endif
306 buf_pool->n_pages_created++;
307
308- mutex_exit(&(buf_pool->mutex));
309+ mutex_exit(&(buf_pool->LRU_mutex));
310
311 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
312
313@@ -1889,7 +1920,7 @@
314 ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
315
316 /* Flush pages from the end of the LRU list if necessary */
317- buf_flush_free_margin();
318+ buf_flush_free_margin(FALSE);
319
320 frame = block->frame;
321
322@@ -1928,6 +1959,7 @@
323 buf_block_t* block) /* in: pointer to the block in question */
324 {
325 ulint io_type;
326+ ulint flush_type;
327
328 ut_ad(block);
329
330@@ -2040,9 +2072,6 @@
331 }
332 }
333
334- mutex_enter(&(buf_pool->mutex));
335- mutex_enter(&block->mutex);
336-
337 #ifdef UNIV_IBUF_DEBUG
338 ut_a(ibuf_count_get(block->space, block->offset) == 0);
339 #endif
340@@ -2051,9 +2080,12 @@
341 removes the newest lock debug record, without checking the thread
342 id. */
343
344- block->io_fix = 0;
345-
346 if (io_type == BUF_IO_READ) {
347+ mutex_enter(&block->mutex);
348+ mutex_enter(&(buf_pool->mutex));
349+
350+ block->io_fix = 0;
351+
352 /* NOTE that the call to ibuf may have moved the ownership of
353 the x-latch to this OS thread: do not let this confuse you in
354 debugging! */
355@@ -2064,6 +2096,8 @@
356
357 rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
358
359+ mutex_exit(&(buf_pool->mutex));
360+ mutex_exit(&block->mutex);
361 #ifdef UNIV_DEBUG
362 if (buf_debug_prints) {
363 fputs("Has read ", stderr);
364@@ -2072,15 +2106,33 @@
365 } else {
366 ut_ad(io_type == BUF_IO_WRITE);
367
368+ flush_type = block->flush_type;
369+ if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
370+ mutex_enter(&(buf_pool->LRU_mutex));
371+ }
372+ mutex_enter(&(buf_pool->flush_list_mutex));
373+ mutex_enter(&block->mutex);
374+ mutex_enter(&(buf_pool->mutex));
375+
376+ block->io_fix = 0;
377+
378 /* Write means a flush operation: call the completion
379 routine in the flush system */
380
381 buf_flush_write_complete(block);
382
383+ mutex_exit(&(buf_pool->flush_list_mutex));
384+ if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
385+ mutex_exit(&(buf_pool->LRU_mutex));
386+ }
387+
388 rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
389
390 buf_pool->n_pages_written++;
391
392+ mutex_exit(&(buf_pool->mutex));
393+ mutex_exit(&block->mutex);
394+
395 #ifdef UNIV_DEBUG
396 if (buf_debug_prints) {
397 fputs("Has written ", stderr);
398@@ -2088,9 +2140,6 @@
399 #endif /* UNIV_DEBUG */
400 }
401
402- mutex_exit(&block->mutex);
403- mutex_exit(&(buf_pool->mutex));
404-
405 #ifdef UNIV_DEBUG
406 if (buf_debug_prints) {
407 fprintf(stderr, "page space %lu page no %lu\n",
408@@ -2118,11 +2167,11 @@
409 freed = buf_LRU_search_and_free_block(100);
410 }
411
412- mutex_enter(&(buf_pool->mutex));
413+ mutex_enter(&(buf_pool->LRU_mutex));
414
415 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
416
417- mutex_exit(&(buf_pool->mutex));
418+ mutex_exit(&(buf_pool->LRU_mutex));
419 }
420
421 #ifdef UNIV_DEBUG
422@@ -2142,10 +2191,22 @@
423 ulint n_flush = 0;
424 ulint n_free = 0;
425 ulint n_page = 0;
426+ ulint n_single_flush_tmp = 0;
427+ ulint n_lru_flush_tmp = 0;
428+ ulint n_list_flush_tmp = 0;
429
430 ut_ad(buf_pool);
431
432+ mutex_enter(&(buf_pool->LRU_mutex));
433+ mutex_enter(&(buf_pool->flush_list_mutex));
434+ mutex_enter(&(buf_pool->free_mutex));
435+ mutex_enter(&(buf_pool->hash_mutex));
436+
437 mutex_enter(&(buf_pool->mutex));
438+ n_single_flush_tmp = buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
439+ n_list_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LIST];
440+ n_lru_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LRU];
441+ mutex_exit(&(buf_pool->mutex));
442
443 for (i = 0; i < buf_pool->curr_size; i++) {
444
445@@ -2216,11 +2277,14 @@
446 }
447 ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
448
449- ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
450- ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
451- ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
452-
453- mutex_exit(&(buf_pool->mutex));
454+ ut_a(n_single_flush_tmp == n_single_flush);
455+ ut_a(n_list_flush_tmp == n_list_flush);
456+ ut_a(n_lru_flush_tmp == n_lru_flush);
457+
458+ mutex_exit(&(buf_pool->LRU_mutex));
459+ mutex_exit(&(buf_pool->flush_list_mutex));
460+ mutex_exit(&(buf_pool->free_mutex));
461+ mutex_exit(&(buf_pool->hash_mutex));
462
463 ut_a(buf_LRU_validate());
464 ut_a(buf_flush_validate());
465@@ -2252,7 +2316,9 @@
466 index_ids = mem_alloc(sizeof(dulint) * size);
467 counts = mem_alloc(sizeof(ulint) * size);
468
469- mutex_enter(&(buf_pool->mutex));
470+ mutex_enter(&(buf_pool->LRU_mutex));
471+ mutex_enter(&(buf_pool->flush_list_mutex));
472+ mutex_enter(&(buf_pool->free_mutex));
473
474 fprintf(stderr,
475 "buf_pool size %lu\n"
476@@ -2305,7 +2371,9 @@
477 }
478 }
479
480- mutex_exit(&(buf_pool->mutex));
481+ mutex_exit(&(buf_pool->LRU_mutex));
482+ mutex_exit(&(buf_pool->flush_list_mutex));
483+ mutex_exit(&(buf_pool->free_mutex));
484
485 for (i = 0; i < n_found; i++) {
486 index = dict_index_get_if_in_cache(index_ids[i]);
487@@ -2339,8 +2407,6 @@
488 ulint i;
489 ulint fixed_pages_number = 0;
490
491- mutex_enter(&(buf_pool->mutex));
492-
493 for (i = 0; i < buf_pool->curr_size; i++) {
494
495 block = buf_pool_get_nth_block(buf_pool, i);
496@@ -2356,7 +2422,6 @@
497 }
498 }
499
500- mutex_exit(&(buf_pool->mutex));
501
502 return(fixed_pages_number);
503 }
504@@ -2385,7 +2450,9 @@
505 {
506 ulint ratio;
507
508- mutex_enter(&(buf_pool->mutex));
509+ mutex_enter(&(buf_pool->LRU_mutex));
510+ mutex_enter(&(buf_pool->flush_list_mutex));
511+ mutex_enter(&(buf_pool->free_mutex));
512
513 ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
514 / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
515@@ -2393,7 +2460,9 @@
516
517 /* 1 + is there to avoid division by zero */
518
519- mutex_exit(&(buf_pool->mutex));
520+ mutex_exit(&(buf_pool->LRU_mutex));
521+ mutex_exit(&(buf_pool->flush_list_mutex));
522+ mutex_exit(&(buf_pool->free_mutex));
523
524 return(ratio);
525 }
526@@ -2413,6 +2482,9 @@
527 ut_ad(buf_pool);
528 size = buf_pool->curr_size;
529
530+ mutex_enter(&(buf_pool->LRU_mutex));
531+ mutex_enter(&(buf_pool->flush_list_mutex));
532+ mutex_enter(&(buf_pool->free_mutex));
533 mutex_enter(&(buf_pool->mutex));
534
535 if (srv_use_awe) {
536@@ -2487,6 +2559,9 @@
537 buf_pool->n_pages_written_old = buf_pool->n_pages_written;
538 buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
539
540+ mutex_exit(&(buf_pool->LRU_mutex));
541+ mutex_exit(&(buf_pool->flush_list_mutex));
542+ mutex_exit(&(buf_pool->free_mutex));
543 mutex_exit(&(buf_pool->mutex));
544 }
545
546@@ -2517,8 +2592,6 @@
547
548 ut_ad(buf_pool);
549
550- mutex_enter(&(buf_pool->mutex));
551-
552 for (i = 0; i < buf_pool->curr_size; i++) {
553
554 block = buf_pool_get_nth_block(buf_pool, i);
555@@ -2540,8 +2613,6 @@
556 mutex_exit(&block->mutex);
557 }
558
559- mutex_exit(&(buf_pool->mutex));
560-
561 return(TRUE);
562 }
563
564@@ -2580,11 +2651,11 @@
565 {
566 ulint len;
567
568- mutex_enter(&(buf_pool->mutex));
569+ mutex_enter(&(buf_pool->free_mutex));
570
571 len = UT_LIST_GET_LEN(buf_pool->free);
572
573- mutex_exit(&(buf_pool->mutex));
574+ mutex_exit(&(buf_pool->free_mutex));
575
576 return(len);
577 }
578diff -ruN mysql-5.1.29-rc_orig/storage/innobase/buf/buf0flu.c mysql-5.1.29-rc/storage/innobase/buf/buf0flu.c
579--- mysql-5.1.29-rc_orig/storage/innobase/buf/buf0flu.c 2008-10-12 06:54:12.000000000 +0900
580+++ mysql-5.1.29-rc/storage/innobase/buf/buf0flu.c 2008-11-18 15:26:07.000000000 +0900
581@@ -109,13 +109,15 @@
582 ut_ad(mutex_own(&(buf_pool->mutex)));
583 ut_ad(mutex_own(&block->mutex));
584 if (block->state != BUF_BLOCK_FILE_PAGE) {
585+ /* It is permitted not to own LRU_mutex here, so this state is not reported as an error. */
586+/*
587 ut_print_timestamp(stderr);
588 fprintf(stderr,
589 " InnoDB: Error: buffer block state %lu"
590 " in the LRU list!\n",
591 (ulong)block->state);
592 ut_print_buf(stderr, block, sizeof(buf_block_t));
593-
594+*/
595 return(FALSE);
596 }
597
598@@ -546,18 +548,20 @@
599 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
600 || flush_type == BUF_FLUSH_SINGLE_PAGE);
601
602- mutex_enter(&(buf_pool->mutex));
603+ mutex_enter(&(buf_pool->hash_mutex));
604
605 block = buf_page_hash_get(space, offset);
606
607 ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
608
609 if (!block) {
610- mutex_exit(&(buf_pool->mutex));
611+ mutex_exit(&(buf_pool->hash_mutex));
612 return(0);
613 }
614
615 mutex_enter(&block->mutex);
616+ mutex_enter(&(buf_pool->mutex));
617+ mutex_exit(&(buf_pool->hash_mutex));
618
619 if (flush_type == BUF_FLUSH_LIST
620 && buf_flush_ready_for_flush(block, flush_type)) {
621@@ -755,7 +759,7 @@
622 high = fil_space_get_size(space);
623 }
624
625- mutex_enter(&(buf_pool->mutex));
626+ mutex_enter(&(buf_pool->hash_mutex));
627
628 for (i = low; i < high; i++) {
629
630@@ -789,7 +793,7 @@
631
632 mutex_exit(&block->mutex);
633
634- mutex_exit(&(buf_pool->mutex));
635+ mutex_exit(&(buf_pool->hash_mutex));
636
637 /* Note: as we release the buf_pool mutex
638 above, in buf_flush_try_page we cannot be sure
639@@ -800,14 +804,14 @@
640 count += buf_flush_try_page(space, i,
641 flush_type);
642
643- mutex_enter(&(buf_pool->mutex));
644+ mutex_enter(&(buf_pool->hash_mutex));
645 } else {
646 mutex_exit(&block->mutex);
647 }
648 }
649 }
650
651- mutex_exit(&(buf_pool->mutex));
652+ mutex_exit(&(buf_pool->hash_mutex));
653
654 return(count);
655 }
656@@ -863,6 +867,13 @@
657
658 (buf_pool->init_flush)[flush_type] = TRUE;
659
660+ mutex_exit(&(buf_pool->mutex));
661+
662+ if (flush_type == BUF_FLUSH_LRU) {
663+ mutex_enter(&(buf_pool->LRU_mutex));
664+ }
665+ mutex_enter(&(buf_pool->flush_list_mutex));
666+
667 for (;;) {
668 /* If we have flushed enough, leave the loop */
669 if (page_count >= min_n) {
670@@ -908,7 +919,10 @@
671 offset = block->offset;
672
673 mutex_exit(&block->mutex);
674- mutex_exit(&(buf_pool->mutex));
675+ if (flush_type == BUF_FLUSH_LRU) {
676+ mutex_exit(&(buf_pool->LRU_mutex));
677+ }
678+ mutex_exit(&(buf_pool->flush_list_mutex));
679
680 old_page_count = page_count;
681
682@@ -920,7 +934,10 @@
683 flush_type, offset,
684 page_count - old_page_count); */
685
686- mutex_enter(&(buf_pool->mutex));
687+ if (flush_type == BUF_FLUSH_LRU) {
688+ mutex_enter(&(buf_pool->LRU_mutex));
689+ }
690+ mutex_enter(&(buf_pool->flush_list_mutex));
691
692 } else if (flush_type == BUF_FLUSH_LRU) {
693
694@@ -943,6 +960,13 @@
695 }
696 }
697
698+ if (flush_type == BUF_FLUSH_LRU) {
699+ mutex_exit(&(buf_pool->LRU_mutex));
700+ }
701+ mutex_exit(&(buf_pool->flush_list_mutex));
702+
703+ mutex_enter(&(buf_pool->mutex));
704+
705 (buf_pool->init_flush)[flush_type] = FALSE;
706
707 if ((buf_pool->n_flush[flush_type] == 0)
708@@ -1001,10 +1025,14 @@
709 ulint n_replaceable;
710 ulint distance = 0;
711
712- mutex_enter(&(buf_pool->mutex));
713+ /* optimistic search... */
714+ //mutex_enter(&(buf_pool->LRU_mutex));
715+ //mutex_enter(&(buf_pool->free_mutex));
716
717 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
718
719+ //mutex_exit(&(buf_pool->free_mutex));
720+
721 block = UT_LIST_GET_LAST(buf_pool->LRU);
722
723 while ((block != NULL)
724@@ -1025,7 +1053,7 @@
725 block = UT_LIST_GET_PREV(LRU, block);
726 }
727
728- mutex_exit(&(buf_pool->mutex));
729+ //mutex_exit(&(buf_pool->LRU_mutex));
730
731 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
732
733@@ -1044,8 +1072,9 @@
734 immediately, without waiting. */
735
736 void
737-buf_flush_free_margin(void)
738+buf_flush_free_margin(
739 /*=======================*/
740+ ibool wait)
741 {
742 ulint n_to_flush;
743 ulint n_flushed;
744@@ -1055,7 +1084,7 @@
745 if (n_to_flush > 0) {
746 n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush,
747 ut_dulint_zero);
748- if (n_flushed == ULINT_UNDEFINED) {
749+ if (wait && n_flushed == ULINT_UNDEFINED) {
750 /* There was an LRU type flush batch already running;
751 let us wait for it to end */
752
753@@ -1105,11 +1134,11 @@
754 {
755 ibool ret;
756
757- mutex_enter(&(buf_pool->mutex));
758+ mutex_enter(&(buf_pool->flush_list_mutex));
759
760 ret = buf_flush_validate_low();
761
762- mutex_exit(&(buf_pool->mutex));
763+ mutex_exit(&(buf_pool->flush_list_mutex));
764
765 return(ret);
766 }
767diff -ruN mysql-5.1.29-rc_orig/storage/innobase/buf/buf0lru.c mysql-5.1.29-rc/storage/innobase/buf/buf0lru.c
768--- mysql-5.1.29-rc_orig/storage/innobase/buf/buf0lru.c 2008-10-12 06:54:12.000000000 +0900
769+++ mysql-5.1.29-rc/storage/innobase/buf/buf0lru.c 2008-11-18 15:09:58.000000000 +0900
770@@ -79,7 +79,10 @@
771 ibool all_freed;
772
773 scan_again:
774- mutex_enter(&(buf_pool->mutex));
775+ mutex_enter(&(buf_pool->LRU_mutex));
776+ mutex_enter(&(buf_pool->flush_list_mutex));
777+ mutex_enter(&(buf_pool->free_mutex));
778+ mutex_enter(&(buf_pool->hash_mutex));
779
780 all_freed = TRUE;
781
782@@ -119,7 +122,10 @@
783
784 mutex_exit(&block->mutex);
785
786- mutex_exit(&(buf_pool->mutex));
787+ mutex_exit(&(buf_pool->LRU_mutex));
788+ mutex_exit(&(buf_pool->flush_list_mutex));
789+ mutex_exit(&(buf_pool->free_mutex));
790+ mutex_exit(&(buf_pool->hash_mutex));
791
792 /* Note that the following call will acquire
793 an S-latch on the page */
794@@ -149,7 +155,10 @@
795 block = prev_block;
796 }
797
798- mutex_exit(&(buf_pool->mutex));
799+ mutex_exit(&(buf_pool->LRU_mutex));
800+ mutex_exit(&(buf_pool->flush_list_mutex));
801+ mutex_exit(&(buf_pool->free_mutex));
802+ mutex_exit(&(buf_pool->hash_mutex));
803
804 if (!all_freed) {
805 os_thread_sleep(20000);
806@@ -172,14 +181,14 @@
807 ulint len;
808 ulint limit;
809
810- mutex_enter(&(buf_pool->mutex));
811+ mutex_enter(&(buf_pool->LRU_mutex));
812
813 len = UT_LIST_GET_LEN(buf_pool->LRU);
814
815 if (len < BUF_LRU_OLD_MIN_LEN) {
816 /* The LRU list is too short to do read-ahead */
817
818- mutex_exit(&(buf_pool->mutex));
819+ mutex_exit(&(buf_pool->LRU_mutex));
820
821 return(0);
822 }
823@@ -188,7 +197,7 @@
824
825 limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
826
827- mutex_exit(&(buf_pool->mutex));
828+ mutex_exit(&(buf_pool->LRU_mutex));
829
830 return(limit);
831 }
832@@ -212,13 +221,15 @@
833 ulint distance = 0;
834 ibool freed;
835
836- mutex_enter(&(buf_pool->mutex));
837+ /* optimistic search... */
838+ //mutex_enter(&(buf_pool->LRU_mutex));
839
840+retry:
841 freed = FALSE;
842 block = UT_LIST_GET_LAST(buf_pool->LRU);
843
844 while (block != NULL) {
845- ut_a(block->in_LRU_list);
846+ //ut_a(block->in_LRU_list); /* optimistic */
847
848 mutex_enter(&block->mutex);
849
850@@ -234,9 +245,17 @@
851 }
852 #endif /* UNIV_DEBUG */
853
854+ mutex_exit(&block->mutex);
855+
856+ mutex_enter(&(buf_pool->LRU_mutex));/* optimistic */
857+
858+ mutex_enter(&(buf_pool->hash_mutex));
859+ mutex_enter(&block->mutex);
860+ if(block->in_LRU_list && buf_flush_ready_for_replace(block)) {
861 buf_LRU_block_remove_hashed_page(block);
862+ mutex_exit(&(buf_pool->hash_mutex));
863
864- mutex_exit(&(buf_pool->mutex));
865+ mutex_exit(&(buf_pool->LRU_mutex));
866 mutex_exit(&block->mutex);
867
868 /* Remove possible adaptive hash index built on the
869@@ -257,14 +276,25 @@
870
871 ut_a(block->buf_fix_count == 0);
872
873- mutex_enter(&(buf_pool->mutex));
874+ mutex_enter(&(buf_pool->free_mutex));
875 mutex_enter(&block->mutex);
876
877 buf_LRU_block_free_hashed_page(block);
878 freed = TRUE;
879+ mutex_exit(&(buf_pool->free_mutex));
880 mutex_exit(&block->mutex);
881
882 break;
883+ } else { /* someone may interrupt...??? */
884+ mutex_exit(&(buf_pool->LRU_mutex));/* optimistic */
885+
886+ mutex_exit(&(buf_pool->hash_mutex));
887+
888+ if (!(block->in_LRU_list)) {
889+ mutex_exit(&block->mutex);
890+ goto retry;
891+ }
892+ }
893 }
894
895 mutex_exit(&block->mutex);
896@@ -275,13 +305,21 @@
897 if (!freed && n_iterations <= 10
898 && distance > 100 + (n_iterations * buf_pool->curr_size)
899 / 10) {
900- buf_pool->LRU_flush_ended = 0;
901
902+ mutex_enter(&(buf_pool->mutex));
903+ buf_pool->LRU_flush_ended = 0;
904 mutex_exit(&(buf_pool->mutex));
905
906+ //mutex_exit(&(buf_pool->LRU_mutex));
907+
908 return(FALSE);
909 }
910 }
911+ if (!freed) {
912+ //mutex_exit(&(buf_pool->LRU_mutex));
913+ }
914+
915+ mutex_enter(&(buf_pool->mutex));
916 if (buf_pool->LRU_flush_ended > 0) {
917 buf_pool->LRU_flush_ended--;
918 }
919@@ -333,7 +371,8 @@
920 {
921 ibool ret = FALSE;
922
923- mutex_enter(&(buf_pool->mutex));
924+ mutex_enter(&(buf_pool->LRU_mutex));
925+ mutex_enter(&(buf_pool->free_mutex));
926
927 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
928 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) {
929@@ -341,7 +380,8 @@
930 ret = TRUE;
931 }
932
933- mutex_exit(&(buf_pool->mutex));
934+ mutex_exit(&(buf_pool->LRU_mutex));
935+ mutex_exit(&(buf_pool->free_mutex));
936
937 return(ret);
938 }
939@@ -364,7 +404,7 @@
940 ibool mon_value_was = FALSE;
941 ibool started_monitor = FALSE;
942 loop:
943- mutex_enter(&(buf_pool->mutex));
944+ mutex_enter(&(buf_pool->free_mutex)); /* LRU info:optimistic */
945
946 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
947 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) {
948@@ -461,7 +501,7 @@
949
950 mutex_exit(&block->mutex);
951
952- mutex_exit(&(buf_pool->mutex));
953+ mutex_exit(&(buf_pool->free_mutex));
954
955 if (started_monitor) {
956 srv_print_innodb_monitor = mon_value_was;
957@@ -473,7 +513,7 @@
958 /* If no block was in the free list, search from the end of the LRU
959 list and try to free a block there */
960
961- mutex_exit(&(buf_pool->mutex));
962+ mutex_exit(&(buf_pool->free_mutex));
963
964 freed = buf_LRU_search_and_free_block(n_iterations);
965
966@@ -517,7 +557,7 @@
967
968 /* No free block was found: try to flush the LRU list */
969
970- buf_flush_free_margin();
971+ buf_flush_free_margin(TRUE);
972 ++srv_buf_pool_wait_free;
973
974 os_aio_simulated_wake_handler_threads();
975@@ -988,7 +1028,7 @@
976 ulint LRU_pos;
977
978 ut_ad(buf_pool);
979- mutex_enter(&(buf_pool->mutex));
980+ mutex_enter(&(buf_pool->LRU_mutex));
981
982 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
983
984@@ -1033,6 +1073,9 @@
985 ut_a(buf_pool->LRU_old_len == old_len);
986 }
987
988+ mutex_exit(&(buf_pool->LRU_mutex));
989+ mutex_enter(&(buf_pool->free_mutex));
990+
991 UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free);
992
993 block = UT_LIST_GET_FIRST(buf_pool->free);
994@@ -1043,7 +1086,7 @@
995 block = UT_LIST_GET_NEXT(free, block);
996 }
997
998- mutex_exit(&(buf_pool->mutex));
999+ mutex_exit(&(buf_pool->free_mutex));
1000 return(TRUE);
1001 }
1002
1003@@ -1059,7 +1102,7 @@
1004 ulint len;
1005
1006 ut_ad(buf_pool);
1007- mutex_enter(&(buf_pool->mutex));
1008+ mutex_enter(&(buf_pool->LRU_mutex));
1009
1010 fprintf(stderr, "Pool ulint clock %lu\n",
1011 (ulong) buf_pool->ulint_clock);
1012@@ -1105,6 +1148,6 @@
1013 }
1014 }
1015
1016- mutex_exit(&(buf_pool->mutex));
1017+ mutex_exit(&(buf_pool->LRU_mutex));
1018 }
1019 #endif /* UNIV_DEBUG */
1020diff -ruN mysql-5.1.29-rc_orig/storage/innobase/buf/buf0rea.c mysql-5.1.29-rc/storage/innobase/buf/buf0rea.c
1021--- mysql-5.1.29-rc_orig/storage/innobase/buf/buf0rea.c 2008-10-12 06:54:12.000000000 +0900
1022+++ mysql-5.1.29-rc/storage/innobase/buf/buf0rea.c 2008-11-18 15:28:13.000000000 +0900
1023@@ -219,10 +219,12 @@
1024
1025 return(0);
1026 }
1027+ mutex_exit(&(buf_pool->mutex));
1028
1029 /* Count how many blocks in the area have been recently accessed,
1030 that is, reside near the start of the LRU list. */
1031
1032+ mutex_enter(&(buf_pool->hash_mutex));
1033 for (i = low; i < high; i++) {
1034 block = buf_page_hash_get(space, i);
1035
1036@@ -233,8 +235,9 @@
1037 recent_blocks++;
1038 }
1039 }
1040+ mutex_exit(&(buf_pool->hash_mutex));
1041
1042- mutex_exit(&(buf_pool->mutex));
1043+ // mutex_exit(&(buf_pool->mutex));
1044
1045 if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
1046 /* Do nothing */
1047@@ -334,7 +337,7 @@
1048 }
1049
1050 /* Flush pages from the end of the LRU list if necessary */
1051- buf_flush_free_margin();
1052+ buf_flush_free_margin(FALSE);
1053
1054 return(count + count2);
1055 }
1056@@ -432,6 +435,7 @@
1057
1058 return(0);
1059 }
1060+ mutex_exit(&(buf_pool->mutex));
1061
1062 /* Check that almost all pages in the area have been accessed; if
1063 offset == low, the accesses must be in a descending order, otherwise,
1064@@ -445,6 +449,7 @@
1065
1066 fail_count = 0;
1067
1068+ mutex_enter(&(buf_pool->hash_mutex));
1069 for (i = low; i < high; i++) {
1070 block = buf_page_hash_get(space, i);
1071
1072@@ -462,12 +467,13 @@
1073 pred_block = block;
1074 }
1075 }
1076+ mutex_exit(&(buf_pool->hash_mutex));
1077
1078 if (fail_count > BUF_READ_AHEAD_LINEAR_AREA
1079 - BUF_READ_AHEAD_LINEAR_THRESHOLD) {
1080 /* Too many failures: return */
1081
1082- mutex_exit(&(buf_pool->mutex));
1083+ //mutex_exit(&(buf_pool->mutex));
1084
1085 return(0);
1086 }
1087@@ -475,10 +481,11 @@
1088 /* If we got this far, we know that enough pages in the area have
1089 been accessed in the right order: linear read-ahead can be sensible */
1090
1091+ mutex_enter(&(buf_pool->hash_mutex));
1092 block = buf_page_hash_get(space, offset);
1093
1094 if (block == NULL) {
1095- mutex_exit(&(buf_pool->mutex));
1096+ mutex_exit(&(buf_pool->hash_mutex));
1097
1098 return(0);
1099 }
1100@@ -494,7 +501,7 @@
1101 pred_offset = fil_page_get_prev(frame);
1102 succ_offset = fil_page_get_next(frame);
1103
1104- mutex_exit(&(buf_pool->mutex));
1105+ mutex_exit(&(buf_pool->hash_mutex));
1106
1107 if ((offset == low) && (succ_offset == offset + 1)) {
1108
1109@@ -573,7 +580,7 @@
1110 os_aio_simulated_wake_handler_threads();
1111
1112 /* Flush pages from the end of the LRU list if necessary */
1113- buf_flush_free_margin();
1114+ buf_flush_free_margin(FALSE);
1115
1116 #ifdef UNIV_DEBUG
1117 if (buf_debug_prints && (count > 0)) {
1118@@ -639,7 +646,7 @@
1119 os_aio_simulated_wake_handler_threads();
1120
1121 /* Flush pages from the end of the LRU list if necessary */
1122- buf_flush_free_margin();
1123+ buf_flush_free_margin(FALSE);
1124
1125 #ifdef UNIV_DEBUG
1126 if (buf_debug_prints) {
1127@@ -716,7 +723,7 @@
1128 os_aio_simulated_wake_handler_threads();
1129
1130 /* Flush pages from the end of the LRU list if necessary */
1131- buf_flush_free_margin();
1132+ buf_flush_free_margin(FALSE);
1133
1134 #ifdef UNIV_DEBUG
1135 if (buf_debug_prints) {
1136diff -ruN mysql-5.1.29-rc_orig/storage/innobase/include/buf0buf.h mysql-5.1.29-rc/storage/innobase/include/buf0buf.h
1137--- mysql-5.1.29-rc_orig/storage/innobase/include/buf0buf.h 2008-10-12 06:54:13.000000000 +0900
1138+++ mysql-5.1.29-rc/storage/innobase/include/buf0buf.h 2008-11-18 15:09:58.000000000 +0900
1139@@ -926,6 +926,7 @@
1140 currently always the same as
1141 max_size */
1142 hash_table_t* page_hash; /* hash table of the file pages */
1143+ mutex_t hash_mutex;
1144
1145 ulint n_pend_reads; /* number of pending read operations */
1146
1147@@ -958,6 +959,7 @@
1148 UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
1149 /* base node of the modified block
1150 list */
1151+ mutex_t flush_list_mutex;
1152 ibool init_flush[BUF_FLUSH_LIST + 1];
1153 /* this is TRUE when a flush of the
1154 given type is being initialized */
1155@@ -991,8 +993,10 @@
1156 in the case of AWE, at the start are
1157 always free blocks for which the
1158 physical memory is mapped to a frame */
1159+ mutex_t free_mutex;
1160 UT_LIST_BASE_NODE_T(buf_block_t) LRU;
1161 /* base node of the LRU list */
1162+ mutex_t LRU_mutex;
1163 buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
1164 blocks in the LRU list; NULL if LRU
1165 length less than BUF_LRU_OLD_MIN_LEN */
1166diff -ruN mysql-5.1.29-rc_orig/storage/innobase/include/buf0buf.ic mysql-5.1.29-rc/storage/innobase/include/buf0buf.ic
1167--- mysql-5.1.29-rc_orig/storage/innobase/include/buf0buf.ic 2008-10-12 06:54:13.000000000 +0900
1168+++ mysql-5.1.29-rc/storage/innobase/include/buf0buf.ic 2008-11-18 15:09:58.000000000 +0900
1169@@ -104,7 +104,7 @@
1170 buf_block_t* block;
1171 dulint lsn;
1172
1173- mutex_enter(&(buf_pool->mutex));
1174+ mutex_enter(&(buf_pool->flush_list_mutex));
1175
1176 block = UT_LIST_GET_LAST(buf_pool->flush_list);
1177
1178@@ -114,7 +114,7 @@
1179 lsn = block->oldest_modification;
1180 }
1181
1182- mutex_exit(&(buf_pool->mutex));
1183+ mutex_exit(&(buf_pool->flush_list_mutex));
1184
1185 return(lsn);
1186 }
1187@@ -388,18 +388,18 @@
1188 /* out: TRUE if io going on */
1189 buf_block_t* block) /* in: buf_pool block, must be bufferfixed */
1190 {
1191- mutex_enter(&(buf_pool->mutex));
1192+ mutex_enter(&block->mutex);
1193
1194 ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
1195 ut_ad(block->buf_fix_count > 0);
1196
1197 if (block->io_fix != 0) {
1198- mutex_exit(&(buf_pool->mutex));
1199+ mutex_exit(&block->mutex);
1200
1201 return(TRUE);
1202 }
1203
1204- mutex_exit(&(buf_pool->mutex));
1205+ mutex_exit(&block->mutex);
1206
1207 return(FALSE);
1208 }
1209@@ -421,7 +421,7 @@
1210
1211 block = buf_block_align(frame);
1212
1213- mutex_enter(&(buf_pool->mutex));
1214+ mutex_enter(&block->mutex);
1215
1216 if (block->state == BUF_BLOCK_FILE_PAGE) {
1217 lsn = block->newest_modification;
1218@@ -429,7 +429,7 @@
1219 lsn = ut_dulint_zero;
1220 }
1221
1222- mutex_exit(&(buf_pool->mutex));
1223+ mutex_exit(&block->mutex);
1224
1225 return(lsn);
1226 }
1227@@ -624,9 +624,9 @@
1228 ut_a(block->buf_fix_count > 0);
1229
1230 if (rw_latch == RW_X_LATCH && mtr->modifications) {
1231- mutex_enter(&buf_pool->mutex);
1232+ mutex_enter(&buf_pool->flush_list_mutex);
1233 buf_flush_note_modification(block, mtr);
1234- mutex_exit(&buf_pool->mutex);
1235+ mutex_exit(&buf_pool->flush_list_mutex);
1236 }
1237
1238 mutex_enter(&block->mutex);
1239diff -ruN mysql-5.1.29-rc_orig/storage/innobase/include/buf0flu.h mysql-5.1.29-rc/storage/innobase/include/buf0flu.h
1240--- mysql-5.1.29-rc_orig/storage/innobase/include/buf0flu.h 2008-10-12 06:54:13.000000000 +0900
1241+++ mysql-5.1.29-rc/storage/innobase/include/buf0flu.h 2008-11-18 15:09:58.000000000 +0900
1242@@ -26,8 +26,9 @@
1243 a margin of replaceable pages there. */
1244
1245 void
1246-buf_flush_free_margin(void);
1247+buf_flush_free_margin(
1248 /*=======================*/
1249+ ibool wait);
1250 /************************************************************************
1251 Initializes a page for writing to the tablespace. */
1252
1253diff -ruN mysql-5.1.29-rc_orig/storage/innobase/include/buf0flu.ic mysql-5.1.29-rc/storage/innobase/include/buf0flu.ic
1254--- mysql-5.1.29-rc_orig/storage/innobase/include/buf0flu.ic 2008-10-12 06:54:13.000000000 +0900
1255+++ mysql-5.1.29-rc/storage/innobase/include/buf0flu.ic 2008-11-18 15:09:58.000000000 +0900
1256@@ -84,7 +84,7 @@
1257 ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
1258 #endif /* UNIV_SYNC_DEBUG */
1259
1260- mutex_enter(&(buf_pool->mutex));
1261+ mutex_enter(&(buf_pool->flush_list_mutex));
1262
1263 ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0);
1264
1265@@ -102,5 +102,5 @@
1266 start_lsn) <= 0);
1267 }
1268
1269- mutex_exit(&(buf_pool->mutex));
1270+ mutex_exit(&(buf_pool->flush_list_mutex));
1271 }
1272diff -ruN mysql-5.1.29-rc_orig/patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info mysql-5.1.29-rc/patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info
1273--- /dev/null 1970-01-01 09:00:00.000000000 +0900
1274+++ mysql-5.1.29-rc/patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info 2008-11-18 15:09:58.000000000 +0900
1275@@ -0,0 +1,6 @@
1276+File=split_buf_pool_mutex_fixed_optimistic_safe.patch
1277+Name=InnoDB patch to fix buffer pool scalability
1278+Version=1.0
1279+Author=Yasufumi Kinoshita
1280+License=BSD
1281+Comment=
This page took 0.194597 seconds and 4 git commands to generate.