]>
Commit | Line | Data |
---|---|---|
1 | # name : innodb_extend_slow.patch | |
2 | # introduced : 11 or before | |
3 | # maintainer : Yasufumi | |
4 | # | |
5 | #!!! notice !!! | |
6 | # Any small change to this file in the main branch | |
7 | # should be done or reviewed by the maintainer! | |
8 | --- a/storage/innobase/buf/buf0buf.c | |
9 | +++ b/storage/innobase/buf/buf0buf.c | |
10 | @@ -51,6 +51,40 @@ | |
11 | #include "dict0dict.h" | |
12 | #include "log0recv.h" | |
13 | #include "page0zip.h" | |
14 | +#include "trx0trx.h" | |
15 | + | |
16 | +/* prototypes for new functions added to ha_innodb.cc */ | |
17 | +trx_t* innobase_get_trx(); | |
18 | + | |
19 | +/* Marks block's page in trx's DPAH_SIZE-byte bitmap and counts first hits; static so that a C99 inline needs no external definition. */ | |
20 | +static inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) | |
21 | +{ | |
22 | + ulint block_hash; | |
23 | + ulint block_hash_byte; | |
24 | + byte block_hash_offset; | |
25 | + | |
26 | + ut_ad(block); | |
27 | + /* statistics are gathered only when the slow log and trx->take_stats are on */ | |
28 | + if (!innobase_get_slow_log() || !trx || !trx->take_stats) | |
29 | + return; | |
30 | + /* the bitmap is allocated and zeroed lazily, on the first counted page get */ | |
31 | + if (!trx->distinct_page_access_hash) { | |
32 | + trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); | |
33 | + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); | |
34 | + } | |
35 | + | |
36 | + block_hash = ut_hash_ulint((block->page.space << 20) + block->page.space + | |
37 | + block->page.offset, DPAH_SIZE << 3); | |
38 | + block_hash_byte = block_hash >> 3; /* byte index into the bitmap */ | |
39 | + block_hash_offset = (byte) (block_hash & 0x07); /* bit index, always 0..7 */ | |
40 | + if (block_hash_byte >= DPAH_SIZE) { | |
41 | + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); | |
42 | + return; /* report, but never read or write past the end of the bitmap */ | |
43 | + } | |
44 | + if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) | |
45 | + trx->distinct_page_access++; | |
46 | + trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset; | |
47 | +} | |
48 | ||
49 | /* | |
50 | IMPLEMENTATION OF THE BUFFER POOL | |
51 | @@ -1869,8 +1903,16 @@ | |
52 | mutex_t* block_mutex; | |
53 | ibool must_read; | |
54 | unsigned access_time; | |
55 | + trx_t* trx = NULL; | |
56 | + ulint sec; | |
57 | + ulint ms; | |
58 | + ib_uint64_t start_time; | |
59 | + ib_uint64_t finish_time; | |
60 | buf_pool_t* buf_pool = buf_pool_get(space, offset); | |
61 | ||
62 | + if (innobase_get_slow_log()) { | |
63 | + trx = innobase_get_trx(); | |
64 | + } | |
65 | buf_pool->stat.n_page_gets++; | |
66 | ||
67 | for (;;) { | |
68 | @@ -1888,7 +1930,7 @@ | |
69 | //buf_pool_mutex_exit(buf_pool); | |
70 | rw_lock_s_unlock(&buf_pool->page_hash_latch); | |
71 | ||
72 | - buf_read_page(space, zip_size, offset); | |
73 | + buf_read_page(space, zip_size, offset, trx); | |
74 | ||
75 | #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG | |
76 | ut_a(++buf_dbg_counter % 37 || buf_validate()); | |
77 | @@ -1984,6 +2026,13 @@ | |
78 | /* Let us wait until the read operation | |
79 | completes */ | |
80 | ||
81 | + if (innobase_get_slow_log() && trx && trx->take_stats) | |
82 | + { | |
83 | + ut_usectime(&sec, &ms); | |
84 | + start_time = (ib_uint64_t)sec * 1000000 + ms; | |
85 | + } else { | |
86 | + start_time = 0; | |
87 | + } | |
88 | for (;;) { | |
89 | enum buf_io_fix io_fix; | |
90 | ||
91 | @@ -1998,6 +2047,12 @@ | |
92 | break; | |
93 | } | |
94 | } | |
95 | + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) | |
96 | + { | |
97 | + ut_usectime(&sec, &ms); | |
98 | + finish_time = (ib_uint64_t)sec * 1000000 + ms; | |
99 | + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); | |
100 | + } | |
101 | } | |
102 | ||
103 | #ifdef UNIV_IBUF_COUNT_DEBUG | |
104 | @@ -2312,6 +2367,11 @@ | |
105 | ibool must_read; | |
106 | ulint retries = 0; | |
107 | mutex_t* block_mutex = NULL; | |
108 | + trx_t* trx = NULL; | |
109 | + ulint sec; | |
110 | + ulint ms; | |
111 | + ib_uint64_t start_time; | |
112 | + ib_uint64_t finish_time; | |
113 | buf_pool_t* buf_pool = buf_pool_get(space, offset); | |
114 | ||
115 | ut_ad(mtr); | |
116 | @@ -2341,6 +2401,9 @@ | |
117 | || ibuf_page_low(space, zip_size, offset, | |
118 | FALSE, file, line, NULL)); | |
119 | #endif | |
120 | + if (innobase_get_slow_log()) { | |
121 | + trx = innobase_get_trx(); | |
122 | + } | |
123 | buf_pool->stat.n_page_gets++; | |
124 | fold = buf_page_address_fold(space, offset); | |
125 | loop: | |
126 | @@ -2411,9 +2474,9 @@ | |
127 | return(NULL); | |
128 | } | |
129 | ||
130 | - if (buf_read_page(space, zip_size, offset)) { | |
131 | + if (buf_read_page(space, zip_size, offset, trx)) { | |
132 | buf_read_ahead_random(space, zip_size, offset, | |
133 | - ibuf_inside(mtr)); | |
134 | + ibuf_inside(mtr), trx); | |
135 | ||
136 | retries = 0; | |
137 | } else if (retries < BUF_PAGE_READ_MAX_RETRIES) { | |
138 | @@ -2723,6 +2786,13 @@ | |
139 | /* Let us wait until the read operation | |
140 | completes */ | |
141 | ||
142 | + if (innobase_get_slow_log() && trx && trx->take_stats) | |
143 | + { | |
144 | + ut_usectime(&sec, &ms); | |
145 | + start_time = (ib_uint64_t)sec * 1000000 + ms; | |
146 | + } else { | |
147 | + start_time = 0; | |
148 | + } | |
149 | for (;;) { | |
150 | enum buf_io_fix io_fix; | |
151 | ||
152 | @@ -2737,6 +2807,12 @@ | |
153 | break; | |
154 | } | |
155 | } | |
156 | + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) | |
157 | + { | |
158 | + ut_usectime(&sec, &ms); | |
159 | + finish_time = (ib_uint64_t)sec * 1000000 + ms; | |
160 | + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); | |
161 | + } | |
162 | } | |
163 | ||
164 | fix_type = MTR_MEMO_BUF_FIX; | |
165 | @@ -2763,13 +2839,17 @@ | |
166 | read-ahead */ | |
167 | ||
168 | buf_read_ahead_linear(space, zip_size, offset, | |
169 | - ibuf_inside(mtr)); | |
170 | + ibuf_inside(mtr), trx); | |
171 | } | |
172 | ||
173 | #ifdef UNIV_IBUF_COUNT_DEBUG | |
174 | ut_a(ibuf_count_get(buf_block_get_space(block), | |
175 | buf_block_get_page_no(block)) == 0); | |
176 | #endif | |
177 | + if (innobase_get_slow_log()) { | |
178 | + _increment_page_get_statistics(block, trx); | |
179 | + } | |
180 | + | |
181 | return(block); | |
182 | } | |
183 | ||
184 | @@ -2793,6 +2873,7 @@ | |
185 | unsigned access_time; | |
186 | ibool success; | |
187 | ulint fix_type; | |
188 | + trx_t* trx = NULL; | |
189 | ||
190 | ut_ad(block); | |
191 | ut_ad(mtr); | |
192 | @@ -2870,6 +2951,10 @@ | |
193 | #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG | |
194 | ut_a(block->page.file_page_was_freed == FALSE); | |
195 | #endif | |
196 | + if (innobase_get_slow_log()) { | |
197 | + trx = innobase_get_trx(); | |
198 | + } | |
199 | + | |
200 | if (UNIV_UNLIKELY(!access_time)) { | |
201 | /* In the case of a first access, try to apply linear | |
202 | read-ahead */ | |
203 | @@ -2877,7 +2962,7 @@ | |
204 | buf_read_ahead_linear(buf_block_get_space(block), | |
205 | buf_block_get_zip_size(block), | |
206 | buf_block_get_page_no(block), | |
207 | - ibuf_inside(mtr)); | |
208 | + ibuf_inside(mtr), trx); | |
209 | } | |
210 | ||
211 | #ifdef UNIV_IBUF_COUNT_DEBUG | |
212 | @@ -2887,6 +2972,9 @@ | |
213 | buf_pool = buf_pool_from_block(block); | |
214 | buf_pool->stat.n_page_gets++; | |
215 | ||
216 | + if (innobase_get_slow_log()) { | |
217 | + _increment_page_get_statistics(block, trx); | |
218 | + } | |
219 | return(TRUE); | |
220 | } | |
221 | ||
222 | @@ -2909,6 +2997,7 @@ | |
223 | buf_pool_t* buf_pool; | |
224 | ibool success; | |
225 | ulint fix_type; | |
226 | + trx_t* trx = NULL; | |
227 | ||
228 | ut_ad(mtr); | |
229 | ut_ad(mtr->state == MTR_ACTIVE); | |
230 | @@ -2995,6 +3084,11 @@ | |
231 | #endif | |
232 | buf_pool->stat.n_page_gets++; | |
233 | ||
234 | + if (innobase_get_slow_log()) { | |
235 | + trx = innobase_get_trx(); | |
236 | + _increment_page_get_statistics(block, trx); | |
237 | + } | |
238 | + | |
239 | return(TRUE); | |
240 | } | |
241 | ||
242 | --- a/storage/innobase/buf/buf0rea.c | |
243 | +++ b/storage/innobase/buf/buf0rea.c | |
244 | @@ -79,7 +79,8 @@ | |
245 | treat the tablespace as dropped; this is a timestamp we | |
246 | use to stop dangling page reads from a tablespace | |
247 | which we have DISCARDed + IMPORTed back */ | |
248 | - ulint offset) /*!< in: page number */ | |
249 | + ulint offset, /*!< in: page number */ | |
250 | + trx_t* trx) | |
251 | { | |
252 | buf_page_t* bpage; | |
253 | ulint wake_later; | |
254 | @@ -181,15 +182,15 @@ | |
255 | ||
256 | thd_wait_begin(NULL, THD_WAIT_DISKIO); | |
257 | if (zip_size) { | |
258 | - *err = fil_io(OS_FILE_READ | wake_later, | |
259 | + *err = _fil_io(OS_FILE_READ | wake_later, | |
260 | sync, space, zip_size, offset, 0, zip_size, | |
261 | - bpage->zip.data, bpage); | |
262 | + bpage->zip.data, bpage, trx); | |
263 | } else { | |
264 | ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); | |
265 | ||
266 | - *err = fil_io(OS_FILE_READ | wake_later, | |
267 | + *err = _fil_io(OS_FILE_READ | wake_later, | |
268 | sync, space, 0, offset, 0, UNIV_PAGE_SIZE, | |
269 | - ((buf_block_t*) bpage)->frame, bpage); | |
270 | + ((buf_block_t*) bpage)->frame, bpage, trx); | |
271 | } | |
272 | thd_wait_end(NULL); | |
273 | ut_a(*err == DB_SUCCESS); | |
274 | @@ -226,8 +227,9 @@ | |
275 | or 0 */ | |
276 | ulint offset, /*!< in: page number of a page which | |
277 | the current thread wants to access */ | |
278 | - ibool inside_ibuf) /*!< in: TRUE if we are inside ibuf | |
279 | + ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf | |
280 | routine */ | |
281 | + trx_t* trx) | |
282 | { | |
283 | buf_pool_t* buf_pool = buf_pool_get(space, offset); | |
284 | ib_int64_t tablespace_version; | |
285 | @@ -330,7 +332,7 @@ | |
286 | &err, FALSE, | |
287 | ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, | |
288 | space, zip_size, FALSE, | |
289 | - tablespace_version, i); | |
290 | + tablespace_version, i, trx); | |
291 | if (err == DB_TABLESPACE_DELETED) { | |
292 | ut_print_timestamp(stderr); | |
293 | fprintf(stderr, | |
294 | @@ -380,7 +382,8 @@ | |
295 | /*==========*/ | |
296 | ulint space, /*!< in: space id */ | |
297 | ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ | |
298 | - ulint offset) /*!< in: page number */ | |
299 | + ulint offset, /*!< in: page number */ | |
300 | + trx_t* trx) | |
301 | { | |
302 | buf_pool_t* buf_pool = buf_pool_get(space, offset); | |
303 | ib_int64_t tablespace_version; | |
304 | @@ -394,7 +397,7 @@ | |
305 | ||
306 | count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, | |
307 | zip_size, FALSE, | |
308 | - tablespace_version, offset); | |
309 | + tablespace_version, offset, trx); | |
310 | srv_buf_pool_reads += count; | |
311 | if (err == DB_TABLESPACE_DELETED) { | |
312 | ut_print_timestamp(stderr); | |
313 | @@ -446,7 +449,8 @@ | |
314 | ulint space, /*!< in: space id */ | |
315 | ulint zip_size, /*!< in: compressed page size in bytes, or 0 */ | |
316 | ulint offset, /*!< in: page number; see NOTE 3 above */ | |
317 | - ibool inside_ibuf) /*!< in: TRUE if we are inside ibuf routine */ | |
318 | + ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf routine */ | |
319 | + trx_t* trx) | |
320 | { | |
321 | buf_pool_t* buf_pool = buf_pool_get(space, offset); | |
322 | ib_int64_t tablespace_version; | |
323 | @@ -665,7 +669,7 @@ | |
324 | count += buf_read_page_low( | |
325 | &err, FALSE, | |
326 | ibuf_mode, | |
327 | - space, zip_size, FALSE, tablespace_version, i); | |
328 | + space, zip_size, FALSE, tablespace_version, i, trx); | |
329 | if (err == DB_TABLESPACE_DELETED) { | |
330 | ut_print_timestamp(stderr); | |
331 | fprintf(stderr, | |
332 | @@ -758,7 +762,7 @@ | |
333 | buf_read_page_low(&err, sync && (i + 1 == n_stored), | |
334 | BUF_READ_ANY_PAGE, space_ids[i], | |
335 | zip_size, TRUE, space_versions[i], | |
336 | - page_nos[i]); | |
337 | + page_nos[i], NULL); | |
338 | ||
339 | if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) { | |
340 | tablespace_deleted: | |
341 | @@ -900,12 +904,12 @@ | |
342 | if ((i + 1 == n_stored) && sync) { | |
343 | buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, | |
344 | zip_size, TRUE, tablespace_version, | |
345 | - page_nos[i]); | |
346 | + page_nos[i], NULL); | |
347 | } else { | |
348 | buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE | |
349 | | OS_AIO_SIMULATED_WAKE_LATER, | |
350 | space, zip_size, TRUE, | |
351 | - tablespace_version, page_nos[i]); | |
352 | + tablespace_version, page_nos[i], NULL); | |
353 | } | |
354 | } | |
355 | ||
356 | --- a/storage/innobase/fil/fil0fil.c | |
357 | +++ b/storage/innobase/fil/fil0fil.c | |
358 | @@ -4757,7 +4757,7 @@ | |
359 | node->name, node->handle, buf, | |
360 | offset_low, offset_high, | |
361 | page_size * n_pages, | |
362 | - NULL, NULL); | |
363 | + NULL, NULL, NULL); | |
364 | #endif | |
365 | if (success) { | |
366 | node->size += n_pages; | |
367 | @@ -5084,7 +5084,7 @@ | |
368 | i/o on a tablespace which does not exist */ | |
369 | UNIV_INTERN | |
370 | ulint | |
371 | -fil_io( | |
372 | +_fil_io( | |
373 | /*===*/ | |
374 | ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, | |
375 | ORed to OS_FILE_LOG, if a log i/o | |
376 | @@ -5109,8 +5109,9 @@ | |
377 | void* buf, /*!< in/out: buffer where to store read data | |
378 | or from where to write; in aio this must be | |
379 | appropriately aligned */ | |
380 | - void* message) /*!< in: message for aio handler if non-sync | |
381 | + void* message, /*!< in: message for aio handler if non-sync | |
382 | aio used, else ignored */ | |
383 | + trx_t* trx) | |
384 | { | |
385 | ulint mode; | |
386 | fil_space_t* space; | |
387 | @@ -5278,7 +5279,7 @@ | |
388 | #else | |
389 | /* Queue the aio request */ | |
390 | ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, | |
391 | - offset_low, offset_high, len, node, message); | |
392 | + offset_low, offset_high, len, node, message, trx); | |
393 | #endif | |
394 | ut_a(ret); | |
395 | ||
396 | --- a/storage/innobase/handler/ha_innodb.cc | |
397 | +++ b/storage/innobase/handler/ha_innodb.cc | |
398 | @@ -1584,6 +1584,16 @@ | |
399 | trx->check_unique_secondary = !thd_test_options( | |
400 | thd, OPTION_RELAXED_UNIQUE_CHECKS); | |
401 | ||
402 | +#ifdef EXTENDED_SLOWLOG | |
403 | + if (thd_log_slow_verbosity(thd) & SLOG_V_INNODB) { | |
404 | + trx->take_stats = TRUE; | |
405 | + } else { | |
406 | + trx->take_stats = FALSE; | |
407 | + } | |
408 | +#else | |
409 | + trx->take_stats = FALSE; | |
410 | +#endif | |
411 | + | |
412 | DBUG_VOID_RETURN; | |
413 | } | |
414 | ||
415 | @@ -1638,6 +1648,32 @@ | |
416 | return(trx); | |
417 | } | |
418 | ||
419 | +/************************************************************************* | |
420 | +Gets the trx of the current THD (current_thd), or NULL if there is none. */ | |
421 | +extern "C" | |
422 | +trx_t* | |
423 | +innobase_get_trx() | |
424 | +{ | |
425 | + THD* cur_thd = current_thd; | |
426 | + | |
427 | + if (likely(cur_thd != 0)) { | |
428 | + return(thd_to_trx(cur_thd)); | |
429 | + } | |
430 | + | |
431 | + return(NULL); | |
432 | +} | |
433 | + | |
434 | +/* Returns thd_opt_slow_log() as ibool; always FALSE without EXTENDED_SLOWLOG. */ | |
435 | +extern "C" ibool | |
436 | +innobase_get_slow_log() | |
437 | +{ | |
438 | +#ifndef EXTENDED_SLOWLOG | |
439 | + return(FALSE); | |
440 | +#else | |
441 | + return((ibool) thd_opt_slow_log()); | |
442 | +#endif | |
443 | +} | |
444 | + | |
445 | /*********************************************************************//** | |
446 | Note that a transaction has been registered with MySQL. | |
447 | @return true if transaction is registered with MySQL 2PC coordinator */ | |
448 | @@ -9418,6 +9454,25 @@ | |
449 | statement has ended */ | |
450 | ||
451 | if (trx->n_mysql_tables_in_use == 0) { | |
452 | +#ifdef EXTENDED_SLOWLOG | |
453 | + increment_thd_innodb_stats(thd, | |
454 | + (unsigned long long) trx->id, | |
455 | + trx->io_reads, | |
456 | + trx->io_read, | |
457 | + trx->io_reads_wait_timer, | |
458 | + trx->lock_que_wait_timer, | |
459 | + trx->innodb_que_wait_timer, | |
460 | + trx->distinct_page_access); | |
461 | + | |
462 | + trx->io_reads = 0; | |
463 | + trx->io_read = 0; | |
464 | + trx->io_reads_wait_timer = 0; | |
465 | + trx->lock_que_wait_timer = 0; | |
466 | + trx->innodb_que_wait_timer = 0; | |
467 | + trx->distinct_page_access = 0; | |
468 | + if (trx->distinct_page_access_hash) | |
469 | + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); | |
470 | +#endif | |
471 | ||
472 | trx->mysql_n_tables_locked = 0; | |
473 | prebuilt->used_in_HANDLER = FALSE; | |
474 | --- a/storage/innobase/include/buf0rea.h | |
475 | +++ b/storage/innobase/include/buf0rea.h | |
476 | @@ -27,6 +27,7 @@ | |
477 | #define buf0rea_h | |
478 | ||
479 | #include "univ.i" | |
480 | +#include "trx0types.h" | |
481 | #include "buf0types.h" | |
482 | ||
483 | /********************************************************************//** | |
484 | @@ -41,7 +42,8 @@ | |
485 | /*==========*/ | |
486 | ulint space, /*!< in: space id */ | |
487 | ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ | |
488 | - ulint offset);/*!< in: page number */ | |
489 | + ulint offset, /*!< in: page number */ | |
490 | + trx_t* trx); | |
491 | /********************************************************************//** | |
492 | Applies a random read-ahead in buf_pool if there are at least a threshold | |
493 | value of accessed pages from the random read-ahead area. Does not read any | |
494 | @@ -65,8 +67,9 @@ | |
495 | or 0 */ | |
496 | ulint offset, /*!< in: page number of a page which | |
497 | the current thread wants to access */ | |
498 | - ibool inside_ibuf); /*!< in: TRUE if we are inside ibuf | |
499 | + ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf | |
500 | routine */ | |
501 | + trx_t* trx); | |
502 | /********************************************************************//** | |
503 | Applies linear read-ahead if in the buf_pool the page is a border page of | |
504 | a linear read-ahead area and all the pages in the area have been accessed. | |
505 | @@ -98,7 +101,8 @@ | |
506 | ulint space, /*!< in: space id */ | |
507 | ulint zip_size, /*!< in: compressed page size in bytes, or 0 */ | |
508 | ulint offset, /*!< in: page number; see NOTE 3 above */ | |
509 | - ibool inside_ibuf); /*!< in: TRUE if we are inside ibuf routine */ | |
510 | + ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf routine */ | |
511 | + trx_t* trx); | |
512 | /********************************************************************//** | |
513 | Issues read requests for pages which the ibuf module wants to read in, in | |
514 | order to contract the insert buffer tree. Technically, this function is like | |
515 | --- a/storage/innobase/include/fil0fil.h | |
516 | +++ b/storage/innobase/include/fil0fil.h | |
517 | @@ -615,9 +615,12 @@ | |
518 | Reads or writes data. This operation is asynchronous (aio). | |
519 | @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do | |
520 | i/o on a tablespace which does not exist */ | |
521 | +#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \ | |
522 | + _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL) | |
523 | +/* fil_io(): keeps the 9-argument call form; forwards to _fil_io() with trx == NULL */ | |
524 | UNIV_INTERN | |
525 | ulint | |
526 | -fil_io( | |
527 | +_fil_io( | |
528 | /*===*/ | |
529 | ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, | |
530 | ORed to OS_FILE_LOG, if a log i/o | |
531 | @@ -642,8 +645,9 @@ | |
532 | void* buf, /*!< in/out: buffer where to store read data | |
533 | or from where to write; in aio this must be | |
534 | appropriately aligned */ | |
535 | - void* message); /*!< in: message for aio handler if non-sync | |
536 | + void* message, /*!< in: message for aio handler if non-sync | |
537 | aio used, else ignored */ | |
538 | + trx_t* trx); | |
539 | /**********************************************************************//** | |
540 | Waits for an aio operation to complete. This function is used to write the | |
541 | handler for completed requests. The aio array of pending requests is divided | |
542 | --- a/storage/innobase/include/os0file.h | |
543 | +++ b/storage/innobase/include/os0file.h | |
544 | @@ -36,6 +36,7 @@ | |
545 | #define os0file_h | |
546 | ||
547 | #include "univ.i" | |
548 | +#include "trx0types.h" | |
549 | ||
550 | #ifndef __WIN__ | |
551 | #include <dirent.h> | |
552 | @@ -277,13 +278,17 @@ | |
553 | pfs_os_file_close_func(file, __FILE__, __LINE__) | |
554 | ||
555 | # define os_aio(type, mode, name, file, buf, offset, offset_high, \ | |
556 | - n, message1, message2) \ | |
557 | + n, message1, message2, trx) \ | |
558 | pfs_os_aio_func(type, mode, name, file, buf, offset, \ | |
559 | - offset_high, n, message1, message2, \ | |
560 | + offset_high, n, message1, message2, trx, \ | |
561 | __FILE__, __LINE__) | |
562 | ||
563 | # define os_file_read(file, buf, offset, offset_high, n) \ | |
564 | - pfs_os_file_read_func(file, buf, offset, offset_high, n, \ | |
565 | + pfs_os_file_read_func(file, buf, offset, offset_high, n, NULL, \ | |
566 | + __FILE__, __LINE__) | |
567 | +/* os_file_read_trx(): like os_file_read, but passes the caller's trx instead of NULL */ | |
568 | +# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \ | |
569 | + pfs_os_file_read_func(file, buf, offset, offset_high, n, trx, \ | |
570 | __FILE__, __LINE__) | |
571 | ||
572 | # define os_file_read_no_error_handling(file, buf, offset, \ | |
573 | @@ -319,12 +324,15 @@ | |
574 | # define os_file_close(file) os_file_close_func(file) | |
575 | ||
576 | # define os_aio(type, mode, name, file, buf, offset, offset_high, \ | |
577 | - n, message1, message2) \ | |
578 | + n, message1, message2, trx) \ | |
579 | os_aio_func(type, mode, name, file, buf, offset, offset_high, n,\ | |
580 | - message1, message2) | |
581 | + message1, message2, trx) | |
582 | ||
583 | # define os_file_read(file, buf, offset, offset_high, n) \ | |
584 | - os_file_read_func(file, buf, offset, offset_high, n) | |
585 | + os_file_read_func(file, buf, offset, offset_high, n, NULL) | |
586 | +/* os_file_read_trx(): like os_file_read, but passes the caller's trx instead of NULL */ | |
587 | +# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \ | |
588 | + os_file_read_func(file, buf, offset, offset_high, n, trx) | |
589 | ||
590 | # define os_file_read_no_error_handling(file, buf, offset, \ | |
591 | offset_high, n) \ | |
592 | @@ -692,6 +700,7 @@ | |
593 | ulint offset_high,/*!< in: most significant 32 bits of | |
594 | offset */ | |
595 | ulint n, /*!< in: number of bytes to read */ | |
596 | + trx_t* trx, | |
597 | const char* src_file,/*!< in: file name where func invoked */ | |
598 | ulint src_line);/*!< in: line where the func invoked */ | |
599 | ||
600 | @@ -746,6 +755,7 @@ | |
601 | (can be used to identify a completed | |
602 | aio operation); ignored if mode is | |
603 | OS_AIO_SYNC */ | |
604 | + trx_t* trx, | |
605 | const char* src_file,/*!< in: file name where func invoked */ | |
606 | ulint src_line);/*!< in: line where the func invoked */ | |
607 | /*******************************************************************//** | |
608 | @@ -889,7 +899,8 @@ | |
609 | offset where to read */ | |
610 | ulint offset_high,/*!< in: most significant 32 bits of | |
611 | offset */ | |
612 | - ulint n); /*!< in: number of bytes to read */ | |
613 | + ulint n, /*!< in: number of bytes to read */ | |
614 | + trx_t* trx); | |
615 | /*******************************************************************//** | |
616 | Rewind file to its start, read at most size - 1 bytes from it to str, and | |
617 | NUL-terminate str. All errors are silently ignored. This function is | |
618 | @@ -1048,10 +1059,11 @@ | |
619 | (can be used to identify a completed | |
620 | aio operation); ignored if mode is | |
621 | OS_AIO_SYNC */ | |
622 | - void* message2);/*!< in: message for the aio handler | |
623 | + void* message2,/*!< in: message for the aio handler | |
624 | (can be used to identify a completed | |
625 | aio operation); ignored if mode is | |
626 | OS_AIO_SYNC */ | |
627 | + trx_t* trx); | |
628 | /************************************************************************//** | |
629 | Wakes up all async i/o threads so that they know to exit themselves in | |
630 | shutdown. */ | |
631 | --- a/storage/innobase/include/os0file.ic | |
632 | +++ b/storage/innobase/include/os0file.ic | |
633 | @@ -229,6 +229,7 @@ | |
634 | (can be used to identify a completed | |
635 | aio operation); ignored if mode is | |
636 | OS_AIO_SYNC */ | |
637 | + trx_t* trx, | |
638 | const char* src_file,/*!< in: file name where func invoked */ | |
639 | ulint src_line)/*!< in: line where the func invoked */ | |
640 | { | |
641 | @@ -244,7 +245,7 @@ | |
642 | src_file, src_line); | |
643 | ||
644 | result = os_aio_func(type, mode, name, file, buf, offset, offset_high, | |
645 | - n, message1, message2); | |
646 | + n, message1, message2, trx); | |
647 | ||
648 | register_pfs_file_io_end(locker, n); | |
649 | ||
650 | @@ -268,6 +269,7 @@ | |
651 | ulint offset_high,/*!< in: most significant 32 bits of | |
652 | offset */ | |
653 | ulint n, /*!< in: number of bytes to read */ | |
654 | + trx_t* trx, | |
655 | const char* src_file,/*!< in: file name where func invoked */ | |
656 | ulint src_line)/*!< in: line where the func invoked */ | |
657 | { | |
658 | @@ -278,7 +280,7 @@ | |
659 | register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, | |
660 | src_file, src_line); | |
661 | ||
662 | - result = os_file_read_func(file, buf, offset, offset_high, n); | |
663 | + result = os_file_read_func(file, buf, offset, offset_high, n, trx); | |
664 | ||
665 | register_pfs_file_io_end(locker, n); | |
666 | ||
667 | --- a/storage/innobase/include/srv0srv.h | |
668 | +++ b/storage/innobase/include/srv0srv.h | |
669 | @@ -71,6 +71,9 @@ | |
670 | #define SRV_AUTO_EXTEND_INCREMENT \ | |
671 | (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE)) | |
672 | ||
673 | +/* prototypes for new functions added to ha_innodb.cc */ | |
674 | +ibool innobase_get_slow_log(); | |
675 | + | |
676 | /* Mutex for locking srv_monitor_file */ | |
677 | extern mutex_t srv_monitor_file_mutex; | |
678 | /* Temporary file for innodb monitor output */ | |
679 | --- a/storage/innobase/include/trx0trx.h | |
680 | +++ b/storage/innobase/include/trx0trx.h | |
681 | @@ -743,6 +743,17 @@ | |
682 | /*------------------------------*/ | |
683 | char detailed_error[256]; /*!< detailed error message for last | |
684 | error, or empty. */ | |
685 | + /*------------------------------*/ | |
686 | + ulint io_reads; | |
687 | + ib_uint64_t io_read; | |
688 | + ulint io_reads_wait_timer; | |
689 | + ib_uint64_t lock_que_wait_ustarted; | |
690 | + ulint lock_que_wait_timer; | |
691 | + ulint innodb_que_wait_timer; | |
692 | + ulint distinct_page_access; | |
693 | +#define DPAH_SIZE 8192 | |
694 | + byte* distinct_page_access_hash; | |
695 | + ibool take_stats; | |
696 | }; | |
697 | ||
698 | #define TRX_MAX_N_THREADS 32 /* maximum number of | |
699 | --- a/storage/innobase/lock/lock0lock.c | |
700 | +++ b/storage/innobase/lock/lock0lock.c | |
701 | @@ -1765,6 +1765,8 @@ | |
702 | { | |
703 | lock_t* lock; | |
704 | trx_t* trx; | |
705 | + ulint sec; | |
706 | + ulint ms; | |
707 | ||
708 | ut_ad(mutex_own(&kernel_mutex)); | |
709 | ||
710 | @@ -1823,6 +1825,10 @@ | |
711 | trx->que_state = TRX_QUE_LOCK_WAIT; | |
712 | trx->was_chosen_as_deadlock_victim = FALSE; | |
713 | trx->wait_started = time(NULL); | |
714 | + if (innobase_get_slow_log() && trx->take_stats) { | |
715 | + ut_usectime(&sec, &ms); | |
716 | + trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms; | |
717 | + } | |
718 | ||
719 | ut_a(que_thr_stop(thr)); | |
720 | ||
721 | @@ -3766,6 +3772,8 @@ | |
722 | { | |
723 | lock_t* lock; | |
724 | trx_t* trx; | |
725 | + ulint sec; | |
726 | + ulint ms; | |
727 | ||
728 | ut_ad(mutex_own(&kernel_mutex)); | |
729 | ||
730 | @@ -3821,6 +3829,10 @@ | |
731 | return(DB_SUCCESS); | |
732 | } | |
733 | ||
734 | + if (innobase_get_slow_log() && trx->take_stats) { | |
735 | + ut_usectime(&sec, &ms); | |
736 | + trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms; | |
737 | + } | |
738 | trx->que_state = TRX_QUE_LOCK_WAIT; | |
739 | trx->was_chosen_as_deadlock_victim = FALSE; | |
740 | trx->wait_started = time(NULL); | |
741 | --- a/storage/innobase/os/os0file.c | |
742 | +++ b/storage/innobase/os/os0file.c | |
743 | @@ -43,6 +43,8 @@ | |
744 | #include "srv0start.h" | |
745 | #include "fil0fil.h" | |
746 | #include "buf0buf.h" | |
747 | +#include "trx0sys.h" | |
748 | +#include "trx0trx.h" | |
749 | #include "log0recv.h" | |
750 | #ifndef UNIV_HOTBACKUP | |
751 | # include "os0sync.h" | |
752 | @@ -2213,13 +2215,18 @@ | |
753 | ulint n, /*!< in: number of bytes to read */ | |
754 | ulint offset, /*!< in: least significant 32 bits of file | |
755 | offset from where to read */ | |
756 | - ulint offset_high) /*!< in: most significant 32 bits of | |
757 | + ulint offset_high, /*!< in: most significant 32 bits of | |
758 | offset */ | |
759 | + trx_t* trx) | |
760 | { | |
761 | off_t offs; | |
762 | #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD) | |
763 | ssize_t n_bytes; | |
764 | #endif /* HAVE_PREAD && !HAVE_BROKEN_PREAD */ | |
765 | + ulint sec; | |
766 | + ulint ms; | |
767 | + ib_uint64_t start_time; | |
768 | + ib_uint64_t finish_time; | |
769 | ||
770 | ut_a((offset & 0xFFFFFFFFUL) == offset); | |
771 | ||
772 | @@ -2240,6 +2247,15 @@ | |
773 | ||
774 | os_n_file_reads++; | |
775 | ||
776 | + if (innobase_get_slow_log() && trx && trx->take_stats) | |
777 | + { | |
778 | + trx->io_reads++; | |
779 | + trx->io_read += n; | |
780 | + ut_usectime(&sec, &ms); | |
781 | + start_time = (ib_uint64_t)sec * 1000000 + ms; | |
782 | + } else { | |
783 | + start_time = 0; | |
784 | + } | |
785 | #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD) | |
786 | os_mutex_enter(os_file_count_mutex); | |
787 | os_file_n_pending_preads++; | |
788 | @@ -2253,6 +2269,13 @@ | |
789 | os_n_pending_reads--; | |
790 | os_mutex_exit(os_file_count_mutex); | |
791 | ||
792 | + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) | |
793 | + { | |
794 | + ut_usectime(&sec, &ms); | |
795 | + finish_time = (ib_uint64_t)sec * 1000000 + ms; | |
796 | + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); | |
797 | + } | |
798 | + | |
799 | return(n_bytes); | |
800 | #else | |
801 | { | |
802 | @@ -2289,6 +2312,13 @@ | |
803 | os_n_pending_reads--; | |
804 | os_mutex_exit(os_file_count_mutex); | |
805 | ||
806 | + if (innobase_get_slow_log() && trx && trx->take_stats && start_time) | |
807 | + { | |
808 | + ut_usectime(&sec, &ms); | |
809 | + finish_time = (ib_uint64_t)sec * 1000000 + ms; | |
810 | + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); | |
811 | + } | |
812 | + | |
813 | return(ret); | |
814 | } | |
815 | #endif | |
816 | @@ -2429,7 +2459,8 @@ | |
817 | offset where to read */ | |
818 | ulint offset_high, /*!< in: most significant 32 bits of | |
819 | offset */ | |
820 | - ulint n) /*!< in: number of bytes to read */ | |
821 | + ulint n, /*!< in: number of bytes to read */ | |
822 | + trx_t* trx) | |
823 | { | |
824 | #ifdef __WIN__ | |
825 | BOOL ret; | |
826 | @@ -2504,7 +2535,7 @@ | |
827 | os_bytes_read_since_printout += n; | |
828 | ||
829 | try_again: | |
830 | - ret = os_file_pread(file, buf, n, offset, offset_high); | |
831 | + ret = os_file_pread(file, buf, n, offset, offset_high, trx); | |
832 | ||
833 | if ((ulint)ret == n) { | |
834 | ||
835 | @@ -2633,7 +2664,7 @@ | |
836 | os_bytes_read_since_printout += n; | |
837 | ||
838 | try_again: | |
839 | - ret = os_file_pread(file, buf, n, offset, offset_high); | |
840 | + ret = os_file_pread(file, buf, n, offset, offset_high, NULL); | |
841 | ||
842 | if ((ulint)ret == n) { | |
843 | ||
844 | @@ -4027,10 +4058,11 @@ | |
845 | (can be used to identify a completed | |
846 | aio operation); ignored if mode is | |
847 | OS_AIO_SYNC */ | |
848 | - void* message2)/*!< in: message for the aio handler | |
849 | + void* message2,/*!< in: message for the aio handler | |
850 | (can be used to identify a completed | |
851 | aio operation); ignored if mode is | |
852 | OS_AIO_SYNC */ | |
853 | + trx_t* trx) /*!< in: transaction whose io_reads / io_read counters are incremented for OS_FILE_READ requests; NULL skips the accounting */ | |
854 | { | |
855 | os_aio_array_t* array; | |
856 | os_aio_slot_t* slot; | |
857 | @@ -4078,7 +4110,7 @@ | |
858 | ||
859 | if (type == OS_FILE_READ) { | |
860 | return(os_file_read_func(file, buf, offset, | |
861 | - offset_high, n)); | |
862 | + offset_high, n, trx)); | |
863 | } | |
864 | ||
865 | ut_a(type == OS_FILE_WRITE); | |
866 | @@ -4119,6 +4151,11 @@ | |
867 | array = NULL; /* Eliminate compiler warning */ | |
868 | } | |
869 | ||
870 | + if (trx && type == OS_FILE_READ) | |
871 | + { | |
872 | + trx->io_reads++; | |
873 | + trx->io_read += n; | |
874 | + } | |
875 | slot = os_aio_array_reserve_slot(type, array, message1, message2, file, | |
876 | name, buf, offset, offset_high, n); | |
877 | if (type == OS_FILE_READ) { | |
878 | --- a/storage/innobase/srv/srv0srv.c | |
879 | +++ b/storage/innobase/srv/srv0srv.c | |
880 | @@ -87,6 +87,9 @@ | |
881 | #include "mysql/plugin.h" | |
882 | #include "mysql/service_thd_wait.h" | |
883 | ||
884 | +/* prototypes for new functions added to ha_innodb.cc */ | |
885 | +ibool innobase_get_slow_log(); | |
886 | + | |
887 | /* The following counter is incremented whenever there is some user activity | |
888 | in the server */ | |
889 | UNIV_INTERN ulint srv_activity_count = 0; | |
890 | @@ -1234,6 +1237,10 @@ | |
891 | ibool has_slept = FALSE; | |
892 | srv_conc_slot_t* slot = NULL; | |
893 | ulint i; | |
894 | + ib_uint64_t start_time = 0L; | |
895 | + ib_uint64_t finish_time = 0L; | |
896 | + ulint sec; /* seconds part filled in by ut_usectime() */ | |
897 | + ulint ms; /* sub-second part from ut_usectime(); it is added to sec * 1000000 below, i.e. used as microseconds despite the name */ | |
898 | ||
899 | #ifdef UNIV_SYNC_DEBUG | |
900 | ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); | |
901 | @@ -1314,6 +1321,7 @@ | |
902 | switches. */ | |
903 | if (SRV_THREAD_SLEEP_DELAY > 0) { | |
904 | os_thread_sleep(SRV_THREAD_SLEEP_DELAY); | |
905 | + trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY; /* NOTE(review): unlike the other timer updates in this patch, this one is not guarded by innobase_get_slow_log() && trx->take_stats - confirm that is intended */ | |
906 | } | |
907 | ||
908 | trx->op_info = ""; | |
909 | @@ -1373,6 +1381,14 @@ | |
910 | #ifdef UNIV_SYNC_DEBUG | |
911 | ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); | |
912 | #endif /* UNIV_SYNC_DEBUG */ | |
913 | + | |
914 | + if (innobase_get_slow_log() && trx->take_stats) { | |
915 | + ut_usectime(&sec, &ms); | |
916 | + start_time = (ib_uint64_t)sec * 1000000 + ms; | |
917 | + } else { | |
918 | + start_time = 0; | |
919 | + } | |
920 | + | |
921 | trx->op_info = "waiting in InnoDB queue"; | |
922 | ||
923 | thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK); | |
924 | @@ -1381,6 +1397,12 @@ | |
925 | ||
926 | trx->op_info = ""; | |
927 | ||
928 | + if (innobase_get_slow_log() && trx->take_stats && start_time) { | |
929 | + ut_usectime(&sec, &ms); | |
930 | + finish_time = (ib_uint64_t)sec * 1000000 + ms; | |
931 | + trx->innodb_que_wait_timer += (ulint)(finish_time - start_time); | |
932 | + } | |
933 | + | |
934 | os_fast_mutex_lock(&srv_conc_mutex); | |
935 | ||
936 | srv_conc_n_waiting_threads--; | |
937 | --- a/storage/innobase/trx/trx0trx.c | |
938 | +++ b/storage/innobase/trx/trx0trx.c | |
939 | @@ -188,6 +188,15 @@ | |
940 | trx->global_read_view = NULL; | |
941 | trx->read_view = NULL; | |
942 | ||
943 | + trx->io_reads = 0; | |
944 | + trx->io_read = 0; | |
945 | + trx->io_reads_wait_timer = 0; | |
946 | + trx->lock_que_wait_timer = 0; | |
947 | + trx->innodb_que_wait_timer = 0; | |
948 | + trx->distinct_page_access = 0; | |
949 | + trx->distinct_page_access_hash = NULL; | |
950 | + trx->take_stats = FALSE; | |
951 | + | |
952 | /* Set X/Open XA transaction identification to NULL */ | |
953 | memset(&trx->xid, 0, sizeof(trx->xid)); | |
954 | trx->xid.formatID = -1; | |
955 | @@ -221,6 +230,11 @@ | |
956 | ||
957 | mutex_exit(&kernel_mutex); | |
958 | ||
959 | + if (innobase_get_slow_log() && trx->take_stats) { /* NOTE(review): the -188 hunk of this file initializes take_stats to FALSE; confirm this branch can ever be taken for a freshly created trx */ | |
960 | + trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); | |
961 | + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); | |
962 | + } | |
963 | + | |
964 | return(trx); | |
965 | } | |
966 | ||
967 | @@ -406,6 +420,12 @@ | |
968 | /*===============*/ | |
969 | trx_t* trx) /*!< in, own: trx object */ | |
970 | { | |
971 | + if (trx->distinct_page_access_hash) | |
972 | + { | |
973 | + mem_free(trx->distinct_page_access_hash); | |
974 | + trx->distinct_page_access_hash= NULL; | |
975 | + } | |
976 | + | |
977 | mutex_enter(&kernel_mutex); | |
978 | ||
979 | UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx); | |
980 | @@ -427,6 +447,12 @@ | |
981 | /*====================*/ | |
982 | trx_t* trx) /*!< in, own: trx object */ | |
983 | { | |
984 | + if (trx->distinct_page_access_hash) | |
985 | + { | |
986 | + mem_free(trx->distinct_page_access_hash); | |
987 | + trx->distinct_page_access_hash= NULL; | |
988 | + } | |
989 | + | |
990 | mutex_enter(&kernel_mutex); | |
991 | ||
992 | trx_free(trx); | |
993 | @@ -1212,6 +1238,9 @@ | |
994 | trx_t* trx) /*!< in: transaction */ | |
995 | { | |
996 | que_thr_t* thr; | |
997 | + ulint sec; /* seconds part filled in by ut_usectime() */ | |
998 | + ulint ms; /* sub-second part from ut_usectime(); used as microseconds despite the name */ | |
999 | + ib_uint64_t now; /* current time computed as sec * 1000000 + ms */ | |
1000 | ||
1001 | ut_ad(mutex_own(&kernel_mutex)); | |
1002 | ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); | |
1003 | @@ -1226,6 +1255,11 @@ | |
1004 | thr = UT_LIST_GET_FIRST(trx->wait_thrs); | |
1005 | } | |
1006 | ||
1007 | + if (innobase_get_slow_log() && trx->take_stats) { | |
1008 | + ut_usectime(&sec, &ms); | |
1009 | + now = (ib_uint64_t)sec * 1000000 + ms; | |
1010 | + trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); | |
1011 | + } | |
1012 | trx->que_state = TRX_QUE_RUNNING; | |
1013 | } | |
1014 | ||
1015 | @@ -1239,6 +1273,9 @@ | |
1016 | trx_t* trx) /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */ | |
1017 | { | |
1018 | que_thr_t* thr; | |
1019 | + ulint sec; /* seconds part filled in by ut_usectime() */ | |
1020 | + ulint ms; /* sub-second part from ut_usectime(); used as microseconds despite the name */ | |
1021 | + ib_uint64_t now; /* current time computed as sec * 1000000 + ms */ | |
1022 | ||
1023 | ut_ad(mutex_own(&kernel_mutex)); | |
1024 | ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); | |
1025 | @@ -1253,6 +1290,11 @@ | |
1026 | thr = UT_LIST_GET_FIRST(trx->wait_thrs); | |
1027 | } | |
1028 | ||
1029 | + if (innobase_get_slow_log() && trx->take_stats) { | |
1030 | + ut_usectime(&sec, &ms); | |
1031 | + now = (ib_uint64_t)sec * 1000000 + ms; | |
1032 | + trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); | |
1033 | + } | |
1034 | trx->que_state = TRX_QUE_RUNNING; | |
1035 | } | |
1036 |