]>
Commit | Line | Data |
---|---|---|
13ceb006 AM |
1 | --- a/include/my_sys.h |
2 | +++ b/include/my_sys.h | |
3 | @@ -524,6 +524,8 @@ | |
4 | ||
5 | #define my_b_tell(info) ((info)->pos_in_file + \ | |
6 | (size_t) (*(info)->current_pos - (info)->request_pos)) | |
7 | +#define my_b_write_tell(info) ((info)->pos_in_file + \ | |
8 | + ((info)->write_pos - (info)->write_buffer)) | |
9 | ||
10 | #define my_b_get_buffer_start(info) (info)->request_pos | |
11 | #define my_b_get_bytes_in_buffer(info) (char*) (info)->read_end - \ | |
12 | --- a/include/mysql/plugin.h | |
13 | +++ b/include/mysql/plugin.h | |
14 | @@ -559,6 +559,8 @@ | |
15 | ||
16 | #define EXTENDED_FOR_USERSTAT | |
17 | ||
18 | +#define EXTENDED_FOR_COMMIT_ORDERED | |
19 | + | |
20 | /** | |
21 | Create a temporary file. | |
22 | ||
23 | --- a/sql/handler.cc | |
24 | +++ b/sql/handler.cc | |
25 | @@ -90,6 +90,8 @@ | |
26 | static TYPELIB known_extensions= {0,"known_exts", NULL, NULL}; | |
27 | uint known_extensions_id= 0; | |
28 | ||
29 | +static int commit_one_phase_low(THD *thd, bool all, THD_TRANS *trans, | |
30 | + bool is_real_trans); | |
31 | ||
32 | ||
33 | static plugin_ref ha_default_plugin(THD *thd) | |
34 | @@ -1119,7 +1121,8 @@ | |
35 | */ | |
36 | bool is_real_trans= all || thd->transaction.all.ha_list == 0; | |
37 | Ha_trx_info *ha_info= trans->ha_list; | |
38 | - my_xid xid= thd->transaction.xid_state.xid.get_my_xid(); | |
39 | + bool need_commit_ordered; | |
40 | + my_xid xid; | |
41 | DBUG_ENTER("ha_commit_trans"); | |
42 | ||
43 | /* | |
44 | @@ -1152,13 +1155,20 @@ | |
45 | DBUG_RETURN(2); | |
46 | } | |
47 | ||
48 | - if (ha_info) | |
49 | + if (!ha_info) | |
50 | + { | |
51 | + /* Free resources and perform other cleanup even for 'empty' transactions. */ | |
52 | + if (is_real_trans) | |
53 | + thd->transaction.cleanup(); | |
54 | + DBUG_RETURN(0); | |
55 | + } | |
56 | + else | |
57 | { | |
58 | uint rw_ha_count; | |
59 | bool rw_trans; | |
60 | MDL_request mdl_request; | |
61 | ||
62 | - DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE();); | |
63 | + DBUG_EXECUTE_IF("crash_commit_before", abort();); | |
64 | ||
65 | /* Close all cursors that can not survive COMMIT */ | |
66 | if (is_real_trans) /* not a statement commit */ | |
67 | @@ -1197,57 +1207,80 @@ | |
68 | !thd->slave_thread) | |
69 | { | |
70 | my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); | |
71 | - ha_rollback_trans(thd, all); | |
72 | - error= 1; | |
73 | - goto end; | |
74 | + goto err; | |
75 | } | |
76 | ||
77 | - if (!trans->no_2pc && (rw_ha_count > 1)) | |
78 | + if (trans->no_2pc || (rw_ha_count <= 1)) | |
79 | { | |
80 | - for (; ha_info && !error; ha_info= ha_info->next()) | |
81 | + error= ha_commit_one_phase(thd, all); | |
82 | + DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT();); | |
83 | + goto end; | |
84 | + } | |
85 | + | |
86 | + need_commit_ordered= FALSE; | |
87 | + xid= thd->transaction.xid_state.xid.get_my_xid(); | |
88 | + | |
89 | + for (Ha_trx_info *hi= ha_info; hi; hi= hi->next()) | |
90 | { | |
91 | int err; | |
92 | - handlerton *ht= ha_info->ht(); | |
93 | + handlerton *ht= hi->ht(); | |
94 | /* | |
95 | Do not call two-phase commit if this particular | |
96 | transaction is read-only. This allows for simpler | |
97 | implementation in engines that are always read-only. | |
98 | */ | |
99 | - if (! ha_info->is_trx_read_write()) | |
100 | + if (! hi->is_trx_read_write()) | |
101 | continue; | |
102 | /* | |
103 | Sic: we know that prepare() is not NULL since otherwise | |
104 | trans->no_2pc would have been set. | |
105 | */ | |
106 | - if ((err= ht->prepare(ht, thd, all))) | |
107 | - { | |
108 | - my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); | |
109 | - error= 1; | |
110 | - } | |
111 | + err= ht->prepare(ht, thd, all); | |
112 | status_var_increment(thd->status_var.ha_prepare_count); | |
113 | + if (err) | |
114 | + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); | |
115 | + | |
116 | + if (err) | |
117 | + goto err; | |
118 | + | |
119 | + need_commit_ordered|= (ht->commit_ordered != NULL); | |
120 | } | |
121 | - DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE();); | |
122 | - if (error || (is_real_trans && xid && | |
123 | - (error= !(cookie= tc_log->log_xid(thd, xid))))) | |
124 | + DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_ABORT();); | |
125 | + | |
126 | + if (!is_real_trans) | |
127 | { | |
128 | - ha_rollback_trans(thd, all); | |
129 | - error= 1; | |
130 | + error= commit_one_phase_low(thd, all, trans, is_real_trans); | |
131 | + DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT();); | |
132 | goto end; | |
133 | } | |
134 | - DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE();); | |
135 | - } | |
136 | - error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0; | |
137 | - DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE();); | |
138 | - if (cookie) | |
139 | + | |
140 | + cookie= tc_log->log_and_order(thd, xid, all, need_commit_ordered); | |
141 | + if (!cookie) | |
142 | + goto err; | |
143 | + | |
144 | + DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_ABORT();); | |
145 | + | |
146 | + error= commit_one_phase_low(thd, all, trans, is_real_trans) ? 2 : 0; | |
147 | + DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT();); | |
148 | + if (is_real_trans) /* userstat.patch */ | |
149 | + thd->diff_commit_trans++; /* userstat.patch */ | |
150 | + RUN_HOOK(transaction, after_commit, (thd, FALSE)); | |
151 | + | |
152 | + DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_ABORT();); | |
153 | if(tc_log->unlog(cookie, xid)) | |
154 | { | |
155 | error= 2; | |
156 | goto end; | |
157 | } | |
158 | - DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE();); | |
159 | - if (is_real_trans) | |
160 | - thd->diff_commit_trans++; | |
161 | - RUN_HOOK(transaction, after_commit, (thd, FALSE)); | |
162 | + | |
163 | + DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT();); | |
164 | + goto end; | |
165 | + | |
166 | + /* Come here if error and we need to rollback. */ | |
167 | +err: | |
168 | + error= 1; /* Transaction was rolled back */ | |
169 | + ha_rollback_trans(thd, all); | |
170 | + | |
171 | end: | |
172 | if (rw_trans && mdl_request.ticket) | |
173 | { | |
174 | @@ -1260,9 +1293,6 @@ | |
175 | thd->mdl_context.release_lock(mdl_request.ticket); | |
176 | } | |
177 | } | |
178 | - /* Free resources and perform other cleanup even for 'empty' transactions. */ | |
179 | - else if (is_real_trans) | |
180 | - thd->transaction.cleanup(); | |
181 | DBUG_RETURN(error); | |
182 | } | |
183 | ||
184 | @@ -1279,7 +1309,6 @@ | |
185 | ||
186 | int ha_commit_one_phase(THD *thd, bool all) | |
187 | { | |
188 | - int error=0; | |
189 | THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; | |
190 | /* | |
191 | "real" is a nick name for a transaction for which a commit will | |
192 | @@ -1295,8 +1324,16 @@ | |
193 | transaction.all.ha_list, see why in trans_register_ha()). | |
194 | */ | |
195 | bool is_real_trans=all || thd->transaction.all.ha_list == 0; | |
196 | - Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; | |
197 | DBUG_ENTER("ha_commit_one_phase"); | |
198 | + DBUG_RETURN(commit_one_phase_low(thd, all, trans, is_real_trans)); | |
199 | +} | |
200 | + | |
201 | +static int | |
202 | +commit_one_phase_low(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) | |
203 | +{ | |
204 | + int error= 0; | |
205 | + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; | |
206 | + DBUG_ENTER("commit_one_phase_low"); | |
207 | ||
208 | if (ha_info) | |
209 | { | |
210 | @@ -1894,7 +1931,16 @@ | |
211 | { | |
212 | bool warn= true; | |
213 | ||
214 | + /* | |
215 | + Holding the LOCK_commit_ordered mutex ensures that we get the same | |
216 | + snapshot for all engines (including the binary log). This allows us | |
217 | + among other things to do backups with | |
218 | + START TRANSACTION WITH CONSISTENT SNAPSHOT and | |
219 | + have a consistent binlog position. | |
220 | + */ | |
221 | + mysql_mutex_lock(&LOCK_commit_ordered); | |
222 | plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn); | |
223 | + mysql_mutex_unlock(&LOCK_commit_ordered); | |
224 | ||
225 | /* | |
226 | Same idea as when one wants to CREATE TABLE in one engine which does not | |
227 | --- a/sql/handler.h | |
228 | +++ b/sql/handler.h | |
229 | @@ -756,6 +756,53 @@ | |
230 | and 'real commit' mean the same event. | |
231 | */ | |
232 | int (*commit)(handlerton *hton, THD *thd, bool all); | |
233 | + /* | |
234 | + The commit_ordered() method is called prior to the commit() method, after | |
235 | + the transaction manager has decided to commit (not rollback) the | |
236 | + transaction. Unlike commit(), commit_ordered() is called only when the | |
237 | + full transaction is committed, not for each commit of statement | |
238 | + transaction in a multi-statement transaction. | |
239 | + | |
240 | + Note that, like prepare(), commit_ordered() is only called when 2-phase | |
241 | + commit takes place. I.e. when no binary log and only a single engine | |
242 | + participates in a transaction, one commit() is called, no | |
243 | + commit_ordered(). So engines must be prepared for this. | |
244 | + | |
245 | + The calls to commit_ordered() in multiple parallel transactions are | |
246 | + guaranteed to happen in the same order in every participating | |
247 | + handler. This can be used to ensure the same commit order among multiple | |
248 | + handlers (eg. in table handler and binlog). So if transaction T1 calls | |
249 | + into commit_ordered() of handler A before T2, then T1 will also call | |
250 | + commit_ordered() of handler B before T2. | |
251 | + | |
252 | + Engines that implement this method should during this call make the | |
253 | + transaction visible to other transactions, thereby making the order of | |
254 | + transaction commits be defined by the order of commit_ordered() calls. | |
255 | + | |
256 | + The intention is that commit_ordered() should do the minimal amount of | |
257 | + work that needs to happen in consistent commit order among handlers. To | |
258 | + preserve ordering, calls need to be serialised on a global mutex, so | |
259 | + doing any time-consuming or blocking operations in commit_ordered() will | |
260 | + limit scalability. | |
261 | + | |
262 | + Handlers can rely on commit_ordered() calls to be serialised (no two | |
263 | + calls can run in parallel, so no extra locking on the handler part is | |
264 | + required to ensure this). | |
265 | + | |
266 | + Note that commit_ordered() can be called from a different thread than the | |
267 | + one handling the transaction! So it can not do anything that depends on | |
268 | + thread local storage, in particular it can not call my_error() and | |
269 | + friends (instead it can store the error code and delay the call of | |
270 | + my_error() to the commit() method). | |
271 | + | |
272 | + Similarly, since commit_ordered() returns void, any return error code | |
273 | + must be saved and returned from the commit() method instead. | |
274 | + | |
275 | + The commit_ordered method is optional, and can be left unset if not | |
276 | + needed in a particular handler (then there will be no ordering guarantees | |
277 | + wrt. other engines and binary log). | |
278 | + */ | |
279 | + void (*commit_ordered)(handlerton *hton, THD *thd, bool all); | |
280 | int (*rollback)(handlerton *hton, THD *thd, bool all); | |
281 | int (*prepare)(handlerton *hton, THD *thd, bool all); | |
282 | int (*recover)(handlerton *hton, XID *xid_list, uint len); | |
283 | --- a/sql/log.cc | |
284 | +++ b/sql/log.cc | |
285 | @@ -49,6 +49,7 @@ | |
286 | ||
287 | #include "sql_plugin.h" | |
288 | #include "rpl_handler.h" | |
289 | +#include "debug_sync.h" | |
290 | ||
291 | /* max size of the log message */ | |
292 | #define MAX_LOG_BUFFER_SIZE 1024 | |
293 | @@ -71,6 +72,25 @@ | |
294 | static int binlog_rollback(handlerton *hton, THD *thd, bool all); | |
295 | static int binlog_prepare(handlerton *hton, THD *thd, bool all); | |
296 | ||
297 | +static LEX_STRING const write_error_msg= | |
298 | + { C_STRING_WITH_LEN("error writing to the binary log") }; | |
299 | + | |
300 | +static my_bool mutexes_inited; | |
301 | +mysql_mutex_t LOCK_group_commit_queue; | |
302 | +mysql_mutex_t LOCK_commit_ordered; | |
303 | + | |
304 | +static ulonglong binlog_status_var_num_commits; | |
305 | +static ulonglong binlog_status_var_num_group_commits; | |
306 | + | |
307 | +static SHOW_VAR binlog_status_vars_detail[]= | |
308 | +{ | |
309 | + {"commits", | |
310 | + (char *)&binlog_status_var_num_commits, SHOW_LONGLONG}, | |
311 | + {"group_commits", | |
312 | + (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG}, | |
313 | + {NullS, NullS, SHOW_LONG} | |
314 | +}; | |
315 | + | |
316 | /** | |
317 | purge logs, master and slave sides both, related error code | |
318 | convertor. | |
319 | @@ -167,41 +187,6 @@ | |
320 | } | |
321 | ||
322 | /* | |
323 | - Helper class to hold a mutex for the duration of the | |
324 | - block. | |
325 | - | |
326 | - Eliminates the need for explicit unlocking of mutexes on, e.g., | |
327 | - error returns. On passing a null pointer, the sentry will not do | |
328 | - anything. | |
329 | - */ | |
330 | -class Mutex_sentry | |
331 | -{ | |
332 | -public: | |
333 | - Mutex_sentry(mysql_mutex_t *mutex) | |
334 | - : m_mutex(mutex) | |
335 | - { | |
336 | - if (m_mutex) | |
337 | - mysql_mutex_lock(mutex); | |
338 | - } | |
339 | - | |
340 | - ~Mutex_sentry() | |
341 | - { | |
342 | - if (m_mutex) | |
343 | - mysql_mutex_unlock(m_mutex); | |
344 | -#ifndef DBUG_OFF | |
345 | - m_mutex= 0; | |
346 | -#endif | |
347 | - } | |
348 | - | |
349 | -private: | |
350 | - mysql_mutex_t *m_mutex; | |
351 | - | |
352 | - // It's not allowed to copy this object in any way | |
353 | - Mutex_sentry(Mutex_sentry const&); | |
354 | - void operator=(Mutex_sentry const&); | |
355 | -}; | |
356 | - | |
357 | -/* | |
358 | Helper classes to store non-transactional and transactional data | |
359 | before copying it to the binary log. | |
360 | */ | |
361 | @@ -211,7 +196,8 @@ | |
362 | binlog_cache_data(): m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF), | |
363 | incident(FALSE), changes_to_non_trans_temp_table_flag(FALSE), | |
364 | saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0), | |
365 | - ptr_binlog_cache_disk_use(0) | |
366 | + ptr_binlog_cache_disk_use(0), commit_bin_log_file_pos(0), | |
367 | + using_xa(FALSE), xa_xid(0) | |
368 | { } | |
369 | ||
370 | ~binlog_cache_data() | |
371 | @@ -270,6 +256,8 @@ | |
372 | variable after truncating the cache. | |
373 | */ | |
374 | cache_log.disk_writes= 0; | |
375 | + using_xa= FALSE; | |
376 | + commit_bin_log_file_pos= 0; | |
377 | DBUG_ASSERT(empty()); | |
378 | } | |
379 | ||
380 | @@ -411,6 +399,20 @@ | |
381 | ||
382 | binlog_cache_data& operator=(const binlog_cache_data& info); | |
383 | binlog_cache_data(const binlog_cache_data& info); | |
384 | + | |
385 | +public: | |
386 | + /* | |
387 | + Binlog position after current commit, available to storage engines during | |
388 | + commit_ordered() and commit(). | |
389 | + */ | |
390 | + ulonglong commit_bin_log_file_pos; | |
391 | + | |
392 | + /* | |
393 | + Flag set true if this transaction is committed with log_xid() as part of | |
394 | + XA, false if not. | |
395 | + */ | |
396 | + bool using_xa; | |
397 | + my_xid xa_xid; | |
398 | }; | |
399 | ||
400 | class binlog_cache_mngr { | |
401 | @@ -1627,7 +1629,7 @@ | |
402 | */ | |
403 | static inline int | |
404 | binlog_flush_cache(THD *thd, binlog_cache_data* cache_data, Log_event *end_evt, | |
405 | - bool is_transactional) | |
406 | + bool is_transactional, bool all) | |
407 | { | |
408 | DBUG_ENTER("binlog_flush_cache"); | |
409 | int error= 0; | |
410 | @@ -1646,8 +1648,8 @@ | |
411 | were, we would have to ensure that we're not ending a statement | |
412 | inside a stored function. | |
413 | */ | |
414 | - error= mysql_bin_log.write(thd, &cache_data->cache_log, end_evt, | |
415 | - cache_data->has_incident()); | |
416 | + error= mysql_bin_log.write_transaction_to_binlog(thd, cache_data, | |
417 | + end_evt, all); | |
418 | } | |
419 | cache_data->reset(); | |
420 | ||
421 | @@ -1666,12 +1668,12 @@ | |
422 | */ | |
423 | static inline int | |
424 | binlog_commit_flush_stmt_cache(THD *thd, | |
425 | - binlog_cache_mngr *cache_mngr) | |
426 | + binlog_cache_mngr *cache_mngr, bool all) | |
427 | { | |
428 | Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"), | |
429 | FALSE, FALSE, TRUE, 0); | |
430 | return (binlog_flush_cache(thd, &cache_mngr->stmt_cache, &end_evt, | |
431 | - FALSE)); | |
432 | + FALSE, all)); | |
433 | } | |
434 | ||
435 | /** | |
436 | @@ -1684,12 +1686,12 @@ | |
437 | nonzero if an error pops up when flushing the cache. | |
438 | */ | |
439 | static inline int | |
440 | -binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr) | |
441 | +binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all) | |
442 | { | |
443 | Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"), | |
444 | TRUE, FALSE, TRUE, 0); | |
445 | return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt, | |
446 | - TRUE)); | |
447 | + TRUE, all)); | |
448 | } | |
449 | ||
450 | /** | |
451 | @@ -1702,12 +1704,12 @@ | |
452 | nonzero if an error pops up when flushing the cache. | |
453 | */ | |
454 | static inline int | |
455 | -binlog_rollback_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr) | |
456 | +binlog_rollback_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all) | |
457 | { | |
458 | Query_log_event end_evt(thd, STRING_WITH_LEN("ROLLBACK"), | |
459 | TRUE, FALSE, TRUE, 0); | |
460 | return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt, | |
461 | - TRUE)); | |
462 | + TRUE, all)); | |
463 | } | |
464 | ||
465 | /** | |
466 | @@ -1722,11 +1724,11 @@ | |
467 | */ | |
468 | static inline int | |
469 | binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, | |
470 | - my_xid xid) | |
471 | + my_xid xid, bool all) | |
472 | { | |
473 | Xid_log_event end_evt(thd, xid); | |
474 | return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt, | |
475 | - TRUE)); | |
476 | + TRUE, all)); | |
477 | } | |
478 | ||
479 | /** | |
480 | @@ -1788,7 +1790,7 @@ | |
481 | do nothing. | |
482 | just pretend we can do 2pc, so that MySQL won't | |
483 | switch to 1pc. | |
484 | - real work will be done in MYSQL_BIN_LOG::log_xid() | |
485 | + real work will be done in MYSQL_BIN_LOG::log_and_order() | |
486 | */ | |
487 | return 0; | |
488 | } | |
489 | @@ -1821,7 +1823,7 @@ | |
490 | ||
491 | if (!cache_mngr->stmt_cache.empty()) | |
492 | { | |
493 | - error= binlog_commit_flush_stmt_cache(thd, cache_mngr); | |
494 | + error= binlog_commit_flush_stmt_cache(thd, cache_mngr, all); | |
495 | } | |
496 | ||
497 | if (cache_mngr->trx_cache.empty()) | |
498 | @@ -1840,7 +1842,7 @@ | |
499 | Otherwise, we accumulate the changes. | |
500 | */ | |
501 | if (!error && ending_trans(thd, all)) | |
502 | - error= binlog_commit_flush_trx_cache(thd, cache_mngr); | |
503 | + error= binlog_commit_flush_trx_cache(thd, cache_mngr, all); | |
504 | ||
505 | /* | |
506 | This is part of the stmt rollback. | |
507 | @@ -1884,7 +1886,7 @@ | |
508 | } | |
509 | else if (!cache_mngr->stmt_cache.empty()) | |
510 | { | |
511 | - error= binlog_commit_flush_stmt_cache(thd, cache_mngr); | |
512 | + error= binlog_commit_flush_stmt_cache(thd, cache_mngr, all); | |
513 | } | |
514 | ||
515 | if (cache_mngr->trx_cache.empty()) | |
516 | @@ -1932,7 +1934,7 @@ | |
517 | (trans_has_updated_non_trans_table(thd) && | |
518 | ending_single_stmt_trans(thd,all) && | |
519 | thd->variables.binlog_format == BINLOG_FORMAT_MIXED))) | |
520 | - error= binlog_rollback_flush_trx_cache(thd, cache_mngr); | |
521 | + error= binlog_rollback_flush_trx_cache(thd, cache_mngr, all); | |
522 | /* | |
523 | Truncate the cache if: | |
524 | . aborting a single or multi-statement transaction or; | |
525 | @@ -2907,6 +2909,7 @@ | |
526 | MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period) | |
527 | :bytes_written(0), prepared_xids(0), file_id(1), open_count(1), | |
528 | need_start_event(TRUE), | |
529 | + group_commit_queue(0), num_commits(0), num_group_commits(0), | |
530 | sync_period_ptr(sync_period), | |
531 | is_relay_log(0), signal_cnt(0), | |
532 | description_event_for_exec(0), description_event_for_queue(0) | |
533 | @@ -5279,19 +5282,15 @@ | |
534 | SYNOPSIS | |
535 | write_cache() | |
536 | cache Cache to write to the binary log | |
537 | - lock_log True if the LOCK_log mutex should be aquired, false otherwise | |
538 | - sync_log True if the log should be flushed and synced | |
539 | ||
540 | DESCRIPTION | |
541 | Write the contents of the cache to the binary log. The cache will | |
542 | be reset as a READ_CACHE to be able to read the contents from it. | |
543 | */ | |
544 | ||
545 | -int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, | |
546 | - bool lock_log, bool sync_log) | |
547 | +int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache) | |
548 | { | |
549 | - Mutex_sentry sentry(lock_log ? &LOCK_log : NULL); | |
550 | - | |
551 | + mysql_mutex_assert_owner(&LOCK_log); | |
552 | if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) | |
553 | return ER_ERROR_ON_WRITE; | |
554 | uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs; | |
555 | @@ -5402,6 +5401,8 @@ | |
556 | } | |
557 | ||
558 | /* Write data to the binary log file */ | |
559 | + DBUG_EXECUTE_IF("fail_binlog_write_1", | |
560 | + errno= 28; return ER_ERROR_ON_WRITE;); | |
561 | if (my_b_write(&log_file, cache->read_pos, length)) | |
562 | return ER_ERROR_ON_WRITE; | |
563 | thd->binlog_bytes_written+= length; | |
564 | @@ -5410,9 +5411,6 @@ | |
565 | ||
566 | DBUG_ASSERT(carry == 0); | |
567 | ||
568 | - if (sync_log) | |
569 | - return flush_and_sync(0); | |
570 | - | |
571 | return 0; // All OK | |
572 | } | |
573 | ||
574 | @@ -5453,8 +5451,6 @@ | |
575 | if (!is_open()) | |
576 | DBUG_RETURN(error); | |
577 | ||
578 | - LEX_STRING const write_error_msg= | |
579 | - { C_STRING_WITH_LEN("error writing to the binary log") }; | |
580 | Incident incident= INCIDENT_LOST_EVENTS; | |
581 | Incident_log_event ev(thd, incident, write_error_msg); | |
582 | if (lock) | |
583 | @@ -5496,104 +5492,320 @@ | |
584 | 'cache' needs to be reinitialized after this functions returns. | |
585 | */ | |
586 | ||
587 | -bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event, | |
588 | - bool incident) | |
589 | +bool | |
590 | +MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, binlog_cache_data *cache_data, | |
591 | + Log_event *end_ev, bool all) | |
592 | { | |
593 | - DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)"); | |
594 | + group_commit_entry entry; | |
595 | + bool ret; | |
596 | + DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog"); | |
597 | + | |
598 | + entry.thd= thd; | |
599 | + entry.cache_data= cache_data; | |
600 | + entry.error= 0; | |
601 | + entry.all= all; | |
602 | + | |
603 | + /* | |
604 | + Log "BEGIN" at the beginning of every transaction. Here, a transaction is | |
605 | + either a BEGIN..COMMIT block or a single statement in autocommit mode. | |
606 | + | |
607 | + Create the necessary events here, where we have the correct THD (and | |
608 | + thread context). | |
609 | + | |
610 | + Due to group commit the actual writing to binlog may happen in a different | |
611 | + thread. | |
612 | + */ | |
613 | + Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0); | |
614 | + entry.begin_event= &qinfo; | |
615 | + entry.end_event= end_ev; | |
616 | + if (cache_data->has_incident()) | |
617 | + { | |
618 | + Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, write_error_msg); | |
619 | + entry.incident_event= &inc_ev; | |
620 | + ret = write_transaction_to_binlog_events(&entry); | |
621 | + } | |
622 | + else | |
623 | + { | |
624 | + entry.incident_event= NULL; | |
625 | + ret = write_transaction_to_binlog_events(&entry); | |
626 | + } | |
627 | + if (!ret) /* userstat.patch */ | |
628 | + thd->binlog_bytes_written += qinfo.data_written; /* userstat.patch */ | |
629 | + DBUG_RETURN(ret); | |
630 | +} | |
631 | + | |
632 | +bool | |
633 | +MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) | |
634 | +{ | |
635 | + /* | |
636 | + To facilitate group commit for the binlog, we first queue up ourselves in | |
637 | + the group commit queue. Then the first thread to enter the queue waits for | |
638 | + the LOCK_log mutex, and commits for everyone in the queue once it gets the | |
639 | + lock. Any other threads in the queue just wait for the first one to finish | |
640 | + the commit and wake them up. | |
641 | + */ | |
642 | + entry->thd->clear_wakeup_ready(); | |
643 | + mysql_mutex_lock(&LOCK_group_commit_queue); | |
644 | + group_commit_entry *orig_queue= group_commit_queue; | |
645 | + entry->next= orig_queue; | |
646 | + group_commit_queue= entry; | |
647 | + DEBUG_SYNC(entry->thd, "commit_group_commit_queue"); | |
648 | + mysql_mutex_unlock(&LOCK_group_commit_queue); | |
649 | + | |
650 | + /* | |
651 | + The first in the queue handles group commit for all; the others just wait | |
652 | + to be signalled when group commit is done. | |
653 | + */ | |
654 | + if (orig_queue != NULL) | |
655 | + entry->thd->wait_for_wakeup_ready(); | |
656 | + else | |
657 | + trx_group_commit_leader(entry); | |
658 | + | |
659 | + if (likely(!entry->error)) | |
660 | + return 0; | |
661 | + | |
662 | + switch (entry->error) | |
663 | + { | |
664 | + case ER_ERROR_ON_WRITE: | |
665 | + my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, entry->commit_errno); | |
666 | + break; | |
667 | + case ER_ERROR_ON_READ: | |
668 | + my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH), | |
669 | + entry->cache_data->cache_log.file_name, entry->commit_errno); | |
670 | + break; | |
671 | + default: | |
672 | + /* | |
673 | + There are not (and should not be) any errors thrown not covered above. | |
674 | + But just in case one is added later without updating the above switch | |
675 | + statement, include a catch-all. | |
676 | + */ | |
677 | + my_printf_error(entry->error, | |
678 | + "Error writing transaction to binary log: %d", | |
679 | + MYF(ME_NOREFRESH), entry->error); | |
680 | + } | |
681 | + | |
682 | + /* | |
683 | + Since we return error, this transaction XID will not be committed, so | |
684 | + we need to mark it as not needed for recovery (unlog() is not called | |
685 | + for a transaction if log_xid() fails). | |
686 | + */ | |
687 | + if (entry->cache_data->using_xa && entry->cache_data->xa_xid) | |
688 | + mark_xid_done(); | |
689 | + | |
690 | + return 1; | |
691 | +} | |
692 | + | |
693 | +/* | |
694 | + Do binlog group commit as the lead thread. | |
695 | + | |
696 | + This must be called when this thread/transaction is queued at the start of | |
697 | + the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group | |
698 | + commit all the transactions in the queue (more may have entered while waiting | |
699 | + for LOCK_log). After commit is done, all other threads in the queue will be | |
700 | + signalled. | |
701 | + | |
702 | + */ | |
703 | +void | |
704 | +MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) | |
705 | +{ | |
706 | + DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader"); | |
707 | + uint xid_count= 0; | |
708 | + uint write_count= 0; | |
709 | + | |
710 | + /* | |
711 | + Lock the LOCK_log mutex, and once we get it, collect any additional writes | |
712 | + that queued up while we were waiting. | |
713 | + */ | |
714 | mysql_mutex_lock(&LOCK_log); | |
715 | + DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log"); | |
716 | + mysql_mutex_lock(&LOCK_group_commit_queue); | |
717 | + group_commit_entry *current= group_commit_queue; | |
718 | + group_commit_queue= NULL; | |
719 | + mysql_mutex_unlock(&LOCK_group_commit_queue); | |
720 | + | |
721 | + /* As the queue is in reverse order of entering, reverse it. */ | |
722 | + group_commit_entry *queue= NULL; | |
723 | + while (current) | |
724 | + { | |
725 | + group_commit_entry *next= current->next; | |
726 | + current->next= queue; | |
727 | + queue= current; | |
728 | + current= next; | |
729 | + } | |
730 | + DBUG_ASSERT(leader == queue /* the leader should be first in queue */); | |
731 | ||
732 | + /* Now we have in queue the list of transactions to be committed in order. */ | |
733 | DBUG_ASSERT(is_open()); | |
734 | if (likely(is_open())) // Should always be true | |
735 | { | |
736 | /* | |
737 | - We only bother to write to the binary log if there is anything | |
738 | - to write. | |
739 | - */ | |
740 | - if (my_b_tell(cache) > 0) | |
741 | + Commit every transaction in the queue. | |
742 | + | |
743 | + Note that we are doing this in a different thread than the one running | |
744 | + the transaction! So we are limited in the operations we can do. In | |
745 | + particular, we cannot call my_error() on behalf of a transaction, as | |
746 | + that obtains the THD from thread local storage. Instead, we must set | |
747 | + current->error and let the thread do the error reporting itself once | |
748 | + we wake it up. | |
749 | + */ | |
750 | + for (current= queue; current != NULL; current= current->next) | |
751 | { | |
752 | + binlog_cache_data *cache_data= current->cache_data; | |
753 | + IO_CACHE *cache= &cache_data->cache_log; | |
754 | + | |
755 | /* | |
756 | - Log "BEGIN" at the beginning of every transaction. Here, a | |
757 | - transaction is either a BEGIN..COMMIT block or a single | |
758 | - statement in autocommit mode. | |
759 | + We only bother to write to the binary log if there is anything | |
760 | + to write. | |
761 | */ | |
762 | - Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0); | |
763 | - if (qinfo.write(&log_file)) | |
764 | - goto err; | |
765 | - thd->binlog_bytes_written+= qinfo.data_written; | |
766 | - DBUG_EXECUTE_IF("crash_before_writing_xid", | |
767 | - { | |
768 | - if ((write_error= write_cache(thd, cache, false, true))) | |
769 | - DBUG_PRINT("info", ("error writing binlog cache: %d", | |
770 | - write_error)); | |
771 | - DBUG_PRINT("info", ("crashing before writing xid")); | |
772 | - DBUG_SUICIDE(); | |
773 | - }); | |
774 | + if (my_b_tell(cache) > 0) | |
775 | + { | |
776 | + if ((current->error= write_transaction(current))) | |
777 | + current->commit_errno= errno; | |
778 | ||
779 | - if ((write_error= write_cache(thd, cache, false, false))) | |
780 | - goto err; | |
781 | + write_count++; | |
782 | + } | |
783 | ||
784 | - if (commit_event && commit_event->write(&log_file)) | |
785 | - goto err; | |
786 | - if (commit_event) | |
787 | - thd->binlog_bytes_written+= commit_event->data_written; | |
788 | + cache_data->commit_bin_log_file_pos= my_b_write_tell(&log_file); | |
789 | + if (cache_data->using_xa && cache_data->xa_xid) | |
790 | + xid_count++; | |
791 | + } | |
792 | ||
793 | - if (incident && write_incident(thd, FALSE)) | |
794 | - goto err; | |
795 | ||
796 | + if (write_count > 0) | |
797 | + { | |
798 | bool synced= 0; | |
799 | if (flush_and_sync(&synced)) | |
800 | - goto err; | |
801 | - DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE();); | |
802 | - if (cache->error) // Error on read | |
803 | { | |
804 | - sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno); | |
805 | - write_error=1; // Don't give more errors | |
806 | - goto err; | |
807 | + for (current= queue; current != NULL; current= current->next) | |
808 | + { | |
809 | + if (!current->error) | |
810 | + { | |
811 | + current->error= ER_ERROR_ON_WRITE; | |
812 | + current->commit_errno= errno; | |
813 | + } | |
814 | + } | |
815 | + } | |
816 | + else | |
817 | + { | |
818 | + signal_update(); | |
819 | } | |
820 | ||
821 | if (RUN_HOOK(binlog_storage, after_flush, | |
822 | - (thd, log_file_name, log_file.pos_in_file, synced))) | |
823 | + (leader->thd, log_file_name, log_file.pos_in_file, synced))) | |
824 | { | |
825 | sql_print_error("Failed to run 'after_flush' hooks"); | |
826 | - write_error=1; | |
827 | - goto err; | |
828 | + for (current= queue; current != NULL; current= current->next) | |
829 | + { | |
830 | + if (!current->error) | |
831 | + { | |
832 | + current->error= ER_ERROR_ON_WRITE; | |
833 | + current->commit_errno= errno; | |
834 | + } | |
835 | + } | |
836 | } | |
837 | ||
838 | - signal_update(); | |
839 | } | |
840 | ||
841 | /* | |
842 | - if commit_event is Xid_log_event, increase the number of | |
843 | - prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated | |
844 | + if any commit_events are Xid_log_event, increase the number of | |
845 | + prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated | |
846 | if there're prepared xids in it - see the comment in new_file() for | |
847 | an explanation. | |
848 | - If the commit_event is not Xid_log_event (then it's a Query_log_event) | |
849 | - rotate binlog, if necessary. | |
850 | + If no Xid_log_events (then it's all Query_log_event) rotate binlog, | |
851 | + if necessary. | |
852 | */ | |
853 | - if (commit_event && commit_event->get_type_code() == XID_EVENT) | |
854 | + if (xid_count > 0) | |
855 | { | |
856 | - mysql_mutex_lock(&LOCK_prep_xids); | |
857 | - prepared_xids++; | |
858 | - mysql_mutex_unlock(&LOCK_prep_xids); | |
859 | + mark_xids_active(xid_count); | |
860 | } | |
861 | else | |
862 | if (rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED)) | |
863 | - goto err; | |
864 | + { | |
865 | + for (current= queue; current != NULL; current= current->next) | |
866 | + { | |
867 | + if (!current->error) | |
868 | + { | |
869 | + current->error= ER_ERROR_ON_WRITE; | |
870 | + current->commit_errno= errno; | |
871 | + } | |
872 | + } | |
873 | + } | |
874 | } | |
875 | + DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered"); | |
876 | + mysql_mutex_lock(&LOCK_commit_ordered); | |
877 | + /* | |
878 | + We cannot unlock LOCK_log until we have locked LOCK_commit_ordered; | |
879 | + otherwise scheduling could allow the next group commit to run ahead of us, | |
880 | + messing up the order of commit_ordered() calls. But as soon as | |
881 | + LOCK_commit_ordered is obtained, we can let the next group commit start. | |
882 | + */ | |
883 | mysql_mutex_unlock(&LOCK_log); | |
884 | + DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log"); | |
885 | + ++num_group_commits; | |
886 | ||
887 | - DBUG_RETURN(0); | |
888 | - | |
889 | -err: | |
890 | - if (!write_error) | |
891 | + /* | |
892 | + Wakeup each participant waiting for our group commit, first calling the | |
893 | + commit_ordered() methods for any transactions doing 2-phase commit. | |
894 | + */ | |
895 | + current= queue; | |
896 | + while (current != NULL) | |
897 | { | |
898 | - write_error= 1; | |
899 | - sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); | |
900 | + group_commit_entry *next; | |
901 | + | |
902 | + DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered"); | |
903 | + ++num_commits; | |
904 | + if (current->cache_data->using_xa && !current->error) | |
905 | + run_commit_ordered(current->thd, current->all); | |
906 | + | |
907 | + /* | |
908 | + Careful not to access current->next after waking up the other thread, as | |
909 | + it may change immediately after wakeup. | |
910 | + */ | |
911 | + next= current->next; | |
912 | + if (current != leader) // Don't wake up ourself | |
913 | + current->thd->signal_wakeup_ready(); | |
914 | + current= next; | |
915 | } | |
916 | - mysql_mutex_unlock(&LOCK_log); | |
917 | - DBUG_RETURN(1); | |
918 | + DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered"); | |
919 | + mysql_mutex_unlock(&LOCK_commit_ordered); | |
920 | + | |
921 | + DBUG_VOID_RETURN; | |
922 | } | |
923 | ||
924 | +int | |
925 | +MYSQL_BIN_LOG::write_transaction(group_commit_entry *entry) | |
926 | +{ | |
927 | + binlog_cache_data *cache_data= entry->cache_data; | |
928 | + IO_CACHE *cache= &cache_data->cache_log; | |
929 | + | |
930 | + if (entry->begin_event->write(&log_file)) | |
931 | + return ER_ERROR_ON_WRITE; | |
932 | + | |
933 | + DBUG_EXECUTE_IF("crash_before_writing_xid", | |
934 | + { | |
935 | + if ((write_cache(entry->thd, cache))) | |
936 | + DBUG_PRINT("info", ("error writing binlog cache")); | |
937 | + else | |
938 | + flush_and_sync(0); | |
939 | + | |
940 | + DBUG_PRINT("info", ("crashing before writing xid")); | |
941 | + abort(); | |
942 | + }); | |
943 | + | |
944 | + if (write_cache(entry->thd, cache)) | |
945 | + return ER_ERROR_ON_WRITE; | |
946 | + | |
947 | + if (entry->end_event->write(&log_file)) | |
948 | + return ER_ERROR_ON_WRITE; | |
949 | + | |
950 | + if (entry->incident_event && entry->incident_event->write(&log_file)) | |
951 | + return ER_ERROR_ON_WRITE; | |
952 | + | |
953 | + if (cache->error) // Error on read | |
954 | + return ER_ERROR_ON_READ; | |
955 | + | |
956 | + return 0; | |
957 | +} | |
958 | ||
959 | /** | |
960 | Wait until we get a signal that the relay log has been updated. | |
961 | @@ -5999,6 +6211,68 @@ | |
962 | } | |
963 | ||
964 | ||
965 | +void | |
966 | +TC_init() | |
967 | +{ | |
968 | + mysql_mutex_init(key_LOCK_group_commit_queue, &LOCK_group_commit_queue, MY_MUTEX_INIT_SLOW); | |
969 | + mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered, MY_MUTEX_INIT_SLOW); | |
970 | + mutexes_inited= TRUE; | |
971 | +} | |
972 | + | |
973 | + | |
974 | +void | |
975 | +TC_destroy() | |
976 | +{ | |
977 | + if (mutexes_inited) | |
978 | + { | |
979 | + mysql_mutex_destroy(&LOCK_group_commit_queue); | |
980 | + mysql_mutex_destroy(&LOCK_commit_ordered); | |
981 | + mutexes_inited= FALSE; | |
982 | + } | |
983 | +} | |
984 | + | |
985 | + | |
986 | +void | |
987 | +TC_LOG::run_commit_ordered(THD *thd, bool all) | |
988 | +{ | |
989 | + Ha_trx_info *ha_info= | |
990 | + all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list; | |
991 | + | |
992 | + mysql_mutex_assert_owner(&LOCK_commit_ordered); | |
993 | + for (; ha_info; ha_info= ha_info->next()) | |
994 | + { | |
995 | + handlerton *ht= ha_info->ht(); | |
996 | + if (!ht->commit_ordered) | |
997 | + continue; | |
998 | + ht->commit_ordered(ht, thd, all); | |
999 | + DEBUG_SYNC(thd, "commit_after_run_commit_ordered"); | |
1000 | + } | |
1001 | +} | |
1002 | + | |
1003 | +int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all, | |
1004 | + bool need_commit_ordered) | |
1005 | +{ | |
1006 | + int cookie; | |
1007 | + | |
1008 | + cookie= 0; | |
1009 | + if (xid) | |
1010 | + cookie= log_one_transaction(xid); | |
1011 | + | |
1012 | + if (need_commit_ordered) | |
1013 | + { | |
1014 | + /* Only run commit_ordered() if log_one_transaction() returned a cookie. */ | |
1015 | + if (cookie) | |
1016 | + { | |
1017 | + mysql_mutex_lock(&LOCK_commit_ordered); | |
1018 | + run_commit_ordered(thd, all); | |
1019 | + mysql_mutex_unlock(&LOCK_commit_ordered); | |
1020 | + } | |
1021 | + } | |
1022 | + | |
1023 | + return cookie; | |
1024 | +} | |
1025 | + | |
1026 | + | |
1027 | /********* transaction coordinator log for 2pc - mmap() based solution *******/ | |
1028 | ||
1029 | /* | |
1030 | @@ -6135,6 +6409,7 @@ | |
1031 | mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST); | |
1032 | mysql_cond_init(key_COND_active, &COND_active, 0); | |
1033 | mysql_cond_init(key_COND_pool, &COND_pool, 0); | |
1034 | + mysql_cond_init(key_COND_queue_busy, &COND_queue_busy, 0); | |
1035 | ||
1036 | inited=6; | |
1037 | ||
1038 | @@ -6142,6 +6417,8 @@ | |
1039 | active=pages; | |
1040 | pool=pages+1; | |
1041 | pool_last=pages+npages-1; | |
1042 | + commit_ordered_queue= NULL; | |
1043 | + commit_ordered_queue_busy= false; | |
1044 | ||
1045 | return 0; | |
1046 | ||
1047 | @@ -6247,7 +6524,7 @@ | |
1048 | to the position in memory where xid was logged to. | |
1049 | */ | |
1050 | ||
1051 | -int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid) | |
1052 | +int TC_LOG_MMAP::log_one_transaction(my_xid xid) | |
1053 | { | |
1054 | int err; | |
1055 | PAGE *p; | |
1056 | @@ -6386,7 +6663,9 @@ | |
1057 | mysql_mutex_destroy(&LOCK_sync); | |
1058 | mysql_mutex_destroy(&LOCK_active); | |
1059 | mysql_mutex_destroy(&LOCK_pool); | |
1060 | + mysql_cond_destroy(&COND_active); | |
1061 | mysql_cond_destroy(&COND_pool); | |
1062 | + mysql_cond_destroy(&COND_queue_busy); | |
1063 | case 5: | |
1064 | data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails | |
1065 | case 4: | |
1066 | @@ -6596,42 +6875,87 @@ | |
1067 | mysql_cond_destroy(&COND_prep_xids); | |
1068 | } | |
1069 | ||
1070 | -/** | |
1071 | - @todo | |
1072 | - group commit | |
1073 | +/* | |
1074 | + Do a binlog log_and_order() for the transaction of thd: mark its trx cache | |
1075 | + as using XA with the given xid, then commit-flush the stmt and trx caches. | |
1076 | ||
1077 | @retval | |
1078 | 0 error | |
1079 | @retval | |
1080 | 1 success | |
1081 | */ | |
1082 | -int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid) | |
1083 | +int TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all, | |
1084 | + bool need_commit_ordered __attribute__((unused))) | |
1085 | { | |
1086 | - DBUG_ENTER("TC_LOG_BINLOG::log"); | |
1087 | + DBUG_ENTER("TC_LOG_BINLOG::log_and_order"); | |
1088 | binlog_cache_mngr *cache_mngr= | |
1089 | (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); | |
1090 | + | |
1091 | + cache_mngr->trx_cache.using_xa= TRUE; | |
1092 | + cache_mngr->trx_cache.xa_xid= xid; | |
1093 | /* | |
1094 | We always commit the entire transaction when writing an XID. Also | |
1095 | note that the return value is inverted. | |
1096 | */ | |
1097 | - DBUG_RETURN(!binlog_commit_flush_stmt_cache(thd, cache_mngr) && | |
1098 | - !binlog_commit_flush_trx_cache(thd, cache_mngr, xid)); | |
1099 | + DBUG_RETURN(!binlog_commit_flush_stmt_cache(thd, cache_mngr, all) && | |
1100 | + !binlog_commit_flush_trx_cache(thd, cache_mngr, xid, all)); | |
1101 | } | |
1102 | ||
1103 | -int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) | |
1104 | +/* | |
1105 | + After an XID is logged, we need to hold on to the current binlog file until | |
1106 | + it is fully committed in the storage engine. The reason is that crash | |
1107 | + recovery only looks at the latest binlog, so we must make sure there are no | |
1108 | + outstanding prepared (but not committed) transactions before rotating the | |
1109 | + binlog. | |
1110 | + | |
1111 | + To handle this, we keep a count of outstanding XIDs. This function is used | |
1112 | + to increase this count when committing one or more transactions to the | |
1113 | + binary log. | |
1114 | +*/ | |
1115 | +void | |
1116 | +TC_LOG_BINLOG::mark_xids_active(uint xid_count) | |
1117 | { | |
1118 | - DBUG_ENTER("TC_LOG_BINLOG::unlog"); | |
1119 | + DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active"); | |
1120 | + DBUG_PRINT("info", ("xid_count=%u", xid_count)); | |
1121 | + mysql_mutex_lock(&LOCK_prep_xids); | |
1122 | + prepared_xids+= xid_count; | |
1123 | + mysql_mutex_unlock(&LOCK_prep_xids); | |
1124 | + DBUG_VOID_RETURN; | |
1125 | +} | |
1126 | + | |
1127 | +/* | |
1128 | + Once an XID is committed, it is safe to rotate the binary log, as it can no | |
1129 | + longer be needed during crash recovery. | |
1130 | + | |
1131 | + This function is called to mark an XID this way. It needs to decrease the | |
1132 | + count of pending XIDs, and signal the log rotator thread when it reaches zero. | |
1133 | +*/ | |
1134 | +void | |
1135 | +TC_LOG_BINLOG::mark_xid_done() | |
1136 | +{ | |
1137 | + my_bool send_signal; | |
1138 | + | |
1139 | + DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done"); | |
1140 | mysql_mutex_lock(&LOCK_prep_xids); | |
1141 | // prepared_xids can be 0 if the transaction had ignorable errors. | |
1142 | DBUG_ASSERT(prepared_xids >= 0); | |
1143 | if (prepared_xids > 0) | |
1144 | prepared_xids--; | |
1145 | - if (prepared_xids == 0) { | |
1146 | + send_signal= (prepared_xids == 0); | |
1147 | + mysql_mutex_unlock(&LOCK_prep_xids); | |
1148 | + if (send_signal) { | |
1149 | DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids)); | |
1150 | mysql_cond_signal(&COND_prep_xids); | |
1151 | } | |
1152 | - mysql_mutex_unlock(&LOCK_prep_xids); | |
1153 | - DBUG_RETURN(rotate_and_purge(0)); // as ::write() did not rotate | |
1154 | + DBUG_VOID_RETURN; | |
1155 | +} | |
1156 | + | |
1157 | +int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) | |
1158 | +{ | |
1159 | + DBUG_ENTER("TC_LOG_BINLOG::unlog"); | |
1160 | + if (xid) | |
1161 | + mark_xid_done(); | |
1162 | + DBUG_RETURN(rotate_and_purge(0)); | |
1163 | } | |
1164 | ||
1165 | int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle) | |
1166 | @@ -6700,9 +7024,67 @@ | |
1167 | { | |
1168 | return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file; | |
1169 | } | |
1170 | +/* | |
1171 | + Get the current position of the MySQL binlog for the transaction currently | |
1172 | + being committed. Valid to call from within storage engine commit_ordered() | |
1173 | + and commit() methods only. | |
1174 | + | |
1175 | + Since the position is stored inside THD, it is safe to call without any | |
1176 | + locking. The binlog file name is currently not stored inside THD, but this | |
1177 | + is still safe as it can only change when the log is rotated, and we never | |
1178 | + rotate the binlog while commits are pending inside storage engines. | |
1179 | + | |
1180 | + *out_pos and *out_file receive the saved commit position and the current | |
1181 | + binlog file name. If binlog_hton is not in state SHOW_OPTION_YES or thd has | |
1182 | + no binlog cache manager, they are set to 0 and NULL respectively. | |
1183 | +*/ | |
1184 | +extern "C" | |
1185 | +void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file) | |
1186 | +{ | |
1187 | + binlog_cache_mngr *cache_mngr; | |
1188 | + if (binlog_hton->state == SHOW_OPTION_YES | |
1189 | + && (cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton))) | |
1190 | + { | |
1191 | + *out_pos= cache_mngr->trx_cache.commit_bin_log_file_pos; | |
1192 | + *out_file= mysql_bin_log.get_log_fname(); | |
1193 | + } | |
1194 | + else | |
1195 | + { | |
1196 | + *out_pos= 0; /* integer out-parameter: use 0, not the pointer constant NULL */ | |
1197 | + *out_file= NULL; | |
1198 | + } | |
1199 | +} | |
1200 | #endif /* INNODB_COMPATIBILITY_HOOKS */ | |
1201 | ||
1202 | ||
1203 | +static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff) | |
1204 | +{ | |
1205 | + mysql_bin_log.set_status_variables(); | |
1206 | + var->type= SHOW_ARRAY; | |
1207 | + var->value= (char *)&binlog_status_vars_detail; | |
1208 | + return 0; | |
1209 | +} | |
1210 | + | |
1211 | +static SHOW_VAR binlog_status_vars_top[]= { | |
1212 | + {"binlog", (char *) &show_binlog_vars, SHOW_FUNC}, | |
1213 | + {NullS, NullS, SHOW_LONG} | |
1214 | +}; | |
1215 | + | |
1216 | +/* | |
1217 | + Copy out current values of status variables, for SHOW STATUS or | |
1218 | + information_schema.global_status. | |
1219 | + | |
1220 | + This is called only under LOCK_status, so we can fill in a static array. | |
1221 | +*/ | |
1222 | +void | |
1223 | +TC_LOG_BINLOG::set_status_variables() | |
1224 | +{ | |
1225 | + mysql_mutex_lock(&LOCK_commit_ordered); | |
1226 | + binlog_status_var_num_commits= this->num_commits; | |
1227 | + binlog_status_var_num_group_commits= this->num_group_commits; | |
1228 | + mysql_mutex_unlock(&LOCK_commit_ordered); | |
1229 | +} | |
1230 | + | |
1231 | struct st_mysql_storage_engine binlog_storage_engine= | |
1232 | { MYSQL_HANDLERTON_INTERFACE_VERSION }; | |
1233 | ||
1234 | @@ -6717,7 +7099,7 @@ | |
1235 | binlog_init, /* Plugin Init */ | |
1236 | NULL, /* Plugin Deinit */ | |
1237 | 0x0100 /* 1.0 */, | |
1238 | - NULL, /* status variables */ | |
1239 | + binlog_status_vars_top, /* status variables */ | |
1240 | NULL, /* system variables */ | |
1241 | NULL, /* config options */ | |
1242 | 0, /* flags */ | |
1243 | --- a/sql/log.h | |
1244 | +++ b/sql/log.h | |
1245 | @@ -44,17 +44,42 @@ | |
1246 | ||
1247 | virtual int open(const char *opt_name)=0; | |
1248 | virtual void close()=0; | |
1249 | - virtual int log_xid(THD *thd, my_xid xid)=0; | |
1250 | + virtual int log_and_order(THD *thd, my_xid xid, bool all, | |
1251 | + bool need_commit_ordered)=0; | |
1252 | virtual int unlog(ulong cookie, my_xid xid)=0; | |
1253 | + | |
1254 | + protected: | |
1255 | + void run_commit_ordered(THD *thd, bool all); | |
1256 | }; | |
1257 | ||
1258 | +/* | |
1259 | + Locks used to ensure serialised execution of | |
1260 | + TC_LOG::run_commit_ordered(), or any other code that calls handler | |
1261 | + commit_ordered() methods. | |
1262 | +*/ | |
1263 | +extern mysql_mutex_t LOCK_group_commit_queue; | |
1264 | +extern mysql_mutex_t LOCK_commit_ordered; | |
1265 | + | |
1266 | +extern void TC_init(); | |
1267 | +extern void TC_destroy(); | |
1268 | + | |
1269 | class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging | |
1270 | { | |
1271 | public: | |
1272 | TC_LOG_DUMMY() {} | |
1273 | int open(const char *opt_name) { return 0; } | |
1274 | void close() { } | |
1275 | - int log_xid(THD *thd, my_xid xid) { return 1; } | |
1276 | + /* | |
1277 | + TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we | |
1278 | + only use internal XA during commit when >= 2 XA-capable engines | |
1279 | + participate. | |
1280 | + */ | |
1281 | + int log_and_order(THD *thd, my_xid xid, bool all, | |
1282 | + bool need_commit_ordered) | |
1283 | + { | |
1284 | + DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */); | |
1285 | + return 1; | |
1286 | + } | |
1287 | int unlog(ulong cookie, my_xid xid) { return 0; } | |
1288 | }; | |
1289 | ||
1290 | @@ -80,6 +105,13 @@ | |
1291 | mysql_cond_t cond; // to wait for a sync | |
1292 | } PAGE; | |
1293 | ||
1294 | + /* List of THDs for which to invoke commit_ordered(), in order. */ | |
1295 | + struct commit_entry | |
1296 | + { | |
1297 | + struct commit_entry *next; | |
1298 | + THD *thd; | |
1299 | + }; | |
1300 | + | |
1301 | char logname[FN_REFLEN]; | |
1302 | File fd; | |
1303 | my_off_t file_length; | |
1304 | @@ -94,16 +126,38 @@ | |
1305 | */ | |
1306 | mysql_mutex_t LOCK_active, LOCK_pool, LOCK_sync; | |
1307 | mysql_cond_t COND_pool, COND_active; | |
1308 | + /* | |
1309 | + Queue of threads that need to call commit_ordered(). | |
1310 | + Access to this queue must be protected by LOCK_group_commit_queue | |
1311 | + */ | |
1312 | + commit_entry *commit_ordered_queue; | |
1313 | + /* | |
1314 | + This flag and condition are used to reserve the queue while threads in it | |
1315 | + each run the commit_ordered() methods one after the other. Only once the | |
1316 | + last commit_ordered() in the queue is done can we start on a new queue | |
1317 | + run. | |
1318 | + | |
1319 | + Since we start this process in the first thread in the queue and finish in | |
1320 | + the last (and possibly different) thread, we need a condition variable for | |
1321 | + this (we cannot unlock a mutex in a different thread than the one who | |
1322 | + locked it). | |
1323 | + | |
1324 | + The condition is used together with the LOCK_group_commit_queue mutex. | |
1325 | + */ | |
1326 | + my_bool commit_ordered_queue_busy; | |
1327 | + mysql_cond_t COND_queue_busy; | |
1328 | ||
1329 | public: | |
1330 | TC_LOG_MMAP(): inited(0) {} | |
1331 | int open(const char *opt_name); | |
1332 | void close(); | |
1333 | - int log_xid(THD *thd, my_xid xid); | |
1334 | + int log_and_order(THD *thd, my_xid xid, bool all, | |
1335 | + bool need_commit_ordered); | |
1336 | int unlog(ulong cookie, my_xid xid); | |
1337 | int recover(); | |
1338 | ||
1339 | private: | |
1340 | + int log_one_transaction(my_xid xid); | |
1341 | void get_active_from_pool(); | |
1342 | int sync(); | |
1343 | int overflow(); | |
1344 | @@ -271,9 +325,31 @@ | |
1345 | time_t last_time; | |
1346 | }; | |
1347 | ||
1348 | +class binlog_cache_data; | |
1349 | class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG | |
1350 | { | |
1351 | private: | |
1352 | + struct group_commit_entry | |
1353 | + { | |
1354 | + struct group_commit_entry *next; | |
1355 | + THD *thd; | |
1356 | + binlog_cache_data *cache_data; | |
1357 | + /* | |
1358 | + Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be | |
1359 | + written during group commit. The incident_event is only valid if | |
1360 | + trx_data->has_incident() is true. | |
1361 | + */ | |
1362 | + Log_event *begin_event; | |
1363 | + Log_event *end_event; | |
1364 | + Log_event *incident_event; | |
1365 | + /* Set during group commit to record any per-thread error. */ | |
1366 | + int error; | |
1367 | + int commit_errno; | |
1368 | + /* This is the `all' parameter for ha_commit_ordered(). */ | |
1369 | + bool all; | |
1370 | + /* True if we come in through XA log_and_order(), false otherwise. */ | |
1371 | + }; | |
1372 | + | |
1373 | #ifdef HAVE_PSI_INTERFACE | |
1374 | /** The instrumentation key to use for @ LOCK_index. */ | |
1375 | PSI_mutex_key m_key_LOCK_index; | |
1376 | @@ -325,6 +401,12 @@ | |
1377 | In 5.0 it's 0 for relay logs too! | |
1378 | */ | |
1379 | bool no_auto_events; | |
1380 | + /* Queue of transactions queued up to participate in group commit. */ | |
1381 | + group_commit_entry *group_commit_queue; | |
1382 | + /* Total number of committed transactions. */ | |
1383 | + ulonglong num_commits; | |
1384 | + /* Number of group commits done. */ | |
1385 | + ulonglong num_group_commits; | |
1386 | ||
1387 | /* pointer to the sync period variable, for binlog this will be | |
1388 | sync_binlog_period, for relay log this will be | |
1389 | @@ -346,6 +428,11 @@ | |
1390 | */ | |
1391 | int new_file_without_locking(); | |
1392 | int new_file_impl(bool need_lock); | |
1393 | + int write_transaction(group_commit_entry *entry); | |
1394 | + bool write_transaction_to_binlog_events(group_commit_entry *entry); | |
1395 | + void trx_group_commit_leader(group_commit_entry *leader); | |
1396 | + void mark_xid_done(); | |
1397 | + void mark_xids_active(uint xid_count); | |
1398 | ||
1399 | public: | |
1400 | MYSQL_LOG::generate_name; | |
1401 | @@ -387,7 +474,8 @@ | |
1402 | ||
1403 | int open(const char *opt_name); | |
1404 | void close(); | |
1405 | - int log_xid(THD *thd, my_xid xid); | |
1406 | + int log_and_order(THD *thd, my_xid xid, bool all, | |
1407 | + bool need_commit_ordered); | |
1408 | int unlog(ulong cookie, my_xid xid); | |
1409 | int recover(IO_CACHE *log, Format_description_log_event *fdle); | |
1410 | #if !defined(MYSQL_CLIENT) | |
1411 | @@ -434,11 +522,11 @@ | |
1412 | int new_file(); | |
1413 | ||
1414 | bool write(Log_event* event_info); // binary log write | |
1415 | - bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident); | |
1416 | + bool write_transaction_to_binlog(THD *thd, binlog_cache_data *cache_data, | |
1417 | + Log_event *end_ev, bool all); | |
1418 | bool write_incident(THD *thd, bool lock); | |
1419 | ||
1420 | - int write_cache(THD *thd, IO_CACHE *cache, | |
1421 | - bool lock_log, bool flush_and_sync); | |
1422 | + int write_cache(THD *thd, IO_CACHE *cache); | |
1423 | void set_write_error(THD *thd, bool is_transactional); | |
1424 | bool check_write_error(THD *thd); | |
1425 | ||
1426 | @@ -507,6 +595,7 @@ | |
1427 | inline void unlock_index() { mysql_mutex_unlock(&LOCK_index);} | |
1428 | inline IO_CACHE *get_index_file() { return &index_file;} | |
1429 | inline uint32 get_open_count() { return open_count; } | |
1430 | + void set_status_variables(); | |
1431 | }; | |
1432 | ||
1433 | class Log_event_handler | |
1434 | --- a/sql/mysqld.cc | |
1435 | +++ b/sql/mysqld.cc | |
1436 | @@ -1490,6 +1490,7 @@ | |
1437 | ha_end(); | |
1438 | if (tc_log) | |
1439 | tc_log->close(); | |
1440 | + TC_destroy(); | |
1441 | delegates_destroy(); | |
1442 | xid_cache_free(); | |
1443 | table_def_free(); | |
1444 | @@ -4061,6 +4062,8 @@ | |
1445 | query_response_time_init(); | |
1446 | #endif // HAVE_RESPONSE_TIME_DISTRIBUTION | |
1447 | /* We have to initialize the storage engines before CSV logging */ | |
1448 | + TC_init(); | |
1449 | + | |
1450 | init_global_table_stats(); | |
1451 | init_global_index_stats(); | |
1452 | ||
1453 | @@ -8004,6 +8007,7 @@ | |
1454 | key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count, | |
1455 | key_PARTITION_LOCK_auto_inc; | |
1456 | PSI_mutex_key key_RELAYLOG_LOCK_index; | |
1457 | +PSI_mutex_key key_LOCK_wakeup_ready, key_LOCK_group_commit_queue, key_LOCK_commit_ordered; | |
1458 | ||
1459 | static PSI_mutex_info all_server_mutexes[]= | |
1460 | { | |
1461 | @@ -8024,6 +8028,7 @@ | |
1462 | { &key_delayed_insert_mutex, "Delayed_insert::mutex", 0}, | |
1463 | { &key_hash_filo_lock, "hash_filo::lock", 0}, | |
1464 | { &key_LOCK_active_mi, "LOCK_active_mi", PSI_FLAG_GLOBAL}, | |
1465 | + { &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL}, | |
1466 | { &key_LOCK_connection_count, "LOCK_connection_count", PSI_FLAG_GLOBAL}, | |
1467 | { &key_LOCK_crypt, "LOCK_crypt", PSI_FLAG_GLOBAL}, | |
1468 | { &key_LOCK_delayed_create, "LOCK_delayed_create", PSI_FLAG_GLOBAL}, | |
1469 | @@ -8039,6 +8044,7 @@ | |
1470 | "LOCK_global_index_stats", PSI_FLAG_GLOBAL}, | |
1471 | { &key_LOCK_gdl, "LOCK_gdl", PSI_FLAG_GLOBAL}, | |
1472 | { &key_LOCK_global_system_variables, "LOCK_global_system_variables", PSI_FLAG_GLOBAL}, | |
1473 | + { &key_LOCK_group_commit_queue, "LOCK_group_commit_queue", PSI_FLAG_GLOBAL}, | |
1474 | { &key_LOCK_manager, "LOCK_manager", PSI_FLAG_GLOBAL}, | |
1475 | { &key_LOCK_prepared_stmt_count, "LOCK_prepared_stmt_count", PSI_FLAG_GLOBAL}, | |
1476 | { &key_LOCK_rpl_status, "LOCK_rpl_status", PSI_FLAG_GLOBAL}, | |
1477 | @@ -8050,6 +8056,7 @@ | |
1478 | { &key_LOCK_temporary_tables, "THD::LOCK_temporary_tables", 0}, | |
1479 | { &key_LOCK_user_conn, "LOCK_user_conn", PSI_FLAG_GLOBAL}, | |
1480 | { &key_LOCK_uuid_generator, "LOCK_uuid_generator", PSI_FLAG_GLOBAL}, | |
1481 | + { &key_LOCK_wakeup_ready, "THD::LOCK_wakeup_ready", 0}, | |
1482 | { &key_LOG_LOCK_log, "LOG::LOCK_log", 0}, | |
1483 | { &key_master_info_data_lock, "Master_info::data_lock", 0}, | |
1484 | { &key_master_info_run_lock, "Master_info::run_lock", 0}, | |
1485 | @@ -8097,6 +8104,7 @@ | |
1486 | key_TABLE_SHARE_cond, key_user_level_lock_cond, | |
1487 | key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache; | |
1488 | PSI_cond_key key_RELAYLOG_update_cond; | |
1489 | +PSI_cond_key key_COND_wakeup_ready, key_COND_queue_busy; | |
1490 | ||
1491 | static PSI_cond_info all_server_conds[]= | |
1492 | { | |
1493 | @@ -8113,8 +8121,10 @@ | |
1494 | { &key_RELAYLOG_update_cond, "MYSQL_RELAY_LOG::update_cond", 0}, | |
1495 | { &key_COND_cache_status_changed, "Query_cache::COND_cache_status_changed", 0}, | |
1496 | { &key_COND_manager, "COND_manager", PSI_FLAG_GLOBAL}, | |
1497 | + { &key_COND_queue_busy, "COND_queue_busy", PSI_FLAG_GLOBAL}, | |
1498 | { &key_COND_rpl_status, "COND_rpl_status", PSI_FLAG_GLOBAL}, | |
1499 | { &key_COND_server_started, "COND_server_started", PSI_FLAG_GLOBAL}, | |
1500 | + { &key_COND_wakeup_ready, "THD::COND_wakeup_ready", 0}, | |
1501 | { &key_delayed_insert_cond, "Delayed_insert::cond", 0}, | |
1502 | { &key_delayed_insert_cond_client, "Delayed_insert::cond_client", 0}, | |
1503 | { &key_item_func_sleep_cond, "Item_func_sleep::cond", 0}, | |
1504 | --- a/sql/mysqld.h | |
1505 | +++ b/sql/mysqld.h | |
1506 | @@ -273,6 +273,7 @@ | |
1507 | key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data, | |
1508 | key_LOCK_error_messages, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc; | |
1509 | extern PSI_mutex_key key_RELAYLOG_LOCK_index; | |
1510 | +extern PSI_mutex_key key_LOCK_wakeup_ready, key_LOCK_group_commit_queue, key_LOCK_commit_ordered; | |
1511 | ||
1512 | extern PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger, | |
1513 | key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave, | |
1514 | @@ -293,6 +294,7 @@ | |
1515 | key_TABLE_SHARE_cond, key_user_level_lock_cond, | |
1516 | key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache; | |
1517 | extern PSI_cond_key key_RELAYLOG_update_cond; | |
1518 | +extern PSI_cond_key key_COND_wakeup_ready, key_COND_queue_busy; | |
1519 | ||
1520 | extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, | |
1521 | key_thread_handle_manager, key_thread_kill_server, key_thread_main, | |
1522 | --- a/sql/sql_class.cc | |
1523 | +++ b/sql/sql_class.cc | |
1524 | @@ -912,6 +912,8 @@ | |
1525 | mysql_mutex_init(key_LOCK_thd_data, &LOCK_thd_data, MY_MUTEX_INIT_FAST); | |
1526 | mysql_mutex_init(key_LOCK_temporary_tables, &LOCK_temporary_tables, | |
1527 | MY_MUTEX_INIT_FAST); | |
1528 | + mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST); | |
1529 | + mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, NULL); | |
1530 | ||
1531 | /* Variables with default values */ | |
1532 | proc_info="login"; | |
1533 | @@ -1516,6 +1518,8 @@ | |
1534 | my_free(db); | |
1535 | db= NULL; | |
1536 | free_root(&transaction.mem_root,MYF(0)); | |
1537 | + mysql_cond_destroy(&COND_wakeup_ready); | |
1538 | + mysql_mutex_destroy(&LOCK_wakeup_ready); | |
1539 | mysql_mutex_destroy(&LOCK_thd_data); | |
1540 | mysql_mutex_destroy(&LOCK_temporary_tables); | |
1541 | #ifndef DBUG_OFF | |
1542 | @@ -5199,6 +5203,24 @@ | |
1543 | DBUG_RETURN(0); | |
1544 | } | |
1545 | ||
1546 | +void | |
1547 | +THD::wait_for_wakeup_ready() | |
1548 | +{ | |
1549 | + mysql_mutex_lock(&LOCK_wakeup_ready); | |
1550 | + while (!wakeup_ready) | |
1551 | + mysql_cond_wait(&COND_wakeup_ready, &LOCK_wakeup_ready); | |
1552 | + mysql_mutex_unlock(&LOCK_wakeup_ready); | |
1553 | +} | |
1554 | + | |
1555 | +void | |
1556 | +THD::signal_wakeup_ready() | |
1557 | +{ | |
1558 | + mysql_mutex_lock(&LOCK_wakeup_ready); | |
1559 | + wakeup_ready= true; | |
1560 | + mysql_mutex_unlock(&LOCK_wakeup_ready); | |
1561 | + mysql_cond_signal(&COND_wakeup_ready); | |
1562 | +} | |
1563 | + | |
1564 | bool Discrete_intervals_list::append(ulonglong start, ulonglong val, | |
1565 | ulonglong incr) | |
1566 | { | |
1567 | --- a/sql/sql_class.h | |
1568 | +++ b/sql/sql_class.h | |
1569 | @@ -3017,6 +3017,14 @@ | |
1570 | LEX_STRING get_invoker_user() { return invoker_user; } | |
1571 | LEX_STRING get_invoker_host() { return invoker_host; } | |
1572 | bool has_invoker() { return invoker_user.length > 0; } | |
1573 | + void clear_wakeup_ready() { wakeup_ready= false; } | |
1574 | + /* | |
1575 | + Sleep waiting for others to wake us up with signal_wakeup_ready(). | |
1576 | + Must call clear_wakeup_ready() before waiting. | |
1577 | + */ | |
1578 | + void wait_for_wakeup_ready(); | |
1579 | + /* Wake this thread up from wait_for_wakeup_ready(). */ | |
1580 | + void signal_wakeup_ready(); | |
1581 | private: | |
1582 | ||
1583 | /** The current internal error handler for this thread, or NULL. */ | |
1584 | @@ -3059,6 +3067,16 @@ | |
1585 | */ | |
1586 | LEX_STRING invoker_user; | |
1587 | LEX_STRING invoker_host; | |
1588 | + /* | |
1589 | + Flag, mutex and condition for a thread to wait for a signal from another | |
1590 | + thread. | |
1591 | + | |
1592 | + Currently used to wait for group commit to complete, can also be used for | |
1593 | + other purposes. | |
1594 | + */ | |
1595 | + bool wakeup_ready; | |
1596 | + mysql_mutex_t LOCK_wakeup_ready; | |
1597 | + mysql_cond_t COND_wakeup_ready; | |
1598 | }; | |
1599 | ||
1600 | /* Returns string as 'IP' for the client-side of the connection represented by | |
1601 | --- a/sql/sql_parse.cc | |
1602 | +++ b/sql/sql_parse.cc | |
1603 | @@ -889,6 +889,10 @@ | |
1604 | DBUG_ENTER("dispatch_command"); | |
1605 | DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command)); | |
1606 | ||
1607 | + DBUG_EXECUTE_IF("crash_dispatch_command_before", | |
1608 | + { DBUG_PRINT("crash_dispatch_command_before", ("now")); | |
1609 | + DBUG_ABORT(); }); | |
1610 | + | |
1611 | #if defined(ENABLED_PROFILING) | |
1612 | thd->profiling.start_new_query(); | |
1613 | #endif | |
1614 | --- a/mysql-test/suite/perfschema/r/dml_setup_instruments.result | |
1615 | +++ b/mysql-test/suite/perfschema/r/dml_setup_instruments.result | |
1616 | @@ -11,9 +11,9 @@ | |
1617 | wait/synch/mutex/sql/HA_DATA_PARTITION::LOCK_auto_inc YES YES | |
1618 | wait/synch/mutex/sql/LOCK_active_mi YES YES | |
1619 | wait/synch/mutex/sql/LOCK_audit_mask YES YES | |
1620 | +wait/synch/mutex/sql/LOCK_commit_ordered YES YES | |
1621 | wait/synch/mutex/sql/LOCK_connection_count YES YES | |
1622 | wait/synch/mutex/sql/LOCK_crypt YES YES | |
1623 | -wait/synch/mutex/sql/LOCK_delayed_create YES YES | |
1624 | select * from performance_schema.setup_instruments | |
1625 | where name like 'Wait/Synch/Rwlock/sql/%' | |
1626 | and name not in ('wait/synch/rwlock/sql/CRYPTO_dynlock_value::lock') | |
1627 | @@ -38,6 +38,7 @@ | |
1628 | NAME ENABLED TIMED | |
1629 | wait/synch/cond/sql/COND_flush_thread_cache YES YES | |
1630 | wait/synch/cond/sql/COND_manager YES YES | |
1631 | +wait/synch/cond/sql/COND_queue_busy YES YES | |
1632 | wait/synch/cond/sql/COND_queue_state YES YES | |
1633 | wait/synch/cond/sql/COND_rpl_status YES YES | |
1634 | wait/synch/cond/sql/COND_server_started YES YES | |
1635 | @@ -45,7 +46,6 @@ | |
1636 | wait/synch/cond/sql/COND_thread_count YES YES | |
1637 | wait/synch/cond/sql/Delayed_insert::cond YES YES | |
1638 | wait/synch/cond/sql/Delayed_insert::cond_client YES YES | |
1639 | -wait/synch/cond/sql/Event_scheduler::COND_state YES YES | |
1640 | select * from performance_schema.setup_instruments | |
1641 | where name='Wait'; | |
1642 | select * from performance_schema.setup_instruments | |
1643 | --- a/storage/innobase/handler/ha_innodb.cc | |
1644 | +++ b/storage/innobase/handler/ha_innodb.cc | |
1645 | @@ -375,6 +375,9 @@ | |
1646 | static INNOBASE_SHARE *get_share(const char *table_name); | |
1647 | static void free_share(INNOBASE_SHARE *share); | |
1648 | static int innobase_close_connection(handlerton *hton, THD* thd); | |
1649 | +#ifdef EXTENDED_FOR_COMMIT_ORDERED | |
1650 | +static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all); | |
1651 | +#endif | |
1652 | static int innobase_commit(handlerton *hton, THD* thd, bool all); | |
1653 | static int innobase_rollback(handlerton *hton, THD* thd, bool all); | |
1654 | static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd, | |
1655 | @@ -1699,7 +1702,10 @@ | |
1656 | trx_t* trx) /*!< in/out: InnoDB transaction handle */ | |
1657 | { | |
1658 | DBUG_ENTER("innobase_trx_init"); | |
1659 | +#ifndef EXTENDED_FOR_COMMIT_ORDERED | |
1660 | + /* used by innobase_commit_ordered */ | |
1661 | DBUG_ASSERT(EQ_CURRENT_THD(thd)); | |
1662 | +#endif | |
1663 | DBUG_ASSERT(thd == trx->mysql_thd); | |
1664 | ||
1665 | trx->check_foreigns = !thd_test_options( | |
1666 | @@ -1760,7 +1766,10 @@ | |
1667 | { | |
1668 | trx_t*& trx = thd_to_trx(thd); | |
1669 | ||
1670 | +#ifndef EXTENDED_FOR_COMMIT_ORDERED | |
1671 | + /* used by innobase_commit_ordered */ | |
1672 | ut_ad(EQ_CURRENT_THD(thd)); | |
1673 | +#endif | |
1674 | ||
1675 | if (trx == NULL) { | |
1676 | trx = innobase_trx_allocate(thd); | |
1677 | @@ -1846,6 +1855,7 @@ | |
1678 | { | |
1679 | trx->is_registered = 0; | |
1680 | trx->owns_prepare_mutex = 0; | |
1681 | + trx->called_commit_ordered = 0; | |
1682 | } | |
1683 | ||
1684 | /*********************************************************************//** | |
1685 | @@ -1861,6 +1871,29 @@ | |
1686 | } | |
1687 | ||
1688 | /*********************************************************************//** | |
1689 | +Set the called_commit_ordered flag of a transaction that is registered for 2PC. */ | |
1690 | +static inline | |
1691 | +void | |
1692 | +trx_called_commit_ordered_set( | |
1693 | +/*==========================*/ | |
1694 | + trx_t* trx) | |
1695 | +{ | |
1696 | + ut_a(trx_is_registered_for_2pc(trx)); | |
1697 | + trx->called_commit_ordered = 1; | |
1698 | +} | |
1699 | + | |
1700 | +/*********************************************************************//** | |
1701 | +Check whether the called_commit_ordered flag of a transaction is set. */ | |
1702 | +static inline | |
1703 | +bool | |
1704 | +trx_called_commit_ordered( | |
1705 | +/*======================*/ | |
1706 | + const trx_t* trx) | |
1707 | +{ | |
1708 | + return(trx->called_commit_ordered == 1); | |
1709 | +} | |
1710 | + | |
1711 | +/*********************************************************************//** | |
1712 | Check if transaction is started. | |
1713 | @reutrn true if transaction is in state started */ | |
1714 | static | |
1715 | @@ -2435,6 +2468,9 @@ | |
1716 | innobase_hton->savepoint_set=innobase_savepoint; | |
1717 | innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint; | |
1718 | innobase_hton->savepoint_release=innobase_release_savepoint; | |
1719 | +#ifdef EXTENDED_FOR_COMMIT_ORDERED | |
1720 | + innobase_hton->commit_ordered=innobase_commit_ordered; | |
1721 | +#endif | |
1722 | innobase_hton->commit=innobase_commit; | |
1723 | innobase_hton->rollback=innobase_rollback; | |
1724 | innobase_hton->prepare=innobase_xa_prepare; | |
1725 | @@ -3187,6 +3223,126 @@ | |
1726 | DBUG_RETURN(0); | |
1727 | } | |
1728 | ||
1729 | +#ifdef EXTENDED_FOR_COMMIT_ORDERED | |
1730 | +/* MEMO: | |
1731 | + InnoDB is coded with the intention that a trx is always accessed by its owner thd | |
1732 | + (so it is not protected by any mutex/lock). | |
1733 | + So the caller of innobase_commit_ordered() must be careful about cache | |
1734 | + coherency of the trx between CPUs if it is called from another thd. | |
1735 | + | |
1736 | + In MariaDB's first implementation the coherency seems to be protected by | |
1737 | + the pthread_mutex LOCK_wakeup_ready. So, no problem for now. | |
1738 | + | |
1739 | + But we should be aware the importance of the coherency. | |
1740 | + */ | |
1741 | +/*****************************************************************//** | |
1742 | +Low-level function for innobase_commit_ordered().*/ | |
1743 | +static | |
1744 | +void | |
1745 | +innobase_commit_ordered_low( | |
1746 | +/*========================*/ | |
1747 | + trx_t* trx, /*!< in: Innodb transaction */ | |
1748 | + THD* thd) /*!< in: MySQL thread handle */ | |
1749 | +{ | |
1750 | + ulonglong tmp_pos; | |
1751 | + DBUG_ENTER("innobase_commit_ordered"); | |
1752 | + | |
1753 | + /* This part was from innobase_commit() */ | |
1754 | + | |
1755 | + /* We need current binlog position for ibbackup to work. | |
1756 | + Note, the position is current because commit_ordered is guaranteed | |
1757 | + to be called in the same sequence as writing to the binlog. */ | |
1758 | +retry: | |
1759 | + if (innobase_commit_concurrency > 0) { | |
1760 | + mysql_mutex_lock(&commit_cond_m); | |
1761 | + commit_threads++; | |
1762 | + | |
1763 | + if (commit_threads > innobase_commit_concurrency) { | |
1764 | + commit_threads--; | |
1765 | + mysql_cond_wait(&commit_cond, | |
1766 | + &commit_cond_m); | |
1767 | + mysql_mutex_unlock(&commit_cond_m); | |
1768 | + goto retry; | |
1769 | + } | |
1770 | + else { | |
1771 | + mysql_mutex_unlock(&commit_cond_m); | |
1772 | + } | |
1773 | + } | |
1774 | + | |
1775 | + mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name)); | |
1776 | + trx->mysql_log_offset = (ib_int64_t) tmp_pos; | |
1777 | + | |
1778 | + /* Don't do write + flush right now. For group commit | |
1779 | + to work we want to do the flush in the innobase_commit() | |
1780 | + method, which runs without holding any locks. */ | |
1781 | + trx->flush_log_later = TRUE; | |
1782 | + innobase_commit_low(trx); | |
1783 | + trx->flush_log_later = FALSE; | |
1784 | + | |
1785 | + if (innobase_commit_concurrency > 0) { | |
1786 | + mysql_mutex_lock(&commit_cond_m); | |
1787 | + commit_threads--; | |
1788 | + mysql_cond_signal(&commit_cond); | |
1789 | + mysql_mutex_unlock(&commit_cond_m); | |
1790 | + } | |
1791 | + | |
1792 | + DBUG_VOID_RETURN; | |
1793 | +} | |
1794 | + | |
1795 | +/*****************************************************************//** | |
1796 | +Perform the first, fast part of InnoDB commit. | |
1797 | + | |
1798 | +Doing it in this call ensures that we get the same commit order here | |
1799 | +as in binlog and any other participating transactional storage engines. | |
1800 | + | |
1801 | +Note that we want to do as little as really needed here, as we run | |
1802 | +under a global mutex. The expensive fsync() is done later, in | |
1803 | +innobase_commit(), without a lock so group commit can take place. | |
1804 | + | |
1805 | +Note also that this method can be called from a different thread than | |
1806 | +the one handling the rest of the transaction. */ | |
1807 | +static | |
1808 | +void | |
1809 | +innobase_commit_ordered( | |
1810 | +/*====================*/ | |
1811 | + handlerton *hton, /*!< in: Innodb handlerton */ | |
1812 | + THD* thd, /*!< in: MySQL thread handle of the user for whom | |
1813 | + the transaction should be committed */ | |
1814 | + bool all) /*!< in: TRUE - commit transaction | |
1815 | + FALSE - the current SQL statement ended */ | |
1816 | +{ | |
1817 | + trx_t* trx; | |
1818 | + DBUG_ENTER("innobase_commit_ordered"); | |
1819 | + DBUG_ASSERT(hton == innodb_hton_ptr); | |
1820 | + | |
1821 | + trx = check_trx_exists(thd); | |
1822 | + | |
1823 | + /* Since we will reserve the kernel mutex, we have to release | |
1824 | + the search system latch first to obey the latching order. */ | |
1825 | + | |
1826 | + if (trx->has_search_latch) { | |
1827 | + trx_search_latch_release_if_reserved(trx); | |
1828 | + } | |
1829 | + | |
1830 | + if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) { | |
1831 | + /* We cannot throw error here; instead we will catch this error | |
1832 | + again in innobase_commit() and report it from there. */ | |
1833 | + DBUG_VOID_RETURN; | |
1834 | + } | |
1835 | + | |
1836 | + /* commit_ordered is only called when committing the whole transaction | |
1837 | + (or an SQL statement when autocommit is on). */ | |
1838 | + DBUG_ASSERT(all || | |
1839 | + (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))); | |
1840 | + | |
1841 | + innobase_commit_ordered_low(trx, thd); | |
1842 | + | |
1843 | + trx_called_commit_ordered_set(trx); | |
1844 | + | |
1845 | + DBUG_VOID_RETURN; | |
1846 | +} | |
1847 | +#endif /* EXTENDED_FOR_COMMIT_ORDERED */ | |
1848 | + | |
1849 | /*****************************************************************//** | |
1850 | Commits a transaction in an InnoDB database or marks an SQL statement | |
1851 | ended. | |
1852 | @@ -3238,6 +3394,16 @@ | |
1853 | /* We were instructed to commit the whole transaction, or | |
1854 | this is an SQL statement end and autocommit is on */ | |
1855 | ||
1856 | +#ifdef EXTENDED_FOR_COMMIT_ORDERED | |
1857 | + ut_ad(!trx_has_prepare_commit_mutex(trx)); | |
1858 | + | |
1859 | + /* Run the fast part of commit if we did not already. */ | |
1860 | + if (!trx_called_commit_ordered(trx)) { | |
1861 | + innobase_commit_ordered_low(trx, thd); | |
1862 | + } | |
1863 | +#else | |
1864 | + ut_ad(!trx_called_commit_ordered(trx)); | |
1865 | + | |
1866 | /* We need current binlog position for ibbackup to work. | |
1867 | Note, the position is current because of | |
1868 | prepare_commit_mutex */ | |
1869 | @@ -3292,6 +3458,7 @@ | |
1870 | ||
1871 | mysql_mutex_unlock(&prepare_commit_mutex); | |
1872 | } | |
1873 | +#endif /* EXTENDED_FOR_COMMIT_ORDERED */ | |
1874 | ||
1875 | trx_deregister_from_2pc(trx); | |
1876 | ||
1877 | @@ -10973,6 +11140,7 @@ | |
1878 | ||
1879 | srv_active_wake_master_thread(); | |
1880 | ||
1881 | +#ifndef EXTENDED_FOR_COMMIT_ORDERED | |
1882 | if (thd_sql_command(thd) != SQLCOM_XA_PREPARE | |
1883 | && (all | |
1884 | || !thd_test_options( | |
1885 | @@ -10999,6 +11167,7 @@ | |
1886 | mysql_mutex_lock(&prepare_commit_mutex); | |
1887 | trx_owns_prepare_commit_mutex_set(trx); | |
1888 | } | |
1889 | +#endif /* ifndef EXTENDED_FOR_COMMIT_ORDERED */ | |
1890 | ||
1891 | return(error); | |
1892 | } | |
1893 | --- a/storage/innobase/handler/ha_innodb.h | |
1894 | +++ b/storage/innobase/handler/ha_innodb.h | |
1895 | @@ -240,6 +240,12 @@ | |
1896 | struct charset_info_st *thd_charset(MYSQL_THD thd); | |
1897 | LEX_STRING *thd_query_string(MYSQL_THD thd); | |
1898 | ||
1899 | +#ifdef EXTENDED_FOR_COMMIT_ORDERED | |
1900 | +/** Get the file name and position of the MySQL binlog corresponding to the | |
1901 | + * current commit. | |
1902 | + */ | |
1903 | +void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file); | |
1904 | +#else | |
1905 | /** Get the file name of the MySQL binlog. | |
1906 | * @return the name of the binlog file | |
1907 | */ | |
1908 | @@ -249,6 +255,7 @@ | |
1909 | * @return byte offset from the beginning of the binlog | |
1910 | */ | |
1911 | ulonglong mysql_bin_log_file_pos(void); | |
1912 | +#endif | |
1913 | ||
1914 | /** | |
1915 | Check if a user thread is a replication slave thread | |
1916 | --- a/storage/innobase/include/trx0trx.h | |
1917 | +++ b/storage/innobase/include/trx0trx.h | |
1918 | @@ -494,6 +494,7 @@ | |
1919 | this is set to 1 then registered should | |
1920 | also be set to 1. This is used in the | |
1921 | XA code */ | |
1922 | + unsigned called_commit_ordered:1;/* 1 if innobase_commit_ordered has run. */ | |
1923 | /*------------------------------*/ | |
1924 | ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */ | |
1925 | ulint check_foreigns; /* normally TRUE, but if the user | |
1926 | --- a/storage/innobase/trx/trx0trx.c | |
1927 | +++ b/storage/innobase/trx/trx0trx.c | |
1928 | @@ -111,6 +111,7 @@ | |
1929 | ||
1930 | trx->is_registered = 0; | |
1931 | trx->owns_prepare_mutex = 0; | |
1932 | + trx->called_commit_ordered = 0; | |
1933 | ||
1934 | trx->start_time = ut_time(); | |
1935 | ||
1936 | --- /dev/null | |
1937 | +++ b/mysql-test/r/group_commit.result | |
1938 | @@ -0,0 +1,63 @@ | |
1939 | +CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb; | |
1940 | +SELECT variable_value INTO @commits FROM information_schema.global_status | |
1941 | +WHERE variable_name = 'binlog_commits'; | |
1942 | +SELECT variable_value INTO @group_commits FROM information_schema.global_status | |
1943 | +WHERE variable_name = 'binlog_group_commits'; | |
1944 | +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued"; | |
1945 | +INSERT INTO t1 VALUES ("con1"); | |
1946 | +set DEBUG_SYNC= "now WAIT_FOR group1_running"; | |
1947 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con2"; | |
1948 | +SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed"; | |
1949 | +SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked"; | |
1950 | +INSERT INTO t1 VALUES ("con2"); | |
1951 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con2"; | |
1952 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con3"; | |
1953 | +INSERT INTO t1 VALUES ("con3"); | |
1954 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con3"; | |
1955 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con4"; | |
1956 | +INSERT INTO t1 VALUES ("con4"); | |
1957 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con4"; | |
1958 | +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; | |
1959 | +SELECT * FROM t1 ORDER BY a; | |
1960 | +a | |
1961 | +SET DEBUG_SYNC= "now SIGNAL group2_queued"; | |
1962 | +SELECT * FROM t1 ORDER BY a; | |
1963 | +a | |
1964 | +con1 | |
1965 | +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5"; | |
1966 | +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued"; | |
1967 | +INSERT INTO t1 VALUES ("con5"); | |
1968 | +SET DEBUG_SYNC= "now WAIT_FOR con5_leader"; | |
1969 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con6_queued"; | |
1970 | +INSERT INTO t1 VALUES ("con6"); | |
1971 | +SET DEBUG_SYNC= "now WAIT_FOR group3_con5"; | |
1972 | +SELECT * FROM t1 ORDER BY a; | |
1973 | +a | |
1974 | +con1 | |
1975 | +SET DEBUG_SYNC= "now SIGNAL group3_committed"; | |
1976 | +SET DEBUG_SYNC= "now WAIT_FOR group2_visible"; | |
1977 | +SELECT * FROM t1 ORDER BY a; | |
1978 | +a | |
1979 | +con1 | |
1980 | +con2 | |
1981 | +con3 | |
1982 | +con4 | |
1983 | +SET DEBUG_SYNC= "now SIGNAL group2_checked"; | |
1984 | +SELECT * FROM t1 ORDER BY a; | |
1985 | +a | |
1986 | +con1 | |
1987 | +con2 | |
1988 | +con3 | |
1989 | +con4 | |
1990 | +con5 | |
1991 | +con6 | |
1992 | +SELECT variable_value - @commits FROM information_schema.global_status | |
1993 | +WHERE variable_name = 'binlog_commits'; | |
1994 | +variable_value - @commits | |
1995 | +6 | |
1996 | +SELECT variable_value - @group_commits FROM information_schema.global_status | |
1997 | +WHERE variable_name = 'binlog_group_commits'; | |
1998 | +variable_value - @group_commits | |
1999 | +3 | |
2000 | +SET DEBUG_SYNC= 'RESET'; | |
2001 | +DROP TABLE t1; | |
2002 | --- /dev/null | |
2003 | +++ b/mysql-test/r/group_commit_binlog_pos.result | |
2004 | @@ -0,0 +1,35 @@ | |
2005 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; | |
2006 | +INSERT INTO t1 VALUES (0); | |
2007 | +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued"; | |
2008 | +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3"; | |
2009 | +INSERT INTO t1 VALUES (1); | |
2010 | +SET DEBUG_SYNC= "now WAIT_FOR con1_waiting"; | |
2011 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con2_queued"; | |
2012 | +INSERT INTO t1 VALUES (2); | |
2013 | +SET DEBUG_SYNC= "now WAIT_FOR con2_queued"; | |
2014 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con3_queued"; | |
2015 | +INSERT INTO t1 VALUES (3); | |
2016 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2017 | +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; | |
2018 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2019 | +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; | |
2020 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2021 | +SELECT * FROM t1 ORDER BY a; | |
2022 | +a | |
2023 | +0 | |
2024 | +1 | |
2025 | +2 | |
2026 | +SET SESSION debug="+d,crash_dispatch_command_before"; | |
2027 | +SELECT 1; | |
2028 | +Got one of the listed errors | |
2029 | +Got one of the listed errors | |
2030 | +Got one of the listed errors | |
2031 | +SELECT * FROM t1 ORDER BY a; | |
2032 | +a | |
2033 | +0 | |
2034 | +1 | |
2035 | +2 | |
2036 | +3 | |
2037 | +InnoDB: Last MySQL binlog file position 0 768, file name ./master-bin.000001 | |
2038 | +SET DEBUG_SYNC= 'RESET'; | |
2039 | +DROP TABLE t1; | |
2040 | --- /dev/null | |
2041 | +++ b/mysql-test/r/group_commit_crash.result | |
2042 | @@ -0,0 +1,120 @@ | |
2043 | +CREATE TABLE t1(a CHAR(255), | |
2044 | +b CHAR(255), | |
2045 | +c CHAR(255), | |
2046 | +d CHAR(255), | |
2047 | +id INT AUTO_INCREMENT, | |
2048 | +PRIMARY KEY(id)) ENGINE=InnoDB; | |
2049 | +create table t2 like t1; | |
2050 | +create procedure setcrash(IN i INT) | |
2051 | +begin | |
2052 | +CASE i | |
2053 | +WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare"; | |
2054 | +WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log"; | |
2055 | +WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog"; | |
2056 | +WHEN 4 THEN SET SESSION debug="d,crash_commit_after"; | |
2057 | +WHEN 5 THEN SET SESSION debug="d,crash_commit_before"; | |
2058 | +ELSE BEGIN END; | |
2059 | +END CASE; | |
2060 | +end // | |
2061 | +FLUSH TABLES; | |
2062 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2063 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2064 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2065 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2066 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2067 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2068 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2069 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2070 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2071 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2072 | +RESET MASTER; | |
2073 | +START TRANSACTION; | |
2074 | +insert into t1 select * from t2; | |
2075 | +call setcrash(5); | |
2076 | +COMMIT; | |
2077 | +Got one of the listed errors | |
2078 | +SELECT * FROM t1 ORDER BY id; | |
2079 | +a b c d id | |
2080 | +SHOW BINLOG EVENTS LIMIT 2,1; | |
2081 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2082 | +delete from t1; | |
2083 | +RESET MASTER; | |
2084 | +START TRANSACTION; | |
2085 | +insert into t1 select * from t2; | |
2086 | +call setcrash(4); | |
2087 | +COMMIT; | |
2088 | +Got one of the listed errors | |
2089 | +SELECT * FROM t1 ORDER BY id; | |
2090 | +a b c d id | |
2091 | +a b c d 1 | |
2092 | +a b c d 2 | |
2093 | +a b c d 3 | |
2094 | +a b c d 4 | |
2095 | +a b c d 5 | |
2096 | +a b c d 6 | |
2097 | +a b c d 7 | |
2098 | +a b c d 8 | |
2099 | +a b c d 9 | |
2100 | +a b c d 10 | |
2101 | +SHOW BINLOG EVENTS LIMIT 2,1; | |
2102 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2103 | +master-bin.000001 175 Query 1 269 use `test`; insert into t1 select * from t2 | |
2104 | +delete from t1; | |
2105 | +RESET MASTER; | |
2106 | +START TRANSACTION; | |
2107 | +insert into t1 select * from t2; | |
2108 | +call setcrash(3); | |
2109 | +COMMIT; | |
2110 | +Got one of the listed errors | |
2111 | +SELECT * FROM t1 ORDER BY id; | |
2112 | +a b c d id | |
2113 | +a b c d 1 | |
2114 | +a b c d 2 | |
2115 | +a b c d 3 | |
2116 | +a b c d 4 | |
2117 | +a b c d 5 | |
2118 | +a b c d 6 | |
2119 | +a b c d 7 | |
2120 | +a b c d 8 | |
2121 | +a b c d 9 | |
2122 | +a b c d 10 | |
2123 | +SHOW BINLOG EVENTS LIMIT 2,1; | |
2124 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2125 | +master-bin.000001 175 Query 1 269 use `test`; insert into t1 select * from t2 | |
2126 | +delete from t1; | |
2127 | +RESET MASTER; | |
2128 | +START TRANSACTION; | |
2129 | +insert into t1 select * from t2; | |
2130 | +call setcrash(2); | |
2131 | +COMMIT; | |
2132 | +Got one of the listed errors | |
2133 | +SELECT * FROM t1 ORDER BY id; | |
2134 | +a b c d id | |
2135 | +a b c d 1 | |
2136 | +a b c d 2 | |
2137 | +a b c d 3 | |
2138 | +a b c d 4 | |
2139 | +a b c d 5 | |
2140 | +a b c d 6 | |
2141 | +a b c d 7 | |
2142 | +a b c d 8 | |
2143 | +a b c d 9 | |
2144 | +a b c d 10 | |
2145 | +SHOW BINLOG EVENTS LIMIT 2,1; | |
2146 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2147 | +master-bin.000001 175 Query 1 269 use `test`; insert into t1 select * from t2 | |
2148 | +delete from t1; | |
2149 | +RESET MASTER; | |
2150 | +START TRANSACTION; | |
2151 | +insert into t1 select * from t2; | |
2152 | +call setcrash(1); | |
2153 | +COMMIT; | |
2154 | +Got one of the listed errors | |
2155 | +SELECT * FROM t1 ORDER BY id; | |
2156 | +a b c d id | |
2157 | +SHOW BINLOG EVENTS LIMIT 2,1; | |
2158 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2159 | +delete from t1; | |
2160 | +DROP TABLE t1; | |
2161 | +DROP TABLE t2; | |
2162 | +DROP PROCEDURE setcrash; | |
2163 | --- /dev/null | |
2164 | +++ b/mysql-test/r/xa_binlog.result | |
2165 | @@ -0,0 +1,32 @@ | |
2166 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; | |
2167 | +SET binlog_format= mixed; | |
2168 | +RESET MASTER; | |
2169 | +XA START 'xatest'; | |
2170 | +INSERT INTO t1 VALUES (1); | |
2171 | +XA END 'xatest'; | |
2172 | +XA PREPARE 'xatest'; | |
2173 | +XA COMMIT 'xatest'; | |
2174 | +XA START 'xatest'; | |
2175 | +INSERT INTO t1 VALUES (2); | |
2176 | +XA END 'xatest'; | |
2177 | +XA COMMIT 'xatest' ONE PHASE; | |
2178 | +BEGIN; | |
2179 | +INSERT INTO t1 VALUES (3); | |
2180 | +COMMIT; | |
2181 | +SELECT * FROM t1 ORDER BY a; | |
2182 | +a | |
2183 | +1 | |
2184 | +2 | |
2185 | +3 | |
2186 | +SHOW BINLOG EVENTS LIMIT 1,9; | |
2187 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2188 | +master-bin.000001 # Query 1 # BEGIN | |
2189 | +master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (1) | |
2190 | +master-bin.000001 # Query 1 # COMMIT | |
2191 | +master-bin.000001 # Query 1 # BEGIN | |
2192 | +master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (2) | |
2193 | +master-bin.000001 # Xid 1 # COMMIT /* xid=XX */ | |
2194 | +master-bin.000001 # Query 1 # BEGIN | |
2195 | +master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (3) | |
2196 | +master-bin.000001 # Xid 1 # COMMIT /* xid=XX */ | |
2197 | +DROP TABLE t1; | |
2198 | --- /dev/null | |
2199 | +++ b/mysql-test/suite/binlog/r/binlog_ioerr.result | |
2200 | @@ -0,0 +1,28 @@ | |
2201 | +CALL mtr.add_suppression("Error writing file 'master-bin'"); | |
2202 | +RESET MASTER; | |
2203 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; | |
2204 | +INSERT INTO t1 VALUES(0); | |
2205 | +SET SESSION debug='+d,fail_binlog_write_1'; | |
2206 | +INSERT INTO t1 VALUES(1); | |
2207 | +ERROR HY000: Error writing file 'master-bin' (errno: 28) | |
2208 | +INSERT INTO t1 VALUES(2); | |
2209 | +ERROR HY000: Error writing file 'master-bin' (errno: 28) | |
2210 | +SET SESSION debug=''; | |
2211 | +INSERT INTO t1 VALUES(3); | |
2212 | +SELECT * FROM t1; | |
2213 | +a | |
2214 | +0 | |
2215 | +3 | |
2216 | +SHOW BINLOG EVENTS; | |
2217 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2218 | +BINLOG POS Format_desc 1 ENDPOS Server ver: #, Binlog ver: # | |
2219 | +BINLOG POS Query 1 ENDPOS use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb | |
2220 | +BINLOG POS Query 1 ENDPOS BEGIN | |
2221 | +BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(0) | |
2222 | +BINLOG POS Xid 1 ENDPOS COMMIT /* XID */ | |
2223 | +BINLOG POS Query 1 ENDPOS BEGIN | |
2224 | +BINLOG POS Query 1 ENDPOS BEGIN | |
2225 | +BINLOG POS Query 1 ENDPOS BEGIN | |
2226 | +BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(3) | |
2227 | +BINLOG POS Xid 1 ENDPOS COMMIT /* XID */ | |
2228 | +DROP TABLE t1; | |
2229 | --- /dev/null | |
2230 | +++ b/mysql-test/suite/binlog/t/binlog_ioerr.test | |
2231 | @@ -0,0 +1,30 @@ | |
2232 | +source include/have_debug.inc; | |
2233 | +source include/have_innodb.inc; | |
2234 | +source include/have_log_bin.inc; | |
2235 | +source include/have_binlog_format_mixed_or_statement.inc; | |
2236 | + | |
2237 | +CALL mtr.add_suppression("Error writing file 'master-bin'"); | |
2238 | + | |
2239 | +RESET MASTER; | |
2240 | + | |
2241 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; | |
2242 | +INSERT INTO t1 VALUES(0); | |
2243 | +SET SESSION debug='+d,fail_binlog_write_1'; | |
2244 | +--error ER_ERROR_ON_WRITE | |
2245 | +INSERT INTO t1 VALUES(1); | |
2246 | +--error ER_ERROR_ON_WRITE | |
2247 | +INSERT INTO t1 VALUES(2); | |
2248 | +SET SESSION debug=''; | |
2249 | +INSERT INTO t1 VALUES(3); | |
2250 | +SELECT * FROM t1; | |
2251 | + | |
2252 | +# Actually the output from this currently shows a bug. | |
2253 | +# The injected IO error leaves partially written transactions in the binlog in | |
2254 | +# the form of stray "BEGIN" events. | |
2255 | +# These should disappear from the output if binlog error handling is improved | |
2256 | +# (see MySQL Bug#37148 and WL#1790). | |
2257 | +--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/ | |
2258 | +--replace_column 1 BINLOG 2 POS 5 ENDPOS | |
2259 | +SHOW BINLOG EVENTS; | |
2260 | + | |
2261 | +DROP TABLE t1; | |
2262 | --- /dev/null | |
2263 | +++ b/mysql-test/t/group_commit.test | |
2264 | @@ -0,0 +1,115 @@ | |
2265 | +--source include/have_debug_sync.inc | |
2266 | +--source include/have_innodb.inc | |
2267 | +--source include/have_log_bin.inc | |
2268 | + | |
2269 | +# Test some group commit code paths by using debug_sync to do controlled | |
2270 | +# commits of 6 transactions: first 1 alone, then 3 as a group, then 2 as a | |
2271 | +# group. | |
2272 | +# | |
2273 | +# Group 3 is allowed to race as far as possible ahead before group 2 finishes | |
2274 | +# to check some edge case for concurrency control. | |
2275 | + | |
2276 | +CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb; | |
2277 | + | |
2278 | +SELECT variable_value INTO @commits FROM information_schema.global_status | |
2279 | + WHERE variable_name = 'binlog_commits'; | |
2280 | +SELECT variable_value INTO @group_commits FROM information_schema.global_status | |
2281 | + WHERE variable_name = 'binlog_group_commits'; | |
2282 | + | |
2283 | +connect(con1,localhost,root,,); | |
2284 | +connect(con2,localhost,root,,); | |
2285 | +connect(con3,localhost,root,,); | |
2286 | +connect(con4,localhost,root,,); | |
2287 | +connect(con5,localhost,root,,); | |
2288 | +connect(con6,localhost,root,,); | |
2289 | + | |
2290 | +# Start group1 (with one thread) doing commit, waiting for | |
2291 | +# group2 to queue up before finishing. | |
2292 | + | |
2293 | +connection con1; | |
2294 | +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued"; | |
2295 | +send INSERT INTO t1 VALUES ("con1"); | |
2296 | + | |
2297 | +# Make group2 (with three threads) queue up. | |
2298 | +# Make sure con2 is the group commit leader for group2. | |
2299 | +# Make group2 wait with running commit_ordered() until group3 has committed. | |
2300 | + | |
2301 | +connection con2; | |
2302 | +set DEBUG_SYNC= "now WAIT_FOR group1_running"; | |
2303 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con2"; | |
2304 | +SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed"; | |
2305 | +SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked"; | |
2306 | +send INSERT INTO t1 VALUES ("con2"); | |
2307 | +connection con3; | |
2308 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con2"; | |
2309 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con3"; | |
2310 | +send INSERT INTO t1 VALUES ("con3"); | |
2311 | +connection con4; | |
2312 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con3"; | |
2313 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con4"; | |
2314 | +send INSERT INTO t1 VALUES ("con4"); | |
2315 | + | |
2316 | +# When group2 is queued, let group1 continue and queue group3. | |
2317 | + | |
2318 | +connection default; | |
2319 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con4"; | |
2320 | + | |
2321 | +# At this point, transaction 1 is still not visible as commit_ordered() has not | |
2322 | +# been called yet. | |
2323 | +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; | |
2324 | +SELECT * FROM t1 ORDER BY a; | |
2325 | + | |
2326 | +SET DEBUG_SYNC= "now SIGNAL group2_queued"; | |
2327 | +connection con1; | |
2328 | +reap; | |
2329 | + | |
2330 | +# Now transaction 1 is visible. | |
2331 | +connection default; | |
2332 | +SELECT * FROM t1 ORDER BY a; | |
2333 | + | |
2334 | +connection con5; | |
2335 | +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5"; | |
2336 | +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued"; | |
2337 | +send INSERT INTO t1 VALUES ("con5"); | |
2338 | + | |
2339 | +connection con6; | |
2340 | +SET DEBUG_SYNC= "now WAIT_FOR con5_leader"; | |
2341 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con6_queued"; | |
2342 | +send INSERT INTO t1 VALUES ("con6"); | |
2343 | + | |
2344 | +connection default; | |
2345 | +SET DEBUG_SYNC= "now WAIT_FOR group3_con5"; | |
2346 | +# Still only transaction 1 visible, as group2 has not yet run commit_ordered(). | |
2347 | +SELECT * FROM t1 ORDER BY a; | |
2348 | +SET DEBUG_SYNC= "now SIGNAL group3_committed"; | |
2349 | +SET DEBUG_SYNC= "now WAIT_FOR group2_visible"; | |
2350 | +# Now transactions 1-4 visible. | |
2351 | +SELECT * FROM t1 ORDER BY a; | |
2352 | +SET DEBUG_SYNC= "now SIGNAL group2_checked"; | |
2353 | + | |
2354 | +connection con2; | |
2355 | +reap; | |
2356 | + | |
2357 | +connection con3; | |
2358 | +reap; | |
2359 | + | |
2360 | +connection con4; | |
2361 | +reap; | |
2362 | + | |
2363 | +connection con5; | |
2364 | +reap; | |
2365 | + | |
2366 | +connection con6; | |
2367 | +reap; | |
2368 | + | |
2369 | +connection default; | |
2370 | +# Check all transactions finally visible. | |
2371 | +SELECT * FROM t1 ORDER BY a; | |
2372 | + | |
2373 | +SELECT variable_value - @commits FROM information_schema.global_status | |
2374 | + WHERE variable_name = 'binlog_commits'; | |
2375 | +SELECT variable_value - @group_commits FROM information_schema.global_status | |
2376 | + WHERE variable_name = 'binlog_group_commits'; | |
2377 | + | |
2378 | +SET DEBUG_SYNC= 'RESET'; | |
2379 | +DROP TABLE t1; | |
2380 | --- /dev/null | |
2381 | +++ b/mysql-test/t/group_commit_binlog_pos-master.opt | |
2382 | @@ -0,0 +1 @@ | |
2383 | +--skip-stack-trace --skip-core-file | |
2384 | --- /dev/null | |
2385 | +++ b/mysql-test/t/group_commit_binlog_pos.test | |
2386 | @@ -0,0 +1,89 @@ | |
2387 | +--source include/have_debug_sync.inc | |
2388 | +--source include/have_innodb.inc | |
2389 | +--source include/have_log_bin.inc | |
2390 | +--source include/have_binlog_format_mixed_or_statement.inc | |
2391 | + | |
2392 | +# Need DBUG to crash the server intentionally | |
2393 | +--source include/have_debug.inc | |
2394 | +# Don't test this under valgrind, memory leaks will occur as we crash | |
2395 | +--source include/not_valgrind.inc | |
2396 | + | |
2397 | +# The test case currently uses grep and tail, which may be unavailable on | |
2398 | +# some Windows systems. But see MWL#191 for how to remove the need for grep. | |
2399 | +--source include/not_windows.inc | |
2400 | + | |
2401 | +# XtraDB stores the binlog position corresponding to the last commit, and | |
2402 | +# prints it during crash recovery. | |
2403 | +# Test that we get the correct position when we group commit several | |
2404 | +# transactions together. | |
2405 | + | |
2406 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; | |
2407 | +INSERT INTO t1 VALUES (0); | |
2408 | + | |
2409 | +connect(con1,localhost,root,,); | |
2410 | +connect(con2,localhost,root,,); | |
2411 | +connect(con3,localhost,root,,); | |
2412 | + | |
2413 | +# Queue up three commits for group commit. | |
2414 | + | |
2415 | +connection con1; | |
2416 | +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued"; | |
2417 | +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3"; | |
2418 | +send INSERT INTO t1 VALUES (1); | |
2419 | + | |
2420 | +connection con2; | |
2421 | +SET DEBUG_SYNC= "now WAIT_FOR con1_waiting"; | |
2422 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con2_queued"; | |
2423 | +send INSERT INTO t1 VALUES (2); | |
2424 | + | |
2425 | +connection con3; | |
2426 | +SET DEBUG_SYNC= "now WAIT_FOR con2_queued"; | |
2427 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con3_queued"; | |
2428 | +send INSERT INTO t1 VALUES (3); | |
2429 | + | |
2430 | +connection default; | |
2431 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2432 | +# At this point, no transactions are committed. | |
2433 | +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; | |
2434 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2435 | +# At this point, 1 transaction is committed. | |
2436 | +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; | |
2437 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2438 | + | |
2439 | +# At this point, 2 transactions are committed. | |
2440 | +SELECT * FROM t1 ORDER BY a; | |
2441 | + | |
2442 | +connection con2; | |
2443 | +reap; | |
2444 | + | |
2445 | +# Now crash the server with 1+2 in-memory committed, 3 only prepared. | |
2446 | +connection default; | |
2447 | +system echo wait-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; | |
2448 | +SET SESSION debug="+d,crash_dispatch_command_before"; | |
2449 | +--error 2006,2013 | |
2450 | +SELECT 1; | |
2451 | + | |
2452 | +connection con1; | |
2453 | +--error 2006,2013 | |
2454 | +reap; | |
2455 | +connection con3; | |
2456 | +--error 2006,2013 | |
2457 | +reap; | |
2458 | + | |
2459 | +system echo restart-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; | |
2460 | + | |
2461 | +connection default; | |
2462 | +--enable_reconnect | |
2463 | +--source include/wait_until_connected_again.inc | |
2464 | + | |
2465 | +# Crash recovery should recover all three transactions. | |
2466 | +SELECT * FROM t1 ORDER BY a; | |
2467 | + | |
2468 | +# Check that the binlog position reported by InnoDB is the correct one | |
2469 | +# for the end of the second transaction (as can be checked with | |
2470 | +# mysqlbinlog). | |
2471 | +let $MYSQLD_DATADIR= `SELECT @@datadir`; | |
2472 | +--exec grep 'InnoDB: Last MySQL binlog file position' $MYSQLD_DATADIR/../../log/mysqld.1.err | tail -1 | |
2473 | + | |
2474 | +SET DEBUG_SYNC= 'RESET'; | |
2475 | +DROP TABLE t1; | |
2476 | --- /dev/null | |
2477 | +++ b/mysql-test/t/group_commit_crash-master.opt | |
2478 | @@ -0,0 +1 @@ | |
2479 | +--skip-stack-trace --skip-core-file | |
2480 | --- /dev/null | |
2481 | +++ b/mysql-test/t/group_commit_crash.test | |
2482 | @@ -0,0 +1,80 @@ | |
2483 | +# Testing group commit by crashing a few times. | |
2484 | +# Test adapted from the Facebook patch: lp:mysqlatfacebook | |
2485 | +--source include/not_embedded.inc | |
2486 | +# Don't test this under valgrind, memory leaks will occur | |
2487 | +--source include/not_valgrind.inc | |
2488 | + | |
2489 | +# Binary must be compiled with debug for crash to occur | |
2490 | +--source include/have_debug.inc | |
2491 | +--source include/have_innodb.inc | |
2492 | +--source include/have_log_bin.inc | |
2493 | + | |
2494 | +let $innodb_file_format_max_orig=`select @@innodb_file_format_max`; | |
2495 | +CREATE TABLE t1(a CHAR(255), | |
2496 | + b CHAR(255), | |
2497 | + c CHAR(255), | |
2498 | + d CHAR(255), | |
2499 | + id INT AUTO_INCREMENT, | |
2500 | + PRIMARY KEY(id)) ENGINE=InnoDB; | |
2501 | +create table t2 like t1; | |
2502 | +delimiter //; | |
2503 | +create procedure setcrash(IN i INT) | |
2504 | +begin | |
2505 | + CASE i | |
2506 | + WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare"; | |
2507 | + WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log"; | |
2508 | + WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog"; | |
2509 | + WHEN 4 THEN SET SESSION debug="d,crash_commit_after"; | |
2510 | + WHEN 5 THEN SET SESSION debug="d,crash_commit_before"; | |
2511 | + ELSE BEGIN END; | |
2512 | + END CASE; | |
2513 | +end // | |
2514 | +delimiter ;// | |
2515 | +# Avoid getting a crashed mysql.proc table. | |
2516 | +FLUSH TABLES; | |
2517 | + | |
2518 | +let $numtests = 5; | |
2519 | + | |
2520 | +let $numinserts = 10; | |
2521 | +while ($numinserts) | |
2522 | +{ | |
2523 | + dec $numinserts; | |
2524 | + INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2525 | +} | |
2526 | + | |
2527 | +--enable_reconnect | |
2528 | + | |
2529 | +while ($numtests) | |
2530 | +{ | |
2531 | + RESET MASTER; | |
2532 | + | |
2533 | + START TRANSACTION; | |
2534 | + insert into t1 select * from t2; | |
2535 | + # Write file to make mysql-test-run.pl expect crash | |
2536 | + --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect | |
2537 | + | |
2538 | + eval call setcrash($numtests); | |
2539 | + | |
2540 | + # Run the crashing query | |
2541 | + --error 2006,2013 | |
2542 | + COMMIT; | |
2543 | + | |
2544 | + # Poll the server waiting for it to be back online again. | |
2545 | + --source include/wait_until_connected_again.inc | |
2546 | + | |
2547 | + # table and binlog should be in sync. | |
2548 | + SELECT * FROM t1 ORDER BY id; | |
2549 | + SHOW BINLOG EVENTS LIMIT 2,1; | |
2550 | + | |
2551 | + delete from t1; | |
2552 | + | |
2553 | + dec $numtests; | |
2554 | +} | |
2555 | + | |
2556 | +# final cleanup | |
2557 | +DROP TABLE t1; | |
2558 | +DROP TABLE t2; | |
2559 | +DROP PROCEDURE setcrash; | |
2560 | +--disable_query_log | |
2561 | +eval SET GLOBAL innodb_file_format_max=$innodb_file_format_max_orig; | |
2562 | +--enable_query_log | |
2563 | --- /dev/null | |
2564 | +++ b/mysql-test/t/xa_binlog.test | |
2565 | @@ -0,0 +1,32 @@ | |
2566 | +--source include/have_innodb.inc | |
2567 | +--source include/have_log_bin.inc | |
2568 | + | |
2569 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; | |
2570 | + | |
2571 | +# Fix binlog format (otherwise SHOW BINLOG EVENTS will fluctuate). | |
2572 | +SET binlog_format= mixed; | |
2573 | + | |
2574 | +RESET MASTER; | |
2575 | + | |
2576 | +XA START 'xatest'; | |
2577 | +INSERT INTO t1 VALUES (1); | |
2578 | +XA END 'xatest'; | |
2579 | +XA PREPARE 'xatest'; | |
2580 | +XA COMMIT 'xatest'; | |
2581 | + | |
2582 | +XA START 'xatest'; | |
2583 | +INSERT INTO t1 VALUES (2); | |
2584 | +XA END 'xatest'; | |
2585 | +XA COMMIT 'xatest' ONE PHASE; | |
2586 | + | |
2587 | +BEGIN; | |
2588 | +INSERT INTO t1 VALUES (3); | |
2589 | +COMMIT; | |
2590 | + | |
2591 | +SELECT * FROM t1 ORDER BY a; | |
2592 | + | |
2593 | +--replace_column 2 # 5 # | |
2594 | +--replace_regex /xid=[0-9]+/xid=XX/ | |
2595 | +SHOW BINLOG EVENTS LIMIT 1,9; | |
2596 | + | |
2597 | +DROP TABLE t1; |