]>
Commit | Line | Data |
---|---|---|
13ceb006 AM |
1 | --- a/include/my_sys.h |
2 | +++ b/include/my_sys.h | |
3 | @@ -524,6 +524,8 @@ | |
4 | ||
5 | #define my_b_tell(info) ((info)->pos_in_file + \ | |
6 | (size_t) (*(info)->current_pos - (info)->request_pos)) | |
7 | +#define my_b_write_tell(info) ((info)->pos_in_file + \ | |
8 | + ((info)->write_pos - (info)->write_buffer)) | |
9 | ||
10 | #define my_b_get_buffer_start(info) (info)->request_pos | |
11 | #define my_b_get_bytes_in_buffer(info) (char*) (info)->read_end - \ | |
12 | --- a/include/mysql/plugin.h | |
13 | +++ b/include/mysql/plugin.h | |
14 | @@ -559,6 +559,8 @@ | |
15 | ||
16 | #define EXTENDED_FOR_USERSTAT | |
17 | ||
18 | +#define EXTENDED_FOR_COMMIT_ORDERED | |
19 | + | |
20 | /** | |
21 | Create a temporary file. | |
22 | ||
23 | --- a/sql/handler.cc | |
24 | +++ b/sql/handler.cc | |
25 | @@ -90,6 +90,8 @@ | |
26 | static TYPELIB known_extensions= {0,"known_exts", NULL, NULL}; | |
27 | uint known_extensions_id= 0; | |
28 | ||
29 | +static int commit_one_phase_low(THD *thd, bool all, THD_TRANS *trans, | |
30 | + bool is_real_trans); | |
31 | ||
32 | ||
33 | static plugin_ref ha_default_plugin(THD *thd) | |
34 | @@ -1119,7 +1121,8 @@ | |
35 | */ | |
36 | bool is_real_trans= all || thd->transaction.all.ha_list == 0; | |
37 | Ha_trx_info *ha_info= trans->ha_list; | |
38 | - my_xid xid= thd->transaction.xid_state.xid.get_my_xid(); | |
39 | + bool need_commit_ordered; | |
40 | + my_xid xid; | |
41 | DBUG_ENTER("ha_commit_trans"); | |
42 | ||
43 | /* | |
44 | @@ -1152,13 +1155,20 @@ | |
45 | DBUG_RETURN(2); | |
46 | } | |
47 | ||
48 | - if (ha_info) | |
49 | + if (!ha_info) | |
50 | + { | |
51 | + /* Free resources and perform other cleanup even for 'empty' transactions. */ | |
52 | + if (is_real_trans) | |
53 | + thd->transaction.cleanup(); | |
54 | + DBUG_RETURN(0); | |
55 | + } | |
56 | + else | |
57 | { | |
58 | uint rw_ha_count; | |
59 | bool rw_trans; | |
60 | MDL_request mdl_request; | |
61 | ||
62 | - DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE();); | |
63 | + DBUG_EXECUTE_IF("crash_commit_before", abort();); | |
64 | ||
65 | /* Close all cursors that can not survive COMMIT */ | |
66 | if (is_real_trans) /* not a statement commit */ | |
67 | @@ -1197,57 +1207,80 @@ | |
68 | !thd->slave_thread) | |
69 | { | |
70 | my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); | |
71 | - ha_rollback_trans(thd, all); | |
72 | - error= 1; | |
73 | - goto end; | |
74 | + goto err; | |
75 | } | |
76 | ||
77 | - if (!trans->no_2pc && (rw_ha_count > 1)) | |
78 | + if (trans->no_2pc || (rw_ha_count <= 1)) | |
79 | { | |
80 | - for (; ha_info && !error; ha_info= ha_info->next()) | |
81 | + error= ha_commit_one_phase(thd, all); | |
82 | + DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT();); | |
83 | + goto end; | |
84 | + } | |
85 | + | |
86 | + need_commit_ordered= FALSE; | |
87 | + xid= thd->transaction.xid_state.xid.get_my_xid(); | |
88 | + | |
89 | + for (Ha_trx_info *hi= ha_info; hi; hi= hi->next()) | |
90 | { | |
91 | int err; | |
92 | - handlerton *ht= ha_info->ht(); | |
93 | + handlerton *ht= hi->ht(); | |
94 | /* | |
95 | Do not call two-phase commit if this particular | |
96 | transaction is read-only. This allows for simpler | |
97 | implementation in engines that are always read-only. | |
98 | */ | |
99 | - if (! ha_info->is_trx_read_write()) | |
100 | + if (! hi->is_trx_read_write()) | |
101 | continue; | |
102 | /* | |
103 | Sic: we know that prepare() is not NULL since otherwise | |
104 | trans->no_2pc would have been set. | |
105 | */ | |
106 | - if ((err= ht->prepare(ht, thd, all))) | |
107 | - { | |
108 | - my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); | |
109 | - error= 1; | |
110 | - } | |
111 | + err= ht->prepare(ht, thd, all); | |
112 | status_var_increment(thd->status_var.ha_prepare_count); | |
113 | + if (err) | |
114 | + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); | |
115 | + | |
116 | + if (err) | |
117 | + goto err; | |
118 | + | |
119 | + need_commit_ordered|= (ht->commit_ordered != NULL); | |
120 | } | |
121 | - DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE();); | |
122 | - if (error || (is_real_trans && xid && | |
123 | - (error= !(cookie= tc_log->log_xid(thd, xid))))) | |
124 | + DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_ABORT();); | |
125 | + | |
126 | + if (!is_real_trans) | |
127 | { | |
128 | - ha_rollback_trans(thd, all); | |
129 | - error= 1; | |
130 | + error= commit_one_phase_low(thd, all, trans, is_real_trans); | |
131 | + DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT();); | |
132 | goto end; | |
133 | } | |
134 | - DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE();); | |
135 | - } | |
136 | - error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0; | |
137 | - DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE();); | |
138 | - if (cookie) | |
139 | + | |
140 | + cookie= tc_log->log_and_order(thd, xid, all, need_commit_ordered); | |
141 | + if (!cookie) | |
142 | + goto err; | |
143 | + | |
144 | + DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_ABORT();); | |
145 | + | |
146 | + error= commit_one_phase_low(thd, all, trans, is_real_trans) ? 2 : 0; | |
147 | + DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT();); | |
148 | + if (is_real_trans) /* userstat.patch */ | |
149 | + thd->diff_commit_trans++; /* userstat.patch */ | |
150 | + RUN_HOOK(transaction, after_commit, (thd, FALSE)); | |
151 | + | |
152 | + DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_ABORT();); | |
153 | if(tc_log->unlog(cookie, xid)) | |
154 | { | |
155 | error= 2; | |
156 | goto end; | |
157 | } | |
158 | - DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE();); | |
159 | - if (is_real_trans) | |
160 | - thd->diff_commit_trans++; | |
161 | - RUN_HOOK(transaction, after_commit, (thd, FALSE)); | |
162 | + | |
163 | + DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT();); | |
164 | + goto end; | |
165 | + | |
166 | + /* Come here if error and we need to rollback. */ | |
167 | +err: | |
168 | + error= 1; /* Transaction was rolled back */ | |
169 | + ha_rollback_trans(thd, all); | |
170 | + | |
171 | end: | |
172 | if (rw_trans && mdl_request.ticket) | |
173 | { | |
174 | @@ -1260,9 +1293,6 @@ | |
175 | thd->mdl_context.release_lock(mdl_request.ticket); | |
176 | } | |
177 | } | |
178 | - /* Free resources and perform other cleanup even for 'empty' transactions. */ | |
179 | - else if (is_real_trans) | |
180 | - thd->transaction.cleanup(); | |
181 | DBUG_RETURN(error); | |
182 | } | |
183 | ||
184 | @@ -1279,7 +1309,6 @@ | |
185 | ||
186 | int ha_commit_one_phase(THD *thd, bool all) | |
187 | { | |
188 | - int error=0; | |
189 | THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; | |
190 | /* | |
191 | "real" is a nick name for a transaction for which a commit will | |
192 | @@ -1295,8 +1324,16 @@ | |
193 | transaction.all.ha_list, see why in trans_register_ha()). | |
194 | */ | |
195 | bool is_real_trans=all || thd->transaction.all.ha_list == 0; | |
196 | - Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; | |
197 | DBUG_ENTER("ha_commit_one_phase"); | |
198 | + DBUG_RETURN(commit_one_phase_low(thd, all, trans, is_real_trans)); | |
199 | +} | |
200 | + | |
201 | +static int | |
202 | +commit_one_phase_low(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) | |
203 | +{ | |
204 | + int error= 0; | |
205 | + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; | |
206 | + DBUG_ENTER("commit_one_phase_low"); | |
207 | ||
208 | if (ha_info) | |
209 | { | |
210 | @@ -1894,7 +1931,16 @@ | |
211 | { | |
212 | bool warn= true; | |
213 | ||
214 | + /* | |
215 | + Holding the LOCK_commit_ordered mutex ensures that we get the same | |
216 | + snapshot for all engines (including the binary log). This allows us | |
217 | + among other things to do backups with | |
218 | + START TRANSACTION WITH CONSISTENT SNAPSHOT and | |
219 | + have a consistent binlog position. | |
220 | + */ | |
221 | + mysql_mutex_lock(&LOCK_commit_ordered); | |
222 | plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn); | |
223 | + mysql_mutex_unlock(&LOCK_commit_ordered); | |
224 | ||
225 | /* | |
226 | Same idea as when one wants to CREATE TABLE in one engine which does not | |
227 | --- a/sql/handler.h | |
228 | +++ b/sql/handler.h | |
229 | @@ -756,6 +756,53 @@ | |
230 | and 'real commit' mean the same event. | |
231 | */ | |
232 | int (*commit)(handlerton *hton, THD *thd, bool all); | |
233 | + /* | |
234 | + The commit_ordered() method is called prior to the commit() method, after | |
235 | + the transaction manager has decided to commit (not rollback) the | |
236 | + transaction. Unlike commit(), commit_ordered() is called only when the | |
237 | + full transaction is committed, not for each commit of statement | |
238 | + transaction in a multi-statement transaction. | |
239 | + | |
240 | + Note that, like prepare(), commit_ordered() is only called when 2-phase | |
241 | + commit takes place. I.e. when no binary log and only a single engine | |
242 | + participates in a transaction, one commit() is called, no | |
243 | + commit_ordered(). So engines must be prepared for this. | |
244 | + | |
245 | + The calls to commit_ordered() in multiple parallel transactions are | |
246 | + guaranteed to happen in the same order in every participating | |
247 | + handler. This can be used to ensure the same commit order among multiple | |
248 | + handlers (eg. in table handler and binlog). So if transaction T1 calls | |
249 | + into commit_ordered() of handler A before T2, then T1 will also call | |
250 | + commit_ordered() of handler B before T2. | |
251 | + | |
252 | + Engines that implement this method should during this call make the | |
253 | + transaction visible to other transactions, thereby making the order of | |
254 | + transaction commits be defined by the order of commit_ordered() calls. | |
255 | + | |
256 | + The intention is that commit_ordered() should do the minimal amount of | |
257 | + work that needs to happen in consistent commit order among handlers. To | |
258 | + preserve ordering, calls need to be serialised on a global mutex, so | |
259 | + doing any time-consuming or blocking operations in commit_ordered() will | |
260 | + limit scalability. | |
261 | + | |
262 | + Handlers can rely on commit_ordered() calls to be serialised (no two | |
263 | + calls can run in parallel, so no extra locking on the handler part is | |
264 | + required to ensure this). | |
265 | + | |
266 | + Note that commit_ordered() can be called from a different thread than the | |
267 | + one handling the transaction! So it can not do anything that depends on | |
268 | + thread local storage, in particular it can not call my_error() and | |
269 | + friends (instead it can store the error code and delay the call of | |
270 | + my_error() to the commit() method). | |
271 | + | |
272 | + Similarly, since commit_ordered() returns void, any return error code | |
273 | + must be saved and returned from the commit() method instead. | |
274 | + | |
275 | + The commit_ordered method is optional, and can be left unset if not | |
276 | + needed in a particular handler (then there will be no ordering guarantees | |
277 | + wrt. other engines and binary log). | |
278 | + */ | |
279 | + void (*commit_ordered)(handlerton *hton, THD *thd, bool all); | |
280 | int (*rollback)(handlerton *hton, THD *thd, bool all); | |
281 | int (*prepare)(handlerton *hton, THD *thd, bool all); | |
282 | int (*recover)(handlerton *hton, XID *xid_list, uint len); | |
283 | --- a/sql/log.cc | |
284 | +++ b/sql/log.cc | |
1bfc1981 | 285 | @@ -71,6 +71,25 @@ |
13ceb006 AM |
286 | static int binlog_rollback(handlerton *hton, THD *thd, bool all); |
287 | static int binlog_prepare(handlerton *hton, THD *thd, bool all); | |
288 | ||
289 | +static LEX_STRING const write_error_msg= | |
290 | + { C_STRING_WITH_LEN("error writing to the binary log") }; | |
291 | + | |
292 | +static my_bool mutexes_inited; | |
293 | +mysql_mutex_t LOCK_group_commit_queue; | |
294 | +mysql_mutex_t LOCK_commit_ordered; | |
295 | + | |
296 | +static ulonglong binlog_status_var_num_commits; | |
297 | +static ulonglong binlog_status_var_num_group_commits; | |
298 | + | |
299 | +static SHOW_VAR binlog_status_vars_detail[]= | |
300 | +{ | |
301 | + {"commits", | |
302 | + (char *)&binlog_status_var_num_commits, SHOW_LONGLONG}, | |
303 | + {"group_commits", | |
304 | + (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG}, | |
305 | + {NullS, NullS, SHOW_LONG} | |
306 | +}; | |
307 | + | |
308 | /** | |
309 | purge logs, master and slave sides both, related error code | |
310 | convertor. | |
1bfc1981 | 311 | @@ -167,41 +186,6 @@ |
13ceb006 AM |
312 | } |
313 | ||
314 | /* | |
315 | - Helper class to hold a mutex for the duration of the | |
316 | - block. | |
317 | - | |
318 | - Eliminates the need for explicit unlocking of mutexes on, e.g., | |
319 | - error returns. On passing a null pointer, the sentry will not do | |
320 | - anything. | |
321 | - */ | |
322 | -class Mutex_sentry | |
323 | -{ | |
324 | -public: | |
325 | - Mutex_sentry(mysql_mutex_t *mutex) | |
326 | - : m_mutex(mutex) | |
327 | - { | |
328 | - if (m_mutex) | |
329 | - mysql_mutex_lock(mutex); | |
330 | - } | |
331 | - | |
332 | - ~Mutex_sentry() | |
333 | - { | |
334 | - if (m_mutex) | |
335 | - mysql_mutex_unlock(m_mutex); | |
336 | -#ifndef DBUG_OFF | |
337 | - m_mutex= 0; | |
338 | -#endif | |
339 | - } | |
340 | - | |
341 | -private: | |
342 | - mysql_mutex_t *m_mutex; | |
343 | - | |
344 | - // It's not allowed to copy this object in any way | |
345 | - Mutex_sentry(Mutex_sentry const&); | |
346 | - void operator=(Mutex_sentry const&); | |
347 | -}; | |
348 | - | |
349 | -/* | |
350 | Helper classes to store non-transactional and transactional data | |
351 | before copying it to the binary log. | |
352 | */ | |
1bfc1981 | 353 | @@ -211,7 +195,8 @@ |
13ceb006 AM |
354 | binlog_cache_data(): m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF), |
355 | incident(FALSE), changes_to_non_trans_temp_table_flag(FALSE), | |
356 | saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0), | |
357 | - ptr_binlog_cache_disk_use(0) | |
358 | + ptr_binlog_cache_disk_use(0), commit_bin_log_file_pos(0), | |
359 | + using_xa(FALSE), xa_xid(0) | |
360 | { } | |
361 | ||
362 | ~binlog_cache_data() | |
1bfc1981 | 363 | @@ -270,6 +255,8 @@ |
13ceb006 AM |
364 | variable after truncating the cache. |
365 | */ | |
366 | cache_log.disk_writes= 0; | |
367 | + using_xa= FALSE; | |
368 | + commit_bin_log_file_pos= 0; | |
369 | DBUG_ASSERT(empty()); | |
370 | } | |
371 | ||
1bfc1981 | 372 | @@ -411,6 +398,20 @@ |
13ceb006 AM |
373 | |
374 | binlog_cache_data& operator=(const binlog_cache_data& info); | |
375 | binlog_cache_data(const binlog_cache_data& info); | |
376 | + | |
377 | +public: | |
378 | + /* | |
379 | + Binlog position after current commit, available to storage engines during | |
380 | + commit_ordered() and commit(). | |
381 | + */ | |
382 | + ulonglong commit_bin_log_file_pos; | |
383 | + | |
384 | + /* | |
385 | + Flag set true if this transaction is committed with log_xid() as part of | |
386 | + XA, false if not. | |
387 | + */ | |
388 | + bool using_xa; | |
389 | + my_xid xa_xid; | |
390 | }; | |
391 | ||
392 | class binlog_cache_mngr { | |
1bfc1981 | 393 | @@ -1624,7 +1625,7 @@ |
13ceb006 AM |
394 | */ |
395 | static inline int | |
396 | binlog_flush_cache(THD *thd, binlog_cache_data* cache_data, Log_event *end_evt, | |
397 | - bool is_transactional) | |
398 | + bool is_transactional, bool all) | |
399 | { | |
400 | DBUG_ENTER("binlog_flush_cache"); | |
401 | int error= 0; | |
1bfc1981 | 402 | @@ -1643,8 +1644,8 @@ |
13ceb006 AM |
403 | were, we would have to ensure that we're not ending a statement |
404 | inside a stored function. | |
405 | */ | |
406 | - error= mysql_bin_log.write(thd, &cache_data->cache_log, end_evt, | |
407 | - cache_data->has_incident()); | |
408 | + error= mysql_bin_log.write_transaction_to_binlog(thd, cache_data, | |
409 | + end_evt, all); | |
410 | } | |
411 | cache_data->reset(); | |
412 | ||
1bfc1981 | 413 | @@ -1663,12 +1664,12 @@ |
13ceb006 AM |
414 | */ |
415 | static inline int | |
416 | binlog_commit_flush_stmt_cache(THD *thd, | |
417 | - binlog_cache_mngr *cache_mngr) | |
418 | + binlog_cache_mngr *cache_mngr, bool all) | |
419 | { | |
420 | Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"), | |
421 | FALSE, FALSE, TRUE, 0); | |
422 | return (binlog_flush_cache(thd, &cache_mngr->stmt_cache, &end_evt, | |
423 | - FALSE)); | |
424 | + FALSE, all)); | |
425 | } | |
426 | ||
427 | /** | |
1bfc1981 | 428 | @@ -1681,12 +1682,12 @@ |
13ceb006 AM |
429 | nonzero if an error pops up when flushing the cache. |
430 | */ | |
431 | static inline int | |
432 | -binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr) | |
433 | +binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all) | |
434 | { | |
435 | Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"), | |
436 | TRUE, FALSE, TRUE, 0); | |
437 | return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt, | |
438 | - TRUE)); | |
439 | + TRUE, all)); | |
440 | } | |
441 | ||
442 | /** | |
1bfc1981 | 443 | @@ -1699,12 +1700,12 @@ |
13ceb006 AM |
444 | nonzero if an error pops up when flushing the cache. |
445 | */ | |
446 | static inline int | |
447 | -binlog_rollback_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr) | |
448 | +binlog_rollback_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all) | |
449 | { | |
450 | Query_log_event end_evt(thd, STRING_WITH_LEN("ROLLBACK"), | |
451 | TRUE, FALSE, TRUE, 0); | |
452 | return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt, | |
453 | - TRUE)); | |
454 | + TRUE, all)); | |
455 | } | |
456 | ||
457 | /** | |
1bfc1981 | 458 | @@ -1719,11 +1720,11 @@ |
13ceb006 AM |
459 | */ |
460 | static inline int | |
461 | binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, | |
462 | - my_xid xid) | |
463 | + my_xid xid, bool all) | |
464 | { | |
465 | Xid_log_event end_evt(thd, xid); | |
466 | return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt, | |
467 | - TRUE)); | |
468 | + TRUE, all)); | |
469 | } | |
470 | ||
471 | /** | |
1bfc1981 | 472 | @@ -1785,7 +1786,7 @@ |
13ceb006 AM |
473 | do nothing. |
474 | just pretend we can do 2pc, so that MySQL won't | |
475 | switch to 1pc. | |
476 | - real work will be done in MYSQL_BIN_LOG::log_xid() | |
477 | + real work will be done in MYSQL_BIN_LOG::log_and_order() | |
478 | */ | |
479 | return 0; | |
480 | } | |
1bfc1981 | 481 | @@ -1818,7 +1819,7 @@ |
13ceb006 AM |
482 | |
483 | if (!cache_mngr->stmt_cache.empty()) | |
484 | { | |
485 | - error= binlog_commit_flush_stmt_cache(thd, cache_mngr); | |
486 | + error= binlog_commit_flush_stmt_cache(thd, cache_mngr, all); | |
487 | } | |
488 | ||
489 | if (cache_mngr->trx_cache.empty()) | |
1bfc1981 | 490 | @@ -1837,7 +1838,7 @@ |
13ceb006 AM |
491 | Otherwise, we accumulate the changes. |
492 | */ | |
493 | if (!error && ending_trans(thd, all)) | |
494 | - error= binlog_commit_flush_trx_cache(thd, cache_mngr); | |
495 | + error= binlog_commit_flush_trx_cache(thd, cache_mngr, all); | |
496 | ||
497 | /* | |
498 | This is part of the stmt rollback. | |
1bfc1981 | 499 | @@ -1881,7 +1882,7 @@ |
13ceb006 AM |
500 | } |
501 | else if (!cache_mngr->stmt_cache.empty()) | |
502 | { | |
503 | - error= binlog_commit_flush_stmt_cache(thd, cache_mngr); | |
504 | + error= binlog_commit_flush_stmt_cache(thd, cache_mngr, all); | |
505 | } | |
506 | ||
507 | if (cache_mngr->trx_cache.empty()) | |
1bfc1981 | 508 | @@ -1929,7 +1930,7 @@ |
13ceb006 AM |
509 | (trans_has_updated_non_trans_table(thd) && |
510 | ending_single_stmt_trans(thd,all) && | |
511 | thd->variables.binlog_format == BINLOG_FORMAT_MIXED))) | |
512 | - error= binlog_rollback_flush_trx_cache(thd, cache_mngr); | |
513 | + error= binlog_rollback_flush_trx_cache(thd, cache_mngr, all); | |
514 | /* | |
515 | Truncate the cache if: | |
516 | . aborting a single or multi-statement transaction or; | |
1bfc1981 | 517 | @@ -2904,6 +2905,7 @@ |
13ceb006 AM |
518 | MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period) |
519 | :bytes_written(0), prepared_xids(0), file_id(1), open_count(1), | |
520 | need_start_event(TRUE), | |
521 | + group_commit_queue(0), num_commits(0), num_group_commits(0), | |
522 | sync_period_ptr(sync_period), | |
523 | is_relay_log(0), signal_cnt(0), | |
524 | description_event_for_exec(0), description_event_for_queue(0) | |
29ffd636 | 525 | @@ -5361,19 +5363,15 @@ |
13ceb006 AM |
526 | SYNOPSIS |
527 | write_cache() | |
528 | cache Cache to write to the binary log | |
529 | - lock_log True if the LOCK_log mutex should be aquired, false otherwise | |
530 | - sync_log True if the log should be flushed and synced | |
531 | ||
532 | DESCRIPTION | |
533 | Write the contents of the cache to the binary log. The cache will | |
534 | be reset as a READ_CACHE to be able to read the contents from it. | |
535 | */ | |
536 | ||
537 | -int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, | |
538 | - bool lock_log, bool sync_log) | |
539 | +int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache) | |
540 | { | |
541 | - Mutex_sentry sentry(lock_log ? &LOCK_log : NULL); | |
542 | - | |
543 | + mysql_mutex_assert_owner(&LOCK_log); | |
544 | if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) | |
545 | return ER_ERROR_ON_WRITE; | |
546 | uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs; | |
29ffd636 | 547 | @@ -5484,6 +5482,8 @@ |
13ceb006 AM |
548 | } |
549 | ||
550 | /* Write data to the binary log file */ | |
551 | + DBUG_EXECUTE_IF("fail_binlog_write_1", | |
552 | + errno= 28; return ER_ERROR_ON_WRITE;); | |
553 | if (my_b_write(&log_file, cache->read_pos, length)) | |
554 | return ER_ERROR_ON_WRITE; | |
555 | thd->binlog_bytes_written+= length; | |
29ffd636 | 556 | @@ -5492,9 +5492,6 @@ |
13ceb006 AM |
557 | |
558 | DBUG_ASSERT(carry == 0); | |
559 | ||
560 | - if (sync_log) | |
561 | - return flush_and_sync(0); | |
562 | - | |
563 | return 0; // All OK | |
564 | } | |
565 | ||
29ffd636 | 566 | @@ -5535,8 +5532,6 @@ |
13ceb006 AM |
567 | if (!is_open()) |
568 | DBUG_RETURN(error); | |
569 | ||
570 | - LEX_STRING const write_error_msg= | |
571 | - { C_STRING_WITH_LEN("error writing to the binary log") }; | |
572 | Incident incident= INCIDENT_LOST_EVENTS; | |
573 | Incident_log_event ev(thd, incident, write_error_msg); | |
574 | if (lock) | |
29ffd636 | 575 | @@ -5585,112 +5580,332 @@ |
13ceb006 AM |
576 | 'cache' needs to be reinitialized after this functions returns. |
577 | */ | |
578 | ||
579 | -bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event, | |
580 | - bool incident) | |
581 | +bool | |
582 | +MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, binlog_cache_data *cache_data, | |
583 | + Log_event *end_ev, bool all) | |
1bfc1981 | 584 | +{ |
13ceb006 AM |
585 | + group_commit_entry entry; |
586 | + bool ret; | |
587 | + DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog"); | |
588 | + | |
589 | + entry.thd= thd; | |
590 | + entry.cache_data= cache_data; | |
591 | + entry.error= 0; | |
592 | + entry.all= all; | |
593 | + | |
594 | + /* | |
595 | + Log "BEGIN" at the beginning of every transaction. Here, a transaction is | |
596 | + either a BEGIN..COMMIT block or a single statement in autocommit mode. | |
597 | + | |
598 | + Create the necessary events here, where we have the correct THD (and | |
599 | + thread context). | |
600 | + | |
601 | + Due to group commit the actual writing to binlog may happen in a different | |
602 | + thread. | |
603 | + */ | |
604 | + Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0); | |
605 | + entry.begin_event= &qinfo; | |
606 | + entry.end_event= end_ev; | |
607 | + if (cache_data->has_incident()) | |
608 | + { | |
609 | + Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, write_error_msg); | |
610 | + entry.incident_event= &inc_ev; | |
611 | + ret = write_transaction_to_binlog_events(&entry); | |
612 | + } | |
613 | + else | |
614 | + { | |
615 | + entry.incident_event= NULL; | |
616 | + ret = write_transaction_to_binlog_events(&entry); | |
617 | + } | |
618 | + if (!ret) /* userstat.patch */ | |
619 | + thd->binlog_bytes_written += qinfo.data_written; /* userstat.patch */ | |
620 | + DBUG_RETURN(ret); | |
621 | +} | |
622 | + | |
623 | +bool | |
624 | +MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry) | |
1bfc1981 AM |
625 | { |
626 | - DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)"); | |
13ceb006 AM |
627 | + /* |
628 | + To facilitate group commit for the binlog, we first queue up ourselves in | |
629 | + the group commit queue. Then the first thread to enter the queue waits for | |
630 | + the LOCK_log mutex, and commits for everyone in the queue once it gets the | |
631 | + lock. Any other threads in the queue just wait for the first one to finish | |
632 | + the commit and wake them up. | |
633 | + */ | |
634 | + entry->thd->clear_wakeup_ready(); | |
635 | + mysql_mutex_lock(&LOCK_group_commit_queue); | |
636 | + group_commit_entry *orig_queue= group_commit_queue; | |
637 | + entry->next= orig_queue; | |
638 | + group_commit_queue= entry; | |
639 | + DEBUG_SYNC(entry->thd, "commit_group_commit_queue"); | |
640 | + mysql_mutex_unlock(&LOCK_group_commit_queue); | |
641 | + | |
642 | + /* | |
643 | + The first in the queue handle group commit for all; the others just wait | |
644 | + to be signalled when group commit is done. | |
645 | + */ | |
646 | + if (orig_queue != NULL) | |
647 | + entry->thd->wait_for_wakeup_ready(); | |
648 | + else | |
649 | + trx_group_commit_leader(entry); | |
650 | + | |
651 | + if (likely(!entry->error)) | |
652 | + return 0; | |
653 | + | |
654 | + switch (entry->error) | |
655 | + { | |
656 | + case ER_ERROR_ON_WRITE: | |
657 | + my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, entry->commit_errno); | |
658 | + break; | |
659 | + case ER_ERROR_ON_READ: | |
660 | + my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH), | |
661 | + entry->cache_data->cache_log.file_name, entry->commit_errno); | |
662 | + break; | |
663 | + default: | |
664 | + /* | |
665 | + There are not (and should not be) any errors thrown not covered above. | |
666 | + But just in case one is added later without updating the above switch | |
667 | + statement, include a catch-all. | |
668 | + */ | |
669 | + my_printf_error(entry->error, | |
670 | + "Error writing transaction to binary log: %d", | |
671 | + MYF(ME_NOREFRESH), entry->error); | |
672 | + } | |
1bfc1981 | 673 | |
13ceb006 AM |
674 | + /* |
675 | + Since we return error, this transaction XID will not be committed, so | |
676 | + we need to mark it as not needed for recovery (unlog() is not called | |
677 | + for a transaction if log_xid() fails). | |
678 | + */ | |
679 | + if (entry->cache_data->using_xa && entry->cache_data->xa_xid) | |
680 | + mark_xid_done(); | |
681 | + | |
682 | + return 1; | |
683 | +} | |
684 | + | |
685 | +/* | |
686 | + Do binlog group commit as the lead thread. | |
687 | + | |
688 | + This must be called when this thread/transaction is queued at the start of | |
689 | + the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group | |
690 | + commit all the transactions in the queue (more may have entered while waiting | |
691 | + for LOCK_log). After commit is done, all other threads in the queue will be | |
692 | + signalled. | |
693 | + | |
694 | + */ | |
695 | +void | |
696 | +MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) | |
697 | +{ | |
698 | + DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader"); | |
699 | + uint xid_count= 0; | |
700 | + uint write_count= 0; | |
1bfc1981 AM |
701 | + bool check_purge= false; |
702 | + group_commit_entry *current= 0; | |
13ceb006 AM |
703 | DBUG_ASSERT(is_open()); |
704 | if (likely(is_open())) // Should always be true | |
705 | { | |
1bfc1981 AM |
706 | - bool check_purge; |
707 | - | |
708 | + /* | |
709 | + Lock the LOCK_log(), and once we get it, collect any additional writes | |
710 | + that queued up while we were waiting. | |
711 | + */ | |
712 | mysql_mutex_lock(&LOCK_log); | |
713 | + | |
714 | + DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log"); | |
715 | + mysql_mutex_lock(&LOCK_group_commit_queue); | |
716 | + current= group_commit_queue; | |
717 | + group_commit_queue= NULL; | |
718 | + mysql_mutex_unlock(&LOCK_group_commit_queue); | |
719 | + | |
720 | + /* As the queue is in reverse order of entering, reverse it. */ | |
721 | + group_commit_entry *queue= NULL; | |
722 | + while (current) | |
723 | + { | |
724 | + group_commit_entry *next= current->next; | |
725 | + current->next= queue; | |
726 | + queue= current; | |
727 | + current= next; | |
728 | + } | |
729 | + DBUG_ASSERT(leader == queue /* the leader should be first in queue */); | |
13ceb006 AM |
730 | /* |
731 | - We only bother to write to the binary log if there is anything | |
732 | - to write. | |
733 | - */ | |
734 | - if (my_b_tell(cache) > 0) | |
1bfc1981 AM |
735 | + Now we have in queue the list of transactions to be committed in order. |
736 | + | |
13ceb006 AM |
737 | + Commit every transaction in the queue. |
738 | + | |
739 | + Note that we are doing this in a different thread than the one running | |
740 | + the transaction! So we are limited in the operations we can do. In | |
741 | + particular, we cannot call my_error() on behalf of a transaction, as | |
742 | + that obtains the THD from thread local storage. Instead, we must set | |
743 | + current->error and let the thread do the error reporting itself once | |
744 | + we wake it up. | |
745 | + */ | |
746 | + for (current= queue; current != NULL; current= current->next) | |
747 | { | |
748 | + binlog_cache_data *cache_data= current->cache_data; | |
749 | + IO_CACHE *cache= &cache_data->cache_log; | |
750 | + | |
751 | /* | |
752 | - Log "BEGIN" at the beginning of every transaction. Here, a | |
753 | - transaction is either a BEGIN..COMMIT block or a single | |
754 | - statement in autocommit mode. | |
755 | + We only bother to write to the binary log if there is anything | |
756 | + to write. | |
757 | */ | |
758 | - Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0); | |
759 | - if (qinfo.write(&log_file)) | |
760 | - goto err; | |
761 | - thd->binlog_bytes_written+= qinfo.data_written; | |
762 | - DBUG_EXECUTE_IF("crash_before_writing_xid", | |
763 | - { | |
764 | - if ((write_error= write_cache(thd, cache, false, true))) | |
765 | - DBUG_PRINT("info", ("error writing binlog cache: %d", | |
766 | - write_error)); | |
767 | - DBUG_PRINT("info", ("crashing before writing xid")); | |
768 | - DBUG_SUICIDE(); | |
769 | - }); | |
1bfc1981 AM |
770 | - |
771 | - if ((write_error= write_cache(thd, cache, false, false))) | |
772 | - goto err; | |
773 | - | |
774 | - if (commit_event && commit_event->write(&log_file)) | |
775 | - goto err; | |
776 | - if (commit_event) | |
777 | - thd->binlog_bytes_written+= commit_event->data_written; | |
13ceb006 AM |
778 | + if (my_b_tell(cache) > 0) |
779 | + { | |
780 | + if ((current->error= write_transaction(current))) | |
781 | + current->commit_errno= errno; | |
13ceb006 AM |
782 | + write_count++; |
783 | + } | |
784 | ||
1bfc1981 | 785 | - if (incident && write_incident(thd, FALSE)) |
13ceb006 | 786 | - goto err; |
13ceb006 AM |
787 | + cache_data->commit_bin_log_file_pos= my_b_write_tell(&log_file); |
788 | + if (cache_data->using_xa && cache_data->xa_xid) | |
789 | + xid_count++; | |
790 | + } | |
791 | ||
13ceb006 AM |
792 | + if (write_count > 0) |
793 | + { | |
794 | bool synced= 0; | |
795 | if (flush_and_sync(&synced)) | |
796 | - goto err; | |
797 | - DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE();); | |
798 | - if (cache->error) // Error on read | |
799 | { | |
800 | - sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno); | |
801 | - write_error=1; // Don't give more errors | |
802 | - goto err; | |
803 | + for (current= queue; current != NULL; current= current->next) | |
804 | + { | |
805 | + if (!current->error) | |
806 | + { | |
807 | + current->error= ER_ERROR_ON_WRITE; | |
808 | + current->commit_errno= errno; | |
809 | + } | |
810 | + } | |
811 | + } | |
812 | + else | |
813 | + { | |
814 | + signal_update(); | |
815 | } | |
816 | ||
817 | if (RUN_HOOK(binlog_storage, after_flush, | |
818 | - (thd, log_file_name, log_file.pos_in_file, synced))) | |
819 | + (leader->thd, log_file_name, log_file.pos_in_file, synced))) | |
820 | { | |
821 | sql_print_error("Failed to run 'after_flush' hooks"); | |
822 | - write_error=1; | |
823 | - goto err; | |
824 | + for (current= queue; current != NULL; current= current->next) | |
825 | + { | |
826 | + if (!current->error) | |
827 | + { | |
828 | + current->error= ER_ERROR_ON_WRITE; | |
829 | + current->commit_errno= errno; | |
830 | + } | |
831 | + } | |
832 | } | |
833 | ||
834 | - signal_update(); | |
835 | } | |
836 | ||
837 | /* | |
838 | - if commit_event is Xid_log_event, increase the number of | |
839 | - prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated | |
840 | + if any commit_events are Xid_log_event, increase the number of | |
841 | + prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated | |
842 | if there're prepared xids in it - see the comment in new_file() for | |
843 | an explanation. | |
844 | - If the commit_event is not Xid_log_event (then it's a Query_log_event) | |
845 | - rotate binlog, if necessary. | |
846 | + If no Xid_log_events (then it's all Query_log_event) rotate binlog, | |
847 | + if necessary. | |
848 | */ | |
849 | - if (commit_event && commit_event->get_type_code() == XID_EVENT) | |
850 | + if (xid_count > 0) | |
851 | { | |
852 | - mysql_mutex_lock(&LOCK_prep_xids); | |
853 | - prepared_xids++; | |
854 | - mysql_mutex_unlock(&LOCK_prep_xids); | |
1bfc1981 | 855 | - mysql_mutex_unlock(&LOCK_log); |
13ceb006 AM |
856 | + mark_xids_active(xid_count); |
857 | } | |
858 | else | |
1bfc1981 AM |
859 | { |
860 | if (rotate(false, &check_purge)) | |
13ceb006 | 861 | - goto err; |
1bfc1981 AM |
862 | - mysql_mutex_unlock(&LOCK_log); |
863 | - if (check_purge) | |
864 | - purge(); | |
13ceb006 AM |
865 | + { |
866 | + for (current= queue; current != NULL; current= current->next) | |
867 | + { | |
868 | + if (!current->error) | |
869 | + { | |
870 | + current->error= ER_ERROR_ON_WRITE; | |
871 | + current->commit_errno= errno; | |
872 | + } | |
873 | + } | |
874 | + } | |
1bfc1981 AM |
875 | } |
876 | - } | |
13ceb006 AM |
877 | |
878 | - DBUG_RETURN(0); | |
1bfc1981 AM |
879 | + DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered"); |
880 | + mysql_mutex_lock(&LOCK_commit_ordered); | |
881 | + /* | |
882 | + We cannot unlock LOCK_log until we have locked LOCK_commit_ordered; | |
883 | + otherwise scheduling could allow the next group commit to run ahead of us, | |
884 | + messing up the order of commit_ordered() calls. But as soon as | |
885 | + LOCK_commit_ordered is obtained, we can let the next group commit start. | |
886 | + */ | |
887 | ||
13ceb006 AM |
888 | -err: |
889 | - if (!write_error) | |
1bfc1981 | 890 | - { |
13ceb006 AM |
891 | - write_error= 1; |
892 | - sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); | |
1bfc1981 | 893 | + mysql_mutex_unlock(&LOCK_log); |
13ceb006 | 894 | + |
1bfc1981 AM |
895 | + if (xid_count > 0 && check_purge) |
896 | + { | |
897 | + purge(); | |
898 | + } | |
899 | + | |
900 | + DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log"); | |
901 | + ++num_group_commits; | |
13ceb006 AM |
902 | + |
903 | + /* | |
1bfc1981 AM |
904 | + Wake up each participant waiting for our group commit, first calling the |
905 | + commit_ordered() methods for any transactions doing 2-phase commit. | |
13ceb006 | 906 | + */ |
1bfc1981 AM |
907 | + current= queue; |
908 | + while (current != NULL) | |
909 | + { | |
910 | + group_commit_entry *next; | |
911 | + | |
912 | + DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered"); | |
913 | + ++num_commits; | |
914 | + if (current->cache_data->using_xa && !current->error) | |
915 | + run_commit_ordered(current->thd, current->all); | |
916 | + | |
917 | + /* | |
918 | + Careful not to access current->next after waking up the other thread, as | |
919 | + it may change immediately after wakeup. | |
920 | + */ | |
921 | + next= current->next; | |
922 | + if (current != leader) // Don't wake up ourself | |
923 | + current->thd->signal_wakeup_ready(); | |
924 | + current= next; | |
925 | + } | |
926 | + DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered"); | |
927 | + mysql_mutex_unlock(&LOCK_commit_ordered); | |
13ceb006 AM |
928 | } |
929 | - mysql_mutex_unlock(&LOCK_log); | |
930 | - DBUG_RETURN(1); | |
13ceb006 AM |
931 | + |
932 | + DBUG_VOID_RETURN; | |
933 | } | |
934 | ||
1bfc1981 | 935 | |
13ceb006 AM |
936 | +int |
937 | +MYSQL_BIN_LOG::write_transaction(group_commit_entry *entry) | |
938 | +{ | |
939 | + binlog_cache_data *cache_data= entry->cache_data; | |
940 | + IO_CACHE *cache= &cache_data->cache_log; | |
941 | + | |
942 | + if (entry->begin_event->write(&log_file)) | |
943 | + return ER_ERROR_ON_WRITE; | |
944 | + | |
945 | + DBUG_EXECUTE_IF("crash_before_writing_xid", | |
946 | + { | |
947 | + if ((write_cache(entry->thd, cache))) | |
948 | + DBUG_PRINT("info", ("error writing binlog cache")); | |
949 | + else | |
950 | + flush_and_sync(0); | |
951 | + | |
952 | + DBUG_PRINT("info", ("crashing before writing xid")); | |
953 | + abort(); | |
954 | + }); | |
955 | + | |
956 | + if (write_cache(entry->thd, cache)) | |
957 | + return ER_ERROR_ON_WRITE; | |
958 | + | |
959 | + if (entry->end_event->write(&log_file)) | |
960 | + return ER_ERROR_ON_WRITE; | |
961 | + | |
962 | + if (entry->incident_event && entry->incident_event->write(&log_file)) | |
963 | + return ER_ERROR_ON_WRITE; | |
964 | + | |
965 | + if (cache->error) // Error on read | |
966 | + return ER_ERROR_ON_READ; | |
967 | + | |
968 | + return 0; | |
969 | +} | |
1bfc1981 | 970 | + |
13ceb006 AM |
971 | /** |
972 | Wait until we get a signal that the relay log has been updated. | |
1bfc1981 | 973 | |
29ffd636 | 974 | @@ -6095,6 +6310,68 @@ |
13ceb006 AM |
975 | } |
976 | ||
977 | ||
978 | +void | |
979 | +TC_init() | |
980 | +{ | |
981 | + mysql_mutex_init(key_LOCK_group_commit_queue, &LOCK_group_commit_queue, MY_MUTEX_INIT_SLOW); | |
982 | + mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered, MY_MUTEX_INIT_SLOW); | |
983 | + mutexes_inited= TRUE; | |
984 | +} | |
985 | + | |
986 | + | |
987 | +void | |
988 | +TC_destroy() | |
989 | +{ | |
990 | + if (mutexes_inited) | |
991 | + { | |
992 | + mysql_mutex_destroy(&LOCK_group_commit_queue); | |
993 | + mysql_mutex_destroy(&LOCK_commit_ordered); | |
994 | + mutexes_inited= FALSE; | |
995 | + } | |
996 | +} | |
997 | + | |
998 | + | |
999 | +void | |
1000 | +TC_LOG::run_commit_ordered(THD *thd, bool all) | |
1001 | +{ | |
1002 | + Ha_trx_info *ha_info= | |
1003 | + all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list; | |
1004 | + | |
1005 | + mysql_mutex_assert_owner(&LOCK_commit_ordered); | |
1006 | + for (; ha_info; ha_info= ha_info->next()) | |
1007 | + { | |
1008 | + handlerton *ht= ha_info->ht(); | |
1009 | + if (!ht->commit_ordered) | |
1010 | + continue; | |
1011 | + ht->commit_ordered(ht, thd, all); | |
1012 | + DEBUG_SYNC(thd, "commit_after_run_commit_ordered"); | |
1013 | + } | |
1014 | +} | |
1015 | + | |
1016 | +int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all, | |
1017 | + bool need_commit_ordered) | |
1018 | +{ | |
1019 | + int cookie; | |
1020 | + | |
1021 | + cookie= 0; | |
1022 | + if (xid) | |
1023 | + cookie= log_one_transaction(xid); | |
1024 | + | |
1025 | + if (need_commit_ordered) | |
1026 | + { | |
1027 | + /* Only run commit_ordered() if log_one_transaction() was successful. */ | |
1028 | + if (cookie) | |
1029 | + { | |
1030 | + mysql_mutex_lock(&LOCK_commit_ordered); | |
1031 | + run_commit_ordered(thd, all); | |
1032 | + mysql_mutex_unlock(&LOCK_commit_ordered); | |
1033 | + } | |
1034 | + } | |
1035 | + | |
1036 | + return cookie; | |
1037 | +} | |
1038 | + | |
1039 | + | |
1040 | /********* transaction coordinator log for 2pc - mmap() based solution *******/ | |
1041 | ||
1042 | /* | |
29ffd636 | 1043 | @@ -6231,6 +6508,7 @@ |
13ceb006 AM |
1044 | mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST); |
1045 | mysql_cond_init(key_COND_active, &COND_active, 0); | |
1046 | mysql_cond_init(key_COND_pool, &COND_pool, 0); | |
1047 | + mysql_cond_init(key_COND_queue_busy, &COND_queue_busy, 0); | |
1048 | ||
1049 | inited=6; | |
1050 | ||
29ffd636 | 1051 | @@ -6238,6 +6516,8 @@ |
13ceb006 AM |
1052 | active=pages; |
1053 | pool=pages+1; | |
1054 | pool_last=pages+npages-1; | |
1055 | + commit_ordered_queue= NULL; | |
1056 | + commit_ordered_queue_busy= false; | |
1057 | ||
1058 | return 0; | |
1059 | ||
29ffd636 | 1060 | @@ -6343,7 +6623,7 @@ |
13ceb006 AM |
1061 | to the position in memory where xid was logged to. |
1062 | */ | |
1063 | ||
1064 | -int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid) | |
1065 | +int TC_LOG_MMAP::log_one_transaction(my_xid xid) | |
1066 | { | |
1067 | int err; | |
1068 | PAGE *p; | |
29ffd636 | 1069 | @@ -6482,7 +6762,9 @@ |
13ceb006 AM |
1070 | mysql_mutex_destroy(&LOCK_sync); |
1071 | mysql_mutex_destroy(&LOCK_active); | |
1072 | mysql_mutex_destroy(&LOCK_pool); | |
1073 | + mysql_cond_destroy(&COND_active); | |
1074 | mysql_cond_destroy(&COND_pool); | |
1075 | + mysql_cond_destroy(&COND_queue_busy); | |
1076 | case 5: | |
1077 | data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails | |
1078 | case 4: | |
29ffd636 | 1079 | @@ -6692,42 +6974,87 @@ |
13ceb006 AM |
1080 | mysql_cond_destroy(&COND_prep_xids); |
1081 | } | |
1082 | ||
1083 | -/** | |
1084 | - @todo | |
1085 | - group commit | |
1086 | +/* | |
1087 | + Do a binlog log_xid() for a group of transactions, linked through | |
1088 | + thd->next_commit_ordered. | |
1089 | ||
1090 | @retval | |
1091 | 0 error | |
1092 | @retval | |
1093 | 1 success | |
1094 | */ | |
1095 | -int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid) | |
1096 | +int TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all, | |
1097 | + bool need_commit_ordered __attribute__((unused))) | |
1098 | { | |
1099 | - DBUG_ENTER("TC_LOG_BINLOG::log"); | |
1100 | + DBUG_ENTER("TC_LOG_BINLOG::log_and_order"); | |
1101 | binlog_cache_mngr *cache_mngr= | |
1102 | (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); | |
1103 | + | |
1104 | + cache_mngr->trx_cache.using_xa= TRUE; | |
1105 | + cache_mngr->trx_cache.xa_xid= xid; | |
1106 | /* | |
1107 | We always commit the entire transaction when writing an XID. Also | |
1108 | note that the return value is inverted. | |
1109 | */ | |
1110 | - DBUG_RETURN(!binlog_commit_flush_stmt_cache(thd, cache_mngr) && | |
1111 | - !binlog_commit_flush_trx_cache(thd, cache_mngr, xid)); | |
1112 | + DBUG_RETURN(!binlog_commit_flush_stmt_cache(thd, cache_mngr, all) && | |
1113 | + !binlog_commit_flush_trx_cache(thd, cache_mngr, xid, all)); | |
1114 | } | |
1115 | ||
1116 | -int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) | |
1117 | +/* | |
1118 | + After an XID is logged, we need to hold on to the current binlog file until | |
1119 | + it is fully committed in the storage engine. The reason is that crash | |
1120 | + recovery only looks at the latest binlog, so we must make sure there are no | |
1121 | + outstanding prepared (but not committed) transactions before rotating the | |
1122 | + binlog. | |
1123 | + | |
1124 | + To handle this, we keep a count of outstanding XIDs. This function is used | |
1125 | + to increase this count when committing one or more transactions to the | |
1126 | + binary log. | |
1127 | +*/ | |
1128 | +void | |
1129 | +TC_LOG_BINLOG::mark_xids_active(uint xid_count) | |
1130 | { | |
1131 | - DBUG_ENTER("TC_LOG_BINLOG::unlog"); | |
1132 | + DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active"); | |
1133 | + DBUG_PRINT("info", ("xid_count=%u", xid_count)); | |
1134 | + mysql_mutex_lock(&LOCK_prep_xids); | |
1135 | + prepared_xids+= xid_count; | |
1136 | + mysql_mutex_unlock(&LOCK_prep_xids); | |
1137 | + DBUG_VOID_RETURN; | |
1138 | +} | |
1139 | + | |
1140 | +/* | |
1141 | + Once an XID is committed, it is safe to rotate the binary log, as it can no | |
1142 | + longer be needed during crash recovery. | |
1143 | + | |
1144 | + This function is called to mark an XID this way. It needs to decrease the | |
1145 | + count of pending XIDs, and signal the log rotator thread when it reaches zero. | |
1146 | +*/ | |
1147 | +void | |
1148 | +TC_LOG_BINLOG::mark_xid_done() | |
1149 | +{ | |
1150 | + my_bool send_signal; | |
1151 | + | |
1152 | + DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done"); | |
1153 | mysql_mutex_lock(&LOCK_prep_xids); | |
1154 | // prepared_xids can be 0 if the transaction had ignorable errors. | |
1155 | DBUG_ASSERT(prepared_xids >= 0); | |
1156 | if (prepared_xids > 0) | |
1157 | prepared_xids--; | |
1158 | - if (prepared_xids == 0) { | |
1159 | + send_signal= (prepared_xids == 0); | |
1160 | + mysql_mutex_unlock(&LOCK_prep_xids); | |
1161 | + if (send_signal) { | |
1162 | DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids)); | |
1163 | mysql_cond_signal(&COND_prep_xids); | |
1164 | } | |
1165 | - mysql_mutex_unlock(&LOCK_prep_xids); | |
1166 | - DBUG_RETURN(rotate_and_purge(0)); // as ::write() did not rotate | |
1167 | + DBUG_VOID_RETURN; | |
1168 | +} | |
1169 | + | |
1170 | +int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) | |
1171 | +{ | |
1172 | + DBUG_ENTER("TC_LOG_BINLOG::unlog"); | |
1173 | + if (xid) | |
1174 | + mark_xid_done(); | |
1175 | + DBUG_RETURN(rotate_and_purge(0)); | |
1176 | } | |
1177 | ||
1178 | int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle) | |
29ffd636 | 1179 | @@ -6796,9 +7123,67 @@ |
13ceb006 AM |
1180 | { |
1181 | return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file; | |
1182 | } | |
1183 | +/* | |
1184 | + Get the current position of the MySQL binlog for the transaction currently | |
1185 | + being committed. | |
1186 | + | |
1187 | + This is valid to call from within storage engine commit_ordered() and | |
1188 | + commit() methods only. | |
1189 | + | |
1190 | + Since it stores the position inside THD, it is safe to call without any | |
1191 | + locking. | |
1192 | + | |
1193 | + Note that currently the binlog file name is not stored inside THD, but this | |
1194 | + is still safe as it can only change when the log is rotated, and we never | |
1195 | + rotate the binlog while commits are pending inside storage engines. | |
1196 | +*/ | |
1197 | +extern "C" | |
1198 | +void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file) | |
1199 | +{ | |
1200 | + binlog_cache_mngr *cache_mngr; | |
1201 | + if (binlog_hton->state == SHOW_OPTION_YES | |
1202 | + && (cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton))) | |
1203 | + { | |
1204 | + *out_pos= cache_mngr->trx_cache.commit_bin_log_file_pos; | |
1205 | + *out_file= mysql_bin_log.get_log_fname(); | |
1206 | + } | |
1207 | + else | |
1208 | + { | |
3d3ecf24 | 1209 | + *out_pos= 0ULL; |
13ceb006 AM |
1210 | + *out_file= NULL; |
1211 | + } | |
1212 | +} | |
1213 | #endif /* INNODB_COMPATIBILITY_HOOKS */ | |
1214 | ||
1215 | ||
1216 | +static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff) | |
1217 | +{ | |
1218 | + mysql_bin_log.set_status_variables(); | |
1219 | + var->type= SHOW_ARRAY; | |
1220 | + var->value= (char *)&binlog_status_vars_detail; | |
1221 | + return 0; | |
1222 | +} | |
1223 | + | |
1224 | +static SHOW_VAR binlog_status_vars_top[]= { | |
1225 | + {"binlog", (char *) &show_binlog_vars, SHOW_FUNC}, | |
1226 | + {NullS, NullS, SHOW_LONG} | |
1227 | +}; | |
1228 | + | |
1229 | +/* | |
1230 | + Copy out current values of status variables, for SHOW STATUS or | |
1231 | + information_schema.global_status. | |
1232 | + | |
1233 | + This is called only under LOCK_status, so we can fill in a static array. | |
1234 | +*/ | |
1235 | +void | |
1236 | +TC_LOG_BINLOG::set_status_variables() | |
1237 | +{ | |
1238 | + mysql_mutex_lock(&LOCK_commit_ordered); | |
1239 | + binlog_status_var_num_commits= this->num_commits; | |
1240 | + binlog_status_var_num_group_commits= this->num_group_commits; | |
1241 | + mysql_mutex_unlock(&LOCK_commit_ordered); | |
1242 | +} | |
1243 | + | |
1244 | struct st_mysql_storage_engine binlog_storage_engine= | |
1245 | { MYSQL_HANDLERTON_INTERFACE_VERSION }; | |
1246 | ||
29ffd636 | 1247 | @@ -6813,7 +7198,7 @@ |
13ceb006 AM |
1248 | binlog_init, /* Plugin Init */ |
1249 | NULL, /* Plugin Deinit */ | |
1250 | 0x0100 /* 1.0 */, | |
1251 | - NULL, /* status variables */ | |
1252 | + binlog_status_vars_top, /* status variables */ | |
1253 | NULL, /* system variables */ | |
1254 | NULL, /* config options */ | |
1255 | 0, /* flags */ | |
1256 | --- a/sql/log.h | |
1257 | +++ b/sql/log.h | |
1258 | @@ -44,17 +44,42 @@ | |
1259 | ||
1260 | virtual int open(const char *opt_name)=0; | |
1261 | virtual void close()=0; | |
1262 | - virtual int log_xid(THD *thd, my_xid xid)=0; | |
1263 | + virtual int log_and_order(THD *thd, my_xid xid, bool all, | |
1264 | + bool need_commit_ordered)=0; | |
1265 | virtual int unlog(ulong cookie, my_xid xid)=0; | |
1266 | + | |
1267 | + protected: | |
1268 | + void run_commit_ordered(THD *thd, bool all); | |
1269 | }; | |
1270 | ||
1271 | +/* | |
1272 | + Locks used to ensure serialised execution of | |
1273 | + TC_LOG::run_commit_ordered(), or any other code that calls handler | |
1274 | + commit_ordered() methods. | |
1275 | +*/ | |
1276 | +extern mysql_mutex_t LOCK_group_commit_queue; | |
1277 | +extern mysql_mutex_t LOCK_commit_ordered; | |
1278 | + | |
1279 | +extern void TC_init(); | |
1280 | +extern void TC_destroy(); | |
1281 | + | |
1282 | class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging | |
1283 | { | |
1284 | public: | |
1285 | TC_LOG_DUMMY() {} | |
1286 | int open(const char *opt_name) { return 0; } | |
1287 | void close() { } | |
1288 | - int log_xid(THD *thd, my_xid xid) { return 1; } | |
1289 | + /* | |
1290 | + TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we | |
1291 | + only use internal XA during commit when >= 2 XA-capable engines | |
1292 | + participate. | |
1293 | + */ | |
1294 | + int log_and_order(THD *thd, my_xid xid, bool all, | |
1295 | + bool need_commit_ordered) | |
1296 | + { | |
1297 | + DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */); | |
1298 | + return 1; | |
1299 | + } | |
1300 | int unlog(ulong cookie, my_xid xid) { return 0; } | |
1301 | }; | |
1302 | ||
1303 | @@ -80,6 +105,13 @@ | |
1304 | mysql_cond_t cond; // to wait for a sync | |
1305 | } PAGE; | |
1306 | ||
1307 | + /* List of THDs for which to invoke commit_ordered(), in order. */ | |
1308 | + struct commit_entry | |
1309 | + { | |
1310 | + struct commit_entry *next; | |
1311 | + THD *thd; | |
1312 | + }; | |
1313 | + | |
1314 | char logname[FN_REFLEN]; | |
1315 | File fd; | |
1316 | my_off_t file_length; | |
1317 | @@ -94,16 +126,38 @@ | |
1318 | */ | |
1319 | mysql_mutex_t LOCK_active, LOCK_pool, LOCK_sync; | |
1320 | mysql_cond_t COND_pool, COND_active; | |
1321 | + /* | |
1322 | + Queue of threads that need to call commit_ordered(). | |
1323 | + Access to this queue must be protected by LOCK_group_commit_queue | |
1324 | + */ | |
1325 | + commit_entry *commit_ordered_queue; | |
1326 | + /* | |
1327 | + This flag and condition are used to reserve the queue while threads in it | |
1328 | + each run the commit_ordered() methods one after the other. Only once the | |
1329 | + last commit_ordered() in the queue is done can we start on a new queue | |
1330 | + run. | |
1331 | + | |
1332 | + Since we start this process in the first thread in the queue and finish in | |
1333 | + the last (and possibly different) thread, we need a condition variable for | |
1334 | + this (we cannot unlock a mutex in a different thread than the one who | |
1335 | + locked it). | |
1336 | + | |
1337 | + The condition is used together with the LOCK_group_commit_queue mutex. | |
1338 | + */ | |
1339 | + my_bool commit_ordered_queue_busy; | |
1340 | + mysql_cond_t COND_queue_busy; | |
1341 | ||
1342 | public: | |
1343 | TC_LOG_MMAP(): inited(0) {} | |
1344 | int open(const char *opt_name); | |
1345 | void close(); | |
1346 | - int log_xid(THD *thd, my_xid xid); | |
1347 | + int log_and_order(THD *thd, my_xid xid, bool all, | |
1348 | + bool need_commit_ordered); | |
1349 | int unlog(ulong cookie, my_xid xid); | |
1350 | int recover(); | |
1351 | ||
1352 | private: | |
1353 | + int log_one_transaction(my_xid xid); | |
1354 | void get_active_from_pool(); | |
1355 | int sync(); | |
1356 | int overflow(); | |
1357 | @@ -271,9 +325,31 @@ | |
1358 | time_t last_time; | |
1359 | }; | |
1360 | ||
1361 | +class binlog_cache_data; | |
1362 | class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG | |
1363 | { | |
1364 | private: | |
1365 | + struct group_commit_entry | |
1366 | + { | |
1367 | + struct group_commit_entry *next; | |
1368 | + THD *thd; | |
1369 | + binlog_cache_data *cache_data; | |
1370 | + /* | |
1371 | + Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be | |
1372 | + written during group commit. The incident_event is only valid if | |
1373 | + trx_data->has_incident() is true. | |
1374 | + */ | |
1375 | + Log_event *begin_event; | |
1376 | + Log_event *end_event; | |
1377 | + Log_event *incident_event; | |
1378 | + /* Set during group commit to record any per-thread error. */ | |
1379 | + int error; | |
1380 | + int commit_errno; | |
1382 | + /* This is the `all' parameter for run_commit_ordered(). */ | |
1382 | + bool all; | |
1384 | + /* Whether we came in through XA log_and_order() is kept in cache_data->using_xa. */ | |
1384 | + }; | |
1385 | + | |
1386 | #ifdef HAVE_PSI_INTERFACE | |
1387 | /** The instrumentation key to use for @ LOCK_index. */ | |
1388 | PSI_mutex_key m_key_LOCK_index; | |
1389 | @@ -325,6 +401,12 @@ | |
1390 | In 5.0 it's 0 for relay logs too! | |
1391 | */ | |
1392 | bool no_auto_events; | |
1393 | + /* Queue of transactions queued up to participate in group commit. */ | |
1394 | + group_commit_entry *group_commit_queue; | |
1395 | + /* Total number of committed transactions. */ | |
1396 | + ulonglong num_commits; | |
1397 | + /* Number of group commits done. */ | |
1398 | + ulonglong num_group_commits; | |
1399 | ||
1400 | /* pointer to the sync period variable, for binlog this will be | |
1401 | sync_binlog_period, for relay log this will be | |
1402 | @@ -346,6 +428,11 @@ | |
1403 | */ | |
1404 | int new_file_without_locking(); | |
1405 | int new_file_impl(bool need_lock); | |
1406 | + int write_transaction(group_commit_entry *entry); | |
1407 | + bool write_transaction_to_binlog_events(group_commit_entry *entry); | |
1408 | + void trx_group_commit_leader(group_commit_entry *leader); | |
1409 | + void mark_xid_done(); | |
1410 | + void mark_xids_active(uint xid_count); | |
1411 | ||
1412 | public: | |
1413 | MYSQL_LOG::generate_name; | |
1414 | @@ -387,7 +474,8 @@ | |
1415 | ||
1416 | int open(const char *opt_name); | |
1417 | void close(); | |
1418 | - int log_xid(THD *thd, my_xid xid); | |
1419 | + int log_and_order(THD *thd, my_xid xid, bool all, | |
1420 | + bool need_commit_ordered); | |
1421 | int unlog(ulong cookie, my_xid xid); | |
1422 | int recover(IO_CACHE *log, Format_description_log_event *fdle); | |
1423 | #if !defined(MYSQL_CLIENT) | |
1424 | @@ -434,11 +522,11 @@ | |
1425 | int new_file(); | |
1426 | ||
1427 | bool write(Log_event* event_info); // binary log write | |
1428 | - bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident); | |
1429 | + bool write_transaction_to_binlog(THD *thd, binlog_cache_data *cache_data, | |
1430 | + Log_event *end_ev, bool all); | |
1431 | bool write_incident(THD *thd, bool lock); | |
1432 | ||
1433 | - int write_cache(THD *thd, IO_CACHE *cache, | |
1434 | - bool lock_log, bool flush_and_sync); | |
1435 | + int write_cache(THD *thd, IO_CACHE *cache); | |
1436 | void set_write_error(THD *thd, bool is_transactional); | |
1437 | bool check_write_error(THD *thd); | |
1438 | ||
1bfc1981 | 1439 | @@ -509,6 +597,7 @@ |
13ceb006 AM |
1440 | inline void unlock_index() { mysql_mutex_unlock(&LOCK_index);} |
1441 | inline IO_CACHE *get_index_file() { return &index_file;} | |
1442 | inline uint32 get_open_count() { return open_count; } | |
1443 | + void set_status_variables(); | |
1444 | }; | |
1445 | ||
1446 | class Log_event_handler | |
1447 | --- a/sql/mysqld.cc | |
1448 | +++ b/sql/mysqld.cc | |
29ffd636 | 1449 | @@ -1495,6 +1495,7 @@ |
13ceb006 AM |
1450 | ha_end(); |
1451 | if (tc_log) | |
1452 | tc_log->close(); | |
1453 | + TC_destroy(); | |
1454 | delegates_destroy(); | |
1455 | xid_cache_free(); | |
1456 | table_def_free(); | |
29ffd636 | 1457 | @@ -3911,6 +3912,8 @@ |
13ceb006 AM |
1458 | query_response_time_init(); |
1459 | #endif // HAVE_RESPONSE_TIME_DISTRIBUTION | |
1460 | /* We have to initialize the storage engines before CSV logging */ | |
1461 | + TC_init(); | |
1462 | + | |
1463 | init_global_table_stats(); | |
1464 | init_global_index_stats(); | |
1465 | ||
29ffd636 | 1466 | @@ -7872,6 +7875,7 @@ |
13ceb006 AM |
1467 | key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count, |
1468 | key_PARTITION_LOCK_auto_inc; | |
1469 | PSI_mutex_key key_RELAYLOG_LOCK_index; | |
1470 | +PSI_mutex_key key_LOCK_wakeup_ready, key_LOCK_group_commit_queue, key_LOCK_commit_ordered; | |
1471 | ||
1472 | static PSI_mutex_info all_server_mutexes[]= | |
1473 | { | |
29ffd636 | 1474 | @@ -7892,6 +7896,7 @@ |
13ceb006 AM |
1475 | { &key_delayed_insert_mutex, "Delayed_insert::mutex", 0}, |
1476 | { &key_hash_filo_lock, "hash_filo::lock", 0}, | |
1477 | { &key_LOCK_active_mi, "LOCK_active_mi", PSI_FLAG_GLOBAL}, | |
1478 | + { &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL}, | |
1479 | { &key_LOCK_connection_count, "LOCK_connection_count", PSI_FLAG_GLOBAL}, | |
1480 | { &key_LOCK_crypt, "LOCK_crypt", PSI_FLAG_GLOBAL}, | |
1481 | { &key_LOCK_delayed_create, "LOCK_delayed_create", PSI_FLAG_GLOBAL}, | |
29ffd636 | 1482 | @@ -7907,6 +7912,7 @@ |
13ceb006 AM |
1483 | "LOCK_global_index_stats", PSI_FLAG_GLOBAL}, |
1484 | { &key_LOCK_gdl, "LOCK_gdl", PSI_FLAG_GLOBAL}, | |
1485 | { &key_LOCK_global_system_variables, "LOCK_global_system_variables", PSI_FLAG_GLOBAL}, | |
1486 | + { &key_LOCK_group_commit_queue, "LOCK_group_commit_queue", PSI_FLAG_GLOBAL}, | |
1487 | { &key_LOCK_manager, "LOCK_manager", PSI_FLAG_GLOBAL}, | |
1488 | { &key_LOCK_prepared_stmt_count, "LOCK_prepared_stmt_count", PSI_FLAG_GLOBAL}, | |
1489 | { &key_LOCK_rpl_status, "LOCK_rpl_status", PSI_FLAG_GLOBAL}, | |
29ffd636 | 1490 | @@ -7918,6 +7924,7 @@ |
13ceb006 AM |
1491 | { &key_LOCK_temporary_tables, "THD::LOCK_temporary_tables", 0}, |
1492 | { &key_LOCK_user_conn, "LOCK_user_conn", PSI_FLAG_GLOBAL}, | |
1493 | { &key_LOCK_uuid_generator, "LOCK_uuid_generator", PSI_FLAG_GLOBAL}, | |
1494 | + { &key_LOCK_wakeup_ready, "THD::LOCK_wakeup_ready", 0}, | |
1495 | { &key_LOG_LOCK_log, "LOG::LOCK_log", 0}, | |
1496 | { &key_master_info_data_lock, "Master_info::data_lock", 0}, | |
1497 | { &key_master_info_run_lock, "Master_info::run_lock", 0}, | |
29ffd636 | 1498 | @@ -7965,6 +7972,7 @@ |
13ceb006 AM |
1499 | key_TABLE_SHARE_cond, key_user_level_lock_cond, |
1500 | key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache; | |
1501 | PSI_cond_key key_RELAYLOG_update_cond; | |
1502 | +PSI_cond_key key_COND_wakeup_ready, key_COND_queue_busy; | |
1503 | ||
1504 | static PSI_cond_info all_server_conds[]= | |
1505 | { | |
29ffd636 | 1506 | @@ -7981,8 +7989,10 @@ |
13ceb006 AM |
1507 | { &key_RELAYLOG_update_cond, "MYSQL_RELAY_LOG::update_cond", 0}, |
1508 | { &key_COND_cache_status_changed, "Query_cache::COND_cache_status_changed", 0}, | |
1509 | { &key_COND_manager, "COND_manager", PSI_FLAG_GLOBAL}, | |
1510 | + { &key_COND_queue_busy, "COND_queue_busy", PSI_FLAG_GLOBAL}, | |
1511 | { &key_COND_rpl_status, "COND_rpl_status", PSI_FLAG_GLOBAL}, | |
1512 | { &key_COND_server_started, "COND_server_started", PSI_FLAG_GLOBAL}, | |
1513 | + { &key_COND_wakeup_ready, "THD::COND_wakeup_ready", 0}, | |
1514 | { &key_delayed_insert_cond, "Delayed_insert::cond", 0}, | |
1515 | { &key_delayed_insert_cond_client, "Delayed_insert::cond_client", 0}, | |
1516 | { &key_item_func_sleep_cond, "Item_func_sleep::cond", 0}, | |
1517 | --- a/sql/mysqld.h | |
1518 | +++ b/sql/mysqld.h | |
29ffd636 | 1519 | @@ -274,6 +274,7 @@ |
13ceb006 AM |
1520 | key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data, |
1521 | key_LOCK_error_messages, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc; | |
1522 | extern PSI_mutex_key key_RELAYLOG_LOCK_index; | |
1523 | +extern PSI_mutex_key key_LOCK_wakeup_ready, key_LOCK_group_commit_queue, key_LOCK_commit_ordered; | |
1524 | ||
1525 | extern PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger, | |
1526 | key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave, | |
29ffd636 | 1527 | @@ -294,6 +295,7 @@ |
13ceb006 AM |
1528 | key_TABLE_SHARE_cond, key_user_level_lock_cond, |
1529 | key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache; | |
1530 | extern PSI_cond_key key_RELAYLOG_update_cond; | |
1531 | +extern PSI_cond_key key_COND_wakeup_ready, key_COND_queue_busy; | |
1532 | ||
1533 | extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, | |
1534 | key_thread_handle_manager, key_thread_kill_server, key_thread_main, | |
1535 | --- a/sql/sql_class.cc | |
1536 | +++ b/sql/sql_class.cc | |
1bfc1981 | 1537 | @@ -1005,6 +1005,8 @@ |
13ceb006 AM |
1538 | mysql_mutex_init(key_LOCK_thd_data, &LOCK_thd_data, MY_MUTEX_INIT_FAST); |
1539 | mysql_mutex_init(key_LOCK_temporary_tables, &LOCK_temporary_tables, | |
1540 | MY_MUTEX_INIT_FAST); | |
1541 | + mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST); | |
1542 | + mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, NULL); | |
1543 | ||
1544 | /* Variables with default values */ | |
1545 | proc_info="login"; | |
1bfc1981 | 1546 | @@ -1609,6 +1611,8 @@ |
13ceb006 AM |
1547 | my_free(db); |
1548 | db= NULL; | |
1549 | free_root(&transaction.mem_root,MYF(0)); | |
1550 | + mysql_cond_destroy(&COND_wakeup_ready); | |
1551 | + mysql_mutex_destroy(&LOCK_wakeup_ready); | |
1552 | mysql_mutex_destroy(&LOCK_thd_data); | |
1553 | mysql_mutex_destroy(&LOCK_temporary_tables); | |
1554 | #ifndef DBUG_OFF | |
29ffd636 | 1555 | @@ -5297,6 +5301,24 @@ |
13ceb006 AM |
1556 | DBUG_RETURN(0); |
1557 | } | |
1558 | ||
1559 | +void | |
1560 | +THD::wait_for_wakeup_ready() | |
1561 | +{ | |
1562 | + mysql_mutex_lock(&LOCK_wakeup_ready); | |
1563 | + while (!wakeup_ready) | |
1564 | + mysql_cond_wait(&COND_wakeup_ready, &LOCK_wakeup_ready); | |
1565 | + mysql_mutex_unlock(&LOCK_wakeup_ready); | |
1566 | +} | |
1567 | + | |
1568 | +void | |
1569 | +THD::signal_wakeup_ready() | |
1570 | +{ | |
1571 | + mysql_mutex_lock(&LOCK_wakeup_ready); | |
1572 | + wakeup_ready= true; | |
1573 | + mysql_mutex_unlock(&LOCK_wakeup_ready); | |
1574 | + mysql_cond_signal(&COND_wakeup_ready); | |
1575 | +} | |
1576 | + | |
1577 | bool Discrete_intervals_list::append(ulonglong start, ulonglong val, | |
1578 | ulonglong incr) | |
1579 | { | |
1580 | --- a/sql/sql_class.h | |
1581 | +++ b/sql/sql_class.h | |
1bfc1981 | 1582 | @@ -3078,6 +3078,14 @@ |
13ceb006 AM |
1583 | LEX_STRING get_invoker_user() { return invoker_user; } |
1584 | LEX_STRING get_invoker_host() { return invoker_host; } | |
1585 | bool has_invoker() { return invoker_user.length > 0; } | |
1586 | + void clear_wakeup_ready() { wakeup_ready= false; } | |
1587 | + /* | |
1588 | + Sleep waiting for others to wake us up with signal_wakeup_ready(). | |
1589 | + Must call clear_wakeup_ready() before waiting. | |
1590 | + */ | |
1591 | + void wait_for_wakeup_ready(); | |
1592 | + /* Wake this thread up from wait_for_wakeup_ready(). */ | |
1593 | + void signal_wakeup_ready(); | |
1594 | private: | |
1595 | ||
1596 | /** The current internal error handler for this thread, or NULL. */ | |
1bfc1981 | 1597 | @@ -3120,6 +3128,16 @@ |
13ceb006 AM |
1598 | */ |
1599 | LEX_STRING invoker_user; | |
1600 | LEX_STRING invoker_host; | |
1601 | + /* | |
1602 | + Flag, mutex and condition for a thread to wait for a signal from another | |
1603 | + thread. | |
1604 | + | |
1605 | + Currently used to wait for group commit to complete, can also be used for | |
1606 | + other purposes. | |
1607 | + */ | |
1608 | + bool wakeup_ready; | |
1609 | + mysql_mutex_t LOCK_wakeup_ready; | |
1610 | + mysql_cond_t COND_wakeup_ready; | |
1611 | }; | |
1612 | ||
1613 | /* Returns string as 'IP' for the client-side of the connection represented by | |
1614 | --- a/sql/sql_parse.cc | |
1615 | +++ b/sql/sql_parse.cc | |
1616 | @@ -889,6 +889,10 @@ | |
1617 | DBUG_ENTER("dispatch_command"); | |
1618 | DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command)); | |
1619 | ||
1620 | + DBUG_EXECUTE_IF("crash_dispatch_command_before", | |
1621 | + { DBUG_PRINT("crash_dispatch_command_before", ("now")); | |
1622 | + DBUG_ABORT(); }); | |
1623 | + | |
1624 | #if defined(ENABLED_PROFILING) | |
1625 | thd->profiling.start_new_query(); | |
1626 | #endif | |
1627 | --- a/mysql-test/suite/perfschema/r/dml_setup_instruments.result | |
1628 | +++ b/mysql-test/suite/perfschema/r/dml_setup_instruments.result | |
1629 | @@ -11,9 +11,9 @@ | |
1630 | wait/synch/mutex/sql/HA_DATA_PARTITION::LOCK_auto_inc YES YES | |
1631 | wait/synch/mutex/sql/LOCK_active_mi YES YES | |
1632 | wait/synch/mutex/sql/LOCK_audit_mask YES YES | |
1633 | +wait/synch/mutex/sql/LOCK_commit_ordered YES YES | |
1634 | wait/synch/mutex/sql/LOCK_connection_count YES YES | |
1635 | wait/synch/mutex/sql/LOCK_crypt YES YES | |
1636 | -wait/synch/mutex/sql/LOCK_delayed_create YES YES | |
1637 | select * from performance_schema.setup_instruments | |
1638 | where name like 'Wait/Synch/Rwlock/sql/%' | |
1639 | and name not in ('wait/synch/rwlock/sql/CRYPTO_dynlock_value::lock') | |
1640 | @@ -38,6 +38,7 @@ | |
1641 | NAME ENABLED TIMED | |
1642 | wait/synch/cond/sql/COND_flush_thread_cache YES YES | |
1643 | wait/synch/cond/sql/COND_manager YES YES | |
1644 | +wait/synch/cond/sql/COND_queue_busy YES YES | |
1645 | wait/synch/cond/sql/COND_queue_state YES YES | |
1646 | wait/synch/cond/sql/COND_rpl_status YES YES | |
1647 | wait/synch/cond/sql/COND_server_started YES YES | |
1648 | @@ -45,7 +46,6 @@ | |
1649 | wait/synch/cond/sql/COND_thread_count YES YES | |
1650 | wait/synch/cond/sql/Delayed_insert::cond YES YES | |
1651 | wait/synch/cond/sql/Delayed_insert::cond_client YES YES | |
1652 | -wait/synch/cond/sql/Event_scheduler::COND_state YES YES | |
1653 | select * from performance_schema.setup_instruments | |
1654 | where name='Wait'; | |
1655 | select * from performance_schema.setup_instruments | |
1656 | --- a/storage/innobase/handler/ha_innodb.cc | |
1657 | +++ b/storage/innobase/handler/ha_innodb.cc | |
1658 | @@ -375,6 +375,9 @@ | |
1659 | static INNOBASE_SHARE *get_share(const char *table_name); | |
1660 | static void free_share(INNOBASE_SHARE *share); | |
1661 | static int innobase_close_connection(handlerton *hton, THD* thd); | |
1662 | +#ifdef EXTENDED_FOR_COMMIT_ORDERED | |
1663 | +static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all); | |
1664 | +#endif | |
1665 | static int innobase_commit(handlerton *hton, THD* thd, bool all); | |
1666 | static int innobase_rollback(handlerton *hton, THD* thd, bool all); | |
1667 | static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd, | |
1668 | @@ -1699,7 +1702,10 @@ | |
1669 | trx_t* trx) /*!< in/out: InnoDB transaction handle */ | |
1670 | { | |
1671 | DBUG_ENTER("innobase_trx_init"); | |
1672 | +#ifndef EXTENDED_FOR_COMMIT_ORDERED | |
1673 | + /* used by innobase_commit_ordered */ | |
1674 | DBUG_ASSERT(EQ_CURRENT_THD(thd)); | |
1675 | +#endif | |
1676 | DBUG_ASSERT(thd == trx->mysql_thd); | |
1677 | ||
1678 | trx->check_foreigns = !thd_test_options( | |
1679 | @@ -1760,7 +1766,10 @@ | |
1680 | { | |
1681 | trx_t*& trx = thd_to_trx(thd); | |
1682 | ||
1683 | +#ifndef EXTENDED_FOR_COMMIT_ORDERED | |
1684 | + /* used by innobase_commit_ordered */ | |
1685 | ut_ad(EQ_CURRENT_THD(thd)); | |
1686 | +#endif | |
1687 | ||
1688 | if (trx == NULL) { | |
1689 | trx = innobase_trx_allocate(thd); | |
1690 | @@ -1846,6 +1855,7 @@ | |
1691 | { | |
1692 | trx->is_registered = 0; | |
1693 | trx->owns_prepare_mutex = 0; | |
1694 | + trx->called_commit_ordered = 0; | |
1695 | } | |
1696 | ||
1697 | /*********************************************************************//** | |
1698 | @@ -1861,6 +1871,29 @@ | |
1699 | } | |
1700 | ||
1701 | /*********************************************************************//** | |
1702 | +Record that innobase_commit_ordered() has run for a trx registered for 2PC. */ | |
1703 | +static inline | |
1704 | +void | |
1705 | +trx_called_commit_ordered_set( | |
1706 | +/*==========================*/ | |
1707 | + trx_t* trx) | |
1708 | +{ | |
1709 | + ut_a(trx_is_registered_for_2pc(trx)); | |
1710 | + trx->called_commit_ordered = 1; | |
1711 | +} | |
1712 | + | |
1713 | +/*********************************************************************//** | |
1714 | +Check whether the called_commit_ordered flag of the transaction is set. */ | |
1715 | +static inline | |
1716 | +bool | |
1717 | +trx_called_commit_ordered( | |
1718 | +/*======================*/ | |
1719 | + const trx_t* trx) | |
1720 | +{ | |
1721 | + return(trx->called_commit_ordered == 1); | |
1722 | +} | |
1723 | + | |
1724 | +/*********************************************************************//** | |
1725 | Check if transaction is started. | |
1726 | @reutrn true if transaction is in state started */ | |
1727 | static | |
1728 | @@ -2435,6 +2468,9 @@ | |
1729 | innobase_hton->savepoint_set=innobase_savepoint; | |
1730 | innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint; | |
1731 | innobase_hton->savepoint_release=innobase_release_savepoint; | |
1732 | +#ifdef EXTENDED_FOR_COMMIT_ORDERED | |
1733 | + innobase_hton->commit_ordered=innobase_commit_ordered; | |
1734 | +#endif | |
1735 | innobase_hton->commit=innobase_commit; | |
1736 | innobase_hton->rollback=innobase_rollback; | |
1737 | innobase_hton->prepare=innobase_xa_prepare; | |
1738 | @@ -3187,6 +3223,126 @@ | |
1739 | DBUG_RETURN(0); | |
1740 | } | |
1741 | ||
1742 | +#ifdef EXTENDED_FOR_COMMIT_ORDERED | |
1743 | +/* MEMO: | |
1744 | + InnoDB is coded on the assumption that a trx is always accessed by its owner thd | |
1745 | + (it is not protected by any mutex/lock). | |
1746 | + So a caller of innobase_commit_ordered() must be conscious of the cache | |
1747 | + coherency of the trx between multiple CPUs if it is called from another thd. | |
1748 | + | |
1749 | + In MariaDB's first implementation the coherency seems to be protected by | |
1750 | + the pthread_mutex LOCK_wakeup_ready, so this is not a problem for now. | |
1751 | + | |
1752 | + But we should stay aware of the importance of this coherency. | |
1753 | + */ | |
1754 | +/*****************************************************************//** | |
1755 | +Low-level function for innobase_commit_ordered().*/ | |
1756 | +static | |
1757 | +void | |
1758 | +innobase_commit_ordered_low( | |
1759 | +/*========================*/ | |
1760 | + trx_t* trx, /*!< in: Innodb transaction */ | |
1761 | + THD* thd) /*!< in: MySQL thread handle */ | |
1762 | +{ | |
1763 | + ulonglong tmp_pos; | |
1764 | + DBUG_ENTER("innobase_commit_ordered"); | |
1765 | + | |
1766 | + /* This part was from innobase_commit() */ | |
1767 | + | |
1768 | + /* We need current binlog position for ibbackup to work. | |
1769 | + Note, the position is current because commit_ordered is guaranteed | |
1770 | + to be called in the same sequence as writing to binlog. */ | |
1771 | +retry: | |
1772 | + if (innobase_commit_concurrency > 0) { | |
1773 | + mysql_mutex_lock(&commit_cond_m); | |
1774 | + commit_threads++; | |
1775 | + | |
1776 | + if (commit_threads > innobase_commit_concurrency) { | |
1777 | + commit_threads--; | |
1778 | + mysql_cond_wait(&commit_cond, | |
1779 | + &commit_cond_m); | |
1780 | + mysql_mutex_unlock(&commit_cond_m); | |
1781 | + goto retry; | |
1782 | + } | |
1783 | + else { | |
1784 | + mysql_mutex_unlock(&commit_cond_m); | |
1785 | + } | |
1786 | + } | |
1787 | + | |
1788 | + mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name)); | |
1789 | + trx->mysql_log_offset = (ib_int64_t) tmp_pos; | |
1790 | + | |
1791 | + /* Don't do write + flush right now. For group commit | |
1792 | + to work we want to do the flush in the innobase_commit() | |
1793 | + method, which runs without holding any locks. */ | |
1794 | + trx->flush_log_later = TRUE; | |
1795 | + innobase_commit_low(trx); | |
1796 | + trx->flush_log_later = FALSE; | |
1797 | + | |
1798 | + if (innobase_commit_concurrency > 0) { | |
1799 | + mysql_mutex_lock(&commit_cond_m); | |
1800 | + commit_threads--; | |
1801 | + mysql_cond_signal(&commit_cond); | |
1802 | + mysql_mutex_unlock(&commit_cond_m); | |
1803 | + } | |
1804 | + | |
1805 | + DBUG_VOID_RETURN; | |
1806 | +} | |
1807 | + | |
1808 | +/*****************************************************************//** | |
1809 | +Perform the first, fast part of InnoDB commit. | |
1810 | + | |
1811 | +Doing it in this call ensures that we get the same commit order here | |
1812 | +as in binlog and any other participating transactional storage engines. | |
1813 | + | |
1814 | +Note that we want to do as little as really needed here, as we run | |
1815 | +under a global mutex. The expensive fsync() is done later, in | |
1816 | +innobase_commit(), without a lock so group commit can take place. | |
1817 | + | |
1818 | +Note also that this method can be called from a different thread than | |
1819 | +the one handling the rest of the transaction. */ | |
1820 | +static | |
1821 | +void | |
1822 | +innobase_commit_ordered( | |
1823 | +/*====================*/ | |
1824 | + handlerton *hton, /*!< in: Innodb handlerton */ | |
1825 | + THD* thd, /*!< in: MySQL thread handle of the user for whom | |
1826 | + the transaction should be committed */ | |
1827 | + bool all) /*!< in: TRUE - commit transaction | |
1828 | + FALSE - the current SQL statement ended */ | |
1829 | +{ | |
1830 | + trx_t* trx; | |
1831 | + DBUG_ENTER("innobase_commit_ordered"); | |
1832 | + DBUG_ASSERT(hton == innodb_hton_ptr); | |
1833 | + | |
1834 | + trx = check_trx_exists(thd); | |
1835 | + | |
1836 | + /* Since we will reserve the kernel mutex, we have to release | |
1837 | + the search system latch first to obey the latching order. */ | |
1838 | + | |
1839 | + if (trx->has_search_latch) { | |
1840 | + trx_search_latch_release_if_reserved(trx); | |
1841 | + } | |
1842 | + | |
1843 | + if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) { | |
1844 | + /* We cannot throw error here; instead we will catch this error | |
1845 | + again in innobase_commit() and report it from there. */ | |
1846 | + DBUG_VOID_RETURN; | |
1847 | + } | |
1848 | + | |
1849 | + /* commit_ordered is only called when committing the whole transaction | |
1850 | + (or an SQL statement when autocommit is on). */ | |
1851 | + DBUG_ASSERT(all || | |
1852 | + (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))); | |
1853 | + | |
1854 | + innobase_commit_ordered_low(trx, thd); | |
1855 | + | |
1856 | + trx_called_commit_ordered_set(trx); | |
1857 | + | |
1858 | + DBUG_VOID_RETURN; | |
1859 | +} | |
1860 | +#endif /* EXTENDED_FOR_COMMIT_ORDERED */ | |
1861 | + | |
1862 | /*****************************************************************//** | |
1863 | Commits a transaction in an InnoDB database or marks an SQL statement | |
1864 | ended. | |
1865 | @@ -3238,6 +3394,16 @@ | |
1866 | /* We were instructed to commit the whole transaction, or | |
1867 | this is an SQL statement end and autocommit is on */ | |
1868 | ||
1869 | +#ifdef EXTENDED_FOR_COMMIT_ORDERED | |
1870 | + ut_ad(!trx_has_prepare_commit_mutex(trx)); | |
1871 | + | |
1872 | + /* Run the fast part of commit if we did not already. */ | |
1873 | + if (!trx_called_commit_ordered(trx)) { | |
1874 | + innobase_commit_ordered_low(trx, thd); | |
1875 | + } | |
1876 | +#else | |
1877 | + ut_ad(!trx_called_commit_ordered(trx)); | |
1878 | + | |
1879 | /* We need current binlog position for ibbackup to work. | |
1880 | Note, the position is current because of | |
1881 | prepare_commit_mutex */ | |
1882 | @@ -3292,6 +3458,7 @@ | |
1883 | ||
1884 | mysql_mutex_unlock(&prepare_commit_mutex); | |
1885 | } | |
1886 | +#endif /* EXTENDED_FOR_COMMIT_ORDERED */ | |
1887 | ||
1888 | trx_deregister_from_2pc(trx); | |
1889 | ||
29ffd636 | 1890 | @@ -10981,6 +11148,7 @@ |
13ceb006 AM |
1891 | |
1892 | srv_active_wake_master_thread(); | |
1893 | ||
1894 | +#ifndef EXTENDED_FOR_COMMIT_ORDERED | |
1895 | if (thd_sql_command(thd) != SQLCOM_XA_PREPARE | |
1896 | && (all | |
1897 | || !thd_test_options( | |
29ffd636 | 1898 | @@ -11007,6 +11175,7 @@ |
13ceb006 AM |
1899 | mysql_mutex_lock(&prepare_commit_mutex); |
1900 | trx_owns_prepare_commit_mutex_set(trx); | |
1901 | } | |
1902 | +#endif /* ifndef EXTENDED_FOR_COMMIT_ORDERED */ | |
1903 | ||
1904 | return(error); | |
1905 | } | |
1906 | --- a/storage/innobase/handler/ha_innodb.h | |
1907 | +++ b/storage/innobase/handler/ha_innodb.h | |
1908 | @@ -240,6 +240,12 @@ | |
1909 | struct charset_info_st *thd_charset(MYSQL_THD thd); | |
1910 | LEX_STRING *thd_query_string(MYSQL_THD thd); | |
1911 | ||
1912 | +#ifdef EXTENDED_FOR_COMMIT_ORDERED | |
1913 | +/** Get the file name and position of the MySQL binlog corresponding to the | |
1914 | + * current commit. | |
1915 | + */ | |
1916 | +void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file); | |
1917 | +#else | |
1918 | /** Get the file name of the MySQL binlog. | |
1919 | * @return the name of the binlog file | |
1920 | */ | |
1921 | @@ -249,6 +255,7 @@ | |
1922 | * @return byte offset from the beginning of the binlog | |
1923 | */ | |
1924 | ulonglong mysql_bin_log_file_pos(void); | |
1925 | +#endif | |
1926 | ||
1927 | /** | |
1928 | Check if a user thread is a replication slave thread | |
1929 | --- a/storage/innobase/include/trx0trx.h | |
1930 | +++ b/storage/innobase/include/trx0trx.h | |
1931 | @@ -494,6 +494,7 @@ | |
1932 | this is set to 1 then registered should | |
1933 | also be set to 1. This is used in the | |
1934 | XA code */ | |
1935 | + unsigned called_commit_ordered:1;/* 1 if innobase_commit_ordered has run. */ | |
1936 | /*------------------------------*/ | |
1937 | ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */ | |
1938 | ulint check_foreigns; /* normally TRUE, but if the user | |
1939 | --- a/storage/innobase/trx/trx0trx.c | |
1940 | +++ b/storage/innobase/trx/trx0trx.c | |
1941 | @@ -111,6 +111,7 @@ | |
1942 | ||
1943 | trx->is_registered = 0; | |
1944 | trx->owns_prepare_mutex = 0; | |
1945 | + trx->called_commit_ordered = 0; | |
1946 | ||
1947 | trx->start_time = ut_time(); | |
1948 | ||
1949 | --- /dev/null | |
1950 | +++ b/mysql-test/r/group_commit.result | |
1951 | @@ -0,0 +1,63 @@ | |
1952 | +CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb; | |
1953 | +SELECT variable_value INTO @commits FROM information_schema.global_status | |
1954 | +WHERE variable_name = 'binlog_commits'; | |
1955 | +SELECT variable_value INTO @group_commits FROM information_schema.global_status | |
1956 | +WHERE variable_name = 'binlog_group_commits'; | |
1957 | +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued"; | |
1958 | +INSERT INTO t1 VALUES ("con1"); | |
1959 | +set DEBUG_SYNC= "now WAIT_FOR group1_running"; | |
1960 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con2"; | |
1961 | +SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed"; | |
1962 | +SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked"; | |
1963 | +INSERT INTO t1 VALUES ("con2"); | |
1964 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con2"; | |
1965 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con3"; | |
1966 | +INSERT INTO t1 VALUES ("con3"); | |
1967 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con3"; | |
1968 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con4"; | |
1969 | +INSERT INTO t1 VALUES ("con4"); | |
1970 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con4"; | |
1971 | +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; | |
1972 | +SELECT * FROM t1 ORDER BY a; | |
1973 | +a | |
1974 | +SET DEBUG_SYNC= "now SIGNAL group2_queued"; | |
1975 | +SELECT * FROM t1 ORDER BY a; | |
1976 | +a | |
1977 | +con1 | |
1978 | +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5"; | |
1979 | +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued"; | |
1980 | +INSERT INTO t1 VALUES ("con5"); | |
1981 | +SET DEBUG_SYNC= "now WAIT_FOR con5_leader"; | |
1982 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con6_queued"; | |
1983 | +INSERT INTO t1 VALUES ("con6"); | |
1984 | +SET DEBUG_SYNC= "now WAIT_FOR group3_con5"; | |
1985 | +SELECT * FROM t1 ORDER BY a; | |
1986 | +a | |
1987 | +con1 | |
1988 | +SET DEBUG_SYNC= "now SIGNAL group3_committed"; | |
1989 | +SET DEBUG_SYNC= "now WAIT_FOR group2_visible"; | |
1990 | +SELECT * FROM t1 ORDER BY a; | |
1991 | +a | |
1992 | +con1 | |
1993 | +con2 | |
1994 | +con3 | |
1995 | +con4 | |
1996 | +SET DEBUG_SYNC= "now SIGNAL group2_checked"; | |
1997 | +SELECT * FROM t1 ORDER BY a; | |
1998 | +a | |
1999 | +con1 | |
2000 | +con2 | |
2001 | +con3 | |
2002 | +con4 | |
2003 | +con5 | |
2004 | +con6 | |
2005 | +SELECT variable_value - @commits FROM information_schema.global_status | |
2006 | +WHERE variable_name = 'binlog_commits'; | |
2007 | +variable_value - @commits | |
2008 | +6 | |
2009 | +SELECT variable_value - @group_commits FROM information_schema.global_status | |
2010 | +WHERE variable_name = 'binlog_group_commits'; | |
2011 | +variable_value - @group_commits | |
2012 | +3 | |
2013 | +SET DEBUG_SYNC= 'RESET'; | |
2014 | +DROP TABLE t1; | |
2015 | --- /dev/null | |
2016 | +++ b/mysql-test/r/group_commit_binlog_pos.result | |
2017 | @@ -0,0 +1,35 @@ | |
2018 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; | |
2019 | +INSERT INTO t1 VALUES (0); | |
2020 | +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued"; | |
2021 | +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3"; | |
2022 | +INSERT INTO t1 VALUES (1); | |
2023 | +SET DEBUG_SYNC= "now WAIT_FOR con1_waiting"; | |
2024 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con2_queued"; | |
2025 | +INSERT INTO t1 VALUES (2); | |
2026 | +SET DEBUG_SYNC= "now WAIT_FOR con2_queued"; | |
2027 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con3_queued"; | |
2028 | +INSERT INTO t1 VALUES (3); | |
2029 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2030 | +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; | |
2031 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2032 | +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; | |
2033 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2034 | +SELECT * FROM t1 ORDER BY a; | |
2035 | +a | |
2036 | +0 | |
2037 | +1 | |
2038 | +2 | |
2039 | +SET SESSION debug="+d,crash_dispatch_command_before"; | |
2040 | +SELECT 1; | |
2041 | +Got one of the listed errors | |
2042 | +Got one of the listed errors | |
2043 | +Got one of the listed errors | |
2044 | +SELECT * FROM t1 ORDER BY a; | |
2045 | +a | |
2046 | +0 | |
2047 | +1 | |
2048 | +2 | |
2049 | +3 | |
2050 | +InnoDB: Last MySQL binlog file position 0 768, file name ./master-bin.000001 | |
2051 | +SET DEBUG_SYNC= 'RESET'; | |
2052 | +DROP TABLE t1; | |
2053 | --- /dev/null | |
2054 | +++ b/mysql-test/r/group_commit_crash.result | |
2055 | @@ -0,0 +1,120 @@ | |
2056 | +CREATE TABLE t1(a CHAR(255), | |
2057 | +b CHAR(255), | |
2058 | +c CHAR(255), | |
2059 | +d CHAR(255), | |
2060 | +id INT AUTO_INCREMENT, | |
2061 | +PRIMARY KEY(id)) ENGINE=InnoDB; | |
2062 | +create table t2 like t1; | |
2063 | +create procedure setcrash(IN i INT) | |
2064 | +begin | |
2065 | +CASE i | |
2066 | +WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare"; | |
2067 | +WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log"; | |
2068 | +WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog"; | |
2069 | +WHEN 4 THEN SET SESSION debug="d,crash_commit_after"; | |
2070 | +WHEN 5 THEN SET SESSION debug="d,crash_commit_before"; | |
2071 | +ELSE BEGIN END; | |
2072 | +END CASE; | |
2073 | +end // | |
2074 | +FLUSH TABLES; | |
2075 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2076 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2077 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2078 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2079 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2080 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2081 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2082 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2083 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2084 | +INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2085 | +RESET MASTER; | |
2086 | +START TRANSACTION; | |
2087 | +insert into t1 select * from t2; | |
2088 | +call setcrash(5); | |
2089 | +COMMIT; | |
2090 | +Got one of the listed errors | |
2091 | +SELECT * FROM t1 ORDER BY id; | |
2092 | +a b c d id | |
2093 | +SHOW BINLOG EVENTS LIMIT 2,1; | |
2094 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2095 | +delete from t1; | |
2096 | +RESET MASTER; | |
2097 | +START TRANSACTION; | |
2098 | +insert into t1 select * from t2; | |
2099 | +call setcrash(4); | |
2100 | +COMMIT; | |
2101 | +Got one of the listed errors | |
2102 | +SELECT * FROM t1 ORDER BY id; | |
2103 | +a b c d id | |
2104 | +a b c d 1 | |
2105 | +a b c d 2 | |
2106 | +a b c d 3 | |
2107 | +a b c d 4 | |
2108 | +a b c d 5 | |
2109 | +a b c d 6 | |
2110 | +a b c d 7 | |
2111 | +a b c d 8 | |
2112 | +a b c d 9 | |
2113 | +a b c d 10 | |
2114 | +SHOW BINLOG EVENTS LIMIT 2,1; | |
2115 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2116 | +master-bin.000001 175 Query 1 269 use `test`; insert into t1 select * from t2 | |
2117 | +delete from t1; | |
2118 | +RESET MASTER; | |
2119 | +START TRANSACTION; | |
2120 | +insert into t1 select * from t2; | |
2121 | +call setcrash(3); | |
2122 | +COMMIT; | |
2123 | +Got one of the listed errors | |
2124 | +SELECT * FROM t1 ORDER BY id; | |
2125 | +a b c d id | |
2126 | +a b c d 1 | |
2127 | +a b c d 2 | |
2128 | +a b c d 3 | |
2129 | +a b c d 4 | |
2130 | +a b c d 5 | |
2131 | +a b c d 6 | |
2132 | +a b c d 7 | |
2133 | +a b c d 8 | |
2134 | +a b c d 9 | |
2135 | +a b c d 10 | |
2136 | +SHOW BINLOG EVENTS LIMIT 2,1; | |
2137 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2138 | +master-bin.000001 175 Query 1 269 use `test`; insert into t1 select * from t2 | |
2139 | +delete from t1; | |
2140 | +RESET MASTER; | |
2141 | +START TRANSACTION; | |
2142 | +insert into t1 select * from t2; | |
2143 | +call setcrash(2); | |
2144 | +COMMIT; | |
2145 | +Got one of the listed errors | |
2146 | +SELECT * FROM t1 ORDER BY id; | |
2147 | +a b c d id | |
2148 | +a b c d 1 | |
2149 | +a b c d 2 | |
2150 | +a b c d 3 | |
2151 | +a b c d 4 | |
2152 | +a b c d 5 | |
2153 | +a b c d 6 | |
2154 | +a b c d 7 | |
2155 | +a b c d 8 | |
2156 | +a b c d 9 | |
2157 | +a b c d 10 | |
2158 | +SHOW BINLOG EVENTS LIMIT 2,1; | |
2159 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2160 | +master-bin.000001 175 Query 1 269 use `test`; insert into t1 select * from t2 | |
2161 | +delete from t1; | |
2162 | +RESET MASTER; | |
2163 | +START TRANSACTION; | |
2164 | +insert into t1 select * from t2; | |
2165 | +call setcrash(1); | |
2166 | +COMMIT; | |
2167 | +Got one of the listed errors | |
2168 | +SELECT * FROM t1 ORDER BY id; | |
2169 | +a b c d id | |
2170 | +SHOW BINLOG EVENTS LIMIT 2,1; | |
2171 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2172 | +delete from t1; | |
2173 | +DROP TABLE t1; | |
2174 | +DROP TABLE t2; | |
2175 | +DROP PROCEDURE setcrash; | |
2176 | --- /dev/null | |
2177 | +++ b/mysql-test/r/xa_binlog.result | |
2178 | @@ -0,0 +1,32 @@ | |
2179 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; | |
2180 | +SET binlog_format= mixed; | |
2181 | +RESET MASTER; | |
2182 | +XA START 'xatest'; | |
2183 | +INSERT INTO t1 VALUES (1); | |
2184 | +XA END 'xatest'; | |
2185 | +XA PREPARE 'xatest'; | |
2186 | +XA COMMIT 'xatest'; | |
2187 | +XA START 'xatest'; | |
2188 | +INSERT INTO t1 VALUES (2); | |
2189 | +XA END 'xatest'; | |
2190 | +XA COMMIT 'xatest' ONE PHASE; | |
2191 | +BEGIN; | |
2192 | +INSERT INTO t1 VALUES (3); | |
2193 | +COMMIT; | |
2194 | +SELECT * FROM t1 ORDER BY a; | |
2195 | +a | |
2196 | +1 | |
2197 | +2 | |
2198 | +3 | |
2199 | +SHOW BINLOG EVENTS LIMIT 1,9; | |
2200 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2201 | +master-bin.000001 # Query 1 # BEGIN | |
2202 | +master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (1) | |
2203 | +master-bin.000001 # Query 1 # COMMIT | |
2204 | +master-bin.000001 # Query 1 # BEGIN | |
2205 | +master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (2) | |
2206 | +master-bin.000001 # Xid 1 # COMMIT /* xid=XX */ | |
2207 | +master-bin.000001 # Query 1 # BEGIN | |
2208 | +master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (3) | |
2209 | +master-bin.000001 # Xid 1 # COMMIT /* xid=XX */ | |
2210 | +DROP TABLE t1; | |
2211 | --- /dev/null | |
2212 | +++ b/mysql-test/suite/binlog/r/binlog_ioerr.result | |
2213 | @@ -0,0 +1,28 @@ | |
2214 | +CALL mtr.add_suppression("Error writing file 'master-bin'"); | |
2215 | +RESET MASTER; | |
2216 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; | |
2217 | +INSERT INTO t1 VALUES(0); | |
2218 | +SET SESSION debug='+d,fail_binlog_write_1'; | |
2219 | +INSERT INTO t1 VALUES(1); | |
2220 | +ERROR HY000: Error writing file 'master-bin' (errno: 28) | |
2221 | +INSERT INTO t1 VALUES(2); | |
2222 | +ERROR HY000: Error writing file 'master-bin' (errno: 28) | |
2223 | +SET SESSION debug=''; | |
2224 | +INSERT INTO t1 VALUES(3); | |
2225 | +SELECT * FROM t1; | |
2226 | +a | |
2227 | +0 | |
2228 | +3 | |
2229 | +SHOW BINLOG EVENTS; | |
2230 | +Log_name Pos Event_type Server_id End_log_pos Info | |
2231 | +BINLOG POS Format_desc 1 ENDPOS Server ver: #, Binlog ver: # | |
2232 | +BINLOG POS Query 1 ENDPOS use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb | |
2233 | +BINLOG POS Query 1 ENDPOS BEGIN | |
2234 | +BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(0) | |
2235 | +BINLOG POS Xid 1 ENDPOS COMMIT /* XID */ | |
2236 | +BINLOG POS Query 1 ENDPOS BEGIN | |
2237 | +BINLOG POS Query 1 ENDPOS BEGIN | |
2238 | +BINLOG POS Query 1 ENDPOS BEGIN | |
2239 | +BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(3) | |
2240 | +BINLOG POS Xid 1 ENDPOS COMMIT /* XID */ | |
2241 | +DROP TABLE t1; | |
2242 | --- /dev/null | |
2243 | +++ b/mysql-test/suite/binlog/t/binlog_ioerr.test | |
2244 | @@ -0,0 +1,30 @@ | |
2245 | +source include/have_debug.inc; | |
2246 | +source include/have_innodb.inc; | |
2247 | +source include/have_log_bin.inc; | |
2248 | +source include/have_binlog_format_mixed_or_statement.inc; | |
2249 | + | |
2250 | +CALL mtr.add_suppression("Error writing file 'master-bin'"); | |
2251 | + | |
2252 | +RESET MASTER; | |
2253 | + | |
2254 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; | |
2255 | +INSERT INTO t1 VALUES(0); | |
2256 | +SET SESSION debug='+d,fail_binlog_write_1'; | |
2257 | +--error ER_ERROR_ON_WRITE | |
2258 | +INSERT INTO t1 VALUES(1); | |
2259 | +--error ER_ERROR_ON_WRITE | |
2260 | +INSERT INTO t1 VALUES(2); | |
2261 | +SET SESSION debug=''; | |
2262 | +INSERT INTO t1 VALUES(3); | |
2263 | +SELECT * FROM t1; | |
2264 | + | |
2265 | +# Actually the output from this currently shows a bug. | |
2266 | +# The injected IO error leaves partially written transactions in the binlog in | |
2267 | +# the form of stray "BEGIN" events. | |
2268 | +# These should disappear from the output if binlog error handling is improved | |
2269 | +# (see MySQL Bug#37148 and WL#1790). | |
2270 | +--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/ | |
2271 | +--replace_column 1 BINLOG 2 POS 5 ENDPOS | |
2272 | +SHOW BINLOG EVENTS; | |
2273 | + | |
2274 | +DROP TABLE t1; | |
2275 | --- /dev/null | |
2276 | +++ b/mysql-test/t/group_commit.test | |
2277 | @@ -0,0 +1,115 @@ | |
2278 | +--source include/have_debug_sync.inc | |
2279 | +--source include/have_innodb.inc | |
2280 | +--source include/have_log_bin.inc | |
2281 | + | |
2282 | +# Test some group commit code paths by using debug_sync to do controlled | |
2283 | +# commits of 6 transactions: first 1 alone, then 3 as a group, then 2 as a | |
2284 | +# group. | |
2285 | +# | |
2286 | +# Group 3 is allowed to race as far as possible ahead before group 2 finishes | |
2287 | +# to check some edge case for concurrency control. | |
2288 | + | |
2289 | +CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb; | |
2290 | + | |
2291 | +SELECT variable_value INTO @commits FROM information_schema.global_status | |
2292 | + WHERE variable_name = 'binlog_commits'; | |
2293 | +SELECT variable_value INTO @group_commits FROM information_schema.global_status | |
2294 | + WHERE variable_name = 'binlog_group_commits'; | |
2295 | + | |
2296 | +connect(con1,localhost,root,,); | |
2297 | +connect(con2,localhost,root,,); | |
2298 | +connect(con3,localhost,root,,); | |
2299 | +connect(con4,localhost,root,,); | |
2300 | +connect(con5,localhost,root,,); | |
2301 | +connect(con6,localhost,root,,); | |
2302 | + | |
2303 | +# Start group1 (with one thread) doing commit, waiting for | |
2304 | +# group2 to queue up before finishing. | |
2305 | + | |
2306 | +connection con1; | |
2307 | +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued"; | |
2308 | +send INSERT INTO t1 VALUES ("con1"); | |
2309 | + | |
2310 | +# Make group2 (with three threads) queue up. | |
2311 | +# Make sure con2 is the group commit leader for group2. | |
2312 | +# Make group2 wait with running commit_ordered() until group3 has committed. | |
2313 | + | |
2314 | +connection con2; | |
2315 | +set DEBUG_SYNC= "now WAIT_FOR group1_running"; | |
2316 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con2"; | |
2317 | +SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed"; | |
2318 | +SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked"; | |
2319 | +send INSERT INTO t1 VALUES ("con2"); | |
2320 | +connection con3; | |
2321 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con2"; | |
2322 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con3"; | |
2323 | +send INSERT INTO t1 VALUES ("con3"); | |
2324 | +connection con4; | |
2325 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con3"; | |
2326 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con4"; | |
2327 | +send INSERT INTO t1 VALUES ("con4"); | |
2328 | + | |
2329 | +# When group2 is queued, let group1 continue and queue group3. | |
2330 | + | |
2331 | +connection default; | |
2332 | +SET DEBUG_SYNC= "now WAIT_FOR group2_con4"; | |
2333 | + | |
2334 | +# At this point, transaction 1 is still not visible as commit_ordered() has not | |
2335 | +# been called yet. | |
2336 | +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; | |
2337 | +SELECT * FROM t1 ORDER BY a; | |
2338 | + | |
2339 | +SET DEBUG_SYNC= "now SIGNAL group2_queued"; | |
2340 | +connection con1; | |
2341 | +reap; | |
2342 | + | |
2343 | +# Now transaction 1 is visible. | |
2344 | +connection default; | |
2345 | +SELECT * FROM t1 ORDER BY a; | |
2346 | + | |
2347 | +connection con5; | |
2348 | +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5"; | |
2349 | +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued"; | |
2350 | +send INSERT INTO t1 VALUES ("con5"); | |
2351 | + | |
2352 | +connection con6; | |
2353 | +SET DEBUG_SYNC= "now WAIT_FOR con5_leader"; | |
2354 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con6_queued"; | |
2355 | +send INSERT INTO t1 VALUES ("con6"); | |
2356 | + | |
2357 | +connection default; | |
2358 | +SET DEBUG_SYNC= "now WAIT_FOR group3_con5"; | |
2359 | +# Still only transaction 1 visible, as group2 has not yet run commit_ordered(). | |
2360 | +SELECT * FROM t1 ORDER BY a; | |
2361 | +SET DEBUG_SYNC= "now SIGNAL group3_committed"; | |
2362 | +SET DEBUG_SYNC= "now WAIT_FOR group2_visible"; | |
2363 | +# Now transactions 1-4 visible. | |
2364 | +SELECT * FROM t1 ORDER BY a; | |
2365 | +SET DEBUG_SYNC= "now SIGNAL group2_checked"; | |
2366 | + | |
2367 | +connection con2; | |
2368 | +reap; | |
2369 | + | |
2370 | +connection con3; | |
2371 | +reap; | |
2372 | + | |
2373 | +connection con4; | |
2374 | +reap; | |
2375 | + | |
2376 | +connection con5; | |
2377 | +reap; | |
2378 | + | |
2379 | +connection con6; | |
2380 | +reap; | |
2381 | + | |
2382 | +connection default; | |
2383 | +# Check all transactions finally visible. | |
2384 | +SELECT * FROM t1 ORDER BY a; | |
2385 | + | |
2386 | +SELECT variable_value - @commits FROM information_schema.global_status | |
2387 | + WHERE variable_name = 'binlog_commits'; | |
2388 | +SELECT variable_value - @group_commits FROM information_schema.global_status | |
2389 | + WHERE variable_name = 'binlog_group_commits'; | |
2390 | + | |
2391 | +SET DEBUG_SYNC= 'RESET'; | |
2392 | +DROP TABLE t1; | |
2393 | --- /dev/null | |
2394 | +++ b/mysql-test/t/group_commit_binlog_pos-master.opt | |
2395 | @@ -0,0 +1 @@ | |
2396 | +--skip-stack-trace --skip-core-file | |
2397 | --- /dev/null | |
2398 | +++ b/mysql-test/t/group_commit_binlog_pos.test | |
2399 | @@ -0,0 +1,89 @@ | |
2400 | +--source include/have_debug_sync.inc | |
2401 | +--source include/have_innodb.inc | |
2402 | +--source include/have_log_bin.inc | |
2403 | +--source include/have_binlog_format_mixed_or_statement.inc | |
2404 | + | |
2405 | +# Need DBUG to crash the server intentionally | |
2406 | +--source include/have_debug.inc | |
2407 | +# Don't test this under valgrind; memory leaks will occur as we crash | |
2408 | +--source include/not_valgrind.inc | |
2409 | + | |
2410 | +# The test case currently uses grep and tail, which may be unavailable on | |
2411 | +# some Windows systems. But see MWL#191 for how to remove the need for grep. | |
2412 | +--source include/not_windows.inc | |
2413 | + | |
2414 | +# XtraDB stores the binlog position corresponding to the last commit, and | |
2415 | +# prints it during crash recovery. | |
2416 | +# Test that we get the correct position when we group commit several | |
2417 | +# transactions together. | |
2418 | + | |
2419 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; | |
2420 | +INSERT INTO t1 VALUES (0); | |
2421 | + | |
2422 | +connect(con1,localhost,root,,); | |
2423 | +connect(con2,localhost,root,,); | |
2424 | +connect(con3,localhost,root,,); | |
2425 | + | |
2426 | +# Queue up three commits for group commit. | |
2427 | + | |
2428 | +connection con1; | |
2429 | +SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued"; | |
2430 | +SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3"; | |
2431 | +send INSERT INTO t1 VALUES (1); | |
2432 | + | |
2433 | +connection con2; | |
2434 | +SET DEBUG_SYNC= "now WAIT_FOR con1_waiting"; | |
2435 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con2_queued"; | |
2436 | +send INSERT INTO t1 VALUES (2); | |
2437 | + | |
2438 | +connection con3; | |
2439 | +SET DEBUG_SYNC= "now WAIT_FOR con2_queued"; | |
2440 | +SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con3_queued"; | |
2441 | +send INSERT INTO t1 VALUES (3); | |
2442 | + | |
2443 | +connection default; | |
2444 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2445 | +# At this point, no transactions are committed. | |
2446 | +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; | |
2447 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2448 | +# At this point, 1 transaction is committed. | |
2449 | +SET DEBUG_SYNC= "now SIGNAL con1_loop_cont"; | |
2450 | +SET DEBUG_SYNC= "now WAIT_FOR con1_loop"; | |
2451 | + | |
2452 | +# At this point, 2 transactions are committed. | |
2453 | +SELECT * FROM t1 ORDER BY a; | |
2454 | + | |
2455 | +connection con2; | |
2456 | +reap; | |
2457 | + | |
2458 | +# Now crash the server with 1+2 in-memory committed, 3 only prepared. | |
2459 | +connection default; | |
2460 | +system echo wait-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; | |
2461 | +SET SESSION debug="+d,crash_dispatch_command_before"; | |
2462 | +--error 2006,2013 | |
2463 | +SELECT 1; | |
2464 | + | |
2465 | +connection con1; | |
2466 | +--error 2006,2013 | |
2467 | +reap; | |
2468 | +connection con3; | |
2469 | +--error 2006,2013 | |
2470 | +reap; | |
2471 | + | |
2472 | +system echo restart-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; | |
2473 | + | |
2474 | +connection default; | |
2475 | +--enable_reconnect | |
2476 | +--source include/wait_until_connected_again.inc | |
2477 | + | |
2478 | +# Crash recovery should recover all three transactions. | |
2479 | +SELECT * FROM t1 ORDER BY a; | |
2480 | + | |
2481 | +# Check that the binlog position reported by InnoDB is the correct one | |
2482 | +# for the end of the second transaction (as can be checked with | |
2483 | +# mysqlbinlog). | |
2484 | +let $MYSQLD_DATADIR= `SELECT @@datadir`; | |
2485 | +--exec grep 'InnoDB: Last MySQL binlog file position' $MYSQLD_DATADIR/../../log/mysqld.1.err | tail -1 | |
2486 | + | |
2487 | +SET DEBUG_SYNC= 'RESET'; | |
2488 | +DROP TABLE t1; | |
2489 | --- /dev/null | |
2490 | +++ b/mysql-test/t/group_commit_crash-master.opt | |
2491 | @@ -0,0 +1 @@ | |
2492 | +--skip-stack-trace --skip-core-file | |
2493 | --- /dev/null | |
2494 | +++ b/mysql-test/t/group_commit_crash.test | |
2495 | @@ -0,0 +1,80 @@ | |
2496 | +# Testing group commit by crashing a few times. | |
2497 | +# Test adapted from the Facebook patch: lp:mysqlatfacebook | |
2498 | +--source include/not_embedded.inc | |
2499 | +# Don't test this under valgrind, memory leaks will occur | |
2500 | +--source include/not_valgrind.inc | |
2501 | + | |
2502 | +# Binary must be compiled with debug for crash to occur | |
2503 | +--source include/have_debug.inc | |
2504 | +--source include/have_innodb.inc | |
2505 | +--source include/have_log_bin.inc | |
2506 | + | |
2507 | +let $innodb_file_format_max_orig=`select @@innodb_file_format_max`; | |
2508 | +CREATE TABLE t1(a CHAR(255), | |
2509 | + b CHAR(255), | |
2510 | + c CHAR(255), | |
2511 | + d CHAR(255), | |
2512 | + id INT AUTO_INCREMENT, | |
2513 | + PRIMARY KEY(id)) ENGINE=InnoDB; | |
2514 | +create table t2 like t1; | |
2515 | +delimiter //; | |
2516 | +create procedure setcrash(IN i INT) | |
2517 | +begin | |
2518 | + CASE i | |
2519 | + WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare"; | |
2520 | + WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log"; | |
2521 | + WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog"; | |
2522 | + WHEN 4 THEN SET SESSION debug="d,crash_commit_after"; | |
2523 | + WHEN 5 THEN SET SESSION debug="d,crash_commit_before"; | |
2524 | + ELSE BEGIN END; | |
2525 | + END CASE; | |
2526 | +end // | |
2527 | +delimiter ;// | |
2528 | +# Avoid getting a crashed mysql.proc table. | |
2529 | +FLUSH TABLES; | |
2530 | + | |
2531 | +let $numtests = 5; | |
2532 | + | |
2533 | +let $numinserts = 10; | |
2534 | +while ($numinserts) | |
2535 | +{ | |
2536 | + dec $numinserts; | |
2537 | + INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd'); | |
2538 | +} | |
2539 | + | |
2540 | +--enable_reconnect | |
2541 | + | |
2542 | +while ($numtests) | |
2543 | +{ | |
2544 | + RESET MASTER; | |
2545 | + | |
2546 | + START TRANSACTION; | |
2547 | + insert into t1 select * from t2; | |
2548 | + # Write file to make mysql-test-run.pl expect crash | |
2549 | + --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect | |
2550 | + | |
2551 | + eval call setcrash($numtests); | |
2552 | + | |
2553 | + # Run the crashing query | |
2554 | + --error 2006,2013 | |
2555 | + COMMIT; | |
2556 | + | |
2557 | + # Poll the server waiting for it to be back online again. | |
2558 | + --source include/wait_until_connected_again.inc | |
2559 | + | |
2560 | + # table and binlog should be in sync. | |
2561 | + SELECT * FROM t1 ORDER BY id; | |
2562 | + SHOW BINLOG EVENTS LIMIT 2,1; | |
2563 | + | |
2564 | + delete from t1; | |
2565 | + | |
2566 | + dec $numtests; | |
2567 | +} | |
2568 | + | |
2569 | +# final cleanup | |
2570 | +DROP TABLE t1; | |
2571 | +DROP TABLE t2; | |
2572 | +DROP PROCEDURE setcrash; | |
2573 | +--disable_query_log | |
2574 | +eval SET GLOBAL innodb_file_format_max=$innodb_file_format_max_orig; | |
2575 | +--enable_query_log | |
2576 | --- /dev/null | |
2577 | +++ b/mysql-test/t/xa_binlog.test | |
2578 | @@ -0,0 +1,32 @@ | |
2579 | +--source include/have_innodb.inc | |
2580 | +--source include/have_log_bin.inc | |
2581 | + | |
2582 | +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; | |
2583 | + | |
2584 | +# Fix binlog format (otherwise SHOW BINLOG EVENTS will fluctuate). | |
2585 | +SET binlog_format= mixed; | |
2586 | + | |
2587 | +RESET MASTER; | |
2588 | + | |
2589 | +XA START 'xatest'; | |
2590 | +INSERT INTO t1 VALUES (1); | |
2591 | +XA END 'xatest'; | |
2592 | +XA PREPARE 'xatest'; | |
2593 | +XA COMMIT 'xatest'; | |
2594 | + | |
2595 | +XA START 'xatest'; | |
2596 | +INSERT INTO t1 VALUES (2); | |
2597 | +XA END 'xatest'; | |
2598 | +XA COMMIT 'xatest' ONE PHASE; | |
2599 | + | |
2600 | +BEGIN; | |
2601 | +INSERT INTO t1 VALUES (3); | |
2602 | +COMMIT; | |
2603 | + | |
2604 | +SELECT * FROM t1 ORDER BY a; | |
2605 | + | |
2606 | +--replace_column 2 # 5 # | |
2607 | +--replace_regex /xid=[0-9]+/xid=XX/ | |
2608 | +SHOW BINLOG EVENTS LIMIT 1,9; | |
2609 | + | |
2610 | +DROP TABLE t1; |