+DROP TABLE t1;
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
-@@ -11716,7 +11716,7 @@
+@@ -11728,7 +11728,7 @@
}
if (min_max_ranges.elements > 0)
{
int4store(key + key_length, thd->server_id);
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
-@@ -1113,11 +1113,18 @@
+@@ -1116,11 +1116,18 @@
break;
#else
{
/*
SHOW statements should not add the used tables to the list of tables
used in a transaction.
-@@ -1130,24 +1137,23 @@
+@@ -1133,24 +1140,23 @@
/*
We have name + wildcard in packet, separated by endzero
*/
mysql_reset_thd_for_next_command(thd);
lex_start(thd);
/* Must be before we init the table list. */
-@@ -1172,9 +1178,6 @@
+@@ -1175,9 +1181,6 @@
table_list.schema_table= schema_table;
}
+--source include/rpl_end.inc
--- a/sql/log.cc
+++ b/sql/log.cc
-@@ -5090,6 +5090,12 @@
+@@ -5079,6 +5079,12 @@
user_var_event->type,
user_var_event->charset_number,
flags);
--- a/mysql-test/t/file_contents.test
+++ b/mysql-test/t/file_contents.test
@@ -20,7 +20,7 @@
- $dir_docs = "$dir_docs/packages/MySQL-server";
+ $dir_docs = glob "$dir_docs/packages/MySQL-server*";
} else {
# RedHat: version number in directory name
- $dir_docs = glob "$dir_docs/MySQL-server*";
} elsif ($dir_bin =~ m|/usr$|) {
# RPM build during development
@@ -28,9 +28,12 @@
- if(-d "$dir_docs/packages/MySQL-server") {
- # SuSE
- $dir_docs = "$dir_docs/packages/MySQL-server";
+ if(-d "$dir_docs/packages") {
+ # SuSE: "packages/" in the documentation path
+ $dir_docs = glob "$dir_docs/packages/MySQL-server*";
+ } elsif (glob "$dir_docs/percona-server-server*") {
+ # Debian
+ $dir_docs = glob "$dir_docs/percona-server-server*";
--- /dev/null
+--- a/include/my_sys.h
++++ b/include/my_sys.h
+@@ -524,6 +524,8 @@
+
+ #define my_b_tell(info) ((info)->pos_in_file + \
+ (size_t) (*(info)->current_pos - (info)->request_pos))
++#define my_b_write_tell(info) ((info)->pos_in_file + \
++ ((info)->write_pos - (info)->write_buffer))
+
+ #define my_b_get_buffer_start(info) (info)->request_pos
+ #define my_b_get_bytes_in_buffer(info) (char*) (info)->read_end - \
+--- a/include/mysql/plugin.h
++++ b/include/mysql/plugin.h
+@@ -559,6 +559,8 @@
+
+ #define EXTENDED_FOR_USERSTAT
+
++#define EXTENDED_FOR_COMMIT_ORDERED
++
+ /**
+ Create a temporary file.
+
+--- a/sql/handler.cc
++++ b/sql/handler.cc
+@@ -90,6 +90,8 @@
+ static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
+ uint known_extensions_id= 0;
+
++static int commit_one_phase_low(THD *thd, bool all, THD_TRANS *trans,
++ bool is_real_trans);
+
+
+ static plugin_ref ha_default_plugin(THD *thd)
+@@ -1119,7 +1121,8 @@
+ */
+ bool is_real_trans= all || thd->transaction.all.ha_list == 0;
+ Ha_trx_info *ha_info= trans->ha_list;
+- my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
++ bool need_commit_ordered;
++ my_xid xid;
+ DBUG_ENTER("ha_commit_trans");
+
+ /*
+@@ -1152,13 +1155,20 @@
+ DBUG_RETURN(2);
+ }
+
+- if (ha_info)
++ if (!ha_info)
++ {
++ /* Free resources and perform other cleanup even for 'empty' transactions. */
++ if (is_real_trans)
++ thd->transaction.cleanup();
++ DBUG_RETURN(0);
++ }
++ else
+ {
+ uint rw_ha_count;
+ bool rw_trans;
+ MDL_request mdl_request;
+
+- DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
++ DBUG_EXECUTE_IF("crash_commit_before", abort(););
+
+ /* Close all cursors that can not survive COMMIT */
+ if (is_real_trans) /* not a statement commit */
+@@ -1197,57 +1207,80 @@
+ !thd->slave_thread)
+ {
+ my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
+- ha_rollback_trans(thd, all);
+- error= 1;
+- goto end;
++ goto err;
+ }
+
+- if (!trans->no_2pc && (rw_ha_count > 1))
++ if (trans->no_2pc || (rw_ha_count <= 1))
+ {
+- for (; ha_info && !error; ha_info= ha_info->next())
++ error= ha_commit_one_phase(thd, all);
++ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
++ goto end;
++ }
++
++ need_commit_ordered= FALSE;
++ xid= thd->transaction.xid_state.xid.get_my_xid();
++
++ for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
+ {
+ int err;
+- handlerton *ht= ha_info->ht();
++ handlerton *ht= hi->ht();
+ /*
+ Do not call two-phase commit if this particular
+ transaction is read-only. This allows for simpler
+ implementation in engines that are always read-only.
+ */
+- if (! ha_info->is_trx_read_write())
++ if (! hi->is_trx_read_write())
+ continue;
+ /*
+ Sic: we know that prepare() is not NULL since otherwise
+ trans->no_2pc would have been set.
+ */
+- if ((err= ht->prepare(ht, thd, all)))
+- {
+- my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+- error= 1;
+- }
++ err= ht->prepare(ht, thd, all);
+ status_var_increment(thd->status_var.ha_prepare_count);
++ if (err)
++ my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
++
++ if (err)
++ goto err;
++
++ need_commit_ordered|= (ht->commit_ordered != NULL);
+ }
+- DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
+- if (error || (is_real_trans && xid &&
+- (error= !(cookie= tc_log->log_xid(thd, xid)))))
++ DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_ABORT(););
++
++ if (!is_real_trans)
+ {
+- ha_rollback_trans(thd, all);
+- error= 1;
++ error= commit_one_phase_low(thd, all, trans, is_real_trans);
++ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
+ goto end;
+ }
+- DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
+- }
+- error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
+- DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
+- if (cookie)
++
++ cookie= tc_log->log_and_order(thd, xid, all, need_commit_ordered);
++ if (!cookie)
++ goto err;
++
++ DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_ABORT(););
++
++ error= commit_one_phase_low(thd, all, trans, is_real_trans) ? 2 : 0;
++ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
++ if (is_real_trans) /* userstat.patch */
++ thd->diff_commit_trans++; /* userstat.patch */
++ RUN_HOOK(transaction, after_commit, (thd, FALSE));
++
++ DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_ABORT(););
+ if(tc_log->unlog(cookie, xid))
+ {
+ error= 2;
+ goto end;
+ }
+- DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
+- if (is_real_trans)
+- thd->diff_commit_trans++;
+- RUN_HOOK(transaction, after_commit, (thd, FALSE));
++
++ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
++ goto end;
++
++ /* Come here if error and we need to rollback. */
++err:
++ error= 1; /* Transaction was rolled back */
++ ha_rollback_trans(thd, all);
++
+ end:
+ if (rw_trans && mdl_request.ticket)
+ {
+@@ -1260,9 +1293,6 @@
+ thd->mdl_context.release_lock(mdl_request.ticket);
+ }
+ }
+- /* Free resources and perform other cleanup even for 'empty' transactions. */
+- else if (is_real_trans)
+- thd->transaction.cleanup();
+ DBUG_RETURN(error);
+ }
+
+@@ -1279,7 +1309,6 @@
+
+ int ha_commit_one_phase(THD *thd, bool all)
+ {
+- int error=0;
+ THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
+ /*
+ "real" is a nick name for a transaction for which a commit will
+@@ -1295,8 +1324,16 @@
+ transaction.all.ha_list, see why in trans_register_ha()).
+ */
+ bool is_real_trans=all || thd->transaction.all.ha_list == 0;
+- Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
+ DBUG_ENTER("ha_commit_one_phase");
++ DBUG_RETURN(commit_one_phase_low(thd, all, trans, is_real_trans));
++}
++
++static int
++commit_one_phase_low(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
++{
++ int error= 0;
++ Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
++ DBUG_ENTER("commit_one_phase_low");
+
+ if (ha_info)
+ {
+@@ -1894,7 +1931,16 @@
+ {
+ bool warn= true;
+
++ /*
++ Holding the LOCK_commit_ordered mutex ensures that we get the same
++ snapshot for all engines (including the binary log). This allows us
++ among other things to do backups with
++ START TRANSACTION WITH CONSISTENT SNAPSHOT and
++ have a consistent binlog position.
++ */
++ mysql_mutex_lock(&LOCK_commit_ordered);
+ plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
++ mysql_mutex_unlock(&LOCK_commit_ordered);
+
+ /*
+ Same idea as when one wants to CREATE TABLE in one engine which does not
+--- a/sql/handler.h
++++ b/sql/handler.h
+@@ -756,6 +756,53 @@
+ and 'real commit' mean the same event.
+ */
+ int (*commit)(handlerton *hton, THD *thd, bool all);
++ /*
++ The commit_ordered() method is called prior to the commit() method, after
++ the transaction manager has decided to commit (not rollback) the
++ transaction. Unlike commit(), commit_ordered() is called only when the
++ full transaction is committed, not for each commit of statement
++ transaction in a multi-statement transaction.
++
++ Note that like prepare(), commit_ordered() is only called when 2-phase
++ commit takes place. Ie. when no binary log and only a single engine
++ participates in a transaction, one commit() is called, no
++ commit_ordered(). So engines must be prepared for this.
++
++ The calls to commit_ordered() in multiple parallel transactions are
++ guaranteed to happen in the same order in every participating
++ handler. This can be used to ensure the same commit order among multiple
++ handlers (eg. in table handler and binlog). So if transaction T1 calls
++ into commit_ordered() of handler A before T2, then T1 will also call
++ commit_ordered() of handler B before T2.
++
++ Engines that implement this method should during this call make the
++ transaction visible to other transactions, thereby making the order of
++ transaction commits be defined by the order of commit_ordered() calls.
++
++ The intention is that commit_ordered() should do the minimal amount of
++ work that needs to happen in consistent commit order among handlers. To
++ preserve ordering, calls need to be serialised on a global mutex, so
++ doing any time-consuming or blocking operations in commit_ordered() will
++ limit scalability.
++
++ Handlers can rely on commit_ordered() calls to be serialised (no two
++ calls can run in parallel, so no extra locking on the handler part is
++ required to ensure this).
++
++ Note that commit_ordered() can be called from a different thread than the
++ one handling the transaction! So it can not do anything that depends on
++ thread local storage, in particular it can not call my_error() and
++ friends (instead it can store the error code and delay the call of
++ my_error() to the commit() method).
++
++ Similarly, since commit_ordered() returns void, any return error code
++ must be saved and returned from the commit() method instead.
++
++ The commit_ordered method is optional, and can be left unset if not
++ needed in a particular handler (then there will be no ordering guarantees
++ wrt. other engines and binary log).
++ */
++ void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
+ int (*rollback)(handlerton *hton, THD *thd, bool all);
+ int (*prepare)(handlerton *hton, THD *thd, bool all);
+ int (*recover)(handlerton *hton, XID *xid_list, uint len);
+--- a/sql/log.cc
++++ b/sql/log.cc
+@@ -49,6 +49,7 @@
+
+ #include "sql_plugin.h"
+ #include "rpl_handler.h"
++#include "debug_sync.h"
+
+ /* max size of the log message */
+ #define MAX_LOG_BUFFER_SIZE 1024
+@@ -71,6 +72,25 @@
+ static int binlog_rollback(handlerton *hton, THD *thd, bool all);
+ static int binlog_prepare(handlerton *hton, THD *thd, bool all);
+
++static LEX_STRING const write_error_msg=
++ { C_STRING_WITH_LEN("error writing to the binary log") };
++
++static my_bool mutexes_inited;
++mysql_mutex_t LOCK_group_commit_queue;
++mysql_mutex_t LOCK_commit_ordered;
++
++static ulonglong binlog_status_var_num_commits;
++static ulonglong binlog_status_var_num_group_commits;
++
++static SHOW_VAR binlog_status_vars_detail[]=
++{
++ {"commits",
++ (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
++ {"group_commits",
++ (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
++ {NullS, NullS, SHOW_LONG}
++};
++
+ /**
+ purge logs, master and slave sides both, related error code
+ convertor.
+@@ -167,41 +187,6 @@
+ }
+
+ /*
+- Helper class to hold a mutex for the duration of the
+- block.
+-
+- Eliminates the need for explicit unlocking of mutexes on, e.g.,
+- error returns. On passing a null pointer, the sentry will not do
+- anything.
+- */
+-class Mutex_sentry
+-{
+-public:
+- Mutex_sentry(mysql_mutex_t *mutex)
+- : m_mutex(mutex)
+- {
+- if (m_mutex)
+- mysql_mutex_lock(mutex);
+- }
+-
+- ~Mutex_sentry()
+- {
+- if (m_mutex)
+- mysql_mutex_unlock(m_mutex);
+-#ifndef DBUG_OFF
+- m_mutex= 0;
+-#endif
+- }
+-
+-private:
+- mysql_mutex_t *m_mutex;
+-
+- // It's not allowed to copy this object in any way
+- Mutex_sentry(Mutex_sentry const&);
+- void operator=(Mutex_sentry const&);
+-};
+-
+-/*
+ Helper classes to store non-transactional and transactional data
+ before copying it to the binary log.
+ */
+@@ -211,7 +196,8 @@
+ binlog_cache_data(): m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF),
+ incident(FALSE), changes_to_non_trans_temp_table_flag(FALSE),
+ saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0),
+- ptr_binlog_cache_disk_use(0)
++ ptr_binlog_cache_disk_use(0), commit_bin_log_file_pos(0),
++ using_xa(FALSE), xa_xid(0)
+ { }
+
+ ~binlog_cache_data()
+@@ -270,6 +256,8 @@
+ variable after truncating the cache.
+ */
+ cache_log.disk_writes= 0;
++ using_xa= FALSE;
++ commit_bin_log_file_pos= 0;
+ DBUG_ASSERT(empty());
+ }
+
+@@ -411,6 +399,20 @@
+
+ binlog_cache_data& operator=(const binlog_cache_data& info);
+ binlog_cache_data(const binlog_cache_data& info);
++
++public:
++ /*
++ Binlog position after current commit, available to storage engines during
++ commit_ordered() and commit().
++ */
++ ulonglong commit_bin_log_file_pos;
++
++ /*
++ Flag set true if this transaction is committed with log_xid() as part of
++ XA, false if not.
++ */
++ bool using_xa;
++ my_xid xa_xid;
+ };
+
+ class binlog_cache_mngr {
+@@ -1627,7 +1629,7 @@
+ */
+ static inline int
+ binlog_flush_cache(THD *thd, binlog_cache_data* cache_data, Log_event *end_evt,
+- bool is_transactional)
++ bool is_transactional, bool all)
+ {
+ DBUG_ENTER("binlog_flush_cache");
+ int error= 0;
+@@ -1646,8 +1648,8 @@
+ were, we would have to ensure that we're not ending a statement
+ inside a stored function.
+ */
+- error= mysql_bin_log.write(thd, &cache_data->cache_log, end_evt,
+- cache_data->has_incident());
++ error= mysql_bin_log.write_transaction_to_binlog(thd, cache_data,
++ end_evt, all);
+ }
+ cache_data->reset();
+
+@@ -1666,12 +1668,12 @@
+ */
+ static inline int
+ binlog_commit_flush_stmt_cache(THD *thd,
+- binlog_cache_mngr *cache_mngr)
++ binlog_cache_mngr *cache_mngr, bool all)
+ {
+ Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
+ FALSE, FALSE, TRUE, 0);
+ return (binlog_flush_cache(thd, &cache_mngr->stmt_cache, &end_evt,
+- FALSE));
++ FALSE, all));
+ }
+
+ /**
+@@ -1684,12 +1686,12 @@
+ nonzero if an error pops up when flushing the cache.
+ */
+ static inline int
+-binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr)
++binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all)
+ {
+ Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
+ TRUE, FALSE, TRUE, 0);
+ return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt,
+- TRUE));
++ TRUE, all));
+ }
+
+ /**
+@@ -1702,12 +1704,12 @@
+ nonzero if an error pops up when flushing the cache.
+ */
+ static inline int
+-binlog_rollback_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr)
++binlog_rollback_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all)
+ {
+ Query_log_event end_evt(thd, STRING_WITH_LEN("ROLLBACK"),
+ TRUE, FALSE, TRUE, 0);
+ return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt,
+- TRUE));
++ TRUE, all));
+ }
+
+ /**
+@@ -1722,11 +1724,11 @@
+ */
+ static inline int
+ binlog_commit_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr,
+- my_xid xid)
++ my_xid xid, bool all)
+ {
+ Xid_log_event end_evt(thd, xid);
+ return (binlog_flush_cache(thd, &cache_mngr->trx_cache, &end_evt,
+- TRUE));
++ TRUE, all));
+ }
+
+ /**
+@@ -1788,7 +1790,7 @@
+ do nothing.
+ just pretend we can do 2pc, so that MySQL won't
+ switch to 1pc.
+- real work will be done in MYSQL_BIN_LOG::log_xid()
++ real work will be done in MYSQL_BIN_LOG::log_and_order()
+ */
+ return 0;
+ }
+@@ -1821,7 +1823,7 @@
+
+ if (!cache_mngr->stmt_cache.empty())
+ {
+- error= binlog_commit_flush_stmt_cache(thd, cache_mngr);
++ error= binlog_commit_flush_stmt_cache(thd, cache_mngr, all);
+ }
+
+ if (cache_mngr->trx_cache.empty())
+@@ -1840,7 +1842,7 @@
+ Otherwise, we accumulate the changes.
+ */
+ if (!error && ending_trans(thd, all))
+- error= binlog_commit_flush_trx_cache(thd, cache_mngr);
++ error= binlog_commit_flush_trx_cache(thd, cache_mngr, all);
+
+ /*
+ This is part of the stmt rollback.
+@@ -1884,7 +1886,7 @@
+ }
+ else if (!cache_mngr->stmt_cache.empty())
+ {
+- error= binlog_commit_flush_stmt_cache(thd, cache_mngr);
++ error= binlog_commit_flush_stmt_cache(thd, cache_mngr, all);
+ }
+
+ if (cache_mngr->trx_cache.empty())
+@@ -1932,7 +1934,7 @@
+ (trans_has_updated_non_trans_table(thd) &&
+ ending_single_stmt_trans(thd,all) &&
+ thd->variables.binlog_format == BINLOG_FORMAT_MIXED)))
+- error= binlog_rollback_flush_trx_cache(thd, cache_mngr);
++ error= binlog_rollback_flush_trx_cache(thd, cache_mngr, all);
+ /*
+ Truncate the cache if:
+ . aborting a single or multi-statement transaction or;
+@@ -2907,6 +2909,7 @@
+ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
+ :bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
+ need_start_event(TRUE),
++ group_commit_queue(0), num_commits(0), num_group_commits(0),
+ sync_period_ptr(sync_period),
+ is_relay_log(0), signal_cnt(0),
+ description_event_for_exec(0), description_event_for_queue(0)
+@@ -5279,19 +5282,15 @@
+ SYNOPSIS
+ write_cache()
+ cache Cache to write to the binary log
+- lock_log True if the LOCK_log mutex should be aquired, false otherwise
+- sync_log True if the log should be flushed and synced
+
+ DESCRIPTION
+ Write the contents of the cache to the binary log. The cache will
+ be reset as a READ_CACHE to be able to read the contents from it.
+ */
+
+-int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache,
+- bool lock_log, bool sync_log)
++int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
+ {
+- Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
+-
++ mysql_mutex_assert_owner(&LOCK_log);
+ if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
+ return ER_ERROR_ON_WRITE;
+ uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
+@@ -5402,6 +5401,8 @@
+ }
+
+ /* Write data to the binary log file */
++ DBUG_EXECUTE_IF("fail_binlog_write_1",
++ errno= 28; return ER_ERROR_ON_WRITE;);
+ if (my_b_write(&log_file, cache->read_pos, length))
+ return ER_ERROR_ON_WRITE;
+ thd->binlog_bytes_written+= length;
+@@ -5410,9 +5411,6 @@
+
+ DBUG_ASSERT(carry == 0);
+
+- if (sync_log)
+- return flush_and_sync(0);
+-
+ return 0; // All OK
+ }
+
+@@ -5453,8 +5451,6 @@
+ if (!is_open())
+ DBUG_RETURN(error);
+
+- LEX_STRING const write_error_msg=
+- { C_STRING_WITH_LEN("error writing to the binary log") };
+ Incident incident= INCIDENT_LOST_EVENTS;
+ Incident_log_event ev(thd, incident, write_error_msg);
+ if (lock)
+@@ -5496,104 +5492,320 @@
+ 'cache' needs to be reinitialized after this functions returns.
+ */
+
+-bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event,
+- bool incident)
++bool
++MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, binlog_cache_data *cache_data,
++ Log_event *end_ev, bool all)
+ {
+- DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
++ group_commit_entry entry;
++ bool ret;
++ DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
++
++ entry.thd= thd;
++ entry.cache_data= cache_data;
++ entry.error= 0;
++ entry.all= all;
++
++ /*
++ Log "BEGIN" at the beginning of every transaction. Here, a transaction is
++ either a BEGIN..COMMIT block or a single statement in autocommit mode.
++
++ Create the necessary events here, where we have the correct THD (and
++ thread context).
++
++ Due to group commit the actual writing to binlog may happen in a different
++ thread.
++ */
++ Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0);
++ entry.begin_event= &qinfo;
++ entry.end_event= end_ev;
++ if (cache_data->has_incident())
++ {
++ Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, write_error_msg);
++ entry.incident_event= &inc_ev;
++ ret = write_transaction_to_binlog_events(&entry);
++ }
++ else
++ {
++ entry.incident_event= NULL;
++ ret = write_transaction_to_binlog_events(&entry);
++ }
++ if (!ret) /* userstat.patch */
++ thd->binlog_bytes_written += qinfo.data_written; /* userstat.patch */
++ DBUG_RETURN(ret);
++}
++
++bool
++MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
++{
++ /*
++ To facilitate group commit for the binlog, we first queue up ourselves in
++ the group commit queue. Then the first thread to enter the queue waits for
++ the LOCK_log mutex, and commits for everyone in the queue once it gets the
++ lock. Any other threads in the queue just wait for the first one to finish
++ the commit and wake them up.
++ */
++ entry->thd->clear_wakeup_ready();
++ mysql_mutex_lock(&LOCK_group_commit_queue);
++ group_commit_entry *orig_queue= group_commit_queue;
++ entry->next= orig_queue;
++ group_commit_queue= entry;
++ DEBUG_SYNC(entry->thd, "commit_group_commit_queue");
++ mysql_mutex_unlock(&LOCK_group_commit_queue);
++
++ /*
++ The first in the queue handles group commit for all; the others just wait
++ to be signalled when group commit is done.
++ */
++ if (orig_queue != NULL)
++ entry->thd->wait_for_wakeup_ready();
++ else
++ trx_group_commit_leader(entry);
++
++ if (likely(!entry->error))
++ return 0;
++
++ switch (entry->error)
++ {
++ case ER_ERROR_ON_WRITE:
++ my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, entry->commit_errno);
++ break;
++ case ER_ERROR_ON_READ:
++ my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH),
++ entry->cache_data->cache_log.file_name, entry->commit_errno);
++ break;
++ default:
++ /*
++ There are not (and should not be) any errors thrown not covered above.
++ But just in case one is added later without updating the above switch
++ statement, include a catch-all.
++ */
++ my_printf_error(entry->error,
++ "Error writing transaction to binary log: %d",
++ MYF(ME_NOREFRESH), entry->error);
++ }
++
++ /*
++ Since we return error, this transaction XID will not be committed, so
++ we need to mark it as not needed for recovery (unlog() is not called
++ for a transaction if log_xid() fails).
++ */
++ if (entry->cache_data->using_xa && entry->cache_data->xa_xid)
++ mark_xid_done();
++
++ return 1;
++}
++
++/*
++ Do binlog group commit as the lead thread.
++
++ This must be called when this thread/transaction is queued at the start of
++ the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
++ commit all the transactions in the queue (more may have entered while waiting
++ for LOCK_log). After commit is done, all other threads in the queue will be
++ signalled.
++
++ */
++void
++MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
++{
++ DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
++ uint xid_count= 0;
++ uint write_count= 0;
++
++ /*
++ Lock the LOCK_log mutex, and once we get it, collect any additional writes
++ that queued up while we were waiting.
++ */
+ mysql_mutex_lock(&LOCK_log);
++ DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");
++ mysql_mutex_lock(&LOCK_group_commit_queue);
++ group_commit_entry *current= group_commit_queue;
++ group_commit_queue= NULL;
++ mysql_mutex_unlock(&LOCK_group_commit_queue);
++
++ /* As the queue is in reverse order of entering, reverse it. */
++ group_commit_entry *queue= NULL;
++ while (current)
++ {
++ group_commit_entry *next= current->next;
++ current->next= queue;
++ queue= current;
++ current= next;
++ }
++ DBUG_ASSERT(leader == queue /* the leader should be first in queue */);
+
++ /* Now we have in queue the list of transactions to be committed in order. */
+ DBUG_ASSERT(is_open());
+ if (likely(is_open())) // Should always be true
+ {
+ /*
+- We only bother to write to the binary log if there is anything
+- to write.
+- */
+- if (my_b_tell(cache) > 0)
++ Commit every transaction in the queue.
++
++ Note that we are doing this in a different thread than the one running
++ the transaction! So we are limited in the operations we can do. In
++ particular, we cannot call my_error() on behalf of a transaction, as
++ that obtains the THD from thread local storage. Instead, we must set
++ current->error and let the thread do the error reporting itself once
++ we wake it up.
++ */
++ for (current= queue; current != NULL; current= current->next)
+ {
++ binlog_cache_data *cache_data= current->cache_data;
++ IO_CACHE *cache= &cache_data->cache_log;
++
+ /*
+- Log "BEGIN" at the beginning of every transaction. Here, a
+- transaction is either a BEGIN..COMMIT block or a single
+- statement in autocommit mode.
++ We only bother to write to the binary log if there is anything
++ to write.
+ */
+- Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0);
+- if (qinfo.write(&log_file))
+- goto err;
+- thd->binlog_bytes_written+= qinfo.data_written;
+- DBUG_EXECUTE_IF("crash_before_writing_xid",
+- {
+- if ((write_error= write_cache(thd, cache, false, true)))
+- DBUG_PRINT("info", ("error writing binlog cache: %d",
+- write_error));
+- DBUG_PRINT("info", ("crashing before writing xid"));
+- DBUG_SUICIDE();
+- });
++ if (my_b_tell(cache) > 0)
++ {
++ if ((current->error= write_transaction(current)))
++ current->commit_errno= errno;
+
+- if ((write_error= write_cache(thd, cache, false, false)))
+- goto err;
++ write_count++;
++ }
+
+- if (commit_event && commit_event->write(&log_file))
+- goto err;
+- if (commit_event)
+- thd->binlog_bytes_written+= commit_event->data_written;
++ cache_data->commit_bin_log_file_pos= my_b_write_tell(&log_file);
++ if (cache_data->using_xa && cache_data->xa_xid)
++ xid_count++;
++ }
+
+- if (incident && write_incident(thd, FALSE))
+- goto err;
+
++ if (write_count > 0)
++ {
+ bool synced= 0;
+ if (flush_and_sync(&synced))
+- goto err;
+- DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
+- if (cache->error) // Error on read
+ {
+- sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
+- write_error=1; // Don't give more errors
+- goto err;
++ for (current= queue; current != NULL; current= current->next)
++ {
++ if (!current->error)
++ {
++ current->error= ER_ERROR_ON_WRITE;
++ current->commit_errno= errno;
++ }
++ }
++ }
++ else
++ {
++ signal_update();
+ }
+
+ if (RUN_HOOK(binlog_storage, after_flush,
+- (thd, log_file_name, log_file.pos_in_file, synced)))
++ (leader->thd, log_file_name, log_file.pos_in_file, synced)))
+ {
+ sql_print_error("Failed to run 'after_flush' hooks");
+- write_error=1;
+- goto err;
++ for (current= queue; current != NULL; current= current->next)
++ {
++ if (!current->error)
++ {
++ current->error= ER_ERROR_ON_WRITE;
++ current->commit_errno= errno;
++ }
++ }
+ }
+
+- signal_update();
+ }
+
+ /*
+- if commit_event is Xid_log_event, increase the number of
+- prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated
++ if any commit_events are Xid_log_event, increase the number of
++ prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated
+ if there're prepared xids in it - see the comment in new_file() for
+ an explanation.
+- If the commit_event is not Xid_log_event (then it's a Query_log_event)
+- rotate binlog, if necessary.
++ If no Xid_log_events (then it's all Query_log_event) rotate binlog,
++ if necessary.
+ */
+- if (commit_event && commit_event->get_type_code() == XID_EVENT)
++ if (xid_count > 0)
+ {
+- mysql_mutex_lock(&LOCK_prep_xids);
+- prepared_xids++;
+- mysql_mutex_unlock(&LOCK_prep_xids);
++ mark_xids_active(xid_count);
+ }
+ else
+ if (rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED))
+- goto err;
++ {
++ for (current= queue; current != NULL; current= current->next)
++ {
++ if (!current->error)
++ {
++ current->error= ER_ERROR_ON_WRITE;
++ current->commit_errno= errno;
++ }
++ }
++ }
+ }
++ DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
++ mysql_mutex_lock(&LOCK_commit_ordered);
++ /*
++ We cannot unlock LOCK_log until we have locked LOCK_commit_ordered;
++ otherwise scheduling could allow the next group commit to run ahead of us,
++ messing up the order of commit_ordered() calls. But as soon as
++ LOCK_commit_ordered is obtained, we can let the next group commit start.
++ */
+ mysql_mutex_unlock(&LOCK_log);
++ DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");
++ ++num_group_commits;
+
+- DBUG_RETURN(0);
+-
+-err:
+- if (!write_error)
++ /*
++ Wakeup each participant waiting for our group commit, first calling the
++ commit_ordered() methods for any transactions doing 2-phase commit.
++ */
++ current= queue;
++ while (current != NULL)
+ {
+- write_error= 1;
+- sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
++ group_commit_entry *next;
++
++ DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered");
++ ++num_commits;
++ if (current->cache_data->using_xa && !current->error)
++ run_commit_ordered(current->thd, current->all);
++
++ /*
++ Careful not to access current->next after waking up the other thread! As
++ it may change immediately after wakeup.
++ */
++ next= current->next;
++ if (current != leader) // Don't wake up ourself
++ current->thd->signal_wakeup_ready();
++ current= next;
+ }
+- mysql_mutex_unlock(&LOCK_log);
+- DBUG_RETURN(1);
++ DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
++ mysql_mutex_unlock(&LOCK_commit_ordered);
++
++ DBUG_VOID_RETURN;
+ }
+
++int
++MYSQL_BIN_LOG::write_transaction(group_commit_entry *entry)
++{
++ binlog_cache_data *cache_data= entry->cache_data;
++ IO_CACHE *cache= &cache_data->cache_log;
++
++ if (entry->begin_event->write(&log_file))
++ return ER_ERROR_ON_WRITE;
++
++ DBUG_EXECUTE_IF("crash_before_writing_xid",
++ {
++ if ((write_cache(entry->thd, cache)))
++ DBUG_PRINT("info", ("error writing binlog cache"));
++ else
++ flush_and_sync(0);
++
++ DBUG_PRINT("info", ("crashing before writing xid"));
++ abort();
++ });
++
++ if (write_cache(entry->thd, cache))
++ return ER_ERROR_ON_WRITE;
++
++ if (entry->end_event->write(&log_file))
++ return ER_ERROR_ON_WRITE;
++
++ if (entry->incident_event && entry->incident_event->write(&log_file))
++ return ER_ERROR_ON_WRITE;
++
++ if (cache->error) // Error on read
++ return ER_ERROR_ON_READ;
++
++ return 0;
++}
+
+ /**
+ Wait until we get a signal that the relay log has been updated.
+@@ -5999,6 +6211,68 @@
+ }
+
+
++void
++TC_init()
++{
++ mysql_mutex_init(key_LOCK_group_commit_queue, &LOCK_group_commit_queue, MY_MUTEX_INIT_SLOW);
++ mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered, MY_MUTEX_INIT_SLOW);
++ mutexes_inited= TRUE;
++}
++
++
++void
++TC_destroy()
++{
++ if (mutexes_inited)
++ {
++ mysql_mutex_destroy(&LOCK_group_commit_queue);
++ mysql_mutex_destroy(&LOCK_commit_ordered);
++ mutexes_inited= FALSE;
++ }
++}
++
++
++void
++TC_LOG::run_commit_ordered(THD *thd, bool all)
++{
++ Ha_trx_info *ha_info=
++ all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
++
++ mysql_mutex_assert_owner(&LOCK_commit_ordered);
++ for (; ha_info; ha_info= ha_info->next())
++ {
++ handlerton *ht= ha_info->ht();
++ if (!ht->commit_ordered)
++ continue;
++ ht->commit_ordered(ht, thd, all);
++ DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
++ }
++}
++
++int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
++ bool need_commit_ordered)
++{
++ int cookie;
++
++ cookie= 0;
++ if (xid)
++ cookie= log_one_transaction(xid);
++
++ if (need_commit_ordered)
++ {
++ /* Only run commit_ordered() if log_xid was successful. */
++ if (cookie)
++ {
++ mysql_mutex_lock(&LOCK_commit_ordered);
++ run_commit_ordered(thd, all);
++ mysql_mutex_unlock(&LOCK_commit_ordered);
++ }
++ }
++
++ return cookie;
++}
++
++
+ /********* transaction coordinator log for 2pc - mmap() based solution *******/
+
+ /*
+@@ -6135,6 +6409,7 @@
+ mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_active, &COND_active, 0);
+ mysql_cond_init(key_COND_pool, &COND_pool, 0);
++ mysql_cond_init(key_COND_queue_busy, &COND_queue_busy, 0);
+
+ inited=6;
+
+@@ -6142,6 +6417,8 @@
+ active=pages;
+ pool=pages+1;
+ pool_last=pages+npages-1;
++ commit_ordered_queue= NULL;
++ commit_ordered_queue_busy= false;
+
+ return 0;
+
+@@ -6247,7 +6524,7 @@
+ to the position in memory where xid was logged to.
+ */
+
+-int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
++int TC_LOG_MMAP::log_one_transaction(my_xid xid)
+ {
+ int err;
+ PAGE *p;
+@@ -6386,7 +6663,9 @@
+ mysql_mutex_destroy(&LOCK_sync);
+ mysql_mutex_destroy(&LOCK_active);
+ mysql_mutex_destroy(&LOCK_pool);
++ mysql_cond_destroy(&COND_active);
+ mysql_cond_destroy(&COND_pool);
++ mysql_cond_destroy(&COND_queue_busy);
+ case 5:
+ data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails
+ case 4:
+@@ -6596,42 +6875,87 @@
+ mysql_cond_destroy(&COND_prep_xids);
+ }
+
+-/**
+- @todo
+- group commit
++/*
++ Do a binlog log_xid() for a group of transactions, linked through
++ thd->next_commit_ordered.
+
+ @retval
+ 0 error
+ @retval
+ 1 success
+ */
+-int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
++int TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
++ bool need_commit_ordered __attribute__((unused)))
+ {
+- DBUG_ENTER("TC_LOG_BINLOG::log");
++ DBUG_ENTER("TC_LOG_BINLOG::log_and_order");
+ binlog_cache_mngr *cache_mngr=
+ (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
++
++ cache_mngr->trx_cache.using_xa= TRUE;
++ cache_mngr->trx_cache.xa_xid= xid;
+ /*
+ We always commit the entire transaction when writing an XID. Also
+ note that the return value is inverted.
+ */
+- DBUG_RETURN(!binlog_commit_flush_stmt_cache(thd, cache_mngr) &&
+- !binlog_commit_flush_trx_cache(thd, cache_mngr, xid));
++ DBUG_RETURN(!binlog_commit_flush_stmt_cache(thd, cache_mngr, all) &&
++ !binlog_commit_flush_trx_cache(thd, cache_mngr, xid, all));
+ }
+
+-int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
++/*
++ After an XID is logged, we need to hold on to the current binlog file until
++ it is fully committed in the storage engine. The reason is that crash
++ recovery only looks at the latest binlog, so we must make sure there are no
++ outstanding prepared (but not committed) transactions before rotating the
++ binlog.
++
++ To handle this, we keep a count of outstanding XIDs. This function is used
++ to increase this count when committing one or more transactions to the
++ binary log.
++*/
++void
++TC_LOG_BINLOG::mark_xids_active(uint xid_count)
+ {
+- DBUG_ENTER("TC_LOG_BINLOG::unlog");
++ DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
++ DBUG_PRINT("info", ("xid_count=%u", xid_count));
++ mysql_mutex_lock(&LOCK_prep_xids);
++ prepared_xids+= xid_count;
++ mysql_mutex_unlock(&LOCK_prep_xids);
++ DBUG_VOID_RETURN;
++}
++
++/*
++ Once an XID is committed, it is safe to rotate the binary log, as it can no
++ longer be needed during crash recovery.
++
++ This function is called to mark an XID this way. It needs to decrease the
++ count of pending XIDs, and signal the log rotator thread when it reaches zero.
++*/
++void
++TC_LOG_BINLOG::mark_xid_done()
++{
++ my_bool send_signal;
++
++ DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");
+ mysql_mutex_lock(&LOCK_prep_xids);
+ // prepared_xids can be 0 if the transaction had ignorable errors.
+ DBUG_ASSERT(prepared_xids >= 0);
+ if (prepared_xids > 0)
+ prepared_xids--;
+- if (prepared_xids == 0) {
++ send_signal= (prepared_xids == 0);
++ mysql_mutex_unlock(&LOCK_prep_xids);
++ if (send_signal) {
+ DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids));
+ mysql_cond_signal(&COND_prep_xids);
+ }
+- mysql_mutex_unlock(&LOCK_prep_xids);
+- DBUG_RETURN(rotate_and_purge(0)); // as ::write() did not rotate
++ DBUG_VOID_RETURN;
++}
++
++int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
++{
++ DBUG_ENTER("TC_LOG_BINLOG::unlog");
++ if (xid)
++ mark_xid_done();
++ DBUG_RETURN(rotate_and_purge(0));
+ }
+
+ int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
+@@ -6700,9 +7024,67 @@
+ {
+ return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
+ }
++/*
++ Get the binlog file name and position for the transaction currently being
++ committed. Only valid from storage engine commit_ordered()/commit() methods.
++
++ On return *out_pos / *out_file hold the position and file name, or 0 / NULL
++ when the binlog handlerton is not enabled or thd has no binlog cache manager.
++
++ Since it stores the position inside THD, it is safe to call without any
++ locking.
++
++ Note that currently the binlog file name is not stored inside THD, but this
++ is still safe as it can only change when the log is rotated, and we never
++ rotate the binlog while commits are pending inside storage engines.
++*/
++extern "C"
++void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
++{
++ binlog_cache_mngr *cache_mngr;
++ if (binlog_hton->state == SHOW_OPTION_YES
++ && (cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton)))
++ {
++ *out_pos= cache_mngr->trx_cache.commit_bin_log_file_pos;
++ *out_file= mysql_bin_log.get_log_fname();
++ }
++ else
++ {
++ *out_pos= 0; /* integer offset, not a pointer: use 0 rather than NULL */
++ *out_file= NULL;
++ }
++}
+ #endif /* INNODB_COMPATIBILITY_HOOKS */
+
+
++static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
++{
++ mysql_bin_log.set_status_variables();
++ var->type= SHOW_ARRAY;
++ var->value= (char *)&binlog_status_vars_detail;
++ return 0;
++}
++
++static SHOW_VAR binlog_status_vars_top[]= {
++ {"binlog", (char *) &show_binlog_vars, SHOW_FUNC},
++ {NullS, NullS, SHOW_LONG}
++};
++
++/*
++ Copy out current values of status variables, for SHOW STATUS or
++ information_schema.global_status.
++
++ This is called only under LOCK_status, so we can fill in a static array.
++*/
++void
++TC_LOG_BINLOG::set_status_variables()
++{
++ mysql_mutex_lock(&LOCK_commit_ordered);
++ binlog_status_var_num_commits= this->num_commits;
++ binlog_status_var_num_group_commits= this->num_group_commits;
++ mysql_mutex_unlock(&LOCK_commit_ordered);
++}
++
+ struct st_mysql_storage_engine binlog_storage_engine=
+ { MYSQL_HANDLERTON_INTERFACE_VERSION };
+
+@@ -6717,7 +7099,7 @@
+ binlog_init, /* Plugin Init */
+ NULL, /* Plugin Deinit */
+ 0x0100 /* 1.0 */,
+- NULL, /* status variables */
++ binlog_status_vars_top, /* status variables */
+ NULL, /* system variables */
+ NULL, /* config options */
+ 0, /* flags */
+--- a/sql/log.h
++++ b/sql/log.h
+@@ -44,17 +44,42 @@
+
+ virtual int open(const char *opt_name)=0;
+ virtual void close()=0;
+- virtual int log_xid(THD *thd, my_xid xid)=0;
++ virtual int log_and_order(THD *thd, my_xid xid, bool all,
++ bool need_commit_ordered)=0;
+ virtual int unlog(ulong cookie, my_xid xid)=0;
++
++ protected:
++ void run_commit_ordered(THD *thd, bool all);
+ };
+
++/*
++ Locks used to ensure serialised execution of
++ TC_LOG::run_commit_ordered(), or any other code that calls handler
++ commit_ordered() methods.
++*/
++extern mysql_mutex_t LOCK_group_commit_queue;
++extern mysql_mutex_t LOCK_commit_ordered;
++
++extern void TC_init();
++extern void TC_destroy();
++
+ class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
+ {
+ public:
+ TC_LOG_DUMMY() {}
+ int open(const char *opt_name) { return 0; }
+ void close() { }
+- int log_xid(THD *thd, my_xid xid) { return 1; }
++ /*
++ TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we
++ only use internal XA during commit when >= 2 XA-capable engines
++ participate.
++ */
++ int log_and_order(THD *thd, my_xid xid, bool all,
++ bool need_commit_ordered)
++ {
++ DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */);
++ return 1;
++ }
+ int unlog(ulong cookie, my_xid xid) { return 0; }
+ };
+
+@@ -80,6 +105,13 @@
+ mysql_cond_t cond; // to wait for a sync
+ } PAGE;
+
++ /* List of THDs for which to invoke commit_ordered(), in order. */
++ struct commit_entry
++ {
++ struct commit_entry *next;
++ THD *thd;
++ };
++
+ char logname[FN_REFLEN];
+ File fd;
+ my_off_t file_length;
+@@ -94,16 +126,38 @@
+ */
+ mysql_mutex_t LOCK_active, LOCK_pool, LOCK_sync;
+ mysql_cond_t COND_pool, COND_active;
++ /*
++ Queue of threads that need to call commit_ordered().
++ Access to this queue must be protected by LOCK_group_commit_queue
++ */
++ commit_entry *commit_ordered_queue;
++ /*
++ This flag and condition is used to reserve the queue while threads in it
++ each run the commit_ordered() methods one after the other. Only once the
++ last commit_ordered() in the queue is done can we start on a new queue
++ run.
++
++ Since we start this process in the first thread in the queue and finish in
++ the last (and possibly different) thread, we need a condition variable for
++ this (we cannot unlock a mutex in a different thread than the one who
++ locked it).
++
++ The condition is used together with the LOCK_group_commit_queue mutex.
++ */
++ my_bool commit_ordered_queue_busy;
++ mysql_cond_t COND_queue_busy;
+
+ public:
+ TC_LOG_MMAP(): inited(0) {}
+ int open(const char *opt_name);
+ void close();
+- int log_xid(THD *thd, my_xid xid);
++ int log_and_order(THD *thd, my_xid xid, bool all,
++ bool need_commit_ordered);
+ int unlog(ulong cookie, my_xid xid);
+ int recover();
+
+ private:
++ int log_one_transaction(my_xid xid);
+ void get_active_from_pool();
+ int sync();
+ int overflow();
+@@ -271,9 +325,31 @@
+ time_t last_time;
+ };
+
++class binlog_cache_data;
+ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
+ {
+ private:
++ struct group_commit_entry
++ {
++ struct group_commit_entry *next;
++ THD *thd;
++ binlog_cache_data *cache_data;
++ /*
++ Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
++ written during group commit. The incident_event is only valid if
++ trx_data->has_incident() is true.
++ */
++ Log_event *begin_event;
++ Log_event *end_event;
++ Log_event *incident_event;
++ /* Set during group commit to record any per-thread error. */
++ int error;
++ int commit_errno;
++ /* This is the `all' parameter for ha_commit_ordered(). */
++ bool all;
++ /* XA state (using_xa, xa_xid) is set on the binlog trx_cache, not here. */
++ };
++
+ #ifdef HAVE_PSI_INTERFACE
+ /** The instrumentation key to use for @ LOCK_index. */
+ PSI_mutex_key m_key_LOCK_index;
+@@ -325,6 +401,12 @@
+ In 5.0 it's 0 for relay logs too!
+ */
+ bool no_auto_events;
++ /* Queue of transactions queued up to participate in group commit. */
++ group_commit_entry *group_commit_queue;
++ /* Total number of committed transactions. */
++ ulonglong num_commits;
++ /* Number of group commits done. */
++ ulonglong num_group_commits;
+
+ /* pointer to the sync period variable, for binlog this will be
+ sync_binlog_period, for relay log this will be
+@@ -346,6 +428,11 @@
+ */
+ int new_file_without_locking();
+ int new_file_impl(bool need_lock);
++ int write_transaction(group_commit_entry *entry);
++ bool write_transaction_to_binlog_events(group_commit_entry *entry);
++ void trx_group_commit_leader(group_commit_entry *leader);
++ void mark_xid_done();
++ void mark_xids_active(uint xid_count);
+
+ public:
+ MYSQL_LOG::generate_name;
+@@ -387,7 +474,8 @@
+
+ int open(const char *opt_name);
+ void close();
+- int log_xid(THD *thd, my_xid xid);
++ int log_and_order(THD *thd, my_xid xid, bool all,
++ bool need_commit_ordered);
+ int unlog(ulong cookie, my_xid xid);
+ int recover(IO_CACHE *log, Format_description_log_event *fdle);
+ #if !defined(MYSQL_CLIENT)
+@@ -434,11 +522,11 @@
+ int new_file();
+
+ bool write(Log_event* event_info); // binary log write
+- bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
++ bool write_transaction_to_binlog(THD *thd, binlog_cache_data *cache_data,
++ Log_event *end_ev, bool all);
+ bool write_incident(THD *thd, bool lock);
+
+- int write_cache(THD *thd, IO_CACHE *cache,
+- bool lock_log, bool flush_and_sync);
++ int write_cache(THD *thd, IO_CACHE *cache);
+ void set_write_error(THD *thd, bool is_transactional);
+ bool check_write_error(THD *thd);
+
+@@ -507,6 +595,7 @@
+ inline void unlock_index() { mysql_mutex_unlock(&LOCK_index);}
+ inline IO_CACHE *get_index_file() { return &index_file;}
+ inline uint32 get_open_count() { return open_count; }
++ void set_status_variables();
+ };
+
+ class Log_event_handler
+--- a/sql/mysqld.cc
++++ b/sql/mysqld.cc
+@@ -1490,6 +1490,7 @@
+ ha_end();
+ if (tc_log)
+ tc_log->close();
++ TC_destroy();
+ delegates_destroy();
+ xid_cache_free();
+ table_def_free();
+@@ -4061,6 +4062,8 @@
+ query_response_time_init();
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ /* We have to initialize the storage engines before CSV logging */
++ TC_init();
++
+ init_global_table_stats();
+ init_global_index_stats();
+
+@@ -8004,6 +8007,7 @@
+ key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count,
+ key_PARTITION_LOCK_auto_inc;
+ PSI_mutex_key key_RELAYLOG_LOCK_index;
++PSI_mutex_key key_LOCK_wakeup_ready, key_LOCK_group_commit_queue, key_LOCK_commit_ordered;
+
+ static PSI_mutex_info all_server_mutexes[]=
+ {
+@@ -8024,6 +8028,7 @@
+ { &key_delayed_insert_mutex, "Delayed_insert::mutex", 0},
+ { &key_hash_filo_lock, "hash_filo::lock", 0},
+ { &key_LOCK_active_mi, "LOCK_active_mi", PSI_FLAG_GLOBAL},
++ { &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL},
+ { &key_LOCK_connection_count, "LOCK_connection_count", PSI_FLAG_GLOBAL},
+ { &key_LOCK_crypt, "LOCK_crypt", PSI_FLAG_GLOBAL},
+ { &key_LOCK_delayed_create, "LOCK_delayed_create", PSI_FLAG_GLOBAL},
+@@ -8039,6 +8044,7 @@
+ "LOCK_global_index_stats", PSI_FLAG_GLOBAL},
+ { &key_LOCK_gdl, "LOCK_gdl", PSI_FLAG_GLOBAL},
+ { &key_LOCK_global_system_variables, "LOCK_global_system_variables", PSI_FLAG_GLOBAL},
++ { &key_LOCK_group_commit_queue, "LOCK_group_commit_queue", PSI_FLAG_GLOBAL},
+ { &key_LOCK_manager, "LOCK_manager", PSI_FLAG_GLOBAL},
+ { &key_LOCK_prepared_stmt_count, "LOCK_prepared_stmt_count", PSI_FLAG_GLOBAL},
+ { &key_LOCK_rpl_status, "LOCK_rpl_status", PSI_FLAG_GLOBAL},
+@@ -8050,6 +8056,7 @@
+ { &key_LOCK_temporary_tables, "THD::LOCK_temporary_tables", 0},
+ { &key_LOCK_user_conn, "LOCK_user_conn", PSI_FLAG_GLOBAL},
+ { &key_LOCK_uuid_generator, "LOCK_uuid_generator", PSI_FLAG_GLOBAL},
++ { &key_LOCK_wakeup_ready, "THD::LOCK_wakeup_ready", 0},
+ { &key_LOG_LOCK_log, "LOG::LOCK_log", 0},
+ { &key_master_info_data_lock, "Master_info::data_lock", 0},
+ { &key_master_info_run_lock, "Master_info::run_lock", 0},
+@@ -8097,6 +8104,7 @@
+ key_TABLE_SHARE_cond, key_user_level_lock_cond,
+ key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
+ PSI_cond_key key_RELAYLOG_update_cond;
++PSI_cond_key key_COND_wakeup_ready, key_COND_queue_busy;
+
+ static PSI_cond_info all_server_conds[]=
+ {
+@@ -8113,8 +8121,10 @@
+ { &key_RELAYLOG_update_cond, "MYSQL_RELAY_LOG::update_cond", 0},
+ { &key_COND_cache_status_changed, "Query_cache::COND_cache_status_changed", 0},
+ { &key_COND_manager, "COND_manager", PSI_FLAG_GLOBAL},
++ { &key_COND_queue_busy, "COND_queue_busy", PSI_FLAG_GLOBAL},
+ { &key_COND_rpl_status, "COND_rpl_status", PSI_FLAG_GLOBAL},
+ { &key_COND_server_started, "COND_server_started", PSI_FLAG_GLOBAL},
++ { &key_COND_wakeup_ready, "THD::COND_wakeup_ready", 0},
+ { &key_delayed_insert_cond, "Delayed_insert::cond", 0},
+ { &key_delayed_insert_cond_client, "Delayed_insert::cond_client", 0},
+ { &key_item_func_sleep_cond, "Item_func_sleep::cond", 0},
+--- a/sql/mysqld.h
++++ b/sql/mysqld.h
+@@ -273,6 +273,7 @@
+ key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
+ key_LOCK_error_messages, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc;
+ extern PSI_mutex_key key_RELAYLOG_LOCK_index;
++extern PSI_mutex_key key_LOCK_wakeup_ready, key_LOCK_group_commit_queue, key_LOCK_commit_ordered;
+
+ extern PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger,
+ key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave,
+@@ -293,6 +294,7 @@
+ key_TABLE_SHARE_cond, key_user_level_lock_cond,
+ key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
+ extern PSI_cond_key key_RELAYLOG_update_cond;
++extern PSI_cond_key key_COND_wakeup_ready, key_COND_queue_busy;
+
+ extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert,
+ key_thread_handle_manager, key_thread_kill_server, key_thread_main,
+--- a/sql/sql_class.cc
++++ b/sql/sql_class.cc
+@@ -912,6 +912,8 @@
+ mysql_mutex_init(key_LOCK_thd_data, &LOCK_thd_data, MY_MUTEX_INIT_FAST);
+ mysql_mutex_init(key_LOCK_temporary_tables, &LOCK_temporary_tables,
+ MY_MUTEX_INIT_FAST);
++ mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
++ mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, NULL);
+
+ /* Variables with default values */
+ proc_info="login";
+@@ -1516,6 +1518,8 @@
+ my_free(db);
+ db= NULL;
+ free_root(&transaction.mem_root,MYF(0));
++ mysql_cond_destroy(&COND_wakeup_ready);
++ mysql_mutex_destroy(&LOCK_wakeup_ready);
+ mysql_mutex_destroy(&LOCK_thd_data);
+ mysql_mutex_destroy(&LOCK_temporary_tables);
+ #ifndef DBUG_OFF
+@@ -5199,6 +5203,24 @@
+ DBUG_RETURN(0);
+ }
+
++void
++THD::wait_for_wakeup_ready()
++{
++ mysql_mutex_lock(&LOCK_wakeup_ready);
++ while (!wakeup_ready)
++ mysql_cond_wait(&COND_wakeup_ready, &LOCK_wakeup_ready);
++ mysql_mutex_unlock(&LOCK_wakeup_ready);
++}
++
++void
++THD::signal_wakeup_ready()
++{
++ mysql_mutex_lock(&LOCK_wakeup_ready);
++ wakeup_ready= true;
++ mysql_mutex_unlock(&LOCK_wakeup_ready);
++ mysql_cond_signal(&COND_wakeup_ready);
++}
++
+ bool Discrete_intervals_list::append(ulonglong start, ulonglong val,
+ ulonglong incr)
+ {
+--- a/sql/sql_class.h
++++ b/sql/sql_class.h
+@@ -3017,6 +3017,14 @@
+ LEX_STRING get_invoker_user() { return invoker_user; }
+ LEX_STRING get_invoker_host() { return invoker_host; }
+ bool has_invoker() { return invoker_user.length > 0; }
++ void clear_wakeup_ready() { wakeup_ready= false; }
++ /*
++ Sleep waiting for others to wake us up with signal_wakeup_ready().
++ Must call clear_wakeup_ready() before waiting.
++ */
++ void wait_for_wakeup_ready();
++ /* Wake this thread up from wait_for_wakeup_ready(). */
++ void signal_wakeup_ready();
+ private:
+
+ /** The current internal error handler for this thread, or NULL. */
+@@ -3059,6 +3067,16 @@
+ */
+ LEX_STRING invoker_user;
+ LEX_STRING invoker_host;
++ /*
++ Flag, mutex and condition for a thread to wait for a signal from another
++ thread.
++
++ Currently used to wait for group commit to complete, can also be used for
++ other purposes.
++ */
++ bool wakeup_ready;
++ mysql_mutex_t LOCK_wakeup_ready;
++ mysql_cond_t COND_wakeup_ready;
+ };
+
+ /* Returns string as 'IP' for the client-side of the connection represented by
+--- a/sql/sql_parse.cc
++++ b/sql/sql_parse.cc
+@@ -889,6 +889,10 @@
+ DBUG_ENTER("dispatch_command");
+ DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command));
+
++ DBUG_EXECUTE_IF("crash_dispatch_command_before",
++ { DBUG_PRINT("crash_dispatch_command_before", ("now"));
++ DBUG_ABORT(); });
++
+ #if defined(ENABLED_PROFILING)
+ thd->profiling.start_new_query();
+ #endif
+--- a/mysql-test/suite/perfschema/r/dml_setup_instruments.result
++++ b/mysql-test/suite/perfschema/r/dml_setup_instruments.result
+@@ -11,9 +11,9 @@
+ wait/synch/mutex/sql/HA_DATA_PARTITION::LOCK_auto_inc YES YES
+ wait/synch/mutex/sql/LOCK_active_mi YES YES
+ wait/synch/mutex/sql/LOCK_audit_mask YES YES
++wait/synch/mutex/sql/LOCK_commit_ordered YES YES
+ wait/synch/mutex/sql/LOCK_connection_count YES YES
+ wait/synch/mutex/sql/LOCK_crypt YES YES
+-wait/synch/mutex/sql/LOCK_delayed_create YES YES
+ select * from performance_schema.setup_instruments
+ where name like 'Wait/Synch/Rwlock/sql/%'
+ and name not in ('wait/synch/rwlock/sql/CRYPTO_dynlock_value::lock')
+@@ -38,6 +38,7 @@
+ NAME ENABLED TIMED
+ wait/synch/cond/sql/COND_flush_thread_cache YES YES
+ wait/synch/cond/sql/COND_manager YES YES
++wait/synch/cond/sql/COND_queue_busy YES YES
+ wait/synch/cond/sql/COND_queue_state YES YES
+ wait/synch/cond/sql/COND_rpl_status YES YES
+ wait/synch/cond/sql/COND_server_started YES YES
+@@ -45,7 +46,6 @@
+ wait/synch/cond/sql/COND_thread_count YES YES
+ wait/synch/cond/sql/Delayed_insert::cond YES YES
+ wait/synch/cond/sql/Delayed_insert::cond_client YES YES
+-wait/synch/cond/sql/Event_scheduler::COND_state YES YES
+ select * from performance_schema.setup_instruments
+ where name='Wait';
+ select * from performance_schema.setup_instruments
+--- a/storage/innobase/handler/ha_innodb.cc
++++ b/storage/innobase/handler/ha_innodb.cc
+@@ -375,6 +375,9 @@
+ static INNOBASE_SHARE *get_share(const char *table_name);
+ static void free_share(INNOBASE_SHARE *share);
+ static int innobase_close_connection(handlerton *hton, THD* thd);
++#ifdef EXTENDED_FOR_COMMIT_ORDERED
++static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
++#endif
+ static int innobase_commit(handlerton *hton, THD* thd, bool all);
+ static int innobase_rollback(handlerton *hton, THD* thd, bool all);
+ static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
+@@ -1699,7 +1702,10 @@
+ trx_t* trx) /*!< in/out: InnoDB transaction handle */
+ {
+ DBUG_ENTER("innobase_trx_init");
++#ifndef EXTENDED_FOR_COMMIT_ORDERED
++ /* used by innobase_commit_ordered */
+ DBUG_ASSERT(EQ_CURRENT_THD(thd));
++#endif
+ DBUG_ASSERT(thd == trx->mysql_thd);
+
+ trx->check_foreigns = !thd_test_options(
+@@ -1760,7 +1766,10 @@
+ {
+ trx_t*& trx = thd_to_trx(thd);
+
++#ifndef EXTENDED_FOR_COMMIT_ORDERED
++ /* used by innobase_commit_ordered */
+ ut_ad(EQ_CURRENT_THD(thd));
++#endif
+
+ if (trx == NULL) {
+ trx = innobase_trx_allocate(thd);
+@@ -1846,6 +1855,7 @@
+ {
+ trx->is_registered = 0;
+ trx->owns_prepare_mutex = 0;
++ trx->called_commit_ordered = 0;
+ }
+
+ /*********************************************************************//**
+@@ -1861,6 +1871,29 @@
+ }
+
+ /*********************************************************************//**
++Record that innobase_commit_ordered() has run for this transaction. */
++static inline
++void
++trx_called_commit_ordered_set(
++/*==========================*/
++ trx_t* trx) /*!< in/out: transaction whose flag is set */
++{
++ ut_a(trx_is_registered_for_2pc(trx)); /* flag is only valid while registered */
++ trx->called_commit_ordered = 1;
++}
++
++/*********************************************************************//**
++@return true if the called_commit_ordered flag is set on the transaction */
++static inline
++bool
++trx_called_commit_ordered(
++/*======================*/
++ const trx_t* trx) /*!< in: transaction to inspect */
++{
++ return(trx->called_commit_ordered == 1);
++}
++
++/*********************************************************************//**
+ Check if transaction is started.
+ @reutrn true if transaction is in state started */
+ static
+@@ -2435,6 +2468,9 @@
+ innobase_hton->savepoint_set=innobase_savepoint;
+ innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
+ innobase_hton->savepoint_release=innobase_release_savepoint;
++#ifdef EXTENDED_FOR_COMMIT_ORDERED
++ innobase_hton->commit_ordered=innobase_commit_ordered;
++#endif
+ innobase_hton->commit=innobase_commit;
+ innobase_hton->rollback=innobase_rollback;
+ innobase_hton->prepare=innobase_xa_prepare;
+@@ -3187,6 +3223,126 @@
+ DBUG_RETURN(0);
+ }
+
++#ifdef EXTENDED_FOR_COMMIT_ORDERED
++/* MEMO:
++ InnoDB is coded on the assumption that a trx is only ever accessed by its
++ owner thd (it is not protected by any mutex/lock).
++ So the caller of innobase_commit_ordered() must take care of cache
++ coherency of the trx between CPUs when calling it from another thd.
++
++ In MariaDB's first implementation the coherency appears to be ensured by
++ the pthread mutex LOCK_wakeup_ready, so there is no problem for now.
++
++ But we should stay aware of the importance of this coherency.
++ */
++/*****************************************************************//**
++Fast part of commit, with the log flush deferred to innobase_commit(). */
++static
++void
++innobase_commit_ordered_low(
++/*========================*/
++ trx_t* trx, /*!< in: Innodb transaction */
++ THD* thd) /*!< in: MySQL thread handle */
++{
++ ulonglong tmp_pos;
++ DBUG_ENTER("innobase_commit_ordered_low");
++
++ /* This part was from innobase_commit() */
++
++ /* We need current binlog position for ibbackup to work.
++ Note, the position is current because commit_ordered is guaranteed
++ to be called in same sequence as writing to binlog. */
++retry:
++ if (innobase_commit_concurrency > 0) {
++ mysql_mutex_lock(&commit_cond_m);
++ commit_threads++;
++
++ if (commit_threads > innobase_commit_concurrency) {
++ commit_threads--;
++ mysql_cond_wait(&commit_cond,
++ &commit_cond_m);
++ mysql_mutex_unlock(&commit_cond_m);
++ goto retry;
++ }
++ else {
++ mysql_mutex_unlock(&commit_cond_m);
++ }
++ }
++
++ mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
++ trx->mysql_log_offset = (ib_int64_t) tmp_pos;
++
++ /* Don't do write + flush right now. For group commit
++ to work we want to do the flush in the innobase_commit()
++ method, which runs without holding any locks. */
++ trx->flush_log_later = TRUE;
++ innobase_commit_low(trx);
++ trx->flush_log_later = FALSE;
++
++ if (innobase_commit_concurrency > 0) {
++ mysql_mutex_lock(&commit_cond_m);
++ commit_threads--;
++ mysql_cond_signal(&commit_cond);
++ mysql_mutex_unlock(&commit_cond_m);
++ }
++
++ DBUG_VOID_RETURN;
++}
++
++/*****************************************************************//**
++Perform the first, fast part of InnoDB commit.
++
++Doing it in this call ensures that we get the same commit order here
++as in binlog and any other participating transactional storage engines.
++
++Note that we want to do as little as really needed here, as we run
++under a global mutex. The expensive fsync() is done later, in
++innobase_commit(), without a lock so group commit can take place.
++
++Note also that this method can be called from a different thread than
++the one handling the rest of the transaction. */
++static
++void
++innobase_commit_ordered(
++/*====================*/
++ handlerton *hton, /*!< in: Innodb handlerton */
++ THD* thd, /*!< in: MySQL thread handle of the user for whom
++ the transaction should be committed */
++ bool all) /*!< in: TRUE - commit transaction
++ FALSE - the current SQL statement ended */
++{
++ trx_t* trx;
++ DBUG_ENTER("innobase_commit_ordered");
++ DBUG_ASSERT(hton == innodb_hton_ptr);
++
++ trx = check_trx_exists(thd);
++
++ /* Since we will reserve the kernel mutex, we have to release
++ the search system latch first to obey the latching order. */
++
++ if (trx->has_search_latch) {
++ trx_search_latch_release_if_reserved(trx);
++ }
++
++ if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
++ /* We cannot throw error here; instead we will catch this error
++ again in innobase_commit() and report it from there. */
++ DBUG_VOID_RETURN;
++ }
++
++ /* commit_ordered is only called when committing the whole transaction
++ (or an SQL statement when autocommit is on). */
++ DBUG_ASSERT(all ||
++ (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
++
++ innobase_commit_ordered_low(trx, thd);
++
++ trx_called_commit_ordered_set(trx);
++
++ DBUG_VOID_RETURN;
++}
++#endif /* EXTENDED_FOR_COMMIT_ORDERED */
++
+ /*****************************************************************//**
+ Commits a transaction in an InnoDB database or marks an SQL statement
+ ended.
+@@ -3238,6 +3394,16 @@
+ /* We were instructed to commit the whole transaction, or
+ this is an SQL statement end and autocommit is on */
+
++#ifdef EXTENDED_FOR_COMMIT_ORDERED
++ ut_ad(!trx_has_prepare_commit_mutex(trx));
++
++ /* Run the fast part of commit if we did not already. */
++ if (!trx_called_commit_ordered(trx)) {
++ innobase_commit_ordered_low(trx, thd);
++ }
++#else
++ ut_ad(!trx_called_commit_ordered(trx));
++
+ /* We need current binlog position for ibbackup to work.
+ Note, the position is current because of
+ prepare_commit_mutex */
+@@ -3292,6 +3458,7 @@
+
+ mysql_mutex_unlock(&prepare_commit_mutex);
+ }
++#endif /* EXTENDED_FOR_COMMIT_ORDERED */
+
+ trx_deregister_from_2pc(trx);
+
+@@ -10973,6 +11140,7 @@
+
+ srv_active_wake_master_thread();
+
++#ifndef EXTENDED_FOR_COMMIT_ORDERED
+ if (thd_sql_command(thd) != SQLCOM_XA_PREPARE
+ && (all
+ || !thd_test_options(
+@@ -10999,6 +11167,7 @@
+ mysql_mutex_lock(&prepare_commit_mutex);
+ trx_owns_prepare_commit_mutex_set(trx);
+ }
++#endif /* ifndef EXTENDED_FOR_COMMIT_ORDERED */
+
+ return(error);
+ }
+--- a/storage/innobase/handler/ha_innodb.h
++++ b/storage/innobase/handler/ha_innodb.h
+@@ -240,6 +240,12 @@
+ struct charset_info_st *thd_charset(MYSQL_THD thd);
+ LEX_STRING *thd_query_string(MYSQL_THD thd);
+
++#ifdef EXTENDED_FOR_COMMIT_ORDERED
++/** Get the file name and position of the MySQL binlog corresponding to the
++ * current commit.
++ */
++void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
++#else
+ /** Get the file name of the MySQL binlog.
+ * @return the name of the binlog file
+ */
+@@ -249,6 +255,7 @@
+ * @return byte offset from the beginning of the binlog
+ */
+ ulonglong mysql_bin_log_file_pos(void);
++#endif
+
+ /**
+ Check if a user thread is a replication slave thread
+--- a/storage/innobase/include/trx0trx.h
++++ b/storage/innobase/include/trx0trx.h
+@@ -494,6 +494,7 @@
+ this is set to 1 then registered should
+ also be set to 1. This is used in the
+ XA code */
++ unsigned called_commit_ordered:1;/* 1 if innobase_commit_ordered has run. */
+ /*------------------------------*/
+ ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
+ ulint check_foreigns; /* normally TRUE, but if the user
+--- a/storage/innobase/trx/trx0trx.c
++++ b/storage/innobase/trx/trx0trx.c
+@@ -111,6 +111,7 @@
+
+ trx->is_registered = 0;
+ trx->owns_prepare_mutex = 0;
++ trx->called_commit_ordered = 0;
+
+ trx->start_time = ut_time();
+
+--- /dev/null
++++ b/mysql-test/r/group_commit.result
+@@ -0,0 +1,63 @@
++CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
++SELECT variable_value INTO @commits FROM information_schema.global_status
++WHERE variable_name = 'binlog_commits';
++SELECT variable_value INTO @group_commits FROM information_schema.global_status
++WHERE variable_name = 'binlog_group_commits';
++SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
++INSERT INTO t1 VALUES ("con1");
++set DEBUG_SYNC= "now WAIT_FOR group1_running";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con2";
++SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
++SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
++INSERT INTO t1 VALUES ("con2");
++SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con3";
++INSERT INTO t1 VALUES ("con3");
++SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con4";
++INSERT INTO t1 VALUES ("con4");
++SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
++SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
++SELECT * FROM t1 ORDER BY a;
++a
++SET DEBUG_SYNC= "now SIGNAL group2_queued";
++SELECT * FROM t1 ORDER BY a;
++a
++con1
++SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
++SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
++INSERT INTO t1 VALUES ("con5");
++SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con6_queued";
++INSERT INTO t1 VALUES ("con6");
++SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
++SELECT * FROM t1 ORDER BY a;
++a
++con1
++SET DEBUG_SYNC= "now SIGNAL group3_committed";
++SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
++SELECT * FROM t1 ORDER BY a;
++a
++con1
++con2
++con3
++con4
++SET DEBUG_SYNC= "now SIGNAL group2_checked";
++SELECT * FROM t1 ORDER BY a;
++a
++con1
++con2
++con3
++con4
++con5
++con6
++SELECT variable_value - @commits FROM information_schema.global_status
++WHERE variable_name = 'binlog_commits';
++variable_value - @commits
++6
++SELECT variable_value - @group_commits FROM information_schema.global_status
++WHERE variable_name = 'binlog_group_commits';
++variable_value - @group_commits
++3
++SET DEBUG_SYNC= 'RESET';
++DROP TABLE t1;
+--- /dev/null
++++ b/mysql-test/r/group_commit_binlog_pos.result
+@@ -0,0 +1,35 @@
++CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
++INSERT INTO t1 VALUES (0);
++SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
++SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3";
++INSERT INTO t1 VALUES (1);
++SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con2_queued";
++INSERT INTO t1 VALUES (2);
++SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con3_queued";
++INSERT INTO t1 VALUES (3);
++SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
++SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
++SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
++SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
++SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
++SELECT * FROM t1 ORDER BY a;
++a
++0
++1
++2
++SET SESSION debug="+d,crash_dispatch_command_before";
++SELECT 1;
++Got one of the listed errors
++Got one of the listed errors
++Got one of the listed errors
++SELECT * FROM t1 ORDER BY a;
++a
++0
++1
++2
++3
++InnoDB: Last MySQL binlog file position 0 768, file name ./master-bin.000001
++SET DEBUG_SYNC= 'RESET';
++DROP TABLE t1;
+--- /dev/null
++++ b/mysql-test/r/group_commit_crash.result
+@@ -0,0 +1,120 @@
++CREATE TABLE t1(a CHAR(255),
++b CHAR(255),
++c CHAR(255),
++d CHAR(255),
++id INT AUTO_INCREMENT,
++PRIMARY KEY(id)) ENGINE=InnoDB;
++create table t2 like t1;
++create procedure setcrash(IN i INT)
++begin
++CASE i
++WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
++WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
++WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
++WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
++WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
++ELSE BEGIN END;
++END CASE;
++end //
++FLUSH TABLES;
++INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++RESET MASTER;
++START TRANSACTION;
++insert into t1 select * from t2;
++call setcrash(5);
++COMMIT;
++Got one of the listed errors
++SELECT * FROM t1 ORDER BY id;
++a b c d id
++SHOW BINLOG EVENTS LIMIT 2,1;
++Log_name Pos Event_type Server_id End_log_pos Info
++delete from t1;
++RESET MASTER;
++START TRANSACTION;
++insert into t1 select * from t2;
++call setcrash(4);
++COMMIT;
++Got one of the listed errors
++SELECT * FROM t1 ORDER BY id;
++a b c d id
++a b c d 1
++a b c d 2
++a b c d 3
++a b c d 4
++a b c d 5
++a b c d 6
++a b c d 7
++a b c d 8
++a b c d 9
++a b c d 10
++SHOW BINLOG EVENTS LIMIT 2,1;
++Log_name Pos Event_type Server_id End_log_pos Info
++master-bin.000001 175 Query 1 269 use `test`; insert into t1 select * from t2
++delete from t1;
++RESET MASTER;
++START TRANSACTION;
++insert into t1 select * from t2;
++call setcrash(3);
++COMMIT;
++Got one of the listed errors
++SELECT * FROM t1 ORDER BY id;
++a b c d id
++a b c d 1
++a b c d 2
++a b c d 3
++a b c d 4
++a b c d 5
++a b c d 6
++a b c d 7
++a b c d 8
++a b c d 9
++a b c d 10
++SHOW BINLOG EVENTS LIMIT 2,1;
++Log_name Pos Event_type Server_id End_log_pos Info
++master-bin.000001 175 Query 1 269 use `test`; insert into t1 select * from t2
++delete from t1;
++RESET MASTER;
++START TRANSACTION;
++insert into t1 select * from t2;
++call setcrash(2);
++COMMIT;
++Got one of the listed errors
++SELECT * FROM t1 ORDER BY id;
++a b c d id
++a b c d 1
++a b c d 2
++a b c d 3
++a b c d 4
++a b c d 5
++a b c d 6
++a b c d 7
++a b c d 8
++a b c d 9
++a b c d 10
++SHOW BINLOG EVENTS LIMIT 2,1;
++Log_name Pos Event_type Server_id End_log_pos Info
++master-bin.000001 175 Query 1 269 use `test`; insert into t1 select * from t2
++delete from t1;
++RESET MASTER;
++START TRANSACTION;
++insert into t1 select * from t2;
++call setcrash(1);
++COMMIT;
++Got one of the listed errors
++SELECT * FROM t1 ORDER BY id;
++a b c d id
++SHOW BINLOG EVENTS LIMIT 2,1;
++Log_name Pos Event_type Server_id End_log_pos Info
++delete from t1;
++DROP TABLE t1;
++DROP TABLE t2;
++DROP PROCEDURE setcrash;
+--- /dev/null
++++ b/mysql-test/r/xa_binlog.result
+@@ -0,0 +1,32 @@
++CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
++SET binlog_format= mixed;
++RESET MASTER;
++XA START 'xatest';
++INSERT INTO t1 VALUES (1);
++XA END 'xatest';
++XA PREPARE 'xatest';
++XA COMMIT 'xatest';
++XA START 'xatest';
++INSERT INTO t1 VALUES (2);
++XA END 'xatest';
++XA COMMIT 'xatest' ONE PHASE;
++BEGIN;
++INSERT INTO t1 VALUES (3);
++COMMIT;
++SELECT * FROM t1 ORDER BY a;
++a
++1
++2
++3
++SHOW BINLOG EVENTS LIMIT 1,9;
++Log_name Pos Event_type Server_id End_log_pos Info
++master-bin.000001 # Query 1 # BEGIN
++master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (1)
++master-bin.000001 # Query 1 # COMMIT
++master-bin.000001 # Query 1 # BEGIN
++master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (2)
++master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
++master-bin.000001 # Query 1 # BEGIN
++master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (3)
++master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
++DROP TABLE t1;
+--- /dev/null
++++ b/mysql-test/suite/binlog/r/binlog_ioerr.result
+@@ -0,0 +1,28 @@
++CALL mtr.add_suppression("Error writing file 'master-bin'");
++RESET MASTER;
++CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
++INSERT INTO t1 VALUES(0);
++SET SESSION debug='+d,fail_binlog_write_1';
++INSERT INTO t1 VALUES(1);
++ERROR HY000: Error writing file 'master-bin' (errno: 28)
++INSERT INTO t1 VALUES(2);
++ERROR HY000: Error writing file 'master-bin' (errno: 28)
++SET SESSION debug='';
++INSERT INTO t1 VALUES(3);
++SELECT * FROM t1;
++a
++0
++3
++SHOW BINLOG EVENTS;
++Log_name Pos Event_type Server_id End_log_pos Info
++BINLOG POS Format_desc 1 ENDPOS Server ver: #, Binlog ver: #
++BINLOG POS Query 1 ENDPOS use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb
++BINLOG POS Query 1 ENDPOS BEGIN
++BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(0)
++BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
++BINLOG POS Query 1 ENDPOS BEGIN
++BINLOG POS Query 1 ENDPOS BEGIN
++BINLOG POS Query 1 ENDPOS BEGIN
++BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(3)
++BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
++DROP TABLE t1;
+--- /dev/null
++++ b/mysql-test/suite/binlog/t/binlog_ioerr.test
+@@ -0,0 +1,30 @@
++source include/have_debug.inc;
++source include/have_innodb.inc;
++source include/have_log_bin.inc;
++source include/have_binlog_format_mixed_or_statement.inc;
++
++CALL mtr.add_suppression("Error writing file 'master-bin'");
++
++RESET MASTER;
++
++CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
++INSERT INTO t1 VALUES(0);
++SET SESSION debug='+d,fail_binlog_write_1';
++--error ER_ERROR_ON_WRITE
++INSERT INTO t1 VALUES(1);
++--error ER_ERROR_ON_WRITE
++INSERT INTO t1 VALUES(2);
++SET SESSION debug='';
++INSERT INTO t1 VALUES(3);
++SELECT * FROM t1;
++
++# Actually the output from this currently shows a bug.
++# The injected IO error leaves partially written transactions in the binlog in
++# the form of stray "BEGIN" events.
++# These should disappear from the output if binlog error handling is improved
++# (see MySQL Bug#37148 and WL#1790).
++--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/
++--replace_column 1 BINLOG 2 POS 5 ENDPOS
++SHOW BINLOG EVENTS;
++
++DROP TABLE t1;
+--- /dev/null
++++ b/mysql-test/t/group_commit.test
+@@ -0,0 +1,115 @@
++--source include/have_debug_sync.inc
++--source include/have_innodb.inc
++--source include/have_log_bin.inc
++
++# Test some group commit code paths by using debug_sync to do controlled
++# commits of 6 transactions: first 1 alone, then 3 as a group, then 2 as a
++# group.
++#
++# Group 3 is allowed to race as far as possible ahead before group 2 finishes
++# to check some edge case for concurrency control.
++
++CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
++
++SELECT variable_value INTO @commits FROM information_schema.global_status
++ WHERE variable_name = 'binlog_commits';
++SELECT variable_value INTO @group_commits FROM information_schema.global_status
++ WHERE variable_name = 'binlog_group_commits';
++
++connect(con1,localhost,root,,);
++connect(con2,localhost,root,,);
++connect(con3,localhost,root,,);
++connect(con4,localhost,root,,);
++connect(con5,localhost,root,,);
++connect(con6,localhost,root,,);
++
++# Start group1 (with one thread) doing commit, waiting for
++# group2 to queue up before finishing.
++
++connection con1;
++SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
++send INSERT INTO t1 VALUES ("con1");
++
++# Make group2 (with three threads) queue up.
++# Make sure con2 is the group commit leader for group2.
++# Make group2 wait with running commit_ordered() until group3 has committed.
++
++connection con2;
++set DEBUG_SYNC= "now WAIT_FOR group1_running";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con2";
++SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
++SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
++send INSERT INTO t1 VALUES ("con2");
++connection con3;
++SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con3";
++send INSERT INTO t1 VALUES ("con3");
++connection con4;
++SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL group2_con4";
++send INSERT INTO t1 VALUES ("con4");
++
++# When group2 is queued, let group1 continue and queue group3.
++
++connection default;
++SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
++
++# At this point, transaction 1 is still not visible as commit_ordered() has not
++# been called yet.
++SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
++SELECT * FROM t1 ORDER BY a;
++
++SET DEBUG_SYNC= "now SIGNAL group2_queued";
++connection con1;
++reap;
++
++# Now transaction 1 is visible.
++connection default;
++SELECT * FROM t1 ORDER BY a;
++
++connection con5;
++SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
++SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
++send INSERT INTO t1 VALUES ("con5");
++
++connection con6;
++SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con6_queued";
++send INSERT INTO t1 VALUES ("con6");
++
++connection default;
++SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
++# Still only transaction 1 visible, as group2 has not yet run commit_ordered().
++SELECT * FROM t1 ORDER BY a;
++SET DEBUG_SYNC= "now SIGNAL group3_committed";
++SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
++# Now transactions 1-4 visible.
++SELECT * FROM t1 ORDER BY a;
++SET DEBUG_SYNC= "now SIGNAL group2_checked";
++
++connection con2;
++reap;
++
++connection con3;
++reap;
++
++connection con4;
++reap;
++
++connection con5;
++reap;
++
++connection con6;
++reap;
++
++connection default;
++# Check all transactions finally visible.
++SELECT * FROM t1 ORDER BY a;
++
++SELECT variable_value - @commits FROM information_schema.global_status
++ WHERE variable_name = 'binlog_commits';
++SELECT variable_value - @group_commits FROM information_schema.global_status
++ WHERE variable_name = 'binlog_group_commits';
++
++SET DEBUG_SYNC= 'RESET';
++DROP TABLE t1;
+--- /dev/null
++++ b/mysql-test/t/group_commit_binlog_pos-master.opt
+@@ -0,0 +1 @@
++--skip-stack-trace --skip-core-file
+--- /dev/null
++++ b/mysql-test/t/group_commit_binlog_pos.test
+@@ -0,0 +1,89 @@
++--source include/have_debug_sync.inc
++--source include/have_innodb.inc
++--source include/have_log_bin.inc
++--source include/have_binlog_format_mixed_or_statement.inc
++
++# Need DBUG to crash the server intentionally
++--source include/have_debug.inc
++# Don't test this under valgrind, memory leaks will occur as we crash
++--source include/not_valgrind.inc
++
++# The test case currently uses grep and tail, which may be unavailable on
++# some Windows systems. But see MWL#191 for how to remove the need for grep.
++--source include/not_windows.inc
++
++# XtraDB stores the binlog position corresponding to the last commit, and
++# prints it during crash recovery.
++# Test that we get the correct position when we group commit several
++# transactions together.
++
++CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
++INSERT INTO t1 VALUES (0);
++
++connect(con1,localhost,root,,);
++connect(con2,localhost,root,,);
++connect(con3,localhost,root,,);
++
++# Queue up three commits for group commit.
++
++connection con1;
++SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
++SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3";
++send INSERT INTO t1 VALUES (1);
++
++connection con2;
++SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con2_queued";
++send INSERT INTO t1 VALUES (2);
++
++connection con3;
++SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
++SET DEBUG_SYNC= "commit_group_commit_queue SIGNAL con3_queued";
++send INSERT INTO t1 VALUES (3);
++
++connection default;
++SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
++# At this point, no transactions are committed.
++SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
++SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
++# At this point, 1 transaction is committed.
++SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
++SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
++
++# At this point, 2 transactions are committed.
++SELECT * FROM t1 ORDER BY a;
++
++connection con2;
++reap;
++
++# Now crash the server with 1+2 in-memory committed, 3 only prepared.
++connection default;
++system echo wait-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
++SET SESSION debug="+d,crash_dispatch_command_before";
++--error 2006,2013
++SELECT 1;
++
++connection con1;
++--error 2006,2013
++reap;
++connection con3;
++--error 2006,2013
++reap;
++
++system echo restart-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
++
++connection default;
++--enable_reconnect
++--source include/wait_until_connected_again.inc
++
++# Crash recovery should recover all three transactions.
++SELECT * FROM t1 ORDER BY a;
++
++# Check that the binlog position reported by InnoDB is the correct one
++# for the end of the second transaction (as can be checked with
++# mysqlbinlog).
++let $MYSQLD_DATADIR= `SELECT @@datadir`;
++--exec grep 'InnoDB: Last MySQL binlog file position' $MYSQLD_DATADIR/../../log/mysqld.1.err | tail -1
++
++SET DEBUG_SYNC= 'RESET';
++DROP TABLE t1;
+--- /dev/null
++++ b/mysql-test/t/group_commit_crash-master.opt
+@@ -0,0 +1 @@
++--skip-stack-trace --skip-core-file
+--- /dev/null
++++ b/mysql-test/t/group_commit_crash.test
+@@ -0,0 +1,80 @@
++# Testing group commit by crashing a few times.
++# Test adapted from the Facebook patch: lp:mysqlatfacebook
++--source include/not_embedded.inc
++# Don't test this under valgrind, memory leaks will occur
++--source include/not_valgrind.inc
++
++# Binary must be compiled with debug for crash to occur
++--source include/have_debug.inc
++--source include/have_innodb.inc
++--source include/have_log_bin.inc
++
++let $innodb_file_format_max_orig=`select @@innodb_file_format_max`;
++CREATE TABLE t1(a CHAR(255),
++ b CHAR(255),
++ c CHAR(255),
++ d CHAR(255),
++ id INT AUTO_INCREMENT,
++ PRIMARY KEY(id)) ENGINE=InnoDB;
++create table t2 like t1;
++delimiter //;
++create procedure setcrash(IN i INT)
++begin
++ CASE i
++ WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
++ WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
++ WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
++ WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
++ WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
++ ELSE BEGIN END;
++ END CASE;
++end //
++delimiter ;//
++# Avoid getting a crashed mysql.proc table.
++FLUSH TABLES;
++
++let $numtests = 5;
++
++let $numinserts = 10;
++while ($numinserts)
++{
++ dec $numinserts;
++ INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
++}
++
++--enable_reconnect
++
++while ($numtests)
++{
++ RESET MASTER;
++
++ START TRANSACTION;
++ insert into t1 select * from t2;
++ # Write file to make mysql-test-run.pl expect crash
++ --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
++
++ eval call setcrash($numtests);
++
++ # Run the crashing query
++ --error 2006,2013
++ COMMIT;
++
++ # Poll the server waiting for it to be back online again.
++ --source include/wait_until_connected_again.inc
++
++ # table and binlog should be in sync.
++ SELECT * FROM t1 ORDER BY id;
++ SHOW BINLOG EVENTS LIMIT 2,1;
++
++ delete from t1;
++
++ dec $numtests;
++}
++
++# final cleanup
++DROP TABLE t1;
++DROP TABLE t2;
++DROP PROCEDURE setcrash;
++--disable_query_log
++eval SET GLOBAL innodb_file_format_max=$innodb_file_format_max_orig;
++--enable_query_log
+--- /dev/null
++++ b/mysql-test/t/xa_binlog.test
+@@ -0,0 +1,32 @@
++--source include/have_innodb.inc
++--source include/have_log_bin.inc
++
++CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
++
++# Fix binlog format (otherwise SHOW BINLOG EVENTS will fluctuate).
++SET binlog_format= mixed;
++
++RESET MASTER;
++
++XA START 'xatest';
++INSERT INTO t1 VALUES (1);
++XA END 'xatest';
++XA PREPARE 'xatest';
++XA COMMIT 'xatest';
++
++XA START 'xatest';
++INSERT INTO t1 VALUES (2);
++XA END 'xatest';
++XA COMMIT 'xatest' ONE PHASE;
++
++BEGIN;
++INSERT INTO t1 VALUES (3);
++COMMIT;
++
++SELECT * FROM t1 ORDER BY a;
++
++--replace_column 2 # 5 #
++--replace_regex /xid=[0-9]+/xid=XX/
++SHOW BINLOG EVENTS LIMIT 1,9;
++
++DROP TABLE t1;
if (page_zip && !dict_index_is_clust(index)
--- a/storage/innobase/btr/btr0sea.c
+++ b/storage/innobase/btr/btr0sea.c
-@@ -48,6 +48,8 @@
+@@ -47,6 +47,8 @@
+ Protected by btr_search_latch. */
UNIV_INTERN char btr_search_enabled = TRUE;
- UNIV_INTERN ibool btr_search_fully_disabled = FALSE;
+UNIV_INTERN ulint btr_search_index_num = 1;
+
- /** Mutex protecting btr_search_enabled */
- static mutex_t btr_search_enabled_mutex;
-
-@@ -79,7 +81,9 @@
+ #ifdef UNIV_PFS_MUTEX
+ /* Key to register btr_search_enabled_mutex with performance schema */
+ UNIV_INTERN mysql_pfs_key_t btr_search_enabled_mutex_key;
+@@ -75,7 +77,9 @@
/* We will allocate the latch from dynamic memory to get it to the
same DRAM page as other hotspot semaphores */
/** padding to prevent other memory update hotspots from residing on
the same memory cache line */
-@@ -131,18 +135,19 @@
+@@ -127,18 +131,19 @@
will not guarantee success. */
static
void
heap = table->heap;
-@@ -153,7 +158,7 @@
+@@ -149,7 +154,7 @@
if (heap->free_block == NULL) {
buf_block_t* block = buf_block_alloc(NULL);
if (heap->free_block == NULL) {
heap->free_block = block;
-@@ -161,7 +166,7 @@
+@@ -157,7 +162,7 @@
buf_block_free(block);
}
}
}
-@@ -173,19 +178,30 @@
+@@ -169,17 +174,28 @@
/*==================*/
ulint hash_size) /*!< in: hash index hash table size */
{
- SYNC_SEARCH_SYS);
+ //rw_lock_create(btr_search_latch_key, &btr_search_latch,
+ // SYNC_SEARCH_SYS);
- mutex_create(btr_search_enabled_mutex_key,
- &btr_search_enabled_mutex, SYNC_SEARCH_SYS_CONF);
btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));
}
/*****************************************************************//**
-@@ -195,11 +211,22 @@
+@@ -189,11 +205,22 @@
btr_search_sys_free(void)
/*=====================*/
{
mem_free(btr_search_sys);
btr_search_sys = NULL;
}
-@@ -212,7 +239,7 @@
+@@ -206,9 +233,10 @@
/*====================*/
{
- mutex_enter(&btr_search_enabled_mutex);
+ dict_table_t* table;
++ ulint i;
+
+ mutex_enter(&dict_sys->mutex);
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock_all();
- /* Disable access to hash index, also tell ha_insert_for_fold()
- stop adding new nodes to hash index, but still allow updating
-@@ -230,7 +257,7 @@
- /* btr_search_enabled_mutex should guarantee this. */
- ut_ad(!btr_search_enabled);
+ btr_search_enabled = FALSE;
+
+@@ -232,10 +260,12 @@
+ buf_pool_clear_hash_index();
+
+ /* Clear the adaptive hash index. */
+- hash_table_clear(btr_search_sys->hash_index);
+- mem_heap_empty(btr_search_sys->hash_index->heap);
++ for (i = 0; i < btr_search_index_num; i++) {
++ hash_table_clear(btr_search_sys->hash_index[i]);
++ mem_heap_empty(btr_search_sys->hash_index[i]->heap);
++ }
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock_all();
- mutex_exit(&btr_search_enabled_mutex);
}
-@@ -242,12 +269,12 @@
+ /********************************************************************//**
+@@ -245,11 +275,11 @@
+ btr_search_enable(void)
/*====================*/
{
- mutex_enter(&btr_search_enabled_mutex);
- rw_lock_x_lock(&btr_search_latch);
+ btr_search_x_lock_all();
btr_search_enabled = TRUE;
- btr_search_fully_disabled = FALSE;
- rw_lock_x_unlock(&btr_search_latch);
+ btr_search_x_unlock_all();
- mutex_exit(&btr_search_enabled_mutex);
}
-@@ -300,20 +327,21 @@
+ /*****************************************************************//**
+@@ -301,20 +331,21 @@
ulint
btr_search_info_get_ref_count(
/*==========================*/
return(ret);
}
-@@ -334,8 +362,8 @@
+@@ -335,8 +366,8 @@
int cmp;
#ifdef UNIV_SYNC_DEBUG
#endif /* UNIV_SYNC_DEBUG */
index = cursor->index;
-@@ -453,8 +481,8 @@
+@@ -454,8 +485,8 @@
/*!< in: cursor */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED)
|| rw_lock_own(&block->lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
-@@ -538,7 +566,7 @@
+@@ -539,7 +570,7 @@
ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
|| rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
-@@ -578,10 +606,10 @@
+@@ -580,10 +611,10 @@
mem_heap_free(heap);
}
#ifdef UNIV_SYNC_DEBUG
block, rec);
}
}
-@@ -601,8 +629,8 @@
+@@ -603,8 +634,8 @@
ulint* params2;
#ifdef UNIV_SYNC_DEBUG
#endif /* UNIV_SYNC_DEBUG */
block = btr_cur_get_block(cursor);
-@@ -623,7 +651,7 @@
+@@ -625,7 +656,7 @@
if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
}
if (cursor->flag == BTR_CUR_HASH_FAIL) {
-@@ -633,11 +661,11 @@
+@@ -635,11 +666,11 @@
btr_search_n_hash_fail++;
#endif /* UNIV_SEARCH_PERF_STAT */
}
if (build_index) {
-@@ -882,17 +910,17 @@
+@@ -884,17 +915,17 @@
cursor->flag = BTR_CUR_HASH;
if (UNIV_LIKELY(!has_search_latch)) {
if (UNIV_UNLIKELY(!rec)) {
goto failure_unlock;
-@@ -910,7 +938,7 @@
+@@ -912,7 +943,7 @@
goto failure_unlock;
}
buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
}
-@@ -1007,7 +1035,7 @@
+@@ -1009,7 +1040,7 @@
/*-------------------------------------------*/
failure_unlock:
if (UNIV_LIKELY(!has_search_latch)) {
}
failure:
cursor->flag = BTR_CUR_HASH_FAIL;
-@@ -1030,10 +1058,11 @@
+@@ -1032,10 +1063,11 @@
void
btr_search_drop_page_hash_index(
/*============================*/
{
hash_table_t* table;
ulint n_fields;
-@@ -1052,22 +1081,60 @@
+@@ -1054,23 +1086,55 @@
ulint* offsets;
#ifdef UNIV_SYNC_DEBUG
retry:
- rw_lock_s_lock(&btr_search_latch);
-+ if (index_in) {
-+ index = index_in;
-+ rw_lock_s_lock(btr_search_get_latch(index->id));
-+ } else if (btr_search_index_num > 1) {
+- index = block->index;
++ if (btr_search_index_num > 1) {
+ rw_lock_t* btr_search_latch;
+
+ /* FIXME: This may be optimistic implementation still. */
+ btr_search_latch = (rw_lock_t*)(block->btr_search_latch);
+ if (UNIV_LIKELY(!btr_search_latch)) {
-+ if (block->is_hashed) {
++ if (block->index) {
+ goto retry;
+ }
+ return;
+ rw_lock_s_unlock(btr_search_latch);
+ goto retry;
+ }
-+ if (UNIV_LIKELY(!block->is_hashed)) {
++ if (UNIV_LIKELY(!block->index)) {
+ rw_lock_s_unlock(btr_search_latch);
+ goto retry;
+ }
+ } else {
+ /* btr_search_index_num == 1 */
+ /* btr_search_latch is only one and able to obtain
-+ before evaluating block->is_hashed. */
++ before evaluating block->index. */
+ rw_lock_s_lock(btr_search_latch_part[0]);
-+ if (UNIV_LIKELY(!block->is_hashed)) {
++ if (UNIV_LIKELY(!block->index)) {
+ rw_lock_s_unlock(btr_search_latch_part[0]);
+ return;
+ }
+ index = block->index;
+ }
-+
- page = block->frame;
- if (UNIV_LIKELY(!block->is_hashed)) {
+ if (UNIV_LIKELY(!index)) {
- rw_lock_s_unlock(&btr_search_latch);
+ rw_lock_s_unlock(btr_search_get_latch(index->id));
return;
}
+- ut_a(!dict_index_is_ibuf(index));
- table = btr_search_sys->hash_index;
+ table = btr_search_get_hash_index(index->id);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
-@@ -1077,14 +1144,14 @@
+@@ -1080,12 +1144,14 @@
n_fields = block->curr_n_fields;
n_bytes = block->curr_n_bytes;
-- index = block->index;
+ ut_a(index == block->index);
- ut_a(!dict_index_is_ibuf(index));
++ ut_a(!dict_index_is_ibuf(index));
/* NOTE: The fields of block must not be accessed after
releasing btr_search_latch, as the index page might only
ut_a(n_fields + n_bytes > 0);
-@@ -1134,7 +1201,7 @@
+@@ -1136,7 +1202,7 @@
mem_heap_free(heap);
}
- rw_lock_x_lock(&btr_search_latch);
+ rw_lock_x_lock(btr_search_get_latch(index->id));
- if (UNIV_UNLIKELY(!block->is_hashed)) {
+ if (UNIV_UNLIKELY(!block->index)) {
/* Someone else has meanwhile dropped the hash index */
-@@ -1150,7 +1217,7 @@
+@@ -1152,7 +1218,7 @@
/* Someone else has meanwhile built a new hash index on the
page, with different parameters */
mem_free(folds);
goto retry;
-@@ -1166,6 +1233,7 @@
+@@ -1167,6 +1233,7 @@
+ index->search_info->ref_count--;
- block->is_hashed = FALSE;
block->index = NULL;
+ block->btr_search_latch = NULL;
-
+
cleanup:
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-@@ -1178,14 +1246,14 @@
+@@ -1179,14 +1246,14 @@
"InnoDB: the hash index to a page of %s,"
" still %lu hash nodes remain.\n",
index->name, (ulong) block->n_pointers);
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
mem_free(folds);
-@@ -1217,9 +1285,9 @@
+@@ -1218,9 +1285,9 @@
ulint* offsets;
ibool released_search_latch;
for (j = 0; j < srv_buf_pool_instances; j++) {
buf_pool_t* buf_pool;
-@@ -1253,7 +1321,7 @@
+@@ -1254,7 +1321,7 @@
/* keeping latch order */
released_search_latch = TRUE;
rw_lock_x_lock(&block->lock);
-@@ -1305,7 +1373,7 @@
+@@ -1306,7 +1373,7 @@
mem_heap_empty(heap);
}
- rw_lock_x_lock(&btr_search_latch);
+ rw_lock_x_lock(btr_search_get_latch(index->id));
- if (UNIV_UNLIKELY(!block->is_hashed)) {
+ if (UNIV_UNLIKELY(!block->index)) {
goto cleanup;
-@@ -1315,12 +1383,12 @@
+@@ -1316,12 +1383,12 @@
if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
|| UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {
goto retry;
}
-@@ -1334,6 +1402,7 @@
+@@ -1334,6 +1401,7 @@
+ index->search_info->ref_count--;
- block->is_hashed = FALSE;
block->index = NULL;
+ block->btr_search_latch = NULL;
cleanup:
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-@@ -1346,18 +1415,18 @@
+@@ -1346,18 +1414,18 @@
index->name, (ulong) block->n_pointers);
}
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
-@@ -1404,7 +1473,7 @@
+@@ -1395,7 +1463,7 @@
buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
}
mtr_commit(&mtr);
-@@ -1446,26 +1515,26 @@
+@@ -1436,31 +1504,26 @@
ut_ad(index);
ut_a(!dict_index_is_ibuf(index));
-- table = btr_search_sys->hash_index;
+ table = btr_search_get_hash_index(index->id);
- page = buf_block_get_frame(block);
-
++ page = buf_block_get_frame(block);
++
#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+ ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- rw_lock_s_lock(&btr_search_latch);
+-
+- if (!btr_search_enabled) {
+- rw_lock_s_unlock(&btr_search_latch);
+- return;
+- }
+-
+- table = btr_search_sys->hash_index;
+- page = buf_block_get_frame(block);
+ rw_lock_s_lock(btr_search_get_latch(index->id));
- if (block->is_hashed && ((block->curr_n_fields != n_fields)
- || (block->curr_n_bytes != n_bytes)
- || (block->curr_left_side != left_side))) {
+ if (block->index && ((block->curr_n_fields != n_fields)
+- || (block->curr_n_bytes != n_bytes)
+- || (block->curr_left_side != left_side))) {
++ || (block->curr_n_bytes != n_bytes)
++ || (block->curr_left_side != left_side))) {
- rw_lock_s_unlock(&btr_search_latch);
+ rw_lock_s_unlock(btr_search_get_latch(index->id));
}
n_recs = page_get_n_recs(page);
-@@ -1559,9 +1628,9 @@
+@@ -1554,9 +1617,9 @@
fold = next_fold;
}
- rw_lock_x_lock(&btr_search_latch);
+ rw_lock_x_lock(btr_search_get_latch(index->id));
- if (UNIV_UNLIKELY(btr_search_fully_disabled)) {
+ if (UNIV_UNLIKELY(!btr_search_enabled)) {
goto exit_func;
-@@ -1589,6 +1658,7 @@
+@@ -1583,6 +1646,7 @@
block->curr_n_bytes = n_bytes;
block->curr_left_side = left_side;
block->index = index;
for (i = 0; i < n_cached; i++) {
-@@ -1596,7 +1666,7 @@
+@@ -1590,7 +1654,7 @@
}
exit_func:
mem_free(folds);
mem_free(recs);
-@@ -1635,13 +1705,13 @@
- ut_a(!(new_block->is_hashed || block->is_hashed)
- || !dict_index_is_ibuf(index));
+@@ -1625,7 +1689,7 @@
+ ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
- rw_lock_s_lock(&btr_search_latch);
+ rw_lock_s_lock(btr_search_get_latch(index->id));
- if (new_block->is_hashed) {
+ ut_a(!new_block->index || new_block->index == index);
+ ut_a(!block->index || block->index == index);
+@@ -1634,9 +1698,9 @@
+
+ if (new_block->index) {
- rw_lock_s_unlock(&btr_search_latch);
+ rw_lock_s_unlock(btr_search_get_latch(index->id));
return;
}
-@@ -1656,7 +1726,7 @@
+@@ -1651,7 +1715,7 @@
new_block->n_bytes = block->curr_n_bytes;
new_block->left_side = left_side;
ut_a(n_fields + n_bytes > 0);
-@@ -1668,7 +1738,7 @@
+@@ -1663,7 +1727,7 @@
return;
}
}
/********************************************************************//**
-@@ -1707,7 +1777,7 @@
+@@ -1702,7 +1766,7 @@
ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
- ut_a(!dict_index_is_ibuf(cursor->index));
+ ut_a(!dict_index_is_ibuf(index));
- table = btr_search_sys->hash_index;
+ table = btr_search_get_hash_index(cursor->index->id);
- index_id = cursor->index->id;
- fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, offsets_,
-@@ -1716,11 +1786,11 @@
- if (UNIV_LIKELY_NULL(heap)) {
+ rec = btr_cur_get_rec(cursor);
+
+@@ -1713,7 +1777,7 @@
mem_heap_free(heap);
}
+
- rw_lock_x_lock(&btr_search_latch);
+ rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
- ha_search_and_delete_if_found(table, fold, rec);
+ if (block->index) {
+ ut_a(block->index == index);
+@@ -1721,7 +1785,7 @@
+ ha_search_and_delete_if_found(table, fold, rec);
+ }
- rw_lock_x_unlock(&btr_search_latch);
+ rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
}
/********************************************************************//**
-@@ -1754,21 +1824,21 @@
- ut_a(block->index == cursor->index);
- ut_a(!dict_index_is_ibuf(cursor->index));
+@@ -1758,7 +1822,7 @@
+ ut_a(cursor->index == index);
+ ut_a(!dict_index_is_ibuf(index));
- rw_lock_x_lock(&btr_search_latch);
+ rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
- if ((cursor->flag == BTR_CUR_HASH)
- && (cursor->n_fields == block->curr_n_fields)
+ if (!block->index) {
+
+@@ -1772,15 +1836,15 @@
&& (cursor->n_bytes == block->curr_n_bytes)
&& !block->curr_left_side) {
ha_search_and_update_if_found(table, cursor->fold, rec,
block, page_rec_get_next(rec));
+ func_exit:
- rw_lock_x_unlock(&btr_search_latch);
+ rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
} else {
btr_search_update_hash_on_insert(cursor);
}
-@@ -1803,9 +1873,9 @@
+@@ -1815,9 +1879,9 @@
ulint* offsets = offsets_;
rec_offs_init(offsets_);
rec = btr_cur_get_rec(cursor);
-@@ -1850,7 +1920,7 @@
+@@ -1862,7 +1926,7 @@
} else {
if (left_side) {
- rw_lock_x_lock(&btr_search_latch);
-+ rw_lock_x_lock(btr_search_get_latch(index_id));
++ rw_lock_x_lock(btr_search_get_latch(index->id));
locked = TRUE;
-@@ -1864,7 +1934,7 @@
+@@ -1880,7 +1944,7 @@
if (!locked) {
- rw_lock_x_lock(&btr_search_latch);
-+ rw_lock_x_lock(btr_search_get_latch(index_id));
++ rw_lock_x_lock(btr_search_get_latch(index->id));
locked = TRUE;
- }
-@@ -1882,7 +1952,7 @@
+
+@@ -1902,7 +1966,7 @@
if (!left_side) {
if (!locked) {
- rw_lock_x_lock(&btr_search_latch);
-+ rw_lock_x_lock(btr_search_get_latch(index_id));
++ rw_lock_x_lock(btr_search_get_latch(index->id));
locked = TRUE;
- }
-@@ -1897,7 +1967,7 @@
+
+@@ -1921,7 +1985,7 @@
if (!locked) {
- rw_lock_x_lock(&btr_search_latch);
-+ rw_lock_x_lock(btr_search_get_latch(index_id));
++ rw_lock_x_lock(btr_search_get_latch(index->id));
locked = TRUE;
- }
-@@ -1920,7 +1990,7 @@
+
+@@ -1948,7 +2012,7 @@
mem_heap_free(heap);
}
if (locked) {
- rw_lock_x_unlock(&btr_search_latch);
-+ rw_lock_x_unlock(btr_search_get_latch(index_id));
++ rw_lock_x_unlock(btr_search_get_latch(index->id));
}
}
-@@ -1936,7 +2006,7 @@
+@@ -1964,7 +2028,7 @@
ha_node_t* node;
ulint n_page_dumps = 0;
ibool ok = TRUE;
ulint cell_count;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
-@@ -1948,23 +2018,25 @@
+@@ -1976,23 +2040,25 @@
rec_offs_init(offsets_);
for (; node != NULL; node = node->next) {
const buf_block_t* block
-@@ -2073,19 +2145,21 @@
+@@ -2099,19 +2165,21 @@
give other queries a chance to run. */
if (i != 0) {
buf_pool_page_hash_x_unlock_all();
block->index = NULL;
+ block->btr_search_latch = NULL;
- block->is_hashed = FALSE;
-
-@@ -1414,7 +1415,7 @@
- /* To follow the latching order, we
- have to release btr_search_latch
- before acquiring block->latch. */
-- rw_lock_x_unlock(&btr_search_latch);
-+ btr_search_x_unlock_all();
- /* When we release the search latch,
- we must rescan all blocks, because
- some may become hashed again. */
-@@ -1445,11 +1446,11 @@
- anything. block->is_hashed can only
- be set on uncompressed file pages. */
-
-- btr_search_drop_page_hash_index(block);
-+ btr_search_drop_page_hash_index(block, NULL);
-
- rw_lock_x_unlock(&block->lock);
-
-- rw_lock_x_lock(&btr_search_latch);
-+ btr_search_x_lock_all();
-
- ut_ad(!btr_search_enabled);
- }
-@@ -1468,7 +1469,11 @@
- ibool released_search_latch;
+ #ifdef UNIV_DEBUG
+ block->page.in_page_hash = FALSE;
+@@ -1392,7 +1393,11 @@
+ ulint p;
#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(!btr_search_enabled);
-@@ -2204,6 +2209,7 @@
+@@ -2107,6 +2112,7 @@
{
block->check_index_page_at_flush = FALSE;
block->index = NULL;
+ block->btr_search_latch = NULL;
block->n_hash_helps = 0;
- block->is_hashed = FALSE;
+ block->n_fields = 1;
--- a/storage/innobase/buf/buf0lru.c
+++ b/storage/innobase/buf/buf0lru.c
@@ -560,7 +560,7 @@
if (ref_count == 0) {
break;
}
---- a/storage/innobase/ha/ha0ha.c
-+++ b/storage/innobase/ha/ha0ha.c
-@@ -102,7 +102,8 @@
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- #ifdef UNIV_SYNC_DEBUG
-- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
-+ /* cannot identificate which btr_search_latch[i] for now */
-+ //ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
- #endif /* UNIV_SYNC_DEBUG */
-
- #ifndef UNIV_HOTBACKUP
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -11824,6 +11824,11 @@
/************************************************************************
Drops a page hash index based on index */
UNIV_INTERN
-@@ -199,10 +201,47 @@
+@@ -199,6 +201,40 @@
# define btr_search_validate() TRUE
#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
+btr_search_s_unlock_all(void);
+/*==========================*/
+
-+
- /** Flag: has the search system been enabled?
- Protected by btr_search_latch and btr_search_enabled_mutex. */
- extern char btr_search_enabled;
-
-+extern ulint btr_search_index_num;
-+
- /** Flag: whether the search system has completed its disabling process,
- It is set to TRUE right after buf_pool_drop_hash_index() in
- btr_search_disable(), indicating hash index entries are cleaned up.
-@@ -269,7 +308,7 @@
+ /** The search info struct in an index */
+ struct btr_search_struct{
+ ulint ref_count; /*!< Number of blocks in this index tree
+@@ -259,7 +295,7 @@
/** The hash index system */
struct btr_search_sys_struct{
mapping dtuple_fold values
to rec_t pointers on index pages */
};
-@@ -290,10 +329,12 @@
-
- Bear in mind (3) and (4) when using the hash index.
- */
--extern rw_lock_t* btr_search_latch_temp;
-+//extern rw_lock_t* btr_search_latch_temp;
-+
-+extern rw_lock_t** btr_search_latch_part;
-
- /** The latch protecting the adaptive search system */
--#define btr_search_latch (*btr_search_latch_temp)
-+//#define btr_search_latch (*btr_search_latch_temp)
-
- #ifdef UNIV_SEARCH_PERF_STAT
- /** Number of successful adaptive hash index lookups */
--- a/storage/innobase/include/btr0sea.ic
+++ b/storage/innobase/include/btr0sea.ic
@@ -62,8 +62,8 @@
+ }
+}
+
---- a/storage/innobase/include/buf0buf.h
-+++ b/storage/innobase/include/buf0buf.h
-@@ -1576,7 +1576,7 @@
- pointers in the adaptive hash index
- pointing to this frame */
- #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-- unsigned is_hashed:1; /*!< TRUE if hash index has
-+ volatile unsigned is_hashed:1; /*!< TRUE if hash index has
- already been built on this
- page; note that it does not
- guarantee that the index is
-@@ -1590,6 +1590,7 @@
- unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
- dict_index_t* index; /*!< Index for which the adaptive
- hash index has been created. */
-+ volatile rw_lock_t* btr_search_latch;
- /* @} */
- # ifdef UNIV_SYNC_DEBUG
- /** @name Debug fields */
---- a/storage/innobase/page/page0page.c
-+++ b/storage/innobase/page/page0page.c
-@@ -218,7 +218,7 @@
- const ibool is_hashed = block->is_hashed;
-
- if (is_hashed) {
-- rw_lock_x_lock(&btr_search_latch);
-+ rw_lock_x_lock(btr_search_get_latch(block->index->id));
- }
-
- ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-@@ -244,7 +244,7 @@
-
- #ifndef UNIV_HOTBACKUP
- if (is_hashed) {
-- rw_lock_x_unlock(&btr_search_latch);
-+ rw_lock_x_unlock(btr_search_get_latch(block->index->id));
- }
- #endif /* !UNIV_HOTBACKUP */
- }
--- a/storage/innobase/page/page0zip.c
+++ b/storage/innobase/page/page0zip.c
@@ -4456,7 +4456,7 @@
(ulong) btr_search_sys_subtotal,
(ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),
---- a/storage/innobase/sync/sync0sync.c
-+++ b/storage/innobase/sync/sync0sync.c
-@@ -1222,7 +1222,6 @@
- case SYNC_OUTER_ANY_LATCH:
- case SYNC_FILE_FORMAT_TAG:
- case SYNC_DOUBLEWRITE:
-- case SYNC_SEARCH_SYS:
- case SYNC_SEARCH_SYS_CONF:
- case SYNC_TRX_LOCK_HEAP:
- case SYNC_KERNEL:
-@@ -1244,6 +1243,7 @@
- ut_error;
- }
- break;
-+ case SYNC_SEARCH_SYS:
- case SYNC_BUF_LRU_LIST:
- case SYNC_BUF_FLUSH_LIST:
- case SYNC_BUF_PAGE_HASH:
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -265,8 +265,14 @@
trx->has_search_latch = FALSE;
}
+--- a/storage/innobase/include/btr0types.h
++++ b/storage/innobase/include/btr0types.h
+@@ -52,15 +52,19 @@
+
+ Bear in mind (3) and (4) when using the hash index.
+ */
+-extern rw_lock_t* btr_search_latch_temp;
++//extern rw_lock_t* btr_search_latch_temp;
++
++extern rw_lock_t** btr_search_latch_part;
+
+ /** The latch protecting the adaptive search system */
+-#define btr_search_latch (*btr_search_latch_temp)
++//#define btr_search_latch (*btr_search_latch_temp)
+
+ /** Flag: has the search system been enabled?
+ Protected by btr_search_latch. */
+ extern char btr_search_enabled;
+
++extern ulint btr_search_index_num;
++
+ #ifdef UNIV_BLOB_DEBUG
+ # include "buf0types.h"
+ /** An index->blobs entry for keeping track of off-page column references */
+--- a/storage/innobase/ha/ha0ha.c
++++ b/storage/innobase/ha/ha0ha.c
+@@ -120,7 +120,7 @@
+ ut_a(block->frame == page_align(data));
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ut_ad(rw_lock_own(block->btr_search_latch, RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+ ASSERT_HASH_MUTEX_OWN(table, fold);
+ ut_ad(btr_search_enabled);
+@@ -213,7 +213,7 @@
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ // ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+ ut_ad(btr_search_enabled);
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+@@ -253,7 +253,7 @@
+ ut_a(new_block->frame == page_align(new_data));
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ // ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+
+ if (!btr_search_enabled) {
+@@ -296,7 +296,7 @@
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+ ASSERT_HASH_MUTEX_OWN(table, fold);
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ // ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+ ut_ad(btr_search_enabled);
+
+--- a/storage/innobase/include/ha0ha.ic
++++ b/storage/innobase/include/ha0ha.ic
+@@ -121,7 +121,7 @@
+
+ ASSERT_HASH_MUTEX_OWN(table, fold);
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
++// ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+ #endif /* UNIV_SYNC_DEBUG */
+ ut_ad(btr_search_enabled);
+
+@@ -186,7 +186,7 @@
+
+ ASSERT_HASH_MUTEX_OWN(table, fold);
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++// ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+ ut_ad(btr_search_enabled);
+
+--- a/storage/innobase/include/buf0buf.h
++++ b/storage/innobase/include/buf0buf.h
+@@ -1580,6 +1580,7 @@
+ complete, though: there may
+ have been hash collisions,
+ record deletions, etc. */
++ volatile rw_lock_t* btr_search_latch;
+ /* @} */
+ # ifdef UNIV_SYNC_DEBUG
+ /** @name Debug fields */
+--- a/storage/innobase/sync/sync0sync.c
++++ b/storage/innobase/sync/sync0sync.c
+@@ -1222,7 +1222,6 @@
+ case SYNC_OUTER_ANY_LATCH:
+ case SYNC_FILE_FORMAT_TAG:
+ case SYNC_DOUBLEWRITE:
+- case SYNC_SEARCH_SYS:
+ case SYNC_TRX_LOCK_HEAP:
+ case SYNC_KERNEL:
+ case SYNC_IBUF_BITMAP_MUTEX:
+@@ -1243,6 +1242,7 @@
+ ut_error;
+ }
+ break;
++ case SYNC_SEARCH_SYS:
+ case SYNC_BUF_LRU_LIST:
+ case SYNC_BUF_FLUSH_LIST:
+ case SYNC_BUF_PAGE_HASH:
/** @brief Initialize the default value of innodb_commit_concurrency.
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
-@@ -4205,3 +4205,139 @@
- STRUCT_FLD(system_vars, NULL),
- STRUCT_FLD(__reserved1, NULL)
+@@ -4243,3 +4243,140 @@
+ STRUCT_FLD(__reserved1, NULL),
+ STRUCT_FLD(flags, 0UL)
};
+
+/***********************************************************************
+ STRUCT_FLD(version, 0x0100 /* 1.0 */),
+ STRUCT_FLD(status_vars, NULL),
+ STRUCT_FLD(system_vars, NULL),
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++ STRUCT_FLD(flags, 0UL)
+};
--- a/storage/innobase/handler/i_s.h
+++ b/storage/innobase/handler/i_s.h
# should be done or reviewed by the maintainer!
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
-@@ -4224,6 +4224,36 @@
+@@ -4126,6 +4126,36 @@
mutex_exit(block_mutex);
}
}
#define OK(expr) \
-@@ -4372,3 +4373,701 @@
- STRUCT_FLD(system_vars, NULL),
- STRUCT_FLD(__reserved1, NULL)
+@@ -4411,3 +4412,713 @@
+ STRUCT_FLD(__reserved1, NULL),
+ STRUCT_FLD(flags, 0UL)
};
+
+/***********************************************************************
+ table->field[2]->store(block->page.offset);
+ table->field[3]->store(page_get_n_recs(frame));
+ table->field[4]->store(page_get_data_size(frame));
-+ table->field[5]->store(block->is_hashed);
++ table->field[5]->store(block->index != NULL);
+ table->field[6]->store(block->page.access_time);
+ table->field[7]->store(block->page.newest_modification != 0);
+ table->field[8]->store(block->page.oldest_modification != 0);
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_pool_pages_index =
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_pool_pages_blob =
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+
--- a/storage/innobase/handler/i_s.h
#endif /* i_s_h */
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
-@@ -1174,6 +1174,14 @@
+@@ -1161,6 +1161,14 @@
/*===========*/
const buf_pool_t* buf_pool) /*!< in: buffer pool */
__attribute__((nonnull, const));
# should be done or reviewed by the maintainer!
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
-@@ -1007,10 +1007,12 @@
+@@ -1005,10 +1005,12 @@
buf_block_t* block;
byte* frame;
ulint i;
/* Reserve space for the block descriptors. */
mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
+ (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
-@@ -1048,6 +1050,10 @@
+@@ -1046,6 +1048,10 @@
chunk->size = size;
}
# should be done or reviewed by the maintainer!
--- a/storage/innobase/btr/btr0sea.c
+++ b/storage/innobase/btr/btr0sea.c
-@@ -1186,6 +1186,179 @@
+@@ -1187,6 +1187,178 @@
mem_free(folds);
}
+ if (buf_block_get_state(block)
+ != BUF_BLOCK_FILE_PAGE
+ || block->index != index
-+ || !block->is_hashed) {
++ || !block->index) {
+ continue;
+ }
+
+
+ rw_lock_x_lock(&btr_search_latch);
+
-+ if (UNIV_UNLIKELY(!block->is_hashed)) {
++ if (UNIV_UNLIKELY(!block->index)) {
+ goto cleanup;
+ }
+
+ ut_a(index->search_info->ref_count > 0);
+ index->search_info->ref_count--;
+
-+ block->is_hashed = FALSE;
+ block->index = NULL;
+
+cleanup:
+}
+
/********************************************************************//**
- Drops a page hash index when a page is freed from a fseg to the file system.
- Drops possible hash index if the page happens to be in the buffer pool. */
+ Drops a possible page hash index when a page is evicted from the buffer pool
+ or freed in a file segment. */
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
@@ -294,14 +294,14 @@
+/*=====================================*/
+ dict_index_t* index); /* in: record descriptor */
/********************************************************************//**
- Drops a page hash index when a page is freed from a fseg to the file system.
- Drops possible hash index if the page happens to be in the buffer pool. */
+ Drops a possible page hash index when a page is evicted from the buffer pool
+ or freed in a file segment. */
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
-@@ -1585,6 +1585,15 @@
+@@ -1575,6 +1575,15 @@
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
export_vars.innodb_buffer_pool_write_requests
= srv_buf_pool_write_requests;
+--- /dev/null
++++ b/mysql-test/suite/sys_vars/r/innodb_dict_size_limit_basic.result
+@@ -0,0 +1,3 @@
++SELECT @@global.innodb_dict_size_limit;
++@@global.innodb_dict_size_limit
++0
+--- /dev/null
++++ b/mysql-test/suite/sys_vars/t/innodb_dict_size_limit_basic.test
+@@ -0,0 +1 @@
++SELECT @@global.innodb_dict_size_limit;
+--source include/wait_until_count_sessions.inc
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
-@@ -1632,6 +1632,9 @@
+@@ -1638,6 +1638,9 @@
alter_list(rhs.alter_list, mem_root),
key_list(rhs.key_list, mem_root),
create_list(rhs.create_list, mem_root),
flags(rhs.flags),
keys_onoff(rhs.keys_onoff),
tablespace_op(rhs.tablespace_op),
-@@ -1654,6 +1657,7 @@
+@@ -1660,6 +1663,7 @@
list_copy_and_replace_each_value(alter_list, mem_root);
list_copy_and_replace_each_value(key_list, mem_root);
list_copy_and_replace_each_value(create_list, mem_root);
Tries to open a single-table tablespace and optionally checks the space id is
right in it. If does not succeed, prints an error message to the .err log. This
function is used to open a tablespace when we start up mysqld, and also in
-@@ -3079,7 +3165,7 @@
+@@ -3055,8 +3141,11 @@
+ accessing the first page of the file */
+ ulint id, /*!< in: space id */
+ ulint flags, /*!< in: tablespace flags */
+- const char* name) /*!< in: table name in the
++ const char* name, /*!< in: table name in the
+ databasename/tablename format */
++ trx_t* trx) /*!< in: transaction. This is only used
++ for IMPORT TABLESPACE, must be NULL
++ otherwise */
+ {
+ os_file_t file;
+ char* filepath;
+@@ -3079,7 +3168,7 @@
file = os_file_create_simple_no_error_handling(
innodb_file_data_key, filepath, OS_FILE_OPEN,
if (!success) {
/* The following call prints an error message */
os_file_get_last_error(TRUE);
-@@ -3126,6 +3212,445 @@
+@@ -3126,6 +3215,453 @@
space_id = fsp_header_get_space_id(page);
space_flags = fsp_header_get_flags(page);
+ /* over write space id of all pages */
+ rec_offs_init(offsets_);
+
++ /* Unlock the data dictionary to not block queries
++ accessing other tables */
++ ut_a(trx);
++ row_mysql_unlock_data_dictionary(trx);
++
+ fprintf(stderr, "InnoDB: Progress in %%:");
+
+ for (offset = 0; offset < free_limit_bytes;
+
+ fprintf(stderr, " done.\n");
+
++ /* Reacquire the data dictionary lock */
++ row_mysql_lock_data_dictionary(trx);
++
+ /* update SYS_INDEXES set root page */
+ index = dict_table_get_first_index(table);
+ while (index) {
ut_free(buf2);
if (UNIV_UNLIKELY(space_id != id
-@@ -3167,6 +3692,271 @@
+@@ -3167,6 +3703,269 @@
os_file_close(file);
mem_free(filepath);
+ ulint page_no;
+ ulint zip_size;
+ ulint height;
-+ ulint root_height = 0;
+ rec_t* node_ptr;
+ dict_table_t* table;
+ dict_index_t* index;
+
+ if (height == ULINT_UNDEFINED) {
+ height = btr_page_get_level(page, &mtr);
-+ root_height = height;
+ }
+
+ if (height == 0) {
/* It is possible, though very improbable, that the lsn's in the
tablespace to be imported have risen above the current system lsn, if
a lengthy purge, ibuf merge, or rollback was performed on a backup
+@@ -2632,7 +2637,7 @@
+ success = fil_open_single_table_tablespace(
+ TRUE, table->space,
+ table->flags == DICT_TF_COMPACT ? 0 : table->flags,
+- table->name);
++ table->name, trx);
+ if (success) {
+ table->ibd_file_missing = FALSE;
+ table->tablespace_discarded = FALSE;
@@ -2658,6 +2663,11 @@
trx->op_info = "";
UNIV_INTERN ulint srv_dict_size_limit = 0;
/*-------------------------------------------*/
UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
+--- a/storage/innobase/dict/dict0load.c
++++ b/storage/innobase/dict/dict0load.c
+@@ -778,7 +778,7 @@
+ object and check that the .ibd file exists. */
+
+ fil_open_single_table_tablespace(FALSE, space_id,
+- flags, name);
++ flags, name, NULL);
+ }
+
+ mem_free(name);
+@@ -1833,7 +1833,7 @@
+ if (!fil_open_single_table_tablespace(
+ TRUE, table->space,
+ table->flags == DICT_TF_COMPACT ? 0 :
+- table->flags & ~(~0 << DICT_TF_BITS), name)) {
++ table->flags & ~(~0 << DICT_TF_BITS), name, NULL)) {
+ /* We failed to find a sensible
+ tablespace file */
+
+--- a/storage/innobase/include/fil0fil.h
++++ b/storage/innobase/include/fil0fil.h
+@@ -34,6 +34,7 @@
+ #include "sync0rw.h"
+ #include "ibuf0types.h"
+ #endif /* !UNIV_HOTBACKUP */
++#include "trx0types.h"
+
+ /** When mysqld is run, the default directory "." is the mysqld datadir,
+ but in the MySQL Embedded Server Library and ibbackup it is not the default
+@@ -473,8 +474,11 @@
+ accessing the first page of the file */
+ ulint id, /*!< in: space id */
+ ulint flags, /*!< in: tablespace flags */
+- const char* name); /*!< in: table name in the
++ const char* name, /*!< in: table name in the
+ databasename/tablename format */
++ trx_t* trx); /*!< in: transaction. This is only used
++ for IMPORT TABLESPACE, must be NULL
++ otherwise */
+ /********************************************************************//**
+ It is possible, though very improbable, that the lsn's in the tablespace to be
+ imported have risen above the current system lsn, if a lengthy purge, ibuf
/*
IMPLEMENTATION OF THE BUFFER POOL
-@@ -1931,8 +1965,16 @@
+@@ -1834,8 +1868,16 @@
mutex_t* block_mutex;
ibool must_read;
unsigned access_time;
buf_pool->stat.n_page_gets++;
for (;;) {
-@@ -1950,7 +1992,7 @@
+@@ -1853,7 +1895,7 @@
//buf_pool_mutex_exit(buf_pool);
rw_lock_s_unlock(&buf_pool->page_hash_latch);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
-@@ -2046,6 +2088,13 @@
+@@ -1949,6 +1991,13 @@
/* Let us wait until the read operation
completes */
for (;;) {
enum buf_io_fix io_fix;
-@@ -2060,6 +2109,12 @@
+@@ -1963,6 +2012,12 @@
break;
}
}
}
#ifdef UNIV_IBUF_COUNT_DEBUG
-@@ -2375,6 +2430,11 @@
+@@ -2277,6 +2332,11 @@
ibool must_read;
ulint retries = 0;
mutex_t* block_mutex = NULL;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(mtr);
-@@ -2404,6 +2464,9 @@
+@@ -2306,6 +2366,9 @@
|| ibuf_page_low(space, zip_size, offset,
FALSE, file, line, NULL));
#endif
buf_pool->stat.n_page_gets++;
fold = buf_page_address_fold(space, offset);
loop:
-@@ -2474,9 +2537,9 @@
+@@ -2376,9 +2439,9 @@
return(NULL);
}
retries = 0;
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
-@@ -2786,6 +2849,13 @@
+@@ -2688,6 +2751,13 @@
/* Let us wait until the read operation
completes */
for (;;) {
enum buf_io_fix io_fix;
-@@ -2800,6 +2870,12 @@
+@@ -2702,6 +2772,12 @@
break;
}
}
}
fix_type = MTR_MEMO_BUF_FIX;
-@@ -2826,13 +2902,17 @@
+@@ -2728,13 +2804,17 @@
read-ahead */
buf_read_ahead_linear(space, zip_size, offset,
return(block);
}
-@@ -2856,6 +2936,7 @@
+@@ -2758,6 +2838,7 @@
unsigned access_time;
ibool success;
ulint fix_type;
ut_ad(block);
ut_ad(mtr);
-@@ -2933,6 +3014,10 @@
+@@ -2835,6 +2916,10 @@
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(block->page.file_page_was_freed == FALSE);
#endif
if (UNIV_UNLIKELY(!access_time)) {
/* In the case of a first access, try to apply linear
read-ahead */
-@@ -2940,7 +3025,7 @@
+@@ -2842,7 +2927,7 @@
buf_read_ahead_linear(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block),
}
#ifdef UNIV_IBUF_COUNT_DEBUG
-@@ -2950,6 +3035,9 @@
+@@ -2852,6 +2937,9 @@
buf_pool = buf_pool_from_block(block);
buf_pool->stat.n_page_gets++;
return(TRUE);
}
-@@ -2972,6 +3060,7 @@
+@@ -2874,6 +2962,7 @@
buf_pool_t* buf_pool;
ibool success;
ulint fix_type;
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
-@@ -3058,6 +3147,11 @@
+@@ -2960,6 +3049,11 @@
#endif
buf_pool->stat.n_page_gets++;
}
#define OK(expr) \
-@@ -1807,3 +1809,166 @@
+@@ -1807,3 +1809,170 @@
DBUG_RETURN(0);
}
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* Plugin flags */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL),
+};
--- a/storage/innobase/handler/i_s.h
+++ b/storage/innobase/handler/i_s.h
#ifdef EXTENDED_SLOWLOG
if (thd_log_slow_verbosity(thd) & SLOG_V_INNODB) {
trx->take_stats = TRUE;
-@@ -3196,6 +3204,11 @@
+@@ -3195,6 +3203,11 @@
trx_search_latch_release_if_reserved(trx);
}
thd_get_xid(thd, (MYSQL_XID*) &trx->xid);
/* Release a possible FIFO ticket and search latch. Since we will
-@@ -12420,6 +12464,7 @@
+@@ -12429,6 +12473,7 @@
MYSQL_SYSVAR(rollback_segments),
MYSQL_SYSVAR(corrupt_table_action),
MYSQL_SYSVAR(lazy_drop_table),
}
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innobase/row/row0ins.c
-@@ -1502,6 +1502,11 @@
+@@ -1505,6 +1505,11 @@
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(err);
}
-@@ -2007,7 +2012,7 @@
+@@ -2010,7 +2015,7 @@
}
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
&cursor, 0, __FILE__, __LINE__, &mtr);
if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
-@@ -2067,7 +2072,7 @@
+@@ -2070,7 +2075,7 @@
btr_cur_search_to_nth_level(index, 0, entry,
PAGE_CUR_LE,
&cursor, 0,
__FILE__, __LINE__, &mtr);
}
-@@ -2121,6 +2126,22 @@
+@@ -2124,6 +2129,22 @@
if (UNIV_LIKELY_NULL(big_rec)) {
rec_t* rec;
ulint* offsets;
MYSQL_SYSVAR(data_file_path),
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
-@@ -634,6 +634,11 @@
+@@ -621,6 +621,11 @@
buf_calc_page_new_checksum(
/*=======================*/
const byte* page); /*!< in: buffer page */
UNIV_INTERN ulong srv_replication_delay = 0;
+--- /dev/null
++++ b/mysql-test/suite/sys_vars/r/innodb_fast_checksum_basic.result
+@@ -0,0 +1,3 @@
++SELECT @@global.innodb_fast_checksum;
++@@global.innodb_fast_checksum
++0
+--- /dev/null
++++ b/mysql-test/suite/sys_vars/t/innodb_fast_checksum_basic.test
+@@ -0,0 +1 @@
++SELECT @@global.innodb_fast_checksum;
MYSQL_SYSVAR(buffer_pool_size),
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
-@@ -1715,7 +1715,7 @@
+@@ -1705,7 +1705,7 @@
time_t last_printout_time;
/*!< when buf_print_io was last time
called */
/*!< Statistics of buddy system,
indexed by block size */
buf_pool_stat_t stat; /*!< current statistics */
-@@ -1813,7 +1813,7 @@
+@@ -1803,7 +1803,7 @@
UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
/*!< unmodified compressed pages */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/*!< buddy free lists */
buf_page_t watch[BUF_POOL_WATCH_SIZE];
-@@ -1821,9 +1821,9 @@
+@@ -1811,9 +1811,9 @@
pool watches. Protected by
buf_pool->mutex. */
# should be done or reviewed by the maintainer!
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
-@@ -2041,6 +2041,27 @@
+@@ -1944,6 +1944,27 @@
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}
if (UNIV_UNLIKELY(!bpage->zip.data)) {
/* There is no compressed page. */
err_exit:
-@@ -2550,6 +2571,27 @@
+@@ -2452,6 +2473,27 @@
block = (buf_block_t*) buf_page_hash_get_low(
buf_pool, space, offset, fold);
if (block) {
block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
ut_a(block_mutex);
}
-@@ -3472,11 +3514,28 @@
+@@ -3374,11 +3416,28 @@
fold = buf_page_address_fold(space, offset);
if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
/* The page is already in the buffer pool. */
watch_page = NULL;
-@@ -3607,6 +3666,7 @@
+@@ -3509,6 +3568,7 @@
bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->space = space;
bpage->offset = offset;
#ifdef UNIV_DEBUG
bpage->in_page_hash = FALSE;
-@@ -3691,6 +3751,7 @@
+@@ -3593,6 +3653,7 @@
fold = buf_page_address_fold(space, offset);
//buf_pool_mutex_enter(buf_pool);
mutex_enter(&buf_pool->LRU_list_mutex);
rw_lock_x_lock(&buf_pool->page_hash_latch);
-@@ -3698,6 +3759,21 @@
+@@ -3600,6 +3661,21 @@
block = (buf_block_t*) buf_page_hash_get_low(
buf_pool, space, offset, fold);
if (block
&& buf_page_in_file(&block->page)
&& !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
-@@ -4051,8 +4127,11 @@
+@@ -3953,8 +4029,11 @@
}
if (io_type == BUF_IO_WRITE
+ for (k = chunk->size; k--; block++) {
+ if (buf_block_get_state(block)
+ != BUF_BLOCK_FILE_PAGE
-+ || !block->is_hashed
++ || !block->index
+ || buf_page_get_space(&block->page) != id) {
+ continue;
+ }
NULL, /* Plugin Deinit */
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
-@@ -1468,6 +1468,7 @@
+@@ -1455,6 +1455,7 @@
0 if the block was never accessed
in the buffer pool */
/* @} */
extern ulint srv_n_rows_inserted;
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
-@@ -690,6 +690,7 @@
+@@ -689,6 +689,7 @@
#define SYNC_BUF_POOL 150 /* Buffer pool mutex */
#define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
#define SYNC_DOUBLEWRITE 140
/* Write the log but do not flush it to disk */
+--- a/mysql-test/include/default_mysqld.cnf
++++ b/mysql-test/include/default_mysqld.cnf
+@@ -29,7 +29,7 @@
+ max_heap_table_size= 1M
+
+ loose-innodb_data_file_path= ibdata1:10M:autoextend
+-loose-innodb_buffer_pool_size= 8M
++loose-innodb_buffer_pool_size= 32M
+ loose-innodb_write_io_threads= 2
+ loose-innodb_read_io_threads= 2
+ loose-innodb_log_buffer_size= 1M
+--- a/mysql-test/suite/innodb/r/innodb.result
++++ b/mysql-test/suite/innodb/r/innodb.result
+@@ -1678,7 +1678,7 @@
+ drop table t1;
+ SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total';
+ variable_value
+-511
++2047
+ SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size';
+ variable_value
+ 16384
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
-@@ -882,9 +882,9 @@
+@@ -880,9 +880,9 @@
block->page.in_zip_hash = FALSE;
block->page.in_flush_list = FALSE;
block->page.in_free_list = FALSE;
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-@@ -1429,7 +1429,7 @@
+@@ -1366,7 +1366,7 @@
memcpy(dpage, bpage, sizeof *dpage);
ut_d(bpage->in_page_hash = FALSE);
/* relocate buf_pool->LRU */
-@@ -3284,8 +3284,8 @@
+@@ -3188,8 +3188,8 @@
bpage->in_zip_hash = FALSE;
bpage->in_flush_list = FALSE;
bpage->in_free_list = FALSE;
ut_d(bpage->in_page_hash = TRUE);
-@@ -3450,7 +3450,7 @@
+@@ -3354,7 +3354,7 @@
ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
/* Flush pages from the end of the LRU list if necessary */
if (buf_debug_prints) {
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
-@@ -1417,11 +1417,11 @@
+@@ -1404,11 +1404,11 @@
UT_LIST_NODE_T(buf_page_t) LRU;
/*!< node of the LRU list */
innobase_old_blocks_pct = buf_LRU_old_ratio_update(
innobase_old_blocks_pct, TRUE);
-@@ -2694,6 +2867,25 @@
+@@ -2693,6 +2866,25 @@
trx_t* trx) /*!< in: transaction handle */
{
if (trx_is_started(trx)) {
ut_ad(page_is_leaf(buf_block_get_frame(block)));
rec = btr_cur_get_rec(cursor);
-@@ -3650,6 +3733,11 @@
+@@ -3645,6 +3728,11 @@
page = btr_cur_get_page(&cursor);
-
+#include "srv0srv.h"
/** Flag: has the search system been enabled?
- Protected by btr_search_latch and btr_search_enabled_mutex. */
+ Protected by btr_search_latch. */
UNIV_INTERN char btr_search_enabled = TRUE;
-@@ -607,6 +607,11 @@
+@@ -609,6 +609,11 @@
block = btr_cur_get_block(cursor);
/* prototypes for new functions added to ha_innodb.cc */
trx_t* innobase_get_trx();
-@@ -1136,6 +1137,11 @@
+@@ -1134,6 +1135,11 @@
ready = buf_flush_ready_for_replace(&block->page);
mutex_exit(&block->mutex);
if (!ready) {
return(block);
-@@ -2007,6 +2013,13 @@
+@@ -1910,6 +1916,13 @@
return(NULL);
}
block_mutex = buf_page_get_mutex_enter(bpage);
rw_lock_s_unlock(&buf_pool->page_hash_latch);
-@@ -2587,6 +2600,13 @@
+@@ -2489,6 +2502,13 @@
return(NULL);
}
switch (buf_block_get_state(block)) {
buf_page_t* bpage;
ibool success;
-@@ -3261,6 +3281,7 @@
+@@ -3163,6 +3183,7 @@
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
HASH_INVALIDATE(bpage, hash);
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
bpage->file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-@@ -3899,6 +3920,7 @@
+@@ -3801,6 +3822,7 @@
(ulong) bpage->offset);
}
/* From version 3.23.38 up we store the page checksum
to the 4 first bytes of the page end lsn field */
-@@ -3940,6 +3962,23 @@
+@@ -3842,6 +3864,23 @@
REFMAN "forcing-innodb-recovery.html\n"
"InnoDB: about forcing recovery.\n", stderr);
if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
/* If page space id is larger than TRX_SYS_SPACE
(0), we will attempt to mark the corresponding
-@@ -3956,6 +3995,7 @@
+@@ -3858,6 +3897,7 @@
}
}
}
if (recv_recovery_is_on()) {
/* Pages must be uncompressed for crash recovery. */
-@@ -3965,8 +4005,11 @@
+@@ -3867,8 +3907,11 @@
if (uncompressed && !recv_no_ibuf_operations) {
ibuf_merge_or_delete_for_page(
if (descr != NULL) {
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
-@@ -4012,6 +4012,12 @@
+@@ -4011,6 +4011,12 @@
DBUG_RETURN(1);
}
/* Create buffers for packing the fields of a record. Why
table->reclength did not work here? Obviously, because char
fields when packed actually became 1 byte longer, when we also
-@@ -4039,6 +4045,19 @@
+@@ -4038,6 +4044,19 @@
/* Get pointer to a table object in InnoDB dictionary cache */
ib_table = dict_table_get(norm_name, TRUE);
if (NULL == ib_table) {
if (is_part && retries < 10) {
++retries;
-@@ -5188,6 +5207,10 @@
+@@ -5187,6 +5206,10 @@
ha_statistic_increment(&SSV::ha_write_count);
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
table->timestamp_field->set_time();
-@@ -5405,6 +5428,10 @@
+@@ -5404,6 +5427,10 @@
func_exit:
innobase_active_small();
block, index != NULL && dict_index_is_ibuf(index)
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
-@@ -1016,7 +1016,7 @@
+@@ -1003,7 +1003,7 @@
const buf_block_t* block) /*!< in: pointer to the control block */
__attribute__((pure));
#else /* UNIV_DEBUG */
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Gets the space id of a block.
-@@ -1463,6 +1463,7 @@
+@@ -1450,6 +1450,7 @@
0 if the block was never accessed
in the buffer pool */
/* @} */
ut_ad(page_simple_validate_new((page_t*) page));
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innobase/row/row0ins.c
-@@ -1338,6 +1338,12 @@
+@@ -1341,6 +1341,12 @@
const rec_t* rec = btr_pcur_get_rec(&pcur);
const buf_block_t* block = btr_pcur_get_block(&pcur);
# should be done or reviewed by the maintainer!
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
-@@ -3868,7 +3868,8 @@
+@@ -3770,7 +3770,8 @@
read_space_id = mach_read_from_4(
frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
unsigned last_s_line:14; /*!< Line number where last time s-locked */
unsigned last_x_line:14; /*!< Line number where last time x-locked */
#ifdef UNIV_DEBUG
-@@ -692,10 +693,10 @@
+@@ -693,10 +694,10 @@
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
Performance schema instrumented wrap function for rw_lock_x_lock_func()
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
-@@ -640,10 +640,10 @@
+@@ -641,10 +641,10 @@
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
{
/* Initialize the rwlock for performance schema */
lock->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key))
-@@ -656,10 +656,10 @@
+@@ -657,10 +657,10 @@
# ifdef UNIV_SYNC_DEBUG
level,
# endif /* UNIV_SYNC_DEBUG */
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_enter(), not directly
this function!
-@@ -729,9 +729,9 @@
+@@ -728,9 +728,9 @@
ulint line; /*!< Line where the mutex was locked */
ulint level; /*!< Level in the global latching order */
#endif /* UNIV_SYNC_DEBUG */
os_thread_id_t thread_id; /*!< The thread id of the thread
which locked the mutex. */
ulint magic_n; /*!< MUTEX_MAGIC_N */
-@@ -746,9 +746,9 @@
+@@ -745,9 +745,9 @@
ulong count_os_yield; /*!< count of os_wait */
ulonglong lspent_time; /*!< mutex os_wait timer msec */
ulonglong lmax_spent_time;/*!< mutex os_wait timer msec */
}
/* these stats may not be accurate */
-@@ -665,10 +662,9 @@
+@@ -668,10 +665,9 @@
if (srv_print_latch_waits) {
fprintf(stderr,
"Thread %lu spin wait rw-x-lock at %p"
}
sync_array_reserve_cell(sync_primary_wait_array,
-@@ -689,10 +685,9 @@
+@@ -692,10 +688,9 @@
if (srv_print_latch_waits) {
fprintf(stderr,
"Thread %lu OS wait for rw-x-lock at %p"
# should be done or reviewed by the maintainer!
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
-@@ -4426,6 +4426,7 @@
+@@ -4330,6 +4330,7 @@
}
total_info->pool_size += pool_info->pool_size;
total_info->lru_len += pool_info->lru_len;
total_info->old_lru_len += pool_info->old_lru_len;
total_info->free_list_len += pool_info->free_list_len;
-@@ -4491,6 +4492,8 @@
+@@ -4395,6 +4396,8 @@
pool_info->pool_size = buf_pool->curr_size;
pool_info->lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
pool_info->old_lru_len = buf_pool->LRU_old_len;
-@@ -4612,14 +4615,16 @@
+@@ -4516,14 +4519,16 @@
ut_ad(pool_info);
fprintf(file,
/** Thread slot in the thread table */
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
-@@ -767,6 +767,10 @@
+@@ -766,6 +766,10 @@
#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
#include "dict0mem.h"
#include "dict0types.h"
#include "ha_prototypes.h" /* for innobase_convert_name() */
-@@ -1812,6 +1814,1675 @@
+@@ -1812,6 +1814,1703 @@
DBUG_RETURN(0);
}
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+
+/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_TABLESTATS */
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+
+/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_INDEXES */
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+
+/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_COLUMNS */
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_fields */
+static ST_FIELD_INFO innodb_sys_fields_fields_info[] =
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+
+/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign */
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign_cols */
+static ST_FIELD_INFO innodb_sys_foreign_cols_fields_info[] =
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+
/***********************************************************************
extern struct st_mysql_plugin i_s_innodb_rseg;
#endif /* i_s_h */
---- /dev/null
-+++ b/mysql-test/r/percona_innodb_use_sys_stats_table.result
-@@ -0,0 +1,3 @@
-+show variables like 'innodb_use_sys_stats%';
-+Variable_name Value
-+innodb_use_sys_stats_table ON
---- /dev/null
-+++ b/mysql-test/t/percona_innodb_use_sys_stats_table-master.opt
-@@ -0,0 +1 @@
-+--innodb_use_sys_stats_table
---- /dev/null
-+++ b/mysql-test/t/percona_innodb_use_sys_stats_table.test
-@@ -0,0 +1,2 @@
-+--source include/have_innodb.inc
-+show variables like 'innodb_use_sys_stats%';
# should be done or reviewed by the maintainer!
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
-@@ -4091,7 +4091,8 @@
+@@ -4085,7 +4085,8 @@
mtr_commit(mtr);
mutex_enter(&block->mutex);
/* Only free the block if it is still allocated to
-@@ -4102,16 +4103,21 @@
+@@ -4096,16 +4097,21 @@
&& buf_block_get_space(block) == space
&& buf_block_get_page_no(block) == page_no) {
--- a/storage/innobase/btr/btr0sea.c
+++ b/storage/innobase/btr/btr0sea.c
-@@ -1944,7 +1944,7 @@
+@@ -1972,7 +1972,7 @@
rec_offs_init(offsets_);
rw_lock_x_lock(&btr_search_latch);
cell_count = hash_get_n_cells(btr_search_sys->hash_index);
-@@ -1952,11 +1952,11 @@
+@@ -1980,11 +1980,11 @@
/* We release btr_search_latch every once in a while to
give other queries a chance to run. */
if ((i != 0) && ((i % chunk_size) == 0)) {
}
node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
-@@ -2067,11 +2067,11 @@
+@@ -2093,11 +2093,11 @@
/* We release btr_search_latch every once in a while to
give other queries a chance to run. */
if (i != 0) {
}
if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
-@@ -2079,7 +2079,7 @@
+@@ -2105,7 +2105,7 @@
}
}
UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
#endif /* UNIV_PFS_MUTEX */
-@@ -882,9 +887,13 @@
+@@ -880,9 +885,13 @@
block->page.in_zip_hash = FALSE;
block->page.in_flush_list = FALSE;
block->page.in_free_list = FALSE;
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-@@ -982,9 +991,11 @@
+@@ -980,9 +989,11 @@
memset(block->frame, '\0', UNIV_PAGE_SIZE);
#endif
/* Add the block to the free list */
ut_ad(buf_pool_from_block(block) == buf_pool);
block++;
-@@ -1039,7 +1050,8 @@
+@@ -1037,7 +1048,8 @@
buf_chunk_t* chunk = buf_pool->chunks;
ut_ad(buf_pool);
for (n = buf_pool->n_chunks; n--; chunk++) {
buf_block_t* block = buf_chunk_contains_zip(chunk, data);
-@@ -1145,9 +1157,21 @@
+@@ -1143,9 +1155,21 @@
------------------------------- */
mutex_create(buf_pool_mutex_key,
&buf_pool->mutex, SYNC_BUF_POOL);
buf_pool_mutex_enter(buf_pool);
if (buf_pool_size > 0) {
-@@ -1160,6 +1184,8 @@
+@@ -1158,6 +1182,8 @@
mem_free(chunk);
mem_free(buf_pool);
buf_pool_mutex_exit(buf_pool);
return(DB_ERROR);
-@@ -1190,6 +1216,8 @@
+@@ -1188,6 +1214,8 @@
/* All fields are initialized by mem_zalloc(). */
buf_pool_mutex_exit(buf_pool);
return(DB_SUCCESS);
-@@ -1402,7 +1430,11 @@
+@@ -1339,7 +1367,11 @@
ulint fold;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
ut_a(bpage->buf_fix_count == 0);
-@@ -1513,21 +1545,32 @@
+@@ -1450,21 +1482,32 @@
buf_page_t* bpage;
ulint i;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
bpage = &buf_pool->watch[i];
-@@ -1551,10 +1594,12 @@
+@@ -1488,10 +1531,12 @@
bpage->space = space;
bpage->offset = offset;
bpage->buf_fix_count = 1;
return(NULL);
case BUF_BLOCK_ZIP_PAGE:
ut_ad(bpage->in_page_hash);
-@@ -1572,6 +1617,8 @@
+@@ -1509,6 +1554,8 @@
ut_error;
/* Fix compiler warning */
return(NULL);
}
-@@ -1589,7 +1636,11 @@
+@@ -1526,7 +1573,11 @@
space, offset) */
buf_page_t* watch) /*!< in/out: sentinel for watch */
{
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
ut_d(watch->in_page_hash = FALSE);
-@@ -1611,28 +1662,31 @@
+@@ -1548,28 +1599,31 @@
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ulint fold = buf_page_address_fold(space, offset);
}
/****************************************************************//**
-@@ -1652,14 +1706,16 @@
+@@ -1589,14 +1643,16 @@
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ulint fold = buf_page_address_fold(space, offset);
return(ret);
}
-@@ -1676,13 +1732,15 @@
+@@ -1613,13 +1669,15 @@
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
}
/********************************************************************//**
-@@ -1706,14 +1764,20 @@
+@@ -1643,14 +1701,20 @@
ut_a(buf_page_in_file(bpage));
if (buf_page_peek_if_too_old(bpage)) {
}
}
-@@ -1730,7 +1794,8 @@
+@@ -1667,7 +1731,8 @@
buf_block_t* block;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
-@@ -1739,7 +1804,8 @@
+@@ -1676,7 +1741,8 @@
block->check_index_page_at_flush = FALSE;
}
+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
}
- /********************************************************************//**
-@@ -1758,7 +1824,8 @@
- ibool is_hashed;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
-
-- buf_pool_mutex_enter(buf_pool);
-+ //buf_pool_mutex_enter(buf_pool);
-+ rw_lock_s_lock(&buf_pool->page_hash_latch);
-
- block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
-
-@@ -1769,7 +1836,8 @@
- is_hashed = block->is_hashed;
- }
-
-- buf_pool_mutex_exit(buf_pool);
-+ //buf_pool_mutex_exit(buf_pool);
-+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
-
- return(is_hashed);
- }
-@@ -1791,7 +1859,8 @@
+ #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+@@ -1696,7 +1762,8 @@
buf_page_t* bpage;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
bpage = buf_page_hash_get(buf_pool, space, offset);
-@@ -1802,7 +1871,8 @@
+@@ -1707,7 +1774,8 @@
bpage->file_page_was_freed = TRUE;
}
return(bpage);
}
-@@ -1823,7 +1893,8 @@
+@@ -1728,7 +1796,8 @@
buf_page_t* bpage;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
bpage = buf_page_hash_get(buf_pool, space, offset);
-@@ -1832,7 +1903,8 @@
+@@ -1737,7 +1806,8 @@
bpage->file_page_was_freed = FALSE;
}
return(bpage);
}
-@@ -1864,8 +1936,9 @@
+@@ -1769,8 +1839,9 @@
buf_pool->stat.n_page_gets++;
for (;;) {
bpage = buf_page_hash_get(buf_pool, space, offset);
if (bpage) {
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
-@@ -1874,7 +1947,8 @@
+@@ -1779,7 +1850,8 @@
/* Page not in buf_pool: needs to be read from file */
buf_read_page(space, zip_size, offset);
-@@ -1886,10 +1960,15 @@
+@@ -1791,10 +1863,15 @@
if (UNIV_UNLIKELY(!bpage->zip.data)) {
/* There is no compressed page. */
err_exit:
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
switch (buf_page_get_state(bpage)) {
-@@ -1898,24 +1977,43 @@
+@@ -1803,24 +1880,43 @@
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
case BUF_BLOCK_ZIP_FREE:
buf_block_buf_fix_inc((buf_block_t*) bpage,
__FILE__, __LINE__);
goto got_block;
-@@ -1928,7 +2026,7 @@
+@@ -1833,7 +1929,7 @@
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
access_time = buf_page_is_accessed(bpage);
mutex_exit(block_mutex);
-@@ -2240,7 +2338,7 @@
+@@ -2144,7 +2240,7 @@
const buf_block_t* block) /*!< in: pointer to block,
not dereferenced */
{
if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
/* The pointer should be aligned. */
-@@ -2276,6 +2374,7 @@
+@@ -2180,6 +2276,7 @@
ulint fix_type;
ibool must_read;
ulint retries = 0;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(mtr);
-@@ -2309,18 +2408,24 @@
+@@ -2213,18 +2310,24 @@
fold = buf_page_address_fold(space, offset);
loop:
block = guess;
block = guess = NULL;
} else {
ut_ad(!block->page.in_zip_hash);
-@@ -2329,12 +2434,19 @@
+@@ -2233,12 +2336,19 @@
}
if (block == NULL) {
block = NULL;
}
-@@ -2346,12 +2458,14 @@
+@@ -2250,12 +2360,14 @@
space, offset, fold);
if (UNIV_LIKELY_NULL(block)) {
if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_PEEK_IF_IN_POOL
-@@ -2404,7 +2518,8 @@
+@@ -2308,7 +2420,8 @@
/* The page is being read to buffer pool,
but we cannot wait around for the read to
complete. */
return(NULL);
}
-@@ -2414,38 +2529,49 @@
+@@ -2318,38 +2431,49 @@
ibool success;
case BUF_BLOCK_FILE_PAGE:
{
buf_page_t* hash_bpage;
-@@ -2458,35 +2584,47 @@
+@@ -2362,35 +2486,47 @@
while buf_pool->mutex was released.
Free the block that was allocated. */
buf_block_init_low(block);
block->lock_hash_val = lock_rec_hash(space, offset);
-@@ -2496,7 +2634,7 @@
+@@ -2400,7 +2536,7 @@
if (buf_page_get_state(&block->page)
== BUF_BLOCK_ZIP_PAGE) {
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
&block->page);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_ad(!block->page.in_flush_list);
-@@ -2514,18 +2652,23 @@
+@@ -2418,18 +2554,23 @@
/* Insert at the front of unzip_LRU list */
buf_unzip_LRU_add_block(block, FALSE);
buf_page_free_descriptor(bpage);
/* Decompress the page and apply buffered operations
-@@ -2539,12 +2682,15 @@
+@@ -2443,12 +2584,15 @@
}
/* Unfix and unlatch the block. */
rw_lock_x_unlock(&block->lock);
break;
-@@ -2560,7 +2706,7 @@
+@@ -2464,7 +2608,7 @@
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#if UNIV_WORD_SIZE == 4
/* On 32-bit systems, there is no padding in buf_page_t. On
other systems, Valgrind could complain about uninitialized pad
-@@ -2573,8 +2719,8 @@
+@@ -2477,8 +2621,8 @@
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
/* Set the watch, as it would have
been set if the page were not in the
-@@ -2583,6 +2729,9 @@
+@@ -2487,6 +2631,9 @@
space, offset, fold);
if (UNIV_LIKELY_NULL(block)) {
/* The page entered the buffer
pool for some reason. Try to
-@@ -2590,7 +2739,7 @@
+@@ -2494,7 +2641,7 @@
goto got_block;
}
}
fprintf(stderr,
"innodb_change_buffering_debug evict %u %u\n",
(unsigned) space, (unsigned) offset);
-@@ -2612,13 +2761,14 @@
+@@ -2516,13 +2663,14 @@
ut_a(mode == BUF_GET_POSSIBLY_FREED
|| !block->page.file_page_was_freed);
#endif
if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
buf_page_set_accessed_make_young(&block->page, access_time);
-@@ -2851,9 +3001,11 @@
+@@ -2755,9 +2903,11 @@
buf_pool = buf_pool_from_block(block);
if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
} else if (!buf_page_is_accessed(&block->page)) {
/* Above, we do a dirty read on purpose, to avoid
mutex contention. The field buf_page_t::access_time
-@@ -2861,9 +3013,11 @@
+@@ -2765,9 +2915,11 @@
field must be protected by mutex, however. */
ulint time_ms = ut_time_ms();
}
ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
-@@ -2930,18 +3084,21 @@
+@@ -2834,18 +2986,21 @@
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-@@ -3031,7 +3188,10 @@
+@@ -2935,7 +3090,10 @@
buf_page_t* hash_page;
ut_ad(buf_pool == buf_pool_get(space, offset));
ut_ad(mutex_own(&(block->mutex)));
ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
-@@ -3060,11 +3220,14 @@
+@@ -2964,11 +3122,14 @@
if (UNIV_LIKELY(!hash_page)) {
} else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
/* Preserve the reference count. */
} else {
fprintf(stderr,
"InnoDB: Error: page %lu %lu already found"
-@@ -3074,7 +3237,8 @@
+@@ -2978,7 +3139,8 @@
(const void*) hash_page, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
mutex_exit(&block->mutex);
buf_print();
buf_LRU_print();
buf_validate();
-@@ -3157,7 +3321,9 @@
+@@ -3061,7 +3223,9 @@
fold = buf_page_address_fold(space, offset);
watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
-@@ -3166,9 +3332,15 @@
+@@ -3070,9 +3234,15 @@
err_exit:
if (block) {
mutex_enter(&block->mutex);
bpage = NULL;
goto func_exit;
-@@ -3191,6 +3363,8 @@
+@@ -3095,6 +3265,8 @@
buf_page_init(buf_pool, space, offset, fold, block);
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
-@@ -3218,7 +3392,7 @@
+@@ -3122,7 +3294,7 @@
been added to buf_pool->LRU and
buf_pool->page_hash. */
mutex_exit(&block->mutex);
mutex_enter(&block->mutex);
block->page.zip.data = data;
-@@ -3231,13 +3405,14 @@
+@@ -3135,13 +3307,14 @@
buf_unzip_LRU_add_block(block, TRUE);
}
/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool->mutex. Thus, we must
-@@ -3253,7 +3428,10 @@
+@@ -3157,7 +3330,10 @@
/* The block was added by some other thread. */
watch_page = NULL;
bpage = NULL;
goto func_exit;
-@@ -3301,20 +3479,26 @@
+@@ -3205,20 +3381,26 @@
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
bpage);
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-@@ -3356,7 +3540,9 @@
+@@ -3260,7 +3442,9 @@
fold = buf_page_address_fold(space, offset);
block = (buf_block_t*) buf_page_hash_get_low(
buf_pool, space, offset, fold);
-@@ -3372,7 +3558,9 @@
+@@ -3276,7 +3460,9 @@
#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
/* Page can be found in buf_pool */
buf_block_free(free_block);
-@@ -3394,6 +3582,7 @@
+@@ -3298,6 +3484,7 @@
mutex_enter(&block->mutex);
buf_page_init(buf_pool, space, offset, fold, block);
/* The block must be put to the LRU list */
buf_LRU_add_block(&block->page, FALSE);
-@@ -3420,7 +3609,7 @@
+@@ -3324,7 +3511,7 @@
the reacquisition of buf_pool->mutex. We also must
defer this operation until after the block descriptor
has been added to buf_pool->LRU and buf_pool->page_hash. */
mutex_enter(&block->mutex);
block->page.zip.data = data;
-@@ -3438,7 +3627,8 @@
+@@ -3342,7 +3529,8 @@
buf_page_set_accessed(&block->page, time_ms);
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
-@@ -3493,7 +3683,9 @@
+@@ -3397,7 +3585,9 @@
ibool ret = TRUE;
/* First unfix and release lock on the bpage */
mutex_enter(buf_page_get_mutex(bpage));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
ut_ad(bpage->buf_fix_count == 0);
-@@ -3514,11 +3706,15 @@
+@@ -3418,11 +3608,15 @@
ret = FALSE;
}
return(ret);
}
-@@ -3536,6 +3732,8 @@
+@@ -3440,6 +3634,8 @@
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
const ibool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
ut_a(buf_page_in_file(bpage));
-@@ -3678,8 +3876,26 @@
+@@ -3582,8 +3778,26 @@
}
}
#ifdef UNIV_IBUF_COUNT_DEBUG
if (io_type == BUF_IO_WRITE || uncompressed) {
-@@ -3702,6 +3918,7 @@
+@@ -3606,6 +3820,7 @@
the x-latch to this OS thread: do not let this confuse you in
debugging! */
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
buf_pool->stat.n_pages_read++;
-@@ -3719,6 +3936,9 @@
+@@ -3623,6 +3838,9 @@
buf_flush_write_complete(bpage);
if (uncompressed) {
rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
BUF_IO_WRITE);
-@@ -3741,8 +3961,8 @@
+@@ -3645,8 +3863,8 @@
}
#endif /* UNIV_DEBUG */
}
/*********************************************************************//**
-@@ -3759,7 +3979,9 @@
+@@ -3663,7 +3881,9 @@
ut_ad(buf_pool);
chunk = buf_pool->chunks;
-@@ -3776,7 +3998,9 @@
+@@ -3680,7 +3900,9 @@
}
}
return(TRUE);
}
-@@ -3824,7 +4048,8 @@
+@@ -3728,7 +3950,8 @@
freed = buf_LRU_search_and_free_block(buf_pool, 100);
}
ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
-@@ -3837,7 +4062,8 @@
+@@ -3741,7 +3964,8 @@
memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
buf_refresh_io_stats(buf_pool);
}
/*********************************************************************//**
-@@ -3879,7 +4105,10 @@
+@@ -3783,7 +4007,10 @@
ut_ad(buf_pool);
chunk = buf_pool->chunks;
-@@ -3974,7 +4203,7 @@
+@@ -3878,7 +4105,7 @@
/* Check clean compressed-only blocks. */
for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
switch (buf_page_get_io_fix(b)) {
case BUF_IO_NONE:
-@@ -4005,7 +4234,7 @@
+@@ -3909,7 +4136,7 @@
buf_flush_list_mutex_enter(buf_pool);
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
ut_ad(b->in_flush_list);
ut_a(b->oldest_modification);
n_flush++;
-@@ -4064,6 +4293,8 @@
+@@ -3968,6 +4195,8 @@
}
ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
fprintf(stderr, "Free list len %lu, free blocks %lu\n",
(ulong) UT_LIST_GET_LEN(buf_pool->free),
-@@ -4074,8 +4305,11 @@
+@@ -3978,8 +4207,11 @@
ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
ut_a(buf_LRU_validate());
ut_a(buf_flush_validate(buf_pool));
-@@ -4131,7 +4365,9 @@
+@@ -4035,7 +4267,9 @@
index_ids = mem_alloc(size * sizeof *index_ids);
counts = mem_alloc(sizeof(ulint) * size);
buf_flush_list_mutex_enter(buf_pool);
fprintf(stderr,
-@@ -4200,7 +4436,9 @@
+@@ -4104,7 +4338,9 @@
}
}
for (i = 0; i < n_found; i++) {
index = dict_index_get_if_in_cache(index_ids[i]);
-@@ -4257,7 +4495,7 @@
+@@ -4161,7 +4397,7 @@
buf_chunk_t* chunk;
ulint fixed_pages_number = 0;
chunk = buf_pool->chunks;
-@@ -4291,7 +4529,7 @@
+@@ -4195,7 +4431,7 @@
/* Traverse the lists of clean and dirty compressed-only blocks. */
for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
-@@ -4303,7 +4541,7 @@
+@@ -4207,7 +4443,7 @@
buf_flush_list_mutex_enter(buf_pool);
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
ut_ad(b->in_flush_list);
switch (buf_page_get_state(b)) {
-@@ -4329,7 +4567,7 @@
+@@ -4233,7 +4469,7 @@
buf_flush_list_mutex_exit(buf_pool);
mutex_exit(&buf_pool->zip_mutex);
return(fixed_pages_number);
}
-@@ -4487,6 +4725,8 @@
+@@ -4391,6 +4627,8 @@
/* Find appropriate pool_info to store stats for this buffer pool */
pool_info = &all_pool_info[pool_id];
buf_pool_mutex_enter(buf_pool);
buf_flush_list_mutex_enter(buf_pool);
-@@ -4602,6 +4842,8 @@
+@@ -4506,6 +4744,8 @@
pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
buf_refresh_io_stats(buf_pool);
buf_pool_mutex_exit(buf_pool);
}
-@@ -4846,11 +5088,13 @@
+@@ -4750,11 +4990,13 @@
{
ulint len;
- mutex_enter(&((buf_block_t*) bpage)->mutex);
+ //mutex_enter(&((buf_block_t*) bpage)->mutex);
is_fixed = bpage->buf_fix_count > 0
- || !((buf_block_t*) bpage)->is_hashed;
+ || !((buf_block_t*) bpage)->index;
- mutex_exit(&((buf_block_t*) bpage)->mutex);
+ //mutex_exit(&((buf_block_t*) bpage)->mutex);
Creates the buffer pool.
@return own: buf_pool object, NULL if not enough memory or error */
UNIV_INTERN
-@@ -864,6 +878,15 @@
+@@ -851,6 +865,15 @@
const buf_page_t* bpage) /*!< in: pointer to control block */
__attribute__((pure));
/*********************************************************************//**
Get the flush type of a page.
@return flush type */
-@@ -1345,7 +1368,7 @@
+@@ -1332,7 +1355,7 @@
All these are protected by buf_pool->mutex. */
/* @{ */
/*!< based on state, this is a
list node, protected either by
buf_pool->mutex or by
-@@ -1373,6 +1396,10 @@
+@@ -1360,6 +1383,10 @@
BUF_BLOCK_REMOVE_HASH or
BUF_BLOCK_READY_IN_USE. */
#ifdef UNIV_DEBUG
ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
when buf_pool->flush_list_mutex is
-@@ -1465,11 +1492,11 @@
+@@ -1452,11 +1479,11 @@
a block is in the unzip_LRU list
if page.state == BUF_BLOCK_FILE_PAGE
and page.zip.data != NULL */
mutex_t mutex; /*!< mutex protecting this block:
state (also protected by the buffer
pool mutex), io_fix, buf_fix_count,
-@@ -1646,6 +1673,11 @@
+@@ -1636,6 +1663,11 @@
pool instance, protects compressed
only pages (of type buf_page_t, not
buf_block_t */
ulint instance_no; /*!< Array index of this buffer
pool instance */
ulint old_pool_size; /*!< Old pool size in bytes */
-@@ -1799,8 +1831,8 @@
+@@ -1789,8 +1821,8 @@
/** Test if a buffer pool mutex is owned. */
#define buf_pool_mutex_own(b) mutex_own(&b->mutex)
/** Acquire a buffer pool mutex. */
+#define SYNC_LOG_FLUSH_ORDER 156
#define SYNC_RECV 168
#define SYNC_WORK_QUEUE 162
- #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
-@@ -677,8 +681,13 @@
+ #define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory
+@@ -676,8 +680,13 @@
SYNC_SEARCH_SYS, as memory allocation
can call routines there! Otherwise
the level is SYNC_MEM_HASH. */
#define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
#define SYNC_DOUBLEWRITE 140
#define SYNC_ANY_LATCH 135
-@@ -709,7 +718,7 @@
+@@ -708,7 +717,7 @@
os_fast_mutex; /*!< We use this OS mutex in place of lock_word
when atomic operations are not enabled */
#endif
}
/******************************************************************//**
-@@ -1234,7 +1244,12 @@
+@@ -1233,7 +1243,12 @@
ut_error;
}
break;
case SYNC_BUF_POOL:
/* We can have multiple mutexes of this type therefore we
can only check whether the greater than condition holds. */
-@@ -1252,7 +1267,8 @@
+@@ -1251,7 +1266,8 @@
buffer block (block->mutex or buf_pool->zip_mutex). */
if (!sync_thread_levels_g(array, level, FALSE)) {
ut_a(sync_thread_levels_g(array, level - 1, TRUE));
}
if (node->state == INDEX_CREATE_INDEX_TREE) {
-@@ -1183,6 +1322,66 @@
- }
+@@ -1177,6 +1316,66 @@
+ return(NULL);
+ }
- /****************************************************************//**
++ thr->run_node = que_node_get_parent(node);
++
++ return(thr);
++}
++
++/****************************************************************//**
+*/
+UNIV_INTERN
+que_thr_t*
+ return(NULL);
+ }
+
-+ thr->run_node = que_node_get_parent(node);
-+
-+ return(thr);
-+}
-+
-+/****************************************************************//**
- Creates the foreign key constraints system tables inside InnoDB
- at database creation or database start if they are not found or are
- not of the right form.
+ thr->run_node = que_node_get_parent(node);
+
+ return(thr);
--- a/storage/innobase/dict/dict0dict.c
+++ b/storage/innobase/dict/dict0dict.c
@@ -755,7 +755,7 @@
/* -------------- Log files ---------------------------*/
/* The default dir for log files is the datadir of MySQL */
-@@ -5257,6 +5260,10 @@
+@@ -5256,6 +5259,10 @@
error = row_insert_for_mysql((byte*) record, prebuilt);
}
#define OK(expr) \
-@@ -3483,6 +3484,221 @@
- STRUCT_FLD(__reserved1, NULL)
+@@ -3511,6 +3512,225 @@
+ STRUCT_FLD(flags, 0UL)
};
+/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_stats */
+
+ /* reserved for dependency checking */
+ /* void* */
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++
++ /* flags for plugin */
++ /* unsigned long */
++ STRUCT_FLD(flags, 0UL)
+};
+
/***********************************************************************
*/
static ST_FIELD_INFO i_s_innodb_rseg_fields_info[] =
-@@ -3645,3 +3861,347 @@
- /* void* */
- STRUCT_FLD(__reserved1, NULL)
+@@ -3677,3 +3897,349 @@
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL)
};
+
+/***********************************************************************
+ STRUCT_FLD(version, 0x0100 /* 1.0 */),
+ STRUCT_FLD(status_vars, NULL),
+ STRUCT_FLD(system_vars, NULL),
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++ STRUCT_FLD(flags, 0UL)
+};
+
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_index_stats =
+ STRUCT_FLD(version, 0x0100 /* 1.0 */),
+ STRUCT_FLD(status_vars, NULL),
+ STRUCT_FLD(system_vars, NULL),
-+ STRUCT_FLD(__reserved1, NULL)
++ STRUCT_FLD(__reserved1, NULL),
++ STRUCT_FLD(flags, 0UL)
+};
--- a/storage/innobase/handler/i_s.h
+++ b/storage/innobase/handler/i_s.h
} else {
--- a/storage/innobase/row/row0ins.c
+++ b/storage/innobase/row/row0ins.c
-@@ -2015,6 +2015,8 @@
+@@ -2018,6 +2018,8 @@
}
#ifdef UNIV_DEBUG
ulint pos = upd_get_nth_field(update, i)->field_no;
+--- /dev/null
++++ b/mysql-test/r/percona_innodb_use_sys_stats_table.result
+@@ -0,0 +1,3 @@
++show variables like 'innodb_use_sys_stats%';
++Variable_name Value
++innodb_use_sys_stats_table ON
+--- /dev/null
++++ b/mysql-test/t/percona_innodb_use_sys_stats_table-master.opt
+@@ -0,0 +1 @@
++--innodb_use_sys_stats_table
+--- /dev/null
++++ b/mysql-test/t/percona_innodb_use_sys_stats_table.test
+@@ -0,0 +1,2 @@
++--source include/have_innodb.inc
++show variables like 'innodb_use_sys_stats%';
+ enter_innodb_with_tickets(trx);
+ return;
+ }
-+ os_atomic_increment_lint(&srv_conc_n_threads, -1);
++ (void) os_atomic_increment_lint(&srv_conc_n_threads, -1);
+ }
+ if (!has_yielded)
+ {
+static void
+srv_conc_exit_innodb_timer_based(trx_t* trx)
+{
-+ os_atomic_increment_lint(&srv_conc_n_threads, -1);
++ (void) os_atomic_increment_lint(&srv_conc_n_threads, -1);
+ trx->declared_to_be_inside_innodb = FALSE;
+ trx->n_tickets_to_enter_innodb = 0;
+ return;
ut_ad(srv_conc_n_threads >= 0);
+#ifdef HAVE_ATOMIC_BUILTINS
+ if (srv_thread_concurrency_timer_based) {
-+ os_atomic_increment_lint(&srv_conc_n_threads, 1);
++ (void) os_atomic_increment_lint(&srv_conc_n_threads, 1);
+ trx->declared_to_be_inside_innodb = TRUE;
+ trx->n_tickets_to_enter_innodb = 1;
+ return;
using my_pthread_setspecific_ptr()/my_thread_getspecific_ptr().
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
-@@ -4874,7 +4874,7 @@
+@@ -4931,7 +4931,7 @@
ER_BINLOG_UNSAFE_STATEMENT,
ER(ER_BINLOG_UNSAFE_STATEMENT),
ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
records_in_block= 10;
--- a/storage/heap/hp_delete.c
+++ b/storage/heap/hp_delete.c
-@@ -22,6 +22,8 @@
+@@ -22,6 +22,7 @@
uchar *pos;
HP_SHARE *share=info->s;
HP_KEYDEF *keydef, *end, *p_lastinx;
-+ uint rec_length, chunk_count;
+
DBUG_ENTER("heap_delete");
DBUG_PRINT("enter",("info: 0x%lx record: 0x%lx", (long) info, (long) record));
-@@ -31,6 +33,8 @@
- DBUG_RETURN(my_errno); /* Record changed */
- share->changed=1;
-
-+ rec_length = hp_get_encoded_data_length(share, record, &chunk_count);
-+
- if ( --(share->records) < share->blength >> 1) share->blength>>=1;
- pos=info->current_ptr;
-
-@@ -43,10 +47,7 @@
+@@ -43,10 +44,7 @@
}
info->update=HA_STATE_DELETED;
info->current_hash_ptr=0;
#if !defined(DBUG_OFF) && defined(EXTRA_HEAP_DEBUG)
DBUG_EXECUTE("check_heap",heap_check_heap(info, 0););
-@@ -75,7 +76,8 @@
+@@ -75,7 +73,8 @@
info->last_pos= NULL; /* For heap_rnext/heap_rprev */
custom_arg.keyseg= keyinfo->seg;
custom_arg.search_flag= SEARCH_SAME;
old_allocated= keyinfo->rb_tree.allocated;
res= tree_delete(&keyinfo->rb_tree, info->recbuf, custom_arg.key_length,
-@@ -112,6 +114,7 @@
+@@ -112,6 +111,7 @@
blength=share->blength;
if (share->records+1 == blength)
blength+= blength;
#define MAX_RECORDS 100000
#define MAX_KEYS 4
-@@ -44,6 +45,7 @@
- register uint i,j;
- uint ant,n1,n2,n3;
- uint write_count,update,opt_delete,check2,dupp_keys,found_key;
-+ uint mem_per_keys;
- int error;
- ulong pos;
- unsigned long key_check;
-@@ -53,6 +55,7 @@
+@@ -53,6 +54,7 @@
HP_SHARE *tmp_share;
HP_KEYDEF keyinfo[MAX_KEYS];
HA_KEYSEG keyseg[MAX_KEYS*5];
HEAP_PTR UNINIT_VAR(position);
HP_CREATE_INFO hp_create_info;
CHARSET_INFO *cs= &my_charset_latin1;
-@@ -65,12 +68,16 @@
+@@ -65,12 +67,16 @@
get_options(argc,argv);
bzero(&hp_create_info, sizeof(hp_create_info));
write_count=update=opt_delete=0;
key_check=0;
-@@ -118,11 +125,30 @@
+@@ -118,11 +124,28 @@
keyinfo[3].seg[0].null_pos=38;
keyinfo[3].seg[0].charset=cs;
+ columndef[3].length= 1;
+ columndef[3].null_bit= 1;
+ columndef[3].null_pos= 38;
-+
-+ mem_per_keys= (sizeof(char*) * 2) * 4;
+
bzero((char*) key1,sizeof(key1));
bzero((char*) key3,sizeof(key3));
HP_KEYDEF *keydef, *end;
uchar *pos;
HP_SHARE *share=info->s;
-+ uint rec_length, chunk_count;
++ uint chunk_count;
+
DBUG_ENTER("heap_write");
#ifndef DBUG_OFF
+ DBUG_RETURN(my_errno);
+ }
+
-+ rec_length= hp_get_encoded_data_length(share, record, &chunk_count);
++ hp_get_encoded_data_length(share, record, &chunk_count);
+
+ if (!(pos= hp_allocate_chunkset(&share->recordspace, chunk_count)))
DBUG_RETURN(my_errno);
Write a hash-key to the hash-index
--- a/storage/heap/hp_update.c
+++ b/storage/heap/hp_update.c
-@@ -17,43 +17,66 @@
+@@ -17,43 +17,65 @@
#include "heapdef.h"
uchar *pos;
my_bool auto_key_changed= 0;
HP_SHARE *share= info->s;
-+ uint old_length, new_length;
+ uint old_chunk_count, new_chunk_count;
+
DBUG_ENTER("heap_update");
+ if (info->opt_flag & READ_CHECK_USED && hp_rectest(info, old_record))
DBUG_RETURN(my_errno); /* Record changed */
+
-+ old_length = hp_get_encoded_data_length(share, old_record, &old_chunk_count);
-+ new_length = hp_get_encoded_data_length(share, new_record, &new_chunk_count);
++ hp_get_encoded_data_length(share, old_record, &old_chunk_count);
++ hp_get_encoded_data_length(share, new_record, &new_chunk_count);
+
+ if (new_chunk_count > old_chunk_count)
+ {
DBUG_RETURN(0);
err:
-@@ -63,7 +86,7 @@
+@@ -63,7 +85,7 @@
if (keydef->algorithm == HA_KEY_ALG_BTREE)
{
/* we don't need to delete non-inserted key from rb-tree */
{
if (++(share->records) == share->blength)
share->blength+= share->blength;
-@@ -73,10 +96,10 @@
+@@ -73,10 +95,10 @@
}
while (keydef >= share->keydef)
{
break;
}
keydef--;
-@@ -84,5 +107,12 @@
+@@ -84,5 +106,12 @@
}
if (++(share->records) == share->blength)
share->blength+= share->blength;
Summary(uk.UTF-8): MySQL - швидкий SQL-сервер
Summary(zh_CN.UTF-8): MySQL数据库服务器
Name: mysql
-Version: 5.5.17
-Release: 2
+Version: 5.5.18
+Release: 1
License: GPL + MySQL FLOSS Exception
Group: Applications/Databases
# Source0Download: http://dev.mysql.com/downloads/mysql/5.5.html#downloads
Source0: http://vesta.informatik.rwth-aachen.de/mysql/Downloads/MySQL-5.5/%{name}-%{version}.tar.gz
-# Source0-md5: dcb6a06e68c5e8f30f57b15300730c9c
+# Source0-md5: 38b65815249f3bcacf3b0ee85171c486
Source100: http://www.sphinxsearch.com/files/sphinx-2.0.1-beta.tar.gz
# Source100-md5: 95c217d81d0b7a4ff73d5297318c3481
Source1: %{name}.init
Patch158: subunit.patch
Patch159: bug860910.patch
Patch160: bug45702.patch
+Patch161: group_commit.patch
+Patch162: warning_fixes.patch
# </percona>
URL: http://www.mysql.com/products/community/
BuildRequires: bison
%patch158 -p1
%patch159 -p1
%patch160 -p1
+%patch161 -p1
+%patch162 -p1
# </percona>
# to get these files rebuild
# should be done or reviewed by the maintainer!
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
-@@ -2308,6 +2308,7 @@
+@@ -2326,6 +2326,7 @@
thd->sent_row_count++;
thd->sent_row_count_2++;
-# name : query_cache_with_comments.patch
+# name : query_cache_enhance.patch
# introduced : 11 or before
# maintainer : Oleg
#
extern ulonglong log_output_options;
extern ulong log_backup_output_options;
extern my_bool opt_log_queries_not_using_indexes;
---- /dev/null
-+++ b/sql/query_strip_comments.h
-@@ -0,0 +1,37 @@
-+#ifndef _SQL_QUERY_STRIPC_COMMENTS_H_
-+#define _SQL_QUERY_STRIPC_COMMENTS_H_
-+#ifdef HAVE_QUERY_CACHE
-+
-+// implemented in sql_cache.cc
-+class QueryStripComments
-+{
-+private:
-+ QueryStripComments(const QueryStripComments&);
-+ QueryStripComments& operator=(const QueryStripComments&);
-+public:
-+ QueryStripComments();
-+ ~QueryStripComments();
-+ void set(const char* a_query, uint a_query_length, uint a_additional_length);
-+
-+ char* query() { return buffer; }
-+ uint query_length() { return length; }
-+private:
-+ void cleanup();
-+private:
-+ char* buffer;
-+ uint length /*query length, not buffer length*/;
-+ uint buffer_length;
-+};
-+class QueryStripComments_Backup
-+{
-+public:
-+ QueryStripComments_Backup(THD* a_thd,QueryStripComments* qsc);
-+ ~QueryStripComments_Backup();
-+private:
-+ THD* thd;
-+ char* query;
-+ uint length;
-+};
-+
-+#endif // HAVE_QUERY_CACHE
-+#endif // _SQL_QUERY_STRIPC_COMMENTS_H_
--- a/sql/sql_cache.cc
+++ b/sql/sql_cache.cc
-@@ -344,6 +344,181 @@
+@@ -344,6 +344,496 @@
#include "probes_mysql.h"
#include "transaction.h"
-+#include "query_strip_comments.h"
+
-+QueryStripComments::QueryStripComments()
-+{
-+ buffer = 0;
-+ length = 0;
-+ buffer_length = 0;
-+}
-+QueryStripComments::~QueryStripComments()
++namespace query_comments_parser
+{
-+ cleanup();
-+}
+
-+inline bool query_strip_comments_is_white_space(char c)
++
++enum Kind
+{
-+ return ((' ' == c) || ('\t' == c) || ('\r' == c) || ('\n' ==c ));
-+}
-+void QueryStripComments::set(const char* query, uint query_length, uint additional_length)
++ /* 'Empty' symbol - epsilon in classic parsers */
++ Empty,
++ /*
++ Special symbols:
++ * exclamation comment: slash-star-exclamation comment-body star-slash
++ * single-line and multi-line comments
++ */
++ Special,
++ /* Whitespaces: ' ' \t \r \n */
++ WhiteSpace,
++ /*
++ 1) C-style comment (slash-star comment-body star-slash)
++ 2) signle-line comment:
++ * sharp comment (sharp comment-body new-line)
++ * minus-minus comment (minus-minus comment-body new-line)
++ */
++ Comment,
++ /* Not a special symbol (such symbols cannot appear before SELECT). */
++ Another,
++ /* Error: not-closed quotes, not-closed C-style comment, end-of-query */
++ Error
++};
++
++
++/**
++ Analyze kind of prefix of input string.
++
++ @param where pointer to a pointer to the beginning of the string. After
++ analyzing the input, the function skips the analyzed prefix and stores a
++ pointer to the remainder of the string back into @param where.
++
++ @return kind of analyzed prefix.
++*/
++static Kind analyze(const char **where, const char *const end)
+{
-+ uint new_buffer_length = query_length + additional_length;
-+ if(new_buffer_length > buffer_length)
-+ {
-+ cleanup();
-+ buffer = (char*)my_malloc(new_buffer_length,MYF(0));
-+ }
-+ uint query_position = 0;
-+ uint position = 0;
-+ // Skip whitespaces from begin
-+ while((query_position < query_length) && query_strip_comments_is_white_space(query[query_position]))
++ DBUG_ASSERT(where != NULL);
++ DBUG_ASSERT(*where != NULL);
++ const char*&to= *where;
++ /* empty input: detected by the NUL terminator, not by comparing with end */
++ if (*to == '\0')
+ {
-+ ++query_position;
++ return Empty;
+ }
-+ long int last_space = -1;
-+ while(query_position < query_length)
++
++ /* current symbol */
++ char current= *to;
++
++ switch (current)
+ {
-+ char current = query[query_position];
-+ bool insert_space = false; // insert space to buffer, (IMPORTANT) don't update query_position
-+ switch(current)
++ case '\'':
++ case '"':
++ /* skip quote */
++ to++;
++ /* search pair */
++ while (true)
+ {
-+ case '\'':
-+ case '"':
-+ {
-+ buffer[position++] = query[query_position++]; // copy current symbol
-+ while(query_position < query_length)
-+ {
-+ if(current == query[query_position]) // found pair quote
-+ {
-+ break;
-+ }
-+ buffer[position++] = query[query_position++]; // copy current symbol
-+ }
-+ break;
-+ }
-+ case '/':
++ /* check for pair of quote */
++ if (*to == current)
+ {
-+ if(((query_position + 2) < query_length) && ('*' == query[query_position+1]) && ('!' != query[query_position+2]))
++ /* skip second quote */
++ to++;
++ /* check for same symbol after second quote */
++ if (to < end && *to == current)
+ {
-+ query_position += 2; // skip "/*"
-+ do
++ /* same symbol, skip it */
++ to++;
++ /* check for end-of-line */
++ if (to == end)
+ {
-+ if('*' == query[query_position] && '/' == query[query_position+1]) // check for "*/"
-+ {
-+ query_position += 2; // skip "*/"
-+ insert_space = true;
-+ break;
-+ }
-+ else
-+ {
-+ ++query_position;
-+ }
++ /* not found - not closed quote */
++ return Error;
+ }
-+ while(query_position < query_length);
-+ if(!insert_space)
++ else
+ {
++ /* continue search of pair */
+ continue;
+ }
+ }
-+ break;
-+ }
-+ case '-':
-+ {
-+ if(query[query_position+1] == '-')
-+ {
-+ ++query_position; // skip "-", and go to search of "\n"
-+ }
+ else
+ {
-+ break;
++ return Another;
+ }
+ }
-+ case '#':
++ /* check for escaped symbols */
++ if (*to == '\\')
+ {
-+ do
-+ {
-+ ++query_position; // skip current symbol (# or -)
-+ if('\n' == query[query_position]) // check for '\n'
-+ {
-+ ++query_position; // skip '\n'
-+ insert_space = true;
-+ break;
-+ }
-+ }
-+ while(query_position < query_length);
-+ if(insert_space)
-+ {
-+ break;
-+ }
-+ else
-+ {
-+ continue;
-+ }
++ /* backslash, skip it */
++ to++;
+ }
-+ default:
-+ if(query_strip_comments_is_white_space(current))
++ /* check for end-of-line */
++ if (to == end)
+ {
-+ insert_space = true;
-+ ++query_position;
++ /* not found - not closed quote */
++ return Error;
+ }
-+ break; // make gcc happy
++ /* skip current symbol */
++ to++;
++ }
++ case '-':
++ /* Skip minus */
++ to++;
++ /* Check for second minus */
++ if (*to != '-')
++ {
++ /* Just minus */
++ return Another;
++ }
++ else
++ {
++ /*
++ Prefix is minus-minus, next case-branch is processing
++ single line comments.
++ */
++ }
++ case '#':
++ /*
++ This is single-line comment, it started by "#" or "--".
++ Skip first symbol.
++ */
++ to++;
++ /* search new-line */
++ to= strchr(to, '\n');
++ if (NULL == to)
++ {
++ /* not found, end of the comment is the end of the query */
++ to= end;
++ }
++ else
++ {
++ /* skip end-of-line */
++ to++;
+ }
-+ if(insert_space)
++ return Comment;
++ case '/':
++ /* skip slash */
++ to++;
++ /* check for star */
++ if (*to == '*')
+ {
-+ if((last_space + 1) != position)
++ /* skip star */
++ to++;
++ /* check for exclamation */
++ bool exclamation= (*to == '!');
++ /* search star-slash */
++ to= strstr(to, "*/");
++ if (NULL == to)
++ {
++ /* not found - not closed comment */
++ return Error;
++ }
++ else
+ {
-+ last_space = position;
-+ buffer[position++] = ' ';
++ /* found */
++ DBUG_ASSERT(to + 1 < end);
++ DBUG_ASSERT(0 == strncmp(to, "*/", 2));
++ /* skip star-slash */
++ to++;
++ to++;
++ return (exclamation ? Special : Comment);
+ }
+ }
+ else
+ {
-+ buffer[position++] = query[query_position++];
++ /* just slash */
++ return Another;
+ }
-+ }
-+ while((0 < position) && query_strip_comments_is_white_space(buffer[position - 1]))
++ case ' ':
++ case '\t':
++ case '\r':
++ case '\n':
++ {
++ /* skip space */
++ to++;
++ return WhiteSpace;
++ }
++ case '\\':
++ {
++ /* skip backslash */
++ to++;
++ if (to == end)
++ {
++ /*
++ query ends with a backslash;
++ probably an error?
++ */
++ return Another;
++ }
++ else
++ {
++ /* skip after backslash symbol */
++ to++;
++ return Another;
++ }
++ }
++ case '(':
++ case ')':
++ {
++ /* skip parenthesis */
++ to++;
++ return Special;
++ }
++ default:
++ {
++ /* skip symbol */
++ to++;
++ return Another;
++ }
++ };
++}
++
++
++static bool remove_comments_from_query(const char *const query,
++ const size_t query_length,
++ char *const result,
++ size_t *result_length)
++{
++ /* pointer to begin of parsed block */
++ const char *from= query;
++ const char *to= query;
++ /* pointer to end of the query */
++ const char *const end= query + query_length;
++ /* pointer to last space */
++ const char *space= NULL;
++ /* current position in result buffer */
++ char *current= result;
++ while (true)
+ {
-+ --position;
++ from= to;
++ switch (analyze(&to, end))
++ {
++ case Empty:
++ {
++ /*
++ parse completed
++ check for whitespace in the end
++ */
++ if (current == space)
++ {
++ /* drop trailing space; NOTE(review): steps before result if nothing was written */
++ --current;
++ }
++ /* result is null-terminated string */
++ *current= 0;
++ /* set result length */
++ *result_length= current - result;
++ /* all right */
++ return true;
++ }
++ case Comment:
++ /* should just insert space instead of comment */
++ case WhiteSpace:
++ if (space == current || from == query)
++ {
++ /* previous symbol was space */
++ }
++ else
++ {
++ /* insert space to result buffer */
++ *current= ' ';
++ /* switch after inserted space */
++ current++;
++ }
++ /* remember last-after-space position */
++ space= current;
++ /* parse again */
++ continue;
++ case Special:
++ case Another:
++ {
++ /* calculate parsed block size */
++ size_t block_size= to - from;
++ /* copy parsed block to result */
++ memcpy(current, from, block_size);
++ /* switch result after copied block */
++ current+= block_size;
++ /* switch after parsed block */
++ from= to;
++ /* parse again */
++ continue;
++ }
++ case Error:
++ default:
++ {
++ /* bad source query */
++ return false;
++ }
++ }
+ }
-+ buffer[position] = 0;
-+ length = position;
+}
-+void QueryStripComments::cleanup()
++
++
++static size_t skip_not_another(const char *const query, size_t query_length)
+{
-+ if(buffer)
++ const char *from= query;
++ const char *to= query;
++ const char *const end= query + query_length;
++ while (true)
+ {
-+ my_free(buffer);
++ switch (analyze(&to, end))
++ {
++ case Error:
++ return 0;
++ case Empty:
++ case Another:
++ return (from - query);
++ default:
++ from= to;
++ continue;
++ };
+ }
-+ buffer = 0;
-+ length = 0;
-+ buffer_length = 0;
+}
-+QueryStripComments_Backup::QueryStripComments_Backup(THD* a_thd,QueryStripComments* qsc)
++
++
++static size_t skip_default(const char *const query, size_t /* query_length */)
++{
++ size_t query_position= 0;
++ /*
++ Skip '(' characters in queries like following:
++ (select a from t1) union (select a from t1);
++ */
++ while (query[query_position]=='(')
++ query_position++;
++ return query_position;
++}
++
++
++} /* namespace query_comments_parser */
++
++class Query_Switcher
+{
-+ if(opt_query_cache_strip_comments)
++private:
++ Query_Switcher(const Query_Switcher&);
++ Query_Switcher& operator=(const Query_Switcher&);
++
++
++public:
++ Query_Switcher(THD *thd) :
++ target_query(&(thd_query_string(thd)->str)),
++ target_length(&(thd_query_string(thd)->length)),
++ backup_query(thd->query()),
++ backup_length(thd->query_length())
+ {
-+ thd = a_thd;
-+ query = thd->query();
-+ length = thd->query_length();
-+ qsc->set(query,length,thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE);
-+ thd->set_query(qsc->query(),qsc->query_length());
+ }
-+ else
++
++ Query_Switcher(char **query,
++ size_t *length) :
++ target_query(query),
++ target_length(length),
++ backup_query(*query),
++ backup_length(*length)
+ {
-+ thd = 0;
-+ query = 0;
-+ length = 0;
+ }
-+}
-+QueryStripComments_Backup::~QueryStripComments_Backup()
++public:
++ void replace(Query_Without_Comments *query_without_comments)
++ {
++ *target_query= query_without_comments->query();
++ *target_length= query_without_comments->length();
++ }
++ void restore()
++ {
++ *target_query= backup_query;
++ *target_length= backup_length;
++ }
++private:
++ char* *target_query;
++ size_t *target_length;
++public:
++ char *const backup_query;
++ size_t const backup_length;
++};
++
++class Comments_Processor
+{
-+ if(thd)
++private:
++ Comments_Processor(const Comments_Processor&);
++ Comments_Processor& operator=(const Comments_Processor&);
++
++
++public:
++ Comments_Processor(THD *thd) :
++ query_switcher (thd),
++ db_length (thd->db_length),
++ query_without_comments(&(thd->query_without_comments)),
++ enabled (opt_query_cache_strip_comments),
++ restore (false)
+ {
-+ thd->set_query(query,length);
+ }
-+}
++
++
++ Comments_Processor(Query_Without_Comments *current_query_without_comments,
++ char **query,
++ size_t *length,
++ const size_t current_db_length) :
++ query_switcher (query, length),
++ db_length (current_db_length),
++ query_without_comments(current_query_without_comments),
++ enabled (opt_query_cache_strip_comments),
++ restore (false)
++ {
++ }
++
++
++ ~Comments_Processor()
++ {
++ restore_comments();
++ }
++
++
++ size_t prefix_length()
++ {
++ using query_comments_parser::skip_not_another;
++ using query_comments_parser::skip_default;
++ if (enabled)
++ {
++ return skip_not_another(query_switcher.backup_query,
++ query_switcher.backup_length);
++ }
++ else
++ {
++ return skip_default(query_switcher.backup_query,
++ query_switcher.backup_length);
++ }
++ }
++
++
++ bool remove_comments()
++ {
++ if (!enabled || restore)
++ {
++ return true;
++ }
++ /* Allocate memory for query rewrite */
++ if (!query_without_comments->allocate(query_switcher.backup_length,
++ db_length))
++ {
++ return false;
++ }
++ /* Remove comment from query */
++ size_t result_length;
++ using query_comments_parser::remove_comments_from_query;
++ if (!(restore= remove_comments_from_query(query_switcher.backup_query,
++ query_switcher.backup_length,
++ query_without_comments->query(),
++ &result_length)))
++ {
++ return false;
++ }
++ query_without_comments->set_length(result_length);
++ size_t db_length_from_query=
++ *((size_t*)(query_switcher.backup_query +
++ query_switcher.backup_length + 1));
++ *((size_t*)(query_without_comments->query() +
++ result_length + 1))= db_length_from_query;
++ /* Replace the original query with the stripped one */
++ query_switcher.replace(query_without_comments);
++ return restore;
++ }
++
++
++ void restore_comments()
++ {
++ if (enabled && restore)
++ {
++ /* Replace the stripped query with the original one */
++ query_switcher.restore();
++
++ /* Clean query_without_comments */
++ query_without_comments->set_length(0);
++
++ /* Mark as restored */
++ restore= false;
++ }
++ }
++private:
++ Query_Switcher query_switcher;
++private:
++ const size_t db_length;
++private:
++ Query_Without_Comments *query_without_comments;
++ bool enabled;
++ bool restore;
++};
+
#ifdef EMBEDDED_LIBRARY
#include "emb_qcache.h"
#endif
-@@ -454,7 +629,12 @@
+@@ -454,7 +939,12 @@
Query_cache_wait_state wait_state(thd, __func__, __FILE__, __LINE__);
DBUG_ENTER("Query_cache::try_lock");
-+ const char* old_proc_info= thd->proc_info;
++ const char *old_proc_info= thd->proc_info;
+ thd_proc_info(thd,"Waiting on query cache mutex");
+ DEBUG_SYNC(thd, "before_query_cache_mutex");
mysql_mutex_lock(&structure_guard_mutex);
while (1)
{
if (m_cache_lock_status == Query_cache::UNLOCKED)
-@@ -1274,6 +1454,8 @@
+@@ -1274,6 +1764,8 @@
unlock();
DBUG_VOID_RETURN;
}
-+ QueryStripComments *query_strip_comments = &(thd->query_strip_comments);
-+ QueryStripComments_Backup backup(thd,query_strip_comments);
++ Comments_Processor comments_processor(thd);
++ comments_processor.remove_comments();
/* Key is query + database + flag */
if (thd->db_length)
-@@ -1451,6 +1633,9 @@
- Query_cache_block_table *block_table, *block_table_end;
+@@ -1440,7 +1932,7 @@
+ */
+
+ int
+-Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length)
++Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length_uint)
+ {
+ ulonglong engine_data;
+ Query_cache_query *query;
+@@ -1452,6 +1944,11 @@
ulong tot_length;
Query_cache_query_flags flags;
-+ QueryStripComments *query_strip_comments = &(thd->query_strip_comments);
-+ char *sql_backup = sql;
-+ uint query_length_backup = query_length;
DBUG_ENTER("Query_cache::send_result_to_client");
++ size_t query_length= query_length_uint;
++ Comments_Processor comments_processor(&(thd->query_without_comments),
++ &sql,
++ &query_length,
++ thd->db_length);
/*
-@@ -1472,21 +1657,103 @@
+ Testing 'query_cache_size' without a lock here is safe: the thing
+@@ -1471,13 +1968,7 @@
+ }
{
- uint i= 0;
+- uint i= 0;
- /*
- Skip '(' characters in queries like following:
- (select a from t1) union (select a from t1);
- */
- while (sql[i]=='(')
- i++;
-+ if(opt_query_cache_strip_comments)
-+ {
-+ /* Skip all comments and non-letter symbols */
-+ uint& query_position = i;
-+ char* query = sql;
-+ while(query_position < query_length)
-+ {
-+ bool check = false;
-+ char current = query[query_position];
-+ switch(current)
-+ {
-+ case '/':
-+ if(((query_position + 2) < query_length) && ('*' == query[query_position+1]) && ('!' != query[query_position+2]))
-+ {
-+ query_position += 2; // skip "/*"
-+ do
-+ {
-+ if('*' == query[query_position] && '/' == query[query_position+1]) // check for "*/" (without space)
-+ {
-+ query_position += 2; // skip "*/" (without space)
-+ break;
-+ }
-+ else
-+ {
-+ ++query_position;
-+ }
-+ }
-+ while(query_position < query_length);
-+ continue; // analyze current symbol
-+ }
-+ break;
-+ case '-':
-+ if(query[query_position+1] == '-')
-+ {
-+ ++query_position; // skip "-"
-+ }
-+ else
-+ {
-+ break;
-+ }
-+ case '#':
-+ do
-+ {
-+ ++query_position; // skip current symbol
-+ if('\n' == query[query_position]) // check for '\n'
-+ {
-+ ++query_position; // skip '\n'
-+ break;
-+ }
-+ }
-+ while(query_position < query_length);
-+ continue; // analyze current symbol
-+ case '\r':
-+ case '\n':
-+ case '\t':
-+ case ' ':
-+ case '(':
-+ case ')':
-+ break;
-+ default:
-+ check = true;
-+ break; // make gcc happy
-+ } // switch(current)
-+ if(check)
-+ {
-+ if(query_position + 2 < query_length)
-+ {
-+ // cacheable
-+ break;
-+ }
-+ else
-+ {
-+ DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached"));
-+ goto err;
-+ }
-+ } // if(check)
-+ ++query_position;
-+ } // while(query_position < query_length)
-+ }
-+ else // if(opt_query_cache_strip_comments)
-+ {
-+ /*
-+ Skip '(' characters in queries like following:
-+ (select a from t1) union (select a from t1);
-+ */
-+ while (sql[i]=='(')
-+ i++;
-
-- /*
-- Test if the query is a SELECT
-- (pre-space is removed in dispatch_command).
-+ } // if(opt_query_cache_strip_comments)
-+ /*
-+ Test if the query is a SELECT
-+ (pre-space is removed in dispatch_command).
++ size_t i= comments_processor.prefix_length();
-- First '/' looks like comment before command it is not
-- frequently appeared in real life, consequently we can
-- check all such queries, too.
-- */
-+ First '/' looks like comment before command it is not
-+ frequently appeared in real life, consequently we can
-+ check all such queries, too.
-+ */
- if ((my_toupper(system_charset_info, sql[i]) != 'S' ||
- my_toupper(system_charset_info, sql[i + 1]) != 'E' ||
- my_toupper(system_charset_info, sql[i + 2]) != 'L') &&
-@@ -1521,6 +1788,12 @@
+ /*
+ Test if the query is a SELECT
+@@ -1487,10 +1978,11 @@
+ frequently appeared in real life, consequently we can
+ check all such queries, too.
+ */
+- if ((my_toupper(system_charset_info, sql[i]) != 'S' ||
+- my_toupper(system_charset_info, sql[i + 1]) != 'E' ||
+- my_toupper(system_charset_info, sql[i + 2]) != 'L') &&
+- sql[i] != '/')
++ if (!((i + 2 < query_length) &&
++ ((my_toupper(system_charset_info, sql[i]) == 'S' &&
++ my_toupper(system_charset_info, sql[i + 1]) == 'E' &&
++ my_toupper(system_charset_info, sql[i + 2]) == 'L') ||
++ sql[i] == '/')))
+ {
+ DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached"));
+ goto err;
+@@ -1543,6 +2035,7 @@
goto err_unlock;
Query_cache_block *query_block;
-+ if(opt_query_cache_strip_comments)
-+ {
-+ query_strip_comments->set(sql, query_length, thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE);
-+ sql = query_strip_comments->query();
-+ query_length = query_strip_comments->query_length();
-+ }
++ comments_processor.remove_comments();
- tot_length= query_length + thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE;
- if (thd->db_length)
-@@ -1587,6 +1860,8 @@
+ tot_length= query_length + 1 + sizeof(size_t) +
+ thd->db_length + QUERY_CACHE_FLAGS_SIZE;
+@@ -1611,6 +2104,7 @@
(uchar*) &flags, QUERY_CACHE_FLAGS_SIZE);
query_block = (Query_cache_block *) my_hash_search(&queries, (uchar*) sql,
tot_length);
-+ sql = sql_backup;
-+ query_length = query_length_backup;
++ comments_processor.restore_comments();
/* Quick abort on unlocked data */
if (query_block == 0 ||
query_block->query()->result() == 0 ||
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
-@@ -40,6 +40,9 @@
- #include "thr_lock.h" /* thr_lock_type, THR_LOCK_DATA,
- THR_LOCK_INFO */
+@@ -1487,6 +1487,74 @@
-+#ifdef HAVE_QUERY_CACHE
-+#include "query_strip_comments.h"
-+#endif // HAVE_QUERY_CACHE
+ extern "C" void my_message_sql(uint error, const char *str, myf MyFlags);
- class Reprepare_observer;
- class Relay_log_info;
-@@ -764,6 +767,9 @@
- statement lifetime. FIXME: must be const
- */
- ulong id;
++
+#ifdef HAVE_QUERY_CACHE
-+ QueryStripComments query_strip_comments; // see sql_cache.cc
-+#endif //HAVE_QUERY_CACHE
-
- /*
- MARK_COLUMNS_NONE: Means mark_used_colums is not set and no indicator to
++
++
++/*
++ @class Query_Without_Comments
++ This class provides a safe way to (re)allocate
++ memory for a query without comments.
++*/
++class Query_Without_Comments
++{
++private:
++ /*
++ Copying and assignment of objects of this class are disallowed.
++ */
++ Query_Without_Comments(const Query_Without_Comments&);
++ Query_Without_Comments& operator=(const Query_Without_Comments&);
++
++
++public:
++ /*
++ Constructor zero-fills the fields (no allocation).
++ */
++ Query_Without_Comments();
++
++
++ /*
++ Destructor frees the allocated memory.
++ */
++ ~Query_Without_Comments();
++public:
++
++
++/*
++ (Re)allocate memory for query. Query length after that is 0.
++ */
++ bool allocate(size_t query_length, size_t db_length);
++
++
++ /*
++ Set result query length, when query
++ without comments is copied to buffer.
++ */
++ void set_length(size_t query_length);
++
++
++public:
++ /*
++ Result query.
++ */
++ char* query();
++
++
++ /*
++ Result query length
++ */
++ size_t length();
++
++
++private:
++ char* buffer;
++ size_t q_length;
++ size_t b_length;
++};
++
++
++#endif /* HAVE_QUERY_CACHE */
++
+ /**
+ @class THD
+ For each client connection we create a separate thread with THD serving as
+@@ -1544,6 +1612,7 @@
+ struct st_mysql_stmt *current_stmt;
+ #endif
+ #ifdef HAVE_QUERY_CACHE
++ Query_Without_Comments query_without_comments;
+ Query_cache_tls query_cache_tls;
+ #endif
+ NET net; // client connection descriptor
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -1809,6 +1809,11 @@
"Don't cache results that are bigger than this",
--- /dev/null
+++ b/mysql-test/include/percona_query_cache_with_comments.inc
-@@ -0,0 +1,95 @@
+@@ -0,0 +1,117 @@
+--source include/percona_query_cache_with_comments_clear.inc
+let $query=/* with comment first */select * from t1;
+eval $query;
+;
+--source include/percona_query_cache_with_comments_eval.inc
+
++let $query=select */* a comment \*/from t1;
++--source include/percona_query_cache_with_comments_eval.inc
++
++let $query=select *# a comment \\
++from t1;
++--source include/percona_query_cache_with_comments_eval.inc
++
++let $query=select *-- a comment \\
++from t1;
++--source include/percona_query_cache_with_comments_eval.inc
++
++let $query=select "\\\\"" /* not a comment */" from t1;
++--source include/percona_query_cache_with_comments_eval.inc
++
++let $query=select "\\\\"" /*! not a comment */" from t1;
++--source include/percona_query_cache_with_comments_eval.inc
++
++# following two queries related to bug #856404.
++# These are different queries, but opt_query_cache_strip_comments thinks that they are equal.
+let $query=select ' \' ' from t1;
+--source include/percona_query_cache_with_comments_eval.inc
++
++let $query=select ' \' /* comment inside quotes with internal backslash quote */' from t1;
++--source include/percona_query_cache_with_comments_eval.inc
--- /dev/null
+++ b/mysql-test/include/percona_query_cache_with_comments_begin.inc
@@ -0,0 +1,12 @@
+
--- /dev/null
+++ b/mysql-test/r/percona_query_cache_with_comments.result
-@@ -0,0 +1,866 @@
+@@ -0,0 +1,1058 @@
+set global query_cache_strip_comments=ON;
+set GLOBAL query_cache_size=1355776;
+drop table if exists t1;
+Variable_name Value
+Qcache_hits 42
+-----------------------------------------------------
-+select * from t1 #comment in the end
++select * from t1 #comment in the end
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 1
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 1
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 42
++select * from t1 #comment in the end;
++a
++1
++2
++3
++select * from t1 #comment in the end;
++a
++1
++2
++3
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 1
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 1
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 44
++-----------------------------------------------------
++select * from t1 #comment in the end
++
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 1
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 1
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 44
++select * from t1 #comment in the end
++;
++a
++1
++2
++3
++select * from t1 #comment in the end
++;
++a
++1
++2
++3
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 1
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 1
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 46
++-----------------------------------------------------
++select * from t1 -- comment in the end
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 1
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 1
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 46
++select * from t1 -- comment in the end;
++a
++1
++2
++3
++select * from t1 -- comment in the end;
++a
++1
++2
++3
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 1
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 1
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 48
++-----------------------------------------------------
++select * from t1 -- comment in the end
++
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 1
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 1
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 48
++select * from t1 -- comment in the end
++;
++a
++1
++2
++3
++select * from t1 -- comment in the end
++;
++a
++1
++2
++3
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 1
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 1
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 50
++-----------------------------------------------------
++select */* a comment \*/from t1
+-----------------------------------------------------
+show status like "Qcache_queries_in_cache";
+Variable_name Value
+Qcache_inserts 1
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 42
-+select * from t1 #comment in the end;
++Qcache_hits 50
++select */* a comment \*/from t1;
+a
+1
+2
+3
-+select * from t1 #comment in the end;
++select */* a comment \*/from t1;
+a
+1
+2
+Qcache_inserts 1
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 44
++Qcache_hits 52
+-----------------------------------------------------
-+select * from t1 #comment in the end
-+
++select *# a comment \
++from t1
+-----------------------------------------------------
+show status like "Qcache_queries_in_cache";
+Variable_name Value
+Qcache_inserts 1
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 44
-+select * from t1 #comment in the end
-+;
++Qcache_hits 52
++select *# a comment \
++from t1;
+a
+1
+2
+3
-+select * from t1 #comment in the end
-+;
++select *# a comment \
++from t1;
+a
+1
+2
+Qcache_inserts 1
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 46
++Qcache_hits 54
+-----------------------------------------------------
-+select * from t1 -- comment in the end
++select *-- a comment \
++from t1
+-----------------------------------------------------
+show status like "Qcache_queries_in_cache";
+Variable_name Value
+Qcache_inserts 1
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 46
-+select * from t1 -- comment in the end;
++Qcache_hits 54
++select *-- a comment \
++from t1;
+a
+1
+2
+3
-+select * from t1 -- comment in the end;
++select *-- a comment \
++from t1;
+a
+1
+2
+Qcache_inserts 1
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 48
++Qcache_hits 56
+-----------------------------------------------------
-+select * from t1 -- comment in the end
-+
++select "\\"" /* not a comment */" from t1
+-----------------------------------------------------
+show status like "Qcache_queries_in_cache";
+Variable_name Value
+Qcache_inserts 1
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 48
-+select * from t1 -- comment in the end
-+;
-+a
-+1
-+2
-+3
-+select * from t1 -- comment in the end
-+;
-+a
-+1
-+2
-+3
++Qcache_hits 56
++select "\\"" /* not a comment */" from t1;
++\" /* not a comment */
++\" /* not a comment */
++\" /* not a comment */
++\" /* not a comment */
++select "\\"" /* not a comment */" from t1;
++\" /* not a comment */
++\" /* not a comment */
++\" /* not a comment */
++\" /* not a comment */
+show status like "Qcache_queries_in_cache";
+Variable_name Value
-+Qcache_queries_in_cache 1
++Qcache_queries_in_cache 2
+show status like "Qcache_inserts";
+Variable_name Value
-+Qcache_inserts 1
++Qcache_inserts 2
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 50
++Qcache_hits 57
++-----------------------------------------------------
++select "\\"" /*! not a comment */" from t1
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 2
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 2
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 57
++select "\\"" /*! not a comment */" from t1;
++\" /*! not a comment */
++\" /*! not a comment */
++\" /*! not a comment */
++\" /*! not a comment */
++select "\\"" /*! not a comment */" from t1;
++\" /*! not a comment */
++\" /*! not a comment */
++\" /*! not a comment */
++\" /*! not a comment */
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 3
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 3
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 58
+-----------------------------------------------------
+select ' \' ' from t1
+-----------------------------------------------------
+show status like "Qcache_queries_in_cache";
+Variable_name Value
-+Qcache_queries_in_cache 1
++Qcache_queries_in_cache 3
+show status like "Qcache_inserts";
+Variable_name Value
-+Qcache_inserts 1
++Qcache_inserts 3
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 50
++Qcache_hits 58
+select ' \' ' from t1;
+'
+ '
+ '
+show status like "Qcache_queries_in_cache";
+Variable_name Value
-+Qcache_queries_in_cache 2
++Qcache_queries_in_cache 4
+show status like "Qcache_inserts";
+Variable_name Value
-+Qcache_inserts 2
++Qcache_inserts 4
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 59
++-----------------------------------------------------
++select ' \' /* comment inside quotes with internal backslash quote */' from t1
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 4
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 4
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 59
++select ' \' /* comment inside quotes with internal backslash quote */' from t1;
++' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++select ' \' /* comment inside quotes with internal backslash quote */' from t1;
++' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 5
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 5
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 51
++Qcache_hits 60
+DROP TABLE t1;
+SET GLOBAL query_cache_size=default;
+set global query_cache_strip_comments=OFF;
+SET GLOBAL query_cache_size= default;
--- /dev/null
+++ b/mysql-test/r/percona_query_cache_with_comments_disable.result
-@@ -0,0 +1,865 @@
+@@ -0,0 +1,1057 @@
+set GLOBAL query_cache_size=1355776;
+drop table if exists t1;
+create table t1 (a int not null);
+Variable_name Value
+Qcache_hits 25
+-----------------------------------------------------
-+select ' \' ' from t1
++select */* a comment \*/from t1
+-----------------------------------------------------
+show status like "Qcache_queries_in_cache";
+Variable_name Value
+show status like "Qcache_hits";
+Variable_name Value
+Qcache_hits 25
++select */* a comment \*/from t1;
++a
++1
++2
++3
++select */* a comment \*/from t1;
++a
++1
++2
++3
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 21
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 21
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 26
++-----------------------------------------------------
++select *# a comment \
++from t1
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 21
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 21
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 26
++select *# a comment \
++from t1;
++a
++1
++2
++3
++select *# a comment \
++from t1;
++a
++1
++2
++3
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 22
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 22
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 27
++-----------------------------------------------------
++select *-- a comment \
++from t1
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 22
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 22
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 27
++select *-- a comment \
++from t1;
++a
++1
++2
++3
++select *-- a comment \
++from t1;
++a
++1
++2
++3
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 23
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 23
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 28
++-----------------------------------------------------
++select "\\"" /* not a comment */" from t1
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 23
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 23
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 28
++select "\\"" /* not a comment */" from t1;
++\" /* not a comment */
++\" /* not a comment */
++\" /* not a comment */
++\" /* not a comment */
++select "\\"" /* not a comment */" from t1;
++\" /* not a comment */
++\" /* not a comment */
++\" /* not a comment */
++\" /* not a comment */
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 24
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 24
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 29
++-----------------------------------------------------
++select "\\"" /*! not a comment */" from t1
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 24
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 24
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 29
++select "\\"" /*! not a comment */" from t1;
++\" /*! not a comment */
++\" /*! not a comment */
++\" /*! not a comment */
++\" /*! not a comment */
++select "\\"" /*! not a comment */" from t1;
++\" /*! not a comment */
++\" /*! not a comment */
++\" /*! not a comment */
++\" /*! not a comment */
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 25
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 25
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 30
++-----------------------------------------------------
++select ' \' ' from t1
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 25
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 25
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 30
+select ' \' ' from t1;
+'
+ '
+ '
+show status like "Qcache_queries_in_cache";
+Variable_name Value
-+Qcache_queries_in_cache 21
++Qcache_queries_in_cache 26
+show status like "Qcache_inserts";
+Variable_name Value
-+Qcache_inserts 21
++Qcache_inserts 26
+show status like "Qcache_hits";
+Variable_name Value
-+Qcache_hits 26
++Qcache_hits 31
++-----------------------------------------------------
++select ' \' /* comment inside quotes with internal backslash quote */' from t1
++-----------------------------------------------------
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 26
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 26
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 31
++select ' \' /* comment inside quotes with internal backslash quote */' from t1;
++' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++select ' \' /* comment inside quotes with internal backslash quote */' from t1;
++' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++ ' /* comment inside quotes with internal backslash quote */
++show status like "Qcache_queries_in_cache";
++Variable_name Value
++Qcache_queries_in_cache 27
++show status like "Qcache_inserts";
++Variable_name Value
++Qcache_inserts 27
++show status like "Qcache_hits";
++Variable_name Value
++Qcache_hits 32
+DROP TABLE t1;
+SET GLOBAL query_cache_size=default;
+set global query_cache_strip_comments=OFF;
+action
+try_lock_mutex_query
+SET GLOBAL query_cache_size=0;
+--- /dev/null
++++ b/mysql-test/r/percona_query_cache_with_comments_crash_2.result
+@@ -0,0 +1,8 @@
++DROP TABLE IF EXISTS table17_int;
++DROP TABLE IF EXISTS table30_int;
++CREATE TABLE `table17_int` (pk integer auto_increment primary key, `col_char_10_not_null_key` char(10), `col_enum_not_null_key` int);
++CREATE TABLE `table30_int` (pk integer auto_increment primary key, `col_enum_not_null_key` int);
++SELECT X . `pk` FROM `table17_int` AS X LEFT JOIN `table30_int` AS Y USING ( `col_enum_not_null_key` ) WHERE X . `col_char_10_not_null_key` != ' you need to translate Views labels into other languages, consider installing the <a href=\" !path\">Internationalization</a> package\'s Views translation module.' LIMIT 7 /*Generated by THREAD_ID 1*/;
++pk
++DROP TABLE table17_int;
++DROP TABLE table30_int;
+--- /dev/null
++++ b/mysql-test/t/percona_query_cache_with_comments_crash_2-master.opt
+@@ -0,0 +1 @@
++--query-cache-size=10M --query-cache-strip-comments
+--- /dev/null
++++ b/mysql-test/t/percona_query_cache_with_comments_crash_2.test
+@@ -0,0 +1,9 @@
++--disable_warnings
++DROP TABLE IF EXISTS table17_int;
++DROP TABLE IF EXISTS table30_int;
++--enable_warnings
++CREATE TABLE `table17_int` (pk integer auto_increment primary key, `col_char_10_not_null_key` char(10), `col_enum_not_null_key` int);
++CREATE TABLE `table30_int` (pk integer auto_increment primary key, `col_enum_not_null_key` int);
++SELECT X . `pk` FROM `table17_int` AS X LEFT JOIN `table30_int` AS Y USING ( `col_enum_not_null_key` ) WHERE X . `col_char_10_not_null_key` != ' you need to translate Views labels into other languages, consider installing the <a href=\" !path\">Internationalization</a> package\'s Views translation module.' LIMIT 7 /*Generated by THREAD_ID 1*/;
++DROP TABLE table17_int;
++DROP TABLE table30_int;
+--- a/sql/sql_class.cc
++++ b/sql/sql_class.cc
+@@ -807,6 +807,99 @@
+ sql_errno == ER_TRG_NO_DEFINER);
+ }
+
++#ifdef HAVE_QUERY_CACHE
++
++
++Query_Without_Comments::Query_Without_Comments() :
++ buffer(0), /* no storage until allocate() is called */
++ q_length(0), /* query length reported by length() */
++ b_length(0) /* size in bytes of the allocated buffer */
++{
++}
++
++
++Query_Without_Comments::~Query_Without_Comments()
++{
++ if(buffer) /* still 0 when allocate() was never called */
++ {
++ my_free(buffer);
++ }
++}
++
++
++bool Query_Without_Comments::allocate(size_t query_length, size_t db_length)
++{
++  DBUG_ENTER("Query_Without_Comments::allocate");
++  DBUG_PRINT("info", ("old buffer: %p "
++                      "old query: '%-.4096s' "
++                      "old buffer length: %u "
++                      "old query length: %u",
++                      buffer,
++                      buffer,
++                      (uint) b_length,
++                      (uint) q_length));
++  /* save maximum query length for check in the set_length */
++  q_length= query_length;
++  /* according to sql_parse.cc memory allocation; the buffer only ever grows */
++  size_t new_b_length= (query_length + 1) + sizeof(size_t) + db_length +
++    QUERY_CACHE_FLAGS_SIZE;
++  if (b_length < new_b_length)
++  {
++    char *new_buffer= (char*) (buffer ? /* old buffer survives a failure */
++      my_realloc(buffer, new_b_length, MYF(0)) : my_malloc(new_b_length, MYF(0)));
++    if (!new_buffer)
++    {
++      q_length= 0; /* out of memory: buffer and b_length stay as they were */
++      DBUG_RETURN(false);
++    }
++    buffer= new_buffer;
++    b_length= new_b_length;
++  }
++  buffer[0]= 0;
++  DBUG_PRINT("info", ("buffer: %p "
++                      "buffer length: %u "
++                      "query maximum length: %u",
++                      buffer,
++                      (uint) b_length,
++                      (uint) q_length));
++  DBUG_RETURN(buffer);
++}
++
++
++void Query_Without_Comments::set_length(size_t query_length)
++{
++ DBUG_ENTER("Query_Without_Comments::set_length");
++ DBUG_ASSERT(query_length <= q_length); /* q_length is the maximum saved by allocate() */
++ buffer[query_length]= 0; /* NUL-terminate the query at its final length */
++ DBUG_PRINT("info", ("buffer: %p "
++ "query: '%-.4096s' "
++ "buffer length: %u "
++ "query maximum length: %u "
++ "query length: %u",
++ buffer,
++ buffer,
++ (uint) b_length,
++ (uint) q_length,
++ (uint) query_length));
++ q_length= query_length; /* from here on length() returns the final length */
++ DBUG_VOID_RETURN;
++}
++
++
++char* Query_Without_Comments::query()
++{
++ return buffer; /* 0 until allocate() has been called */
++}
++
++
++size_t Query_Without_Comments::length()
++{
++ return q_length; /* maximum length after allocate(), final length after set_length() */
++}
++
++
++#endif // HAVE_QUERY_CACHE
++
+
+ THD::THD()
+ :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION,
#include "transaction.h"
#include "sql_audit.h"
#include "sql_prepare.h"
-@@ -1507,6 +1508,12 @@
+@@ -1518,6 +1519,12 @@
ulonglong end_utime_of_query= thd->current_utime();
ulonglong query_exec_time= get_query_exec_time(thd, end_utime_of_query);
/*
Low long_query_time value most likely means user is debugging stuff and even
-@@ -1671,6 +1678,7 @@
+@@ -1682,6 +1689,7 @@
case SCH_CHARSETS:
case SCH_ENGINES:
case SCH_COLLATIONS:
/**
Reload/resets privileges and the different caches.
-@@ -296,6 +296,12 @@
+@@ -322,6 +322,12 @@
#endif
if (options & REFRESH_USER_RESOURCES)
reset_mqh((LEX_USER *) NULL, 0); /* purecov: inspected */
%token QUICK
%token RANGE_SYM /* SQL-2003-R */
%token READS_SYM /* SQL-2003-R */
-@@ -11099,6 +11100,15 @@
+@@ -11100,6 +11101,15 @@
{
Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
}
| CREATE PROCEDURE_SYM sp_name
{
LEX *lex= Lex;
-@@ -11338,6 +11348,12 @@
+@@ -11339,6 +11349,12 @@
Lex->type|= REFRESH_SLAVE;
Lex->reset_slave_info.all= false;
}
| MASTER_SYM
{ Lex->type|= REFRESH_MASTER; }
| DES_KEY_FILE
-@@ -12645,6 +12661,7 @@
+@@ -12646,6 +12662,7 @@
| PROXY_SYM {}
| QUARTER_SYM {}
| QUERY_SYM {}
sql_command_flags[SQLCOM_SHOW_CREATE_PROC]= CF_STATUS_COMMAND;
sql_command_flags[SQLCOM_SHOW_CREATE_FUNC]= CF_STATUS_COMMAND;
sql_command_flags[SQLCOM_SHOW_CREATE_TRIGGER]= CF_STATUS_COMMAND;
-@@ -2368,12 +2369,17 @@
+@@ -2393,12 +2394,17 @@
mysql_mutex_unlock(&LOCK_active_mi);
break;
}
if (active_mi != NULL)
{
res = show_master_info(thd, active_mi);
-@@ -2384,7 +2390,19 @@
+@@ -2409,7 +2415,19 @@
WARN_NO_MASTER_INFO, ER(WARN_NO_MASTER_INFO));
my_ok(thd);
}
%token STDDEV_SAMP_SYM /* SQL-2003-N */
%token STD_SYM
%token STOP_SYM
-@@ -11105,6 +11106,11 @@
+@@ -11106,6 +11107,11 @@
{
Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
}
sql_command_flags[SQLCOM_SHOW_TABLE_STATUS]= (CF_STATUS_COMMAND |
CF_SHOW_TABLE_COMMAND |
CF_REEXECUTION_FRAGILE);
-@@ -1510,6 +1513,8 @@
+@@ -1520,6 +1523,8 @@
case SCH_TABLE_NAMES:
case SCH_TABLES:
case SCH_VIEWS:
case SCH_TRIGGERS:
case SCH_EVENTS:
-@@ -2083,6 +2088,7 @@
+@@ -2110,6 +2115,7 @@
}
case SQLCOM_SHOW_DATABASES:
case SQLCOM_SHOW_TABLES:
case SQLCOM_SHOW_TRIGGERS:
case SQLCOM_SHOW_TABLE_STATUS:
case SQLCOM_SHOW_OPEN_TABLES:
-@@ -4852,6 +4858,8 @@
+@@ -4919,6 +4925,8 @@
case SCH_TABLE_NAMES:
case SCH_TABLES:
OPEN_TRIGGER_ONLY|OPTIMIZE_I_S_TABLE},
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
-@@ -10892,6 +10892,15 @@
+@@ -10893,6 +10893,15 @@
if (prepare_schema_table(YYTHD, lex, 0, SCH_TABLE_NAMES))
MYSQL_YYABORT;
}
DBUG_VOID_RETURN;
}
-@@ -5931,6 +5944,7 @@
+@@ -5932,6 +5945,7 @@
if (add_to_temporary_tables_list)
{
/* growing temp list at the head */
tmp_table->next= thd->temporary_tables;
if (tmp_table->next)
tmp_table->next->prev= tmp_table;
-@@ -5938,6 +5952,7 @@
+@@ -5939,6 +5953,7 @@
thd->temporary_tables->prev= 0;
if (thd->slave_thread)
slave_open_temp_tables++;
user_host, user_host_len,
query_utime, lock_utime, is_command,
sql_text, sql_text_len);
-@@ -1216,7 +1218,7 @@
+@@ -1200,8 +1202,6 @@
+
+ if (*slow_log_handler_list)
+ {
+- time_t current_time;
+-
+ /* do not log slow queries from replication threads */
+ if (thd->slave_thread && !opt_log_slow_slave_statements)
+ return 0;
+@@ -1216,16 +1216,29 @@
/* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
sctx->priv_user ? sctx->priv_user : "", "[",
sctx->host ? sctx->host : "", " [",
sctx->ip ? sctx->ip : "", "]", NullS) -
user_host_buff);
-@@ -1224,8 +1226,22 @@
- current_time= my_time_possible_from_micro(current_utime);
+
+- current_time= my_time_possible_from_micro(current_utime);
if (thd->start_utime)
{
- query_utime= (current_utime - thd->start_utime);
}
else
{
-@@ -1240,7 +1256,7 @@
+@@ -1240,7 +1253,7 @@
}
for (current_handler= slow_log_handler_list; *current_handler ;)
user_host_buff, user_host_len,
query_utime, lock_utime, is_command,
query, query_length) || error;
-@@ -2656,12 +2672,13 @@
+@@ -2656,12 +2669,13 @@
TRUE - error occured
*/
bool error= 0;
DBUG_ENTER("MYSQL_QUERY_LOG::write");
-@@ -2683,17 +2700,28 @@
+@@ -2683,17 +2697,28 @@
if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
{
/* Note that my_b_write() assumes it knows the length for this */
if (my_b_write(&log_file, (uchar*) buff, buff_len))
-@@ -2710,13 +2738,71 @@
+@@ -2710,13 +2735,71 @@
/* For slow query log */
sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
sprintf(lock_time_buff, "%.6f", ulonglong2double(lock_utime)/1000000.0);
{"Handler_delete", (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS},
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
-@@ -384,6 +384,7 @@
+@@ -390,6 +390,7 @@
lex->describe= 0;
lex->subqueries= FALSE;
lex->context_analysis_only= 0;
lex->leaf_tables_insert= 0;
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
-@@ -2303,6 +2303,7 @@
+@@ -2345,6 +2345,7 @@
enum enum_yes_no_unknown tx_chain, tx_release;
bool safe_to_cache_query;
require "lib/mtr_process.pl";
require "lib/mtr_io.pl";
-@@ -629,6 +630,7 @@
+@@ -630,6 +631,7 @@
# Report test status
mtr_report_test($result);
DBUG_RETURN(error);
}
-@@ -3638,6 +3645,127 @@
+@@ -3644,6 +3651,127 @@
return;
}
SCH_VARIABLES,
SCH_VIEWS
};
-@@ -1231,6 +1240,9 @@
+@@ -1233,6 +1242,9 @@
bool locked;
bool implicit_emptied; /* Can be !=0 only if HEAP */
const COND *pushed_cond;
/**
next_insert_id is the next value which should be inserted into the
auto_increment column: in a inserting-multi-row statement (like INSERT
-@@ -1282,10 +1294,12 @@
+@@ -1284,10 +1296,12 @@
ref_length(sizeof(my_off_t)),
ft_handler(0), inited(NONE),
locked(FALSE), implicit_emptied(0),
virtual ~handler(void)
{
DBUG_ASSERT(locked == FALSE);
-@@ -1408,6 +1422,8 @@
+@@ -1410,6 +1424,8 @@
{
table= table_arg;
table_share= share;
}
virtual double scan_time()
{ return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
-@@ -1803,6 +1819,8 @@
+@@ -1805,6 +1821,8 @@
virtual bool is_crashed() const { return 0; }
virtual bool auto_repair() const { return 0; }
/*
Log error with all enabled log event handlers
-@@ -5040,6 +5047,8 @@
+@@ -5029,6 +5036,8 @@
thd->first_successful_insert_id_in_prev_stmt_for_binlog);
if (e.write(file))
goto err;
}
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
{
-@@ -5051,12 +5060,16 @@
+@@ -5040,12 +5049,16 @@
minimum());
if (e.write(file))
goto err;
}
if (thd->user_var_events.elements)
{
-@@ -5079,6 +5092,8 @@
+@@ -5068,6 +5081,8 @@
flags);
if (e.write(file))
goto err;
}
}
}
-@@ -5090,6 +5105,8 @@
+@@ -5079,6 +5094,8 @@
if (event_info->write(file) ||
DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
goto err;
error= 0;
err:
-@@ -5275,7 +5292,8 @@
+@@ -5264,7 +5281,8 @@
be reset as a READ_CACHE to be able to read the contents from it.
*/
{
Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
-@@ -5322,6 +5340,7 @@
+@@ -5311,6 +5329,7 @@
/* write the first half of the split header */
if (my_b_write(&log_file, header, carry))
return ER_ERROR_ON_WRITE;
/*
copy fixed second half of header to cache so the correct
-@@ -5390,6 +5409,7 @@
+@@ -5379,6 +5398,7 @@
/* Write data to the binary log file */
if (my_b_write(&log_file, cache->read_pos, length))
return ER_ERROR_ON_WRITE;
cache->read_pos=cache->read_end; // Mark buffer used up
} while ((length= my_b_fill(cache)));
-@@ -5504,20 +5524,23 @@
+@@ -5493,20 +5513,23 @@
Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0);
if (qinfo.write(&log_file))
goto err;
if (delete_table)
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
-@@ -869,6 +869,13 @@
+@@ -885,6 +885,13 @@
mysys_var=0;
binlog_evt_union.do_union= FALSE;
enable_slow_log= 0;
#ifndef DBUG_OFF
dbug_sentry=THD_SENTRY_MAGIC;
#endif
-@@ -1248,6 +1255,7 @@
+@@ -1264,6 +1271,7 @@
variables.option_bits|= OPTION_BIN_LOG;
else
variables.option_bits&= ~OPTION_BIN_LOG;
#if defined(ENABLED_DEBUG_SYNC)
/* Initialize the Debug Sync Facility. See debug_sync.cc. */
-@@ -1255,6 +1263,94 @@
- #endif /* defined(ENABLED_DEBUG_SYNC) */
+@@ -1273,6 +1281,94 @@
+ clear_slow_extended();
}
+// Resets stats in a THD.
/*
Init THD for query processing.
-@@ -2009,6 +2105,32 @@
+@@ -2027,6 +2123,32 @@
}
#endif
struct Item_change_record: public ilink
{
-@@ -2185,6 +2307,7 @@
+@@ -2203,6 +2325,7 @@
}
thd->sent_row_count++;
if (thd->vio_ok())
DBUG_RETURN(protocol->write());
-@@ -2277,6 +2400,7 @@
+@@ -2295,6 +2418,7 @@
select_export::~select_export()
{
thd->sent_row_count=row_count;
}
-@@ -3300,6 +3424,7 @@
+@@ -3318,6 +3442,7 @@
if (likely(thd != 0))
{ /* current_thd==0 when close_connection() calls net_send_error() */
thd->status_var.bytes_sent+= length;
}
}
-@@ -3307,6 +3432,7 @@
+@@ -3325,6 +3450,7 @@
void thd_increment_bytes_received(ulong length)
{
current_thd->status_var.bytes_received+= length;
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
-@@ -1634,6 +1634,8 @@
+@@ -1644,6 +1644,8 @@
*/
enum enum_server_command command;
uint32 server_id;
uint32 file_id; // for LOAD DATA INFILE
/* remote (peer) port */
uint16 peer_port;
-@@ -2098,6 +2100,8 @@
+@@ -2153,6 +2155,8 @@
*/
enum_tx_isolation tx_isolation;
enum_check_fields count_cuted_fields;
DYNAMIC_ARRAY user_var_events; /* For user variables replication */
MEM_ROOT *user_var_events_alloc; /* Allocate above array elements here */
-@@ -2192,6 +2196,49 @@
+@@ -2247,6 +2251,49 @@
*/
LOG_INFO* current_linfo;
NET* slave_net; // network connection from slave -> m.
/* Used by the sys_var class to store temporary values */
union
{
-@@ -2272,6 +2319,11 @@
+@@ -2327,6 +2374,11 @@
alloc_root.
*/
void init_for_queries();
void change_user(void);
void cleanup(void);
void cleanup_after_query();
-@@ -2744,6 +2796,15 @@
+@@ -2799,6 +2851,15 @@
}
thd_scheduler scheduler;
public:
inline Internal_error_handler *get_internal_handler()
{ return m_internal_handler; }
-@@ -2944,6 +3005,10 @@
+@@ -2999,6 +3060,10 @@
LEX_STRING invoker_host;
};
const char *any_db="*any*"; // Special symbol for check_access
const LEX_STRING command_name[]={
-@@ -701,6 +704,12 @@
+@@ -703,6 +706,12 @@
*/
thd->clear_error(); // Clear error message
thd->stmt_da->reset_diagnostics_area();
net_new_transaction(net);
-@@ -886,6 +895,10 @@
+@@ -888,6 +897,10 @@
(char *) thd->security_ctx->host_or_ip);
thd->command=command;
/*
Commands which always take a long time are logged into
the slow log only if opt_log_slow_admin_statements is set.
-@@ -1672,6 +1685,13 @@
+@@ -1683,6 +1696,13 @@
thd->profiling.discard_current_query();
#endif
break;
case SCH_OPEN_TABLES:
case SCH_VARIABLES:
case SCH_STATUS:
-@@ -1829,6 +1849,7 @@
+@@ -1857,6 +1877,7 @@
thd->security_ctx->priv_host)) &&
check_global_access(thd, SUPER_ACL))
{
my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "SUPER");
DBUG_RETURN(TRUE);
}
-@@ -4827,6 +4848,7 @@
+@@ -4892,6 +4913,7 @@
case ACL_INTERNAL_ACCESS_DENIED:
if (! no_errors)
{
my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
sctx->priv_user, sctx->priv_host, db);
}
-@@ -4877,6 +4899,7 @@
+@@ -4942,6 +4964,7 @@
DBUG_PRINT("error",("No possible access"));
if (!no_errors)
{
if (thd->password == 2)
my_error(ER_ACCESS_DENIED_NO_PASSWORD_ERROR, MYF(0),
sctx->priv_user,
-@@ -4993,6 +5016,7 @@
+@@ -5058,6 +5081,7 @@
if (!thd->col_access && check_grant_db(thd, dst_db_name))
{
my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
thd->security_ctx->priv_user,
thd->security_ctx->priv_host,
-@@ -5263,6 +5287,7 @@
+@@ -5328,6 +5352,7 @@
if ((thd->security_ctx->master_access & want_access))
return 0;
get_privilege_desc(command, sizeof(command), want_access);
my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), command);
return 1;
#else
-@@ -5644,6 +5669,32 @@
+@@ -5695,6 +5720,32 @@
lex_start(thd);
mysql_reset_thd_for_next_command(thd);
if (query_cache_send_result_to_client(thd, rawbuf, length) <= 0)
{
LEX *lex= thd->lex;
-@@ -5712,6 +5763,52 @@
+@@ -5763,6 +5814,52 @@
DBUG_ASSERT(thd->change_list.is_empty());
}
--- a/sql/sql_reload.cc
+++ b/sql/sql_reload.cc
-@@ -294,14 +294,48 @@
+@@ -320,14 +320,48 @@
mysql_mutex_unlock(&LOCK_active_mi);
}
#endif
%token USE_FRM
%token USE_SYM
%token USING /* SQL-2003-R */
-@@ -11120,6 +11125,41 @@
+@@ -11121,6 +11126,41 @@
MYSQL_YYABORT;
#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
}
| CREATE PROCEDURE_SYM sp_name
{
LEX *lex= Lex;
-@@ -11365,6 +11405,16 @@
+@@ -11366,6 +11406,16 @@
Lex->type|= REFRESH_QUERY_RESPONSE_TIME;
#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
}
| MASTER_SYM
{ Lex->type|= REFRESH_MASTER; }
| DES_KEY_FILE
-@@ -12509,6 +12559,7 @@
+@@ -12510,6 +12560,7 @@
| CHAIN_SYM {}
| CHANGED {}
| CIPHER_SYM {}
| CLIENT_SYM {}
| CLASS_ORIGIN_SYM {}
| COALESCE {}
-@@ -12577,6 +12628,7 @@
+@@ -12578,6 +12629,7 @@
| HOSTS_SYM {}
| HOUR_SYM {}
| IDENTIFIED_SYM {}
| IGNORE_SERVER_IDS_SYM {}
| INVOKER_SYM {}
| IMPORT {}
-@@ -12728,6 +12780,7 @@
+@@ -12729,6 +12781,7 @@
| SUSPEND_SYM {}
| SWAPS_SYM {}
| SWITCHES_SYM {}
| TABLE_NAME_SYM {}
| TABLES {}
| TABLE_CHECKSUM_SYM {}
-@@ -12753,6 +12806,7 @@
+@@ -12754,6 +12807,7 @@
| UNKNOWN_SYM {}
| UNTIL_SYM {}
| USER {}
}
int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
-@@ -1537,16 +1540,24 @@
+@@ -1553,16 +1556,24 @@
int ha_myisam::update_row(const uchar *old_data, uchar *new_data)
{
}
int ha_myisam::index_read_map(uchar *buf, const uchar *key,
-@@ -1558,6 +1569,14 @@
+@@ -1574,6 +1585,14 @@
ha_statistic_increment(&SSV::ha_read_key_count);
int error=mi_rkey(file, buf, active_index, key, keypart_map, find_flag);
table->status=error ? STATUS_NOT_FOUND: 0;
MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-@@ -1570,6 +1589,14 @@
+@@ -1586,6 +1605,14 @@
ha_statistic_increment(&SSV::ha_read_key_count);
int error=mi_rkey(file, buf, index, key, keypart_map, find_flag);
table->status=error ? STATUS_NOT_FOUND: 0;
MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-@@ -1584,6 +1611,14 @@
+@@ -1600,6 +1627,14 @@
int error=mi_rkey(file, buf, active_index, key, keypart_map,
HA_READ_PREFIX_LAST);
table->status=error ? STATUS_NOT_FOUND: 0;
MYSQL_INDEX_READ_ROW_DONE(error);
DBUG_RETURN(error);
}
-@@ -1595,6 +1630,13 @@
+@@ -1611,6 +1646,13 @@
ha_statistic_increment(&SSV::ha_read_next_count);
int error=mi_rnext(file,buf,active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-@@ -1606,6 +1648,13 @@
+@@ -1622,6 +1664,13 @@
ha_statistic_increment(&SSV::ha_read_prev_count);
int error=mi_rprev(file,buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-@@ -1617,6 +1666,14 @@
+@@ -1633,6 +1682,14 @@
ha_statistic_increment(&SSV::ha_read_first_count);
int error=mi_rfirst(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-@@ -1628,6 +1685,14 @@
+@@ -1644,6 +1701,14 @@
ha_statistic_increment(&SSV::ha_read_last_count);
int error=mi_rlast(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-@@ -1645,6 +1710,14 @@
+@@ -1661,6 +1726,14 @@
error= mi_rnext_same(file,buf);
} while (error == HA_ERR_RECORD_DELETED);
table->status=error ? STATUS_NOT_FOUND: 0;
MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-@@ -1664,6 +1737,8 @@
+@@ -1680,6 +1753,8 @@
ha_statistic_increment(&SSV::ha_read_rnd_next_count);
int error=mi_scan(file, buf);
table->status=error ? STATUS_NOT_FOUND: 0;
MYSQL_READ_ROW_DONE(error);
return error;
}
-@@ -1680,6 +1755,8 @@
+@@ -1696,6 +1771,8 @@
ha_statistic_increment(&SSV::ha_read_rnd_count);
int error=mi_rrnd(file, buf, my_get_ptr(pos,ref_length));
table->status=error ? STATUS_NOT_FOUND: 0;
--- /dev/null
+--- a/sql/debug_sync.cc
++++ b/sql/debug_sync.cc
+@@ -1737,7 +1737,7 @@
+ if (action->wait_for.length())
+ {
+ mysql_mutex_t *old_mutex;
+- mysql_cond_t *old_cond;
++ mysql_cond_t* UNINIT_VAR(old_cond);
+ int error= 0;
+ struct timespec abstime;
+