Skip to content

Commit c4f02fd

Browse files
Daogang Qu authored and Hery Ramilison committed
Bug #26266758 SKIPPING MYSQL SLAVE ERROR CODE 1590 (ER_SLAVE_INCIDENT) DOES NOT WORK
After generating a GTID for the incident log event, skipping slave error code 'ER_SLAVE_INCIDENT' does not work. The root cause is that we did not commit the transaction to consume its owned GTID when applying the incident log event in that case; moreover, the slave job added by the incident's GTID is wrongly removed from the global assigned queue when MTS is enabled. To make skipping slave error code 'ER_SLAVE_INCIDENT' work, commit the transaction to consume its owned GTID when applying the incident log event in that case, and remove the slave job added by the incident's GTID correctly when MTS is enabled. To make NDB cluster happy, if there is no binlog cache then we write incidents into the binlog directly. (cherry picked from commit 4585ae70078721d7ee1ed23ee632a251ac32de41)
1 parent 68c7d04 commit c4f02fd

11 files changed

+379
-72
lines changed

mysql-test/include/sync_slave_sql.inc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,20 @@ if ($rpl_debug)
7777
#
7878
--let $_slave_master_file= query_get_value(SHOW SLAVE STATUS, Relay_Master_Log_File, 1)
7979
--let $_slave_master_pos= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1)
80+
81+
if ($rpl_debug)
82+
{
83+
--echo _saved_file='$_saved_file'
84+
--echo _slave_master_file='$_slave_master_file'
85+
}
86+
8087
if ($_slave_master_file == $_saved_file)
8188
{
89+
if ($rpl_debug)
90+
{
91+
--echo _saved_pos='$_saved_pos'
92+
--echo _slave_master_pos='$_slave_master_pos'
93+
}
8294
if ($_slave_master_pos == $_saved_pos)
8395
{
8496
if ($rpl_debug)
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
include/master-slave.inc
2+
Warnings:
3+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
4+
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
5+
[connection master]
6+
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
7+
call mtr.add_suppression("Slave SQL for channel '': The incident LOST_EVENTS occured on the master.");
8+
call mtr.add_suppression("The content of the statement cache is corrupted "
9+
"while writing a rollback record of the transaction "
10+
"to the binary log. An incident event has been "
11+
"written to the binary log which will stop the "
12+
"slaves.");
13+
SET GLOBAL max_binlog_stmt_cache_size = 4096;
14+
Warnings:
15+
Warning 1745 Option binlog_stmt_cache_size (32768) is greater than max_binlog_stmt_cache_size (4096); setting binlog_stmt_cache_size equal to max_binlog_stmt_cache_size.
16+
SET GLOBAL binlog_stmt_cache_size = 4096;
17+
CREATE TABLE t1(c1 INT PRIMARY KEY, data TEXT(30000)) ENGINE=MyIsam;
18+
#
19+
# A single statement on non-transactional table causes to log an incident
20+
# event with an unique gtid due to the fact that the stmt_cache is not
21+
# big enough to accommodate the changes.
22+
#
23+
Got one of the listed errors
24+
include/show_binlog_events.inc
25+
Log_name Pos Event_type Server_id End_log_pos Info
26+
master-bin.000001 # Incident # # #1 (LOST_EVENTS)
27+
master-bin.000001 # Rotate # # master-bin.000002;pos=POS
28+
INSERT INTO t1 (c1, data) VALUES (2, 'b');
29+
#
30+
# Verify that we can skip the incident event by setting
31+
# --slave-skip-errors=1590 on slave.
32+
#
33+
include/sync_slave_sql_with_master.inc
34+
[connection master]
35+
DROP TABLE t1;
36+
SET GLOBAL max_binlog_stmt_cache_size= ORIGINAL_VALUE;
37+
SET GLOBAL binlog_stmt_cache_size= ORIGINAL_VALUE;
38+
include/rpl_end.inc
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
include/master-slave.inc
2+
Warnings:
3+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
4+
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
5+
[connection master]
6+
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
7+
call mtr.add_suppression("Slave SQL for channel '': The incident LOST_EVENTS occured on the master.");
8+
call mtr.add_suppression("The content of the statement cache is corrupted "
9+
"while writing a rollback record of the transaction "
10+
"to the binary log. An incident event has been "
11+
"written to the binary log which will stop the "
12+
"slaves.");
13+
SET GLOBAL max_binlog_stmt_cache_size = 4096;
14+
Warnings:
15+
Warning 1745 Option binlog_stmt_cache_size (32768) is greater than max_binlog_stmt_cache_size (4096); setting binlog_stmt_cache_size equal to max_binlog_stmt_cache_size.
16+
SET GLOBAL binlog_stmt_cache_size = 4096;
17+
SET @save_debug=@@global.debug;
18+
SET GLOBAL DEBUG='d,simulate_write_incident_event_into_binlog_directly';
19+
CREATE TABLE t1(c1 INT PRIMARY KEY, data TEXT(30000)) ENGINE=MyIsam;
20+
#
21+
# A single statement on non-transactional table causes to log an incident
22+
# event without a GTID due to the fact that the stmt_cache is not big
23+
# enough to accommodate the changes.
24+
#
25+
Got one of the listed errors
26+
include/show_binlog_events.inc
27+
Log_name Pos Event_type Server_id End_log_pos Info
28+
master-bin.000001 # Incident # # #1 (LOST_EVENTS)
29+
master-bin.000001 # Rotate # # master-bin.000002;pos=POS
30+
#
31+
# Verify that we can skip the incident event by setting
32+
# --slave-skip-errors=1590 on slave.
33+
#
34+
include/sync_slave_sql_with_master.inc
35+
[connection master]
36+
DROP TABLE t1;
37+
SET GLOBAL DEBUG= @save_debug;
38+
SET GLOBAL max_binlog_stmt_cache_size= ORIGINAL_VALUE;
39+
SET GLOBAL binlog_stmt_cache_size= ORIGINAL_VALUE;
40+
include/rpl_end.inc
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--slave-skip-errors=1590
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# ==== Purpose ====
2+
#
3+
# Verify that we can skip an incident event by setting --slave-skip-errors=1590
4+
# on slave.
5+
#
6+
# ==== Implementation ====
7+
#
8+
# 1. a single statement on non-transactional table causes to log an incident
9+
# event with a unique GTID due to the fact that the stmt_cache is not
10+
# big enough to accommodate the changes.
11+
#
12+
# 2. sync slave sql with master to verify that we can skip the incident event
13+
# by setting --slave-skip-errors=1590 on slave.
14+
#
15+
# ==== References ====
16+
#
17+
# Bug #26266758 SKIPPING MYSQL SLAVE ERROR CODE 1590 (ER_SLAVE_INCIDENT) DOES NOT WORK
18+
# See mysql-test/suite/rpl/t/rpl_skip_incident_error_cross.test
19+
20+
--source include/not_group_replication_plugin.inc
21+
# Test in this file is binlog format agnostic, thus no need
22+
# to rerun it for every format.
23+
--source include/have_binlog_format_row.inc
24+
--source include/have_myisam.inc
25+
--source include/master-slave.inc
26+
27+
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
28+
call mtr.add_suppression("Slave SQL for channel '': The incident LOST_EVENTS occured on the master.");
29+
call mtr.add_suppression("The content of the statement cache is corrupted "
30+
"while writing a rollback record of the transaction "
31+
"to the binary log. An incident event has been "
32+
"written to the binary log which will stop the "
33+
"slaves.");
34+
35+
--let $old_max_binlog_stmt_cache_size= query_get_value(SHOW VARIABLES LIKE "max_binlog_stmt_cache_size", Value, 1)
36+
--let $old_binlog_stmt_cache_size= query_get_value(SHOW VARIABLES LIKE "binlog_stmt_cache_size", Value, 1)
37+
SET GLOBAL max_binlog_stmt_cache_size = 4096;
38+
SET GLOBAL binlog_stmt_cache_size = 4096;
39+
40+
--disconnect master
41+
--connect(master,127.0.0.1,root,,test,$MASTER_MYPORT,)
42+
--let $data= `SELECT CONCAT('"', repeat('a',2000), '"')`
43+
44+
CREATE TABLE t1(c1 INT PRIMARY KEY, data TEXT(30000)) ENGINE=MyIsam;
45+
46+
--let $saved_master_pos=query_get_value('SHOW MASTER STATUS', Position, 1)
47+
--echo #
48+
--echo # A single statement on non-transactional table causes to log an incident
49+
--echo # event with an unique gtid due to the fact that the stmt_cache is not
50+
--echo # big enough to accommodate the changes.
51+
--echo #
52+
--disable_query_log
53+
--error ER_STMT_CACHE_FULL, ER_ERROR_ON_WRITE
54+
eval INSERT INTO t1 (c1, data) VALUES (1,
55+
CONCAT($data, $data, $data, $data, $data, $data));
56+
--enable_query_log
57+
58+
--let $binlog_start= $saved_master_pos
59+
--source include/show_binlog_events.inc
60+
61+
INSERT INTO t1 (c1, data) VALUES (2, 'b');
62+
63+
--echo #
64+
--echo # Verify that we can skip the incident event by setting
65+
--echo # --slave-skip-errors=1590 on slave.
66+
--echo #
67+
--source include/sync_slave_sql_with_master.inc
68+
69+
# Cleanup
70+
--source include/rpl_connection_master.inc
71+
DROP TABLE t1;
72+
--replace_result $old_max_binlog_stmt_cache_size ORIGINAL_VALUE
73+
--eval SET GLOBAL max_binlog_stmt_cache_size= $old_max_binlog_stmt_cache_size
74+
--replace_result $old_binlog_stmt_cache_size ORIGINAL_VALUE
75+
--eval SET GLOBAL binlog_stmt_cache_size= $old_binlog_stmt_cache_size
76+
77+
--source include/rpl_end.inc
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--slave-skip-errors=1590
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# ==== Purpose ====
2+
#
3+
# Verify that we can skip an incident event without a GTID by setting
4+
# --slave-skip-errors=1590 on cross-version replication.
5+
#
6+
# ==== Implementation ====
7+
#
8+
# 1. a single statement on non-transactional table causes to log an incident
9+
# event without a GTID due to the fact that the stmt_cache is not big
10+
# enough to accommodate the changes.
11+
#
12+
# 2. sync slave sql with master to verify that we can skip the incident event
13+
# by setting --slave-skip-errors=1590 on slave.
14+
#
15+
# ==== References ====
16+
#
17+
# Bug #26266758 SKIPPING MYSQL SLAVE ERROR CODE 1590 (ER_SLAVE_INCIDENT) DOES NOT WORK
18+
# See mysql-test/suite/rpl/t/rpl_skip_incident_error.test
19+
20+
--source include/not_group_replication_plugin.inc
21+
# Test in this file is binlog format agnostic, thus no need
22+
# to rerun it for every format.
23+
--source include/have_binlog_format_row.inc
24+
--source include/have_myisam.inc
25+
--source include/have_debug.inc
26+
--source include/master-slave.inc
27+
28+
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
29+
call mtr.add_suppression("Slave SQL for channel '': The incident LOST_EVENTS occured on the master.");
30+
call mtr.add_suppression("The content of the statement cache is corrupted "
31+
"while writing a rollback record of the transaction "
32+
"to the binary log. An incident event has been "
33+
"written to the binary log which will stop the "
34+
"slaves.");
35+
36+
--let $old_max_binlog_stmt_cache_size= query_get_value(SHOW VARIABLES LIKE "max_binlog_stmt_cache_size", Value, 1)
37+
--let $old_binlog_stmt_cache_size= query_get_value(SHOW VARIABLES LIKE "binlog_stmt_cache_size", Value, 1)
38+
SET GLOBAL max_binlog_stmt_cache_size = 4096;
39+
SET GLOBAL binlog_stmt_cache_size = 4096;
40+
SET @save_debug=@@global.debug;
41+
SET GLOBAL DEBUG='d,simulate_write_incident_event_into_binlog_directly';
42+
43+
--disconnect master
44+
--connect(master,127.0.0.1,root,,test,$MASTER_MYPORT,)
45+
--let $data= `SELECT CONCAT('"', repeat('a',2000), '"')`
46+
47+
CREATE TABLE t1(c1 INT PRIMARY KEY, data TEXT(30000)) ENGINE=MyIsam;
48+
49+
--let $saved_master_pos=query_get_value('SHOW MASTER STATUS', Position, 1)
50+
--echo #
51+
--echo # A single statement on non-transactional table causes to log an incident
52+
--echo # event without a GTID due to the fact that the stmt_cache is not big
53+
--echo # enough to accommodate the changes.
54+
--echo #
55+
--disable_query_log
56+
--error ER_STMT_CACHE_FULL, ER_ERROR_ON_WRITE
57+
eval INSERT INTO t1 (c1, data) VALUES (1,
58+
CONCAT($data, $data, $data, $data, $data, $data));
59+
--enable_query_log
60+
--let $binlog_start= $saved_master_pos
61+
# Set this option to prove that there is no GTID generated
62+
# for the incident event.
63+
--let $keep_gtid_events= 1
64+
--source include/show_binlog_events.inc
65+
66+
67+
--echo #
68+
--echo # Verify that we can skip the incident event by setting
69+
--echo # --slave-skip-errors=1590 on slave.
70+
--echo #
71+
--source include/sync_slave_sql_with_master.inc
72+
73+
# Cleanup
74+
--source include/rpl_connection_master.inc
75+
DROP TABLE t1;
76+
77+
SET GLOBAL DEBUG= @save_debug;
78+
--replace_result $old_max_binlog_stmt_cache_size ORIGINAL_VALUE
79+
--eval SET GLOBAL max_binlog_stmt_cache_size= $old_max_binlog_stmt_cache_size
80+
--replace_result $old_binlog_stmt_cache_size ORIGINAL_VALUE
81+
--eval SET GLOBAL binlog_stmt_cache_size= $old_binlog_stmt_cache_size
82+
83+
--source include/rpl_end.inc

sql/binlog.cc

Lines changed: 53 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7581,35 +7581,68 @@ bool MYSQL_BIN_LOG::write_incident(Incident_log_event *ev, THD *thd,
75817581

75827582
// @todo make this work with the group log. /sven
75837583
binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
7584-
if (cache_mngr == NULL)
7585-
DBUG_RETURN(error);
75867584

7587-
if (!cache_mngr->stmt_cache.is_binlog_empty())
7585+
#ifndef DBUG_OFF
7586+
if (DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly",
7587+
1, 0) && !cache_mngr->stmt_cache.is_binlog_empty())
75887588
{
75897589
/* The stmt_cache contains corrupted data, so we can reset it. */
75907590
cache_mngr->stmt_cache.reset();
75917591
}
7592-
if (!cache_mngr->trx_cache.is_binlog_empty())
7593-
{
7594-
/* The trx_cache contains corruption data, so we can reset it. */
7595-
cache_mngr->trx_cache.reset();
7596-
}
7592+
#endif
7593+
75977594
/*
7598-
Write the incident event into stmt_cache, so that a GTID is generated and
7599-
written for it prior to flushing the stmt_cache.
7595+
If there is no binlog cache then we write incidents directly
7596+
into the binlog. If caller needs GTIDs it has to setup the
7597+
binlog cache (for the injector thread).
76007598
*/
7601-
binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(false);
7602-
if ((error= cache_data->write_event(thd, ev)))
7599+
if (cache_mngr == NULL ||
7600+
DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly",
7601+
1, 0))
76037602
{
7604-
sql_print_error("Failed to write an incident event into stmt_cache.");
7605-
cache_mngr->stmt_cache.reset();
7606-
DBUG_RETURN(error);
7603+
if (need_lock_log)
7604+
mysql_mutex_lock(&LOCK_log);
7605+
else
7606+
mysql_mutex_assert_owner(&LOCK_log);
7607+
/* Write an incident event into binlog directly. */
7608+
error= ev->write(&log_file);
7609+
/*
7610+
Write an error to log. So that user might have a chance
7611+
to be alerted and explore incident details.
7612+
*/
7613+
if (!error)
7614+
sql_print_error("%s An incident event has been written to the binary "
7615+
"log which will stop the slaves.", err_msg);
76077616
}
7617+
else // (cache_mngr != NULL)
7618+
{
7619+
if (!cache_mngr->stmt_cache.is_binlog_empty())
7620+
{
7621+
/* The stmt_cache contains corrupted data, so we can reset it. */
7622+
cache_mngr->stmt_cache.reset();
7623+
}
7624+
if (!cache_mngr->trx_cache.is_binlog_empty())
7625+
{
7626+
/* The trx_cache contains corrupted data, so we can reset it. */
7627+
cache_mngr->trx_cache.reset();
7628+
}
7629+
/*
7630+
Write the incident event into stmt_cache, so that a GTID is generated and
7631+
written for it prior to flushing the stmt_cache.
7632+
*/
7633+
binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(false);
7634+
if ((error= cache_data->write_event(thd, ev)))
7635+
{
7636+
sql_print_error("Failed to write an incident event into stmt_cache.");
7637+
cache_mngr->stmt_cache.reset();
7638+
DBUG_RETURN(error);
7639+
}
76087640

7609-
if (need_lock_log)
7610-
mysql_mutex_lock(&LOCK_log);
7611-
else
7612-
mysql_mutex_assert_owner(&LOCK_log);
7641+
if (need_lock_log)
7642+
mysql_mutex_lock(&LOCK_log);
7643+
else
7644+
mysql_mutex_assert_owner(&LOCK_log);
7645+
}
76137646

76147647
if (do_flush_and_sync)
76157648
{
@@ -7632,7 +7665,7 @@ bool MYSQL_BIN_LOG::write_incident(Incident_log_event *ev, THD *thd,
76327665
Write an error to log. So that user might have a chance
76337666
to be alerted and explore incident details.
76347667
*/
7635-
if (!error)
7668+
if (!error && cache_mngr != NULL)
76367669
sql_print_error("%s An incident event has been written to the binary "
76377670
"log which will stop the slaves.", err_msg);
76387671

0 commit comments

Comments
 (0)