Skip to content

Commit 9b2fa2a

Browse files
sjaakolajanlindstrom
authored andcommitted
MDEV-24845 Oddities around innodb_fatal_semaphore_wait_threshold and global.innodb_disallow_writes
This commit adds an mtr test reproducing a scenario where, despite innodb_disallow_writes blocking, writes to the file system can still happen. The test launches a garbd node, which triggers one of the cluster nodes to switch to the SST donor state. In this state, all disk activity should be halted, and e.g. innodb_disallow_writes has been set. The test records an md5sum aggregate over the mariadb data directory when the node enters the donor state, and records another md5sum when the node leaves the donor state. If there is no IO activity in the data directory, these hashes should be equal. For this test, the donor state processing has been instrumented so that the SST donor thread can be stopped when entering the donor state. The test uses this new dbug sync point to control when to record the md5sums. A new SST script was added: wsrep_sst_backup, and garbd uses the backup method to launch the donor node to call this script and to enter the donor state. The backup script could later be extended into a general-purpose backup method for the cluster. This commit also fixes one race condition happening in wsrep_sst_rsync, like this: * the wsrep_sst_rsync script requests flush tables, and then waits in a loop until mariadbd has created the file tables_flushed, as confirmation that FLUSH TABLES has completed * mariadbd's SST donor thread wakes up for the flush tables request and then performs FTWRL, and after this it creates the tables_flushed file * note that the SST script will now continue to start up rsync sending * mariadbd's SST donor thread now calls sst_disallow_writes(), so that innodb would set up the disk IO blockage; however, rsyncing may already be ongoing at this point This race condition is fixed in this commit by performing all disk IO blocking before creating the tables_flushed file. Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
1 parent 6437b30 commit 9b2fa2a

File tree

6 files changed

+350
-31
lines changed

6 files changed

+350
-31
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
connection node_1;
2+
connection node_1;
3+
connection node_2;
4+
connection node_3;
5+
connection node_1;
6+
SET GLOBAL innodb_max_dirty_pages_pct=99;
7+
SET GLOBAL innodb_max_dirty_pages_pct_lwm=99;
8+
connection node_1;
9+
CREATE TABLE t1 (f1 INTEGER, f2 varchar(1024)) Engine=InnoDB;
10+
CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB;
11+
INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
12+
INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;
13+
connection node_2;
14+
Killing node #3 to free ports for garbd ...
15+
connection node_3;
16+
connection node_1;
17+
SET GLOBAL debug_dbug = "+d,sync.wsrep_donor_state";
18+
Starting garbd ...
19+
SET SESSION debug_sync = "now WAIT_FOR sync.wsrep_donor_state_reached";
20+
SET GLOBAL innodb_max_dirty_pages_pct_lwm=0;
21+
SET GLOBAL innodb_max_dirty_pages_pct=0;
22+
SET SESSION debug_sync = "now SIGNAL signal.wsrep_donor_state";
23+
SET GLOBAL debug_dbug = "";
24+
SET debug_sync='RESET';
25+
connection node_2;
26+
Killing garbd ...
27+
connection node_1;
28+
connection node_2;
29+
DROP TABLE t1;
30+
DROP TABLE ten;
31+
Restarting node #3 to satisfy MTR's end-of-test checks
32+
connection node_3;
33+
connection node_1;
34+
SET GLOBAL innodb_max_dirty_pages_pct = 75.000000;
35+
SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.000000;
36+
connection node_1;
37+
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");
38+
connection node_2;
39+
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");
40+
connection node_3;
41+
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
!include ../galera_3nodes.cnf
2+
3+
[mysqld]
4+
wsrep_sst_method=rsync
5+
6+
[mysqld.1]
7+
wsrep_node_name=node1
8+
9+
[mysqld.2]
10+
wsrep_node_name=node2
11+
12+
[mysqld.3]
13+
wsrep_node_name=node3
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#
2+
# Test that the data directory is not written to while a node is in SST donor state (MDEV-24845). We shut down node #3 and use its port allocation to start garbd, which requests a backup SST from node #1.
3+
# As MTR does not allow multiple servers to be down at the same time, we are limited as to what we can test.
4+
#
5+
6+
--source include/galera_cluster.inc
7+
--source include/have_innodb.inc
8+
--source include/have_garbd.inc
9+
--source include/big_test.inc
10+
--source include/have_debug.inc
11+
--source include/have_debug_sync.inc
12+
13+
--connection node_1
14+
# Save original auto_increment_offset values.
15+
--let $node_1=node_1
16+
--let $node_2=node_2
17+
--let $node_3=node_3
18+
19+
--let $galera_connection_name = node_3
20+
--let $galera_server_number = 3
21+
--source include/galera_connect.inc
22+
--source suite/galera/include/galera_base_port.inc
23+
--let $NODE_GALERAPORT_3 = $_NODE_GALERAPORT
24+
25+
--source ../galera/include/auto_increment_offset_save.inc
26+
27+
# Save galera ports
28+
--connection node_1
29+
--source suite/galera/include/galera_base_port.inc
30+
--let $NODE_GALERAPORT_1 = $_NODE_GALERAPORT
31+
--let $datadir= `SELECT @@datadir`
32+
33+
--let $innodb_max_dirty_pages_pct = `SELECT @@innodb_max_dirty_pages_pct`
34+
--let $innodb_max_dirty_pages_pct_lwm = `SELECT @@innodb_max_dirty_pages_pct_lwm`
35+
36+
SET GLOBAL innodb_max_dirty_pages_pct=99;
37+
SET GLOBAL innodb_max_dirty_pages_pct_lwm=99;
38+
39+
--connection node_1
40+
CREATE TABLE t1 (f1 INTEGER, f2 varchar(1024)) Engine=InnoDB;
41+
CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB;
42+
INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
43+
INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;
44+
45+
--connection node_2
46+
--source suite/galera/include/galera_base_port.inc
47+
--let $NODE_GALERAPORT_2 = $_NODE_GALERAPORT
48+
49+
--echo Killing node #3 to free ports for garbd ...
50+
--connection node_3
51+
--source include/shutdown_mysqld.inc
52+
53+
--connection node_1
54+
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'
55+
--source include/wait_condition.inc
56+
57+
# stop SST donor thread when node is in donor state
58+
SET GLOBAL debug_dbug = "+d,sync.wsrep_donor_state";
59+
60+
--echo Starting garbd ...
61+
--exec $MTR_GARBD_EXE --address "gcomm://127.0.0.1:$NODE_GALERAPORT_1" --group my_wsrep_cluster --donor node1 --sst backup --options 'base_port=$NODE_GALERAPORT_3' > $MYSQL_TMP_DIR/garbd.log 2>&1 &
62+
63+
SET SESSION debug_sync = "now WAIT_FOR sync.wsrep_donor_state_reached";
64+
65+
#
66+
# get hash of data directory contents before BP dirty page flushing
67+
#
68+
--exec find $datadir -type f ! -name tables_flushed ! -name backup_sst_complete -exec md5sum {} \; | md5sum >$MYSQLTEST_VARDIR/tmp/innodb_before
69+
70+
# this should force buffer pool flushing, if not already done by donor state change transfer
71+
SET GLOBAL innodb_max_dirty_pages_pct_lwm=0;
72+
SET GLOBAL innodb_max_dirty_pages_pct=0;
73+
74+
--disable_query_log
75+
--disable_result_log
76+
select f1 from t1;
77+
select * from ten;
78+
--enable_result_log
79+
--enable_query_log
80+
81+
#
82+
#
83+
# record the hash of data directory contents after BP dirty page flushing
84+
#
85+
--exec find $datadir -type f ! -name tables_flushed ! -name backup_sst_complete -exec md5sum {} \; | md5sum >$MYSQLTEST_VARDIR/tmp/innodb_after
86+
87+
# there should be no disk writes
88+
--diff_files $MYSQLTEST_VARDIR/tmp/innodb_before $MYSQLTEST_VARDIR/tmp/innodb_after
89+
90+
SET SESSION debug_sync = "now SIGNAL signal.wsrep_donor_state";
91+
SET GLOBAL debug_dbug = "";
92+
SET debug_sync='RESET';
93+
94+
--connection node_2
95+
96+
#
97+
# garbd will die automatically, because of the backup SST script
98+
# but just to be sure, sending explicit kill here, as well
99+
#
100+
--echo Killing garbd ...
101+
# FreeBSD's /bin/pkill only supports short versions of the options:
102+
# -o Select only the oldest (least recently started)
103+
# -f Match against full argument lists
104+
--error 0,1
105+
--exec pkill -o -f garbd.*$NODE_GALERAPORT_3
106+
107+
--connection node_1
108+
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'
109+
--source include/wait_condition.inc
110+
111+
--connection node_2
112+
113+
DROP TABLE t1;
114+
DROP TABLE ten;
115+
116+
--echo Restarting node #3 to satisfy MTR's end-of-test checks
117+
--connection node_3
118+
let $restart_noprint=2;
119+
--source include/start_mysqld.inc
120+
121+
--connection node_1
122+
--eval SET GLOBAL innodb_max_dirty_pages_pct = $innodb_max_dirty_pages_pct
123+
--eval SET GLOBAL innodb_max_dirty_pages_pct_lwm = $innodb_max_dirty_pages_pct_lwm
124+
125+
--source ../galera/include/auto_increment_offset_restore.inc
126+
127+
--connection node_1
128+
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");
129+
130+
--connection node_2
131+
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");
132+
133+
--connection node_3
134+
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");

scripts/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ ELSE()
275275
wsrep_sst_mysqldump
276276
wsrep_sst_rsync
277277
wsrep_sst_mariabackup
278+
wsrep_sst_backup
278279
)
279280
# The following script is sourced from other SST scripts, so it should
280281
# not be made executable.

scripts/wsrep_sst_backup.sh

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#!/usr/bin/env bash
2+
3+
set -ue
4+
5+
# Copyright (C) 2017-2021 MariaDB
6+
# Copyright (C) 2010-2014 Codership Oy
7+
#
8+
# This program is free software; you can redistribute it and/or modify
9+
# it under the terms of the GNU General Public License as published by
10+
# the Free Software Foundation; version 2 of the License.
11+
#
12+
# This program is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU General Public License
18+
# along with this program; see the file COPYING. If not, write to the
19+
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston
20+
# MA 02110-1335 USA.
21+
22+
# This is a reference script for backup-based state snapshot transfer (only the donor role is handled)
23+
24+
RSYNC_REAL_PID=0 # rsync process id
25+
STUNNEL_REAL_PID=0 # stunnel process id
26+
27+
OS="$(uname)"
28+
[ "$OS" = 'Darwin' ] && export -n LD_LIBRARY_PATH
29+
30+
# Setting the path for lsof on CentOS
31+
export PATH="/usr/sbin:/sbin:$PATH"
32+
33+
# Load the shared SST helpers that live next to this script. The path is
# quoted so that an installation directory containing whitespace still works.
. "$(dirname "$0")/wsrep_sst_common"
34+
35+
MAGIC_FILE="$WSREP_SST_OPT_DATA/backup_sst_complete"
36+
rm -rf "$MAGIC_FILE"
37+
38+
WSREP_LOG_DIR=${WSREP_LOG_DIR:-""}
39+
# if WSREP_LOG_DIR env. variable is not set, try to get it from my.cnf
40+
if [ -z "$WSREP_LOG_DIR" ]; then
41+
WSREP_LOG_DIR=$(parse_cnf mysqld innodb-log-group-home-dir '')
42+
fi
43+
44+
if [ -n "$WSREP_LOG_DIR" ]; then
45+
# handle both relative and absolute paths
46+
# Resolve the configured log directory to a physical absolute path. A relative
# value is interpreted against the data directory; the directory is created if
# missing. All path expansions are quoted so paths with spaces are not split.
WSREP_LOG_DIR=$(cd "$WSREP_SST_OPT_DATA"; mkdir -p "$WSREP_LOG_DIR"; cd "$WSREP_LOG_DIR"; pwd -P)
47+
else
48+
# default to datadir
49+
# No log directory configured: use the physical absolute path of the data
# directory (quoted so a datadir containing spaces is handled correctly).
WSREP_LOG_DIR=$(cd "$WSREP_SST_OPT_DATA"; pwd -P)
50+
fi
51+
52+
if [ "$WSREP_SST_OPT_ROLE" = 'donor' ]
53+
then
54+
55+
[ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE"
56+
57+
RC=0
# Pre-set STATE: it is only assigned when tables are actually flushed, but it
# is expanded unconditionally when writing the magic file and the final
# "done" line. Without a default, the bypass path aborts under 'set -u'.
# NOTE(review): an empty state is reported on bypass; confirm whether the
# donor GTID should be reported instead, as other SST scripts may do.
STATE=''
58+
59+
if [ $WSREP_SST_OPT_BYPASS -eq 0 ]; then
60+
61+
FLUSHED="$WSREP_SST_OPT_DATA/tables_flushed"
62+
ERROR="$WSREP_SST_OPT_DATA/sst_error"
63+
64+
[ -f "$FLUSHED" ] && rm -f "$FLUSHED"
65+
[ -f "$ERROR" ] && rm -f "$ERROR"
66+
67+
echo "flush tables"
68+
69+
# Wait for :
70+
# (a) Tables to be flushed, AND
71+
# (b) Cluster state ID & wsrep_gtid_domain_id to be written to the file, OR
72+
# (c) ERROR file, in case flush tables operation failed.
73+
74+
while [ ! -r "$FLUSHED" ] && \
75+
! grep -q -F ':' '--' "$FLUSHED" >/dev/null 2>&1
76+
do
77+
# Check whether ERROR file exists.
78+
if [ -f "$ERROR" ]; then
79+
# Flush tables operation failed.
80+
rm -f "$ERROR"
81+
exit 255
82+
fi
83+
sleep 0.2
84+
done
85+
86+
STATE=$(cat "$FLUSHED")
87+
rm -f "$FLUSHED"
88+
89+
90+
else # BYPASS
91+
92+
wsrep_log_info "Bypassing state dump."
93+
fi
94+
95+
echo 'continue' # now server can resume updating data
96+
97+
echo "$STATE" > "$MAGIC_FILE"
98+
99+
echo "done $STATE"
100+
101+
elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]
102+
then
103+
# The joiner role is recognized but not implemented by this script, so report
# it as unsupported rather than unrecognized.
wsrep_log_error "Unsupported role: '$WSREP_SST_OPT_ROLE' (the backup SST method supports only the donor role)"
104+
exit 22 # EINVAL
105+
106+
107+
else
108+
wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'"
109+
exit 22 # EINVAL
110+
fi
111+
112+
exit 0

0 commit comments

Comments
 (0)