Skip to content

Commit 947efe1

Browse files
committed
MDEV-15158 On commit, do not write to the TRX_SYS page
This is based on a prototype by Thirunarayanan Balathandayuthapani <thiru@mariadb.com>. Binlog and Galera write-set replication information used to be written into the TRX_SYS page on each commit. Instead of writing to the TRX_SYS page during normal operation, InnoDB can make use of the rollback segment header pages, which are already being written to during a commit. The following fields are added to the rollback segment header page: TRX_RSEG_BINLOG_OFFSET; TRX_RSEG_BINLOG_NAME (NUL-terminated; empty name = not present); TRX_RSEG_WSREP_XID_FORMAT (0 = not present; 1 = present); TRX_RSEG_WSREP_XID_GTRID; TRX_RSEG_WSREP_XID_BQUAL; TRX_RSEG_WSREP_XID_DATA. trx_sys_t: Introduce the fields recovered_binlog_filename, recovered_binlog_offset, recovered_wsrep_xid. To facilitate upgrades from older MySQL or MariaDB versions, we will read the information in the TRX_SYS page. It will be overridden by the information that we find in the rollback segment header pages. Mariabackup --prepare will read the metadata from the rollback segment header pages via trx_rseg_array_init(). It will still not read any undo log pages or recover any transactions.
1 parent f654435 commit 947efe1

File tree

12 files changed

+491
-322
lines changed

12 files changed

+491
-322
lines changed

extra/mariabackup/wsrep.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ permission notice:
4343
#include <my_global.h>
4444
#include <my_base.h>
4545
#include <handler.h>
46-
#include <trx0sys.h>
46+
#include <trx0rseg.h>
4747

4848
#include "common.h"
4949
#ifdef WITH_WSREP
@@ -181,7 +181,7 @@ xb_write_galera_info(bool incremental_prepare)
181181
memset(&xid, 0, sizeof(xid));
182182
xid.formatID = -1;
183183

184-
if (!trx_sys_read_wsrep_checkpoint(&xid)) {
184+
if (!trx_rseg_read_wsrep_checkpoint(xid)) {
185185

186186
return;
187187
}

extra/mariabackup/xtrabackup.cc

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4843,37 +4843,23 @@ xtrabackup_prepare_func(char** argv)
48434843
}
48444844

48454845
if (ok) {
4846-
mtr_tmtr;
4847-
mtr.start();
4848-
const buf_block_t* sys_header = trx_sysf_get(&mtr, false);
4849-
4850-
if (mach_read_from_4(TRX_SYS_MYSQL_LOG_INFO
4851-
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
4852-
+ TRX_SYS + sys_header->frame)
4853-
== TRX_SYS_MYSQL_LOG_MAGIC_N) {
4854-
ulonglong pos = mach_read_from_8(
4855-
TRX_SYS_MYSQL_LOG_INFO
4856-
+ TRX_SYS_MYSQL_LOG_OFFSET
4857-
+ TRX_SYS + sys_header->frame);
4858-
const char* name = reinterpret_cast<const char*>(
4859-
TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME
4860-
+ TRX_SYS + sys_header->frame);
4861-
msg("Last binlog file %s, position %llu\n", name, pos);
4862-
4863-
/* output to xtrabackup_binlog_pos_innodb and
4864-
(if backup_safe_binlog_info was available on
4865-
the server) to xtrabackup_binlog_info. In the
4866-
latter case xtrabackup_binlog_pos_innodb
4867-
becomes redundant and is created only for
4868-
compatibility. */
4869-
ok = store_binlog_info(
4870-
"xtrabackup_binlog_pos_innodb", name, pos)
4871-
&& (!recover_binlog_info || store_binlog_info(
4872-
XTRABACKUP_BINLOG_INFO,
4873-
name, pos));
4874-
}
4875-
4876-
mtr.commit();
4846+
msg("Last binlog file %s, position %lld\n",
4847+
trx_sys.recovered_binlog_filename,
4848+
longlong(trx_sys.recovered_binlog_offset));
4849+
4850+
/* output to xtrabackup_binlog_pos_innodb and
4851+
(if backup_safe_binlog_info was available on
4852+
the server) to xtrabackup_binlog_info. In the
4853+
latter case xtrabackup_binlog_pos_innodb
4854+
becomes redundant and is created only for
4855+
compatibility. */
4856+
ok = store_binlog_info("xtrabackup_binlog_pos_innodb",
4857+
trx_sys.recovered_binlog_filename,
4858+
trx_sys.recovered_binlog_offset)
4859+
&& (!recover_binlog_info
4860+
|| store_binlog_info(XTRABACKUP_BINLOG_INFO,
4861+
trx_sys.recovered_binlog_filename,
4862+
trx_sys.recovered_binlog_offset));
48774863
}
48784864

48794865
/* Check whether the log is applied enough or not. */
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CREATE TABLE t(a varchar(60)) ENGINE INNODB;
2+
INSERT INTO t VALUES(1);
3+
SHOW VARIABLES like 'log_bin';
4+
Variable_name Value
5+
log_bin ON
6+
FOUND 1 /Last binlog file .*, position .*/ in current_test
7+
# expect FOUND
8+
DROP TABLE t;
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
--source include/have_innodb.inc
2+
--source include/have_log_bin.inc
3+
4+
let $basedir=$MYSQLTEST_VARDIR/tmp/backup;
5+
6+
CREATE TABLE t(a varchar(60)) ENGINE INNODB;
7+
INSERT INTO t VALUES(1);
8+
9+
SHOW VARIABLES like 'log_bin';
10+
11+
--disable_result_log
12+
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$basedir;
13+
--enable_result_log
14+
15+
exec $XTRABACKUP --prepare --binlog-info=1 --apply-log-only --target-dir=$basedir ;
16+
17+
let SEARCH_FILE=$MYSQLTEST_VARDIR/log/current_test;
18+
--let SEARCH_PATTERN= Last binlog file .*, position .*
19+
--source include/search_pattern_in_file.inc
20+
--echo # expect FOUND
21+
22+
DROP TABLE t;
23+
24+
# Cleanup
25+
rmdir $basedir;

storage/innobase/handler/ha_innodb.cc

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,9 @@ this program; if not, write to the Free Software Foundation, Inc.,
111111
#include "trx0purge.h"
112112
#endif /* UNIV_DEBUG */
113113
#include "trx0roll.h"
114-
#include "trx0sys.h"
114+
#include "trx0rseg.h"
115115
#include "trx0trx.h"
116116
#include "fil0pagecompress.h"
117-
#include "trx0xa.h"
118117
#include "ut0mem.h"
119118
#include "row0ext.h"
120119

@@ -19679,12 +19678,8 @@ innobase_wsrep_set_checkpoint(
1967919678
DBUG_ASSERT(hton == innodb_hton_ptr);
1968019679

1968119680
if (wsrep_is_wsrep_xid(xid)) {
19682-
mtr_t mtr;
19683-
mtr_start(&mtr);
19684-
if (buf_block_t* sys_header = trx_sysf_get(&mtr)) {
19685-
trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
19686-
}
19687-
mtr_commit(&mtr);
19681+
19682+
trx_rseg_update_wsrep_checkpoint(xid);
1968819683
innobase_flush_logs(hton, false);
1968919684
return 0;
1969019685
} else {
@@ -19700,7 +19695,7 @@ innobase_wsrep_get_checkpoint(
1970019695
XID* xid)
1970119696
{
1970219697
DBUG_ASSERT(hton == innodb_hton_ptr);
19703-
trx_sys_read_wsrep_checkpoint(xid);
19698+
trx_rseg_read_wsrep_checkpoint(*xid);
1970419699
return 0;
1970519700
}
1970619701

storage/innobase/include/trx0rseg.h

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,8 @@ Created 3/26/1996 Heikki Tuuri
2727
#ifndef trx0rseg_h
2828
#define trx0rseg_h
2929

30-
#include "trx0types.h"
3130
#include "trx0sys.h"
3231
#include "fut0lst.h"
33-
#include <vector>
3432

3533
/** Gets a rollback segment header.
3634
@param[in] space space where placed
@@ -226,6 +224,30 @@ struct trx_rseg_t {
226224
/** Maximum transaction ID (valid only if TRX_RSEG_FORMAT is 0) */
227225
#define TRX_RSEG_MAX_TRX_ID(TRX_RSEG_UNDO_SLOTS + TRX_RSEG_N_SLOTS \
228226
* TRX_RSEG_SLOT_SIZE)
227+
228+
/** 8 bytes offset within the binlog file */
229+
/* Parenthesized so the expansion stays one operand in larger expressions. */
#define TRX_RSEG_BINLOG_OFFSET (TRX_RSEG_MAX_TRX_ID + 8)
230+
/** MySQL log file name, 512 bytes, including terminating NUL
231+
(valid only if TRX_RSEG_FORMAT is 0).
232+
If no binlog information is present, the first byte is NUL. */
233+
/* Parenthesized so the expansion stays one operand in larger expressions. */
#define TRX_RSEG_BINLOG_NAME (TRX_RSEG_MAX_TRX_ID + 16)
234+
/** Maximum length of binlog file name, including terminating NUL, in bytes */
235+
#define TRX_RSEG_BINLOG_NAME_LEN 512
236+
237+
#ifdef WITH_WSREP
238+
/** The offset to WSREP XID headers */
239+
/* Starts right after the 512-byte binlog name that begins at
TRX_RSEG_MAX_TRX_ID + 16; parenthesized so the expansion stays one operand. */
#define TRX_RSEG_WSREP_XID_INFO (TRX_RSEG_MAX_TRX_ID + 16 + 512)
240+
241+
/** WSREP XID format (1 if present and valid, 0 if not present) */
242+
/* The format field is the first field of the WSREP XID area. */
#define TRX_RSEG_WSREP_XID_FORMAT TRX_RSEG_WSREP_XID_INFO
243+
/** WSREP XID GTRID length */
244+
/* Parenthesized so the expansion stays one operand in larger expressions. */
#define TRX_RSEG_WSREP_XID_GTRID_LEN (TRX_RSEG_WSREP_XID_INFO + 4)
245+
/** WSREP XID bqual length */
246+
/* Parenthesized so the expansion stays one operand in larger expressions. */
#define TRX_RSEG_WSREP_XID_BQUAL_LEN (TRX_RSEG_WSREP_XID_INFO + 8)
247+
/** WSREP XID data (XIDDATASIZE bytes) */
248+
/* Parenthesized so the expansion stays one operand in larger expressions. */
#define TRX_RSEG_WSREP_XID_DATA (TRX_RSEG_WSREP_XID_INFO + 12)
249+
#endif /* WITH_WSREP*/
250+
229251
/*-------------------------------------------------------------*/
230252

231253
/** Read the page number of an undo log slot.
@@ -240,6 +262,48 @@ trx_rsegf_get_nth_undo(const trx_rsegf_t* rsegf, ulint n)
240262
+ n * TRX_RSEG_SLOT_SIZE);
241263
}
242264

265+
#ifdef WITH_WSREP
266+
/** Update the WSREP XID information in rollback segment header.
267+
@param[in,out] rseg_header rollback segment header
268+
@param[in] xid WSREP XID
269+
@param[in,out] mtr mini-transaction */
270+
void
271+
trx_rseg_update_wsrep_checkpoint(
272+
trx_rsegf_t*rseg_header,
273+
const XID*xid,
274+
mtr_t*mtr);
275+
276+
/** Update WSREP checkpoint XID in first rollback segment header.
277+
@param[in] xid WSREP XID */
278+
void trx_rseg_update_wsrep_checkpoint(const XID* xid);
279+
280+
/** Read the WSREP XID information in rollback segment header.
281+
@param[in] rseg_header Rollback segment header
282+
@param[out] xid Transaction XID
283+
@return whether the WSREP XID was present */
284+
bool trx_rseg_read_wsrep_checkpoint(const trx_rsegf_t* rseg_header, XID& xid);
285+
286+
/** Recover the latest WSREP checkpoint XID.
287+
@param[out] xid WSREP XID
288+
@return whether the WSREP XID was found */
289+
bool trx_rseg_read_wsrep_checkpoint(XID& xid);
290+
#endif /* WITH_WSREP */
291+
292+
/** Upgrade a rollback segment header page to MariaDB 10.3 format.
293+
@param[in,out] rseg_header rollback segment header page
294+
@param[in,out] mtr mini-transaction */
295+
void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr);
296+
297+
/** Update the offset information about the end of the binlog entry
298+
which corresponds to the transaction just being committed.
299+
In a replication slave, this updates the master binlog position
300+
up to which replication has proceeded.
301+
@param[in,out] rseg_header rollback segment header
302+
@param[in] trx committing transaction
303+
@param[in,out] mtr mini-transaction */
304+
void
305+
trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr);
306+
243307
#include "trx0rseg.ic"
244308

245309
#endif

storage/innobase/include/trx0sys.h

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -141,26 +141,6 @@ trx_sys_update_mysql_binlog_offset(
141141
system header. */
142142
void
143143
trx_sys_print_mysql_binlog_offset();
144-
#ifdef WITH_WSREP
145-
146-
/** Update WSREP XID info in the TRX_SYS page.
147-
@param[in] xid Transaction XID
148-
@param[in,out] sys_header TRX_SYS page
149-
@param[in,out] mtr mini-transaction */
150-
UNIV_INTERN
151-
void
152-
trx_sys_update_wsrep_checkpoint(
153-
const XID* xid,
154-
buf_block_t* sys_header,
155-
mtr_t* mtr);
156-
157-
/** Read WSREP checkpoint XID from sys header.
158-
@param[out] xid WSREP XID
159-
@return whether the checkpoint was present */
160-
UNIV_INTERN
161-
bool
162-
trx_sys_read_wsrep_checkpoint(XID* xid);
163-
#endif /* WITH_WSREP */
164144

165145
/** Create the rollback segments.
166146
@return whether the creation succeeded */
@@ -235,7 +215,8 @@ trx_sysf_rseg_get_page_no(const buf_block_t* sys_header, ulint rseg_id)
235215
+ sys_header->frame);
236216
}
237217

238-
/** Maximum length of MySQL binlog file name, in bytes. */
218+
/** Maximum length of MySQL binlog file name, in bytes.
219+
(Used before MariaDB 10.3.5.) */
239220
#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
240221
/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
241222
#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
@@ -312,7 +293,7 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
312293
313294
*/
314295
#ifdef WITH_WSREP
315-
/** The offset to WSREP XID headers */
296+
/** The offset to WSREP XID headers (used before MariaDB 10.3.5) */
316297
#define TRX_SYS_WSREP_XID_INFO std::max(srv_page_size - 3500, 1596UL)
317298
#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
318299
#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
@@ -856,7 +837,6 @@ class trx_sys_t
856837
by any mutex, because it is read-only
857838
during multi-threaded operation */
858839

859-
860840
/**
861841
Lock-free hash of in memory read-write transactions.
862842
Works faster when it is on it's own cache line (tested).
@@ -865,6 +845,16 @@ class trx_sys_t
865845
MY_ALIGNED(CACHE_LINE_SIZE) rw_trx_hash_t rw_trx_hash;
866846

867847

848+
#ifdef WITH_WSREP
849+
/** Latest recovered XID during startup */
850+
XID recovered_wsrep_xid;
851+
#endif
852+
/** Latest recovered binlog offset */
853+
int64_t recovered_binlog_offset;
854+
/** Latest recovered binlog file name */
855+
char recovered_binlog_filename[TRX_SYS_MYSQL_LOG_NAME_LEN];
856+
857+
868858
/**
869859
Constructor.
870860

storage/innobase/srv/srv0start.cc

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2230,19 +2230,15 @@ innobase_start_or_create_for_mysql()
22302230
if (err != DB_SUCCESS) {
22312231
return(srv_init_abort(err));
22322232
}
2233+
/* fall through */
2234+
case SRV_OPERATION_RESTORE:
22332235
/* This must precede
22342236
recv_apply_hashed_log_recs(true). */
22352237
trx_lists_init_at_db_start();
22362238
break;
22372239
case SRV_OPERATION_RESTORE_DELTA:
22382240
case SRV_OPERATION_BACKUP:
22392241
ut_ad(!"wrong mariabackup mode");
2240-
/* fall through */
2241-
case SRV_OPERATION_RESTORE:
2242-
/* mariabackup --prepare only deals with
2243-
the redo log and the data files, not with
2244-
transactions or the data dictionary. */
2245-
break;
22462242
}
22472243

22482244
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {

0 commit comments

Comments
 (0)