Skip to content

Commit c7d0448

Browse files
committed
MDEV-15132 Avoid accessing the TRX_SYS page
InnoDB maintains an internal persistent sequence of transaction identifiers. This sequence is used for assigning both transaction start identifiers (DB_TRX_ID=trx->id) and end identifiers (trx->no) as well as end identifiers for the mysql.transaction_registry table that was introduced in MDEV-12894. TRX_SYS_TRX_ID_WRITE_MARGIN: Remove. After this many updates of the sequence we used to update the TRX_SYS page. We can avoid accessing the TRX_SYS page if we modify the InnoDB startup so that it resurrects the sequence from other pages of the transaction system. TRX_SYS_TRX_ID_STORE: Deprecate. The field only exists for the purpose of upgrading from an earlier version of MySQL or MariaDB. Starting with this fix, MariaDB will rely on the fields TRX_UNDO_TRX_ID, TRX_UNDO_TRX_NO in the undo log header page of each non-committed transaction, and on the new field TRX_RSEG_MAX_TRX_ID in rollback segment header pages. Because of this change, setting innodb_force_recovery=5 or 6 may cause the system to recover with trx_sys.get_max_trx_id()==0. We must adjust checks for invalid DB_TRX_ID and PAGE_MAX_TRX_ID accordingly. We will change the startup and shutdown messages to display the trx_sys.get_max_trx_id() in addition to the log sequence number. trx_sys_t::flush_max_trx_id(): Remove. trx_undo_mem_create_at_db_start(), trx_undo_lists_init(): Add an output parameter max_trx_id, to be updated from TRX_UNDO_TRX_ID, TRX_UNDO_TRX_NO. TRX_RSEG_MAX_TRX_ID: New field, for persisting trx_sys.get_max_trx_id() at the time of the latest transaction commit. Startup is not reading the undo log pages of committed transactions. We want to avoid additional page accesses on startup, as well as trouble when all undo logs have been emptied. On startup, we will simply determine the maximum value from all pages that are being read anyway. TRX_RSEG_FORMAT: Redefined from TRX_RSEG_MAX_SIZE. Old versions of InnoDB wrote uninitialized garbage to unused data fields. 
Because of this, we cannot simply introduce a new field in the rollback segment pages and expect it to be always zero, as it would be if the database was created by a recent enough InnoDB version. Luckily, it looks like the field TRX_RSEG_MAX_SIZE was always written as 0xfffffffe. We will indicate a new subformat of the page by writing 0 to this field. This has the nice side effect that after a downgrade to older versions of InnoDB, transactions should fail to allocate any undo log, that is, writes will be blocked. So, there is no problem of getting corrupted transaction identifiers after downgrading. trx_rseg_t::max_size: Remove. trx_rseg_header_create(): Remove the parameter max_size=ULINT_MAX. trx_purge_add_undo_to_history(): Update TRX_RSEG_MAX_TRX_ID (and TRX_RSEG_FORMAT if needed). This is invoked on transaction commit. trx_rseg_mem_restore(): If TRX_RSEG_FORMAT contains 0, read TRX_RSEG_MAX_TRX_ID. trx_rseg_array_init(): Invoke trx_sys.init_max_trx_id(max_trx_id + 1) where max_trx_id was the maximum that was encountered in the rollback segment pages and the undo log pages of recovered active, XA PREPARE, or some committed transactions. (See trx_purge_add_undo_to_history() which invokes trx_rsegf_set_nth_undo(..., FIL_NULL, ...); not all committed transactions will be immediately detached from the rollback segment header.)
1 parent bb441ca commit c7d0448

File tree

11 files changed

+104
-166
lines changed

11 files changed

+104
-166
lines changed

storage/innobase/include/trx0rseg.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,13 @@ trx_rsegf_undo_find_free(const trx_rsegf_t* rsegf);
7777
This function is called only when a new rollback segment is created in
7878
the database.
7979
@param[in] space space id
80-
@param[in] max_size max size in pages
8180
@param[in] rseg_id rollback segment identifier
8281
@param[in,out] sys_header the TRX_SYS page (NULL for temporary rseg)
8382
@param[in,out] mtr mini-transaction
8483
@return page number of the created segment, FIL_NULL if fail */
8584
ulint
8685
trx_rseg_header_create(
8786
ulint space,
88-
ulint max_size,
8987
ulint rseg_id,
9088
buf_block_t* sys_header,
9189
mtr_t* mtr);
@@ -144,9 +142,6 @@ struct trx_rseg_t {
144142
/** page number of the rollback segment header */
145143
ulint page_no;
146144

147-
/** maximum allowed size in pages */
148-
ulint max_size;
149-
150145
/** current size in pages */
151146
ulint curr_size;
152147

@@ -217,8 +212,8 @@ struct trx_rseg_t {
217212

218213
/* Transaction rollback segment header */
219214
/*-------------------------------------------------------------*/
220-
#defineTRX_RSEG_MAX_SIZE0/* Maximum allowed size for rollback
221-
segment in pages */
215+
#defineTRX_RSEG_FORMAT0/* -2 = pre-MariaDB 10.3.5 format;
216+
0=MariaDB 10.3.5 or later */
222217
#defineTRX_RSEG_HISTORY_SIZE4/* Number of file pages occupied
223218
by the logs in the history list */
224219
#defineTRX_RSEG_HISTORY8/* The update undo logs for committed
@@ -228,6 +223,9 @@ struct trx_rseg_t {
228223
this page is placed */
229224
#define TRX_RSEG_UNDO_SLOTS(8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE)
230225
/* Undo log segment slots */
226+
/** Maximum transaction ID (valid only if TRX_RSEG_FORMAT is 0) */
227+
#define TRX_RSEG_MAX_TRX_ID(TRX_RSEG_UNDO_SLOTS + TRX_RSEG_N_SLOTS \
228+
* TRX_RSEG_SLOT_SIZE)
231229
/*-------------------------------------------------------------*/
232230

233231
/** Read the page number of an undo log slot.

storage/innobase/include/trx0sys.h

Lines changed: 8 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -175,18 +175,13 @@ trx_sys_create_rsegs();
175175

176176
/** Transaction system header */
177177
/*------------------------------------------------------------- @{ */
178-
#defineTRX_SYS_TRX_ID_STORE0/*!< the maximum trx id or trx
179-
number modulo
180-
TRX_SYS_TRX_ID_UPDATE_MARGIN
181-
written to a file page by any
182-
transaction; the assignment of
183-
transaction ids continues from
184-
this number rounded up by
185-
TRX_SYS_TRX_ID_UPDATE_MARGIN
186-
plus
187-
TRX_SYS_TRX_ID_UPDATE_MARGIN
188-
when the database is
189-
started */
178+
/** In old versions of InnoDB, this persisted the value of
179+
trx_sys.get_max_trx_id(). Starting with MariaDB 10.3.5,
180+
the field TRX_RSEG_MAX_TRX_ID in rollback segment header pages
181+
and the fields TRX_UNDO_TRX_ID, TRX_UNDO_TRX_NO in undo log pages
182+
are used instead. The field only exists for the purpose of upgrading
183+
from older MySQL or MariaDB versions. */
184+
#defineTRX_SYS_TRX_ID_STORE0
190185
#define TRX_SYS_FSEG_HEADER8/*!< segment header for the
191186
tablespace segment the trx
192187
system is created into */
@@ -379,11 +374,6 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
379374

380375
/** Size of the doublewrite block in pages */
381376
#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZEFSP_EXTENT_SIZE
382-
383-
/** When a trx id which is zero modulo this number (which must be a power of
384-
two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
385-
page is updated */
386-
#define TRX_SYS_TRX_ID_WRITE_MARGIN((trx_id_t) 256)
387377
/* @} */
388378

389379
trx_t* current_trx();
@@ -925,26 +915,14 @@ struct trx_sys_t {
925915

926916
/**
927917
Allocates a new transaction id.
928-
929-
VERY important: after the database is started, m_max_trx_id value is
930-
divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if
931-
will evaluate to TRUE when this function is first time called,
932-
and the value for trx id will be written to disk-based header!
933-
Thus trx id values will not overlap when the database is
934-
repeatedly started!
935-
936918
@return new, allocated trx id
937919
*/
938920

939921
trx_id_t get_new_trx_id()
940922
{
941923
ut_ad(mutex_own(&mutex));
942-
trx_id_t id= static_cast<trx_id_t>(my_atomic_add64_explicit(
924+
return static_cast<trx_id_t>(my_atomic_add64_explicit(
943925
reinterpret_cast<int64*>(&m_max_trx_id), 1, MY_MEMORY_ORDER_RELAXED));
944-
945-
if (UNIV_UNLIKELY(!(id % TRX_SYS_TRX_ID_WRITE_MARGIN)))
946-
flush_max_trx_id();
947-
return(id);
948926
}
949927

950928

@@ -1004,13 +982,6 @@ struct trx_sys_t {
1004982
}
1005983
return 0;
1006984
}
1007-
1008-
1009-
/**
1010-
Writes the value of m_max_trx_id to the file based trx system header.
1011-
*/
1012-
1013-
void flush_max_trx_id();
1014985
};
1015986

1016987

storage/innobase/include/trx0undo.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -318,12 +318,14 @@ trx_undo_parse_page_header(
318318
page_t* page,
319319
mtr_t* mtr);
320320
/** Read an undo log when starting up the database.
321-
@param[in,out] rseg rollback segment
322-
@param[in] id rollback segment slot
323-
@param[in] page_no undo log segment page number
321+
@param[in,out] rseg rollback segment
322+
@param[in] id rollback segment slot
323+
@param[in] page_no undo log segment page number
324+
@param[in,out] max_trx_id the largest observed transaction ID
324325
@return size of the undo log in pages */
325326
ulint
326-
trx_undo_mem_create_at_db_start(trx_rseg_t* rseg, ulint id, ulint page_no);
327+
trx_undo_mem_create_at_db_start(trx_rseg_t* rseg, ulint id, ulint page_no,
328+
trx_id_t& max_trx_id);
327329
/************************************************************************
328330
Frees an undo log memory copy. */
329331
void

storage/innobase/lock/lock0lock.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,8 +383,9 @@ lock_check_trx_id_sanity(
383383
ut_ad(!rec_is_default_row(rec, index));
384384

385385
trx_id_tmax_trx_id = trx_sys.get_max_trx_id();
386+
ut_ad(max_trx_id || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN);
386387

387-
if (trx_id >= max_trx_id) {
388+
if (max_trx_id && trx_id >= max_trx_id) {
388389
lock_report_trx_id_insanity(
389390
trx_id, rec, index, offsets, max_trx_id);
390391
return false;

storage/innobase/page/page0page.cc

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2012, Facebook Inc.
5-
Copyright (c) 2017, MariaDB Corporation.
5+
Copyright (c) 2017, 2018, MariaDB Corporation.
66
77
This program is free software; you can redistribute it and/or modify it under
88
the terms of the GNU General Public License as published by the Free Software
@@ -2427,18 +2427,19 @@ page_validate(
24272427
same temp-table in parallel.
24282428
max_trx_id is ignored for temp tables because it is not required
24292429
for MVCC. */
2430-
if (dict_index_is_sec_or_ibuf(index)
2431-
&& !dict_table_is_temporary(index->table)
2432-
&& page_is_leaf(page)
2433-
&& !page_is_empty(page)) {
2430+
if (!page_is_leaf(page) || page_is_empty(page)
2431+
|| !dict_index_is_sec_or_ibuf(index)
2432+
|| index->table->is_temporary()) {
2433+
} else if (trx_id_t sys_max_trx_id = trx_sys.get_max_trx_id()) {
24342434
trx_id_tmax_trx_id = page_get_max_trx_id(page);
2435-
trx_id_tsys_max_trx_id = trx_sys.get_max_trx_id();
24362435

24372436
if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) {
24382437
ib::error() << "PAGE_MAX_TRX_ID out of bounds: "
24392438
<< max_trx_id << ", " << sys_max_trx_id;
24402439
goto func_exit2;
24412440
}
2441+
} else {
2442+
ut_ad(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN);
24422443
}
24432444

24442445
heap = mem_heap_create(UNIV_PAGE_SIZE + 200);

storage/innobase/srv/srv0start.cc

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,8 +1088,7 @@ srv_undo_tablespaces_init(bool create_new_db)
10881088
if (trx_sysf_rseg_get_space(sys_header, i)
10891089
== *it) {
10901090
trx_rseg_header_create(
1091-
*it, ULINT_MAX, i,
1092-
sys_header, &mtr);
1091+
*it, i, sys_header, &mtr);
10931092
}
10941093
}
10951094

@@ -2679,8 +2678,9 @@ innobase_start_or_create_for_mysql()
26792678

26802679
if (srv_print_verbose_log) {
26812680
ib::info() << INNODB_VERSION_STR
2682-
<< " started; log sequence number "
2683-
<< srv_start_lsn;
2681+
<< " started; log sequence number "
2682+
<< srv_start_lsn
2683+
<< "; transaction id " << trx_sys.get_max_trx_id();
26842684
}
26852685

26862686
if (srv_force_recovery > 0) {
@@ -2929,7 +2929,8 @@ innodb_shutdown()
29292929

29302930
if (srv_was_started && srv_print_verbose_log) {
29312931
ib::info() << "Shutdown completed; log sequence number "
2932-
<< srv_shutdown_lsn;
2932+
<< srv_shutdown_lsn
2933+
<< "; transaction id " << trx_sys.get_max_trx_id();
29332934
}
29342935

29352936
srv_start_state = SRV_START_STATE_NONE;

storage/innobase/trx/trx0purge.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,10 +277,28 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
277277

278278
ut_ad(undo->size == flst_get_len(
279279
seg_header + TRX_UNDO_PAGE_LIST));
280+
byte* rseg_format = rseg_header + TRX_RSEG_FORMAT;
281+
if (UNIV_UNLIKELY(mach_read_from_4(rseg_format))) {
282+
/* This database must have been upgraded from
283+
before MariaDB 10.3.5. */
284+
mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr);
285+
/* Clear also possible garbage at the end of
286+
the page. Old InnoDB versions did not initialize
287+
unused parts of pages. */
288+
ut_ad(page_offset(rseg_header) == TRX_RSEG);
289+
byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8;
290+
ulint len = UNIV_PAGE_SIZE
291+
- (FIL_PAGE_DATA_END
292+
+ TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8);
293+
memset(b, 0, len);
294+
mlog_log_string(b, len, mtr);
295+
}
280296

281297
mlog_write_ulint(
282298
rseg_header + TRX_RSEG_HISTORY_SIZE,
283299
hist_size + undo->size, MLOG_4BYTES, mtr);
300+
mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID,
301+
trx_sys.get_max_trx_id(), mtr);
284302
}
285303

286304
/* Before any transaction-generating background threads or the

storage/innobase/trx/trx0rseg.cc

Lines changed: 20 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,13 @@ Created 3/26/1996 Heikki Tuuri
3838
This function is called only when a new rollback segment is created in
3939
the database.
4040
@param[in] space space id
41-
@param[in] max_size max size in pages
4241
@param[in] rseg_id rollback segment identifier
4342
@param[in,out] sys_header the TRX_SYS page (NULL for temporary rseg)
4443
@param[in,out] mtr mini-transaction
4544
@return page number of the created segment, FIL_NULL if fail */
4645
ulint
4746
trx_rseg_header_create(
4847
ulint space,
49-
ulint max_size,
5048
ulint rseg_id,
5149
buf_block_t* sys_header,
5250
mtr_t* mtr)
@@ -76,9 +74,7 @@ trx_rseg_header_create(
7674
/* Get the rollback segment file page */
7775
rsegf = trx_rsegf_get_new(space, page_no, mtr);
7876

79-
/* Initialize max size field */
80-
mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size,
81-
MLOG_4BYTES, mtr);
77+
mlog_write_ulint(rsegf + TRX_RSEG_FORMAT, 0, MLOG_4BYTES, mtr);
8278

8379
/* Initialize the history list */
8480

@@ -155,7 +151,6 @@ trx_rseg_mem_create(ulint id, ulint space, ulint page_no)
155151
rseg->page_no = page_no;
156152
rseg->last_page_no = FIL_NULL;
157153
rseg->curr_size = 1;
158-
rseg->max_size = ULINT_UNDEFINED;
159154

160155
mutex_create(rseg->is_persistent()
161156
? LATCH_ID_REDO_RSEG : LATCH_ID_NOREDO_RSEG,
@@ -170,13 +165,13 @@ trx_rseg_mem_create(ulint id, ulint space, ulint page_no)
170165

171166
/** Read the undo log lists.
172167
@param[in,out] rseg rollback segment
168+
@param[in,out] max_trx_id maximum observed transaction identifier
173169
@param[in] rseg_header rollback segment header
174-
@param[in,out] mtr mini-transaction
175170
@return the combined size of undo log segments in pages */
176171
static
177172
ulint
178-
trx_undo_lists_init(trx_rseg_t* rseg, const trx_rsegf_t* rseg_header,
179-
mtr_t* mtr)
173+
trx_undo_lists_init(trx_rseg_t* rseg, trx_id_t& max_trx_id,
174+
const trx_rsegf_t* rseg_header)
180175
{
181176
ut_ad(srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN);
182177

@@ -186,7 +181,7 @@ trx_undo_lists_init(trx_rseg_t* rseg, const trx_rsegf_t* rseg_header,
186181
ulint page_no = trx_rsegf_get_nth_undo(rseg_header, i);
187182
if (page_no != FIL_NULL) {
188183
size += trx_undo_mem_create_at_db_start(
189-
rseg, i, page_no);
184+
rseg, i, page_no, max_trx_id);
190185
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
191186
}
192187
}
@@ -204,12 +199,18 @@ trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr)
204199
{
205200
const trx_rsegf_t* rseg_header = trx_rsegf_get_new(
206201
rseg->space, rseg->page_no, mtr);
207-
rseg->max_size = mach_read_from_4(rseg_header + TRX_RSEG_MAX_SIZE);
202+
if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT) == 0) {
203+
trx_id_t id = mach_read_from_8(rseg_header
204+
+ TRX_RSEG_MAX_TRX_ID);
205+
if (id > max_trx_id) {
206+
max_trx_id = id;
207+
}
208+
}
208209

209210
/* Initialize the undo log lists according to the rseg header */
210211

211212
rseg->curr_size = mach_read_from_4(rseg_header + TRX_RSEG_HISTORY_SIZE)
212-
+ 1 + trx_undo_lists_init(rseg, rseg_header, mtr);
213+
+ 1 + trx_undo_lists_init(rseg, max_trx_id, rseg_header);
213214

214215
if (ulint len = flst_get_len(rseg_header + TRX_RSEG_HISTORY)) {
215216
my_atomic_addlint(&trx_sys.rseg_history_len, len);
@@ -262,22 +263,9 @@ trx_rseg_array_init()
262263
mtr.start();
263264
if (const buf_block_t* sys = trx_sysf_get(&mtr, false)) {
264265
if (rseg_id == 0) {
265-
/* VERY important: after the database
266-
is started, max_trx_id value is
267-
divisible by TRX_SYS_TRX_ID_WRITE_MARGIN,
268-
and the first call of
269-
trx_sys.get_new_trx_id() will invoke
270-
flush_max_trx_id()! Thus trx id values
271-
will not overlap when the database is
272-
repeatedly started! */
273-
274-
max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
275-
+ ut_uint64_align_up(
276-
mach_read_from_8(
277-
TRX_SYS
278-
+ TRX_SYS_TRX_ID_STORE
279-
+ sys->frame),
280-
TRX_SYS_TRX_ID_WRITE_MARGIN);
266+
max_trx_id = mach_read_from_8(
267+
TRX_SYS + TRX_SYS_TRX_ID_STORE
268+
+ sys->frame);
281269
}
282270
const uint32_tpage_no = trx_sysf_rseg_get_page_no(
283271
sys, rseg_id);
@@ -297,7 +285,7 @@ trx_rseg_array_init()
297285
mtr.commit();
298286
}
299287

300-
trx_sys.init_max_trx_id(max_trx_id);
288+
trx_sys.init_max_trx_id(max_trx_id + 1);
301289
}
302290

303291
/** Create a persistent rollback segment.
@@ -324,8 +312,8 @@ trx_rseg_create(ulint space_id)
324312
ulint rseg_id = trx_sys_rseg_find_free(sys_header);
325313
ulint page_no = rseg_id == ULINT_UNDEFINED
326314
? FIL_NULL
327-
: trx_rseg_header_create(space_id, ULINT_MAX,
328-
rseg_id, sys_header, &mtr);
315+
: trx_rseg_header_create(space_id, rseg_id, sys_header,
316+
&mtr);
329317
if (page_no != FIL_NULL) {
330318
ut_ad(trx_sysf_rseg_get_space(sys_header, rseg_id)
331319
== space_id);
@@ -358,7 +346,7 @@ trx_temp_rseg_create()
358346
ut_ad(space->purpose == FIL_TYPE_TEMPORARY);
359347

360348
ulint page_no = trx_rseg_header_create(
361-
SRV_TMP_SPACE_ID, ULINT_MAX, i, NULL, &mtr);
349+
SRV_TMP_SPACE_ID, i, NULL, &mtr);
362350
trx_rseg_t* rseg = trx_rseg_mem_create(
363351
i, SRV_TMP_SPACE_ID, page_no);
364352
ut_ad(!rseg->is_persistent());

0 commit comments

Comments
 (0)