Skip to content

Commit b59d484

Browse files
committed
MDEV-14126: Remove page_is_root()
The predicate page_is_root(), which was added in MariaDB Server 10.2.2, is based on a wrong assumption. Under some circumstances, InnoDB can transform a B-tree into a degenerate state in which a non-leaf page has no sibling pages. Because of this, we cannot assume that a page that has no siblings is the root page. That bug will be tracked as MDEV-19022. Because it may affect many InnoDB data files, we must remove and replace the wrong predicate. Using the wrong predicate can cause corruption. A leaf page is not allowed to be empty unless it is the root page and the entire table is empty.
1 parent 71c781b commit b59d484

File tree

9 files changed

+62
-66
lines changed

9 files changed

+62
-66
lines changed

storage/innobase/btr/btr0btr.cc

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2012, Facebook Inc.
5-
Copyright (c) 2014, 2017, MariaDB Corporation.
5+
Copyright (c) 2014, 2019, MariaDB Corporation.
66
77
This program is free software; you can redistribute it and/or modify it under
88
the terms of the GNU General Public License as published by the Free Software
@@ -378,8 +378,7 @@ btr_root_adjust_on_import(
378378
page = buf_block_get_frame(block);
379379
page_zip = buf_block_get_page_zip(block);
380380

381-
if (!page_is_root(page)) {
382-
381+
if (!fil_page_index_page_check(page) || page_has_siblings(page)) {
383382
err = DB_CORRUPTION;
384383

385384
} else if (dict_index_is_clust(index)) {
@@ -1161,11 +1160,11 @@ btr_free_root_check(
11611160
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
11621161

11631162
if (fil_page_index_page_check(block->frame)
1164-
&& index_id == btr_page_get_index_id(block->frame)) {
1163+
&& index_id == btr_page_get_index_id(block->frame)) {
11651164
/* This should be a root page.
11661165
It should not be possible to reassign the same
11671166
index_id for some other index in the tablespace. */
1168-
ut_ad(page_is_root(block->frame));
1167+
ut_ad(!page_has_siblings(block->frame));
11691168
} else {
11701169
block = NULL;
11711170
}
@@ -1358,7 +1357,8 @@ btr_free_but_not_root(
13581357
ibool finished;
13591358
mtr_tmtr;
13601359

1361-
ut_ad(page_is_root(block->frame));
1360+
ut_ad(fil_page_index_page_check(block->frame));
1361+
ut_ad(!page_has_siblings(block->frame));
13621362
leaf_loop:
13631363
mtr_start(&mtr);
13641364
mtr_set_log_mode(&mtr, log_mode);
@@ -1430,7 +1430,6 @@ btr_free_if_exists(
14301430
return;
14311431
}
14321432

1433-
ut_ad(page_is_root(root->frame));
14341433
btr_free_but_not_root(root, mtr->get_log_mode());
14351434
mtr->set_named_space(page_id.space());
14361435
btr_free_root(root, mtr);
@@ -1453,8 +1452,6 @@ btr_free(
14531452
page_id, page_size, RW_X_LATCH, &mtr);
14541453

14551454
if (block) {
1456-
ut_ad(page_is_root(block->frame));
1457-
14581455
btr_free_but_not_root(block, MTR_LOG_NO_REDO);
14591456
btr_free_root(block, &mtr);
14601457
}
@@ -1614,12 +1611,17 @@ btr_page_reorganize_low(
16141611

16151612
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
16161613
btr_assert_not_corrupted(block, index);
1614+
ut_ad(fil_page_index_page_check(block->frame));
1615+
ut_ad(index->is_dummy
1616+
|| block->page.id.space() == index->space);
1617+
ut_ad(index->is_dummy
1618+
|| block->page.id.page_no() != index->page
1619+
|| !page_has_siblings(page));
16171620
#ifdef UNIV_ZIP_DEBUG
16181621
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
16191622
#endif /* UNIV_ZIP_DEBUG */
16201623
data_size1 = page_get_data_size(page);
16211624
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
1622-
16231625
/* Turn logging off */
16241626
mtr_log_tlog_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
16251627

@@ -1677,7 +1679,7 @@ btr_page_reorganize_low(
16771679
|| page_get_max_trx_id(page) == 0
16781680
|| (dict_index_is_sec_or_ibuf(index)
16791681
? page_is_leaf(temp_page)
1680-
: page_is_root(temp_page)));
1682+
: block->page.id.page_no() == index->page));
16811683

16821684
/* If innodb_log_compressed_pages is ON, page reorganize should log the
16831685
compressed page image.*/
@@ -1894,6 +1896,8 @@ btr_page_empty(
18941896

18951897
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
18961898
ut_ad(page_zip == buf_block_get_page_zip(block));
1899+
ut_ad(!index->is_dummy);
1900+
ut_ad(index->space == block->page.id.space());
18971901
#ifdef UNIV_ZIP_DEBUG
18981902
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
18991903
#endif /* UNIV_ZIP_DEBUG */
@@ -1906,7 +1910,8 @@ btr_page_empty(
19061910
/* Preserve PAGE_ROOT_AUTO_INC when creating a clustered index
19071911
root page. */
19081912
const ib_uint64_tautoinc
1909-
= dict_index_is_clust(index) && page_is_root(page)
1913+
= dict_index_is_clust(index)
1914+
&& index->page == block->page.id.page_no()
19101915
? page_get_autoinc(page)
19111916
: 0;
19121917

@@ -4225,6 +4230,8 @@ btr_discard_only_page_on_level(
42254230
ulint page_level = 0;
42264231
trx_id_tmax_trx_id;
42274232

4233+
ut_ad(!index->is_dummy);
4234+
42284235
/* Save the PAGE_MAX_TRX_ID from the leaf page. */
42294236
max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));
42304237

@@ -4235,11 +4242,10 @@ btr_discard_only_page_on_level(
42354242

42364243
ut_a(page_get_n_recs(page) == 1);
42374244
ut_a(page_level == btr_page_get_level(page, mtr));
4238-
ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
4239-
ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
4240-
4241-
ut_ad(mtr_is_block_fix(
4242-
mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
4245+
ut_a(!page_has_siblings(page));
4246+
ut_ad(fil_page_index_page_check(page));
4247+
ut_ad(block->page.id.space() == index->space);
4248+
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
42434249
btr_search_drop_page_hash_index(block);
42444250

42454251
if (dict_index_is_spatial(index)) {
@@ -4265,6 +4271,7 @@ btr_discard_only_page_on_level(
42654271

42664272
/* block is the root page, which must be empty, except
42674273
for the node pointer to the (now discarded) block(s). */
4274+
ut_ad(!page_has_siblings(block->frame));
42684275

42694276
#ifdef UNIV_BTR_DEBUG
42704277
if (!dict_index_is_ibuf(index)) {

storage/innobase/btr/btr0cur.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Copyright (c) 1994, 2018, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2008, Google Inc.
55
Copyright (c) 2012, Facebook Inc.
6-
Copyright (c) 2015, 2018, MariaDB Corporation.
6+
Copyright (c) 2015, 2019, MariaDB Corporation.
77
88
Portions of this file contain modifications contributed and copyrighted by
99
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -5113,14 +5113,14 @@ btr_cur_optimistic_delete_func(
51135113
ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
51145114
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
51155115
MTR_MEMO_PAGE_X_FIX));
5116-
ut_ad(mtr_is_block_fix(mtr, btr_cur_get_block(cursor),
5117-
MTR_MEMO_PAGE_X_FIX, cursor->index->table));
51185116
ut_ad(mtr->is_named_space(cursor->index->space));
5117+
ut_ad(!cursor->index->is_dummy);
51195118

51205119
/* This is intended only for leaf page deletions */
51215120

51225121
block = btr_cur_get_block(cursor);
51235122

5123+
ut_ad(block->page.id.space() == cursor->index->space);
51245124
ut_ad(page_is_leaf(buf_block_get_frame(block)));
51255125
ut_ad(!dict_index_is_online_ddl(cursor->index)
51265126
|| dict_index_is_clust(cursor->index)
@@ -5242,8 +5242,10 @@ btr_cur_pessimistic_delete(
52425242
ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
52435243
MTR_MEMO_X_LOCK
52445244
| MTR_MEMO_SX_LOCK));
5245-
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
5245+
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
52465246
ut_ad(mtr->is_named_space(index->space));
5247+
ut_ad(!index->is_dummy);
5248+
ut_ad(block->page.id.space() == index->space);
52475249

52485250
if (!has_reserved_extents) {
52495251
/* First reserve enough free space for the file segments

storage/innobase/btr/btr0defragment.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved.
4-
Copyright (C) 2014, 2017, MariaDB Corporation.
4+
Copyright (C) 2014, 2019, MariaDB Corporation.
55
66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -181,7 +181,8 @@ btr_defragment_add_index(
181181
return NULL;
182182
}
183183

184-
ut_ad(page_is_root(page));
184+
ut_ad(fil_page_index_page_check(page));
185+
ut_ad(!page_has_siblings(page));
185186

186187
if (page_is_leaf(page)) {
187188
// Index root is a leaf page, no need to defragment.

storage/innobase/include/page0page.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*****************************************************************************
22
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
3-
Copyright (c) 2013, 2018, MariaDB Corporation.
3+
Copyright (c) 2013, 2019, MariaDB Corporation.
44
55
This program is free software; you can redistribute it and/or modify it under
66
the terms of the GNU General Public License as published by the Free Software
@@ -684,14 +684,19 @@ ulint
684684
page_rec_get_heap_no(
685685
/*=================*/
686686
const rec_t*rec);/*!< in: the physical record */
687-
/** Determine whether a page is an index root page.
687+
688+
/** Determine whether a page has any siblings.
688689
@param[in] page page frame
689-
@return true if the page is a root page of an index */
690-
UNIV_INLINE
691-
bool
692-
page_is_root(
693-
const page_t*page)
694-
MY_ATTRIBUTE((warn_unused_result));
690+
@return true if the page has any siblings */
691+
inline bool page_has_siblings(const page_t* page)
692+
{
693+
compile_time_assert(!(FIL_PAGE_PREV % 8));
694+
compile_time_assert(FIL_PAGE_NEXT == FIL_PAGE_PREV + 4);
695+
compile_time_assert(FIL_NULL == 0xffffffff);
696+
return *reinterpret_cast<const uint64_t*>(page + FIL_PAGE_PREV)
697+
!= ~uint64_t(0);
698+
}
699+
695700
/************************************************************//**
696701
Gets the pointer to the next record on the page.
697702
@return pointer to next record */

storage/innobase/include/page0page.ic

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22

33
Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2016, 2017, MariaDB Corporation.
4+
Copyright (c) 2016, 2019, MariaDB Corporation.
55

66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -91,7 +91,8 @@ UNIV_INLINE
9191
ib_uint64_t
9292
page_get_autoinc(const page_t* page)
9393
{
94-
ut_ad(page_is_root(page));
94+
ut_ad(fil_page_index_page_check(page));
95+
ut_ad(!page_has_siblings(page));
9596
return(mach_read_from_8(PAGE_HEADER + PAGE_ROOT_AUTO_INC + page));
9697
}
9798

@@ -277,31 +278,6 @@ page_rec_get_heap_no(
277278
}
278279
}
279280

280-
/** Determine whether a page is an index root page.
281-
@param[in] page page frame
282-
@return true if the page is a root page of an index */
283-
UNIV_INLINE
284-
bool
285-
page_is_root(
286-
const page_t* page)
287-
{
288-
#if FIL_PAGE_PREV % 8
289-
# error FIL_PAGE_PREV must be 64-bit aligned
290-
#endif
291-
#if FIL_PAGE_NEXT != FIL_PAGE_PREV + 4
292-
# error FIL_PAGE_NEXT must be adjacent to FIL_PAGE_PREV
293-
#endif
294-
#if FIL_NULL != 0xffffffff
295-
# error FIL_NULL != 0xffffffff
296-
#endif
297-
/* Check that this is an index page and both the PREV and NEXT
298-
pointers are FIL_NULL, because the root page does not have any
299-
siblings. */
300-
return(fil_page_index_page_check(page)
301-
&& *reinterpret_cast<const ib_uint64_t*>(page + FIL_PAGE_PREV)
302-
== IB_UINT64_MAX);
303-
}
304-
305281
/** Determine whether an index page record is a user record.
306282
@param[in] rec record in an index page
307283
@return true if a user record */

storage/innobase/page/page0cur.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2012, Facebook Inc.
5-
Copyright (c) 2018, MariaDB Corporation.
5+
Copyright (c) 2018, 2019, MariaDB Corporation.
66
77
This program is free software; you can redistribute it and/or modify it under
88
the terms of the GNU General Public License as published by the Free Software
@@ -1990,6 +1990,8 @@ page_parse_copy_rec_list_to_created_page(
19901990
return(rec_end);
19911991
}
19921992

1993+
ut_ad(fil_page_index_page_check(block->frame));
1994+
19931995
while (ptr < rec_end) {
19941996
ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
19951997
block, index, mtr);
@@ -2045,6 +2047,7 @@ page_copy_rec_list_end_to_created_page(
20452047
ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
20462048
ut_ad(page_align(rec) != new_page);
20472049
ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
2050+
ut_ad(fil_page_index_page_check(new_page));
20482051

20492052
if (page_rec_is_infimum(rec)) {
20502053

storage/innobase/page/page0page.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2012, Facebook Inc.
5-
Copyright (c) 2017, 2018, MariaDB Corporation.
5+
Copyright (c) 2017, 2019, MariaDB Corporation.
66
77
This program is free software; you can redistribute it and/or modify it under
88
the terms of the GNU General Public License as published by the Free Software
@@ -526,6 +526,8 @@ page_create_empty(
526526
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
527527

528528
ut_ad(fil_page_index_page_check(page));
529+
ut_ad(!index->is_dummy);
530+
ut_ad(block->page.id.space() == index->space);
529531

530532
/* Multiple transactions cannot simultaneously operate on the
531533
same temp-table in parallel.
@@ -536,7 +538,7 @@ page_create_empty(
536538
&& page_is_leaf(page)) {
537539
max_trx_id = page_get_max_trx_id(page);
538540
ut_ad(max_trx_id);
539-
} else if (page_is_root(page)) {
541+
} else if (block->page.id.page_no() == index->page) {
540542
/* Preserve PAGE_ROOT_AUTO_INC. */
541543
max_trx_id = page_get_max_trx_id(page);
542544
} else {

storage/innobase/page/page0zip.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2012, Facebook Inc.
5-
Copyright (c) 2014, 2018, MariaDB Corporation.
5+
Copyright (c) 2014, 2019, MariaDB Corporation.
66
77
This program is free software; you can redistribute it and/or modify it under
88
the terms of the GNU General Public License as published by the Free Software
@@ -4727,7 +4727,7 @@ page_zip_reorganize(
47274727
clustered index root pages. */
47284728
ut_ad(page_get_max_trx_id(page) == 0
47294729
|| (dict_index_is_clust(index)
4730-
? page_is_root(temp_page)
4730+
? !page_has_siblings(temp_page)
47314731
: page_is_leaf(temp_page)));
47324732

47334733
/* Restore logging. */

storage/innobase/row/row0import.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,7 @@ dberr_t FetchIndexRootPages::operator()(buf_block_t* block) UNIV_NOTHROW
668668
return set_current_xdes(block->page.id.page_no(), page);
669669
} else if (fil_page_index_page_check(page)
670670
&& !is_free(block->page.id.page_no())
671-
&& page_is_root(page)) {
671+
&& !page_has_siblings(page)) {
672672

673673
index_id_tid = btr_page_get_index_id(page);
674674

@@ -1834,7 +1834,7 @@ PageConverter::update_index_page(
18341834
page, m_page_zip_ptr, m_index->m_srv_index->id, 0);
18351835

18361836
if (dict_index_is_clust(m_index->m_srv_index)) {
1837-
if (page_is_root(page)) {
1837+
if (block->page.id.page_no() == m_index->m_srv_index->page) {
18381838
/* Preserve the PAGE_ROOT_AUTO_INC. */
18391839
} else {
18401840
/* Clear PAGE_MAX_TRX_ID so that it can be
@@ -1854,7 +1854,7 @@ PageConverter::update_index_page(
18541854
if (page_is_empty(page)) {
18551855

18561856
/* Only a root page can be empty. */
1857-
if (!page_is_root(page)) {
1857+
if (page_has_siblings(page)) {
18581858
// TODO: We should relax this and skip secondary
18591859
// indexes. Mark them as corrupt because they can
18601860
// always be rebuilt.

0 commit comments

Comments
 (0)