Skip to content

Commit 3e95fbb

Browse files
committed
[RP005] Shards global counters btr_cur_n_non_sea and btr_cur_n_sea to minimize cache coherency cost
"btr_cur_n_sea++" and "btr_cur_n_non_sea++" in btr_cur_search_to_nth_level() are very expensive at high concurrency. Shard the counters by CPU (os_getcpu()) to minimize inter-CPU communication. The sharding is for performance only, not for accuracy, so no memory barrier of any kind is added.
1 parent d1a17a5 commit 3e95fbb

File tree

5 files changed

+55
-19
lines changed

5 files changed

+55
-19
lines changed

MYSQL_VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ MYSQL_VERSION_MINOR=0
33
MYSQL_VERSION_PATCH=42
44
MYSQL_VERSION_EXTRA=
55
MYSQL_VERSION_STABILITY="LTS"
6-
MYSQL_RP_REVISION="-RP004"
6+
MYSQL_RP_REVISION="-RP005"

storage/innobase/btr/btr0cur.cc

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
Copyright (c) 1994, 2025, Oracle and/or its affiliates.
44
Copyright (c) 2008, Google Inc.
55
Copyright (c) 2012, Facebook Inc.
6+
Copyright (c) 2025, buildup-db.
67
78
Portions of this file contain modifications contributed and copyrighted by
89
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -124,18 +125,20 @@ throughput clearly from about 100000. */
124125
constexpr uint32_t BTR_CUR_FINE_HISTORY_LENGTH = 100000;
125126

126127
/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
127-
ulint btr_cur_n_non_sea = 0;
128+
alignas(ut::INNODB_CACHE_LINE_SIZE)
129+
std::array<uint64_t, BTR_CUR_COUNTER_SHARDING> btr_cur_n_non_sea{};
128130
/** Number of successful adaptive hash index lookups in
129131
btr_cur_search_to_nth_level(). */
130-
ulint btr_cur_n_sea = 0;
132+
alignas(ut::INNODB_CACHE_LINE_SIZE)
133+
std::array<uint64_t, BTR_CUR_COUNTER_SHARDING> btr_cur_n_sea{};
131134
/** Old value of btr_cur_n_non_sea. Copied by
132135
srv_refresh_innodb_monitor_stats(). Referenced by
133136
srv_printf_innodb_monitor(). */
134-
ulint btr_cur_n_non_sea_old = 0;
137+
uint64_t btr_cur_n_non_sea_old = 0;
135138
/** Old value of btr_cur_n_sea. Copied by
136139
srv_refresh_innodb_monitor_stats(). Referenced by
137140
srv_printf_innodb_monitor(). */
138-
ulint btr_cur_n_sea_old = 0;
141+
uint64_t btr_cur_n_sea_old = 0;
139142

140143
#ifdef UNIV_DEBUG
141144
/* Flag to limit optimistic insert records */
@@ -791,11 +794,11 @@ void btr_cur_search_to_nth_level(
791794
ut_ad(cursor->up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
792795
ut_ad(cursor->up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
793796
ut_ad(cursor->low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
794-
btr_cur_n_sea++;
797+
btr_cur_n_sea[BTR_CUR_COUNTER_INDEX]++;
795798

796799
return;
797800
}
798-
btr_cur_n_non_sea++;
801+
btr_cur_n_non_sea[BTR_CUR_COUNTER_INDEX]++;
799802
DBUG_EXECUTE_IF("non_ahi_search",
800803
assert(!strcmp(index->table->name.m_name, "test/t1")););
801804

storage/innobase/include/btr0cur.h

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (c) 1994, 2025, Oracle and/or its affiliates.
4+
Copyright (c) 2025, buildup-db.
45
56
This program is free software; you can redistribute it and/or modify it under
67
the terms of the GNU General Public License, version 2.0, as published by the
@@ -40,8 +41,11 @@ this program; if not, write to the Free Software Foundation, Inc.,
4041
#include "btr0types.h"
4142
#include "dict0dict.h"
4243
#include "gis0type.h"
44+
#include "os0numa.h" /* os_getcpu() */
45+
#include "os0thread.h" /* ut::this_thread_hash */
4346
#include "page0cur.h"
4447
#include "univ.i"
48+
#include "ut0cpu_cache.h" /* ut::INNODB_CACHE_LINE_SIZE */
4549

4650
/** Mode flags for btr_cur operations; these can be ORed */
4751
enum {
@@ -756,19 +760,27 @@ is still a good change of success a little later. Sleep this many
756760
milliseconds between retries. */
757761
constexpr uint32_t BTR_CUR_RETRY_SLEEP_TIME_MS = 50;
758762

763+
/** Number of shards used for the global search counters; sharding reduces CPU cache coherency cost. */
764+
constexpr size_t BTR_CUR_COUNTER_SHARDING = 128;
765+
#ifdef HAVE_OS_GETCPU
766+
#define BTR_CUR_COUNTER_INDEX \
767+
(static_cast<size_t>(os_getcpu()) % BTR_CUR_COUNTER_SHARDING)
768+
#else /* HAVE_OS_GETCPU */
769+
#define BTR_CUR_COUNTER_INDEX (ut::this_thread_hash % BTR_CUR_COUNTER_SHARDING)
770+
#endif /* HAVE_OS_GETCPU */
759771
/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
760-
extern ulint btr_cur_n_non_sea;
772+
extern std::array<uint64_t, BTR_CUR_COUNTER_SHARDING> btr_cur_n_non_sea;
761773
/** Number of successful adaptive hash index lookups in
762774
btr_cur_search_to_nth_level(). */
763-
extern ulint btr_cur_n_sea;
775+
extern std::array<uint64_t, BTR_CUR_COUNTER_SHARDING> btr_cur_n_sea;
764776
/** Old value of btr_cur_n_non_sea. Copied by
765777
srv_refresh_innodb_monitor_stats(). Referenced by
766778
srv_printf_innodb_monitor(). */
767-
extern ulint btr_cur_n_non_sea_old;
779+
extern uint64_t btr_cur_n_non_sea_old;
768780
/** Old value of btr_cur_n_sea. Copied by
769781
srv_refresh_innodb_monitor_stats(). Referenced by
770782
srv_printf_innodb_monitor(). */
771-
extern ulint btr_cur_n_sea_old;
783+
extern uint64_t btr_cur_n_sea_old;
772784

773785
#ifdef UNIV_DEBUG
774786
/* Flag to limit optimistic insert records */

storage/innobase/srv/srv0mon.cc

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
Copyright (c) 2010, 2025, Oracle and/or its affiliates.
44
Copyright (c) 2012, Facebook Inc.
5+
Copyright (c) 2025, buildup-db.
56
67
This program is free software; you can redistribute it and/or modify it under
78
the terms of the GNU General Public License, version 2.0, as published by the
@@ -1986,11 +1987,17 @@ void srv_mon_process_existing_counter(
19861987
value = log_sys->m_capacity.adaptive_flush_max_age();
19871988
break;
19881989
case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH:
1989-
value = btr_cur_n_sea;
1990+
value = 0;
1991+
for (size_t i = 0; i < BTR_CUR_COUNTER_SHARDING; i++) {
1992+
value += btr_cur_n_sea[i];
1993+
}
19901994
break;
19911995

19921996
case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE:
1993-
value = btr_cur_n_non_sea;
1997+
value = 0;
1998+
for (size_t i = 0; i < BTR_CUR_COUNTER_SHARDING; i++) {
1999+
value += btr_cur_n_non_sea[i];
2000+
}
19942001
break;
19952002

19962003
default:

storage/innobase/srv/srv0srv.cc

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
Copyright (c) 1995, 2025, Oracle and/or its affiliates.
44
Copyright (c) 2008, 2009 Google Inc.
55
Copyright (c) 2009, Percona Inc.
6+
Copyright (c) 2025, buildup-db.
67
78
Portions of this file contain modifications contributed and copyrighted by
89
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -1298,8 +1299,14 @@ static void srv_refresh_innodb_monitor_stats(void) {
12981299

12991300
os_aio_refresh_stats();
13001301

1301-
btr_cur_n_sea_old = btr_cur_n_sea;
1302-
btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1302+
uint64_t btr_cur_n_sea_sum = 0;
1303+
uint64_t btr_cur_n_non_sea_sum = 0;
1304+
for (size_t i = 0; i < BTR_CUR_COUNTER_SHARDING; i++) {
1305+
btr_cur_n_sea_sum += btr_cur_n_sea[i];
1306+
btr_cur_n_non_sea_sum += btr_cur_n_non_sea[i];
1307+
}
1308+
btr_cur_n_sea_old = btr_cur_n_sea_sum;
1309+
btr_cur_n_non_sea_old = btr_cur_n_non_sea_sum;
13031310

13041311
log_refresh_stats(*log_sys);
13051312

@@ -1450,11 +1457,18 @@ bool srv_printf_innodb_monitor(FILE *file, bool nowait, ulint *trx_start_pos,
14501457
rw_lock_s_unlock(&part.latch);
14511458
}
14521459

1460+
uint64_t btr_cur_n_sea_sum = 0;
1461+
uint64_t btr_cur_n_non_sea_sum = 0;
1462+
for (size_t i = 0; i < BTR_CUR_COUNTER_SHARDING; i++) {
1463+
btr_cur_n_sea_sum += btr_cur_n_sea[i];
1464+
btr_cur_n_non_sea_sum += btr_cur_n_non_sea[i];
1465+
}
1466+
14531467
fprintf(file, "%.2f hash searches/s, %.2f non-hash searches/s\n",
1454-
(btr_cur_n_sea - btr_cur_n_sea_old) / time_elapsed,
1455-
(btr_cur_n_non_sea - btr_cur_n_non_sea_old) / time_elapsed);
1456-
btr_cur_n_sea_old = btr_cur_n_sea;
1457-
btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1468+
(btr_cur_n_sea_sum - btr_cur_n_sea_old) / time_elapsed,
1469+
(btr_cur_n_non_sea_sum - btr_cur_n_non_sea_old) / time_elapsed);
1470+
btr_cur_n_sea_old = btr_cur_n_sea_sum;
1471+
btr_cur_n_non_sea_old = btr_cur_n_non_sea_sum;
14581472

14591473
fputs(
14601474
"---\n"

0 commit comments

Comments
 (0)