Skip to content

Commit b600f30

Browse files
committed
MDEV-10814: Innodb large allocations - madvise - Don't dump
Note: Linux only. Core dumps of large buffer pool pages take time and space and pose a potential data exposure risk in scenarios where data-at-rest encryption is deployed. Here we use madvise(MADV_DONTDUMP) on the large memory allocations used by the InnoDB buffer pool, log_sys and recv_sys. The effect of this system call is that these memory areas will not appear in a core dump. Data from these buffers is rarely useful in fault diagnosis. The log_sys and recv_sys structures now use large memory allocations for their large buffers. Debug builds don't include the madvise syscall and as such will produce full core dumps. A function, buf_madvise_do_dump, is added but never called. It is there to be called from a debugger to re-enable the core dumping of all of these pages if for some reason the entire contents of these buffers are needed. Idea thanks to Hartmut Holzgraefe.
1 parent 990289a commit b600f30

File tree

8 files changed

+180
-41
lines changed

8 files changed

+180
-41
lines changed

storage/innobase/buf/buf0buf.cc

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,6 +1173,57 @@ buf_page_is_corrupted(
11731173
}
11741174

11751175
#ifndef UNIV_INNOCHECKSUM
1176+
1177+
#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP)
1178+
/** Enable buffers to be dumped to core files
1179+
1180+
A convience function, not called anyhwere directly however
1181+
it is left available for gdb or any debugger to call
1182+
in the event that you want all of the memory to be dumped
1183+
to a core file.
1184+
1185+
Returns number of errors found in madvise calls. */
1186+
int
1187+
buf_madvise_do_dump()
1188+
{
1189+
int ret= 0;
1190+
buf_pool_t* buf_pool;
1191+
ulint n;
1192+
buf_chunk_t* chunk;
1193+
1194+
/* mirrors allocation in log_sys_init() */
1195+
if (log_sys->buf)
1196+
{
1197+
ret+= madvise(log_sys->first_in_use ? log_sys->buf
1198+
: log_sys->buf - log_sys->buf_size,
1199+
log_sys->buf_size,
1200+
MADV_DODUMP);
1201+
}
1202+
/* mirrors recv_sys_init() */
1203+
if (recv_sys->buf)
1204+
{
1205+
ret+= madvise(recv_sys->buf, recv_sys->len, MADV_DODUMP);
1206+
}
1207+
1208+
buf_pool_mutex_enter_all();
1209+
1210+
for (int i= 0; i < srv_buf_pool_instances; i++)
1211+
{
1212+
buf_pool = buf_pool_from_array(i);
1213+
chunk = buf_pool->chunks;
1214+
1215+
for (int n = buf_pool->n_chunks; n--; chunk++)
1216+
{
1217+
ret+= madvise(chunk->mem, chunk->mem_size(), MADV_DODUMP);
1218+
}
1219+
}
1220+
1221+
buf_pool_mutex_exit_all();
1222+
1223+
return ret;
1224+
}
1225+
#endif
1226+
11761227
/** Dump a page to stderr.
11771228
@param[in] read_buf database page
11781229
@param[in] page_size page size */
@@ -1502,7 +1553,7 @@ buf_chunk_init(
15021553
DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return(NULL););
15031554

15041555
chunk->mem = buf_pool->allocator.allocate_large(mem_size,
1505-
&chunk->mem_pfx);
1556+
&chunk->mem_pfx, true);
15061557

15071558
if (UNIV_UNLIKELY(chunk->mem == NULL)) {
15081559

@@ -1796,7 +1847,8 @@ buf_pool_init_instance(
17961847
}
17971848

17981849
buf_pool->allocator.deallocate_large(
1799-
chunk->mem, &chunk->mem_pfx);
1850+
chunk->mem, &chunk->mem_pfx, chunk->mem_size(),
1851+
true);
18001852
}
18011853
ut_free(buf_pool->chunks);
18021854
buf_pool_mutex_exit(buf_pool);
@@ -1943,7 +1995,7 @@ buf_pool_free_instance(
19431995
}
19441996

19451997
/* deallocate_large() takes (ptr, pfx, size, dodump): pass the chunk
size explicitly, otherwise 'true' is taken as a size of 1 byte and
dodump keeps its default of false. */
buf_pool->allocator.deallocate_large(
	chunk->mem, &chunk->mem_pfx, chunk->mem_size(),
	true);
19471999
}
19482000

19492001
for (ulint i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; ++i) {
@@ -2819,7 +2871,7 @@ buf_pool_resize()
28192871
}
28202872

28212873
/* deallocate_large() takes (ptr, pfx, size, dodump): pass the chunk
size explicitly, otherwise 'true' is taken as a size of 1 byte and
dodump keeps its default of false. */
buf_pool->allocator.deallocate_large(
	chunk->mem, &chunk->mem_pfx, chunk->mem_size(),
	true);
28232875

28242876
sum_freed += chunk->size;
28252877

storage/innobase/include/log0log.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -614,15 +614,15 @@ struct log_t{
614614
mtr_commit and still ensure that
615615
insertions in the flush_list happen
616616
in the LSN order. */
617-
byte*buf_ptr;/*!< unaligned log buffer, which should
618-
be of double of buf_size */
619-
byte*buf;/*!< log buffer currently in use;
620-
this could point to either the first
621-
half of the aligned(buf_ptr) or the
617+
byte*buf;/*!< Memory of double the buf_size is
618+
allocated here. This pointer will change
619+
however to either the first half or the
622620
second half in turns, so that log
623621
write/flush to disk don't block
624622
concurrent mtrs which will write
625-
log to this buffer */
623+
log to this buffer. Care to switch back
624+
to the first half before freeing/resizing
625+
must be undertaken. */
626626
boolfirst_in_use;/*!< true if buf points to the first
627627
half of the aligned(buf_ptr), false
628628
if the second half */

storage/innobase/include/log0recv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ struct recv_sys_t{
217217
/*!< this is TRUE when a log rec application
218218
batch is running */
219219
byte* buf;/*!< buffer for parsing log records */
220+
size_tbuf_size;/*!< size of buf */
220221
ulint len;/*!< amount of data in buf */
221222
lsn_tparse_start_lsn;
222223
/*!< this is the lsn from which we were able to

storage/innobase/include/ut0new.h

Lines changed: 87 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ with (), thus:
129129
#include <string.h> /* strlen(), strrchr(), strncmp() */
130130

131131
#include "my_global.h" /* needed for headers from mysql/psi/ */
132+
/* madvise() and MADV_DONTDUMP / MADV_DODUMP are only used when DBUG_OFF
is defined (non-debug builds), so the header must be included under the
same condition; otherwise the MADV_* checks below silently evaluate to
false. */
#if defined(DBUG_OFF) && defined(HAVE_MADVISE)
#include <sys/mman.h>
#endif
135+
132136
/* JAN: TODO: missing 5.7 header */
133137
#ifdef HAVE_MYSQL_MEMORY_H
134138
#include "mysql/psi/mysql_memory.h" /* PSI_MEMORY_CALL() */
@@ -234,6 +238,45 @@ struct ut_new_pfx_t {
234238
#endif
235239
};
236240

241+
static void ut_allocate_trace_dontdump(void * ptr,
242+
size_tbytes,
243+
booldontdump,
244+
ut_new_pfx_t* pfx,
245+
const char* file)
246+
{
247+
ut_a(ptr != NULL);
248+
249+
#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DONTDUMP)
250+
if (dontdump && madvise(ptr, bytes, MADV_DONTDUMP)) {
251+
ib::warn() << "Failed to set memory to DONTDUMP: "
252+
<< strerror(errno)
253+
<< " ptr " << ptr
254+
<< " size " << bytes;
255+
}
256+
#endif
257+
if (pfx != NULL) {
258+
#ifdef UNIV_PFS_MEMORY
259+
allocate_trace(bytes, file, pfx);
260+
#endif /* UNIV_PFS_MEMORY */
261+
pfx->m_size = bytes;
262+
}
263+
}
264+
265+
/** Advise the OS to include a memory area in core dumps again.
Does nothing when ptr is NULL. The madvise() call is only compiled in
when DBUG_OFF, HAVE_MADVISE and MADV_DODUMP are defined; a failure is
reported as a warning and otherwise ignored.
@param[in]	ptr	start of the memory area, may be NULL
@param[in]	m_size	size of the memory area in bytes */
static inline void ut_dodump(void* ptr, size_t m_size)
{
	if (ptr == NULL) {
		return;
	}
#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP)
	if (madvise(ptr, m_size, MADV_DODUMP)) {
		ib::warn() << "Failed to set memory to DODUMP: "
			   << strerror(errno)
			   << " ptr " << ptr
			   << " size " << m_size;
	}
#endif
}
279+
237280
/** Allocator class for allocating memory from inside std::* containers.
238281
@tparam T type of allocated object
239282
@tparam oom_fatal whether to commit suicide when running out of memory */
@@ -294,6 +337,7 @@ class ut_allocator {
294337
@param[in] file file name of the caller
295338
@param[in] set_to_zero if true, then the returned memory is
296339
initialized with 0x0 bytes.
340+
@param[in] throw_on_error if true, raise exception if too big
297341
@return pointer to the allocated memory */
298342
pointer
299343
allocate(
@@ -566,6 +610,8 @@ class ut_allocator {
566610
/** Allocate a large chunk of memory that can hold 'n_elements'
567611
objects of type 'T' and trace the allocation.
568612
@param[in] n_elements number of elements
613+
@param[in] dontdump if true, advise the OS not to core
614+
dump this memory.
569615
@param[out] pfx storage for the description of the
570616
allocated memory. The caller must provide space for this one and keep
571617
it until the memory is no longer needed and then pass it to
@@ -574,7 +620,8 @@ class ut_allocator {
574620
pointer
575621
allocate_large(
576622
size_type n_elements,
577-
ut_new_pfx_t* pfx)
623+
ut_new_pfx_t* pfx,
624+
booldontdump = false)
578625
{
579626
if (n_elements == 0 || n_elements > max_size()) {
580627
return(NULL);
@@ -585,13 +632,11 @@ class ut_allocator {
585632
pointer ptr = reinterpret_cast<pointer>(
586633
os_mem_alloc_large(&n_bytes));
587634

588-
#ifdef UNIV_PFS_MEMORY
589-
if (ptr != NULL) {
590-
allocate_trace(n_bytes, NULL, pfx);
635+
if (ptr == NULL) {
636+
return NULL;
591637
}
592-
#else
593-
pfx->m_size = n_bytes;
594-
#endif /* UNIV_PFS_MEMORY */
638+
639+
ut_allocate_trace_dontdump(ptr, n_bytes, dontdump, pfx, NULL);
595640

596641
return(ptr);
597642
}
@@ -600,17 +645,26 @@ class ut_allocator {
600645
deallocation.
601646
@param[in,out] ptr pointer to memory to free
602647
@param[in] pfx descriptor of the memory, as returned by
603-
allocate_large(). */
648+
allocate_large().
649+
@param[in] dodump if true, advise the OS to include this
650+
memory again if a core dump occurs. */
604651
void
605652
deallocate_large(
606653
pointer ptr,
607-
const ut_new_pfx_t* pfx)
654+
const ut_new_pfx_t* pfx,
655+
size_tsize,
656+
booldodump = false)
608657
{
658+
if (dodump) {
659+
ut_dodump(ptr, size);
660+
}
609661
#ifdef UNIV_PFS_MEMORY
610-
deallocate_trace(pfx);
662+
if (pfx) {
663+
deallocate_trace(pfx);
664+
}
611665
#endif /* UNIV_PFS_MEMORY */
612666

613-
os_mem_free_large(ptr, pfx->m_size);
667+
os_mem_free_large(ptr, size);
614668
}
615669

616670
#ifdef UNIV_PFS_MEMORY
@@ -842,6 +896,10 @@ ut_delete_array(
842896
ut_allocator<byte>(key).allocate( \
843897
n_bytes, NULL, __FILE__, false, false))
844898

899+
/* Allocate a large chunk of untraced memory and advise the OS not to
include it in core dumps. allocate_large() takes (n_elements, pfx,
dontdump): no descriptor is kept, so pfx is NULL and 'true' goes in the
dontdump position. */
#define ut_malloc_dontdump(n_bytes)	static_cast<void*>( \
	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).allocate_large( \
		n_bytes, NULL, true))
902+
845903
#define ut_zalloc(n_bytes, key)static_cast<void*>( \
846904
ut_allocator<byte>(key).allocate( \
847905
n_bytes, NULL, __FILE__, true, false))
@@ -865,6 +923,10 @@ ut_delete_array(
865923
#define ut_free(ptr) ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate( \
866924
reinterpret_cast<byte*>(ptr))
867925

926+
/* Free memory obtained with ut_malloc_dontdump(), first advising the OS
to include it in core dumps again. deallocate_large() returns void, so
the expansion is a plain call expression; ptr is cast to byte* in the
same way the ut_free() macro does. */
#define ut_free_dodump(ptr, size)	\
	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate_large( \
		reinterpret_cast<byte*>(ptr), NULL, size, true)
929+
868930
#else /* UNIV_PFS_MEMORY */
869931

870932
/* Fallbacks when memory tracing is disabled at compile time. */
@@ -887,6 +949,14 @@ ut_delete_array(
887949

888950
#define ut_malloc_nokey(n_bytes) ::malloc(n_bytes)
889951

952+
static inline void *ut_malloc_dontdump(size_t n_bytes)
953+
{
954+
void *ptr = os_mem_alloc_large(&n_bytes);
955+
956+
ut_allocate_trace_dontdump(ptr, n_bytes, true, NULL, NULL);
957+
return ptr;
958+
}
959+
890960
#define ut_zalloc_nokey(n_bytes) ::calloc(1, n_bytes)
891961

892962
#define ut_zalloc_nokey_nofatal(n_bytes) ::calloc(1, n_bytes)
@@ -895,6 +965,12 @@ ut_delete_array(
895965

896966
#define ut_free(ptr) ::free(ptr)
897967

968+
static inline void ut_free_dodump(void *ptr, size_t size)
969+
{
970+
ut_dodump(ptr, size);
971+
os_mem_free_large(ptr, size);
972+
}
973+
898974
#endif /* UNIV_PFS_MEMORY */
899975

900976
#endif /* ut0new_h */

storage/innobase/log/log0log.cc

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -226,16 +226,18 @@ log_buffer_extend(
226226
log_sys->buf_free -= move_start;
227227
log_sys->buf_next_to_write -= move_start;
228228

229+
/* free previous after getting the right address */
230+
if (!log_sys->first_in_use) {
231+
log_sys->buf -= log_sys->buf_size;
232+
}
233+
ut_free_dodump(log_sys->buf, log_sys->buf_size * 2);
234+
229235
/* reallocate log buffer */
230236
srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1;
231-
ut_free(log_sys->buf_ptr);
232-
233237
log_sys->buf_size = LOG_BUFFER_SIZE;
234238

235-
log_sys->buf_ptr = static_cast<byte*>(
236-
ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
237239
log_sys->buf = static_cast<byte*>(
238-
ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
240+
ut_malloc_dontdump(log_sys->buf_size * 2));
239241

240242
log_sys->first_in_use = true;
241243

@@ -723,10 +725,8 @@ log_sys_init()
723725

724726
log_sys->buf_size = LOG_BUFFER_SIZE;
725727

726-
log_sys->buf_ptr = static_cast<byte*>(
727-
ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
728728
log_sys->buf = static_cast<byte*>(
729-
ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
729+
ut_malloc_dontdump(log_sys->buf_size * 2));
730730

731731
log_sys->first_in_use = true;
732732

@@ -1085,12 +1085,12 @@ log_buffer_switch()
10851085
OS_FILE_LOG_BLOCK_SIZE);
10861086

10871087
if (log_sys->first_in_use) {
1088-
ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
1088+
ut_ad(log_sys->buf == ut_align(log_sys->buf,
10891089
OS_FILE_LOG_BLOCK_SIZE));
10901090
log_sys->buf += log_sys->buf_size;
10911091
} else {
10921092
log_sys->buf -= log_sys->buf_size;
1093-
ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
1093+
ut_ad(log_sys->buf == ut_align(log_sys->buf,
10941094
OS_FILE_LOG_BLOCK_SIZE));
10951095
}
10961096

@@ -2254,8 +2254,10 @@ log_shutdown()
22542254
{
22552255
log_group_close_all();
22562256

2257-
ut_free(log_sys->buf_ptr);
2258-
log_sys->buf_ptr = NULL;
2257+
if (!log_sys->first_in_use) {
2258+
log_sys->buf -= log_sys->buf_size;
2259+
}
2260+
ut_free_dodump(log_sys->buf, log_sys->buf_size * 2);
22592261
log_sys->buf = NULL;
22602262
ut_free(log_sys->checkpoint_buf_ptr);
22612263
log_sys->checkpoint_buf_ptr = NULL;

0 commit comments

Comments
 (0)