Skip to content
11 changes: 10 additions & 1 deletion base/poco/Foundation/include/Poco/UTF32Encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,16 @@ class Foundation_API UTF32Encoding: public TextEncoding
int convert(int ch, unsigned char* bytes, int length) const;
int queryConvert(const unsigned char* bytes, int length) const;
int sequenceLength(const unsigned char* bytes, int length) const;


protected:
/// Converts a 32-bit value to an int character code.
/// Values up to and including 0x10FFFF are returned unchanged;
/// anything larger yields -1.
static int safeToInt(Poco::UInt32 value)
{
	const bool withinRange = !(value > 0x10FFFF);
	return withinRange ? static_cast<int>(value) : -1;
}

private:
bool _flipBytes;
static const char* _names[];
Expand Down
42 changes: 21 additions & 21 deletions base/poco/Foundation/src/UTF32Encoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,22 @@ const char* UTF32Encoding::_names[] =

const TextEncoding::CharacterMap UTF32Encoding::_charMap =
{
/* 00 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 10 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 20 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 30 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 40 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 50 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 60 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 70 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 80 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 90 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* a0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* b0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* c0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* d0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* e0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* f0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 00 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 10 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 20 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 30 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 40 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 50 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 60 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 70 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 80 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 90 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* a0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* b0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* c0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* d0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* e0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* f0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
};


Expand Down Expand Up @@ -118,7 +118,7 @@ const TextEncoding::CharacterMap& UTF32Encoding::characterMap() const
int UTF32Encoding::convert(const unsigned char* bytes) const
{
UInt32 uc;
unsigned char* p = (unsigned char*) &uc;
unsigned char* p = reinterpret_cast<unsigned char*>(&uc);
*p++ = *bytes++;
*p++ = *bytes++;
*p++ = *bytes++;
Expand All @@ -129,7 +129,7 @@ int UTF32Encoding::convert(const unsigned char* bytes) const
ByteOrder::flipBytes(uc);
}

return uc;
return safeToInt(uc);
}


Expand All @@ -138,7 +138,7 @@ int UTF32Encoding::convert(int ch, unsigned char* bytes, int length) const
if (bytes && length >= 4)
{
UInt32 ch1 = _flipBytes ? ByteOrder::flipBytes((UInt32) ch) : (UInt32) ch;
unsigned char* p = (unsigned char*) &ch1;
unsigned char* p = reinterpret_cast<unsigned char*>(&ch1);
*bytes++ = *p++;
*bytes++ = *p++;
*bytes++ = *p++;
Expand All @@ -155,14 +155,14 @@ int UTF32Encoding::queryConvert(const unsigned char* bytes, int length) const
if (length >= 4)
{
UInt32 uc;
unsigned char* p = (unsigned char*) &uc;
unsigned char* p = reinterpret_cast<unsigned char*>(&uc);
*p++ = *bytes++;
*p++ = *bytes++;
*p++ = *bytes++;
*p++ = *bytes++;
if (_flipBytes)
ByteOrder::flipBytes(uc);
return uc;
ret = safeToInt(uc);
}

return ret;
Expand Down
11 changes: 11 additions & 0 deletions src/Columns/ColumnVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,17 @@ class ColumnVector final : public COWHelper<ColumnVectorHelper, ColumnVector<T>>
data.push_back(assert_cast<const Self &>(src).getData()[n]);
}

void insertManyFrom(const IColumn & src, size_t position, size_t length) override
{
ValueType v = assert_cast<const Self &>(src).getData()[position];
data.resize_fill(data.size() + length, v);
}

void insertMany(const Field & field, size_t length) override
{
data.resize_fill(data.size() + length, static_cast<T>(field.get<T>()));
}

void insertData(const char * pos, size_t) override
{
data.emplace_back(unalignedLoad<T>(pos));
Expand Down
7 changes: 6 additions & 1 deletion src/Columns/FilterDescription.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,12 @@ FilterDescription::FilterDescription(const IColumn & column_)

size_t size = res.size();
for (size_t i = 0; i < size; ++i)
res[i] = res[i] && !null_map[i];
{
auto has_val = static_cast<UInt8>(!!res[i]);
auto not_null = static_cast<UInt8>(!null_map[i]);
/// Use the bitwise AND (&) instead of the logical AND (&&) so that the loop can be auto-vectorized.
res[i] = has_val & not_null;
}

data = &res;
data_holder = std::move(mutable_holder);
Expand Down
19 changes: 16 additions & 3 deletions src/Common/HashTable/HashTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NO_AVAILABLE_DATA;
extern const int CANNOT_ALLOCATE_MEMORY;
}
}

Expand Down Expand Up @@ -501,9 +502,21 @@ class HashTable : private boost::noncopyable,
return place_value;
}

/// Returns the number of bytes needed for `buffer_size` cells (buffer_size * sizeof(Cell)).
/// Throws DB::Exception with CANNOT_ALLOCATE_MEMORY when common::mulOverflow reports an overflow.
static size_t allocCheckOverflow(size_t buffer_size)
{
    size_t bytes = 0;
    const bool overflowed = common::mulOverflow(buffer_size, sizeof(Cell), bytes);
    if (!overflowed)
        return bytes;

    throw DB::Exception(
        DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY,
        "Integer overflow trying to allocate memory for HashTable. Trying to allocate {} cells of {} bytes each",
        buffer_size, sizeof(Cell));
}

void alloc(const Grower & new_grower)
{
buf = reinterpret_cast<Cell *>(Allocator::alloc(new_grower.bufSize() * sizeof(Cell)));
buf = reinterpret_cast<Cell *>(Allocator::alloc(allocCheckOverflow(new_grower.bufSize())));
grower = new_grower;
}

Expand Down Expand Up @@ -560,11 +573,11 @@ class HashTable : private boost::noncopyable,

if constexpr (Cell::need_to_notify_cell_during_move)
{
buf = reinterpret_cast<Cell *>(Allocator::alloc(new_grower.bufSize() * sizeof(Cell)));
buf = reinterpret_cast<Cell *>(Allocator::alloc(allocCheckOverflow(new_grower.bufSize())));
memcpy(reinterpret_cast<void *>(buf), reinterpret_cast<const void *>(old_buffer.get()), old_buffer_size);
}
else
buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, old_buffer_size, new_grower.bufSize() * sizeof(Cell)));
buf = reinterpret_cast<Cell *>(Allocator::realloc(buf, old_buffer_size, allocCheckOverflow(new_grower.bufSize())));

grower = new_grower;

Expand Down
6 changes: 3 additions & 3 deletions src/Common/SipHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class SipHash
current_word = 0;
}

void update(const char * data, UInt64 size)
ALWAYS_INLINE void update(const char * data, UInt64 size)
{
const char * end = data + size;

Expand Down Expand Up @@ -137,12 +137,12 @@ class SipHash
}

template <typename T>
void update(const T & x)
ALWAYS_INLINE void update(const T & x)
{
update(reinterpret_cast<const char *>(&x), sizeof(x));
}

void update(const std::string & x)
ALWAYS_INLINE void update(const std::string & x)
{
update(x.data(), x.length());
}
Expand Down
8 changes: 7 additions & 1 deletion src/DataTypes/Serializations/SerializationString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ template <int UNROLL_TIMES>
static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnString::Offsets & offsets, ReadBuffer & istr, size_t limit)
{
size_t offset = data.size();
/// Avoiding calling resize in a loop improves the performance.
data.resize(std::max(data.capacity(), static_cast<size_t>(4096)));

for (size_t i = 0; i < limit; ++i)
{
if (istr.eof())
Expand All @@ -161,7 +164,8 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
offset += size + 1;
offsets.push_back(offset);

data.resize(offset);
if (unlikely(offset > data.size()))
data.resize_exact(roundUpToPowerOfTwoOrZero(std::max(offset, data.size() * 2)));

if (size)
{
Expand Down Expand Up @@ -193,6 +197,8 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt

data[offset - 1] = 0;
}

data.resize(offset);
}


Expand Down
6 changes: 3 additions & 3 deletions src/IO/BitHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class BitReader
{}

// reads bits_to_read high-bits from bits_buffer
inline UInt64 readBits(UInt8 bits_to_read)
ALWAYS_INLINE inline UInt64 readBits(UInt8 bits_to_read)
{
if (bits_to_read > bits_count)
fillBitBuffer();
Expand All @@ -72,7 +72,7 @@ class BitReader
return getBitsFromBitBuffer<PEEK>(8);
}

inline UInt8 readBit()
ALWAYS_INLINE inline UInt8 readBit()
{
return static_cast<UInt8>(readBits(1));
}
Expand Down Expand Up @@ -123,7 +123,7 @@ class BitReader


// Fills internal bits_buffer with data from source, reads at most 64 bits
size_t fillBitBuffer()
ALWAYS_INLINE size_t fillBitBuffer()
{
const size_t available = source_end - source_current;
const auto bytes_to_read = std::min<size_t>(64 / 8, available);
Expand Down
6 changes: 5 additions & 1 deletion src/Interpreters/ThreadStatusExt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,11 +405,15 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits)
query_context.reset();
thread_trace_context.trace_id = 0;
thread_trace_context.span_id = 0;

/// The contents of thread_group->finished_threads_counters_memory are moved into this temporary vector, so that they are deallocated outside the critical section.
std::vector<ThreadGroupStatus::ProfileEventsCountersAndMemory> move_to_temp;

/// Avoid leaking of ThreadGroupStatus::finished_threads_counters_memory
/// (this is in case someone uses system thread but did not call getProfileEventsCountersAndMemoryForThreads())
{
std::lock_guard guard(thread_group->mutex);
auto stats = std::move(thread_group->finished_threads_counters_memory);
move_to_temp = std::move(thread_group->finished_threads_counters_memory);
}

thread_group.reset();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- The INSERT below is annotated to fail on the client side with CANNOT_ALLOCATE_MEMORY.
DROP TABLE IF EXISTS tab;
CREATE TABLE tab
(
    d Int64,
    s AggregateFunction(groupUniqArrayArray, Array(UInt64)),
    c SimpleAggregateFunction(groupUniqArrayArray, Array(UInt64))
)
ENGINE = SummingMergeTree()
ORDER BY d;
INSERT INTO tab VALUES (1, 'このコー'); -- { clientError CANNOT_ALLOCATE_MEMORY }
DROP TABLE tab;