From 938af274f380a5248c590bb6b2ab2f8fc3c81b43 Mon Sep 17 00:00:00 2001 From: Johannes Misch Date: Thu, 21 May 2026 19:46:47 +0200 Subject: [PATCH 1/2] Allow accessing ColumnArray's backing column When working with a ColumnArray, it is oftentimes useful to be able to access the entire, contiguous backing array at once without having to do row-wise access. This change simply allows read access to the backing data array and the offsets. --- clickhouse/columns/array.cpp | 6 +++++- clickhouse/columns/array.h | 22 +++++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/clickhouse/columns/array.cpp b/clickhouse/columns/array.cpp index 94163c4d..5cff819c 100644 --- a/clickhouse/columns/array.cpp +++ b/clickhouse/columns/array.cpp @@ -163,10 +163,14 @@ size_t ColumnArray::GetSize(size_t n) const { return (n == 0) ? (*offsets_)[n] : ((*offsets_)[n] - (*offsets_)[n - 1]); } -ColumnRef ColumnArray::GetData() { +ColumnRef ColumnArray::GetData() const { return data_; } +const std::shared_ptr& ColumnArray::GetOffsets() const { + return offsets_; +} + void ColumnArray::Reset() { data_.reset(); offsets_.reset(); diff --git a/clickhouse/columns/array.h b/clickhouse/columns/array.h index 3ad9c94d..a80aecb6 100644 --- a/clickhouse/columns/array.h +++ b/clickhouse/columns/array.h @@ -78,14 +78,24 @@ class ColumnArray : public Column { void OffsetsIncrease(size_t); + /// Gets the backing data array of the Array's. This does not include any Array Bounds. + ColumnRef GetData() const; + + /// Gets all offsets denoting the list boundaries overlayed GetData. + /// The layout is [size_i, ...] where `i` is the row. + const std::shared_ptr& GetOffsets() const; + + /// Gets the offset of the start of row `n` into `GetData()`. + size_t GetOffset(size_t n) const; + + /// Gets the element count of row `n`. 
+ size_t GetSize(size_t n) const; + protected: template friend class ColumnArrayT; ColumnArray(ColumnArray&& array); - size_t GetOffset(size_t n) const; - size_t GetSize(size_t n) const; - ColumnRef GetData(); void AddOffset(size_t n); void Reset(); @@ -262,11 +272,9 @@ class ColumnArrayT : public ColumnArray { template inline void Append(Container&& container) { using container_type = decltype(container); - if constexpr (std::is_lvalue_reference_v || - std::is_const_v>) { + if constexpr (std::is_lvalue_reference_v || std::is_const_v>) { Append(std::begin(container), std::end(container)); - } - else { + } else { Append(std::make_move_iterator(std::begin(container)), std::make_move_iterator(std::end(container))); } From 18bcf39279f4c7d2354c334d9f4ed383e6436b61 Mon Sep 17 00:00:00 2001 From: Johannes Misch Date: Mon, 1 Jun 2026 17:29:03 +0200 Subject: [PATCH 2/2] Make ColumnArray accessors const correct This also adds unit tests for the new accessors. --- clickhouse/columns/array.cpp | 12 ++++++++-- clickhouse/columns/array.h | 6 +++-- ut/column_array_ut.cpp | 44 ++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 4 deletions(-) diff --git a/clickhouse/columns/array.cpp b/clickhouse/columns/array.cpp index 5cff819c..77f8f7f9 100644 --- a/clickhouse/columns/array.cpp +++ b/clickhouse/columns/array.cpp @@ -163,11 +163,19 @@ size_t ColumnArray::GetSize(size_t n) const { return (n == 0) ? 
(*offsets_)[n] : ((*offsets_)[n] - (*offsets_)[n - 1]); } -ColumnRef ColumnArray::GetData() const { +ColumnRef ColumnArray::GetData() { return data_; } -const std::shared_ptr& ColumnArray::GetOffsets() const { +std::shared_ptr ColumnArray::GetData() const { + return data_; +} + +std::shared_ptr& ColumnArray::GetOffsets() { + return offsets_; +} + +std::shared_ptr ColumnArray::GetOffsets() const { return offsets_; } diff --git a/clickhouse/columns/array.h b/clickhouse/columns/array.h index a80aecb6..f771e4af 100644 --- a/clickhouse/columns/array.h +++ b/clickhouse/columns/array.h @@ -79,11 +79,13 @@ class ColumnArray : public Column { void OffsetsIncrease(size_t); /// Gets the backing data array of the Array's. This does not include any Array Bounds. - ColumnRef GetData() const; + ColumnRef GetData(); + std::shared_ptr GetData() const; /// Gets all offsets denoting the list boundaries overlayed GetData. /// The layout is [size_i, ...] where `i` is the row. - const std::shared_ptr& GetOffsets() const; + std::shared_ptr& GetOffsets(); + std::shared_ptr GetOffsets() const; /// Gets the offset of the start of row `n` into `GetData()`. 
size_t GetOffset(size_t n) const; diff --git a/ut/column_array_ut.cpp b/ut/column_array_ut.cpp index 887f5047..0fc28d4f 100644 --- a/ut/column_array_ut.cpp +++ b/ut/column_array_ut.cpp @@ -436,3 +436,47 @@ TEST(ColumnArrayT, const_right_value_no_move) { EXPECT_EQ(values[2], value2); } } + +TEST(ColumnArray, GetData) { + auto col = std::make_shared(std::make_shared()); + col->AppendAsColumn(std::make_shared(std::vector{1, 2, 3})); + col->AppendAsColumn(std::make_shared(std::vector{4, 5})); + + ColumnRef data = col->GetData(); + EXPECT_EQ(data->Size(), 5u); + + const auto& ccol = *col; + std::shared_ptr cdata = ccol.GetData(); + EXPECT_EQ(cdata->Size(), 5u); +} + +TEST(ColumnArray, GetOffsets) { + auto col = std::make_shared(std::make_shared()); + col->AppendAsColumn(std::make_shared(std::vector{1, 2, 3})); + col->AppendAsColumn(std::make_shared(std::vector{4, 5})); + + auto& offsets = col->GetOffsets(); + ASSERT_EQ(offsets->Size(), 2u); + EXPECT_EQ((*offsets)[0], 3u); + EXPECT_EQ((*offsets)[1], 5u); + + const auto& ccol = *col; + std::shared_ptr coffsets = ccol.GetOffsets(); + EXPECT_EQ((*coffsets)[0], 3u); + EXPECT_EQ((*coffsets)[1], 5u); +} + +TEST(ColumnArray, GetOffsetAndSize) { + auto col = std::make_shared(std::make_shared()); + col->AppendAsColumn(std::make_shared(std::vector{10, 20})); + col->AppendAsColumn(std::make_shared(std::vector{30})); + col->AppendAsColumn(std::make_shared(std::vector{})); + + EXPECT_EQ(col->GetOffset(0), 0u); + EXPECT_EQ(col->GetOffset(1), 2u); + EXPECT_EQ(col->GetOffset(2), 3u); + + EXPECT_EQ(col->GetSize(0), 2u); + EXPECT_EQ(col->GetSize(1), 1u); + EXPECT_EQ(col->GetSize(2), 0u); +}