Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1456,6 +1456,8 @@ DEFINE_Bool(enable_snapshot_action, "false");
DEFINE_mInt32(variant_max_merged_tablet_schema_size, "2048");

DEFINE_mBool(enable_column_type_check, "true");
DEFINE_mBool(enable_column_sanity_check, "false");
DEFINE_mBool(enable_arrow_validate_full, "false");

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个默认是true吧,我们还是避免挂,而且这个影响不了多少性能

// 128 MB
DEFINE_mInt64(local_exchange_buffer_mem_limit, "134217728");

Expand Down
2 changes: 2 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1553,6 +1553,8 @@ DECLARE_mInt32(variant_max_merged_tablet_schema_size);
DECLARE_mInt64(local_exchange_buffer_mem_limit);

DECLARE_mBool(enable_column_type_check);
DECLARE_mBool(enable_column_sanity_check);
DECLARE_mBool(enable_arrow_validate_full);

// Tolerance for the number of partition id 0 in rowset, default 0
DECLARE_Int32(ignore_invalid_partition_id_rowset_num);
Expand Down
7 changes: 5 additions & 2 deletions be/src/core/column/column_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ static constexpr auto CONVERT_COLUMN_IF_OVERFLOW_DEBUG_POINT =

template <typename T>
void ColumnStr<T>::sanity_check() const {
#ifndef NDEBUG
#ifdef NDEBUG
if (!config::enable_column_sanity_check) {
return;
}
#endif
sanity_check_simple();
auto count = cast_set<int64_t>(offsets.size());
for (int64_t i = 0; i < count; ++i) {
Expand All @@ -54,7 +58,6 @@ void ColumnStr<T>::sanity_check() const {
count, i, offsets[i], i - 1, offsets[i - 1]));
}
}
#endif
}

template <typename T>
Expand Down
8 changes: 6 additions & 2 deletions be/src/core/column/column_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#include "common/cast_set.h"
#include "common/compiler_util.h" // IWYU pragma: keep
#include "common/config.h"
#include "common/exception.h"
#include "common/status.h"
#include "core/assert_cast.h"
Expand Down Expand Up @@ -118,7 +119,11 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {

void sanity_check() const override;
void sanity_check_simple() const {
#ifndef NDEBUG
#ifdef NDEBUG
if (!config::enable_column_sanity_check) {
return;
}
#endif
auto count = cast_set<int64_t>(offsets.size());
if (chars.size() != offsets[count - 1]) {
throw Exception(Status::InternalError("row count: {}, chars.size(): {}, offset[{}]: {}",
Expand All @@ -128,7 +133,6 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
if (offsets[-1] != 0) {
throw Exception(Status::InternalError("wrong offsets[-1]: {}", offsets[-1]));
}
#endif
}

std::string get_name() const override { return "String"; }
Expand Down
9 changes: 5 additions & 4 deletions be/src/core/data_type_serde/data_type_array_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,9 +317,10 @@ Status DataTypeArraySerDe::write_column_to_arrow(const IColumn& column, const Nu
return Status::OK();
}

Status DataTypeArraySerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
Status DataTypeArraySerDe::read_column_from_arrow_impl(IColumn& column,
const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
auto& column_array = static_cast<ColumnArray&>(column);
auto& offsets_data = column_array.get_offsets();
const auto* concrete_array = dynamic_cast<const arrow::ListArray*>(arrow_array);
Expand All @@ -339,7 +340,7 @@ Status DataTypeArraySerDe::read_column_from_arrow(IColumn& column, const arrow::
// convert to doris offset, start from offsets.back()
offsets_data.emplace_back(prev_size + current_offset - arrow_nested_start_offset);
}
return nested_serde->read_column_from_arrow(
return nested_serde->read_column_from_arrow_impl(
column_array.get_data(), concrete_array->values().get(), arrow_nested_start_offset,
arrow_nested_end_offset, ctz);
}
Expand Down
5 changes: 3 additions & 2 deletions be/src/core/data_type_serde/data_type_array_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,9 @@ class DataTypeArraySerDe : public DataTypeSerDe {
Status write_column_to_arrow(const IColumn& column, const NullMap* null_map,
arrow::ArrayBuilder* array_builder, int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const override;
Status read_column_from_arrow_impl(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;

Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
int64_t row_idx, bool col_const,
Expand Down
5 changes: 3 additions & 2 deletions be/src/core/data_type_serde/data_type_bitmap_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ class DataTypeBitMapSerDe : public DataTypeSerDe {
arrow::ArrayBuilder* array_builder, int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;

Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const override {
Status read_column_from_arrow_impl(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const override {
return Status::Error(ErrorCode::NOT_IMPLEMENTED_ERROR,
"read_column_from_arrow with type " + column.get_name());
}
Expand Down
15 changes: 8 additions & 7 deletions be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,10 @@ Status DataTypeDateTimeSerDe::deserialize_one_cell_from_json(IColumn& column, Sl
return Status::OK();
}

Status DataTypeDateTimeSerDe::read_column_from_arrow(IColumn& column,
const arrow::Array* arrow_array, int64_t start,
int64_t end,
const cctz::time_zone& ctz) const {
Status DataTypeDateTimeSerDe::read_column_from_arrow_impl(IColumn& column,
const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
return _read_column_from_arrow<false>(column, arrow_array, start, end, ctz);
}

Expand Down Expand Up @@ -250,9 +250,10 @@ Status DataTypeDateSerDe<T>::_read_column_from_arrow(IColumn& column,
}

template <PrimitiveType T>
Status DataTypeDateSerDe<T>::read_column_from_arrow(IColumn& column,
const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const {
Status DataTypeDateSerDe<T>::read_column_from_arrow_impl(IColumn& column,
const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
return _read_column_from_arrow<true>(column, arrow_array, start, end, ctz);
}

Expand Down
10 changes: 6 additions & 4 deletions be/src/core/data_type_serde/data_type_date_or_datetime_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,9 @@ class DataTypeDateSerDe : public DataTypeNumberSerDe<T> {
Status write_column_to_arrow(const IColumn& column, const NullMap* null_map,
arrow::ArrayBuilder* array_builder, int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const override;
Status read_column_from_arrow_impl(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
int64_t row_idx, bool col_const,
const FormatOptions& options) const override;
Expand Down Expand Up @@ -142,7 +143,8 @@ class DataTypeDateTimeSerDe : public DataTypeDateSerDe<PrimitiveType::TYPE_DATET
Status deserialize_column_from_json_vector(IColumn& column, std::vector<Slice>& slices,
uint64_t* num_deserialized,
const FormatOptions& options) const override;
Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const override;
Status read_column_from_arrow_impl(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
};
} // namespace doris
8 changes: 4 additions & 4 deletions be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,10 +393,10 @@ Status DataTypeDateTimeV2SerDe::write_column_to_arrow(const IColumn& column,
return Status::OK();
}

Status DataTypeDateTimeV2SerDe::read_column_from_arrow(IColumn& column,
const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
Status DataTypeDateTimeV2SerDe::read_column_from_arrow_impl(IColumn& column,
const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
auto& col_data = static_cast<ColumnDateTimeV2&>(column).get_data();
int64_t divisor = 1;
if (arrow_array->type()->id() == arrow::Type::TIMESTAMP) {
Expand Down
5 changes: 3 additions & 2 deletions be/src/core/data_type_serde/data_type_datetimev2_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,9 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_D
Status write_column_to_arrow(const IColumn& column, const NullMap* null_map,
arrow::ArrayBuilder* array_builder, int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const override;
Status read_column_from_arrow_impl(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;

Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
int64_t row_idx, bool col_const,
Expand Down
7 changes: 4 additions & 3 deletions be/src/core/data_type_serde/data_type_datev2_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,10 @@ Status DataTypeDateV2SerDe::write_column_to_arrow(const IColumn& column, const N
return Status::OK();
}

Status DataTypeDateV2SerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
Status DataTypeDateV2SerDe::read_column_from_arrow_impl(IColumn& column,
const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
auto& col_data = static_cast<ColumnDateV2&>(column).get_data();
const auto* concrete_array = dynamic_cast<const arrow::Date32Array*>(arrow_array);
const auto* base_ptr = reinterpret_cast<const uint8_t*>(concrete_array->raw_values());
Expand Down
5 changes: 3 additions & 2 deletions be/src/core/data_type_serde/data_type_datev2_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,9 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_DATEV
Status write_column_to_arrow(const IColumn& column, const NullMap* null_map,
arrow::ArrayBuilder* array_builder, int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const override;
Status read_column_from_arrow_impl(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
int64_t row_idx, bool col_const,
const FormatOptions& options) const override;
Expand Down
8 changes: 4 additions & 4 deletions be/src/core/data_type_serde/data_type_decimal_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,10 +327,10 @@ Status DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column,
}

template <PrimitiveType T>
Status DataTypeDecimalSerDe<T>::read_column_from_arrow(IColumn& column,
const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
Status DataTypeDecimalSerDe<T>::read_column_from_arrow_impl(IColumn& column,
const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
auto& column_data = static_cast<ColumnDecimal<T>&>(column).get_data();
// Decimal<Int128> for decimalv2
// Decimal<Int128I> for deicmalv3
Expand Down
5 changes: 3 additions & 2 deletions be/src/core/data_type_serde/data_type_decimal_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,9 @@ class DataTypeDecimalSerDe : public DataTypeSerDe {
Status write_column_to_arrow(const IColumn& column, const NullMap* null_map,
arrow::ArrayBuilder* array_builder, int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const override;
Status read_column_from_arrow_impl(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
int64_t row_idx, bool col_const,
const FormatOptions& options) const override;
Expand Down
5 changes: 3 additions & 2 deletions be/src/core/data_type_serde/data_type_hll_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ class DataTypeHLLSerDe : public DataTypeSerDe {
Status write_column_to_arrow(const IColumn& column, const NullMap* null_map,
arrow::ArrayBuilder* array_builder, int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const override {
Status read_column_from_arrow_impl(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const override {
return Status::Error(ErrorCode::NOT_IMPLEMENTED_ERROR,
"read_column_from_arrow with type " + column.get_name());
}
Expand Down
7 changes: 4 additions & 3 deletions be/src/core/data_type_serde/data_type_ipv4_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,10 @@ Status DataTypeIPv4SerDe::write_column_to_arrow(const IColumn& column, const Nul
return Status::OK();
}

Status DataTypeIPv4SerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
Status DataTypeIPv4SerDe::read_column_from_arrow_impl(IColumn& column,
const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
auto& col_data = assert_cast<ColumnIPv4&>(column).get_data();
int64_t row_count = end - start;
/// buffers[0] is a null bitmap and buffers[1] are actual values
Expand Down
5 changes: 3 additions & 2 deletions be/src/core/data_type_serde/data_type_ipv4_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ class DataTypeIPv4SerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_IPV4> {
Status write_column_to_arrow(const IColumn& column, const NullMap* null_map,
arrow::ArrayBuilder* array_builder, int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const override;
Status read_column_from_arrow_impl(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;

Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const override;
Expand Down
7 changes: 4 additions & 3 deletions be/src/core/data_type_serde/data_type_ipv6_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,10 @@ Status DataTypeIPv6SerDe::write_column_to_arrow(const IColumn& column, const Nul
return Status::OK();
}

Status DataTypeIPv6SerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
Status DataTypeIPv6SerDe::read_column_from_arrow_impl(IColumn& column,
const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const {
auto& col_data = assert_cast<ColumnIPv6&>(column).get_data();
const auto* concrete_array = assert_cast<const arrow::StringArray*>(arrow_array);
std::shared_ptr<arrow::Buffer> buffer = concrete_array->value_data();
Expand Down
5 changes: 3 additions & 2 deletions be/src/core/data_type_serde/data_type_ipv6_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ class DataTypeIPv6SerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_IPV6> {
Status write_column_to_arrow(const IColumn& column, const NullMap* null_map,
arrow::ArrayBuilder* array_builder, int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
int64_t end, const cctz::time_zone& ctz) const override;
Status read_column_from_arrow_impl(IColumn& column, const arrow::Array* arrow_array,
int64_t start, int64_t end,
const cctz::time_zone& ctz) const override;
void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override;
void write_one_cell_to_jsonb(const IColumn& column, JsonbWriterT<JsonbOutStream>& result,
Arena& mem_pool, int unique_id, int64_t row_num,
Expand Down
Loading
Loading