From a4dddd3327b54c190922897312b69c1cec406a80 Mon Sep 17 00:00:00 2001 From: lihangyu Date: Mon, 22 Jun 2026 15:53:26 +0800 Subject: [PATCH] branch-4.1: [feat](Variant) Support NestedGroup public config cherry-pick #9818 --- be/src/common/config.cpp | 2 +- .../segment/variant/nested_group_provider.cpp | 12 +++++++---- .../segment/nested_group_provider_test.cpp | 20 +++++++++++++++++- .../variant_column_writer_reader_test.cpp | 14 +++++++++++++ .../org/apache/doris/catalog/VariantType.java | 5 +++++ .../nereids/parser/LogicalPlanBuilder.java | 7 +++++-- .../org/apache/doris/catalog/TypeTest.java | 4 ++-- .../nereids/parser/NereidsParserTest.java | 21 +++++++++++++++++-- .../test_variant_search_subcolumn.groovy | 2 ++ 9 files changed, 75 insertions(+), 12 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 8984cd9d653bac..02f1d145f25471 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1164,7 +1164,7 @@ DEFINE_mBool(enable_variant_doc_sparse_write_subcolumns, "true"); // Reserved for future use when NestedGroup expansion moves to storage layer // Deeper arrays will be stored as JSONB DEFINE_mInt32(variant_nested_group_max_depth, "10"); -DEFINE_mBool(variant_nested_group_discard_scalar_on_conflict, "false"); +DEFINE_mBool(variant_nested_group_discard_scalar_on_conflict, "true"); DEFINE_Validator(variant_max_json_key_length, [](const int config) -> bool { return config > 0 && config <= 65535; }); diff --git a/be/src/storage/segment/variant/nested_group_provider.cpp b/be/src/storage/segment/variant/nested_group_provider.cpp index b2271bc69431be..27a8abf681ebf0 100644 --- a/be/src/storage/segment/variant/nested_group_provider.cpp +++ b/be/src/storage/segment/variant/nested_group_provider.cpp @@ -17,6 +17,10 @@ #include "storage/segment/variant/nested_group_provider.h" +#include +#include +#include + namespace doris::segment_v2 { namespace { @@ -123,7 +127,7 @@ class DefaultNestedGroupWriteProvider final : public NestedGroupWriteProvider { statistics == nullptr) { return Status::InvalidArgument("NestedGroup provider input is null"); } - return Status::OK(); + return Status::NotSupported("NestedGroup write path is not available in this build"); } Status prepare_with_built_groups(const NestedGroupsMap& /*nested_groups*/, @@ -135,7 +139,7 @@ class DefaultNestedGroupWriteProvider final : public NestedGroupWriteProvider { statistics == nullptr) { return Status::InvalidArgument("NestedGroup provider input is null"); } - return Status::OK(); + return Status::NotSupported("NestedGroup write path is not available in this build"); } Status init_with_plan(const NestedGroupStreamingWritePlan& /*plan*/, @@ -144,12 +148,12 @@ class DefaultNestedGroupWriteProvider final : public NestedGroupWriteProvider { if (tablet_column == nullptr || column_id == nullptr || statistics == nullptr) { return Status::InvalidArgument("NestedGroup streaming init input is null"); } - return Status::OK(); + return Status::NotSupported("NestedGroup write path is not available in this build"); } Status append_chunk(const NestedGroupStreamingWritePlan& /*plan*/, const ColumnVariant& /*variant*/) override { - return Status::OK(); + return Status::NotSupported("NestedGroup write path is not available in this build"); } uint64_t estimate_buffer_size() const override { return 0; } diff --git a/be/test/storage/segment/nested_group_provider_test.cpp b/be/test/storage/segment/nested_group_provider_test.cpp index 7baa6f51fcbb5b..ef55d38cd5d922 100644 --- a/be/test/storage/segment/nested_group_provider_test.cpp +++ b/be/test/storage/segment/nested_group_provider_test.cpp @@ -29,6 +29,7 @@ #include #include "core/column/column_variant.h" +#include "storage/iterator/olap_data_convertor.h" #include "storage/segment/column_writer.h" #include "storage/segment/variant/variant_column_reader.h" #include "storage/segment/variant/variant_statistics.h" @@ -48,7 +49,7 @@ TEST(NestedGroupProviderTest, DefaultReadProviderIsDisabled) { EXPECT_FALSE(provider->should_enable_nested_group_read_path()); } -TEST(NestedGroupProviderTest, DefaultWriteProviderIsNoOp) { +TEST(NestedGroupProviderTest, DefaultWriteProviderRejectsNestedGroupWritePath) { auto write_provider = create_nested_group_write_provider(); ASSERT_TRUE(write_provider != nullptr); @@ -66,6 +67,23 @@ TEST(NestedGroupProviderTest, DefaultWriteProviderIsNoOp) { write_provider->prepare(*column_variant, nullptr, opts, nullptr, nullptr, &statistics); EXPECT_FALSE(status.ok()); EXPECT_TRUE(status.is()); + + TabletColumn tablet_column; + OlapBlockDataConvertor converter; + int column_id = 0; + status = write_provider->prepare(*column_variant, &tablet_column, opts, &converter, &column_id, + &statistics); + EXPECT_FALSE(status.ok()); + EXPECT_TRUE(status.is()); + EXPECT_NE(status.to_string().find("not available"), std::string::npos); + + NestedGroupsMap nested_groups; + status = write_provider->prepare_with_built_groups(nested_groups, &tablet_column, opts, + &converter, &column_id, &statistics); + EXPECT_FALSE(status.ok()); + EXPECT_TRUE(status.is()); + EXPECT_NE(status.to_string().find("not available"), std::string::npos); + EXPECT_EQ(0, write_provider->estimate_buffer_size()); EXPECT_TRUE(write_provider->finish().ok()); EXPECT_TRUE(write_provider->write_data().ok()); diff --git a/be/test/storage/segment/variant_column_writer_reader_test.cpp b/be/test/storage/segment/variant_column_writer_reader_test.cpp index 7d0ef988c13382..a42466ffdeec9b 100644 --- a/be/test/storage/segment/variant_column_writer_reader_test.cpp +++ b/be/test/storage/segment/variant_column_writer_reader_test.cpp @@ -34,6 +34,7 @@ #include "storage/segment/variant/binary_column_extract_iterator.h" #include "storage/segment/variant/hierarchical_data_iterator.h" #include "storage/segment/variant/nested_group_path.h" +#include "storage/segment/variant/nested_group_provider.h" #include "storage/segment/variant/nested_group_streaming_write_plan.h" #include "storage/segment/variant/sparse_column_merge_iterator.h" #include "storage/segment/variant/variant_column_reader.h" @@ -85,6 +86,11 @@ static void construct_tablet_index(TabletIndexPB* tablet_index, int64_t index_id tablet_index->add_col_unique_id(col_unique_id); } +static bool nested_group_write_path_available() { + auto provider = segment_v2::create_nested_group_read_provider(); + return provider != nullptr && provider->should_enable_nested_group_read_path(); +} + struct VariantStorageParseWriteResult { size_t num_rows = 0; size_t parsed_subcolumns = 0; @@ -5309,6 +5315,10 @@ TEST_F(VariantColumnWriterReaderTest, test_concurrent_load_external_meta_and_get TEST_F(VariantColumnWriterReaderTest, test_streaming_write_plan_collects_regular_paths_from_rowset_metadata) { + if (!nested_group_write_path_available()) { + GTEST_SKIP() << "NestedGroup write path is not available in this build"; + } + init_variant_tablet(41000, 10, true); std::vector input_rowsets; @@ -5340,6 +5350,10 @@ TEST_F(VariantColumnWriterReaderTest, TEST_F(VariantColumnWriterReaderTest, test_streaming_compaction_writer_streams_regular_array_paths_across_batches) { + if (!nested_group_write_path_available()) { + GTEST_SKIP() << "NestedGroup write path is not available in this build"; + } + init_variant_tablet(41001, 10, true); std::vector input_rowsets; diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java index b3b34b2a04558b..d93ef9894dc3b8 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java @@ -187,6 +187,11 @@ public String toSql(int depth) { sb.append("\"variant_sparse_hash_shard_count\" = \"") .append(String.valueOf(Math.max(1, variantSparseHashShardCount))).append("\""); } + if (enableNestedGroup) { + sb.append(","); + sb.append("\"variant_enable_nested_group\" = \"") + .append(String.valueOf(enableNestedGroup)).append("\""); + } sb.append(")>"); return sb.toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 530352613473a6..9f3b0f8adb5cfc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -5307,8 +5307,11 @@ public DataType visitVariantPredefinedFields(VariantPredefinedFieldsContext ctx) } if (enableNestedGroup) { - throw new NotSupportedException( - "variant_enable_nested_group is not supported now"); + enableVariantDocMode = false; + variantMaxSubcolumnsCount = 0; + enableTypedPathsToSparse = false; + variantMaxSparseColumnStatisticsSize = 0; + variantSparseHashShardCount = 0; } // When doc mode is enabled, disable subcolumn extraction and sparse column features diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TypeTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/TypeTest.java index cb544d3d01c7f2..573c0d94dbb944 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/TypeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TypeTest.java @@ -129,11 +129,11 @@ public void testVariantPredefinedFieldsExactMatch() { } @Test - public void testVariantToSqlDoesNotSerializeUnsupportedNestedGroupProperty() { + public void testVariantToSqlSerializesNestedGroupProperty() { VariantType variantType = new VariantType(new ArrayList<>(), 0, false, 10000, 0, false, 0L, 64, true); - Assert.assertFalse(variantType.toSql().contains("variant_enable_nested_group")); + Assert.assertTrue(variantType.toSql().contains("\"variant_enable_nested_group\" = \"true\"")); } // ===================== Mixed Nesting & Precision ===================== diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java index 27a217c6b16fc2..a339ce6cf022ef 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java @@ -1364,14 +1364,31 @@ public void testCtasWithoutAs() { } @Test - public void testCreateTableVariantNestedGroupPropertyIsRejected() { + public void testCreateTableVariantNestedGroupPropertyIsAccepted() { NereidsParser parser = new NereidsParser(); String sql = "CREATE TABLE t_variant_ng (k1 INT, v VARIANT) " + "DISTRIBUTED BY HASH(k1) BUCKETS 1"; + LogicalPlan logicalPlan = parser.parseSingle(sql); + Assertions.assertInstanceOf(CreateTableCommand.class, logicalPlan); + CreateTableCommand createTableCommand = (CreateTableCommand) logicalPlan; + org.apache.doris.nereids.types.VariantType variantType = + (org.apache.doris.nereids.types.VariantType) createTableCommand.getCreateTableInfo() + .getColumnDefinitions().get(1).getType(); + Assertions.assertTrue(variantType.getEnableNestedGroup()); + } + + @Test + public void testCreateTableVariantNestedGroupPropertyConflictsWithDocMode() { + NereidsParser parser = new NereidsParser(); + String sql = "CREATE TABLE t_variant_ng (k1 INT, v VARIANT) " + + "DISTRIBUTED BY HASH(k1) BUCKETS 1"; NotSupportedException exception = Assertions.assertThrowsExactly(NotSupportedException.class, () -> parser.parseSingle(sql)); - Assertions.assertTrue(exception.getMessage().contains("variant_enable_nested_group is not supported now")); + Assertions.assertTrue(exception.getMessage() + .contains("variant_enable_nested_group and variant_enable_doc_mode cannot both be true")); } @Test diff --git a/regression-test/suites/variant_p0/test_variant_search_subcolumn.groovy b/regression-test/suites/variant_p0/test_variant_search_subcolumn.groovy index d4a4f9bc12c31c..f176c884c45b79 100644 --- a/regression-test/suites/variant_p0/test_variant_search_subcolumn.groovy +++ b/regression-test/suites/variant_p0/test_variant_search_subcolumn.groovy @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// DORIS-25891: Variant SEARCH must bind subcolumn predicates to the real stored +// field names for direct, nested, and special-character paths. suite("test_variant_search_subcolumn") { def table_name = "test_variant_search_subcolumn" sql "set default_variant_doc_materialization_min_rows = 0"