Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1172,7 +1172,11 @@ DEFINE_mBool(variant_enable_duplicate_json_path_check, "false");
DEFINE_mInt32(variant_storage_parse_mode, "0");
DEFINE_mBool(enable_vertical_compact_variant_subcolumns, "true");
DEFINE_mBool(enable_variant_doc_sparse_write_subcolumns, "true");
DEFINE_mBool(variant_nested_group_discard_scalar_on_conflict, "false");
// Maximum depth of nested arrays to track with NestedGroup
// Reserved for future use when NestedGroup expansion moves to storage layer
// Deeper arrays will be stored as JSONB
DEFINE_mInt32(variant_nested_group_max_depth, "10");

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This new config is defined and registered, but it is not declared in config.h next to the other variant configs. DECLARE_mInt32 is what exposes config::... values to BE code outside config.cpp, so any code that tries to use config::variant_nested_group_max_depth through the normal config header will fail to compile even though the runtime config name exists. Please add the matching DECLARE_mInt32(variant_nested_group_max_depth); near the adjacent variant declarations.

DEFINE_mBool(variant_nested_group_discard_scalar_on_conflict, "true");
Comment on lines +1175 to +1179

DEFINE_Validator(variant_max_json_key_length,
[](const int config) -> bool { return config > 0 && config <= 65535; });
Expand Down
12 changes: 8 additions & 4 deletions be/src/storage/segment/variant/nested_group_provider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

#include "storage/segment/variant/nested_group_provider.h"

#include <algorithm>
#include <string>
#include <utility>

namespace doris::segment_v2 {

namespace {
Expand Down Expand Up @@ -123,7 +127,7 @@ class DefaultNestedGroupWriteProvider final : public NestedGroupWriteProvider {
statistics == nullptr) {
return Status::InvalidArgument("NestedGroup provider input is null");
}
return Status::OK();
return Status::NotSupported("NestedGroup write path is not available in this build");
}

Status prepare_with_built_groups(const NestedGroupsMap& /*nested_groups*/,
Expand All @@ -135,7 +139,7 @@ class DefaultNestedGroupWriteProvider final : public NestedGroupWriteProvider {
statistics == nullptr) {
return Status::InvalidArgument("NestedGroup provider input is null");
}
return Status::OK();
return Status::NotSupported("NestedGroup write path is not available in this build");
}

Status init_with_plan(const NestedGroupStreamingWritePlan& /*plan*/,
Expand All @@ -144,12 +148,12 @@ class DefaultNestedGroupWriteProvider final : public NestedGroupWriteProvider {
if (tablet_column == nullptr || column_id == nullptr || statistics == nullptr) {
return Status::InvalidArgument("NestedGroup streaming init input is null");
}
return Status::OK();
return Status::NotSupported("NestedGroup write path is not available in this build");
}

Status append_chunk(const NestedGroupStreamingWritePlan& /*plan*/,
const ColumnVariant& /*variant*/) override {
return Status::OK();
return Status::NotSupported("NestedGroup write path is not available in this build");
}

uint64_t estimate_buffer_size() const override { return 0; }
Expand Down
20 changes: 19 additions & 1 deletion be/test/storage/segment/nested_group_provider_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <roaring/roaring.hh>

#include "core/column/column_variant.h"
#include "storage/iterator/olap_data_convertor.h"
#include "storage/segment/column_writer.h"
#include "storage/segment/variant/variant_column_reader.h"
#include "storage/segment/variant/variant_statistics.h"
Expand All @@ -48,7 +49,7 @@ TEST(NestedGroupProviderTest, DefaultReadProviderIsDisabled) {
EXPECT_FALSE(provider->should_enable_nested_group_read_path());
}

TEST(NestedGroupProviderTest, DefaultWriteProviderIsNoOp) {
TEST(NestedGroupProviderTest, DefaultWriteProviderRejectsNestedGroupWritePath) {
auto write_provider = create_nested_group_write_provider();
ASSERT_TRUE(write_provider != nullptr);

Expand All @@ -66,6 +67,23 @@ TEST(NestedGroupProviderTest, DefaultWriteProviderIsNoOp) {
write_provider->prepare(*column_variant, nullptr, opts, nullptr, nullptr, &statistics);
EXPECT_FALSE(status.ok());
EXPECT_TRUE(status.is<ErrorCode::INVALID_ARGUMENT>());

TabletColumn tablet_column;
OlapBlockDataConvertor converter;
int column_id = 0;
status = write_provider->prepare(*column_variant, &tablet_column, opts, &converter, &column_id,
&statistics);
EXPECT_FALSE(status.ok());
EXPECT_TRUE(status.is<ErrorCode::NOT_IMPLEMENTED_ERROR>());
EXPECT_NE(status.to_string().find("not available"), std::string::npos);

NestedGroupsMap nested_groups;
status = write_provider->prepare_with_built_groups(nested_groups, &tablet_column, opts,
&converter, &column_id, &statistics);
EXPECT_FALSE(status.ok());
EXPECT_TRUE(status.is<ErrorCode::NOT_IMPLEMENTED_ERROR>());
EXPECT_NE(status.to_string().find("not available"), std::string::npos);

EXPECT_EQ(0, write_provider->estimate_buffer_size());
EXPECT_TRUE(write_provider->finish().ok());
EXPECT_TRUE(write_provider->write_data().ok());
Expand Down
14 changes: 14 additions & 0 deletions be/test/storage/segment/variant_column_writer_reader_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "storage/segment/variant/binary_column_extract_iterator.h"
#include "storage/segment/variant/hierarchical_data_iterator.h"
#include "storage/segment/variant/nested_group_path.h"
#include "storage/segment/variant/nested_group_provider.h"
#include "storage/segment/variant/nested_group_streaming_write_plan.h"
#include "storage/segment/variant/sparse_column_merge_iterator.h"
#include "storage/segment/variant/variant_column_reader.h"
Expand Down Expand Up @@ -85,6 +86,11 @@ static void construct_tablet_index(TabletIndexPB* tablet_index, int64_t index_id
tablet_index->add_col_unique_id(col_unique_id);
}

static bool nested_group_write_path_available() {
auto provider = segment_v2::create_nested_group_read_provider();
return provider != nullptr && provider->should_enable_nested_group_read_path();
}

static void fill_nullable_variant_block(Block* block,
std::unordered_map<int, std::string>* inserted_jsonstr,
variant_util::PathToNoneNullValues* path_with_size) {
Expand Down Expand Up @@ -5236,6 +5242,10 @@ TEST_F(VariantColumnWriterReaderTest, test_concurrent_load_external_meta_and_get

TEST_F(VariantColumnWriterReaderTest,
test_streaming_write_plan_collects_regular_paths_from_rowset_metadata) {
if (!nested_group_write_path_available()) {
GTEST_SKIP() << "NestedGroup write path is not available in this build";
}

init_variant_tablet(41000, 10, true);

std::vector<RowsetSharedPtr> input_rowsets;
Expand Down Expand Up @@ -5267,6 +5277,10 @@ TEST_F(VariantColumnWriterReaderTest,

TEST_F(VariantColumnWriterReaderTest,
test_streaming_compaction_writer_streams_regular_array_paths_across_batches) {
if (!nested_group_write_path_available()) {
GTEST_SKIP() << "NestedGroup write path is not available in this build";
}

init_variant_tablet(41001, 10, true);

std::vector<RowsetSharedPtr> input_rowsets;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5329,8 +5329,11 @@ public DataType visitVariantPredefinedFields(VariantPredefinedFieldsContext ctx)
}

if (enableNestedGroup) {
throw new NotSupportedException(
"variant_enable_nested_group is not supported now");
enableVariantDocMode = false;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This now lets FE create a table whose VARIANT column has variant_enable_nested_group=true, but the default BE build still cannot write such a column. FE propagates the flag into tablet schema, and VariantColumnWriterImpl::finalize() calls the nested-group provider whenever the flag is true; the default provider's prepare/prepare_with_built_groups implementations now return NotSupported("NestedGroup write path is not available in this build"). That means a user can successfully create the table and only hit an insert/load failure later, while the new BE tests skip this path when the provider is unavailable. Please either keep DDL/session acceptance gated until the active BE write path is available, provide a safe fallback write behavior, or add an explicit negative end-to-end test that documents this create-vs-insert boundary.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that variant_enable_nested_group=true can be parsed into a Nereids VariantType, explicit casts to that type need to remain semantically visible. Today SimplifyCastRule removes CAST(v AS VARIANT<PROPERTIES("variant_enable_nested_group"="true")>) whenever the child is any otherwise-equal VARIANT, because VariantType.equals() and hashCode() do not include enableNestedGroup. In CTAS/schema-from-query paths, the output column type is taken from the rewritten slot via s.getDataType().conversion(), so the cast can be optimized away and the created table loses the requested nested-group property. Please either include enableNestedGroup in Nereids VariantType equality/hash semantics, or otherwise prevent cast simplification from dropping explicit VARIANT property casts, and add a test that covers this through CTAS or output schema derivation.

variantMaxSubcolumnsCount = 0;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This normalization should validate the final VARIANT property combination after session defaults and explicit properties have both been applied. Right now the validator only sees the raw property map, so conflicts where one side comes from a default are silently rewritten here. For example, with default_variant_enable_nested_group=true, a column declared as VARIANT<PROPERTIES("variant_enable_doc_mode"="true")> reaches this branch with both booleans true and the explicit doc-mode request is forced back to false; with default_variant_enable_doc_mode=true, an explicit nested-group property does the same. A positive explicit variant_max_subcolumns_count is also zeroed when nested group comes from the session default. Please validate enableNestedGroup against the final doc/sparse/subcolumn settings before normalizing them, and add parser tests that cover the session-default cases.

enableTypedPathsToSparse = false;
variantMaxSparseColumnStatisticsSize = 0;
variantSparseHashShardCount = 0;
}
Comment on lines 5331 to 5337

// When doc mode is enabled, disable subcolumn extraction and sparse column features
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,11 @@ public void testVariantPredefinedFieldsExactMatch() {
}

@Test
public void testVariantToSqlDoesNotSerializeUnsupportedNestedGroupProperty() {
public void testVariantToSqlSerializesNestedGroupProperty() {
VariantType variantType = new VariantType(new ArrayList<>(), 0, false, 10000, 0,
false, 0L, 64, true);

Assert.assertFalse(variantType.toSql().contains("variant_enable_nested_group"));
Assert.assertTrue(variantType.toSql().contains("\"variant_enable_nested_group\" = \"true\""));
}

// ===================== Mixed Nesting & Precision =====================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1444,14 +1444,31 @@ public void testCtasWithoutAs() {
}

@Test
public void testCreateTableVariantNestedGroupPropertyIsRejected() {
public void testCreateTableVariantNestedGroupPropertyIsAccepted() {
NereidsParser parser = new NereidsParser();
String sql = "CREATE TABLE t_variant_ng (k1 INT, v VARIANT<PROPERTIES("
+ "\"variant_enable_nested_group\" = \"true\")>) "
+ "DISTRIBUTED BY HASH(k1) BUCKETS 1";
LogicalPlan logicalPlan = parser.parseSingle(sql);
Assertions.assertInstanceOf(CreateTableCommand.class, logicalPlan);
CreateTableCommand createTableCommand = (CreateTableCommand) logicalPlan;
org.apache.doris.nereids.types.VariantType variantType =
(org.apache.doris.nereids.types.VariantType) createTableCommand.getCreateTableInfo()
.getColumnDefinitions().get(1).getType();
Assertions.assertTrue(variantType.getEnableNestedGroup());
}

@Test
public void testCreateTableVariantNestedGroupPropertyConflictsWithDocMode() {
NereidsParser parser = new NereidsParser();
String sql = "CREATE TABLE t_variant_ng (k1 INT, v VARIANT<PROPERTIES("
+ "\"variant_enable_nested_group\" = \"true\", "
+ "\"variant_enable_doc_mode\" = \"true\")>) "
+ "DISTRIBUTED BY HASH(k1) BUCKETS 1";
NotSupportedException exception =
Assertions.assertThrowsExactly(NotSupportedException.class, () -> parser.parseSingle(sql));
Assertions.assertTrue(exception.getMessage().contains("variant_enable_nested_group is not supported now"));
Assertions.assertTrue(exception.getMessage()
.contains("variant_enable_nested_group and variant_enable_doc_mode cannot both be true"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,11 @@ public String toSql(int depth) {
sb.append("\"variant_sparse_hash_shard_count\" = \"")
.append(String.valueOf(Math.max(1, variantSparseHashShardCount))).append("\"");
}
if (enableNestedGroup) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When nested group is enabled the parser zeros variantMaxSparseColumnStatisticsSize above, but this catalog toSql() path still emits that value in the non-doc property set before adding variant_enable_nested_group. A table created with VARIANT<PROPERTIES("variant_enable_nested_group" = "true")> will therefore print variant_max_sparse_column_statistics_size = "0", and re-parsing that SQL fails because PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize rejects explicit values below 1. Please serialize a nested-group property set that omits the disabled sparse/doc properties, or otherwise keeps all emitted values parser-valid, and add a DDL round-trip test for this type.

sb.append(",");
sb.append("\"variant_enable_nested_group\" = \"")
.append(String.valueOf(enableNestedGroup)).append("\"");
}
sb.append(")>");
return sb.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

// DORIS-25891: Variant SEARCH must bind subcolumn predicates to the real stored
// field names for direct, nested, and special-character paths.
suite("test_variant_search_subcolumn") {
def table_name = "test_variant_search_subcolumn"
sql "set default_variant_doc_materialization_min_rows = 0"
Expand Down
Loading