Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ DEFINE_mBool(enable_variant_doc_sparse_write_subcolumns, "true");
// Reserved for future use when NestedGroup expansion moves to storage layer
// Deeper arrays will be stored as JSONB
DEFINE_mInt32(variant_nested_group_max_depth, "10");
DEFINE_mBool(variant_nested_group_discard_scalar_on_conflict, "false");
DEFINE_mBool(variant_nested_group_discard_scalar_on_conflict, "true");

DEFINE_Validator(variant_max_json_key_length,
[](const int config) -> bool { return config > 0 && config <= 65535; });
Expand Down
12 changes: 8 additions & 4 deletions be/src/storage/segment/variant/nested_group_provider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

#include "storage/segment/variant/nested_group_provider.h"

#include <algorithm>
#include <string>
#include <utility>

namespace doris::segment_v2 {

namespace {
Expand Down Expand Up @@ -123,7 +127,7 @@ class DefaultNestedGroupWriteProvider final : public NestedGroupWriteProvider {
statistics == nullptr) {
return Status::InvalidArgument("NestedGroup provider input is null");
}
return Status::OK();
return Status::NotSupported("NestedGroup write path is not available in this build");
}

Status prepare_with_built_groups(const NestedGroupsMap& /*nested_groups*/,
Expand All @@ -135,7 +139,7 @@ class DefaultNestedGroupWriteProvider final : public NestedGroupWriteProvider {
statistics == nullptr) {
return Status::InvalidArgument("NestedGroup provider input is null");
}
return Status::OK();
return Status::NotSupported("NestedGroup write path is not available in this build");
}

Status init_with_plan(const NestedGroupStreamingWritePlan& /*plan*/,
Expand All @@ -144,12 +148,12 @@ class DefaultNestedGroupWriteProvider final : public NestedGroupWriteProvider {
if (tablet_column == nullptr || column_id == nullptr || statistics == nullptr) {
return Status::InvalidArgument("NestedGroup streaming init input is null");
}
return Status::OK();
return Status::NotSupported("NestedGroup write path is not available in this build");
}

Status append_chunk(const NestedGroupStreamingWritePlan& /*plan*/,
const ColumnVariant& /*variant*/) override {
return Status::OK();
return Status::NotSupported("NestedGroup write path is not available in this build");
}

uint64_t estimate_buffer_size() const override { return 0; }
Expand Down
20 changes: 19 additions & 1 deletion be/test/storage/segment/nested_group_provider_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <roaring/roaring.hh>

#include "core/column/column_variant.h"
#include "storage/iterator/olap_data_convertor.h"
#include "storage/segment/column_writer.h"
#include "storage/segment/variant/variant_column_reader.h"
#include "storage/segment/variant/variant_statistics.h"
Expand All @@ -48,7 +49,7 @@ TEST(NestedGroupProviderTest, DefaultReadProviderIsDisabled) {
EXPECT_FALSE(provider->should_enable_nested_group_read_path());
}

TEST(NestedGroupProviderTest, DefaultWriteProviderIsNoOp) {
TEST(NestedGroupProviderTest, DefaultWriteProviderRejectsNestedGroupWritePath) {
auto write_provider = create_nested_group_write_provider();
ASSERT_TRUE(write_provider != nullptr);

Expand All @@ -66,6 +67,23 @@ TEST(NestedGroupProviderTest, DefaultWriteProviderIsNoOp) {
write_provider->prepare(*column_variant, nullptr, opts, nullptr, nullptr, &statistics);
EXPECT_FALSE(status.ok());
EXPECT_TRUE(status.is<ErrorCode::INVALID_ARGUMENT>());

TabletColumn tablet_column;
OlapBlockDataConvertor converter;
int column_id = 0;
status = write_provider->prepare(*column_variant, &tablet_column, opts, &converter, &column_id,
&statistics);
EXPECT_FALSE(status.ok());
EXPECT_TRUE(status.is<ErrorCode::NOT_IMPLEMENTED_ERROR>());
EXPECT_NE(status.to_string().find("not available"), std::string::npos);

NestedGroupsMap nested_groups;
status = write_provider->prepare_with_built_groups(nested_groups, &tablet_column, opts,
&converter, &column_id, &statistics);
EXPECT_FALSE(status.ok());
EXPECT_TRUE(status.is<ErrorCode::NOT_IMPLEMENTED_ERROR>());
EXPECT_NE(status.to_string().find("not available"), std::string::npos);

EXPECT_EQ(0, write_provider->estimate_buffer_size());
EXPECT_TRUE(write_provider->finish().ok());
EXPECT_TRUE(write_provider->write_data().ok());
Expand Down
14 changes: 14 additions & 0 deletions be/test/storage/segment/variant_column_writer_reader_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "storage/segment/variant/binary_column_extract_iterator.h"
#include "storage/segment/variant/hierarchical_data_iterator.h"
#include "storage/segment/variant/nested_group_path.h"
#include "storage/segment/variant/nested_group_provider.h"
#include "storage/segment/variant/nested_group_streaming_write_plan.h"
#include "storage/segment/variant/sparse_column_merge_iterator.h"
#include "storage/segment/variant/variant_column_reader.h"
Expand Down Expand Up @@ -85,6 +86,11 @@ static void construct_tablet_index(TabletIndexPB* tablet_index, int64_t index_id
tablet_index->add_col_unique_id(col_unique_id);
}

static bool nested_group_write_path_available() {
auto provider = segment_v2::create_nested_group_read_provider();
return provider != nullptr && provider->should_enable_nested_group_read_path();
}

struct VariantStorageParseWriteResult {
size_t num_rows = 0;
size_t parsed_subcolumns = 0;
Expand Down Expand Up @@ -5309,6 +5315,10 @@ TEST_F(VariantColumnWriterReaderTest, test_concurrent_load_external_meta_and_get

TEST_F(VariantColumnWriterReaderTest,
test_streaming_write_plan_collects_regular_paths_from_rowset_metadata) {
if (!nested_group_write_path_available()) {
GTEST_SKIP() << "NestedGroup write path is not available in this build";
}

init_variant_tablet(41000, 10, true);

std::vector<RowsetSharedPtr> input_rowsets;
Expand Down Expand Up @@ -5340,6 +5350,10 @@ TEST_F(VariantColumnWriterReaderTest,

TEST_F(VariantColumnWriterReaderTest,
test_streaming_compaction_writer_streams_regular_array_paths_across_batches) {
if (!nested_group_write_path_available()) {
GTEST_SKIP() << "NestedGroup write path is not available in this build";
}

init_variant_tablet(41001, 10, true);

std::vector<RowsetSharedPtr> input_rowsets;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,11 @@ public String toSql(int depth) {
sb.append("\"variant_sparse_hash_shard_count\" = \"")
.append(String.valueOf(Math.max(1, variantSparseHashShardCount))).append("\"");
}
if (enableNestedGroup) {
sb.append(",");
sb.append("\"variant_enable_nested_group\" = \"")
.append(String.valueOf(enableNestedGroup)).append("\"");
}
Comment on lines +190 to +194
sb.append(")>");
return sb.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5307,8 +5307,11 @@ public DataType visitVariantPredefinedFields(VariantPredefinedFieldsContext ctx)
}

if (enableNestedGroup) {
throw new NotSupportedException(
"variant_enable_nested_group is not supported now");
enableVariantDocMode = false;
variantMaxSubcolumnsCount = 0;
enableTypedPathsToSparse = false;
variantMaxSparseColumnStatisticsSize = 0;
variantSparseHashShardCount = 0;
Comment on lines +5310 to +5314
}

// When doc mode is enabled, disable subcolumn extraction and sparse column features
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,11 @@ public void testVariantPredefinedFieldsExactMatch() {
}

@Test
public void testVariantToSqlDoesNotSerializeUnsupportedNestedGroupProperty() {
public void testVariantToSqlSerializesNestedGroupProperty() {
VariantType variantType = new VariantType(new ArrayList<>(), 0, false, 10000, 0,
false, 0L, 64, true);

Assert.assertFalse(variantType.toSql().contains("variant_enable_nested_group"));
Assert.assertTrue(variantType.toSql().contains("\"variant_enable_nested_group\" = \"true\""));
}

// ===================== Mixed Nesting & Precision =====================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1364,14 +1364,31 @@ public void testCtasWithoutAs() {
}

@Test
public void testCreateTableVariantNestedGroupPropertyIsRejected() {
public void testCreateTableVariantNestedGroupPropertyIsAccepted() {
NereidsParser parser = new NereidsParser();
String sql = "CREATE TABLE t_variant_ng (k1 INT, v VARIANT<PROPERTIES("
+ "\"variant_enable_nested_group\" = \"true\")>) "
+ "DISTRIBUTED BY HASH(k1) BUCKETS 1";
LogicalPlan logicalPlan = parser.parseSingle(sql);
Assertions.assertInstanceOf(CreateTableCommand.class, logicalPlan);
CreateTableCommand createTableCommand = (CreateTableCommand) logicalPlan;
org.apache.doris.nereids.types.VariantType variantType =
(org.apache.doris.nereids.types.VariantType) createTableCommand.getCreateTableInfo()
.getColumnDefinitions().get(1).getType();
Assertions.assertTrue(variantType.getEnableNestedGroup());
}

@Test
public void testCreateTableVariantNestedGroupPropertyConflictsWithDocMode() {
NereidsParser parser = new NereidsParser();
String sql = "CREATE TABLE t_variant_ng (k1 INT, v VARIANT<PROPERTIES("
+ "\"variant_enable_nested_group\" = \"true\", "
+ "\"variant_enable_doc_mode\" = \"true\")>) "
+ "DISTRIBUTED BY HASH(k1) BUCKETS 1";
NotSupportedException exception =
Assertions.assertThrowsExactly(NotSupportedException.class, () -> parser.parseSingle(sql));
Assertions.assertTrue(exception.getMessage().contains("variant_enable_nested_group is not supported now"));
Assertions.assertTrue(exception.getMessage()
.contains("variant_enable_nested_group and variant_enable_doc_mode cannot both be true"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

// DORIS-25891: Variant SEARCH must bind subcolumn predicates to the real stored
// field names for direct, nested, and special-character paths.
suite("test_variant_search_subcolumn") {
def table_name = "test_variant_search_subcolumn"
sql "set default_variant_doc_materialization_min_rows = 0"
Expand Down
Loading