Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
set(ICEBERG_SOURCES
arrow_c_data_guard_internal.cc
catalog/memory/in_memory_catalog.cc
data/data_writer.cc
data/equality_delete_writer.cc
data/file_writer_factory.cc
data/position_delete_writer.cc
data/writer.cc
delete_file_index.cc
expression/aggregate.cc
Expand Down
73 changes: 73 additions & 0 deletions src/iceberg/data/data_writer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/data/data_writer.h"

namespace iceberg {

//=============================================================================
// DataWriter - stub implementation (to be completed in separate PR per #441)
//=============================================================================

/// \brief Private implementation state of DataWriter.
///
/// Keeps the options the writer was created with and a flag that records
/// whether Close() has been called.
class DataWriter::Impl {
 public:
  /// \param options Writer options; taken by value and moved into the member.
  explicit Impl(DataWriterOptions options)
      : options_(std::move(options)) {}

  /// Options supplied when the writer was created.
  DataWriterOptions options_;
  /// Set to true by DataWriter::Close(); checked by Write() and Metadata().
  bool is_closed_{false};
};

DataWriter::DataWriter(std::unique_ptr<Impl> impl) : impl_(std::move(impl)) {}
DataWriter::~DataWriter() = default;

/// \brief Write an Arrow array to the data file (stub).
///
/// \param data Array to write; must not be null.
/// \return InvalidArgument when `data` is null, Invalid when the writer has
///         already been closed, and NotImplemented otherwise because the real
///         write path does not exist yet (see #441).
Status DataWriter::Write(ArrowArray* data) {
  // The argument check runs first, so a null array is reported as
  // InvalidArgument even when the writer is already closed.
  if (data == nullptr) {
    return InvalidArgument("Cannot write null data");
  }

  if (impl_->is_closed_) {
    return Invalid("Writer is already closed");
  }

  return NotImplemented("DataWriter not yet implemented - see #441");
}

/// \brief Report the length of the data written so far (stub).
///
/// \return Always NotImplemented for now; the closed flag is not consulted.
///         The real implementation is tracked in #441.
Result<int64_t> DataWriter::Length() const {
return NotImplemented("DataWriter not yet implemented - see #441");
}

/// \brief Mark the writer as closed (stub).
///
/// \return NotImplemented on the first call, because the real close logic does
///         not exist yet (see #441); success on every later call.
///
/// NOTE(review): the closed flag is set even though the first call reports
/// NotImplemented, so a repeated Close() returns success and Metadata() passes
/// its "must be closed" check. Confirm this is the intended contract once the
/// real implementation lands.
Status DataWriter::Close() {
  if (!impl_->is_closed_) {
    impl_->is_closed_ = true;
    return NotImplemented("DataWriter not yet implemented - see #441");
  }
  // Already closed: closing again is a no-op that reports success.
  return {};
}

/// \brief Return the write result for the finished file (stub).
///
/// \return Invalid when Close() has not been called yet; NotImplemented
///         otherwise, since result collection is not implemented (see #441).
Result<FileWriter::WriteResult> DataWriter::Metadata() {
  const bool closed = impl_->is_closed_;
  if (!closed) {
    return Invalid("Writer must be closed before getting metadata");
  }

  return NotImplemented("DataWriter not yet implemented - see #441");
}

/// \brief Internal factory function for FileWriterFactory.
///
/// \param options Writer options; copied into the writer's private state.
/// \return A newly allocated DataWriter that owns a copy of `options`.
std::unique_ptr<DataWriter> MakeDataWriterInternal(const DataWriterOptions& options) {
  // DataWriter's constructor is private, so std::make_unique cannot reach it;
  // this function is a friend of DataWriter and therefore calls `new` directly
  // and hands the pointer straight to a unique_ptr.
  return std::unique_ptr<DataWriter>(
      new DataWriter(std::make_unique<DataWriter::Impl>(options)));
}

} // namespace iceberg
78 changes: 78 additions & 0 deletions src/iceberg/data/data_writer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

/// \file iceberg/data/data_writer.h
/// Data writer for Iceberg tables.

#include <cstdint>
#include <memory>
#include <optional>
#include <string>

#include "iceberg/arrow_c_data.h"
#include "iceberg/data/writer.h"
#include "iceberg/file_format.h"
#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
#include "iceberg/row/partition_values.h"
#include "iceberg/type_fwd.h"

namespace iceberg {

/// \brief Options for creating a DataWriter.
///
/// \note The following features from Java DataWriter are not yet supported:
/// - Encryption key metadata (uses FileIO instead of EncryptedOutputFile)
/// - Metrics collection and reporting
/// - Split offsets tracking
struct ICEBERG_EXPORT DataWriterOptions {
// NOTE(review): the stub implementation only stores these options and never
// reads them, so the per-field descriptions below are inferred from names and
// types. Confirm them against the real writer once it lands.

/// Presumably the location of the data file to write (TODO confirm).
std::string path;
/// Presumably the schema of the rows being written (TODO confirm).
std::shared_ptr<Schema> schema;
/// Presumably the partition spec the file belongs to (TODO confirm).
std::shared_ptr<PartitionSpec> spec;
/// Presumably the partition values shared by all rows in the file (TODO confirm).
PartitionValues partition;
/// File format to write. Defaults to FileFormatType::kParquet.
FileFormatType format = FileFormatType::kParquet;
/// Presumably the FileIO used to create the output file (TODO confirm).
std::shared_ptr<FileIO> io;
/// Optional sort order id; empty unless the caller sets it.
std::optional<int32_t> sort_order_id;
/// Writer properties. `class WriterProperties` is an elaborated type
/// specifier, so the name needs no prior declaration at this point.
std::shared_ptr<class WriterProperties> properties;
};

/// \brief Writer for Iceberg data files.
///
/// \warning Thread Safety: Writer instances are NOT thread-safe. Each writer should only
/// be used by a single thread. Do not call Write(), Close(), or Metadata() concurrently.
class ICEBERG_EXPORT DataWriter : public FileWriter {
public:
/// Defined out of line in data_writer.cc, where Impl is a complete type.
~DataWriter() override;

/// \brief Write an Arrow array.
///
/// The current stub returns InvalidArgument for a null `data`, Invalid if the
/// writer is already closed, and NotImplemented otherwise.
Status Write(ArrowArray* data) override;
/// \brief Length of the data written so far.
///
/// The current stub always returns NotImplemented.
Result<int64_t> Length() const override;
/// \brief Close the writer.
///
/// The current stub marks the writer closed and returns NotImplemented on the
/// first call; later calls return success.
Status Close() override;
/// \brief Result of the write; only available after Close().
///
/// The current stub returns Invalid if the writer is not closed and
/// NotImplemented otherwise.
Result<WriteResult> Metadata() override;

private:
// The constructor is private, so instances can only be created by the
// friends declared below.
friend class FileWriterFactory;
friend std::unique_ptr<DataWriter> MakeDataWriterInternal(const DataWriterOptions&);
// Implementation class, forward-declared here and defined in data_writer.cc.
class Impl;
std::unique_ptr<Impl> impl_;
// Takes ownership of the implementation object.
explicit DataWriter(std::unique_ptr<Impl> impl);
};

} // namespace iceberg
79 changes: 79 additions & 0 deletions src/iceberg/data/equality_delete_writer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/data/equality_delete_writer.h"

namespace iceberg {

//=============================================================================
// EqualityDeleteWriter - stub implementation (to be completed in separate PR per #441)
//=============================================================================

/// \brief Private implementation state of EqualityDeleteWriter.
///
/// Keeps the options the writer was created with (including the equality field
/// ids exposed through equality_field_ids()) and a flag that records whether
/// Close() has been called.
class EqualityDeleteWriter::Impl {
 public:
  /// \param options Writer options; taken by value and moved into the member.
  explicit Impl(EqualityDeleteWriterOptions options)
      : options_(std::move(options)) {}

  /// Options supplied when the writer was created.
  EqualityDeleteWriterOptions options_;
  /// Set to true by EqualityDeleteWriter::Close(); checked by Write() and
  /// Metadata().
  bool is_closed_{false};
};

EqualityDeleteWriter::EqualityDeleteWriter(std::unique_ptr<Impl> impl)
: impl_(std::move(impl)) {}
EqualityDeleteWriter::~EqualityDeleteWriter() = default;

/// \brief Write an Arrow array to the equality delete file (stub).
///
/// \param data Array to write; must not be null.
/// \return InvalidArgument when `data` is null, Invalid when the writer has
///         already been closed, and NotImplemented otherwise because the real
///         write path does not exist yet (see #441).
Status EqualityDeleteWriter::Write(ArrowArray* data) {
  // The argument check runs first, so a null array is reported as
  // InvalidArgument even when the writer is already closed.
  if (data == nullptr) {
    return InvalidArgument("Cannot write null data");
  }

  if (impl_->is_closed_) {
    return Invalid("Writer is already closed");
  }

  return NotImplemented("EqualityDeleteWriter not yet implemented - see #441");
}

/// \brief Report the length of the data written so far (stub).
///
/// \return Always NotImplemented for now; the closed flag is not consulted.
///         The real implementation is tracked in #441.
Result<int64_t> EqualityDeleteWriter::Length() const {
return NotImplemented("EqualityDeleteWriter not yet implemented - see #441");
}

/// \brief Mark the writer as closed (stub).
///
/// \return NotImplemented on the first call, because the real close logic does
///         not exist yet (see #441); success on every later call.
///
/// NOTE(review): the closed flag is set even though the first call reports
/// NotImplemented, so a repeated Close() returns success and Metadata() passes
/// its "must be closed" check. Confirm this is the intended contract once the
/// real implementation lands.
Status EqualityDeleteWriter::Close() {
  if (!impl_->is_closed_) {
    impl_->is_closed_ = true;
    return NotImplemented("EqualityDeleteWriter not yet implemented - see #441");
  }
  // Already closed: closing again is a no-op that reports success.
  return {};
}

/// \brief Return the write result for the finished file (stub).
///
/// \return Invalid when Close() has not been called yet; NotImplemented
///         otherwise, since result collection is not implemented (see #441).
Result<FileWriter::WriteResult> EqualityDeleteWriter::Metadata() {
  const bool closed = impl_->is_closed_;
  if (!closed) {
    return Invalid("Writer must be closed before getting metadata");
  }

  return NotImplemented("EqualityDeleteWriter not yet implemented - see #441");
}

const std::vector<int32_t>& EqualityDeleteWriter::equality_field_ids() const {
return impl_->options_.equality_field_ids;
}

/// \brief Internal factory function for FileWriterFactory.
///
/// \param options Writer options; copied into the writer's private state.
/// \return A newly allocated EqualityDeleteWriter that owns a copy of `options`.
std::unique_ptr<EqualityDeleteWriter> MakeEqualityDeleteWriterInternal(
    const EqualityDeleteWriterOptions& options) {
  // EqualityDeleteWriter's constructor is private, so std::make_unique cannot
  // reach it; this function is a friend of the class and therefore calls `new`
  // directly and hands the pointer straight to a unique_ptr.
  return std::unique_ptr<EqualityDeleteWriter>(
      new EqualityDeleteWriter(std::make_unique<EqualityDeleteWriter::Impl>(options)));
}

} // namespace iceberg
83 changes: 83 additions & 0 deletions src/iceberg/data/equality_delete_writer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

/// \file iceberg/data/equality_delete_writer.h
/// Equality delete writer for Iceberg tables.

#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "iceberg/arrow_c_data.h"
#include "iceberg/data/writer.h"
#include "iceberg/file_format.h"
#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
#include "iceberg/row/partition_values.h"
#include "iceberg/type_fwd.h"

namespace iceberg {

/// \brief Options for creating an EqualityDeleteWriter.
///
/// \note The following features from Java EqualityDeleteWriter are not yet supported:
/// - Encryption key metadata
/// - Metrics collection and reporting
/// - Split offsets tracking
struct ICEBERG_EXPORT EqualityDeleteWriterOptions {
// NOTE(review): apart from equality_field_ids, the stub implementation only
// stores these options and never reads them, so the per-field descriptions
// below are inferred from names and types. Confirm them against the real
// writer once it lands.

/// Presumably the location of the delete file to write (TODO confirm).
std::string path;
/// Presumably the schema of the rows being written (TODO confirm).
std::shared_ptr<Schema> schema;
/// Presumably the partition spec the file belongs to (TODO confirm).
std::shared_ptr<PartitionSpec> spec;
/// Presumably the partition values shared by all rows in the file (TODO confirm).
PartitionValues partition;
/// File format to write. Defaults to FileFormatType::kParquet.
FileFormatType format = FileFormatType::kParquet;
/// Presumably the FileIO used to create the output file (TODO confirm).
std::shared_ptr<FileIO> io;
/// Field ids returned by EqualityDeleteWriter::equality_field_ids().
/// Presumably the columns used to match deleted rows (TODO confirm).
std::vector<int32_t> equality_field_ids;
/// Optional sort order id; empty unless the caller sets it.
std::optional<int32_t> sort_order_id;
/// Writer properties. `class WriterProperties` is an elaborated type
/// specifier, so the name needs no prior declaration at this point.
std::shared_ptr<class WriterProperties> properties;
};

/// \brief Writer for Iceberg equality delete files.
///
/// \warning Thread Safety: Writer instances are NOT thread-safe. Each writer should only
/// be used by a single thread. Do not call Write(), Close(), or Metadata() concurrently.
class ICEBERG_EXPORT EqualityDeleteWriter : public FileWriter {
public:
/// Defined out of line in equality_delete_writer.cc, where Impl is a complete
/// type.
~EqualityDeleteWriter() override;

/// \brief Write an Arrow array.
///
/// The current stub returns InvalidArgument for a null `data`, Invalid if the
/// writer is already closed, and NotImplemented otherwise.
Status Write(ArrowArray* data) override;
/// \brief Length of the data written so far.
///
/// The current stub always returns NotImplemented.
Result<int64_t> Length() const override;
/// \brief Close the writer.
///
/// The current stub marks the writer closed and returns NotImplemented on the
/// first call; later calls return success.
Status Close() override;
/// \brief Result of the write; only available after Close().
///
/// The current stub returns Invalid if the writer is not closed and
/// NotImplemented otherwise.
Result<WriteResult> Metadata() override;

/// \brief Equality field ids passed in via EqualityDeleteWriterOptions.
///
/// The returned reference points into state owned by this writer.
const std::vector<int32_t>& equality_field_ids() const;

private:
// The constructor is private, so instances can only be created by the
// friends declared below.
friend class FileWriterFactory;
friend std::unique_ptr<EqualityDeleteWriter> MakeEqualityDeleteWriterInternal(
const EqualityDeleteWriterOptions&);
// Implementation class, forward-declared here and defined in
// equality_delete_writer.cc.
class Impl;
std::unique_ptr<Impl> impl_;
// Takes ownership of the implementation object.
explicit EqualityDeleteWriter(std::unique_ptr<Impl> impl);
};

} // namespace iceberg
Loading
Loading