diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 617ec05b6..ec8f4a977 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -95,6 +95,7 @@ set(ICEBERG_SOURCES util/snapshot_util.cc util/temporal_util.cc util/timepoint.cc + util/transform_util.cc util/truncate_util.cc util/type_util.cc util/uuid.cc) diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 34538bde0..9c4635f4e 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -116,6 +116,7 @@ iceberg_sources = files( 'util/snapshot_util.cc', 'util/temporal_util.cc', 'util/timepoint.cc', + 'util/transform_util.cc', 'util/truncate_util.cc', 'util/type_util.cc', 'util/uuid.cc', diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index a32bbe4de..c4258ec09 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -64,6 +64,7 @@ add_iceberg_test(schema_test schema_util_test.cc sort_field_test.cc sort_order_test.cc + transform_human_string_test.cc transform_test.cc type_test.cc) diff --git a/src/iceberg/test/eval_expr_test.cc b/src/iceberg/test/eval_expr_test.cc index 880f1ffb6..39cb2a973 100644 --- a/src/iceberg/test/eval_expr_test.cc +++ b/src/iceberg/test/eval_expr_test.cc @@ -161,7 +161,7 @@ TEST_F(BoundExpressionTest, YearTransform) { // Evaluate (2021) ICEBERG_UNWRAP_OR_FAIL(auto result, bound_transform->Evaluate(*struct_like)); EXPECT_FALSE(result.IsNull()); - EXPECT_EQ(std::get(result.value()), 2021); // Year value + EXPECT_EQ(std::get(result.value()), 2021 - 1970); // Year value } TEST_F(BoundExpressionTest, MonthTransform) { diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index 378182819..d979d8ec3 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -40,6 +40,7 @@ iceberg_tests = { 'schema_util_test.cc', 'sort_field_test.cc', 'sort_order_test.cc', + 'transform_human_string_test.cc', 'transform_test.cc', 'type_test.cc', ), diff 
--git a/src/iceberg/test/transform_human_string_test.cc b/src/iceberg/test/transform_human_string_test.cc
new file mode 100644
index 000000000..fe060f81c
--- /dev/null
+++ b/src/iceberg/test/transform_human_string_test.cc
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include "iceberg/expression/literal.h"
+#include "iceberg/test/matchers.h"
+#include "iceberg/transform.h"
+
+namespace iceberg {
+
+/// One parameterized case: a literal plus the expected human string for each
+/// transform of the fixture, in the same order as the fixture's `transforms_`.
+struct HumanStringTestParam {
+  std::string test_name;
+  std::shared_ptr source_type;
+  Literal literal;
+  std::vector expecteds;
+};
+
+class IdentityTest : public ::testing::TestWithParam {
+ protected:
+  std::vector> transforms_{{Transform::Identity()}};
+};
+
+TEST_P(IdentityTest, ToHumanString) {
+  const auto& param = GetParam();
+  // The literal is passed to ToHumanString directly, without applying a transform.
+  ASSERT_EQ(param.expecteds.size(), transforms_.size());
+  for (size_t i = 0; i < transforms_.size(); ++i) {
+    EXPECT_THAT(transforms_[i]->ToHumanString(param.literal),
+                HasValue(::testing::Eq(param.expecteds[i])));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    IdentityTestCases, IdentityTest,
+    ::testing::Values(
+        HumanStringTestParam{.test_name = "Null",
+                             .literal = Literal::Null(std::make_shared()),
+                             .expecteds{"null"}},
+        HumanStringTestParam{.test_name = "Binary",
+                             .literal = Literal::Binary(std::vector{1, 2, 3}),
+                             .expecteds{"AQID"}},
+        HumanStringTestParam{.test_name = "Fixed",
+                             .literal = Literal::Fixed(std::vector{1, 2, 3}),
+                             .expecteds{"AQID"}},
+        HumanStringTestParam{.test_name = "Date",
+                             .literal = Literal::Date(17501),
+                             .expecteds{"2017-12-01"}},
+        HumanStringTestParam{.test_name = "Time",
+                             .literal = Literal::Time(36775038194),
+                             .expecteds{"10:12:55.038194"}},
+        HumanStringTestParam{.test_name = "TimestampWithZone",
+                             .literal = Literal::TimestampTz(1512151975038194),
+                             .expecteds{"2017-12-01T18:12:55.038194+00:00"}},
+        HumanStringTestParam{.test_name = "TimestampWithoutZone",
+                             .literal = Literal::Timestamp(1512123175038194),
+                             .expecteds{"2017-12-01T10:12:55.038194"}},
+        HumanStringTestParam{.test_name = "Long",
+                             .literal = Literal::Long(-1234567890000L),
+                             .expecteds{"-1234567890000"}},
+        HumanStringTestParam{.test_name = "String",
+                             .literal = Literal::String("a/b/c=d"),
+                             .expecteds{"a/b/c=d"}}),
+    [](const ::testing::TestParamInfo& info) {
+      return info.param.test_name;
+    });
+
+class DateTest : public ::testing::TestWithParam {
+ protected:
+  std::vector> transforms_{
+      Transform::Year(), Transform::Month(), Transform::Day()};
+};
+
+TEST_P(DateTest, ToHumanString) {
+  const auto& param = GetParam();
+  ASSERT_EQ(param.expecteds.size(), transforms_.size());
+
+  for (size_t i = 0; i < transforms_.size(); i++) {
+    // ToHumanString expects the transformed value, so apply the transform first.
+    ICEBERG_UNWRAP_OR_FAIL(auto trans_func,
+                           transforms_[i]->Bind(std::make_shared()));
+    ICEBERG_UNWRAP_OR_FAIL(auto literal, trans_func->Transform(param.literal));
+    EXPECT_THAT(transforms_[i]->ToHumanString(literal),
+                HasValue(::testing::Eq(param.expecteds[i])));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    DateTestCases, DateTest,
+    ::testing::Values(
+        HumanStringTestParam{.test_name = "Date",
+                             .literal = Literal::Date(17501),
+                             .expecteds = {"2017", "2017-12", "2017-12-01"}},
+        HumanStringTestParam{.test_name = "NegativeDate",
+                             .literal = Literal::Date(-2),
+                             .expecteds = {"1969", "1969-12", "1969-12-30"}},
+        HumanStringTestParam{.test_name = "DateLowerBound",
+                             .literal = Literal::Date(0),
+                             .expecteds = {"1970", "1970-01", "1970-01-01"}},
+        HumanStringTestParam{.test_name = "NegativeDateLowerBound",
+                             .literal = Literal::Date(-365),
+                             .expecteds = {"1969", "1969-01", "1969-01-01"}},
+        HumanStringTestParam{.test_name = "NegativeDateUpperBound",
+                             .literal = Literal::Date(-1),
+                             .expecteds = {"1969", "1969-12", "1969-12-31"}},
+        HumanStringTestParam{.test_name = "Null",
+                             .literal = Literal::Null(std::make_shared()),
+                             .expecteds = {"null", "null", "null"}}),
+    [](const ::testing::TestParamInfo& info) {
+      return info.param.test_name;
+    });
+
+class TimestampTest : public ::testing::TestWithParam {
+ protected:
+  std::vector> transforms_{
+      Transform::Year(), Transform::Month(), Transform::Day(), Transform::Hour()};
+};
+
+TEST_P(TimestampTest, ToHumanString) {
+  const auto& param = GetParam();
+  ASSERT_EQ(param.expecteds.size(), transforms_.size());
+  for (size_t i = 0; i < transforms_.size(); i++) {
+    // Same flow as DateTest, but bound to the source type carried by the case.
+    ICEBERG_UNWRAP_OR_FAIL(auto trans_func, transforms_[i]->Bind(param.source_type));
+    ICEBERG_UNWRAP_OR_FAIL(auto literal, trans_func->Transform(param.literal));
+    EXPECT_THAT(transforms_[i]->ToHumanString(literal),
+                HasValue(::testing::Eq(param.expecteds[i])));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    TimestampTestCases, TimestampTest,
+    ::testing::Values(
+        HumanStringTestParam{
+            .test_name = "Timestamp",
+            .source_type = std::make_shared(),
+            .literal = Literal::Timestamp(1512123175038194),
+            .expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-10"}},
+        HumanStringTestParam{
+            .test_name = "NegativeTimestamp",
+            .source_type = std::make_shared(),
+            .literal = Literal::Timestamp(-136024961806),
+            .expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-10"}},
+        HumanStringTestParam{
+            .test_name = "TimestampLowerBound",
+            .source_type = std::make_shared(),
+            .literal = Literal::Timestamp(0),
+            .expecteds = {"1970", "1970-01", "1970-01-01", "1970-01-01-00"}},
+        HumanStringTestParam{
+            .test_name = "NegativeTimestampLowerBound",
+            .source_type = std::make_shared(),
+            .literal = Literal::Timestamp(-172800000000),
+            .expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-00"},
+        },
+        HumanStringTestParam{
+            .test_name = "NegativeTimestampUpperBound",
+            .source_type = std::make_shared(),
+            .literal = Literal::Timestamp(-1),
+            .expecteds = {"1969", "1969-12", "1969-12-31", "1969-12-31-23"}},
+        HumanStringTestParam{
+            .test_name = "TimestampTz",
+            .source_type = std::make_shared(),
+            .literal = Literal::TimestampTz(1512151975038194),
+            .expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-18"}},
+        HumanStringTestParam{.test_name = "Null",
+                             .source_type = std::make_shared(),
+                             .literal = Literal::Null(std::make_shared()),
+                             .expecteds = {"null", "null", "null", "null"}}),
+    [](const ::testing::TestParamInfo& info) {
+      return info.param.test_name;
+    });
+
+}  // namespace iceberg
diff --git a/src/iceberg/test/transform_test.cc b/src/iceberg/test/transform_test.cc
index 7f0514df4..47a1e87e6 100644
--- a/src/iceberg/test/transform_test.cc
+++ b/src/iceberg/test/transform_test.cc
@@ -459,7 +459,7 @@ INSTANTIATE_TEST_SUITE_P( .hour = 11, .minute = 43, .second = 20})), - .expected = Literal::Int(2021)}, + .expected = Literal::Int(2021 - 1970)}, TransformParam{ .str = "TimestampTz", // 2021-01-01T07:43:20+08:00, which is 2020-12-31T23:43:20Z @@ -472,12 +472,12 @@ INSTANTIATE_TEST_SUITE_P( .minute = 43, .second = 20, .tz_offset_minutes = 480})), - .expected = Literal::Int(2020)}, + .expected = Literal::Int(2020 - 1970)}, TransformParam{.str = "Date", .source_type = iceberg::date(), .source = Literal::Date(TemporalTestHelper::CreateDate( {.year = 2052, .month = 2, .day = 20})), - .expected = Literal::Int(2052)}), + .expected = Literal::Int(2052 - 1970)}), [](const ::testing::TestParamInfo& info) { return info.param.str; }); class MonthTransformTest : public ::testing::TestWithParam {}; @@ -2061,7 +2061,8 @@ TEST_F(TransformProjectStrictTest, YearStrictLessThan) { std::move(projected)); EXPECT_EQ(unbound_projected->op(), Expression::Operation::kLt); EXPECT_EQ(unbound_projected->literals().size(), 1); - EXPECT_EQ(std::get(unbound_projected->literals().front().value()), 2021); + EXPECT_EQ(std::get(unbound_projected->literals().front().value()), + 2021 - 1970); } TEST_F(TransformProjectStrictTest, YearStrictGreaterThanOrEqual) { @@ -2085,7 +2086,8 @@ TEST_F(TransformProjectStrictTest, YearStrictGreaterThanOrEqual) { std::move(projected)); EXPECT_EQ(unbound_projected->op(), Expression::Operation::kGt); EXPECT_EQ(unbound_projected->literals().size(), 1); - EXPECT_EQ(std::get(unbound_projected->literals().front().value()), 2020); + EXPECT_EQ(std::get(unbound_projected->literals().front().value()), + 2020 - 1970); } TEST_F(TransformProjectStrictTest, YearStrictNotEqual) { @@ -2109,7 +2111,8 @@ TEST_F(TransformProjectStrictTest, YearStrictNotEqual) { std::move(projected)); EXPECT_EQ(unbound_projected->op(), Expression::Operation::kNotEq); EXPECT_EQ(unbound_projected->literals().size(), 1); - 
EXPECT_EQ(std::get(unbound_projected->literals().front().value()), 2021);
+  EXPECT_EQ(std::get(unbound_projected->literals().front().value()),
+            2021 - 1970);
 }
 
 TEST_F(TransformProjectStrictTest, MonthStrictLessThan) {
@@ -2218,7 +2221,8 @@ TEST_F(TransformProjectStrictTest, YearStrictUpperBound) {
       std::move(projected));
   EXPECT_EQ(unbound_projected->op(), Expression::Operation::kLt);
   EXPECT_EQ(unbound_projected->literals().size(), 1);
-  EXPECT_EQ(std::get(unbound_projected->literals().front().value()), 2018);
+  EXPECT_EQ(std::get(unbound_projected->literals().front().value()),
+            2018 - 1970);
 }
 
 TEST_F(TransformProjectStrictTest, VoidStrictReturnsNull) {
diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc
index 560cc3921..3da66e16c 100644
--- a/src/iceberg/transform.cc
+++ b/src/iceberg/transform.cc
@@ -31,6 +31,7 @@
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/macros.h"
 #include "iceberg/util/projection_util_internal.h"
+#include "iceberg/util/transform_util.h"
 
 namespace iceberg {
 namespace {
@@ -366,6 +367,54 @@ Result> Transform::ProjectStrict(
   std::unreachable();
 }
 
+/// Renders an already-transformed literal as human-readable text.
+///
+/// Null literals render as "null". Year, month, day and hour transforms pass the
+/// stored value to the matching TransformUtil::Human* helper; all other transforms
+/// format the value according to the literal's own type.
+Result Transform::ToHumanString(const Literal& value) {
+  if (value.IsNull()) {
+    return "null";
+  }
+
+  // NOTE(review): std::get presumably throws when the literal holds a different
+  // alternative than the one requested - confirm callers pass transformed values.
+  switch (transform_type_) {
+    case TransformType::kYear:
+      return TransformUtil::HumanYear(std::get(value.value()));
+    case TransformType::kMonth:
+      return TransformUtil::HumanMonth(std::get(value.value()));
+    case TransformType::kDay:
+      return TransformUtil::HumanDay(std::get(value.value()));
+    case TransformType::kHour:
+      return TransformUtil::HumanHour(std::get(value.value()));
+    default: {
+      // Every other transform: pick the rendering from the literal's type.
+      switch (value.type()->type_id()) {
+        case TypeId::kDate:
+          return TransformUtil::HumanDay(std::get(value.value()));
+        case TypeId::kTime:
+          return TransformUtil::HumanTime(std::get(value.value()));
+        case TypeId::kTimestamp:
+          return TransformUtil::HumanTimestamp(std::get(value.value()));
+        case TypeId::kTimestampTz:
+          return TransformUtil::HumanTimestampWithZone(std::get(value.value()));
+        case TypeId::kFixed:
+        case TypeId::kBinary: {
+          // Raw bytes are rendered as Base64 text.
+          const auto& binary_data = std::get>(value.value());
+          return TransformUtil::Base64Encode(
+              {reinterpret_cast(binary_data.data()), binary_data.size()});
+        }
+        case TypeId::kDecimal: {
+          // The scale comes from the literal's decimal type.
+          const auto& decimal_type = internal::checked_cast(*value.type());
+          const auto& decimal = std::get<::iceberg::Decimal>(value.value());
+          return decimal.ToString(decimal_type.scale());
+        }
+        case TypeId::kString:
+          return std::get(value.value());
+        default:
+          return value.ToString();
+      }
+    }
+  }
+}
+
 bool TransformFunction::Equals(const TransformFunction& other) const {
   return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_;
 }
diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h
index 1044e264f..d278ce5c4 100644
--- a/src/iceberg/transform.h
+++ b/src/iceberg/transform.h
@@ -194,6 +194,12 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
   Result> ProjectStrict(
       std::string_view name, const std::shared_ptr& predicate);
 
+  /// \brief Returns a human-readable string representation of a transformed value.
+  ///
+  /// \param value A literal already produced by this transform, not the source value.
+  /// \return A human-readable string representation of the value.
+  Result ToHumanString(const Literal& value);
+
   /// \brief Returns a string representation of this transform (e.g., "bucket[16]").
std::string ToString() const override;
 
diff --git a/src/iceberg/util/meson.build b/src/iceberg/util/meson.build
index 880f63401..86ca19cb5 100644
--- a/src/iceberg/util/meson.build
+++ b/src/iceberg/util/meson.build
@@ -36,6 +36,7 @@ install_headers(
         'string_util.h',
         'temporal_util.h',
         'timepoint.h',
+        'transform_util.h',
         'truncate_util.h',
         'type_util.h',
         'uuid.h',
diff --git a/src/iceberg/util/temporal_util.cc b/src/iceberg/util/temporal_util.cc
index 0112e4925..05aafb961 100644
--- a/src/iceberg/util/temporal_util.cc
+++ b/src/iceberg/util/temporal_util.cc
@@ -68,14 +68,14 @@ template <>
 Result ExtractYearImpl(const Literal& literal) {
   auto value = std::get(literal.value());
   auto ymd = DateToYmd(value);
-  return Literal::Int(static_cast(ymd.year()));
+  return Literal::Int((ymd.year() - kEpochYmd.year()).count());
 }
 
 template <>
 Result ExtractYearImpl(const Literal& literal) {
   auto value = std::get(literal.value());
   auto ymd = TimestampToYmd(value);
-  return Literal::Int(static_cast(ymd.year()));
+  return Literal::Int((ymd.year() - kEpochYmd.year()).count());
 }
 
 template <>
diff --git a/src/iceberg/util/timepoint.cc b/src/iceberg/util/timepoint.cc
index 0381e90a6..ec8597730 100644
--- a/src/iceberg/util/timepoint.cc
+++ b/src/iceberg/util/timepoint.cc
@@ -20,8 +20,6 @@
 #include "iceberg/util/timepoint.h"
 
 #include
-#include
-#include
 
 namespace iceberg {
 
@@ -46,18 +44,46 @@ int64_t UnixNsFromTimePointNs(TimePointNs time_point_ns) {
 }
 
 std::string FormatTimePointMs(TimePointMs time_point_ms) {
-  auto unix_ms = UnixMsFromTimePointMs(time_point_ms);
-  auto time_t = std::chrono::system_clock::to_time_t(time_point_ms);
-
-  // Format as ISO 8601-like string: YYYY-MM-DD HH:MM:SS
-  std::ostringstream oss;
-  oss << std::put_time(std::gmtime(&time_t), "%Y-%m-%d %H:%M:%S");
-
-  // Add milliseconds
-  auto ms = unix_ms % 1000;
-  oss << "." << std::setfill('0') << std::setw(3) << ms << " UTC";
-
-  return oss.str();
+  return std::format("{:%FT%T}", time_point_ms);
+}
+
+namespace {
+
+/// \brief Formats a Unix timestamp in microseconds as ISO-8601 UTC text.
+///
+/// The fraction is omitted when zero, printed as 3 digits when the value is a
+/// whole number of milliseconds, and as 6 digits otherwise. `zone_suffix` is
+/// appended verbatim (empty for zone-less timestamps).
+std::string FormatUnixMicroWithSuffix(int64_t unix_micro, const char* zone_suffix) {
+  // C++ integer division truncates toward zero and `%` keeps the sign of the
+  // dividend, so pre-epoch values must be floored explicitly; otherwise -1 would
+  // render as "1970-01-01T00:00:00.-00001".
+  int64_t seconds = unix_micro / kMicrosPerSecond;
+  int64_t micros = unix_micro % kMicrosPerSecond;
+  if (micros < 0) {
+    micros += kMicrosPerSecond;
+    --seconds;
+  }
+  const std::chrono::sys_seconds tp{std::chrono::seconds{seconds}};
+
+  if (micros == 0) {
+    return std::format("{:%FT%T}{}", tp, zone_suffix);
+  }
+  if (micros % kMicrosPerMillis == 0) {
+    return std::format("{:%FT%T}.{:03d}{}", tp, micros / kMicrosPerMillis,
+                       zone_suffix);
+  }
+  return std::format("{:%FT%T}.{:06d}{}", tp, micros, zone_suffix);
+}
+
+}  // namespace
+
+std::string FormatUnixMicro(int64_t unix_micro) {
+  return FormatUnixMicroWithSuffix(unix_micro, "");
+}
+
+std::string FormatUnixMicroTz(int64_t unix_micro) {
+  return FormatUnixMicroWithSuffix(unix_micro, "+00:00");
 }
 
 }  // namespace iceberg
diff --git a/src/iceberg/util/timepoint.h b/src/iceberg/util/timepoint.h
index 6052c94ae..b09fbb14a 100644
--- a/src/iceberg/util/timepoint.h
+++ b/src/iceberg/util/timepoint.h
@@ -34,6 +34,10 @@ using TimePointMs =
 using TimePointNs =
     std::chrono::time_point;
 
+constexpr int64_t kMillisPerSecond = 1000;
+constexpr int64_t kMicrosPerMillis = 1000;
+constexpr int64_t kMicrosPerSecond = 1000000;
+
 /// \brief Returns a TimePointMs from a Unix timestamp in milliseconds
 ICEBERG_EXPORT Result TimePointMsFromUnixMs(int64_t unix_ms);
 
@@ -49,4 +53,17 @@ ICEBERG_EXPORT int64_t UnixNsFromTimePointNs(TimePointNs time_point_ns);
 /// \brief Returns a human-readable string representation of a TimePointMs
 ICEBERG_EXPORT std::string FormatTimePointMs(TimePointMs time_point_ms);
 
+/// \brief Returns a human-readable string representation of a Unix timestamp in
+/// microseconds
+///
+/// Formatted in UTC as `yyyy-MM-ddTHH:mm:ss[.SSS|.SSSSSS]`; values before the epoch
+/// (negative) are supported.
+ICEBERG_EXPORT std::string FormatUnixMicro(int64_t unix_micro);
+
+/// \brief Returns a human-readable string representation of a Unix timestamp in
+/// microseconds with time zone
+///
+/// Same as FormatUnixMicro() with a fixed `+00:00` offset appended.
+ICEBERG_EXPORT std::string FormatUnixMicroTz(int64_t unix_micro);
+
 }  // namespace iceberg
diff --git a/src/iceberg/util/transform_util.cc b/src/iceberg/util/transform_util.cc
new file mode 100644
index 000000000..6cca2866c
--- /dev/null
+++ b/src/iceberg/util/transform_util.cc
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/transform_util.h"
+
+#include
+
+#include "iceberg/util/timepoint.h"
+
+namespace iceberg {
+
+namespace {
+// Year that ordinal 0 of the year and month helpers refers to.
+const int32_t kEpochYear = 1970;
+}  // namespace
+
+std::string TransformUtil::HumanYear(int32_t year_ordinal) {
+  // Year ordinals are offsets from kEpochYear (ordinal 0 formats as "1970").
+  return std::format("{:04d}", kEpochYear + year_ordinal);
+}
+
+std::string TransformUtil::HumanMonth(int32_t month_ordinal) {
+  int32_t year = kEpochYear + month_ordinal / 12;
+  int32_t month = month_ordinal % 12 + 1;
+  // `/` and `%` truncate toward zero, so a negative ordinal can leave `month`
+  // at or below zero; borrow one year to bring it back into 1..12.
+  if (month <= 0) {
+    year--;
+    month += 12;
+  }
+  return std::format("{:04d}-{:02d}", year, month);
+}
+
+std::string TransformUtil::HumanDay(int32_t day_ordinal) {
+  auto tp = std::chrono::time_point{
+      std::chrono::days{day_ordinal}};
+  // %F renders the date as yyyy-MM-dd.
+  return std::format("{:%F}", tp);
+}
+
+std::string TransformUtil::HumanHour(int32_t hour_ordinal) {
+  auto tp = std::chrono::time_point{
+      std::chrono::hours{hour_ordinal}};
+  // %F-%H renders yyyy-MM-dd-HH.
+  return std::format("{:%F-%H}", tp);
+}
+
+std::string TransformUtil::HumanTime(int64_t micros_from_midnight) {
+  auto tp = std::chrono::time_point{
+      std::chrono::seconds{micros_from_midnight / kMicrosPerSecond}};
+  auto micros = micros_from_midnight % kMicrosPerSecond;
+  // Print the shortest fraction that still represents the value exactly.
+  if (micros == 0) {
+    return std::format("{:%T}", tp);
+  } else if (micros % kMicrosPerMillis == 0) {
+    return std::format("{:%T}.{:03d}", tp, micros / kMicrosPerMillis);
+  } else {
+    return std::format("{:%T}.{:06d}", tp, micros);
+  }
+}
+
+std::string TransformUtil::HumanTimestamp(int64_t timestamp_micros) {
+  // Delegates to the formatter declared in iceberg/util/timepoint.h.
+  return FormatUnixMicro(timestamp_micros);
+}
+
+std::string TransformUtil::HumanTimestampWithZone(int64_t timestamp_micros) {
+  return FormatUnixMicroTz(timestamp_micros);
+}
+
+std::string TransformUtil::Base64Encode(std::string_view str_to_encode) {
+  // constexpr view: no heap allocation and no thread-safe static-init guard.
+  static constexpr std::string_view kAlphabet =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+  const size_t size = str_to_encode.size();
+  std::string encoded;
+  // Every (possibly partial) group of three input bytes yields four characters.
+  encoded.reserve((size + 2) / 3 * 4);
+
+  for (size_t pos = 0; pos < size; pos += 3) {
+    const size_t remaining = size - pos;
+    // Pack up to three bytes into a 24-bit group; absent bytes count as zero.
+    // Going through unsigned char keeps bytes >= 0x80 from sign-extending.
+    const uint32_t b0 = static_cast<unsigned char>(str_to_encode[pos]);
+    const uint32_t b1 =
+        remaining > 1 ? static_cast<unsigned char>(str_to_encode[pos + 1]) : 0;
+    const uint32_t b2 =
+        remaining > 2 ? static_cast<unsigned char>(str_to_encode[pos + 2]) : 0;
+    const uint32_t group = (b0 << 16) | (b1 << 8) | b2;
+
+    // A group holding k bytes emits k + 1 alphabet characters, then '=' padding
+    // up to four characters.
+    encoded += kAlphabet[(group >> 18) & 0x3f];
+    encoded += kAlphabet[(group >> 12) & 0x3f];
+    encoded += remaining > 1 ? kAlphabet[(group >> 6) & 0x3f] : '=';
+    encoded += remaining > 2 ? kAlphabet[group & 0x3f] : '=';
+  }
+
+  return encoded;
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/transform_util.h b/src/iceberg/util/transform_util.h
new file mode 100644
index 000000000..6cc773924
--- /dev/null
+++ b/src/iceberg/util/transform_util.h
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include
+
+#include "iceberg/iceberg_export.h"
+
+namespace iceberg {
+
+class ICEBERG_EXPORT TransformUtil {
+ public:
+  /// \brief Returns a human-readable string for a year ordinal.
+  ///
+  /// The string is formatted as "yyyy".
+  ///
+  /// \param year The year ordinal, i.e. the number of years since 1970.
+  /// \return A human-readable string for the year.
+  static std::string HumanYear(int32_t year);
+
+  /// \brief Returns a human-readable string for a month ordinal.
+  ///
+  /// The string is formatted as "yyyy-MM".
+  ///
+  /// \param month The month ordinal, i.e. the number of months since 1970-01.
+  /// \return A human-readable string for the month.
+  static std::string HumanMonth(int32_t month);
+
+  /// \brief Returns a human-readable string for the given day ordinal.
+  ///
+  /// The string is formatted as: `yyyy-MM-dd`.
+  ///
+  /// \param day_ordinal The day ordinal, i.e. the number of days since 1970-01-01.
+  /// \return A human-readable string for the given day ordinal.
+  static std::string HumanDay(int32_t day_ordinal);
+
+  /// \brief Returns a human-readable string for the given hour ordinal.
+  ///
+  /// The string is formatted as: `yyyy-MM-dd-HH`.
+  ///
+  /// \param hour_ordinal The hour ordinal, i.e. the number of hours since 1970-01-01.
+  /// \return A human-readable string for the given hour ordinal.
+  static std::string HumanHour(int32_t hour_ordinal);
+
+  /// \brief Returns a human-readable string for a time of day, such as 10:15:00.
+  ///
+  /// The output will be one of the following formats:
+  /// - HH:mm:ss
+  /// - HH:mm:ss.SSS
+  /// - HH:mm:ss.SSSSSS
+  /// The format used is the shortest that represents the value exactly: the
+  /// fraction is omitted when it is zero and printed with millisecond precision
+  /// when the value is a whole number of milliseconds.
+  /// Seconds are always printed.
+  ///
+  /// \param micros_from_midnight the time in microseconds from midnight
+  /// \return a string representation of this time
+  static std::string HumanTime(int64_t micros_from_midnight);
+
+  /// \brief Returns a string representation of a timestamp in microseconds.
+  ///
+  /// The output will be one of the following forms, according to the precision of the
+  /// timestamp:
+  /// - yyyy-MM-ddTHH:mm:ss
+  /// - yyyy-MM-ddTHH:mm:ss.SSS
+  /// - yyyy-MM-ddTHH:mm:ss.SSSSSS
+  ///
+  /// \param timestamp_micros the timestamp in microseconds.
+  /// \return a string representation of this timestamp.
+  static std::string HumanTimestamp(int64_t timestamp_micros);
+
+  /// \brief Returns a human-readable string representation of a timestamp with a time
+  /// zone.
+  ///
+  /// The output will be one of the following forms, according to the precision of the
+  /// timestamp:
+  /// - yyyy-MM-ddTHH:mm:ss+00:00
+  /// - yyyy-MM-ddTHH:mm:ss.SSS+00:00
+  /// - yyyy-MM-ddTHH:mm:ss.SSSSSS+00:00
+  ///
+  /// \param timestamp_micros the timestamp in microseconds.
+  /// \return a string representation of this timestamp.
+  static std::string HumanTimestampWithZone(int64_t timestamp_micros);
+
+  /// \brief Base64-encodes a byte string using `A-Za-z0-9+/` with `=` padding.
+  static std::string Base64Encode(std::string_view str_to_encode);
+};
+
+}  // namespace iceberg