Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ set(ICEBERG_SOURCES
util/snapshot_util.cc
util/temporal_util.cc
util/timepoint.cc
util/transform_util.cc
util/truncate_util.cc
util/type_util.cc
util/url_encoder.cc
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ iceberg_sources = files(
'util/snapshot_util.cc',
'util/temporal_util.cc',
'util/timepoint.cc',
'util/transform_util.cc',
'util/truncate_util.cc',
'util/type_util.cc',
'util/url_encoder.cc',
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ add_iceberg_test(util_test
formatter_test.cc
location_util_test.cc
string_util_test.cc
transform_util_test.cc
truncate_util_test.cc
url_encoder_test.cc
uuid_test.cc
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ iceberg_tests = {
'formatter_test.cc',
'location_util_test.cc',
'string_util_test.cc',
'transform_util_test.cc',
'truncate_util_test.cc',
'url_encoder_test.cc',
'uuid_test.cc',
Expand Down
160 changes: 160 additions & 0 deletions src/iceberg/test/transform_util_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/util/transform_util.h"

#include <gtest/gtest.h>

namespace iceberg {

// HumanYear takes a year ordinal relative to 1970 and renders a 4-digit year.
TEST(TransformUtilTest, HumanYear) {
  struct Case {
    int32_t ordinal;
    const char* expected;
  };
  constexpr Case kCases[] = {
      {0, "1970"},           // epoch year
      {1, "1971"},           // one year after the epoch
      {-1, "1969"},          // one year before the epoch
      {999 - 1970, "0999"},  // years below 1000 are zero-padded to four digits
      {56, "2026"},
  };

  for (const auto& [ordinal, expected] : kCases) {
    EXPECT_EQ(expected, TransformUtil::HumanYear(ordinal)) << "ordinal=" << ordinal;
  }
}

// HumanMonth takes a month ordinal relative to January 1970 and renders "YYYY-MM".
TEST(TransformUtilTest, HumanMonth) {
  struct Case {
    int32_t ordinal;
    const char* expected;
  };
  constexpr Case kCases[] = {
      {0, "1970-01"},       // 0 is January 1970
      {1, "1970-02"},       // 1 is February 1970
      {-1, "1969-12"},      // -1 is December 1969
      {-11641, "0999-12"},  // December 999, zero-padded year
      {12, "1971-01"},      // 12 is January 1971
      {672, "2026-01"},     // 672 is January 2026
  };

  for (const auto& [ordinal, expected] : kCases) {
    EXPECT_EQ(expected, TransformUtil::HumanMonth(ordinal)) << "ordinal=" << ordinal;
  }
}

// HumanDay takes a day ordinal relative to 1970-01-01 and renders "YYYY-MM-DD".
TEST(TransformUtilTest, HumanDay) {
  struct Case {
    int32_t ordinal;
    const char* expected;
  };
  constexpr Case kCases[] = {
      {0, "1970-01-01"},        // 0 is the Unix epoch
      {1, "1970-01-02"},        // the day after the epoch
      {-1, "1969-12-31"},       // the day before the epoch
      {-354286, "0999-12-31"},  // zero-padded year
      {365, "1971-01-01"},      // 1970 is not a leap year
      {20454, "2026-01-01"},
  };

  for (const auto& [ordinal, expected] : kCases) {
    EXPECT_EQ(expected, TransformUtil::HumanDay(ordinal)) << "ordinal=" << ordinal;
  }
}

// HumanHour takes an hour ordinal relative to 1970-01-01T00 and renders "YYYY-MM-DD-HH".
TEST(TransformUtilTest, HumanHour) {
  struct Case {
    int32_t ordinal;
    const char* expected;
  };
  constexpr Case kCases[] = {
      {0, "1970-01-01-00"},         // Unix epoch at 00:00
      {1, "1970-01-01-01"},         // first hour after the epoch
      {-1, "1969-12-31-23"},        // last hour of the previous day
      {-8502841, "0999-12-31-23"},  // 0999-12-31 at 23:00
      {24, "1970-01-02-00"},        // next day at 00:00
      {490896, "2026-01-01-00"},    // 2026-01-01 at 00:00
  };

  for (const auto& [ordinal, expected] : kCases) {
    EXPECT_EQ(expected, TransformUtil::HumanHour(ordinal)) << "ordinal=" << ordinal;
  }
}

// HumanTime takes microseconds since midnight; seconds and the fractional part
// are only printed when they are non-zero.
TEST(TransformUtilTest, HumanTime) {
  struct Case {
    int64_t micros;
    const char* expected;
  };
  constexpr Case kCases[] = {
      {0, "00:00"},                  // midnight: seconds omitted
      {1000000, "00:00:01"},         // 1 second after midnight
      {1500000, "00:00:01.500"},     // 1.5 seconds: millisecond precision
      {1001000, "00:00:01.001"},     // 1.001 seconds: millisecond precision
      {1000001, "00:00:01.000001"},  // 1.000001 seconds: microsecond precision
      {3723000000, "01:02:03"},      // 1 hour, 2 minutes, 3 seconds
      {86399000000, "23:59:59"},     // last whole second of the day
  };

  for (const auto& [micros, expected] : kCases) {
    EXPECT_EQ(expected, TransformUtil::HumanTime(micros)) << "micros=" << micros;
  }
}

// HumanTimestamp takes microseconds since the Unix epoch and renders an
// ISO-8601 style local timestamp without a zone suffix.
TEST(TransformUtilTest, HumanTimestamp) {
  struct Case {
    int64_t micros;
    const char* expected;
  };
  constexpr Case kCases[] = {
      {0, "1970-01-01T00:00:00"},                        // Unix epoch
      {1000000, "1970-01-01T00:00:01"},                  // 1 second after epoch
      {-1000000, "1969-12-31T23:59:59"},                 // 1 second before epoch
      {-30610224001000000, "0999-12-31T23:59:59"},       // zero-padded year
      {1767225601500000, "2026-01-01T00:00:01.500"},     // precision with 500 milliseconds
      {1767225601001000, "2026-01-01T00:00:01.001"},     // precision with 1 millisecond
      {1767225601000001, "2026-01-01T00:00:01.000001"},  // precision with 1 microsecond
  };

  for (const auto& [micros, expected] : kCases) {
    EXPECT_EQ(expected, TransformUtil::HumanTimestamp(micros)) << "micros=" << micros;
  }
}

// HumanTimestampWithZone renders the same fields as HumanTimestamp followed by
// a fixed "+00:00" offset.
TEST(TransformUtilTest, HumanTimestampWithZone) {
  struct Case {
    int64_t micros;
    const char* expected;
  };
  constexpr Case kCases[] = {
      {0, "1970-01-01T00:00:00+00:00"},                   // Unix epoch
      {1000000, "1970-01-01T00:00:01+00:00"},             // 1 second after epoch
      {-1000000, "1969-12-31T23:59:59+00:00"},            // 1 second before epoch
      {-30610224001000000, "0999-12-31T23:59:59+00:00"},  // zero-padded year
      // precision with 500 milliseconds
      {1767225601500000, "2026-01-01T00:00:01.500+00:00"},
      // precision with 1 millisecond
      {1767225601001000, "2026-01-01T00:00:01.001+00:00"},
      // precision with 1 microsecond
      {1767225601000001, "2026-01-01T00:00:01.000001+00:00"},
  };

  for (const auto& [micros, expected] : kCases) {
    EXPECT_EQ(expected, TransformUtil::HumanTimestampWithZone(micros))
        << "micros=" << micros;
  }
}

// Base64Encode: checks the padding for every input length modulo 3, a few
// ordinary strings, and non-ASCII / NUL bytes.
TEST(TransformUtilTest, Base64Encode) {
  struct Case {
    std::string_view input;
    const char* expected;
  };
  const Case kCases[] = {
      // Empty string
      {"", ""},
      // One, two and three input bytes: "==", "=" and no padding respectively
      {"a", "YQ=="},
      {"ab", "YWI="},
      {"abc", "YWJj"},
      // Longer inputs: five bytes (one pad character) and six bytes (no padding)
      {"abcde", "YWJjZGU="},
      {"abcdef", "YWJjZGVm"},
      // Common strings
      {"Some data with characters", "U29tZSBkYXRhIHdpdGggY2hhcmFjdGVycw=="},
      {"hello", "aGVsbG8="},
      {"test string", "dGVzdCBzdHJpbmc="},
      // Four bytes with the high bit set (a UTF-8 encoded code point)
      {"\xF0\x9F\x84\x80", "8J+EgA=="},
      // A single NUL byte; the explicit length keeps it from being treated as
      // a terminator
      {{"\x00", 1}, "AA=="},
  };

  for (const auto& [input, expected] : kCases) {
    EXPECT_EQ(expected, TransformUtil::Base64Encode(input))
        << "input length=" << input.size();
  }
}

} // namespace iceberg
1 change: 1 addition & 0 deletions src/iceberg/util/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ install_headers(
'string_util.h',
'temporal_util.h',
'timepoint.h',
'transform_util.h',
'truncate_util.h',
'type_util.h',
'url_encoder.h',
Expand Down
135 changes: 135 additions & 0 deletions src/iceberg/util/transform_util.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/util/transform_util.h"

#include <array>
#include <chrono>

namespace iceberg {

namespace {
const auto kEpochDate = std::chrono::year{1970} / std::chrono::January / 1;
constexpr int64_t kMicrosPerMillis = 1000;
constexpr int64_t kMicrosPerSecond = 1000000;
} // namespace

// Formats a year ordinal (number of years relative to 1970) as a year string
// using the chrono "%Y" conversion.
std::string TransformUtil::HumanYear(int32_t year_ordinal) {
  const std::chrono::years offset{year_ordinal};
  const std::chrono::year_month_day shifted = kEpochDate + offset;
  return std::format("{:%Y}", shifted);
}

// Formats a month ordinal (number of months relative to January 1970) as
// "YYYY-MM".
std::string TransformUtil::HumanMonth(int32_t month_ordinal) {
  const std::chrono::months offset{month_ordinal};
  const std::chrono::year_month_day shifted = kEpochDate + offset;
  return std::format("{:%Y-%m}", shifted);
}

// Formats a day ordinal (number of days relative to 1970-01-01) as an ISO
// date using the chrono "%F" conversion.
std::string TransformUtil::HumanDay(int32_t day_ordinal) {
  // Day arithmetic is done on the serial day count rather than the field-based
  // calendar date, so month and year boundaries are handled by chrono.
  const std::chrono::sys_days epoch_day{kEpochDate};
  const std::chrono::sys_days target_day = epoch_day + std::chrono::days{day_ordinal};
  return std::format("{:%F}", target_day);
}

// Formats an hour ordinal (number of hours relative to the system_clock
// epoch) as "<ISO date>-<two-digit hour>".
std::string TransformUtil::HumanHour(int32_t hour_ordinal) {
  using HourPoint = std::chrono::sys_time<std::chrono::hours>;
  const HourPoint instant{std::chrono::hours{hour_ordinal}};
  return std::format("{:%F-%H}", instant);
}

// Formats a time of day given as microseconds since midnight.
//
// The output is as short as the value allows:
//   - "HH:MM"            when seconds and the sub-second part are both zero
//   - "HH:MM:SS"         when only the sub-second part is zero
//   - "HH:MM:SS.mmm"     when the sub-second part is a whole number of millis
//   - "HH:MM:SS.uuuuuu"  otherwise
//
// NOTE(review): the split uses truncating division, so a negative input is not
// normalized; callers are presumably expected to pass values in [0, 24h).
std::string TransformUtil::HumanTime(int64_t micros_from_midnight) {
  const std::chrono::seconds whole_seconds{micros_from_midnight / kMicrosPerSecond};
  const std::chrono::hh_mm_ss<std::chrono::seconds> time_of_day{whole_seconds};
  const int64_t fraction = micros_from_midnight % kMicrosPerSecond;

  if (fraction != 0) {
    if (fraction % kMicrosPerMillis != 0) {
      return std::format("{:%T}.{:06d}", time_of_day, fraction);
    }
    return std::format("{:%T}.{:03d}", time_of_day, fraction / kMicrosPerMillis);
  }

  if (time_of_day.seconds().count() != 0) {
    return std::format("{:%T}", time_of_day);
  }
  return std::format("{:%R}", time_of_day);
}

// Formats a timestamp given as microseconds since the Unix epoch as
// "YYYY-MM-DDTHH:MM:SS", followed by an optional fractional part:
//   - nothing     when the sub-second part is zero
//   - ".mmm"      when it is a whole number of milliseconds
//   - ".uuuuuu"   otherwise
std::string TransformUtil::HumanTimestamp(int64_t timestamp_micros) {
  const std::chrono::sys_time<std::chrono::microseconds> instant{
      std::chrono::microseconds{timestamp_micros}};
  // Round towards negative infinity rather than towards zero: with truncating
  // `/` and `%`, a pre-epoch value such as -500000 would yield second 0 and a
  // negative remainder, printing "1970-01-01T00:00:00.-500" instead of
  // "1969-12-31T23:59:59.500".
  const auto tp = std::chrono::floor<std::chrono::seconds>(instant);
  // Always in [0, kMicrosPerSecond) because `tp` never exceeds `instant`.
  const int64_t micros = std::chrono::microseconds{instant - tp}.count();

  if (micros == 0) {
    return std::format("{:%FT%T}", tp);
  }
  if (micros % kMicrosPerMillis == 0) {
    return std::format("{:%FT%T}.{:03d}", tp, micros / kMicrosPerMillis);
  }
  return std::format("{:%FT%T}.{:06d}", tp, micros);
}

// Formats a timestamp given as microseconds since the Unix epoch as
// "YYYY-MM-DDTHH:MM:SS[.fraction]+00:00". The offset suffix is a fixed
// literal; no time-zone conversion is performed. The fractional part is:
//   - omitted     when the sub-second part is zero
//   - ".mmm"      when it is a whole number of milliseconds
//   - ".uuuuuu"   otherwise
std::string TransformUtil::HumanTimestampWithZone(int64_t timestamp_micros) {
  const std::chrono::sys_time<std::chrono::microseconds> instant{
      std::chrono::microseconds{timestamp_micros}};
  // Round towards negative infinity rather than towards zero: with truncating
  // `/` and `%`, a pre-epoch value such as -500000 would yield second 0 and a
  // negative remainder, printing "1970-01-01T00:00:00.-500+00:00" instead of
  // "1969-12-31T23:59:59.500+00:00".
  const auto tp = std::chrono::floor<std::chrono::seconds>(instant);
  // Always in [0, kMicrosPerSecond) because `tp` never exceeds `instant`.
  const int64_t micros = std::chrono::microseconds{instant - tp}.count();

  if (micros == 0) {
    return std::format("{:%FT%T}+00:00", tp);
  }
  if (micros % kMicrosPerMillis == 0) {
    return std::format("{:%FT%T}.{:03d}+00:00", tp, micros / kMicrosPerMillis);
  }
  return std::format("{:%FT%T}.{:06d}+00:00", tp, micros);
}

// Encodes the bytes of `str_to_encode` with the standard Base64 alphabet
// ('A'-'Z', 'a'-'z', '0'-'9', '+', '/') and '=' padding.
//
// Every 3 input bytes become 4 output characters. A trailing group of 1 or 2
// bytes is zero-extended and emitted as 2 or 3 characters followed by "==" or
// "=" so the output length is always a multiple of 4. An empty input yields an
// empty string.
std::string TransformUtil::Base64Encode(std::string_view str_to_encode) {
  // A constexpr view avoids the heap allocation and guarded initialization a
  // function-local static std::string would need.
  static constexpr std::string_view kAlphabet =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  constexpr char kPad = '=';
  constexpr uint32_t kSextetMask = 0x3f;

  const size_t size = str_to_encode.size();

  std::string encoded;
  encoded.reserve((size + 2) / 3 * 4);  // exact output length

  // Reads one input byte as an unsigned value so that bytes >= 0x80 are never
  // sign-extended, whatever the signedness of plain `char`.
  const auto byte_at = [&](size_t pos) -> uint32_t {
    return static_cast<unsigned char>(str_to_encode[pos]);
  };

  // Full 3-byte groups: pack into 24 bits, then peel off four 6-bit indices.
  size_t pos = 0;
  for (; pos + 3 <= size; pos += 3) {
    const uint32_t group =
        (byte_at(pos) << 16) | (byte_at(pos + 1) << 8) | byte_at(pos + 2);
    encoded += kAlphabet[(group >> 18) & kSextetMask];
    encoded += kAlphabet[(group >> 12) & kSextetMask];
    encoded += kAlphabet[(group >> 6) & kSextetMask];
    encoded += kAlphabet[group & kSextetMask];
  }

  // Trailing partial group: missing bytes count as zero, and only the
  // characters that carry real input bits are emitted before the padding.
  const size_t remaining = size - pos;
  if (remaining == 1) {
    const uint32_t group = byte_at(pos) << 16;
    encoded += kAlphabet[(group >> 18) & kSextetMask];
    encoded += kAlphabet[(group >> 12) & kSextetMask];
    encoded += kPad;
    encoded += kPad;
  } else if (remaining == 2) {
    const uint32_t group = (byte_at(pos) << 16) | (byte_at(pos + 1) << 8);
    encoded += kAlphabet[(group >> 18) & kSextetMask];
    encoded += kAlphabet[(group >> 12) & kSextetMask];
    encoded += kAlphabet[(group >> 6) & kSextetMask];
    encoded += kPad;
  }

  return encoded;
}

} // namespace iceberg
Loading
Loading