Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions cpp/src/gandiva/gdv_function_stubs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <limits>

#include "arrow/util/logging.h"
#include "gandiva/execution_context.h"

Expand Down Expand Up @@ -526,6 +528,21 @@ TEST(TestGdvFnStubs, TestSubstringIndex) {
out_str = gdv_fn_substring_index(ctx_ptr, "路学\\L", 8, "\\", 1, -1, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "L");
EXPECT_FALSE(ctx.has_error());

// Large counts return full string when delimiter not found enough times
out_str = gdv_fn_substring_index(ctx_ptr, "a.b.c", 5, ".", 1, -1000, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "a.b.c");
EXPECT_FALSE(ctx.has_error());

out_str = gdv_fn_substring_index(ctx_ptr, "a.b.c", 5, ".", 1,
std::numeric_limits<int32_t>::max(), &out_len);
EXPECT_EQ(std::string(out_str, out_len), "a.b.c");
EXPECT_FALSE(ctx.has_error());

out_str = gdv_fn_substring_index(ctx_ptr, "a.b.c", 5, ".", 1,
std::numeric_limits<int32_t>::min(), &out_len);
EXPECT_EQ(std::string(out_str, out_len), "a.b.c");
EXPECT_FALSE(ctx.has_error());
}

TEST(TestGdvFnStubs, TestUpper) {
Expand Down
11 changes: 7 additions & 4 deletions cpp/src/gandiva/gdv_string_function_stubs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -407,14 +407,17 @@ const char* gdv_fn_substring_index(int64_t context, const char* txt, int32_t txt
}
}

if (static_cast<int32_t>(abs(cnt)) <= static_cast<int32_t>(occ.size()) && cnt > 0) {
// Use int64_t to avoid undefined behavior with abs(INT_MIN)
int64_t abs_cnt = (cnt < 0) ? -static_cast<int64_t>(cnt) : static_cast<int64_t>(cnt);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to check the function parameter for the bad size and exit early like in the truncation fix? That seems simpler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd leave as is:

  1. The int64_t fix is more robust - it handles ALL negative values correctly, not just INT_MIN
  2. It's the same number of lines - early exit adds 5 lines, current fix adds 3 lines
  3. The current fix also simplifies the existing code - removes redundant static_cast<int32_t> casts

The early-exit approach only guards against the specific crash values, while the int64_t approach fixes the underlying type-safety issue.

int64_t occ_size = static_cast<int64_t>(occ.size());

if (abs_cnt <= occ_size && cnt > 0) {
memcpy(out, txt, occ[cnt - 1]);
*out_len = occ[cnt - 1];
return out;
} else if (static_cast<int32_t>(abs(cnt)) <= static_cast<int32_t>(occ.size()) &&
cnt < 0) {
} else if (abs_cnt <= occ_size && cnt < 0) {
int32_t sz = static_cast<int32_t>(occ.size());
int32_t temp = static_cast<int32_t>(abs(cnt));
int32_t temp = static_cast<int32_t>(abs_cnt);

memcpy(out, txt + occ[sz - temp] + pat_len, txt_len - occ[sz - temp] - pat_len);
*out_len = txt_len - occ[sz - temp] - pat_len;
Expand Down
14 changes: 10 additions & 4 deletions cpp/src/gandiva/precompiled/extended_math_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -386,16 +386,22 @@ gdv_int64 get_power_of_10(gdv_int32 exp) {

// Truncates the integer `in` at the decimal position given by `out_scale`.
//
// A negative `out_scale` zeroes that many low-order decimal digits
// (e.g. -2 turns 1234 into 1200). A non-negative `out_scale` leaves the
// value untouched, since an int64 has no fractional digits to drop.
//
// Returns the truncated value, or 0 when `out_scale` is below -38.
FORCE_INLINE
gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale) {
  // For int64 (no fractional digits), a non-negative scale is a no-op.
  if (out_scale >= 0) {
    return in;
  }
  // GetScaleMultiplier only supports scales 0-38, so the decimal path below
  // cannot be used for anything smaller than -38. An int64 has at most 19
  // decimal digits, so every digit is truncated away at that point.
  if (out_scale < -38) {
    return 0;
  }

  bool overflow = false;
  arrow::BasicDecimal128 decimal = gandiva::decimalops::FromInt64(in, 38, 0, &overflow);
  arrow::BasicDecimal128 decimal_with_outscale =
      gandiva::decimalops::Truncate(gandiva::BasicDecimalScalar128(decimal, 38, 0), 38,
                                    out_scale, out_scale, &overflow);
  // out_scale is always negative here, so the truncated value is read back
  // at scale 0 rather than at the (negative) requested scale.
  return gandiva::decimalops::ToInt64(
      gandiva::BasicDecimalScalar128(decimal_with_outscale, 38, 0), &overflow);
}

FORCE_INLINE
Expand Down
13 changes: 13 additions & 0 deletions cpp/src/gandiva/precompiled/extended_math_ops_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <gtest/gtest.h>

#include <cmath>
#include <limits>

#include "gandiva/execution_context.h"
#include "gandiva/precompiled/types.h"
Expand Down Expand Up @@ -208,6 +209,18 @@ TEST(TestExtendedMathOps, TestTruncate) {
EXPECT_EQ(truncate_int64_int32(-1234, -2), -1200);
EXPECT_EQ(truncate_int64_int32(8124674407369523212, 0), 8124674407369523212);
EXPECT_EQ(truncate_int64_int32(8124674407369523212, -2), 8124674407369523200);

// Positive scales are no-op for int64 (no fractional digits)
EXPECT_EQ(truncate_int64_int32(12345, std::numeric_limits<int32_t>::max()), 12345);
EXPECT_EQ(truncate_int64_int32(-12345, std::numeric_limits<int32_t>::max()), -12345);
EXPECT_EQ(truncate_int64_int32(12345, 100), 12345);
EXPECT_EQ(truncate_int64_int32(12345, 39), 12345);

// Scales below -38 truncate all digits
EXPECT_EQ(truncate_int64_int32(12345, std::numeric_limits<int32_t>::min()), 0);
EXPECT_EQ(truncate_int64_int32(12345, -100), 0);
EXPECT_EQ(truncate_int64_int32(12345, -39), 0);
EXPECT_EQ(truncate_int64_int32(-99999, -39), 0);
}

TEST(TestExtendedMathOps, TestTrigonometricFunctions) {
Expand Down
Loading