Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
234 changes: 133 additions & 101 deletions lib/elixir/lib/float.ex
Original file line number Diff line number Diff line change
Expand Up @@ -344,15 +344,12 @@ defmodule Float do

"""
@spec round(float, precision_range) :: float
# This implementation is slow since it relies on big integers.
# Faster implementations are available on more recent papers
# and could be implemented in the future.
def round(float, precision \\ 0)

def round(float, 0) when float == 0.0, do: float

def round(float, 0) when is_float(float) do
case float |> :erlang.round() |> :erlang.float() do
case :erlang.round(float) * 1.0 do
zero when zero == 0.0 and float < 0.0 -> -0.0
rounded -> rounded
end
Expand All @@ -366,135 +363,170 @@ defmodule Float do
raise ArgumentError, invalid_precision_message(precision)
end

# Decimal-place rounding via exact rational scaling. This is the bignum
# core used by reference implementations like David M. Gay's "Correctly
# Rounded Binary-Decimal and Decimal-Binary Conversions" (cited in the
# @doc above), Python's round(), and Java's BigDecimal.setScale.
#
# 1. Decompose float exactly: |float| = mantissa / 2^shift.
# 2. Scale exactly: |float| * 10^precision = mantissa * 10^precision / 2^shift.
# Because precision is bounded to 0..15, the product fits in ~103 bits
# (53-bit mantissa + ~50-bit power of ten) and BEAM bignums handle it
# directly without approximation.
# 3. Round the exact rational to an integer per the requested mode
# (half_up / floor / ceil) using quotient and remainder.
# 4. Emit the float closest to rounded_int / 10^precision:
# - fast path: when rounded_int < 2^53, both operands are exactly
# representable as floats and IEEE division is correctly rounded.
# - slow path: bignum alignment + manual mantissa extraction with
# round-to-nearest-even for the trailing bit.
#
# The integer-rounding decision (step 3) and the binary-emission decision
# (step 4) are deliberately independent: step 3 picks the exact rational
# the user asked for, step 4 picks the closest float to that rational.
# Conflating them is the classic source of double-rounding bugs.
#
# Faster algorithms exist (Cox 2026's table-based uscale; Ryū / Schubfach
# for round-trip printing) but target different problems or assume
# fixed-width machine arithmetic that BEAM doesn't expose efficiently.
# At precision <= 15, the exact path is small, easy to audit, and fast
# enough that a more complex algorithm has not been justified by benchmarks.
defp round(num, _precision, _rounding) when is_float(num) and num == 0.0, do: num

defp round(float, precision, rounding) do
<<sign::1, exp::11, significant::52-bitstring>> = <<float::float>>
{num, count} = decompose(significant, 1)
count = count - exp + 1023
defp round(float, precision, mode) do
<<sign::1, exp::11, mantissa::52>> = <<float::float>>

cond do
# Precision beyond 15 digits
count >= 104 ->
case rounding do
:ceil when sign === 0 -> 1 / power_of_10(precision)
:floor when sign === 1 -> -1 / power_of_10(precision)
:ceil when sign === 1 -> -0.0
:half_up when sign === 1 -> -0.0
_ -> 0.0
end
# Subnormal — tiny but non-zero; treat per-mode (ceil(+) and floor(-) bump
# to 10^-precision; everything else rounds to signed zero).
exp == 0 ->
tiny_round(sign, precision, mode)

# We are asking more precision than we have
count <= precision ->
# |float| >= 2^52 — has no fractional bits, return unchanged.
exp - 1075 >= 0 ->
float

true ->
# Difference in precision between float and asked precision
# We subtract 1 because we need to calculate the remainder too
diff = count - precision - 1

# Get up to latest so we calculate the remainder
power_of_10 = power_of_10(diff)

# Convert the numerand to decimal base
num = num * power_of_5(count)

# Move to the given precision - 1
num = div(num, power_of_10)
div = div(num, 10)
num = rounding(rounding, sign, num, div)

# Convert back to float without loss
# https://www.exploringbinary.com/correct-decimal-to-floating-point-using-big-integers/
den = power_of_10(precision)
boundary = den <<< 52

cond do
num == 0 and sign == 1 ->
-0.0

num == 0 ->
0.0

num >= boundary ->
{den, exp} = scale_down(num, boundary, 52)
decimal_to_float(sign, num, den, exp)

true ->
{num, exp} = scale_up(num, boundary, 52)
decimal_to_float(sign, num, den, exp)
end
mantissa = @power_of_2_to_52 ||| mantissa
shift = 1075 - exp
do_round(sign, mantissa, shift, precision, mode)
end
end

defp decompose(significant, initial) do
decompose(significant, 1, 0, initial)
end

defp decompose(<<1::1, bits::bitstring>>, count, last_count, acc) do
decompose(bits, count + 1, count, (acc <<< (count - last_count)) + 1)
# |float * 10^precision| < 0.5 — integer round is 0; ceil/floor still bump per sign.
defp do_round(sign, _mantissa, shift, precision, mode) when shift >= 104 do
tiny_round(sign, precision, mode)
end

defp decompose(<<0::1, bits::bitstring>>, count, last_count, acc) do
decompose(bits, count + 1, last_count, acc)
end

defp decompose(<<>>, _count, last_count, acc) do
{acc, last_count}
end
defp do_round(sign, mantissa, shift, precision, mode) do
power = power_of_10(precision)
product = mantissa * power
half = 1 <<< (shift - 1)
quotient = product >>> shift
remainder = product - (quotient <<< shift)
rounded_int = round_step(mode, sign, quotient, remainder, half)

defp scale_up(num, boundary, exp) when num >= boundary, do: {num, exp}
defp scale_up(num, boundary, exp), do: scale_up(num <<< 1, boundary, exp - 1)
cond do
rounded_int == 0 ->
signed_zero(sign)

defp scale_down(num, den, exp) do
new_den = den <<< 1
rounded_int < @power_of_2_to_52 <<< 1 ->
# Both rounded_int and power fit in 53 bits, so IEEE float division
# is correctly rounded.
result = rounded_int / power
if sign == 1, do: -result, else: result

if num < new_den do
{den >>> 52, exp}
else
scale_down(num, new_den, exp + 1)
true ->
bignum_to_float(sign, rounded_int, power)
end
end

defp decimal_to_float(sign, num, den, exp) do
quo = div(num, den)
rem = num - quo * den
defp round_step(:half_up, _sign, quotient, remainder, half) do
if remainder >= half, do: quotient + 1, else: quotient
end

tmp =
case den >>> 1 do
den when rem > den -> quo + 1
den when rem < den -> quo
_ when (quo &&& 1) === 1 -> quo + 1
_ -> quo
defp round_step(:floor, 0, quotient, _remainder, _half), do: quotient
defp round_step(:floor, 1, quotient, remainder, _half) when remainder > 0, do: quotient + 1
defp round_step(:floor, 1, quotient, _remainder, _half), do: quotient

defp round_step(:ceil, 0, quotient, remainder, _half) when remainder > 0, do: quotient + 1
defp round_step(:ceil, 0, quotient, _remainder, _half), do: quotient
defp round_step(:ceil, 1, quotient, _remainder, _half), do: quotient

defp signed_zero(0), do: 0.0
defp signed_zero(1), do: -0.0

# Result of rounding a non-zero float whose |float * 10^precision| < 0.5.
# ceil(+) → +10^-precision, floor(-) → -10^-precision, others → signed 0.
defp tiny_round(0, precision, :ceil), do: 1.0 / power_of_10(precision)
defp tiny_round(1, precision, :floor), do: -1.0 / power_of_10(precision)
defp tiny_round(sign, _precision, _mode), do: signed_zero(sign)

# Slow path: emit float closest to `sign * rounded_int / power` when
# rounded_int >= 2^53. The binary emission step is always IEEE
# round-to-nearest-even, regardless of the integer-rounding mode.
defp bignum_to_float(sign, rounded_int, power) do
shift_adjust = bit_length(rounded_int) - bit_length(power) - 53
{numerator, denominator, exp} = align(rounded_int, power, shift_adjust)

quotient = div(numerator, denominator)
remainder = numerator - quotient * denominator
half = denominator >>> 1

mantissa =
cond do
remainder > half -> quotient + 1
remainder < half -> quotient
(quotient &&& 1) === 1 -> quotient + 1
true -> quotient
end

tmp = tmp - @power_of_2_to_52
<<tmp::float>> = <<sign::1, exp + 1023::11, tmp::52>>
tmp
# Carry-bit normalization: `mantissa` lives in [2^52, 2^53]. The upper
# bound `2^53` is reachable when `align/3` returns an upper-bound quotient
# or when rounding carries. Rebalance into the canonical [2^52, 2^53)
# range so the 52-bit packing below doesn't silently truncate.
{mantissa, exp} =
if mantissa == @power_of_2_to_52 <<< 1,
do: {@power_of_2_to_52, exp + 1},
else: {mantissa, exp}

<<result::float>> = <<sign::1, exp + 1023::11, mantissa - @power_of_2_to_52::52>>
result
end

defp rounding(:floor, 1, _num, div), do: div + 1
defp rounding(:ceil, 0, _num, div), do: div + 1
# Pick (numerator, denominator, exp) so that numerator/denominator ∈ [2^52, 2^53)
# and the resulting float = numerator/denominator * 2^(exp-52).
defp align(rounded_int, power, shift_adjust) when shift_adjust >= 0 do
new_power = power <<< shift_adjust

defp rounding(:half_up, _sign, num, div) do
case rem(num, 10) do
rem when rem < 5 -> div
rem when rem >= 5 -> div + 1
if rounded_int < new_power <<< 53,
do: {rounded_int, new_power, 52 + shift_adjust},
else: {rounded_int, new_power <<< 1, 53 + shift_adjust}
end

defp align(rounded_int, power, shift_adjust) do
shifted = rounded_int <<< -shift_adjust

cond do
shifted >= power <<< 53 -> {shifted, power <<< 1, 53 + shift_adjust}
shifted >= power <<< 52 -> {shifted, power, 52 + shift_adjust}
true -> {shifted <<< 1, power, 51 + shift_adjust}
end
end

defp rounding(_, _, _, div), do: div
defp bit_length(0), do: 0
defp bit_length(integer) when integer > 0, do: bit_length(integer, 0)
defp bit_length(integer, acc) when integer >= 1 <<< 64, do: bit_length(integer >>> 64, acc + 64)
defp bit_length(integer, acc) when integer >= 1 <<< 16, do: bit_length(integer >>> 16, acc + 16)
defp bit_length(integer, acc) when integer >= 1 <<< 4, do: bit_length(integer >>> 4, acc + 4)
defp bit_length(integer, acc) when integer >= 1, do: bit_length(integer >>> 1, acc + 1)
defp bit_length(_integer, acc), do: acc

Enum.reduce(0..104, 1, fn x, acc ->
defp power_of_10(unquote(x)), do: unquote(acc)
Enum.reduce(0..15, 1, fn exponent, acc ->
defp power_of_10(unquote(exponent)), do: unquote(acc)
acc * 10
end)

Enum.reduce(0..104, 1, fn x, acc ->
defp power_of_5(unquote(x)), do: unquote(acc)
acc * 5
end)

@doc """
Returns a pair of integers whose ratio is exactly equal
to the original float and with a positive denominator.
Expand Down
Loading
Loading