From b05afe0f259f4830051088630b976e95c9a01ee3 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 11 Nov 2025 10:12:25 +0400 Subject: [PATCH] `map` in `+`/`-` for `Array`s (#59961) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `map` is a simpler operation and uses linear indexing for `Array`s. This often improves performance (occasionally enabling vectorization) and improves TTFX in common cases. It also automatically returns the correct result for 0-D arrays, unlike broadcasting, which returns a scalar. Performance: ```julia julia> A = ones(3,3); julia> @btime $A + $A; 44.622 ns (2 allocations: 144 bytes) # v"1.13.0-DEV.1387" 29.047 ns (2 allocations: 144 bytes) # this PR julia> A = ones(3,3000); julia> @btime $A + $A; 10.095 μs (3 allocations: 70.40 KiB) # v"1.13.0-DEV.1387" 4.787 μs (3 allocations: 70.40 KiB) # this PR julia> @btime A + B + C + D + E + F setup=(A = rand(200,200); B = rand(200,200); C = rand(200,200); D = rand(200,200); E = rand(200,200); F = rand(200,200)); 93.910 μs (3 allocations: 312.59 KiB) # v"1.13.0-DEV.1387" 64.813 μs (9 allocations: 312.77 KiB) # this PR ``` The results are similar for `-`.
TTFX: ```julia julia> A = ones(3,3); julia> @time A + A; 0.174090 seconds (303.47 k allocations: 14.575 MiB, 99.98% compilation time) # v"1.13.0-DEV.1387" 0.072748 seconds (220.27 k allocations: 11.139 MiB, 99.95% compilation time) # this PR ``` These are measured on ```julia julia> versioninfo() Julia Version 1.13.0-DEV.1388 Commit c5f492781e (2025-10-27 11:44 UTC) Platform Info: OS: Linux (x86_64-linux-gnu) CPU: 8 × Intel(R) Core(TM) i5-10310U CPU @ 1.70GHz WORD_SIZE: 64 LLVM: libLLVM-20.1.8 (ORCJIT, skylake) GC: Built with stock GC Threads: 1 default, 1 interactive, 1 GC (on 8 virtual cores) Environment: LD_LIBRARY_PATH = /usr/local/lib: JULIA_EDITOR = subl ``` --- base/arraymath.jl | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/base/arraymath.jl b/base/arraymath.jl index 6488b0ae68505..4dcaa3eac8e30 100644 --- a/base/arraymath.jl +++ b/base/arraymath.jl @@ -2,10 +2,29 @@ ## Binary arithmetic operators ## +function _broadcast_preserving_zero_d(f, A, B) + broadcast_preserving_zero_d(f, A, B) +end + +# Using map over broadcast enables vectorization for wide matrices with few rows. +# This is because we use linear indexing in `map` as opposed to Cartesian indexing in broadcasting. +# https://github.com/JuliaLang/julia/issues/47873#issuecomment-1352472461 +function _broadcast_preserving_zero_d(f, A::Array, B::Array) + map(f, A, B) +end + +function _broadcast_preserving_zero_d(f, A::Array, B::Number) + map(Fix2(f, B), A) +end + +function _broadcast_preserving_zero_d(f, A::Number, B::Array) + map(Fix1(f, A), B) +end + for f in (:+, :-) @eval function ($f)(A::AbstractArray, B::AbstractArray) promote_shape(A, B) # check size compatibility - broadcast_preserving_zero_d($f, A, B) + _broadcast_preserving_zero_d($f, A, B) end end @@ -13,15 +32,15 @@ function +(A::Array, Bs::Array...) for B in Bs promote_shape(A, B) # check size compatibility end - broadcast_preserving_zero_d(+, A, Bs...) + map(+, A, Bs...) 
end for f in (:/, :\, :*) if f !== :/ - @eval ($f)(A::Number, B::AbstractArray) = broadcast_preserving_zero_d($f, A, B) + @eval ($f)(A::Number, B::AbstractArray) = _broadcast_preserving_zero_d($f, A, B) end if f !== :\ - @eval ($f)(A::AbstractArray, B::Number) = broadcast_preserving_zero_d($f, A, B) + @eval ($f)(A::AbstractArray, B::Number) = _broadcast_preserving_zero_d($f, A, B) end end