diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..25f76fd4 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# Initial Runic formatting +4f29bf67e8be8e82da2deabd8236aaa9d95b781e diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 1d4b1ca5..84e069f6 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -41,10 +41,12 @@ jobs: - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 + if: matrix.version == '1.x' && matrix.os == 'ubuntu-latest' with: directories: src - uses: codecov/codecov-action@v4 + if: matrix.version == '1.x' && matrix.os == 'ubuntu-latest' with: files: lcov.info token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/Runic.yml b/.github/workflows/Runic.yml new file mode 100644 index 00000000..35534a50 --- /dev/null +++ b/.github/workflows/Runic.yml @@ -0,0 +1,15 @@ +name: Runic +on: + push: + branches: [master] + pull_request: +jobs: + runic: + name: Runic + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: "1" + - uses: fredrikekre/runic-action@v1 diff --git a/docs/make.jl b/docs/make.jl index 88b3b074..feb85de7 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -12,7 +12,7 @@ function write_if_changed(path::String, content::String) if isfile(path) && read(path, String) == content return # Content unchanged, skip write end - write(path, content) + return write(path, content) end # ============================================ diff --git a/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl b/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl index 437d5163..272fcf09 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl @@ -28,8 +28,8 @@ # ============================================================================== using AdaptiveArrayPools: get_view!, get_nd_view!, get_nd_array!, allocate_vector, safe_prod, - _record_type_touch!, _fixed_slot_bit, _checkpoint_typed_pool!, - _MODE_BITS_MASK + _record_type_touch!, _fixed_slot_bit, _checkpoint_typed_pool!, + _MODE_BITS_MASK """ get_view!(tp::CuTypedPool{T}, n::Int) -> CuVector{T} @@ -205,5 +205,5 @@ end end @inbounds pool._touched_type_masks[depth] = current_mask | b end - nothing + return nothing end diff --git a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl index 3c54a119..a9803372 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl @@ -37,61 +37,61 @@ Default element type for disabled CUDA pools (matches CUDA.zeros() default). AdaptiveArrayPools.default_eltype(::DisabledPool{:cuda}) = Float32 # --- zeros! for DisabledPool{:cuda} --- -@inline AdaptiveArrayPools.zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.zeros(T, dims...) -@inline AdaptiveArrayPools.zeros!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...) -@inline AdaptiveArrayPools.zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.zeros(T, dims...) -@inline AdaptiveArrayPools.zeros!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...) +@inline AdaptiveArrayPools.zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.zeros(T, dims...) +@inline AdaptiveArrayPools.zeros!(p::DisabledPool{:cuda}, dims::Vararg{Int, N}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...) +@inline AdaptiveArrayPools.zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CUDA.zeros(T, dims...) +@inline AdaptiveArrayPools.zeros!(p::DisabledPool{:cuda}, dims::NTuple{N, Int}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...) # --- ones! for DisabledPool{:cuda} --- -@inline AdaptiveArrayPools.ones!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.ones(T, dims...) -@inline AdaptiveArrayPools.ones!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...) -@inline AdaptiveArrayPools.ones!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.ones(T, dims...) -@inline AdaptiveArrayPools.ones!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...) +@inline AdaptiveArrayPools.ones!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.ones(T, dims...) +@inline AdaptiveArrayPools.ones!(p::DisabledPool{:cuda}, dims::Vararg{Int, N}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...) +@inline AdaptiveArrayPools.ones!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CUDA.ones(T, dims...) +@inline AdaptiveArrayPools.ones!(p::DisabledPool{:cuda}, dims::NTuple{N, Int}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...) # --- similar! for DisabledPool{:cuda} --- @inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x) @inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}) where {T} = CUDA.similar(x, T) -@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, dims::Vararg{Int,N}) where {N} = CUDA.similar(x, dims...) -@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.similar(x, T, dims...) +@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, dims::Vararg{Int, N}) where {N} = CUDA.similar(x, dims...) +@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.similar(x, T, dims...) # Fallback for non-CuArray inputs (creates CuArray from AbstractArray) @inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x)) @inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}) where {T} = CuArray{T}(undef, size(x)) -@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = CuArray{eltype(x)}(undef, dims) -@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T}(undef, dims) +@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, dims::Vararg{Int, N}) where {N} = CuArray{eltype(x)}(undef, dims) +@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CuArray{T}(undef, dims) # --- unsafe_zeros! for DisabledPool{:cuda} --- -@inline AdaptiveArrayPools.unsafe_zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.zeros(T, dims...) -@inline AdaptiveArrayPools.unsafe_zeros!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...) -@inline AdaptiveArrayPools.unsafe_zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.zeros(T, dims...) -@inline AdaptiveArrayPools.unsafe_zeros!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...) +@inline AdaptiveArrayPools.unsafe_zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.zeros(T, dims...) +@inline AdaptiveArrayPools.unsafe_zeros!(p::DisabledPool{:cuda}, dims::Vararg{Int, N}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...) +@inline AdaptiveArrayPools.unsafe_zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CUDA.zeros(T, dims...) +@inline AdaptiveArrayPools.unsafe_zeros!(p::DisabledPool{:cuda}, dims::NTuple{N, Int}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...) # --- unsafe_ones! for DisabledPool{:cuda} --- -@inline AdaptiveArrayPools.unsafe_ones!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.ones(T, dims...) -@inline AdaptiveArrayPools.unsafe_ones!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...) -@inline AdaptiveArrayPools.unsafe_ones!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.ones(T, dims...) -@inline AdaptiveArrayPools.unsafe_ones!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...) +@inline AdaptiveArrayPools.unsafe_ones!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.ones(T, dims...) +@inline AdaptiveArrayPools.unsafe_ones!(p::DisabledPool{:cuda}, dims::Vararg{Int, N}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...) +@inline AdaptiveArrayPools.unsafe_ones!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CUDA.ones(T, dims...) +@inline AdaptiveArrayPools.unsafe_ones!(p::DisabledPool{:cuda}, dims::NTuple{N, Int}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...) # --- unsafe_similar! for DisabledPool{:cuda} --- @inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x) @inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}) where {T} = CUDA.similar(x, T) -@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, dims::Vararg{Int,N}) where {N} = CUDA.similar(x, dims...) -@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.similar(x, T, dims...) +@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, dims::Vararg{Int, N}) where {N} = CUDA.similar(x, dims...) +@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.similar(x, T, dims...) # Fallback for non-CuArray inputs @inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x)) @inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}) where {T} = CuArray{T}(undef, size(x)) -@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = CuArray{eltype(x)}(undef, dims) -@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T}(undef, dims) +@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, dims::Vararg{Int, N}) where {N} = CuArray{eltype(x)}(undef, dims) +@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CuArray{T}(undef, dims) # --- acquire! for DisabledPool{:cuda} --- @inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, n::Int) where {T} = CuVector{T}(undef, n) -@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T,N}(undef, dims) -@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CuArray{T,N}(undef, dims) +@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CuArray{T, N}(undef, dims) +@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CuArray{T, N}(undef, dims) @inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x) @inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x)) # --- unsafe_acquire! for DisabledPool{:cuda} --- @inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, n::Int) where {T} = CuVector{T}(undef, n) -@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T,N}(undef, dims) -@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CuArray{T,N}(undef, dims) +@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CuArray{T, N}(undef, dims) +@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CuArray{T, N}(undef, dims) @inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x) @inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x)) diff --git a/ext/AdaptiveArrayPoolsCUDAExt/dispatch.jl b/ext/AdaptiveArrayPoolsCUDAExt/dispatch.jl index 58e04806..f5ff39a9 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/dispatch.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/dispatch.jl @@ -10,7 +10,7 @@ using AdaptiveArrayPools: allocate_vector, wrap_array, get_typed_pool! # ============================================================================== @inline AdaptiveArrayPools.allocate_vector( - ::AbstractTypedPool{T,CuVector{T}}, n::Int + ::AbstractTypedPool{T, CuVector{T}}, n::Int ) where {T} = CuVector{T}(undef, n) # ============================================================================== @@ -20,8 +20,8 @@ using AdaptiveArrayPools: allocate_vector, wrap_array, get_typed_pool! # GPU uses reshape which returns CuArray{T,N} via GPUArrays derive() # (NOT ReshapedArray like CPU - this is simpler for GPU kernels) @inline AdaptiveArrayPools.wrap_array( - ::AbstractTypedPool{T,CuVector{T}}, flat_view, dims::NTuple{N,Int} -) where {T,N} = reshape(flat_view, dims) + ::AbstractTypedPool{T, CuVector{T}}, flat_view, dims::NTuple{N, Int} +) where {T, N} = reshape(flat_view, dims) # ============================================================================== # get_typed_pool! Dispatches for CuAdaptiveArrayPool @@ -39,7 +39,7 @@ using AdaptiveArrayPools: allocate_vector, wrap_array, get_typed_pool! # Slow path: rare types via IdDict (with checkpoint correction!) @inline function AdaptiveArrayPools.get_typed_pool!(p::CuAdaptiveArrayPool, ::Type{T}) where {T} - get!(p.others, T) do + return get!(p.others, T) do tp = CuTypedPool{T}() # CRITICAL: Match CPU behavior - auto-checkpoint new pool if inside @with_pool scope # Without this, rewind! would corrupt state for dynamically-created pools diff --git a/ext/AdaptiveArrayPoolsCUDAExt/state.jl b/ext/AdaptiveArrayPoolsCUDAExt/state.jl index 5a546c8e..46cd616c 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/state.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/state.jl @@ -6,8 +6,8 @@ # AbstractTypedPool, so they work for CuTypedPool automatically. using AdaptiveArrayPools: checkpoint!, rewind!, reset!, - _checkpoint_typed_pool!, _rewind_typed_pool!, _has_bit, - _LAZY_MODE_BIT, _TYPED_LAZY_BIT, _TYPE_BITS_MASK + _checkpoint_typed_pool!, _rewind_typed_pool!, _has_bit, + _LAZY_MODE_BIT, _TYPED_LAZY_BIT, _TYPE_BITS_MASK # ============================================================================== # GPU Fixed Slot Iteration @@ -20,7 +20,7 @@ Apply `f` to each fixed slot CuTypedPool. Zero allocation via compile-time unrol """ @generated function AdaptiveArrayPools.foreach_fixed_slot(f::F, pool::CuAdaptiveArrayPool) where {F} exprs = [:(f(getfield(pool, $(QuoteNode(field))))) for field in GPU_FIXED_SLOT_FIELDS] - quote + return quote Base.@_inline_meta $(exprs...) nothing @@ -57,7 +57,7 @@ end push!(pool._touched_type_masks, UInt16(0)) push!(pool._touched_has_others, AdaptiveArrayPools._fixed_slot_bit(T) == UInt16(0)) _checkpoint_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, T), pool._current_depth) - nothing + return nothing end # Type-specific checkpoint (multiple types) @@ -72,7 +72,7 @@ end end has_any_fallback = any(i -> AdaptiveArrayPools._fixed_slot_bit(types[i].parameters[1]) == UInt16(0), unique_indices) checkpoint_exprs = [:(_checkpoint_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices] - quote + return quote pool._current_depth += 1 push!(pool._touched_type_masks, UInt16(0)) push!(pool._touched_has_others, $has_any_fallback) @@ -121,7 +121,7 @@ end pop!(pool._touched_type_masks) pop!(pool._touched_has_others) pool._current_depth -= 1 - nothing + return nothing end # Type-specific rewind (multiple types) @@ -136,7 +136,7 @@ end end rewind_exprs = [:(_rewind_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in reverse(unique_indices)] reset_exprs = [:(reset!(AdaptiveArrayPools.get_typed_pool!(pool, types[$i]))) for i in unique_indices] - quote + return quote if pool._current_depth == 1 $(reset_exprs...) return nothing @@ -175,19 +175,19 @@ end @inbounds pool._touched_has_others[depth] = true end # Float16 uses lazy first-touch via bit 7 in _record_type_touch! — no eager checkpoint needed. - nothing + return nothing end @inline function AdaptiveArrayPools._lazy_rewind!(pool::CuAdaptiveArrayPool) d = pool._current_depth mask = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK - _has_bit(mask, Float64) && _rewind_typed_pool!(pool.float64, d) - _has_bit(mask, Float32) && _rewind_typed_pool!(pool.float32, d) - _has_bit(mask, Int64) && _rewind_typed_pool!(pool.int64, d) - _has_bit(mask, Int32) && _rewind_typed_pool!(pool.int32, d) + _has_bit(mask, Float64) && _rewind_typed_pool!(pool.float64, d) + _has_bit(mask, Float32) && _rewind_typed_pool!(pool.float32, d) + _has_bit(mask, Int64) && _rewind_typed_pool!(pool.int64, d) + _has_bit(mask, Int32) && _rewind_typed_pool!(pool.int32, d) _has_bit(mask, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d) _has_bit(mask, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d) - _has_bit(mask, Bool) && _rewind_typed_pool!(pool.bool, d) + _has_bit(mask, Bool) && _rewind_typed_pool!(pool.bool, d) # Bit 7: Float16 (CUDA reassignment — _fixed_slot_bit(Float16)==0, must use explicit bit check) mask & _cuda_float16_bit() != 0 && _rewind_typed_pool!(pool.float16, d) if @inbounds(pool._touched_has_others[d]) @@ -198,7 +198,7 @@ end pop!(pool._touched_type_masks) pop!(pool._touched_has_others) pool._current_depth -= 1 - nothing + return nothing end # ============================================================================== @@ -221,7 +221,7 @@ end @inbounds pool._touched_has_others[d] = true end # Float16 uses lazy first-touch via bit 7 in _record_type_touch! — no eager checkpoint needed. - nothing + return nothing end # _typed_lazy_rewind!: selective rewind of (tracked | touched) mask. @@ -232,13 +232,13 @@ end d = pool._current_depth touched = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK combined = tracked_mask | touched - _has_bit(combined, Float64) && _rewind_typed_pool!(pool.float64, d) - _has_bit(combined, Float32) && _rewind_typed_pool!(pool.float32, d) - _has_bit(combined, Int64) && _rewind_typed_pool!(pool.int64, d) - _has_bit(combined, Int32) && _rewind_typed_pool!(pool.int32, d) + _has_bit(combined, Float64) && _rewind_typed_pool!(pool.float64, d) + _has_bit(combined, Float32) && _rewind_typed_pool!(pool.float32, d) + _has_bit(combined, Int64) && _rewind_typed_pool!(pool.int64, d) + _has_bit(combined, Int32) && _rewind_typed_pool!(pool.int32, d) _has_bit(combined, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d) _has_bit(combined, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d) - _has_bit(combined, Bool) && _rewind_typed_pool!(pool.bool, d) + _has_bit(combined, Bool) && _rewind_typed_pool!(pool.bool, d) # Float16: bit 7 is set by _record_type_touch! on first touch (lazy first-touch). # Also rewind when Float16 was a *tracked* type in the macro: _typed_lazy_checkpoint! # calls checkpoint!(pool, Float16) which pushes a checkpoint at depth d, but _acquire_impl! @@ -257,7 +257,7 @@ end pop!(pool._touched_type_masks) pop!(pool._touched_has_others) pool._current_depth -= 1 - nothing + return nothing end # ============================================================================== @@ -288,7 +288,7 @@ end # Type-specific reset @inline function AdaptiveArrayPools.reset!(pool::CuAdaptiveArrayPool, ::Type{T}) where {T} reset!(AdaptiveArrayPools.get_typed_pool!(pool, T)) - pool + return pool end # ============================================================================== diff --git a/ext/AdaptiveArrayPoolsCUDAExt/types.jl b/ext/AdaptiveArrayPoolsCUDAExt/types.jl index a3673f23..4e5dd9fc 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/types.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/types.jl @@ -49,7 +49,7 @@ mutable struct CuTypedPool{T} <: AbstractTypedPool{T, CuVector{T}} end function CuTypedPool{T}() where {T} - CuTypedPool{T}( + return CuTypedPool{T}( CuVector{T}[], # vectors Any[], # views (N-way flat cache) Any[], # view_dims @@ -121,7 +121,7 @@ end function CuAdaptiveArrayPool() dev = CUDA.device() - CuAdaptiveArrayPool( + return CuAdaptiveArrayPool( CuTypedPool{Float32}(), CuTypedPool{Float64}(), CuTypedPool{Float16}(), diff --git a/ext/AdaptiveArrayPoolsCUDAExt/utils.jl b/ext/AdaptiveArrayPoolsCUDAExt/utils.jl index a8a5a424..c8c22556 100644 --- a/ext/AdaptiveArrayPoolsCUDAExt/utils.jl +++ b/ext/AdaptiveArrayPoolsCUDAExt/utils.jl @@ -13,14 +13,14 @@ using AdaptiveArrayPools: pool_stats, foreach_fixed_slot Print statistics for a CUDA typed pool. """ -function AdaptiveArrayPools.pool_stats(tp::CuTypedPool{T}; io::IO=stdout, indent::Int=0, name::String="") where {T} +function AdaptiveArrayPools.pool_stats(tp::CuTypedPool{T}; io::IO = stdout, indent::Int = 0, name::String = "") where {T} prefix = " "^indent type_name = isempty(name) ? string(T) : name n_arrays = length(tp.vectors) if n_arrays == 0 - printstyled(io, prefix, type_name, color=:cyan) - printstyled(io, " (empty)\n", color=:dark_gray) + printstyled(io, prefix, type_name, color = :cyan) + printstyled(io, " (empty)\n", color = :dark_gray) return end @@ -32,20 +32,20 @@ function AdaptiveArrayPools.pool_stats(tp::CuTypedPool{T}; io::IO=stdout, indent cpu_str = Base.format_bytes(cpu_bytes) # Header - printstyled(io, prefix, type_name, color=:cyan) - printstyled(io, " [GPU]", color=:green) + printstyled(io, prefix, type_name, color = :cyan) + printstyled(io, " [GPU]", color = :green) println(io) # Stats - printstyled(io, prefix, " slots: ", color=:dark_gray) - printstyled(io, n_arrays, color=:blue) - printstyled(io, " (active: ", color=:dark_gray) - printstyled(io, tp.n_active, color=:blue) - printstyled(io, ")\n", color=:dark_gray) - - printstyled(io, prefix, " elements: ", color=:dark_gray) - printstyled(io, total_elements, color=:blue) - printstyled(io, " ($gpu_str GPU + $cpu_str CPU)\n", color=:dark_gray) + printstyled(io, prefix, " slots: ", color = :dark_gray) + printstyled(io, n_arrays, color = :blue) + printstyled(io, " (active: ", color = :dark_gray) + printstyled(io, tp.n_active, color = :blue) + printstyled(io, ")\n", color = :dark_gray) + + printstyled(io, prefix, " elements: ", color = :dark_gray) + printstyled(io, total_elements, color = :blue) + return printstyled(io, " ($gpu_str GPU + $cpu_str CPU)\n", color = :dark_gray) end # ============================================================================== @@ -57,12 +57,12 @@ end Print statistics for a CUDA adaptive array pool. """ -function AdaptiveArrayPools.pool_stats(pool::CuAdaptiveArrayPool; io::IO=stdout) +function AdaptiveArrayPools.pool_stats(pool::CuAdaptiveArrayPool; io::IO = stdout) # Header with device info - printstyled(io, "CuAdaptiveArrayPool", bold=true, color=:green) - printstyled(io, " (device ", color=:dark_gray) - printstyled(io, pool.device_id, color=:blue) - printstyled(io, ")\n", color=:dark_gray) + printstyled(io, "CuAdaptiveArrayPool", bold = true, color = :green) + printstyled(io, " (device ", color = :dark_gray) + printstyled(io, pool.device_id, color = :blue) + printstyled(io, ")\n", color = :dark_gray) has_content = false @@ -71,18 +71,18 @@ function AdaptiveArrayPools.pool_stats(pool::CuAdaptiveArrayPool; io::IO=stdout) if !isempty(tp.vectors) has_content = true T = typeof(tp).parameters[1] - pool_stats(tp; io, indent=2, name="$T (fixed)") + pool_stats(tp; io, indent = 2, name = "$T (fixed)") end end # Fallback types for (T, tp) in pool.others has_content = true - pool_stats(tp; io, indent=2, name="$T (fallback)") + pool_stats(tp; io, indent = 2, name = "$T (fallback)") end if !has_content - printstyled(io, " (empty)\n", color=:dark_gray) + printstyled(io, " (empty)\n", color = :dark_gray) end return nothing end @@ -94,7 +94,7 @@ end # Compact one-line show function Base.show(io::IO, tp::CuTypedPool{T}) where {T} n_vectors = length(tp.vectors) - if n_vectors == 0 + return if n_vectors == 0 print(io, "CuTypedPool{$T}(empty)") else total = sum(length(v) for v in tp.vectors) @@ -104,7 +104,7 @@ end # Multi-line show function Base.show(io::IO, ::MIME"text/plain", tp::CuTypedPool{T}) where {T} - pool_stats(tp; io, name="CuTypedPool{$T}") + return pool_stats(tp; io, name = "CuTypedPool{$T}") end # ============================================================================== @@ -131,10 +131,10 @@ function Base.show(io::IO, pool::CuAdaptiveArrayPool) total_active[] += tp.n_active end - print(io, "CuAdaptiveArrayPool(device=$(pool.device_id), types=$(n_types[]), slots=$(total_vectors[]), active=$(total_active[]))") + return print(io, "CuAdaptiveArrayPool(device=$(pool.device_id), types=$(n_types[]), slots=$(total_vectors[]), active=$(total_active[]))") end # Multi-line show function Base.show(io::IO, ::MIME"text/plain", pool::CuAdaptiveArrayPool) - pool_stats(pool; io) + return pool_stats(pool; io) end diff --git a/src/acquire.jl b/src/acquire.jl index 6d9dea15..f1dbdeb9 100644 --- a/src/acquire.jl +++ b/src/acquire.jl @@ -3,13 +3,15 @@ # ============================================================================== # Allocate a new vector (dispatch point for extensions) -@inline allocate_vector(::AbstractTypedPool{T,Vector{T}}, n::Int) where {T} = +@inline allocate_vector(::AbstractTypedPool{T, Vector{T}}, n::Int) where {T} = Vector{T}(undef, n) # Wrap flat view into N-D array (dispatch point for extensions) -@inline function wrap_array(::AbstractTypedPool{T,Vector{T}}, - flat_view, dims::NTuple{N,Int}) where {T,N} - unsafe_wrap(Array{T,N}, pointer(flat_view), dims) +@inline function wrap_array( + ::AbstractTypedPool{T, Vector{T}}, + flat_view, dims::NTuple{N, Int} + ) where {T, N} + return unsafe_wrap(Array{T, N}, pointer(flat_view), dims) end # ============================================================================== @@ -53,7 +55,7 @@ end @inline function _check_pool_growth(tp::AbstractTypedPool, idx::Int) # Warn at every power of 2 from 512 onward (512, 1024, 2048, …) - if idx >= 512 && (idx & (idx - 1)) == 0 + return if idx >= 512 && (idx & (idx - 1)) == 0 _warn_pool_growing(tp, idx) end end @@ -140,7 +142,7 @@ Zero-allocation reshape using `setfield!`-based wrapper reuse (Julia 1.11+). reuses cached `Array{T,N}` wrapper with `setfield!(:ref, :size)` pointing to `A`'s memory. Automatically reclaimed on `rewind!` via `n_active` restoration. """ -@inline function _reshape_impl!(pool::AdaptiveArrayPool, A::Array{T,M}, dims::NTuple{N,Int}) where {T,M,N} +@inline function _reshape_impl!(pool::AdaptiveArrayPool, A::Array{T, M}, dims::NTuple{N, Int}) where {T, M, N} # Reject negative dimensions (match Base.reshape behavior) for d in dims d < 0 && throw(ArgumentError("invalid Array dimensions")) @@ -148,8 +150,11 @@ Zero-allocation reshape using `setfield!`-based wrapper reuse (Julia 1.11+). # Validate before claiming slot total_len = safe_prod(dims) - length(A) == total_len || throw(DimensionMismatch( - "new dimensions $(dims) must be consistent with array length $(length(A))")) + length(A) == total_len || throw( + DimensionMismatch( + "new dimensions $(dims) must be consistent with array length $(length(A))" + ) + ) # 0-D reshape: rare edge case, delegate to Base (nd_wrappers is 1-indexed by N) N == 0 && return reshape(A, dims) @@ -169,7 +174,7 @@ Zero-allocation reshape using `setfield!`-based wrapper reuse (Julia 1.11+). if wrappers !== nothing && slot <= length(wrappers) wrapper = @inbounds wrappers[slot] if wrapper !== nothing - arr = wrapper::Array{T,N} + arr = wrapper::Array{T, N} setfield!(arr, :ref, getfield(A, :ref)) setfield!(arr, :size, dims) return arr @@ -177,7 +182,7 @@ Zero-allocation reshape using `setfield!`-based wrapper reuse (Julia 1.11+). end # Cache miss (first call per slot+N): create wrapper, cache forever - arr = Array{T,N}(undef, ntuple(_ -> 0, Val(N))) + arr = Array{T, N}(undef, ntuple(_ -> 0, Val(N))) setfield!(arr, :ref, getfield(A, :ref)) setfield!(arr, :size, dims) _store_nd_wrapper!(tp, N, slot, arr) @@ -202,7 +207,7 @@ function _store_nd_wrapper!(tp::AbstractTypedPool, N::Int, slot::Int, wrapper) if N > length(tp.nd_wrappers) old_len = length(tp.nd_wrappers) resize!(tp.nd_wrappers, N) - for i in (old_len+1):N + for i in (old_len + 1):N @inbounds tp.nd_wrappers[i] = nothing end end @@ -213,12 +218,12 @@ function _store_nd_wrapper!(tp::AbstractTypedPool, N::Int, slot::Int, wrapper) elseif slot > length(wrappers) old_len = length(wrappers) resize!(wrappers, slot) - for i in (old_len+1):slot + for i in (old_len + 1):slot @inbounds wrappers[i] = nothing end end @inbounds wrappers[slot] = wrapper - nothing + return nothing end """ @@ -295,7 +300,7 @@ For non-fixed-slot types, sets `_touched_has_others` flag. else @inbounds pool._touched_type_masks[depth] |= b end - nothing + return nothing end # CPU-specific override: adds lazy first-touch checkpoint in lazy mode @@ -323,7 +328,7 @@ end end @inbounds pool._touched_type_masks[depth] = current_mask | b end - nothing + return nothing end # ============================================================================== @@ -348,7 +353,7 @@ end end @inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} - _acquire_impl!(pool, T, dims...) + return _acquire_impl!(pool, T, dims...) end # Similar-style @@ -415,19 +420,19 @@ See also: [`unsafe_acquire!`](@ref) for native array access. """ @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T} _record_type_touch!(pool, T) - _acquire_impl!(pool, T, n) + return _acquire_impl!(pool, T, n) end # Multi-dimensional support (zero-allocation with N-D cache) @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _acquire_impl!(pool, T, dims...) + return _acquire_impl!(pool, T, dims...) end # Tuple support: allows acquire!(pool, T, size(A)) where size(A) returns NTuple{N,Int} @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} _record_type_touch!(pool, T) - _acquire_impl!(pool, T, dims...) + return _acquire_impl!(pool, T, dims...) end # Similar-style convenience methods @@ -447,7 +452,7 @@ end """ @inline function acquire!(pool::AbstractArrayPool, x::AbstractArray) _record_type_touch!(pool, eltype(x)) - _acquire_impl!(pool, eltype(x), size(x)) + return _acquire_impl!(pool, eltype(x), size(x)) end # ============================================================================== @@ -502,18 +507,18 @@ See also: [`acquire!`](@ref) for view-based access. """ @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T} _record_type_touch!(pool, T) - _unsafe_acquire_impl!(pool, T, n) + return _unsafe_acquire_impl!(pool, T, n) end @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _unsafe_acquire_impl!(pool, T, dims...) + return _unsafe_acquire_impl!(pool, T, dims...) end # Tuple support @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} _record_type_touch!(pool, T) - _unsafe_acquire_impl!(pool, T, dims) + return _unsafe_acquire_impl!(pool, T, dims) end # Similar-style convenience methods @@ -533,7 +538,7 @@ end """ @inline function unsafe_acquire!(pool::AbstractArrayPool, x::AbstractArray) _record_type_touch!(pool, eltype(x)) - _unsafe_acquire_impl!(pool, eltype(x), size(x)) + return _unsafe_acquire_impl!(pool, eltype(x), size(x)) end # ============================================================================== @@ -570,14 +575,14 @@ const _acquire_array_impl! = _unsafe_acquire_impl! # --- acquire! for DisabledPool{:cpu} --- @inline acquire!(::DisabledPool{:cpu}, ::Type{T}, n::Int) where {T} = Vector{T}(undef, n) -@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = Array{T,N}(undef, dims) -@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims) +@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = Array{T, N}(undef, dims) +@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, dims) @inline acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x) # --- unsafe_acquire! for DisabledPool{:cpu} --- @inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, n::Int) where {T} = Vector{T}(undef, n) -@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = Array{T,N}(undef, dims) -@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims) +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = Array{T, N}(undef, dims) +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, dims) @inline unsafe_acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x) # --- Generic DisabledPool fallbacks (unknown backend → error) --- @@ -588,11 +593,11 @@ const _acquire_array_impl! = _unsafe_acquire_impl! # Called when: USE_POOLING=true + @maybe_with_pool + MAYBE_POOLING_ENABLED[]=false # Explicit overloads for proper inlining (especially important for CUDA backend). @inline _acquire_impl!(p::DisabledPool, ::Type{T}, n::Int) where {T} = acquire!(p, T, n) -@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = acquire!(p, T, dims...) -@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = acquire!(p, T, dims) +@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = acquire!(p, T, dims...) +@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = acquire!(p, T, dims) @inline _acquire_impl!(p::DisabledPool, x::AbstractArray) = acquire!(p, x) @inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, n::Int) where {T} = unsafe_acquire!(p, T, n) -@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_acquire!(p, T, dims...) -@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_acquire!(p, T, dims) +@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = unsafe_acquire!(p, T, dims...) +@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = unsafe_acquire!(p, T, dims) @inline _unsafe_acquire_impl!(p::DisabledPool, x::AbstractArray) = unsafe_acquire!(p, x) diff --git a/src/bitarray.jl b/src/bitarray.jl index 8e9e4ab3..8f789b58 100644 --- a/src/bitarray.jl +++ b/src/bitarray.jl @@ -47,7 +47,7 @@ Uses `setfield!`-based wrapper reuse — unlimited dim patterns, 0-alloc after w The returned BitArray is only valid within the `@with_pool` scope. Do NOT use after the scope ends (use-after-free risk). """ -function get_bitarray!(tp::BitTypedPool, dims::NTuple{N,Int}) where {N} +function get_bitarray!(tp::BitTypedPool, dims::NTuple{N, Int}) where {N} total_len = safe_prod(dims) tp.n_active += 1 idx = tp.n_active @@ -121,11 +121,11 @@ end return _unsafe_acquire_impl!(pool, Bit, n) end -@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} +@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} return _unsafe_acquire_impl!(pool, Bit, dims...) end -@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} +@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} return _unsafe_acquire_impl!(pool, Bit, dims...) end @@ -139,12 +139,12 @@ end return get_bitarray!(tp, n) end -@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} +@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} tp = get_typed_pool!(pool, Bit)::BitTypedPool return get_bitarray!(tp, dims) end -@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} +@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} tp = get_typed_pool!(pool, Bit)::BitTypedPool return get_bitarray!(tp, dims) end @@ -155,10 +155,10 @@ end # --- acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) --- @inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n) -@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims) -@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims) +@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = BitArray{N}(undef, dims) +@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = BitArray{N}(undef, dims) # --- unsafe_acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) --- @inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n) -@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims) -@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims) +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = BitArray{N}(undef, dims) +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = BitArray{N}(undef, dims) diff --git a/src/convenience.jl b/src/convenience.jl index 84312e51..ada5e828 100644 --- a/src/convenience.jl +++ b/src/convenience.jl @@ -42,52 +42,52 @@ end See also: [`ones!`](@ref), [`similar!`](@ref), [`acquire!`](@ref) """ -@inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} +@inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _zeros_impl!(pool, T, dims...) + return _zeros_impl!(pool, T, dims...) end -@inline function zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} +@inline function zeros!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} _record_type_touch!(pool, default_eltype(pool)) - _zeros_impl!(pool, default_eltype(pool), dims...) + return _zeros_impl!(pool, default_eltype(pool), dims...) end -@inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} +@inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} _record_type_touch!(pool, T) - _zeros_impl!(pool, T, dims...) + return _zeros_impl!(pool, T, dims...) end -@inline function zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} +@inline function zeros!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} _record_type_touch!(pool, default_eltype(pool)) - _zeros_impl!(pool, default_eltype(pool), dims...) + return _zeros_impl!(pool, default_eltype(pool), dims...) end # Internal implementation (for macro transformation) -@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} +@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} arr = _acquire_impl!(pool, T, dims...) fill!(arr, zero(T)) - arr + return arr end # Default type overload for macro transformation (uses default_eltype for backend flexibility) -@inline function _zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _zeros_impl!(pool, default_eltype(pool), dims...) +@inline function _zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} + return _zeros_impl!(pool, default_eltype(pool), dims...) end # NTuple overloads for macro transformation (handles zeros!(pool, T, size(x)) form) -@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _zeros_impl!(pool, T, dims...) +@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} + return _zeros_impl!(pool, T, dims...) end -@inline function _zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _zeros_impl!(pool, default_eltype(pool), dims...) +@inline function _zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} + return _zeros_impl!(pool, default_eltype(pool), dims...) end # Bit type specialization: zeros!(pool, Bit, ...) delegates to falses!(pool, ...) -@inline zeros!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = falses!(pool, dims...) -@inline zeros!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = falses!(pool, dims) -@inline _zeros_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = _falses_impl!(pool, dims...) -@inline _zeros_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = _falses_impl!(pool, dims) +@inline zeros!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = falses!(pool, dims...) +@inline zeros!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = falses!(pool, dims) +@inline _zeros_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = _falses_impl!(pool, dims...) +@inline _zeros_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = _falses_impl!(pool, dims) # ============================================================================== # ones! - Acquire one-initialized arrays from pool @@ -115,52 +115,52 @@ end See also: [`zeros!`](@ref), [`similar!`](@ref), [`acquire!`](@ref) """ -@inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} +@inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _ones_impl!(pool, T, dims...) + return _ones_impl!(pool, T, dims...) end -@inline function ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} +@inline function ones!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} _record_type_touch!(pool, default_eltype(pool)) - _ones_impl!(pool, default_eltype(pool), dims...) + return _ones_impl!(pool, default_eltype(pool), dims...) end -@inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} +@inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} _record_type_touch!(pool, T) - _ones_impl!(pool, T, dims...) + return _ones_impl!(pool, T, dims...) end -@inline function ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} +@inline function ones!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} _record_type_touch!(pool, default_eltype(pool)) - _ones_impl!(pool, default_eltype(pool), dims...) + return _ones_impl!(pool, default_eltype(pool), dims...) end # Internal implementation (for macro transformation) -@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} +@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} arr = _acquire_impl!(pool, T, dims...) fill!(arr, one(T)) - arr + return arr end # Default type overload for macro transformation (uses default_eltype for backend flexibility) -@inline function _ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _ones_impl!(pool, default_eltype(pool), dims...) +@inline function _ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} + return _ones_impl!(pool, default_eltype(pool), dims...) end # NTuple overloads for macro transformation (handles ones!(pool, T, size(x)) form) -@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _ones_impl!(pool, T, dims...) +@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} + return _ones_impl!(pool, T, dims...) end -@inline function _ones_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _ones_impl!(pool, default_eltype(pool), dims...) +@inline function _ones_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} + return _ones_impl!(pool, default_eltype(pool), dims...) end # Bit type specialization: ones!(pool, Bit, ...) delegates to trues!(pool, ...) -@inline ones!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = trues!(pool, dims...) -@inline ones!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = trues!(pool, dims) -@inline _ones_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = _trues_impl!(pool, dims...) -@inline _ones_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = _trues_impl!(pool, dims) +@inline ones!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = trues!(pool, dims...) +@inline ones!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = trues!(pool, dims) +@inline _ones_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = _trues_impl!(pool, dims...) +@inline _ones_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = _trues_impl!(pool, dims) # ============================================================================== # trues! - Acquire BitArray filled with true from pool @@ -185,22 +185,22 @@ end See also: [`falses!`](@ref), [`ones!`](@ref), [`acquire!`](@ref) """ -@inline function trues!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} +@inline function trues!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} _record_type_touch!(pool, Bit) - _trues_impl!(pool, dims...) + return _trues_impl!(pool, dims...) end -@inline function trues!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} +@inline function trues!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} _record_type_touch!(pool, Bit) - _trues_impl!(pool, dims...) + return _trues_impl!(pool, dims...) end # Internal implementation (for macro transformation) -@inline function _trues_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} +@inline function _trues_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} arr = _acquire_impl!(pool, Bit, dims...) fill!(arr, true) - arr + return arr end -@inline _trues_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} = _trues_impl!(pool, dims...) +@inline _trues_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} = _trues_impl!(pool, dims...) # ============================================================================== # falses! - Acquire BitArray filled with false from pool @@ -225,22 +225,22 @@ end See also: [`trues!`](@ref), [`zeros!`](@ref), [`acquire!`](@ref) """ -@inline function falses!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} +@inline function falses!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} _record_type_touch!(pool, Bit) - _falses_impl!(pool, dims...) + return _falses_impl!(pool, dims...) end -@inline function falses!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} +@inline function falses!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} _record_type_touch!(pool, Bit) - _falses_impl!(pool, dims...) + return _falses_impl!(pool, dims...) end # Internal implementation (for macro transformation) -@inline function _falses_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} +@inline function _falses_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} arr = _acquire_impl!(pool, Bit, dims...) fill!(arr, false) - arr + return arr end -@inline _falses_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} = _falses_impl!(pool, dims...) +@inline _falses_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} = _falses_impl!(pool, dims...) # ============================================================================== # similar! - Acquire arrays with same type/size as template @@ -274,39 +274,39 @@ See also: [`zeros!`](@ref), [`ones!`](@ref), [`acquire!`](@ref) """ @inline function similar!(pool::AbstractArrayPool, x::AbstractArray) _record_type_touch!(pool, eltype(x)) - _similar_impl!(pool, x) + return _similar_impl!(pool, x) end @inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T} _record_type_touch!(pool, T) - _similar_impl!(pool, x, T) + return _similar_impl!(pool, x, T) end -@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} +@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int, N}) where {N} _record_type_touch!(pool, eltype(x)) - _similar_impl!(pool, x, dims...) + return _similar_impl!(pool, x, dims...) end -@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} +@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _similar_impl!(pool, x, T, dims...) + return _similar_impl!(pool, x, T, dims...) end # Internal implementation (for macro transformation) @inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray) - _acquire_impl!(pool, eltype(x), size(x)) + return _acquire_impl!(pool, eltype(x), size(x)) end @inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T} - _acquire_impl!(pool, T, size(x)) + return _acquire_impl!(pool, T, size(x)) end -@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} - _acquire_impl!(pool, eltype(x), dims...) +@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int, N}) where {N} + return _acquire_impl!(pool, eltype(x), dims...) end -@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _acquire_impl!(pool, T, dims...) +@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} + return _acquire_impl!(pool, T, dims...) end # ============================================================================== @@ -348,26 +348,26 @@ end See also: [`acquire!`](@ref), [`similar!`](@ref) """ -@inline function reshape!(pool::AbstractArrayPool, A::AbstractArray{T}, dims::Vararg{Int,N}) where {T,N} +@inline function reshape!(pool::AbstractArrayPool, A::AbstractArray{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _reshape_impl!(pool, A, dims) + return _reshape_impl!(pool, A, dims) end -@inline function reshape!(pool::AbstractArrayPool, A::AbstractArray{T}, dims::NTuple{N,Int}) where {T,N} +@inline function reshape!(pool::AbstractArrayPool, A::AbstractArray{T}, dims::NTuple{N, Int}) where {T, N} _record_type_touch!(pool, T) - _reshape_impl!(pool, A, dims) + return _reshape_impl!(pool, A, dims) end # Internal implementation (fallback: delegates to Base.reshape) -@inline function _reshape_impl!(::AbstractArrayPool, A::AbstractArray, dims::NTuple{N,Int}) where {N} +@inline function _reshape_impl!(::AbstractArrayPool, A::AbstractArray, dims::NTuple{N, Int}) where {N} for d in dims d < 0 && throw(ArgumentError("invalid Array dimensions")) end - reshape(A, dims) + return reshape(A, dims) end # Vararg forwarding (macro transforms reshape!(pool, A, 3, 4) → _reshape_impl!(pool, A, 3, 4)) -@inline _reshape_impl!(pool::AbstractArrayPool, A::AbstractArray, dims::Vararg{Int,N}) where {N} = +@inline _reshape_impl!(pool::AbstractArrayPool, A::AbstractArray, dims::Vararg{Int, N}) where {N} = _reshape_impl!(pool, A, dims) # ============================================================================== @@ -396,45 +396,45 @@ end See also: [`unsafe_ones!`](@ref), [`zeros!`](@ref), [`unsafe_acquire!`](@ref) """ -@inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} +@inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _unsafe_zeros_impl!(pool, T, dims...) + return _unsafe_zeros_impl!(pool, T, dims...) end -@inline function unsafe_zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} +@inline function unsafe_zeros!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} _record_type_touch!(pool, default_eltype(pool)) - _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) + return _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) end -@inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} +@inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} _record_type_touch!(pool, T) - _unsafe_zeros_impl!(pool, T, dims...) + return _unsafe_zeros_impl!(pool, T, dims...) end -@inline function unsafe_zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} +@inline function unsafe_zeros!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} _record_type_touch!(pool, default_eltype(pool)) - _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) + return _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) end # Internal implementation (for macro transformation) -@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} +@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} arr = _unsafe_acquire_impl!(pool, T, dims...) fill!(arr, zero(T)) - arr + return arr end # Default type overload for macro transformation (uses default_eltype for backend flexibility) -@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) +@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} + return _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) end # NTuple overloads for macro transformation (handles unsafe_zeros!(pool, T, size(x)) form) -@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _unsafe_zeros_impl!(pool, T, dims...) +@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} + return _unsafe_zeros_impl!(pool, T, dims...) end -@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) +@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} + return _unsafe_zeros_impl!(pool, default_eltype(pool), dims...) end # ============================================================================== @@ -463,45 +463,45 @@ end See also: [`unsafe_zeros!`](@ref), [`ones!`](@ref), [`unsafe_acquire!`](@ref) """ -@inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} +@inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _unsafe_ones_impl!(pool, T, dims...) + return _unsafe_ones_impl!(pool, T, dims...) end -@inline function unsafe_ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} +@inline function unsafe_ones!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} _record_type_touch!(pool, default_eltype(pool)) - _unsafe_ones_impl!(pool, default_eltype(pool), dims...) + return _unsafe_ones_impl!(pool, default_eltype(pool), dims...) end -@inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} +@inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} _record_type_touch!(pool, T) - _unsafe_ones_impl!(pool, T, dims...) + return _unsafe_ones_impl!(pool, T, dims...) end -@inline function unsafe_ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} +@inline function unsafe_ones!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} _record_type_touch!(pool, default_eltype(pool)) - _unsafe_ones_impl!(pool, default_eltype(pool), dims...) + return _unsafe_ones_impl!(pool, default_eltype(pool), dims...) end # Internal implementation (for macro transformation) -@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} +@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} arr = _unsafe_acquire_impl!(pool, T, dims...) fill!(arr, one(T)) - arr + return arr end # Default type overload for macro transformation (uses default_eltype for backend flexibility) -@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} - _unsafe_ones_impl!(pool, default_eltype(pool), dims...) +@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N} + return _unsafe_ones_impl!(pool, default_eltype(pool), dims...) end # NTuple overloads for macro transformation (handles unsafe_ones!(pool, T, size(x)) form) -@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} - _unsafe_ones_impl!(pool, T, dims...) +@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} + return _unsafe_ones_impl!(pool, T, dims...) end -@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} - _unsafe_ones_impl!(pool, default_eltype(pool), dims...) +@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} + return _unsafe_ones_impl!(pool, default_eltype(pool), dims...) end # ============================================================================== @@ -535,39 +535,39 @@ See also: [`similar!`](@ref), [`unsafe_acquire!`](@ref) """ @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray) _record_type_touch!(pool, eltype(x)) - _unsafe_similar_impl!(pool, x) + return _unsafe_similar_impl!(pool, x) end @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T} _record_type_touch!(pool, T) - _unsafe_similar_impl!(pool, x, T) + return _unsafe_similar_impl!(pool, x, T) end -@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} +@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int, N}) where {N} _record_type_touch!(pool, eltype(x)) - _unsafe_similar_impl!(pool, x, dims...) + return _unsafe_similar_impl!(pool, x, dims...) end -@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} +@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _unsafe_similar_impl!(pool, x, T, dims...) + return _unsafe_similar_impl!(pool, x, T, dims...) end # Internal implementation (for macro transformation) @inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray) - _unsafe_acquire_impl!(pool, eltype(x), size(x)) + return _unsafe_acquire_impl!(pool, eltype(x), size(x)) end @inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T} - _unsafe_acquire_impl!(pool, T, size(x)) + return _unsafe_acquire_impl!(pool, T, size(x)) end -@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} - _unsafe_acquire_impl!(pool, eltype(x), dims...) +@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int, N}) where {N} + return _unsafe_acquire_impl!(pool, eltype(x), dims...) end -@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} - _unsafe_acquire_impl!(pool, T, dims...) +@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} + return _unsafe_acquire_impl!(pool, T, dims...) end # ============================================================================== @@ -593,7 +593,7 @@ end function Base.showerror(io::IO, e::BackendNotLoadedError) print(io, "Backend :$(e.backend) is not available. ") - if e.backend == :cuda + return if e.backend == :cuda print(io, "Make sure CUDA.jl is loaded: `using CUDA`") else print(io, "Make sure the appropriate backend package is loaded.") @@ -619,56 +619,56 @@ default_eltype(::DisabledPool{:cpu}) = Float64 end # --- zeros! for DisabledPool{:cpu} --- -@inline zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...) -@inline zeros!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = zeros(default_eltype(p), dims...) -@inline zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...) -@inline zeros!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = zeros(default_eltype(p), dims...) +@inline zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = zeros(T, dims...) +@inline zeros!(p::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = zeros(default_eltype(p), dims...) +@inline zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = zeros(T, dims...) +@inline zeros!(p::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = zeros(default_eltype(p), dims...) # --- ones! for DisabledPool{:cpu} --- -@inline ones!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...) -@inline ones!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = ones(default_eltype(p), dims...) -@inline ones!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...) -@inline ones!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = ones(default_eltype(p), dims...) +@inline ones!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = ones(T, dims...) +@inline ones!(p::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = ones(default_eltype(p), dims...) +@inline ones!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = ones(T, dims...) +@inline ones!(p::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = ones(default_eltype(p), dims...) # --- zeros!/ones! for DisabledPool{:cpu} with Bit type (returns BitArray) --- -@inline zeros!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = falses(dims...) -@inline zeros!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = falses(dims...) -@inline ones!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = trues(dims...) -@inline ones!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = trues(dims...) +@inline zeros!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = falses(dims...) +@inline zeros!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = falses(dims...) +@inline ones!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = trues(dims...) +@inline ones!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = trues(dims...) # --- trues!/falses! for DisabledPool{:cpu} --- -@inline trues!(::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = trues(dims...) -@inline trues!(::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = trues(dims...) -@inline falses!(::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = falses(dims...) -@inline falses!(::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = falses(dims...) +@inline trues!(::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = trues(dims...) +@inline trues!(::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = trues(dims...) +@inline falses!(::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = falses(dims...) +@inline falses!(::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = falses(dims...) # --- similar! for DisabledPool{:cpu} --- @inline similar!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x) @inline similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}) where {T} = similar(x, T) -@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...) -@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...) +@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, dims::Vararg{Int, N}) where {N} = similar(x, dims...) +@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = similar(x, T, dims...) # --- reshape! for DisabledPool{:cpu} --- -@inline reshape!(::DisabledPool{:cpu}, A::AbstractArray, dims::Vararg{Int,N}) where {N} = reshape(A, dims...) -@inline reshape!(::DisabledPool{:cpu}, A::AbstractArray, dims::NTuple{N,Int}) where {N} = reshape(A, dims) +@inline reshape!(::DisabledPool{:cpu}, A::AbstractArray, dims::Vararg{Int, N}) where {N} = reshape(A, dims...) +@inline reshape!(::DisabledPool{:cpu}, A::AbstractArray, dims::NTuple{N, Int}) where {N} = reshape(A, dims) # --- unsafe_zeros! for DisabledPool{:cpu} --- -@inline unsafe_zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...) -@inline unsafe_zeros!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = zeros(default_eltype(p), dims...) -@inline unsafe_zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...) -@inline unsafe_zeros!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = zeros(default_eltype(p), dims...) +@inline unsafe_zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = zeros(T, dims...) +@inline unsafe_zeros!(p::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = zeros(default_eltype(p), dims...) +@inline unsafe_zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = zeros(T, dims...) +@inline unsafe_zeros!(p::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = zeros(default_eltype(p), dims...) # --- unsafe_ones! for DisabledPool{:cpu} --- -@inline unsafe_ones!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...) -@inline unsafe_ones!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = ones(default_eltype(p), dims...) -@inline unsafe_ones!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...) -@inline unsafe_ones!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = ones(default_eltype(p), dims...) +@inline unsafe_ones!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = ones(T, dims...) +@inline unsafe_ones!(p::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = ones(default_eltype(p), dims...) +@inline unsafe_ones!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = ones(T, dims...) +@inline unsafe_ones!(p::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = ones(default_eltype(p), dims...) # --- unsafe_similar! for DisabledPool{:cpu} --- @inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x) @inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}) where {T} = similar(x, T) -@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...) -@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...) +@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, dims::Vararg{Int, N}) where {N} = similar(x, dims...) +@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = similar(x, T, dims...) # --- Generic DisabledPool fallbacks (unknown backend → error) --- @inline zeros!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B) @@ -691,48 +691,48 @@ end # Explicit overloads for proper inlining (especially important for CUDA backend). # --- _zeros_impl! --- -@inline _zeros_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros!(p, T, dims...) -@inline _zeros_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = zeros!(p, dims...) -@inline _zeros_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros!(p, T, dims) -@inline _zeros_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = zeros!(p, dims) +@inline _zeros_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = zeros!(p, T, dims...) +@inline _zeros_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = zeros!(p, dims...) +@inline _zeros_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = zeros!(p, T, dims) +@inline _zeros_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = zeros!(p, dims) # --- _ones_impl! --- -@inline _ones_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones!(p, T, dims...) -@inline _ones_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = ones!(p, dims...) -@inline _ones_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones!(p, T, dims) -@inline _ones_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = ones!(p, dims) +@inline _ones_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = ones!(p, T, dims...) +@inline _ones_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = ones!(p, dims...) +@inline _ones_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = ones!(p, T, dims) +@inline _ones_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = ones!(p, dims) # --- _trues_impl! --- -@inline _trues_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = trues!(p, dims...) -@inline _trues_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = trues!(p, dims) +@inline _trues_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = trues!(p, dims...) +@inline _trues_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = trues!(p, dims) # --- _falses_impl! --- -@inline _falses_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = falses!(p, dims...) -@inline _falses_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = falses!(p, dims) +@inline _falses_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = falses!(p, dims...) +@inline _falses_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = falses!(p, dims) # --- _similar_impl! --- @inline _similar_impl!(p::DisabledPool, x::AbstractArray) = similar!(p, x) @inline _similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}) where {T} = similar!(p, x, T) -@inline _similar_impl!(p::DisabledPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar!(p, x, dims...) -@inline _similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar!(p, x, T, dims...) +@inline _similar_impl!(p::DisabledPool, x::AbstractArray, dims::Vararg{Int, N}) where {N} = similar!(p, x, dims...) +@inline _similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = similar!(p, x, T, dims...) # --- _reshape_impl! --- -@inline _reshape_impl!(p::DisabledPool, A::AbstractArray, dims::NTuple{N,Int}) where {N} = reshape!(p, A, dims) +@inline _reshape_impl!(p::DisabledPool, A::AbstractArray, dims::NTuple{N, Int}) where {N} = reshape!(p, A, dims) # --- _unsafe_zeros_impl! --- -@inline _unsafe_zeros_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_zeros!(p, T, dims...) -@inline _unsafe_zeros_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = unsafe_zeros!(p, dims...) -@inline _unsafe_zeros_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_zeros!(p, T, dims) -@inline _unsafe_zeros_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = unsafe_zeros!(p, dims) +@inline _unsafe_zeros_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = unsafe_zeros!(p, T, dims...) +@inline _unsafe_zeros_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = unsafe_zeros!(p, dims...) +@inline _unsafe_zeros_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = unsafe_zeros!(p, T, dims) +@inline _unsafe_zeros_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = unsafe_zeros!(p, dims) # --- _unsafe_ones_impl! --- -@inline _unsafe_ones_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_ones!(p, T, dims...) -@inline _unsafe_ones_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = unsafe_ones!(p, dims...) -@inline _unsafe_ones_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_ones!(p, T, dims) -@inline _unsafe_ones_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = unsafe_ones!(p, dims) +@inline _unsafe_ones_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = unsafe_ones!(p, T, dims...) +@inline _unsafe_ones_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = unsafe_ones!(p, dims...) +@inline _unsafe_ones_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = unsafe_ones!(p, T, dims) +@inline _unsafe_ones_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = unsafe_ones!(p, dims) # --- _unsafe_similar_impl! --- @inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray) = unsafe_similar!(p, x) @inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}) where {T} = unsafe_similar!(p, x, T) -@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} = unsafe_similar!(p, x, dims...) -@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_similar!(p, x, T, dims...) +@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, dims::Vararg{Int, N}) where {N} = unsafe_similar!(p, x, dims...) +@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = unsafe_similar!(p, x, T, dims...) diff --git a/src/legacy/acquire.jl b/src/legacy/acquire.jl index eb47ab03..1d94dfee 100644 --- a/src/legacy/acquire.jl +++ b/src/legacy/acquire.jl @@ -3,13 +3,15 @@ # ============================================================================== # Allocate a new vector (dispatch point for extensions) -@inline allocate_vector(::AbstractTypedPool{T,Vector{T}}, n::Int) where {T} = +@inline allocate_vector(::AbstractTypedPool{T, Vector{T}}, n::Int) where {T} = Vector{T}(undef, n) # Wrap flat view into N-D array (dispatch point for extensions) -@inline function wrap_array(::AbstractTypedPool{T,Vector{T}}, - flat_view, dims::NTuple{N,Int}) where {T,N} - unsafe_wrap(Array{T,N}, pointer(flat_view), dims) +@inline function wrap_array( + ::AbstractTypedPool{T, Vector{T}}, + flat_view, dims::NTuple{N, Int} + ) where {T, N} + return unsafe_wrap(Array{T, N}, pointer(flat_view), dims) end # ============================================================================== @@ -185,7 +187,7 @@ For non-fixed-slot types, sets `_touched_has_others` flag. else @inbounds pool._touched_type_masks[depth] |= b end - nothing + return nothing end # CPU-specific override: adds lazy first-touch checkpoint in lazy mode @@ -213,7 +215,7 @@ end end @inbounds pool._touched_type_masks[depth] = current_mask | b end - nothing + return nothing end # ============================================================================== @@ -238,7 +240,7 @@ end end @inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} - _acquire_impl!(pool, T, dims...) + return _acquire_impl!(pool, T, dims...) end # Similar-style @@ -305,19 +307,19 @@ See also: [`unsafe_acquire!`](@ref) for native array access. """ @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T} _record_type_touch!(pool, T) - _acquire_impl!(pool, T, n) + return _acquire_impl!(pool, T, n) end # Multi-dimensional support (zero-allocation with N-D cache) @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _acquire_impl!(pool, T, dims...) + return _acquire_impl!(pool, T, dims...) end # Tuple support: allows acquire!(pool, T, size(A)) where size(A) returns NTuple{N,Int} @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} _record_type_touch!(pool, T) - _acquire_impl!(pool, T, dims...) + return _acquire_impl!(pool, T, dims...) end # Similar-style convenience methods @@ -337,7 +339,7 @@ end """ @inline function acquire!(pool::AbstractArrayPool, x::AbstractArray) _record_type_touch!(pool, eltype(x)) - _acquire_impl!(pool, eltype(x), size(x)) + return _acquire_impl!(pool, eltype(x), size(x)) end # ============================================================================== @@ -383,18 +385,18 @@ See also: [`acquire!`](@ref) for view-based access. """ @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T} _record_type_touch!(pool, T) - _unsafe_acquire_impl!(pool, T, n) + return _unsafe_acquire_impl!(pool, T, n) end @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} _record_type_touch!(pool, T) - _unsafe_acquire_impl!(pool, T, dims...) + return _unsafe_acquire_impl!(pool, T, dims...) end # Tuple support @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} _record_type_touch!(pool, T) - _unsafe_acquire_impl!(pool, T, dims) + return _unsafe_acquire_impl!(pool, T, dims) end # Similar-style convenience methods @@ -414,7 +416,7 @@ end """ @inline function unsafe_acquire!(pool::AbstractArrayPool, x::AbstractArray) _record_type_touch!(pool, eltype(x)) - _unsafe_acquire_impl!(pool, eltype(x), size(x)) + return _unsafe_acquire_impl!(pool, eltype(x), size(x)) end # ============================================================================== @@ -451,14 +453,14 @@ const _acquire_array_impl! = _unsafe_acquire_impl! # --- acquire! for DisabledPool{:cpu} --- @inline acquire!(::DisabledPool{:cpu}, ::Type{T}, n::Int) where {T} = Vector{T}(undef, n) -@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = Array{T,N}(undef, dims) -@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims) +@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = Array{T, N}(undef, dims) +@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, dims) @inline acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x) # --- unsafe_acquire! for DisabledPool{:cpu} --- @inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, n::Int) where {T} = Vector{T}(undef, n) -@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = Array{T,N}(undef, dims) -@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims) +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = Array{T, N}(undef, dims) +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, dims) @inline unsafe_acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x) # --- Generic DisabledPool fallbacks (unknown backend → error) --- @@ -469,11 +471,11 @@ const _acquire_array_impl! = _unsafe_acquire_impl! # Called when: USE_POOLING=true + @maybe_with_pool + MAYBE_POOLING_ENABLED[]=false # Explicit overloads for proper inlining (especially important for CUDA backend). @inline _acquire_impl!(p::DisabledPool, ::Type{T}, n::Int) where {T} = acquire!(p, T, n) -@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = acquire!(p, T, dims...) -@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = acquire!(p, T, dims) +@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = acquire!(p, T, dims...) +@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = acquire!(p, T, dims) @inline _acquire_impl!(p::DisabledPool, x::AbstractArray) = acquire!(p, x) @inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, n::Int) where {T} = unsafe_acquire!(p, T, n) -@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_acquire!(p, T, dims...) -@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_acquire!(p, T, dims) +@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = unsafe_acquire!(p, T, dims...) +@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = unsafe_acquire!(p, T, dims) @inline _unsafe_acquire_impl!(p::DisabledPool, x::AbstractArray) = unsafe_acquire!(p, x) diff --git a/src/legacy/bitarray.jl b/src/legacy/bitarray.jl index 97731f68..7964e8cb 100644 --- a/src/legacy/bitarray.jl +++ b/src/legacy/bitarray.jl @@ -66,7 +66,7 @@ Uses N-way set-associative cache with up to CACHE_WAYS patterns per slot. The returned BitArray is only valid within the `@with_pool` scope. Do NOT use after the scope ends (use-after-free risk). """ -function get_bitarray!(tp::BitTypedPool, dims::NTuple{N,Int}) where {N} +function get_bitarray!(tp::BitTypedPool, dims::NTuple{N, Int}) where {N} total_len = safe_prod(dims) tp.n_active += 1 idx = tp.n_active @@ -118,7 +118,7 @@ function get_bitarray!(tp::BitTypedPool, dims::NTuple{N,Int}) where {N} @inbounds cached_ptr = tp.nd_ptrs[cache_idx] # Must check isa FIRST for type stability (avoids boxing in == comparison) - if cached_dims isa NTuple{N,Int} && cached_ptr == current_ptr + if cached_dims isa NTuple{N, Int} && cached_ptr == current_ptr if cached_dims == dims # Exact match - return cached BitArray directly (0 alloc) return @inbounds tp.nd_arrays[cache_idx]::BitArray{N} @@ -162,11 +162,11 @@ end return _unsafe_acquire_impl!(pool, Bit, n) end -@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} +@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} return _unsafe_acquire_impl!(pool, Bit, dims...) end -@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} +@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} return _unsafe_acquire_impl!(pool, Bit, dims...) end @@ -180,12 +180,12 @@ end return get_bitarray!(tp, n) end -@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} +@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} tp = get_typed_pool!(pool, Bit)::BitTypedPool return get_bitarray!(tp, dims) end -@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} +@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} tp = get_typed_pool!(pool, Bit)::BitTypedPool return get_bitarray!(tp, dims) end @@ -196,10 +196,10 @@ end # --- acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) --- @inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n) -@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims) -@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims) +@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = BitArray{N}(undef, dims) +@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = BitArray{N}(undef, dims) # --- unsafe_acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) --- @inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n) -@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims) -@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims) +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = BitArray{N}(undef, dims) +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = BitArray{N}(undef, dims) diff --git a/src/legacy/state.jl b/src/legacy/state.jl index 1b512104..71366b14 100644 --- a/src/legacy/state.jl +++ b/src/legacy/state.jl @@ -49,7 +49,7 @@ Also updates _current_depth and bitmask state for type touch tracking. ~77% faster than full checkpoint! when only one type is used. """ -@inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where T +@inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where {T} pool._current_depth += 1 push!(pool._touched_type_masks, UInt16(0)) @@ -58,7 +58,7 @@ Also updates _current_depth and bitmask state for type touch tracking. # (which bypasses _record_type_touch!) is the only acquire path. push!(pool._touched_has_others, _fixed_slot_bit(T) == UInt16(0)) _checkpoint_typed_pool!(get_typed_pool!(pool, T), pool._current_depth) - nothing + return nothing end """ @@ -83,7 +83,7 @@ compile-time unrolling. Increments _current_depth once for all types. # even when _acquire_impl! (bypassing _record_type_touch!) is used. has_any_fallback = any(i -> _fixed_slot_bit(types[i].parameters[1]) == UInt16(0), unique_indices) checkpoint_exprs = [:(_checkpoint_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices] - quote + return quote pool._current_depth += 1 push!(pool._touched_type_masks, UInt16(0)) push!(pool._touched_has_others, $has_any_fallback) @@ -102,7 +102,7 @@ end push!(tp._checkpoint_n_active, tp.n_active) push!(tp._checkpoint_depths, depth) end - nothing + return nothing end """ @@ -134,7 +134,7 @@ Performance: ~2ns vs ~540ns for full `checkpoint!`. _checkpoint_typed_pool!(p, depth) @inbounds pool._touched_has_others[depth] = true end - nothing + return nothing end # ============================================================================== @@ -189,7 +189,7 @@ end Restore state for a specific type only. Also updates _current_depth and bitmask state. """ -@inline function rewind!(pool::AdaptiveArrayPool, ::Type{T}) where T +@inline function rewind!(pool::AdaptiveArrayPool, ::Type{T}) where {T} # Safety guard: at global scope (depth=1), delegate to reset! if pool._current_depth == 1 @@ -200,7 +200,7 @@ Also updates _current_depth and bitmask state. pop!(pool._touched_type_masks) pop!(pool._touched_has_others) pool._current_depth -= 1 - nothing + return nothing end """ @@ -222,7 +222,7 @@ Decrements _current_depth once after all types are rewound. end rewind_exprs = [:(_rewind_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in reverse(unique_indices)] reset_exprs = [:(reset!(get_typed_pool!(pool, types[$i]))) for i in unique_indices] - quote + return quote # Safety guard: at global scope (depth=1), delegate to reset! if pool._current_depth == 1 $(reset_exprs...) @@ -262,7 +262,7 @@ end # - If sentinel (_checkpoint_n_active=[0]), restores to n_active=0 tp.n_active = @inbounds tp._checkpoint_n_active[end] end - nothing + return nothing end """ @@ -278,7 +278,7 @@ Called directly from the macro-generated `finally` clause as a single function c """ @inline function _lazy_rewind!(pool::AdaptiveArrayPool) - d = pool._current_depth + d = pool._current_depth bits = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK _selective_rewind_fixed_slots!(pool, bits) if @inbounds(pool._touched_has_others[d]) @@ -289,7 +289,7 @@ Called directly from the macro-generated `finally` clause as a single function c pop!(pool._touched_type_masks) pop!(pool._touched_has_others) pool._current_depth -= 1 - nothing + return nothing end """ @@ -326,7 +326,7 @@ lazy first-touch checkpoint for each extra type on first acquire, ensuring Case end @inbounds pool._touched_has_others[d] = true end - nothing + return nothing end """ @@ -353,7 +353,7 @@ guaranteed by the `_TYPED_LAZY_BIT` mode set in `_typed_lazy_checkpoint!`. pop!(pool._touched_type_masks) pop!(pool._touched_has_others) pool._current_depth -= 1 - nothing + return nothing end """ @@ -368,15 +368,15 @@ before passing the mask (e.g. `mask & _TYPE_BITS_MASK`). @inline function _selective_rewind_fixed_slots!(pool::AdaptiveArrayPool, mask::UInt16) d = pool._current_depth - _has_bit(mask, Float64) && _rewind_typed_pool!(pool.float64, d) - _has_bit(mask, Float32) && _rewind_typed_pool!(pool.float32, d) - _has_bit(mask, Int64) && _rewind_typed_pool!(pool.int64, d) - _has_bit(mask, Int32) && _rewind_typed_pool!(pool.int32, d) + _has_bit(mask, Float64) && _rewind_typed_pool!(pool.float64, d) + _has_bit(mask, Float32) && _rewind_typed_pool!(pool.float32, d) + _has_bit(mask, Int64) && _rewind_typed_pool!(pool.int64, d) + _has_bit(mask, Int32) && _rewind_typed_pool!(pool.int32, d) _has_bit(mask, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d) _has_bit(mask, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d) - _has_bit(mask, Bool) && _rewind_typed_pool!(pool.bool, d) - _has_bit(mask, Bit) && _rewind_typed_pool!(pool.bits, d) - nothing + _has_bit(mask, Bool) && _rewind_typed_pool!(pool.bool, d) + _has_bit(mask, Bit) && _rewind_typed_pool!(pool.bits, d) + return nothing end # ============================================================================== @@ -559,9 +559,9 @@ to sentinel state while preserving allocated vectors. See also: [`reset!(::AdaptiveArrayPool)`](@ref), [`rewind!`](@ref) """ -@inline function reset!(pool::AdaptiveArrayPool, ::Type{T}) where T +@inline function reset!(pool::AdaptiveArrayPool, ::Type{T}) where {T} reset!(get_typed_pool!(pool, T)) - pool + return pool end """ @@ -574,7 +574,7 @@ See also: [`reset!(::AdaptiveArrayPool)`](@ref), [`rewind!`](@ref) """ @generated function reset!(pool::AdaptiveArrayPool, types::Type...) reset_exprs = [:(reset!(get_typed_pool!(pool, types[$i]))) for i in 1:length(types)] - quote + return quote $(reset_exprs...) pool end diff --git a/src/legacy/types.jl b/src/legacy/types.jl index b056ddbb..8856f9e4 100644 --- a/src/legacy/types.jl +++ b/src/legacy/types.jl @@ -70,7 +70,7 @@ end Abstract base for type-specific memory pools. """ -abstract type AbstractTypedPool{T, V<:AbstractVector{T}} end +abstract type AbstractTypedPool{T, V <: AbstractVector{T}} end """ AbstractArrayPool @@ -324,10 +324,10 @@ const FIXED_SLOT_FIELDS = (:float64, :float32, :int64, :int32, :complexf64, :com # Bits 0-7: fixed-slot type touch tracking (one bit per type) # Bits 14-15: mode flags set during checkpoint to control lazy behavior -const _LAZY_MODE_BIT = UInt16(0x8000) # bit 15: lazy (dynamic-selective) checkpoint mode -const _TYPED_LAZY_BIT = UInt16(0x4000) # bit 14: typed lazy-fallback mode -const _MODE_BITS_MASK = UInt16(0xC000) # bits 14-15: all mode flags -const _TYPE_BITS_MASK = UInt16(0x00FF) # bits 0-7: fixed-slot type bits +const _LAZY_MODE_BIT = UInt16(0x8000) # bit 15: lazy (dynamic-selective) checkpoint mode +const _TYPED_LAZY_BIT = UInt16(0x4000) # bit 14: typed lazy-fallback mode +const _MODE_BITS_MASK = UInt16(0xC000) # bits 14-15: all mode flags +const _TYPE_BITS_MASK = UInt16(0x00FF) # bits 0-7: fixed-slot type bits # ============================================================================== # Fixed-Slot Bit Mapping (for type touch tracking) @@ -335,15 +335,15 @@ const _TYPE_BITS_MASK = UInt16(0x00FF) # bits 0-7: fixed-slot type bits # Maps each fixed-slot type to a unique bit in a UInt16 bitmask. # Bit ordering matches FIXED_SLOT_FIELDS. Non-fixed types return UInt16(0). -@inline _fixed_slot_bit(::Type{Float64}) = UInt16(1) << 0 -@inline _fixed_slot_bit(::Type{Float32}) = UInt16(1) << 1 -@inline _fixed_slot_bit(::Type{Int64}) = UInt16(1) << 2 -@inline _fixed_slot_bit(::Type{Int32}) = UInt16(1) << 3 +@inline _fixed_slot_bit(::Type{Float64}) = UInt16(1) << 0 +@inline _fixed_slot_bit(::Type{Float32}) = UInt16(1) << 1 +@inline _fixed_slot_bit(::Type{Int64}) = UInt16(1) << 2 +@inline _fixed_slot_bit(::Type{Int32}) = UInt16(1) << 3 @inline _fixed_slot_bit(::Type{ComplexF64}) = UInt16(1) << 4 @inline _fixed_slot_bit(::Type{ComplexF32}) = UInt16(1) << 5 -@inline _fixed_slot_bit(::Type{Bool}) = UInt16(1) << 6 -@inline _fixed_slot_bit(::Type{Bit}) = UInt16(1) << 7 -@inline _fixed_slot_bit(::Type) = UInt16(0) # non-fixed-slot → triggers has_others +@inline _fixed_slot_bit(::Type{Bool}) = UInt16(1) << 6 +@inline _fixed_slot_bit(::Type{Bit}) = UInt16(1) << 7 +@inline _fixed_slot_bit(::Type) = UInt16(0) # non-fixed-slot → triggers has_others # Check whether a type's bit is set in a bitmask (e.g. _touched_type_masks or combined). @inline _has_bit(mask::UInt16, ::Type{T}) where {T} = (mask & _fixed_slot_bit(T)) != 0 @@ -379,7 +379,7 @@ mutable struct AdaptiveArrayPool <: AbstractArrayPool end function AdaptiveArrayPool() - AdaptiveArrayPool( + return AdaptiveArrayPool( TypedPool{Float64}(), TypedPool{Float32}(), TypedPool{Int64}(), @@ -411,7 +411,7 @@ end # Slow Path: rare types via IdDict @inline function get_typed_pool!(p::AdaptiveArrayPool, ::Type{T}) where {T} - get!(p.others, T) do + return get!(p.others, T) do tp = TypedPool{T}() # If inside a checkpoint scope (_current_depth > 1 means inside @with_pool), # auto-checkpoint the new pool to prevent issues on rewind @@ -439,7 +439,7 @@ Apply `f` to each fixed slot TypedPool. Zero allocation via compile-time unrolli """ @generated function foreach_fixed_slot(f::F, pool::AdaptiveArrayPool) where {F} exprs = [:(f(getfield(pool, $(QuoteNode(field))))) for field in FIXED_SLOT_FIELDS] - quote + return quote Base.@_inline_meta $(exprs...) nothing diff --git a/src/macros.jl b/src/macros.jl index 752c13ef..7605abc8 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -23,7 +23,7 @@ achieving zero overhead compared to Dict-based registry. @inline _get_pool_for_backend(::Val{:cpu}) = get_task_local_pool() # Fallback with helpful error message (marked @noinline to keep hot path fast) -@noinline function _get_pool_for_backend(::Val{B}) where B +@noinline function _get_pool_for_backend(::Val{B}) where {B} error("Pool backend :$B is not available. Load the extension first (e.g., `using CUDA` for :cuda).") end @@ -106,22 +106,22 @@ end ``` """ macro with_pool(pool_name, expr) - _generate_pool_code(pool_name, expr, true; source=__source__) + return _generate_pool_code(pool_name, expr, true; source = __source__) end macro with_pool(expr) pool_name = gensym(:pool) - _generate_pool_code(pool_name, expr, true; source=__source__) + return _generate_pool_code(pool_name, expr, true; source = __source__) end # Backend-specific variants: @with_pool :cuda pool begin ... end macro with_pool(backend::QuoteNode, pool_name, expr) - _generate_pool_code_with_backend(backend.value, pool_name, expr, true; source=__source__) + return _generate_pool_code_with_backend(backend.value, pool_name, expr, true; source = __source__) end macro with_pool(backend::QuoteNode, expr) pool_name = gensym(:pool) - _generate_pool_code_with_backend(backend.value, pool_name, expr, true; source=__source__) + return _generate_pool_code_with_backend(backend.value, pool_name, expr, true; source = __source__) end """ @@ -153,22 +153,22 @@ end ``` """ macro maybe_with_pool(pool_name, expr) - _generate_pool_code(pool_name, expr, false; source=__source__) + return _generate_pool_code(pool_name, expr, false; source = __source__) end macro maybe_with_pool(expr) pool_name = gensym(:pool) - _generate_pool_code(pool_name, expr, false; source=__source__) + return _generate_pool_code(pool_name, expr, false; source = __source__) end # Backend-specific variants: @maybe_with_pool :cuda pool begin ... end macro maybe_with_pool(backend::QuoteNode, pool_name, expr) - _generate_pool_code_with_backend(backend.value, pool_name, expr, false; source=__source__) + return _generate_pool_code_with_backend(backend.value, pool_name, expr, false; source = __source__) end macro maybe_with_pool(backend::QuoteNode, expr) pool_name = gensym(:pool) - _generate_pool_code_with_backend(backend.value, pool_name, expr, false; source=__source__) + return _generate_pool_code_with_backend(backend.value, pool_name, expr, false; source = __source__) end # ============================================================================== @@ -182,7 +182,7 @@ Generate expression for DisabledPool singleton based on backend. Used when pooling is disabled to preserve backend context. """ function _disabled_pool_expr(backend::Symbol) - if backend == :cpu + return if backend == :cpu :($DISABLED_CPU) else :($(DisabledPool{backend}())) @@ -232,7 +232,7 @@ Ensure body has a LineNumberNode pointing to user source at the top level. Returns a new Expr to avoid mutating the original AST. """ -function _ensure_body_has_toplevel_lnn(body, source::Union{LineNumberNode,Nothing}) +function _ensure_body_has_toplevel_lnn(body, source::Union{LineNumberNode, Nothing}) source === nothing && return body # Don't clobber valid file info with :none from REPL/eval source.file === :none && return body @@ -275,7 +275,7 @@ Scans first few args to handle Expr(:meta, ...) from @inline etc. If source.file === :none (REPL/eval), don't clobber valid file LNNs. Modifies expr in-place and returns it. """ -function _fix_try_body_lnn!(expr, source::Union{LineNumberNode,Nothing}) +function _fix_try_body_lnn!(expr, source::Union{LineNumberNode, Nothing}) source === nothing && return expr # Don't clobber valid file info with :none from REPL/eval source.file === :none && return expr @@ -307,7 +307,7 @@ end # Internal: Code Generation # ============================================================================== -function _generate_pool_code(pool_name, expr, force_enable; source::Union{LineNumberNode,Nothing}=nothing) +function _generate_pool_code(pool_name, expr, force_enable; source::Union{LineNumberNode, Nothing} = nothing) # Compile-time check: if pooling disabled, use DisabledPool to preserve backend context if !USE_POOLING disabled_pool = _disabled_pool_expr(:cpu) @@ -402,7 +402,7 @@ Uses `_get_pool_for_backend(Val{backend}())` for zero-overhead dispatch. Includes type-specific checkpoint/rewind optimization (same as regular @with_pool). """ -function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, force_enable::Bool; source::Union{LineNumberNode,Nothing}=nothing) +function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, force_enable::Bool; source::Union{LineNumberNode, Nothing} = nothing) # Compile-time check: if pooling disabled, use DisabledPool to preserve backend context if !USE_POOLING disabled_pool = _disabled_pool_expr(backend) @@ -515,7 +515,7 @@ end Generate function code for a specific backend (e.g., :cuda). Wraps the function body with pool getter, checkpoint, try-finally, rewind. """ -function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, func_def, disable_pooling::Bool; source::Union{LineNumberNode,Nothing}=nothing) +function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, func_def, disable_pooling::Bool; source::Union{LineNumberNode, Nothing} = nothing) def_head = func_def.head call_expr = func_def.args[1] body = func_def.args[2] @@ -572,7 +572,7 @@ function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, f return Expr(def_head, esc(call_expr), new_body) end -function _generate_function_pool_code(pool_name, func_def, force_enable, disable_pooling, backend::Symbol=:cpu; source::Union{LineNumberNode,Nothing}=nothing) +function _generate_function_pool_code(pool_name, func_def, force_enable, disable_pooling, backend::Symbol = :cpu; source::Union{LineNumberNode, Nothing} = nothing) def_head = func_def.head call_expr = func_def.args[1] body = func_def.args[2] @@ -670,14 +670,14 @@ These cannot be used for typed checkpoint since they're defined after checkpoint Detects patterns like: `T = eltype(x)`, `local T = ...`, etc. """ -function _extract_local_assignments(expr, locals=Set{Symbol}()) +function _extract_local_assignments(expr, locals = Set{Symbol}()) if expr isa Expr if expr.head == :(=) && length(expr.args) >= 1 lhs = expr.args[1] # Simple assignment: T = ... if lhs isa Symbol push!(locals, lhs) - # Typed assignment: T::Type = ... + # Typed assignment: T::Type = ... elseif Meta.isexpr(lhs, :(::)) && length(lhs.args) >= 1 && lhs.args[1] isa Symbol push!(locals, lhs.args[1]) end @@ -720,7 +720,7 @@ Handles various forms: - `similar!(pool, x)`: generates `eltype(x)` expression - `similar!(pool, x, Type, ...)`: extracts Type """ -function _extract_acquire_types(expr, target_pool, types=Set{Any}()) +function _extract_acquire_types(expr, target_pool, types = Set{Any}()) if expr isa Expr # Match: function calls with pool argument if expr.head == :call && length(expr.args) >= 3 @@ -752,10 +752,10 @@ function _extract_acquire_types(expr, target_pool, types=Set{Any}()) # acquire!(pool, x) - similar-style form push!(types, Expr(:call, :eltype, expr.args[3])) end - # trues!/falses! (always uses Bit type) + # trues!/falses! (always uses Bit type) elseif fn in (:trues!, :falses!) || fn_name in (:trues!, :falses!) push!(types, :Bit) - # zeros!/ones!/unsafe_zeros!/unsafe_ones! + # zeros!/ones!/unsafe_zeros!/unsafe_ones! elseif fn in (:zeros!, :ones!, :unsafe_zeros!, :unsafe_ones!) || fn_name in (:zeros!, :ones!, :unsafe_zeros!, :unsafe_ones!) if nargs >= 3 third_arg = expr.args[3] @@ -768,7 +768,7 @@ function _extract_acquire_types(expr, target_pool, types=Set{Any}()) push!(types, Expr(:call, :default_eltype, target_pool)) end end - # similar!/unsafe_similar! + # similar!/unsafe_similar! elseif fn in (:similar!, :unsafe_similar!) || fn_name in (:similar!, :unsafe_similar!) if nargs == 3 # similar!(pool, x) - same type as x @@ -783,7 +783,7 @@ function _extract_acquire_types(expr, target_pool, types=Set{Any}()) push!(types, Expr(:call, :eltype, expr.args[3])) end end - # reshape! + # reshape! elseif fn in (:reshape!,) || fn_name in (:reshape!,) # reshape!(pool, A, dims...) — extract eltype(A) from second arg if nargs >= 3 @@ -859,7 +859,7 @@ Filter types for typed checkpoint/rewind generation. Type parameters (T, S from `where` clause) resolve to concrete types at runtime. Local variables (T = eltype(x)) are defined after checkpoint! and cannot be used. """ -function _filter_static_types(types, local_vars=Set{Symbol}()) +function _filter_static_types(types, local_vars = Set{Symbol}()) static_types = Any[] has_dynamic = false @@ -926,7 +926,7 @@ function _generate_typed_checkpoint_call(pool_expr, types) else escaped_types = [esc(t) for t in types] typed_call = :($checkpoint!($pool_expr, $(escaped_types...))) - lazy_call = :($_typed_lazy_checkpoint!($pool_expr, $(escaped_types...))) + lazy_call = :($_typed_lazy_checkpoint!($pool_expr, $(escaped_types...))) return quote if $_can_use_typed_path($pool_expr, $_tracked_mask_for_types($(escaped_types...))) $typed_call @@ -951,9 +951,13 @@ function _generate_typed_rewind_call(pool_expr, types) return :($rewind!($pool_expr)) # fallback for direct external calls (unreachable via macro) else escaped_types = [esc(t) for t in types] - typed_call = :($rewind!($pool_expr, $(escaped_types...))) - selective_call = :($_typed_lazy_rewind!($pool_expr, - $_tracked_mask_for_types($(escaped_types...)))) + typed_call = :($rewind!($pool_expr, $(escaped_types...))) + selective_call = :( + $_typed_lazy_rewind!( + $pool_expr, + $_tracked_mask_for_types($(escaped_types...)) + ) + ) return quote if $_can_use_typed_path($pool_expr, $_tracked_mask_for_types($(escaped_types...))) $typed_call diff --git a/src/state.jl b/src/state.jl index b591b520..ec172b4e 100644 --- a/src/state.jl +++ b/src/state.jl @@ -43,7 +43,7 @@ Also updates _current_depth and bitmask state for type touch tracking. ~77% faster than full checkpoint! when only one type is used. """ -@inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where T +@inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where {T} pool._current_depth += 1 push!(pool._touched_type_masks, UInt16(0)) @@ -52,7 +52,7 @@ Also updates _current_depth and bitmask state for type touch tracking. # (which bypasses _record_type_touch!) is the only acquire path. push!(pool._touched_has_others, _fixed_slot_bit(T) == UInt16(0)) _checkpoint_typed_pool!(get_typed_pool!(pool, T), pool._current_depth) - nothing + return nothing end """ @@ -77,7 +77,7 @@ compile-time unrolling. Increments _current_depth once for all types. # even when _acquire_impl! (bypassing _record_type_touch!) is used. has_any_fallback = any(i -> _fixed_slot_bit(types[i].parameters[1]) == UInt16(0), unique_indices) checkpoint_exprs = [:(_checkpoint_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices] - quote + return quote pool._current_depth += 1 push!(pool._touched_type_masks, UInt16(0)) push!(pool._touched_has_others, $has_any_fallback) @@ -96,7 +96,7 @@ end push!(tp._checkpoint_n_active, tp.n_active) push!(tp._checkpoint_depths, depth) end - nothing + return nothing end """ @@ -128,7 +128,7 @@ Performance: ~2ns vs ~540ns for full `checkpoint!`. _checkpoint_typed_pool!(p, depth) @inbounds pool._touched_has_others[depth] = true end - nothing + return nothing end # ============================================================================== @@ -183,7 +183,7 @@ end Restore state for a specific type only. Also updates _current_depth and bitmask state. """ -@inline function rewind!(pool::AdaptiveArrayPool, ::Type{T}) where T +@inline function rewind!(pool::AdaptiveArrayPool, ::Type{T}) where {T} # Safety guard: at global scope (depth=1), delegate to reset! if pool._current_depth == 1 @@ -194,7 +194,7 @@ Also updates _current_depth and bitmask state. pop!(pool._touched_type_masks) pop!(pool._touched_has_others) pool._current_depth -= 1 - nothing + return nothing end """ @@ -216,7 +216,7 @@ Decrements _current_depth once after all types are rewound. end rewind_exprs = [:(_rewind_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in reverse(unique_indices)] reset_exprs = [:(reset!(get_typed_pool!(pool, types[$i]))) for i in unique_indices] - quote + return quote # Safety guard: at global scope (depth=1), delegate to reset! if pool._current_depth == 1 $(reset_exprs...) @@ -256,7 +256,7 @@ end # - If sentinel (_checkpoint_n_active=[0]), restores to n_active=0 tp.n_active = @inbounds tp._checkpoint_n_active[end] end - nothing + return nothing end """ @@ -272,7 +272,7 @@ Called directly from the macro-generated `finally` clause as a single function c """ @inline function _lazy_rewind!(pool::AdaptiveArrayPool) - d = pool._current_depth + d = pool._current_depth bits = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK _selective_rewind_fixed_slots!(pool, bits) if @inbounds(pool._touched_has_others[d]) @@ -283,7 +283,7 @@ Called directly from the macro-generated `finally` clause as a single function c pop!(pool._touched_type_masks) pop!(pool._touched_has_others) pool._current_depth -= 1 - nothing + return nothing end """ @@ -320,7 +320,7 @@ lazy first-touch checkpoint for each extra type on first acquire, ensuring Case end @inbounds pool._touched_has_others[d] = true end - nothing + return nothing end """ @@ -347,7 +347,7 @@ guaranteed by the `_TYPED_LAZY_BIT` mode set in `_typed_lazy_checkpoint!`. pop!(pool._touched_type_masks) pop!(pool._touched_has_others) pool._current_depth -= 1 - nothing + return nothing end """ @@ -365,15 +365,15 @@ checkpoint, `_rewind_typed_pool!` Case B safely restores from the parent checkpo @inline function _selective_rewind_fixed_slots!(pool::AdaptiveArrayPool, mask::UInt16) d = pool._current_depth - _has_bit(mask, Float64) && _rewind_typed_pool!(pool.float64, d) - _has_bit(mask, Float32) && _rewind_typed_pool!(pool.float32, d) - _has_bit(mask, Int64) && _rewind_typed_pool!(pool.int64, d) - _has_bit(mask, Int32) && _rewind_typed_pool!(pool.int32, d) + _has_bit(mask, Float64) && _rewind_typed_pool!(pool.float64, d) + _has_bit(mask, Float32) && _rewind_typed_pool!(pool.float32, d) + _has_bit(mask, Int64) && _rewind_typed_pool!(pool.int64, d) + _has_bit(mask, Int32) && _rewind_typed_pool!(pool.int32, d) _has_bit(mask, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d) _has_bit(mask, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d) - _has_bit(mask, Bool) && _rewind_typed_pool!(pool.bool, d) - _has_bit(mask, Bit) && _rewind_typed_pool!(pool.bits, d) - nothing + _has_bit(mask, Bool) && _rewind_typed_pool!(pool.bool, d) + _has_bit(mask, Bit) && _rewind_typed_pool!(pool.bits, d) + return nothing end # ============================================================================== @@ -548,9 +548,9 @@ to sentinel state while preserving allocated vectors. See also: [`reset!(::AdaptiveArrayPool)`](@ref), [`rewind!`](@ref) """ -@inline function reset!(pool::AdaptiveArrayPool, ::Type{T}) where T +@inline function reset!(pool::AdaptiveArrayPool, ::Type{T}) where {T} reset!(get_typed_pool!(pool, T)) - pool + return pool end """ @@ -563,7 +563,7 @@ See also: [`reset!(::AdaptiveArrayPool)`](@ref), [`rewind!`](@ref) """ @generated function reset!(pool::AdaptiveArrayPool, types::Type...) reset_exprs = [:(reset!(get_typed_pool!(pool, types[$i]))) for i in 1:length(types)] - quote + return quote $(reset_exprs...) pool end diff --git a/src/task_local_pool.jl b/src/task_local_pool.jl index cd7940cc..25f7d50e 100644 --- a/src/task_local_pool.jl +++ b/src/task_local_pool.jl @@ -63,14 +63,14 @@ ensuring thread safety without locks. # 1. Fast Path: Try to get existing pool # get(dict, key, default) is optimized in Julia Base pool = get(task_local_storage(), _POOL_KEY, nothing) - + if pool === nothing # 2. Slow Path: Create and store new pool # This branch is rarely taken (only once per Task) pool = AdaptiveArrayPool() task_local_storage(_POOL_KEY, pool) end - + return pool::AdaptiveArrayPool end @@ -96,4 +96,4 @@ Returns the dictionary of all CUDA pools for the current task (one per device). Requires CUDA.jl to be loaded. Throws an error if CUDA extension is not available. """ -function get_task_local_cuda_pools end \ No newline at end of file +function get_task_local_cuda_pools end diff --git a/src/types.jl b/src/types.jl index c2a01c9b..130479b1 100644 --- a/src/types.jl +++ b/src/types.jl @@ -70,7 +70,7 @@ end Abstract base for type-specific memory pools. """ -abstract type AbstractTypedPool{T, V<:AbstractVector{T}} end +abstract type AbstractTypedPool{T, V <: AbstractVector{T}} end """ AbstractArrayPool @@ -339,10 +339,10 @@ const FIXED_SLOT_FIELDS = (:float64, :float32, :int64, :int32, :complexf64, :com # Bits 0-7: fixed-slot type touch tracking (one bit per type) # Bits 14-15: mode flags set during checkpoint to control lazy behavior -const _LAZY_MODE_BIT = UInt16(0x8000) # bit 15: lazy (dynamic-selective) checkpoint mode -const _TYPED_LAZY_BIT = UInt16(0x4000) # bit 14: typed lazy-fallback mode -const _MODE_BITS_MASK = UInt16(0xC000) # bits 14-15: all mode flags -const _TYPE_BITS_MASK = UInt16(0x00FF) # bits 0-7: fixed-slot type bits +const _LAZY_MODE_BIT = UInt16(0x8000) # bit 15: lazy (dynamic-selective) checkpoint mode +const _TYPED_LAZY_BIT = UInt16(0x4000) # bit 14: typed lazy-fallback mode +const _MODE_BITS_MASK = UInt16(0xC000) # bits 14-15: all mode flags +const _TYPE_BITS_MASK = UInt16(0x00FF) # bits 0-7: fixed-slot type bits # ============================================================================== # Fixed-Slot Bit Mapping (for type touch tracking) @@ -350,15 +350,15 @@ const _TYPE_BITS_MASK = UInt16(0x00FF) # bits 0-7: fixed-slot type bits # Maps each fixed-slot type to a unique bit in a UInt16 bitmask. # Bit ordering matches FIXED_SLOT_FIELDS. Non-fixed types return UInt16(0). -@inline _fixed_slot_bit(::Type{Float64}) = UInt16(1) << 0 -@inline _fixed_slot_bit(::Type{Float32}) = UInt16(1) << 1 -@inline _fixed_slot_bit(::Type{Int64}) = UInt16(1) << 2 -@inline _fixed_slot_bit(::Type{Int32}) = UInt16(1) << 3 +@inline _fixed_slot_bit(::Type{Float64}) = UInt16(1) << 0 +@inline _fixed_slot_bit(::Type{Float32}) = UInt16(1) << 1 +@inline _fixed_slot_bit(::Type{Int64}) = UInt16(1) << 2 +@inline _fixed_slot_bit(::Type{Int32}) = UInt16(1) << 3 @inline _fixed_slot_bit(::Type{ComplexF64}) = UInt16(1) << 4 @inline _fixed_slot_bit(::Type{ComplexF32}) = UInt16(1) << 5 -@inline _fixed_slot_bit(::Type{Bool}) = UInt16(1) << 6 -@inline _fixed_slot_bit(::Type{Bit}) = UInt16(1) << 7 -@inline _fixed_slot_bit(::Type) = UInt16(0) # non-fixed-slot → triggers has_others +@inline _fixed_slot_bit(::Type{Bool}) = UInt16(1) << 6 +@inline _fixed_slot_bit(::Type{Bit}) = UInt16(1) << 7 +@inline _fixed_slot_bit(::Type) = UInt16(0) # non-fixed-slot → triggers has_others # Check whether a type's bit is set in a bitmask (e.g. _touched_type_masks or combined). @inline _has_bit(mask::UInt16, ::Type{T}) where {T} = (mask & _fixed_slot_bit(T)) != 0 @@ -394,7 +394,7 @@ mutable struct AdaptiveArrayPool <: AbstractArrayPool end function AdaptiveArrayPool() - AdaptiveArrayPool( + return AdaptiveArrayPool( TypedPool{Float64}(), TypedPool{Float32}(), TypedPool{Int64}(), @@ -426,7 +426,7 @@ end # Slow Path: rare types via IdDict @inline function get_typed_pool!(p::AdaptiveArrayPool, ::Type{T}) where {T} - get!(p.others, T) do + return get!(p.others, T) do tp = TypedPool{T}() # If inside a checkpoint scope (_current_depth > 1 means inside @with_pool), # auto-checkpoint the new pool to prevent issues on rewind @@ -454,7 +454,7 @@ Apply `f` to each fixed slot TypedPool. Zero allocation via compile-time unrolli """ @generated function foreach_fixed_slot(f::F, pool::AdaptiveArrayPool) where {F} exprs = [:(f(getfield(pool, $(QuoteNode(field))))) for field in FIXED_SLOT_FIELDS] - quote + return quote Base.@_inline_meta $(exprs...) nothing diff --git a/src/utils.jl b/src/utils.jl index ee6c1147..b528c2df 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -50,7 +50,7 @@ function _validate_pool_return(val, pool::AdaptiveArrayPool) end # 3. Check raw Array (from unsafe_acquire!) - if val isa Array + return if val isa Array _check_pointer_overlap(val, pool) end end @@ -61,7 +61,7 @@ function _check_pointer_overlap(arr::Array, pool::AdaptiveArrayPool) arr_len = length(arr) * sizeof(eltype(arr)) arr_end = arr_ptr + arr_len - check_overlap = function(tp) + check_overlap = function (tp) for v in tp.vectors v_ptr = UInt(pointer(v)) v_len = length(v) * sizeof(eltype(v)) @@ -70,6 +70,7 @@ function _check_pointer_overlap(arr::Array, pool::AdaptiveArrayPool) error("Safety Violation: The function returned an Array backed by pool memory. This is unsafe as the memory will be reclaimed. Please return a copy (collect) or a scalar.") end end + return end # Check fixed slots @@ -81,6 +82,7 @@ function _check_pointer_overlap(arr::Array, pool::AdaptiveArrayPool) for tp in values(pool.others) check_overlap(tp) end + return end # Check if BitArray chunks overlap with the pool's BitTypedPool storage @@ -123,14 +125,14 @@ _count_label(::BitTypedPool) = "bits" Print statistics for a TypedPool or BitTypedPool. """ -function pool_stats(tp::AbstractTypedPool; io::IO=stdout, indent::Int=0, name::String="") +function pool_stats(tp::AbstractTypedPool; io::IO = stdout, indent::Int = 0, name::String = "") prefix = " "^indent type_name = isempty(name) ? _default_type_name(tp) : name n_arrays = length(tp.vectors) if n_arrays == 0 - printstyled(io, prefix, type_name, color=:cyan) - printstyled(io, " (empty)\n", color=:dark_gray) + printstyled(io, prefix, type_name, color = :cyan) + printstyled(io, " (empty)\n", color = :dark_gray) return end @@ -139,19 +141,19 @@ function pool_stats(tp::AbstractTypedPool; io::IO=stdout, indent::Int=0, name::S bytes_str = Base.format_bytes(total_bytes) # Header - printstyled(io, prefix, type_name, color=:cyan) + printstyled(io, prefix, type_name, color = :cyan) println(io) # Stats - printstyled(io, prefix, " slots: ", color=:dark_gray) - printstyled(io, n_arrays, color=:blue) - printstyled(io, " (active: ", color=:dark_gray) - printstyled(io, tp.n_active, color=:blue) - printstyled(io, ")\n", color=:dark_gray) - - printstyled(io, prefix, " ", _count_label(tp), ": ", color=:dark_gray) - printstyled(io, total_count, color=:blue) - printstyled(io, " ($bytes_str)\n", color=:dark_gray) + printstyled(io, prefix, " slots: ", color = :dark_gray) + printstyled(io, n_arrays, color = :blue) + printstyled(io, " (active: ", color = :dark_gray) + printstyled(io, tp.n_active, color = :blue) + printstyled(io, ")\n", color = :dark_gray) + + printstyled(io, prefix, " ", _count_label(tp), ": ", color = :dark_gray) + printstyled(io, total_count, color = :blue) + printstyled(io, " ($bytes_str)\n", color = :dark_gray) return nothing end @@ -169,9 +171,9 @@ pool = AdaptiveArrayPool() end ``` """ -function pool_stats(pool::AdaptiveArrayPool; io::IO=stdout) +function pool_stats(pool::AdaptiveArrayPool; io::IO = stdout) # Header - printstyled(io, "AdaptiveArrayPool", bold=true, color=:white) + printstyled(io, "AdaptiveArrayPool", bold = true, color = :white) println(io) has_content = false @@ -181,18 +183,18 @@ function pool_stats(pool::AdaptiveArrayPool; io::IO=stdout) if !isempty(tp.vectors) has_content = true name = _default_type_name(tp) * " (fixed)" - pool_stats(tp; io, indent=2, name) + pool_stats(tp; io, indent = 2, name) end end # Fallback types for (T, tp) in pool.others has_content = true - pool_stats(tp; io, indent=2, name="$T (fallback)") + pool_stats(tp; io, indent = 2, name = "$T (fallback)") end if !has_content - printstyled(io, " (empty)\n", color=:dark_gray) + printstyled(io, " (empty)\n", color = :dark_gray) end return nothing end @@ -210,7 +212,7 @@ Print statistics for all task-local pools (CPU and CUDA if loaded). end ``` """ -function pool_stats(; io::IO=stdout) +function pool_stats(; io::IO = stdout) pool_stats(:cpu; io) # Show CUDA pools if extension is loaded and pools exist try @@ -227,8 +229,8 @@ end Print statistics for the CPU task-local pool only. """ -pool_stats(::Val{:cpu}; io::IO=stdout) = pool_stats(get_task_local_pool(); io) -pool_stats(s::Symbol; io::IO=stdout) = pool_stats(Val(s); io) +pool_stats(::Val{:cpu}; io::IO = stdout) = pool_stats(get_task_local_pool(); io) +pool_stats(s::Symbol; io::IO = stdout) = pool_stats(Val(s); io) """ pool_stats(:cuda; io::IO=stdout) @@ -236,7 +238,7 @@ pool_stats(s::Symbol; io::IO=stdout) = pool_stats(Val(s); io) Print statistics for CUDA task-local pools. Requires CUDA.jl to be loaded. """ -function pool_stats(::Val{:cuda}; io::IO=stdout) +function pool_stats(::Val{:cuda}; io::IO = stdout) pools = get_task_local_cuda_pools() # Throws MethodError if extension not loaded for pool in values(pools) pool_stats(pool; io) @@ -256,7 +258,7 @@ _show_type_name(::BitTypedPool) = "BitTypedPool" function Base.show(io::IO, tp::AbstractTypedPool) name = _show_type_name(tp) n_vectors = length(tp.vectors) - if n_vectors == 0 + return if n_vectors == 0 print(io, "$name(empty)") else total = sum(length(v) for v in tp.vectors) @@ -267,7 +269,7 @@ end # Multi-line show for all AbstractTypedPool function Base.show(io::IO, ::MIME"text/plain", tp::AbstractTypedPool) - pool_stats(tp; io, name=_show_type_name(tp)) + return pool_stats(tp; io, name = _show_type_name(tp)) end # Compact one-line show for AdaptiveArrayPool @@ -290,10 +292,10 @@ function Base.show(io::IO, pool::AdaptiveArrayPool) total_active[] += tp.n_active end - print(io, "AdaptiveArrayPool(types=$(n_types[]), slots=$(total_vectors[]), active=$(total_active[]))") + return print(io, "AdaptiveArrayPool(types=$(n_types[]), slots=$(total_vectors[]), active=$(total_active[]))") end # Multi-line show for AdaptiveArrayPool function Base.show(io::IO, ::MIME"text/plain", pool::AdaptiveArrayPool) - pool_stats(pool; io) + return pool_stats(pool; io) end diff --git a/test/cuda/test_allocation.jl b/test/cuda/test_allocation.jl index a1047057..7bee5bd7 100644 --- a/test/cuda/test_allocation.jl +++ b/test/cuda/test_allocation.jl @@ -42,7 +42,7 @@ v1 = acquire!(p, Float32, 100) v2 = acquire!(p, Float32, 200) v3 = acquire!(p, Float32, 300) - v1 .= 1f0; v2 .= 2f0; v3 .= 3f0 + v1 .= 1.0f0; v2 .= 2.0f0; v3 .= 3.0f0 end end @@ -260,7 +260,7 @@ end v32 = acquire!(p, Float32, 100) v64 = acquire!(p, Float64, 100) vi32 = acquire!(p, Int32, 100) - v32 .= 1f0; v64 .= 2.0; vi32 .= 3 + v32 .= 1.0f0; v64 .= 2.0; vi32 .= 3 end end diff --git a/test/cuda/test_convenience.jl b/test/cuda/test_convenience.jl index 8bb7858b..f3d09daf 100644 --- a/test/cuda/test_convenience.jl +++ b/test/cuda/test_convenience.jl @@ -9,14 +9,14 @@ @test all(v .== 0.0f0) m = zeros!(pool, 3, 4) - @test m isa CuArray{Float32,2} + @test m isa CuArray{Float32, 2} @test size(m) == (3, 4) @test all(m .== 0.0f0) # Tuple form dims = (2, 3) t = zeros!(pool, dims) - @test t isa CuArray{Float32,2} + @test t isa CuArray{Float32, 2} @test size(t) == dims end @@ -40,14 +40,14 @@ @test all(v .== 1.0f0) m = ones!(pool, 3, 4) - @test m isa CuArray{Float32,2} + @test m isa CuArray{Float32, 2} @test size(m) == (3, 4) @test all(m .== 1.0f0) # Tuple form dims = (2, 3) t = ones!(pool, dims) - @test t isa CuArray{Float32,2} + @test t isa CuArray{Float32, 2} @test size(t) == dims end @@ -65,40 +65,40 @@ # Float32 template template32 = CUDA.rand(Float32, 5, 5) v = similar!(pool, template32) - @test v isa CuArray{Float32,2} + @test v isa CuArray{Float32, 2} @test size(v) == (5, 5) # Float64 template template64 = CUDA.rand(Float64, 3, 4) v64 = similar!(pool, template64) - @test v64 isa CuArray{Float64,2} + @test v64 isa CuArray{Float64, 2} @test size(v64) == (3, 4) # Different type v_int = similar!(pool, template32, Int32) - @test v_int isa CuArray{Int32,2} + @test v_int isa CuArray{Int32, 2} @test size(v_int) == (5, 5) # Different dims v_dims = similar!(pool, template32, 10) - @test v_dims isa CuArray{Float32,1} + @test v_dims isa CuArray{Float32, 1} @test length(v_dims) == 10 # Different type and dims v_both = similar!(pool, template32, Float64, 2, 3) - @test v_both isa CuArray{Float64,2} + @test v_both isa CuArray{Float64, 2} @test size(v_both) == (2, 3) end @testset "unsafe_zeros! default type is Float32" begin v = unsafe_zeros!(pool, 10) - @test v isa CuArray{Float32,1} + @test v isa CuArray{Float32, 1} @test !(v isa SubArray) # Raw array, not view @test length(v) == 10 @test all(v .== 0.0f0) m = unsafe_zeros!(pool, 3, 4) - @test m isa CuArray{Float32,2} + @test m isa CuArray{Float32, 2} @test !(m isa SubArray) @test size(m) == (3, 4) @test all(m .== 0.0f0) @@ -106,7 +106,7 @@ # Tuple form dims = (2, 3) t = unsafe_zeros!(pool, dims) - @test t isa CuArray{Float32,2} + @test t isa CuArray{Float32, 2} @test size(t) == dims end @@ -119,13 +119,13 @@ @testset "unsafe_ones! default type is Float32" begin v = unsafe_ones!(pool, 10) - @test v isa CuArray{Float32,1} + @test v isa CuArray{Float32, 1} @test !(v isa SubArray) @test length(v) == 10 @test all(v .== 1.0f0) m = unsafe_ones!(pool, 3, 4) - @test m isa CuArray{Float32,2} + @test m isa CuArray{Float32, 2} @test !(m isa SubArray) @test size(m) == (3, 4) @test all(m .== 1.0f0) @@ -133,7 +133,7 @@ # Tuple form dims = (2, 3) t = unsafe_ones!(pool, dims) - @test t isa CuArray{Float32,2} + @test t isa CuArray{Float32, 2} @test size(t) == dims end @@ -148,32 +148,32 @@ # Float32 template template32 = CUDA.rand(Float32, 5, 5) v = unsafe_similar!(pool, template32) - @test v isa CuArray{Float32,2} + @test v isa CuArray{Float32, 2} @test !(v isa SubArray) @test size(v) == (5, 5) # Float64 template template64 = CUDA.rand(Float64, 3, 4) v64 = unsafe_similar!(pool, template64) - @test v64 isa CuArray{Float64,2} + @test v64 isa CuArray{Float64, 2} @test !(v64 isa SubArray) @test size(v64) == (3, 4) # Different type v_int = unsafe_similar!(pool, template32, Int32) - @test v_int isa CuArray{Int32,2} + @test v_int isa CuArray{Int32, 2} @test !(v_int isa SubArray) @test size(v_int) == (5, 5) # Different dims v_dims = unsafe_similar!(pool, template32, 10) - @test v_dims isa CuArray{Float32,1} + @test v_dims isa CuArray{Float32, 1} @test !(v_dims isa SubArray) @test length(v_dims) == 10 # Different type and dims v_both = unsafe_similar!(pool, template32, Float64, 2, 3) - @test v_both isa CuArray{Float64,2} + @test v_both isa CuArray{Float64, 2} @test !(v_both isa SubArray) @test size(v_both) == (2, 3) end diff --git a/test/cuda/test_disabled_pool.jl b/test/cuda/test_disabled_pool.jl index 6c7da69a..aebbe9a8 100644 --- a/test/cuda/test_disabled_pool.jl +++ b/test/cuda/test_disabled_pool.jl @@ -24,7 +24,7 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e @test all(v1 .== 0.0f0) v2 = zeros!(DISABLED_CUDA, Float64, 5, 5) - @test v2 isa CuArray{Float64,2} + @test v2 isa CuArray{Float64, 2} @test size(v2) == (5, 5) @test all(v2 .== 0.0) @@ -34,16 +34,16 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e @test length(v3) == 8 v4 = zeros!(DISABLED_CUDA, 3, 4) - @test v4 isa CuArray{Float32,2} + @test v4 isa CuArray{Float32, 2} @test size(v4) == (3, 4) # Tuple dims v5 = zeros!(DISABLED_CUDA, Float32, (2, 3, 4)) - @test v5 isa CuArray{Float32,3} + @test v5 isa CuArray{Float32, 3} @test size(v5) == (2, 3, 4) v6 = zeros!(DISABLED_CUDA, (5, 6)) - @test v6 isa CuArray{Float32,2} + @test v6 isa CuArray{Float32, 2} @test size(v6) == (5, 6) end @@ -55,7 +55,7 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e @test all(v1 .== 1.0f0) v2 = ones!(DISABLED_CUDA, Float64, 5, 5) - @test v2 isa CuArray{Float64,2} + @test v2 isa CuArray{Float64, 2} @test size(v2) == (5, 5) @test all(v2 .== 1.0) @@ -65,16 +65,16 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e @test all(v3 .== 1.0f0) v4 = ones!(DISABLED_CUDA, 3, 4) - @test v4 isa CuArray{Float32,2} + @test v4 isa CuArray{Float32, 2} @test size(v4) == (3, 4) # Tuple dims v5 = ones!(DISABLED_CUDA, Float32, (2, 3)) - @test v5 isa CuArray{Float32,2} + @test v5 isa CuArray{Float32, 2} @test size(v5) == (2, 3) v6 = ones!(DISABLED_CUDA, (4, 5)) - @test v6 isa CuArray{Float32,2} + @test v6 isa CuArray{Float32, 2} @test size(v6) == (4, 5) end @@ -90,11 +90,11 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e @test length(v2) == 10 v3 = similar!(DISABLED_CUDA, template, 5, 5) - @test v3 isa CuArray{Float32,2} + @test v3 isa CuArray{Float32, 2} @test size(v3) == (5, 5) v4 = similar!(DISABLED_CUDA, template, Float64, 3, 4) - @test v4 isa CuArray{Float64,2} + @test v4 isa CuArray{Float64, 2} @test size(v4) == (3, 4) end @@ -110,11 +110,11 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e @test length(v2) == 8 v3 = similar!(DISABLED_CUDA, cpu_template, 4, 4) - @test v3 isa CuArray{Float64,2} + @test v3 isa CuArray{Float64, 2} @test size(v3) == (4, 4) v4 = similar!(DISABLED_CUDA, cpu_template, Int32, 2, 3) - @test v4 isa CuArray{Int32,2} + @test v4 isa CuArray{Int32, 2} @test size(v4) == (2, 3) end @@ -124,7 +124,7 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e @test all(v1 .== 0.0f0) v2 = unsafe_zeros!(DISABLED_CUDA, Float64, 5, 5) - @test v2 isa CuArray{Float64,2} + @test v2 isa CuArray{Float64, 2} @test size(v2) == (5, 5) # Without type @@ -133,11 +133,11 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e # Tuple dims v4 = unsafe_zeros!(DISABLED_CUDA, Float32, (3, 4)) - @test v4 isa CuArray{Float32,2} + @test v4 isa CuArray{Float32, 2} @test size(v4) == (3, 4) v5 = unsafe_zeros!(DISABLED_CUDA, (2, 3)) - @test v5 isa CuArray{Float32,2} + @test v5 isa CuArray{Float32, 2} end @testset "unsafe_ones!" begin @@ -146,7 +146,7 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e @test all(v1 .== 1.0f0) v2 = unsafe_ones!(DISABLED_CUDA, Float64, 5, 5) - @test v2 isa CuArray{Float64,2} + @test v2 isa CuArray{Float64, 2} @test size(v2) == (5, 5) # Without type @@ -155,10 +155,10 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e # Tuple dims v4 = unsafe_ones!(DISABLED_CUDA, Float32, (3, 4)) - @test v4 isa CuArray{Float32,2} + @test v4 isa CuArray{Float32, 2} v5 = unsafe_ones!(DISABLED_CUDA, (2, 3)) - @test v5 isa CuArray{Float32,2} + @test v5 isa CuArray{Float32, 2} end @testset "unsafe_similar! with CuArray input" begin @@ -171,10 +171,10 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e @test v2 isa CuVector{Float64} v3 = unsafe_similar!(DISABLED_CUDA, template, 5, 5) - @test v3 isa CuArray{Float32,2} + @test v3 isa CuArray{Float32, 2} v4 = unsafe_similar!(DISABLED_CUDA, template, Float64, 3, 4) - @test v4 isa CuArray{Float64,2} + @test v4 isa CuArray{Float64, 2} end @testset "unsafe_similar! with AbstractArray input (CPU->GPU)" begin @@ -187,10 +187,10 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e @test v2 isa CuVector{Float32} v3 = unsafe_similar!(DISABLED_CUDA, cpu_template, 4, 4) - @test v3 isa CuArray{Float64,2} + @test v3 isa CuArray{Float64, 2} v4 = unsafe_similar!(DISABLED_CUDA, cpu_template, Int32, 2, 3) - @test v4 isa CuArray{Int32,2} + @test v4 isa CuArray{Int32, 2} end @testset "acquire!" begin @@ -201,12 +201,12 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e # Type + vararg dims v2 = acquire!(DISABLED_CUDA, Float64, 5, 5) - @test v2 isa CuArray{Float64,2} + @test v2 isa CuArray{Float64, 2} @test size(v2) == (5, 5) # Type + tuple dims v3 = acquire!(DISABLED_CUDA, Float32, (3, 4, 5)) - @test v3 isa CuArray{Float32,3} + @test v3 isa CuArray{Float32, 3} @test size(v3) == (3, 4, 5) # CuArray template @@ -230,12 +230,12 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e # Type + vararg dims v2 = unsafe_acquire!(DISABLED_CUDA, Float64, 5, 5) - @test v2 isa CuArray{Float64,2} + @test v2 isa CuArray{Float64, 2} @test size(v2) == (5, 5) # Type + tuple dims v3 = unsafe_acquire!(DISABLED_CUDA, Float32, (3, 4, 5)) - @test v3 isa CuArray{Float32,3} + @test v3 isa CuArray{Float32, 3} @test size(v3) == (3, 4, 5) # CuArray template diff --git a/test/cuda/test_display.jl b/test/cuda/test_display.jl index 354e3954..b23032b3 100644 --- a/test/cuda/test_display.jl +++ b/test/cuda/test_display.jl @@ -3,7 +3,7 @@ # Helper macro to capture stdout macro capture_out(expr) - quote + return quote local old_stdout = stdout local rd, wr = redirect_stdout() try @@ -182,10 +182,10 @@ end reset!(pool) # pool_stats should return nothing - result = pool_stats(pool; io=devnull) + result = pool_stats(pool; io = devnull) @test result === nothing - result = pool_stats(:cuda; io=devnull) + result = pool_stats(:cuda; io = devnull) @test result === nothing end diff --git a/test/cuda/test_extension.jl b/test/cuda/test_extension.jl index a2bfcdb9..c4cc3797 100644 --- a/test/cuda/test_extension.jl +++ b/test/cuda/test_extension.jl @@ -57,7 +57,7 @@ end vec = CUDA.zeros(Float32, 50) flat_view = view(vec, 1:50) wrapped = AdaptiveArrayPools.wrap_array(tp, flat_view, (10, 5)) - @test wrapped isa CuArray{Float32,2} + @test wrapped isa CuArray{Float32, 2} @test size(wrapped) == (10, 5) end @@ -121,13 +121,15 @@ end @testset "get_task_local_cuda_pools before pool creation" begin # Test in a fresh task where no pool exists yet - result = fetch(Threads.@spawn begin - # Call get_task_local_cuda_pools() FIRST (before get_task_local_cuda_pool) - pools = get_task_local_cuda_pools() - @test pools isa Dict{Int, CuAdaptiveArrayPool} - @test isempty(pools) # No pools created yet - true - end) + result = fetch( + Threads.@spawn begin + # Call get_task_local_cuda_pools() FIRST (before get_task_local_cuda_pool) + pools = get_task_local_cuda_pools() + @test pools isa Dict{Int, CuAdaptiveArrayPool} + @test isempty(pools) # No pools created yet + true + end + ) @test result == true end @@ -449,7 +451,7 @@ end @testset "unsafe_acquire!" begin result = @with_pool :cuda pool begin A = unsafe_acquire!(pool, Float32, 100) - @test A isa CuArray{Float32,1} + @test A isa CuArray{Float32, 1} A .= 2.0f0 sum(A) end @@ -490,12 +492,12 @@ end pool = CuAdaptiveArrayPool() v = unsafe_acquire!(pool, Float32, 100) - @test v isa CuArray{Float32,1} + @test v isa CuArray{Float32, 1} A = unsafe_acquire!(pool, Float64, 10, 10) - @test A isa CuArray{Float64,2} + @test A isa CuArray{Float64, 2} B = unsafe_acquire!(pool, Int32, (5, 5)) - @test B isa CuArray{Int32,2} + @test B isa CuArray{Int32, 2} end end diff --git a/test/test_allocation.jl b/test/test_allocation.jl index b17631bf..3666da80 100644 --- a/test/test_allocation.jl +++ b/test/test_allocation.jl @@ -1,20 +1,20 @@ @with_pool pool function foo() - float64_vec = acquire!(pool, Float64, 10) - float32_vec = acquire!(pool, Float32, 10) + float64_vec = acquire!(pool, Float64, 10) + float32_vec = acquire!(pool, Float32, 10) - float64_mat = acquire!(pool, Float64, 10, 10) - float32_mat = acquire!(pool, Float32, 10, 10) + float64_mat = acquire!(pool, Float64, 10, 10) + float32_mat = acquire!(pool, Float32, 10, 10) - bv = acquire!(pool, Bit, 100) - ba2 = acquire!(pool, Bit, 10, 10) - ba3 = acquire!(pool, Bit, 5, 5, 4) + bv = acquire!(pool, Bit, 100) + ba2 = acquire!(pool, Bit, 10, 10) + ba3 = acquire!(pool, Bit, 5, 5, 4) - tt1 = trues!(pool, 256) - tt2 = ones!(pool, Bit, 10, 20) - ff1 = falses!(pool, 100, 5) - ff2 = zeros!(pool, Bit, 100) + tt1 = trues!(pool, 256) + tt2 = ones!(pool, Bit, 10, 20) + ff1 = falses!(pool, 100, 5) + ff2 = zeros!(pool, Bit, 100) - C = similar!(pool, tt1) + C = similar!(pool, tt1) end @@ -35,4 +35,4 @@ end alloc3 = @allocated foo() @test alloc2 == 0 @test alloc3 == 0 -end \ No newline at end of file +end diff --git a/test/test_aqua.jl b/test/test_aqua.jl index 5b3233a3..3fbe4e9d 100644 --- a/test/test_aqua.jl +++ b/test/test_aqua.jl @@ -1,5 +1,5 @@ using Aqua, Test @testset "Aqua.jl" begin - Aqua.test_all( AdaptiveArrayPools ) + Aqua.test_all(AdaptiveArrayPools) end diff --git a/test/test_backend_macro_expansion.jl b/test/test_backend_macro_expansion.jl index 9721cb05..1ac6fdc1 100644 --- a/test/test_backend_macro_expansion.jl +++ b/test/test_backend_macro_expansion.jl @@ -215,7 +215,7 @@ end @testset "Where clause preserved" begin - expr = @macroexpand @with_pool :cuda pool function generic_func(x::Vector{T}) where T + expr = @macroexpand @with_pool :cuda pool function generic_func(x::Vector{T}) where {T} v = acquire!(pool, T, length(x)) return sum(v) end @@ -379,7 +379,7 @@ expr = @macroexpand @with_pool :cuda pool function complex_func( x::AbstractArray{T}, y::AbstractArray{S}; - tol::Float64 = 1e-6 + tol::Float64 = 1.0e-6 ) where {T <: Real, S <: Real} v = acquire!(pool, T, size(x)) return sum(v) diff --git a/test/test_convenience.jl b/test/test_convenience.jl index 2b243d07..cdadaa6a 100644 --- a/test/test_convenience.jl +++ b/test/test_convenience.jl @@ -328,7 +328,7 @@ # 1D with explicit type result = @with_pool pool begin v = unsafe_zeros!(pool, Float64, size(x1d)) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test length(v) == 10 @test all(v .== 0.0) sum(v) @@ -338,7 +338,7 @@ # 1D without type result = @with_pool pool begin v = unsafe_zeros!(pool, size(x1d)) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test eltype(v) == Float64 sum(v) end @@ -347,7 +347,7 @@ # 2D with explicit type result = @with_pool pool begin m = unsafe_zeros!(pool, Float32, size(x2d)) - @test m isa Array{Float32,2} + @test m isa Array{Float32, 2} @test size(m) == (5, 8) sum(m) end @@ -361,7 +361,7 @@ # 1D with explicit type result = @with_pool pool begin v = unsafe_ones!(pool, Float64, size(x1d)) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test length(v) == 10 @test all(v .== 1.0) sum(v) @@ -371,7 +371,7 @@ # 1D without type result = @with_pool pool begin v = unsafe_ones!(pool, size(x1d)) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test eltype(v) == Float64 sum(v) end @@ -380,7 +380,7 @@ # 2D with explicit type result = @with_pool pool begin m = unsafe_ones!(pool, Float32, size(x2d)) - @test m isa Array{Float32,2} + @test m isa Array{Float32, 2} @test size(m) == (5, 8) sum(m) end @@ -424,7 +424,7 @@ @testset "returns raw array (not view)" begin v = unsafe_zeros!(pool, Float64, 10) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test !(v isa SubArray) @test length(v) == 10 @test all(v .== 0.0) @@ -432,7 +432,7 @@ @testset "default type (Float64)" begin v = unsafe_zeros!(pool, 10) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test !(v isa SubArray) @test eltype(v) == Float64 @test all(v .== 0.0) @@ -440,7 +440,7 @@ @testset "multi-dimensional" begin m = unsafe_zeros!(pool, Float64, 3, 4) - @test m isa Array{Float64,2} + @test m isa Array{Float64, 2} @test !(m isa SubArray) @test size(m) == (3, 4) @test all(m .== 0.0) @@ -469,7 +469,7 @@ @testset "returns raw array (not view)" begin v = unsafe_ones!(pool, Float64, 10) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test !(v isa SubArray) @test length(v) == 10 @test all(v .== 1.0) @@ -477,7 +477,7 @@ @testset "default type (Float64)" begin v = unsafe_ones!(pool, 10) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test !(v isa SubArray) @test eltype(v) == Float64 @test all(v .== 1.0) @@ -485,7 +485,7 @@ @testset "multi-dimensional" begin m = unsafe_ones!(pool, Float64, 3, 4) - @test m isa Array{Float64,2} + @test m isa Array{Float64, 2} @test !(m isa SubArray) @test size(m) == (3, 4) @test all(m .== 1.0) @@ -516,28 +516,28 @@ @testset "returns raw array (not view)" begin v = unsafe_similar!(pool, template) - @test v isa Array{Float64,2} + @test v isa Array{Float64, 2} @test !(v isa SubArray) @test size(v) == size(template) end @testset "different type" begin v = unsafe_similar!(pool, template, Float32) - @test v isa Array{Float32,2} + @test v isa Array{Float32, 2} @test !(v isa SubArray) @test size(v) == size(template) end @testset "different size" begin v = unsafe_similar!(pool, template, 5, 5) - @test v isa Array{Float64,2} + @test v isa Array{Float64, 2} @test !(v isa SubArray) @test size(v) == (5, 5) end @testset "different type and size" begin v = unsafe_similar!(pool, template, Int32, 3, 4) - @test v isa Array{Int32,2} + @test v isa Array{Int32, 2} @test !(v isa SubArray) @test size(v) == (3, 4) end @@ -556,7 +556,7 @@ @testset "unsafe_zeros! in macro" begin result = @with_pool pool begin v = unsafe_zeros!(pool, Float64, 100) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test !(v isa SubArray) v .+= 1.0 sum(v) @@ -567,7 +567,7 @@ @testset "unsafe_ones! in macro" begin result = @with_pool pool begin v = unsafe_ones!(pool, Float64, 50) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test !(v isa SubArray) sum(v) end @@ -578,7 +578,7 @@ template = rand(10) result = @with_pool pool begin v = unsafe_similar!(pool, template) - @test v isa Array{Float64,1} + @test v isa Array{Float64, 1} @test !(v isa SubArray) v .= 2.0 sum(v) diff --git a/test/test_coverage.jl b/test/test_coverage.jl index 5262b422..047bad51 100644 --- a/test/test_coverage.jl +++ b/test/test_coverage.jl @@ -49,26 +49,26 @@ # acquire! with vararg dims v = acquire!(pool, Float32, 3, 3) - @test v isa Array{Float32,2} + @test v isa Array{Float32, 2} # acquire! with tuple dims v = acquire!(pool, Float32, (2, 2)) - @test v isa Array{Float32,2} + @test v isa Array{Float32, 2} # acquire! with similar template = rand(Int32, 4, 4) v = acquire!(pool, template) - @test v isa Array{Int32,2} + @test v isa Array{Int32, 2} # unsafe_acquire! variants v = unsafe_acquire!(pool, Float32, 3, 3) - @test v isa Array{Float32,2} + @test v isa Array{Float32, 2} v = unsafe_acquire!(pool, Float32, (2, 2)) - @test v isa Array{Float32,2} + @test v isa Array{Float32, 2} v = unsafe_acquire!(pool, template) - @test v isa Array{Int32,2} + @test v isa Array{Int32, 2} end @testset "BackendNotLoadedError" begin @@ -90,14 +90,46 @@ # Test that errors are thrown for unknown backend fake_pool = DisabledPool{:fake_backend}() - @test try zeros!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end - @test try ones!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end - @test try similar!(fake_pool, rand(3)); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end - @test try unsafe_zeros!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end - @test try unsafe_ones!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end - @test try unsafe_similar!(fake_pool, rand(3)); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end - @test try acquire!(fake_pool, Float64, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end - @test try unsafe_acquire!(fake_pool, Float64, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end + @test try + zeros!(fake_pool, 10); false + catch e + e isa AdaptiveArrayPools.BackendNotLoadedError + end + @test try + ones!(fake_pool, 10); false + catch e + e isa AdaptiveArrayPools.BackendNotLoadedError + end + @test try + similar!(fake_pool, rand(3)); false + catch e + e isa AdaptiveArrayPools.BackendNotLoadedError + end + @test try + unsafe_zeros!(fake_pool, 10); false + catch e + e isa AdaptiveArrayPools.BackendNotLoadedError + end + @test try + unsafe_ones!(fake_pool, 10); false + catch e + e isa AdaptiveArrayPools.BackendNotLoadedError + end + @test try + unsafe_similar!(fake_pool, rand(3)); false + catch e + e isa AdaptiveArrayPools.BackendNotLoadedError + end + @test try + acquire!(fake_pool, Float64, 10); false + catch e + e isa AdaptiveArrayPools.BackendNotLoadedError + end + @test try + unsafe_acquire!(fake_pool, Float64, 10); false + catch e + e isa AdaptiveArrayPools.BackendNotLoadedError + end end @testset "_impl! delegators for DisabledPool" begin @@ -237,7 +269,12 @@ @test AdaptiveArrayPools._is_function_def(:(function foo() end)) == true @test AdaptiveArrayPools._is_function_def(:(foo(x) = x + 1)) == true @test AdaptiveArrayPools._is_function_def(:(x = 1)) == false - @test AdaptiveArrayPools._is_function_def(:(begin; end)) == false + @test AdaptiveArrayPools._is_function_def( + :( + begin + end + ) + ) == false # Test _filter_static_types types = Set{Any}([Float64, Int64]) @@ -392,7 +429,11 @@ @testset "_generate_function_pool_code" begin # Test function code generation with disable_pooling=true - func_expr = :(function bar(x) x + 1 end) + func_expr = :( + function bar(x) + x + 1 + end + ) result = AdaptiveArrayPools._generate_function_pool_code(:pool, func_expr, true, true, :cpu) @test result isa Expr @test result.head == :function @@ -410,7 +451,11 @@ @testset "_generate_function_pool_code_with_backend" begin # Test function code generation with backend - func_expr = :(function compute(x) x + 1 end) + func_expr = :( + function compute(x) + x + 1 + end + ) # With disable_pooling=true result1 = AdaptiveArrayPools._generate_function_pool_code_with_backend(:cpu, :pool, func_expr, true) diff --git a/test/test_disabled_pooling.jl b/test/test_disabled_pooling.jl index cc926212..a531fd78 100644 --- a/test/test_disabled_pooling.jl +++ b/test/test_disabled_pooling.jl @@ -73,7 +73,7 @@ println(" Subprocess failed: ", e) false finally - rm(test_file; force=true) + rm(test_file; force = true) end @test result == true diff --git a/test/test_fallback_reclamation.jl b/test/test_fallback_reclamation.jl index 05b578ed..36e959e7 100644 --- a/test/test_fallback_reclamation.jl +++ b/test/test_fallback_reclamation.jl @@ -12,17 +12,17 @@ using AdaptiveArrayPools: get_typed_pool!, _lazy_checkpoint!, _lazy_rewind!, """Get n_active for a fallback type (0 if type not in pool.others).""" function others_n_active(pool, ::Type{T}) where {T} - haskey(pool.others, T) ? pool.others[T].n_active : 0 + return haskey(pool.others, T) ? pool.others[T].n_active : 0 end """Get checkpoint stack length for a fallback type.""" function others_stack_len(pool, ::Type{T}) where {T} - haskey(pool.others, T) ? length(pool.others[T]._checkpoint_depths) : 0 + return haskey(pool.others, T) ? length(pool.others[T]._checkpoint_depths) : 0 end """Get all n_active values for pool.others entries.""" function all_others_n_active(pool) - Dict(T => tp.n_active for (T, tp) in pool.others) + return Dict(T => tp.n_active for (T, tp) in pool.others) end # ============================================================================== @@ -40,7 +40,7 @@ end # - Different Tag/N params = different concrete types = separate pool entries # - Each unique Dual variant occupies its own IdDict slot in pool.others struct FakeTag{F} end -struct FakeDual{Tag, V<:Real, N} +struct FakeDual{Tag, V <: Real, N} value::V partials::NTuple{N, V} end @@ -48,7 +48,7 @@ FakeDual{Tag, V, N}(v::V) where {Tag, V, N} = FakeDual{Tag, V, N}(v, ntuple(_ -> # Type aliases for readability const Dual_f1_11 = FakeDual{FakeTag{:f1}, Float64, 11} -const Dual_f1_4 = FakeDual{FakeTag{:f1}, Float64, 4} +const Dual_f1_4 = FakeDual{FakeTag{:f1}, Float64, 4} const Dual_f2_11 = FakeDual{FakeTag{:f2}, Float64, 11} # ============================================================================== @@ -57,1464 +57,1464 @@ const Dual_f2_11 = FakeDual{FakeTag{:f2}, Float64, 11} @testset "Fallback Reclamation" begin -@testset "1. Multiple distinct fallback types in single scope" begin - pool = AdaptiveArrayPool() + @testset "1. Multiple distinct fallback types in single scope" begin + pool = AdaptiveArrayPool() - checkpoint!(pool) - v1 = acquire!(pool, UInt8, 10) - v2 = acquire!(pool, Float16, 20) - v3 = acquire!(pool, Int16, 30) + checkpoint!(pool) + v1 = acquire!(pool, UInt8, 10) + v2 = acquire!(pool, Float16, 20) + v3 = acquire!(pool, Int16, 30) - @test others_n_active(pool, UInt8) == 1 - @test others_n_active(pool, Float16) == 1 - @test others_n_active(pool, Int16) == 1 + @test others_n_active(pool, UInt8) == 1 + @test others_n_active(pool, Float16) == 1 + @test others_n_active(pool, Int16) == 1 - rewind!(pool) + rewind!(pool) - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 - @test others_n_active(pool, Int16) == 0 -end + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 + @test others_n_active(pool, Int16) == 0 + end -@testset "1b. Multiple arrays per fallback type" begin - pool = AdaptiveArrayPool() + @testset "1b. Multiple arrays per fallback type" begin + pool = AdaptiveArrayPool() - checkpoint!(pool) - for _ in 1:5 - acquire!(pool, UInt8, 10) - acquire!(pool, Float16, 20) + checkpoint!(pool) + for _ in 1:5 + acquire!(pool, UInt8, 10) + acquire!(pool, Float16, 20) + end + @test others_n_active(pool, UInt8) == 5 + @test others_n_active(pool, Float16) == 5 + + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 end - @test others_n_active(pool, UInt8) == 5 - @test others_n_active(pool, Float16) == 5 - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 -end + # ============================================================================== + # 2. Deeply Nested Scopes (3+ levels) with Fallback Types + # ============================================================================== -# ============================================================================== -# 2. Deeply Nested Scopes (3+ levels) with Fallback Types -# ============================================================================== + @testset "2. Deep nesting (5 levels) with fallback types" begin + pool = AdaptiveArrayPool() + fallback_types = [UInt8, Float16, Int16, UInt16, Int8] -@testset "2. Deep nesting (5 levels) with fallback types" begin - pool = AdaptiveArrayPool() - fallback_types = [UInt8, Float16, Int16, UInt16, Int8] + # Acquire one per type at each depth level + for depth_level in 1:5 + checkpoint!(pool) + v = acquire!(pool, fallback_types[depth_level], 10 * depth_level) + @test others_n_active(pool, fallback_types[depth_level]) == 1 + end - # Acquire one per type at each depth level - for depth_level in 1:5 - checkpoint!(pool) - v = acquire!(pool, fallback_types[depth_level], 10 * depth_level) - @test others_n_active(pool, fallback_types[depth_level]) == 1 - end + # All 5 types active + for (i, T) in enumerate(fallback_types) + @test others_n_active(pool, T) == 1 + end - # All 5 types active - for (i, T) in enumerate(fallback_types) - @test others_n_active(pool, T) == 1 + # Unwind — each type should revert as we go + for depth_level in 5:-1:1 + rewind!(pool) + @test others_n_active(pool, fallback_types[depth_level]) == 0 + end end - # Unwind — each type should revert as we go - for depth_level in 5:-1:1 - rewind!(pool) - @test others_n_active(pool, fallback_types[depth_level]) == 0 - end -end + @testset "2b. Same fallback type across nested depths" begin + pool = AdaptiveArrayPool() -@testset "2b. Same fallback type across nested depths" begin - pool = AdaptiveArrayPool() + # Level 1: acquire 1 UInt8 + checkpoint!(pool) + acquire!(pool, UInt8, 10) + @test others_n_active(pool, UInt8) == 1 - # Level 1: acquire 1 UInt8 - checkpoint!(pool) - acquire!(pool, UInt8, 10) - @test others_n_active(pool, UInt8) == 1 + # Level 2: acquire 2 more + checkpoint!(pool) + acquire!(pool, UInt8, 20) + acquire!(pool, UInt8, 30) + @test others_n_active(pool, UInt8) == 3 - # Level 2: acquire 2 more - checkpoint!(pool) - acquire!(pool, UInt8, 20) - acquire!(pool, UInt8, 30) - @test others_n_active(pool, UInt8) == 3 + # Level 3: acquire 1 more + checkpoint!(pool) + acquire!(pool, UInt8, 40) + @test others_n_active(pool, UInt8) == 4 - # Level 3: acquire 1 more - checkpoint!(pool) - acquire!(pool, UInt8, 40) - @test others_n_active(pool, UInt8) == 4 + # Unwind level 3 + rewind!(pool) + @test others_n_active(pool, UInt8) == 3 - # Unwind level 3 - rewind!(pool) - @test others_n_active(pool, UInt8) == 3 + # Unwind level 2 + rewind!(pool) + @test others_n_active(pool, UInt8) == 1 - # Unwind level 2 - rewind!(pool) - @test others_n_active(pool, UInt8) == 1 + # Unwind level 1 + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + end - # Unwind level 1 - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + # ============================================================================== + # 3. @with_pool Macro with Fallback Types + # ============================================================================== -# ============================================================================== -# 3. @with_pool Macro with Fallback Types -# ============================================================================== + @testset "3. @with_pool macro with fallback types" begin + # The macro uses lazy checkpoint/rewind (use_typed=false path) when + # types aren't statically extractable + function helper_fallback!(pool) + acquire!(pool, UInt8, 50) + acquire!(pool, Float16, 50) + end -@testset "3. @with_pool macro with fallback types" begin - # The macro uses lazy checkpoint/rewind (use_typed=false path) when - # types aren't statically extractable - function helper_fallback!(pool) - acquire!(pool, UInt8, 50) - acquire!(pool, Float16, 50) - end + result = @with_pool pool begin + helper_fallback!(pool) + others_n_active(pool, UInt8) + end + @test result == 1 - result = @with_pool pool begin - helper_fallback!(pool) - others_n_active(pool, UInt8) + # After scope: verify task-local pool rewound fallback allocations + task_pool = AdaptiveArrayPools.get_task_local_pool() + @test others_n_active(task_pool, UInt8) == 0 + @test others_n_active(task_pool, Float16) == 0 end - @test result == 1 - - # After scope: verify task-local pool rewound fallback allocations - task_pool = AdaptiveArrayPools.get_task_local_pool() - @test others_n_active(task_pool, UInt8) == 0 - @test others_n_active(task_pool, Float16) == 0 -end -@testset "3b. @with_pool with static fallback type" begin - # acquire!(pool, UInt8, ...) — UInt8 is not a fixed slot, so macro - # goes through lazy path (since _fixed_slot_bit(UInt8) == 0) - result = @with_pool pool begin - v = acquire!(pool, UInt8, 100) - length(v) + @testset "3b. @with_pool with static fallback type" begin + # acquire!(pool, UInt8, ...) — UInt8 is not a fixed slot, so macro + # goes through lazy path (since _fixed_slot_bit(UInt8) == 0) + result = @with_pool pool begin + v = acquire!(pool, UInt8, 100) + length(v) + end + @test result == 100 end - @test result == 100 -end -@testset "3c. Nested @with_pool with fallback types" begin - result = @with_pool p1 begin - a = acquire!(p1, UInt8, 10) - inner = @with_pool p2 begin - b = acquire!(p2, UInt8, 20) - others_n_active(p2, UInt8) + @testset "3c. Nested @with_pool with fallback types" begin + result = @with_pool p1 begin + a = acquire!(p1, UInt8, 10) + inner = @with_pool p2 begin + b = acquire!(p2, UInt8, 20) + others_n_active(p2, UInt8) + end + # After inner scope rewinds, the task-local pool's UInt8 n_active + # should be back to 1 (only 'a' from outer scope) + (inner, others_n_active(p1, UInt8)) end - # After inner scope rewinds, the task-local pool's UInt8 n_active - # should be back to 1 (only 'a' from outer scope) - (inner, others_n_active(p1, UInt8)) + @test result[1] == 2 # inner scope had 2 (1 from outer + 1 from inner) + @test result[2] == 1 # after inner rewind, back to 1 end - @test result[1] == 2 # inner scope had 2 (1 from outer + 1 from inner) - @test result[2] == 1 # after inner rewind, back to 1 -end -# ============================================================================== -# 4. Lazy Mode with Fallback Types -# ============================================================================== + # ============================================================================== + # 4. Lazy Mode with Fallback Types + # ============================================================================== -@testset "4. Lazy checkpoint/rewind with fallback types" begin - pool = AdaptiveArrayPool() + @testset "4. Lazy checkpoint/rewind with fallback types" begin + pool = AdaptiveArrayPool() - _lazy_checkpoint!(pool) - v1 = acquire!(pool, UInt8, 10) - v2 = acquire!(pool, Float16, 20) + _lazy_checkpoint!(pool) + v1 = acquire!(pool, UInt8, 10) + v2 = acquire!(pool, Float16, 20) - @test others_n_active(pool, UInt8) == 1 - @test others_n_active(pool, Float16) == 1 - @test pool._touched_has_others[pool._current_depth] == true + @test others_n_active(pool, UInt8) == 1 + @test others_n_active(pool, Float16) == 1 + @test pool._touched_has_others[pool._current_depth] == true - _lazy_rewind!(pool) - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 -end + _lazy_rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 + end -@testset "4b. Lazy mode: pre-existing others get eagerly checkpointed" begin - pool = AdaptiveArrayPool() + @testset "4b. Lazy mode: pre-existing others get eagerly checkpointed" begin + pool = AdaptiveArrayPool() - # Pre-populate at global scope - checkpoint!(pool) - acquire!(pool, UInt8, 10) - @test others_n_active(pool, UInt8) == 1 - rewind!(pool) + # Pre-populate at global scope + checkpoint!(pool) + acquire!(pool, UInt8, 10) + @test others_n_active(pool, UInt8) == 1 + rewind!(pool) - # UInt8 pool exists now with n_active=0 - @test haskey(pool.others, UInt8) - @test others_n_active(pool, UInt8) == 0 + # UInt8 pool exists now with n_active=0 + @test haskey(pool.others, UInt8) + @test others_n_active(pool, UInt8) == 0 - # Now use lazy mode — pre-existing UInt8 should be eagerly checkpointed - _lazy_checkpoint!(pool) - acquire!(pool, UInt8, 20) - @test others_n_active(pool, UInt8) == 1 + # Now use lazy mode — pre-existing UInt8 should be eagerly checkpointed + _lazy_checkpoint!(pool) + acquire!(pool, UInt8, 20) + @test others_n_active(pool, UInt8) == 1 - _lazy_rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + _lazy_rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + end -@testset "4c. Lazy mode: new fallback type created during scope" begin - pool = AdaptiveArrayPool() + @testset "4c. Lazy mode: new fallback type created during scope" begin + pool = AdaptiveArrayPool() - _lazy_checkpoint!(pool) - # Int16 doesn't exist yet — created inside lazy scope - v = acquire!(pool, Int16, 10) - @test others_n_active(pool, Int16) == 1 + _lazy_checkpoint!(pool) + # Int16 doesn't exist yet — created inside lazy scope + v = acquire!(pool, Int16, 10) + @test others_n_active(pool, Int16) == 1 - _lazy_rewind!(pool) - # Should revert to 0 (auto-checkpoint sentinel covers new types) - @test others_n_active(pool, Int16) == 0 -end + _lazy_rewind!(pool) + # Should revert to 0 (auto-checkpoint sentinel covers new types) + @test others_n_active(pool, Int16) == 0 + end -# ============================================================================== -# 5. Typed-Lazy Mode with Fallback Types -# ============================================================================== + # ============================================================================== + # 5. Typed-Lazy Mode with Fallback Types + # ============================================================================== -@testset "5. Typed-lazy checkpoint/rewind with fallback types" begin - pool = AdaptiveArrayPool() + @testset "5. Typed-lazy checkpoint/rewind with fallback types" begin + pool = AdaptiveArrayPool() - # Pre-populate a fallback type - checkpoint!(pool) - acquire!(pool, UInt8, 10) - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 + # Pre-populate a fallback type + checkpoint!(pool) + acquire!(pool, UInt8, 10) + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 - # Parent acquires UInt8 - checkpoint!(pool) - acquire!(pool, UInt8, 5) - @test others_n_active(pool, UInt8) == 1 + # Parent acquires UInt8 + checkpoint!(pool) + acquire!(pool, UInt8, 5) + @test others_n_active(pool, UInt8) == 1 - # Child typed-lazy scope tracking Float64 - _typed_lazy_checkpoint!(pool, Float64) - acquire!(pool, UInt8, 15) # Helper touches fallback type - @test others_n_active(pool, UInt8) == 2 + # Child typed-lazy scope tracking Float64 + _typed_lazy_checkpoint!(pool, Float64) + acquire!(pool, UInt8, 15) # Helper touches fallback type + @test others_n_active(pool, UInt8) == 2 - _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) - # Must restore parent's n_active=1 (NOT 0) - @test others_n_active(pool, UInt8) == 1 + _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) + # Must restore parent's n_active=1 (NOT 0) + @test others_n_active(pool, UInt8) == 1 - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + end -@testset "5b. Typed-lazy with new fallback type in child scope" begin - pool = AdaptiveArrayPool() + @testset "5b. Typed-lazy with new fallback type in child scope" begin + pool = AdaptiveArrayPool() - checkpoint!(pool) - # Parent doesn't use any fallback types + checkpoint!(pool) + # Parent doesn't use any fallback types - _typed_lazy_checkpoint!(pool, Float64) - # Child creates new fallback type - acquire!(pool, Int16, 10) - @test others_n_active(pool, Int16) == 1 + _typed_lazy_checkpoint!(pool, Float64) + # Child creates new fallback type + acquire!(pool, Int16, 10) + @test others_n_active(pool, Int16) == 1 - _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) - @test others_n_active(pool, Int16) == 0 + _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) + @test others_n_active(pool, Int16) == 0 - rewind!(pool) -end + rewind!(pool) + end -# ============================================================================== -# 6. Mixed Fixed + Fallback Types -# ============================================================================== + # ============================================================================== + # 6. Mixed Fixed + Fallback Types + # ============================================================================== -@testset "6. Mixed fixed and fallback types rewind correctly" begin - pool = AdaptiveArrayPool() + @testset "6. Mixed fixed and fallback types rewind correctly" begin + pool = AdaptiveArrayPool() - checkpoint!(pool) - f64_v = acquire!(pool, Float64, 10) - u8_v = acquire!(pool, UInt8, 20) - i32_v = acquire!(pool, Int32, 30) - f16_v = acquire!(pool, Float16, 40) + checkpoint!(pool) + f64_v = acquire!(pool, Float64, 10) + u8_v = acquire!(pool, UInt8, 20) + i32_v = acquire!(pool, Int32, 30) + f16_v = acquire!(pool, Float16, 40) - @test pool.float64.n_active == 1 - @test pool.int32.n_active == 1 - @test others_n_active(pool, UInt8) == 1 - @test others_n_active(pool, Float16) == 1 + @test pool.float64.n_active == 1 + @test pool.int32.n_active == 1 + @test others_n_active(pool, UInt8) == 1 + @test others_n_active(pool, Float16) == 1 - rewind!(pool) + rewind!(pool) - @test pool.float64.n_active == 0 - @test pool.int32.n_active == 0 - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 -end + @test pool.float64.n_active == 0 + @test pool.int32.n_active == 0 + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 + end -@testset "6b. Lazy mode: mixed fixed + fallback" begin - pool = AdaptiveArrayPool() + @testset "6b. Lazy mode: mixed fixed + fallback" begin + pool = AdaptiveArrayPool() - _lazy_checkpoint!(pool) - acquire!(pool, Float64, 10) - acquire!(pool, UInt8, 20) - acquire!(pool, Int32, 30) - acquire!(pool, Float16, 40) + _lazy_checkpoint!(pool) + acquire!(pool, Float64, 10) + acquire!(pool, UInt8, 20) + acquire!(pool, Int32, 30) + acquire!(pool, Float16, 40) - @test pool.float64.n_active == 1 - @test pool.int32.n_active == 1 - @test others_n_active(pool, UInt8) == 1 - @test others_n_active(pool, Float16) == 1 + @test pool.float64.n_active == 1 + @test pool.int32.n_active == 1 + @test others_n_active(pool, UInt8) == 1 + @test others_n_active(pool, Float16) == 1 - _lazy_rewind!(pool) + _lazy_rewind!(pool) - @test pool.float64.n_active == 0 - @test pool.int32.n_active == 0 - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 -end + @test pool.float64.n_active == 0 + @test pool.int32.n_active == 0 + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 + end -# ============================================================================== -# 7. Fallback Type Rewind → Re-acquire Cycle -# ============================================================================== + # ============================================================================== + # 7. Fallback Type Rewind → Re-acquire Cycle + # ============================================================================== -@testset "7. Rewind then re-acquire fallback type reuses memory" begin - pool = AdaptiveArrayPool() - - # First cycle - checkpoint!(pool) - v1 = acquire!(pool, UInt8, 100) - v1 .= 0x42 - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 - - # Second cycle — should reuse existing backing vector - checkpoint!(pool) - v2 = acquire!(pool, UInt8, 100) - @test others_n_active(pool, UInt8) == 1 - # The backing vector should be reused (same object) - @test parent(v1) === parent(v2) - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + @testset "7. Rewind then re-acquire fallback type reuses memory" begin + pool = AdaptiveArrayPool() -# ============================================================================== -# 8. Checkpoint Stack Invariants (No Stack Leak) -# ============================================================================== + # First cycle + checkpoint!(pool) + v1 = acquire!(pool, UInt8, 100) + v1 .= 0x42 + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 -@testset "8. Full checkpoint: no stack leak over many iterations" begin - pool = AdaptiveArrayPool() + # Second cycle — should reuse existing backing vector + checkpoint!(pool) + v2 = acquire!(pool, UInt8, 100) + @test others_n_active(pool, UInt8) == 1 + # The backing vector should be reused (same object) + @test parent(v1) === parent(v2) + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + end - # Pre-populate - checkpoint!(pool) - acquire!(pool, UInt8, 10) - rewind!(pool) + # ============================================================================== + # 8. Checkpoint Stack Invariants (No Stack Leak) + # ============================================================================== - uint8_pool = pool.others[UInt8] - initial_stack_len = length(uint8_pool._checkpoint_depths) + @testset "8. Full checkpoint: no stack leak over many iterations" begin + pool = AdaptiveArrayPool() - for _ in 1:100 + # Pre-populate checkpoint!(pool) acquire!(pool, UInt8, 10) rewind!(pool) - end - @test length(uint8_pool._checkpoint_depths) == initial_stack_len - @test others_n_active(pool, UInt8) == 0 -end + uint8_pool = pool.others[UInt8] + initial_stack_len = length(uint8_pool._checkpoint_depths) -@testset "8b. Lazy checkpoint: no stack leak over many iterations" begin - pool = AdaptiveArrayPool() + for _ in 1:100 + checkpoint!(pool) + acquire!(pool, UInt8, 10) + rewind!(pool) + end - # Pre-populate - checkpoint!(pool) - acquire!(pool, UInt8, 10) - rewind!(pool) + @test length(uint8_pool._checkpoint_depths) == initial_stack_len + @test others_n_active(pool, UInt8) == 0 + end - uint8_pool = pool.others[UInt8] - initial_stack_len = length(uint8_pool._checkpoint_depths) + @testset "8b. Lazy checkpoint: no stack leak over many iterations" begin + pool = AdaptiveArrayPool() - for _ in 1:100 - _lazy_checkpoint!(pool) + # Pre-populate + checkpoint!(pool) acquire!(pool, UInt8, 10) - _lazy_rewind!(pool) - end + rewind!(pool) - @test length(uint8_pool._checkpoint_depths) == initial_stack_len - @test others_n_active(pool, UInt8) == 0 -end + uint8_pool = pool.others[UInt8] + initial_stack_len = length(uint8_pool._checkpoint_depths) -@testset "8c. Typed-lazy checkpoint: no stack leak over many iterations" begin - pool = AdaptiveArrayPool() + for _ in 1:100 + _lazy_checkpoint!(pool) + acquire!(pool, UInt8, 10) + _lazy_rewind!(pool) + end - # Pre-populate - checkpoint!(pool) - acquire!(pool, UInt8, 10) - rewind!(pool) + @test length(uint8_pool._checkpoint_depths) == initial_stack_len + @test others_n_active(pool, UInt8) == 0 + end - uint8_pool = pool.others[UInt8] - initial_stack_len = length(uint8_pool._checkpoint_depths) + @testset "8c. Typed-lazy checkpoint: no stack leak over many iterations" begin + pool = AdaptiveArrayPool() - for _ in 1:100 - _typed_lazy_checkpoint!(pool, Float64) + # Pre-populate + checkpoint!(pool) acquire!(pool, UInt8, 10) - _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) - end + rewind!(pool) - @test length(uint8_pool._checkpoint_depths) == initial_stack_len - @test others_n_active(pool, UInt8) == 0 -end + uint8_pool = pool.others[UInt8] + initial_stack_len = length(uint8_pool._checkpoint_depths) -@testset "8d. @with_pool macro: no stack leak over many iterations" begin - pool_ref = Ref{AdaptiveArrayPool}() + for _ in 1:100 + _typed_lazy_checkpoint!(pool, Float64) + acquire!(pool, UInt8, 10) + _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) + end - # Pre-populate - @with_pool pool begin - acquire!(pool, UInt8, 10) - pool_ref[] = pool + @test length(uint8_pool._checkpoint_depths) == initial_stack_len + @test others_n_active(pool, UInt8) == 0 end - uint8_pool = pool_ref[].others[UInt8] - initial_stack_len = length(uint8_pool._checkpoint_depths) + @testset "8d. @with_pool macro: no stack leak over many iterations" begin + pool_ref = Ref{AdaptiveArrayPool}() - for _ in 1:100 + # Pre-populate @with_pool pool begin acquire!(pool, UInt8, 10) + pool_ref[] = pool end - end - - @test length(uint8_pool._checkpoint_depths) == initial_stack_len - @test uint8_pool.n_active == 0 -end -# ============================================================================== -# 9. n_active Monotonicity (Memory Leak Detection) -# ============================================================================== + uint8_pool = pool_ref[].others[UInt8] + initial_stack_len = length(uint8_pool._checkpoint_depths) -@testset "9. n_active doesn't grow over repeated checkpoint/rewind cycles" begin - pool = AdaptiveArrayPool() - fallback_types = [UInt8, Float16, Int16] + for _ in 1:100 + @with_pool pool begin + acquire!(pool, UInt8, 10) + end + end - # Pre-populate all types - checkpoint!(pool) - for T in fallback_types - acquire!(pool, T, 10) + @test length(uint8_pool._checkpoint_depths) == initial_stack_len + @test uint8_pool.n_active == 0 end - rewind!(pool) - # Run 200 iterations — n_active should always return to 0 - for iter in 1:200 + # ============================================================================== + # 9. n_active Monotonicity (Memory Leak Detection) + # ============================================================================== + + @testset "9. n_active doesn't grow over repeated checkpoint/rewind cycles" begin + pool = AdaptiveArrayPool() + fallback_types = [UInt8, Float16, Int16] + + # Pre-populate all types checkpoint!(pool) for T in fallback_types acquire!(pool, T, 10) end rewind!(pool) - for T in fallback_types - n = others_n_active(pool, T) - if n != 0 - @test n == 0 # Will show which type leaked - @info "LEAK DETECTED" iteration=iter type=T n_active=n - break + # Run 200 iterations — n_active should always return to 0 + for iter in 1:200 + checkpoint!(pool) + for T in fallback_types + acquire!(pool, T, 10) end - end - end - - # Final check - for T in fallback_types - @test others_n_active(pool, T) == 0 - end -end + rewind!(pool) -@testset "9b. Lazy mode: n_active doesn't grow over iterations" begin - pool = AdaptiveArrayPool() + for T in fallback_types + n = others_n_active(pool, T) + if n != 0 + @test n == 0 # Will show which type leaked + @info "LEAK DETECTED" iteration = iter type = T n_active = n + break + end + end + end - for iter in 1:200 - _lazy_checkpoint!(pool) - acquire!(pool, UInt8, 10) - acquire!(pool, Float16, 20) - _lazy_rewind!(pool) + # Final check + for T in fallback_types + @test others_n_active(pool, T) == 0 + end end - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 -end + @testset "9b. Lazy mode: n_active doesn't grow over iterations" begin + pool = AdaptiveArrayPool() -@testset "9c. @with_pool: n_active doesn't grow over iterations" begin - for iter in 1:200 - @with_pool pool begin + for iter in 1:200 + _lazy_checkpoint!(pool) acquire!(pool, UInt8, 10) acquire!(pool, Float16, 20) + _lazy_rewind!(pool) end - end - # Verify task-local pool is clean - pool = AdaptiveArrayPools.get_task_local_pool() - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 -end + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 + end -# ============================================================================== -# 10. Backing Vector Count Stability (Pool Growth Detection) -# ============================================================================== + @testset "9c. @with_pool: n_active doesn't grow over iterations" begin + for iter in 1:200 + @with_pool pool begin + acquire!(pool, UInt8, 10) + acquire!(pool, Float16, 20) + end + end -@testset "10. Pool vectors don't grow over checkpoint/rewind cycles" begin - pool = AdaptiveArrayPool() + # Verify task-local pool is clean + pool = AdaptiveArrayPools.get_task_local_pool() + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 + end - # Warmup: first cycle creates backing vectors - checkpoint!(pool) - acquire!(pool, UInt8, 100) - acquire!(pool, UInt8, 200) - acquire!(pool, Float16, 50) - rewind!(pool) + # ============================================================================== + # 10. Backing Vector Count Stability (Pool Growth Detection) + # ============================================================================== - # Record vector counts after warmup - u8_vec_count = length(pool.others[UInt8].vectors) - f16_vec_count = length(pool.others[Float16].vectors) + @testset "10. Pool vectors don't grow over checkpoint/rewind cycles" begin + pool = AdaptiveArrayPool() - # Run many iterations — vector count should stay stable - for _ in 1:100 + # Warmup: first cycle creates backing vectors checkpoint!(pool) acquire!(pool, UInt8, 100) acquire!(pool, UInt8, 200) acquire!(pool, Float16, 50) rewind!(pool) - end - @test length(pool.others[UInt8].vectors) == u8_vec_count - @test length(pool.others[Float16].vectors) == f16_vec_count -end + # Record vector counts after warmup + u8_vec_count = length(pool.others[UInt8].vectors) + f16_vec_count = length(pool.others[Float16].vectors) -@testset "10b. Lazy mode: pool vectors don't grow" begin - pool = AdaptiveArrayPool() + # Run many iterations — vector count should stay stable + for _ in 1:100 + checkpoint!(pool) + acquire!(pool, UInt8, 100) + acquire!(pool, UInt8, 200) + acquire!(pool, Float16, 50) + rewind!(pool) + end - # Warmup - _lazy_checkpoint!(pool) - acquire!(pool, UInt8, 100) - acquire!(pool, Float16, 50) - _lazy_rewind!(pool) + @test length(pool.others[UInt8].vectors) == u8_vec_count + @test length(pool.others[Float16].vectors) == f16_vec_count + end - u8_vec_count = length(pool.others[UInt8].vectors) - f16_vec_count = length(pool.others[Float16].vectors) + @testset "10b. Lazy mode: pool vectors don't grow" begin + pool = AdaptiveArrayPool() - for _ in 1:100 + # Warmup _lazy_checkpoint!(pool) acquire!(pool, UInt8, 100) acquire!(pool, Float16, 50) _lazy_rewind!(pool) - end - @test length(pool.others[UInt8].vectors) == u8_vec_count - @test length(pool.others[Float16].vectors) == f16_vec_count -end - -# ============================================================================== -# 11. unsafe_acquire! with Fallback Types -# ============================================================================== + u8_vec_count = length(pool.others[UInt8].vectors) + f16_vec_count = length(pool.others[Float16].vectors) -@testset "11. unsafe_acquire! with fallback types" begin - pool = AdaptiveArrayPool() - - checkpoint!(pool) - v = unsafe_acquire!(pool, UInt8, 10) - @test v isa Array{UInt8, 1} - @test length(v) == 10 - @test others_n_active(pool, UInt8) == 1 + for _ in 1:100 + _lazy_checkpoint!(pool) + acquire!(pool, UInt8, 100) + acquire!(pool, Float16, 50) + _lazy_rewind!(pool) + end - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + @test length(pool.others[UInt8].vectors) == u8_vec_count + @test length(pool.others[Float16].vectors) == f16_vec_count + end -@testset "11b. unsafe_acquire! N-D with fallback types" begin - pool = AdaptiveArrayPool() + # ============================================================================== + # 11. unsafe_acquire! with Fallback Types + # ============================================================================== - checkpoint!(pool) - m = unsafe_acquire!(pool, UInt8, 3, 4) - @test m isa Array{UInt8, 2} - @test size(m) == (3, 4) - @test others_n_active(pool, UInt8) == 1 + @testset "11. unsafe_acquire! with fallback types" begin + pool = AdaptiveArrayPool() - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + checkpoint!(pool) + v = unsafe_acquire!(pool, UInt8, 10) + @test v isa Array{UInt8, 1} + @test length(v) == 10 + @test others_n_active(pool, UInt8) == 1 -# ============================================================================== -# 12. Convenience Functions with Fallback Types -# ============================================================================== + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + end -@testset "12. zeros!/ones!/similar! with fallback types" begin - pool = AdaptiveArrayPool() + @testset "11b. unsafe_acquire! N-D with fallback types" begin + pool = AdaptiveArrayPool() - checkpoint!(pool) - z = zeros!(pool, UInt8, 10) - @test all(z .== 0) - @test others_n_active(pool, UInt8) == 1 + checkpoint!(pool) + m = unsafe_acquire!(pool, UInt8, 3, 4) + @test m isa Array{UInt8, 2} + @test size(m) == (3, 4) + @test others_n_active(pool, UInt8) == 1 - o = ones!(pool, UInt8, 10) - @test all(o .== 1) - @test others_n_active(pool, UInt8) == 2 + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + end - src = UInt8[1, 2, 3] - s = similar!(pool, src) - @test length(s) == 3 - @test others_n_active(pool, UInt8) == 3 + # ============================================================================== + # 12. Convenience Functions with Fallback Types + # ============================================================================== - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + @testset "12. zeros!/ones!/similar! with fallback types" begin + pool = AdaptiveArrayPool() -# ============================================================================== -# 13. Exception Safety -# ============================================================================== + checkpoint!(pool) + z = zeros!(pool, UInt8, 10) + @test all(z .== 0) + @test others_n_active(pool, UInt8) == 1 -@testset "13. Exception during fallback acquire doesn't leak" begin - pool = AdaptiveArrayPool() + o = ones!(pool, UInt8, 10) + @test all(o .== 1) + @test others_n_active(pool, UInt8) == 2 - checkpoint!(pool) - acquire!(pool, UInt8, 10) - @test others_n_active(pool, UInt8) == 1 + src = UInt8[1, 2, 3] + s = similar!(pool, src) + @test length(s) == 3 + @test others_n_active(pool, UInt8) == 3 - try - checkpoint!(pool) - acquire!(pool, UInt8, 20) - @test others_n_active(pool, UInt8) == 2 - error("simulated failure") - catch rewind!(pool) + @test others_n_active(pool, UInt8) == 0 end - @test others_n_active(pool, UInt8) == 1 - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + # ============================================================================== + # 13. Exception Safety + # ============================================================================== -@testset "13b. @with_pool exception safety with fallback types" begin - try - @with_pool pool begin - acquire!(pool, UInt8, 10) - acquire!(pool, Float16, 20) + @testset "13. Exception during fallback acquire doesn't leak" begin + pool = AdaptiveArrayPool() + + checkpoint!(pool) + acquire!(pool, UInt8, 10) + @test others_n_active(pool, UInt8) == 1 + + try + checkpoint!(pool) + acquire!(pool, UInt8, 20) + @test others_n_active(pool, UInt8) == 2 error("simulated failure") + catch + rewind!(pool) end - catch - end - - # After exception + rewind via finally, pool should be clean - pool = AdaptiveArrayPools.get_task_local_pool() - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 -end -# ============================================================================== -# 14. Depth Tracking Consistency -# ============================================================================== + @test others_n_active(pool, UInt8) == 1 + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + end -@testset "14. _current_depth returns to 1 after cleanup" begin - pool = AdaptiveArrayPool() - @test pool._current_depth == 1 + @testset "13b. @with_pool exception safety with fallback types" begin + try + @with_pool pool begin + acquire!(pool, UInt8, 10) + acquire!(pool, Float16, 20) + error("simulated failure") + end + catch + end - checkpoint!(pool) - acquire!(pool, UInt8, 10) - @test pool._current_depth == 2 + # After exception + rewind via finally, pool should be clean + pool = AdaptiveArrayPools.get_task_local_pool() + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 + end - checkpoint!(pool) - acquire!(pool, Float16, 20) - @test pool._current_depth == 3 + # ============================================================================== + # 14. Depth Tracking Consistency + # ============================================================================== - rewind!(pool) - @test pool._current_depth == 2 + @testset "14. _current_depth returns to 1 after cleanup" begin + pool = AdaptiveArrayPool() + @test pool._current_depth == 1 - rewind!(pool) - @test pool._current_depth == 1 -end + checkpoint!(pool) + acquire!(pool, UInt8, 10) + @test pool._current_depth == 2 -@testset "14b. _touched_has_others stack cleaned properly" begin - pool = AdaptiveArrayPool() - @test length(pool._touched_has_others) == 1 # sentinel + checkpoint!(pool) + acquire!(pool, Float16, 20) + @test pool._current_depth == 3 - checkpoint!(pool) - acquire!(pool, UInt8, 10) - @test length(pool._touched_has_others) == 2 - @test pool._touched_has_others[2] == true + rewind!(pool) + @test pool._current_depth == 2 - rewind!(pool) - @test length(pool._touched_has_others) == 1 # back to sentinel -end + rewind!(pool) + @test pool._current_depth == 1 + end -# ============================================================================== -# 15. Custom Struct Types as Fallback -# ============================================================================== + @testset "14b. _touched_has_others stack cleaned properly" begin + pool = AdaptiveArrayPool() + @test length(pool._touched_has_others) == 1 # sentinel -@testset "15. Custom struct type as fallback" begin - pool = AdaptiveArrayPool() + checkpoint!(pool) + acquire!(pool, UInt8, 10) + @test length(pool._touched_has_others) == 2 + @test pool._touched_has_others[2] == true - checkpoint!(pool) - v = acquire!(pool, MyTestElement, 5) - @test v isa SubArray - @test length(v) == 5 - @test eltype(v) == MyTestElement - @test others_n_active(pool, MyTestElement) == 1 + rewind!(pool) + @test length(pool._touched_has_others) == 1 # back to sentinel + end - rewind!(pool) - @test others_n_active(pool, MyTestElement) == 0 -end + # ============================================================================== + # 15. Custom Struct Types as Fallback + # ============================================================================== -@testset "15b. Custom struct: repeated cycles don't leak" begin - pool = AdaptiveArrayPool() + @testset "15. Custom struct type as fallback" begin + pool = AdaptiveArrayPool() - for _ in 1:50 checkpoint!(pool) - acquire!(pool, MyTestElement, 10) + v = acquire!(pool, MyTestElement, 5) + @test v isa SubArray + @test length(v) == 5 + @test eltype(v) == MyTestElement + @test others_n_active(pool, MyTestElement) == 1 + rewind!(pool) + @test others_n_active(pool, MyTestElement) == 0 end - @test others_n_active(pool, MyTestElement) == 0 - @test length(pool.others[MyTestElement].vectors) == 1 # reuses single backing -end - -# ============================================================================== -# 16. Full Mode ↔ Lazy Mode Transitions with Fallback -# ============================================================================== + @testset "15b. Custom struct: repeated cycles don't leak" begin + pool = AdaptiveArrayPool() -@testset "16. Parent full checkpoint, child lazy, fallback touched" begin - pool = AdaptiveArrayPool() + for _ in 1:50 + checkpoint!(pool) + acquire!(pool, MyTestElement, 10) + rewind!(pool) + end - # Pre-populate UInt8 - checkpoint!(pool) - acquire!(pool, UInt8, 10) - rewind!(pool) + @test others_n_active(pool, MyTestElement) == 0 + @test length(pool.others[MyTestElement].vectors) == 1 # reuses single backing + end - # Parent: full checkpoint, acquires UInt8 - checkpoint!(pool) - acquire!(pool, UInt8, 5) - @test others_n_active(pool, UInt8) == 1 + # ============================================================================== + # 16. Full Mode ↔ Lazy Mode Transitions with Fallback + # ============================================================================== - # Child: lazy checkpoint, touches same fallback type - _lazy_checkpoint!(pool) - acquire!(pool, UInt8, 15) - @test others_n_active(pool, UInt8) == 2 + @testset "16. Parent full checkpoint, child lazy, fallback touched" begin + pool = AdaptiveArrayPool() - _lazy_rewind!(pool) - @test others_n_active(pool, UInt8) == 1 # parent's UInt8 preserved + # Pre-populate UInt8 + checkpoint!(pool) + acquire!(pool, UInt8, 10) + rewind!(pool) - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + # Parent: full checkpoint, acquires UInt8 + checkpoint!(pool) + acquire!(pool, UInt8, 5) + @test others_n_active(pool, UInt8) == 1 -@testset "16b. Parent lazy, child full checkpoint, fallback touched" begin - pool = AdaptiveArrayPool() + # Child: lazy checkpoint, touches same fallback type + _lazy_checkpoint!(pool) + acquire!(pool, UInt8, 15) + @test others_n_active(pool, UInt8) == 2 - _lazy_checkpoint!(pool) - acquire!(pool, UInt8, 10) - @test others_n_active(pool, UInt8) == 1 + _lazy_rewind!(pool) + @test others_n_active(pool, UInt8) == 1 # parent's UInt8 preserved - # Child: full checkpoint - checkpoint!(pool) - acquire!(pool, UInt8, 20) - @test others_n_active(pool, UInt8) == 2 + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + end - rewind!(pool) - @test others_n_active(pool, UInt8) == 1 + @testset "16b. Parent lazy, child full checkpoint, fallback touched" begin + pool = AdaptiveArrayPool() - _lazy_rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + _lazy_checkpoint!(pool) + acquire!(pool, UInt8, 10) + @test others_n_active(pool, UInt8) == 1 -@testset "16c. Parent full, child typed-lazy (Float64), helper touches fallback" begin - pool = AdaptiveArrayPool() + # Child: full checkpoint + checkpoint!(pool) + acquire!(pool, UInt8, 20) + @test others_n_active(pool, UInt8) == 2 - # Pre-populate - checkpoint!(pool) - acquire!(pool, UInt8, 10) - rewind!(pool) + rewind!(pool) + @test others_n_active(pool, UInt8) == 1 - # Parent full checkpoint - checkpoint!(pool) - acquire!(pool, UInt8, 5) - @test others_n_active(pool, UInt8) == 1 + _lazy_rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + end - # Child typed-lazy tracking Float64, helper acquires UInt8 - _typed_lazy_checkpoint!(pool, Float64) - acquire!(pool, Float64, 10) # tracked type - acquire!(pool, UInt8, 15) # untracked fallback - @test others_n_active(pool, UInt8) == 2 + @testset "16c. Parent full, child typed-lazy (Float64), helper touches fallback" begin + pool = AdaptiveArrayPool() - _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) - @test others_n_active(pool, UInt8) == 1 # parent's UInt8 preserved - @test pool.float64.n_active == 0 # tracked type also cleaned + # Pre-populate + checkpoint!(pool) + acquire!(pool, UInt8, 10) + rewind!(pool) - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + # Parent full checkpoint + checkpoint!(pool) + acquire!(pool, UInt8, 5) + @test others_n_active(pool, UInt8) == 1 -# ============================================================================== -# 17. Stress Test: Simulated Realistic Workload -# ============================================================================== + # Child typed-lazy tracking Float64, helper acquires UInt8 + _typed_lazy_checkpoint!(pool, Float64) + acquire!(pool, Float64, 10) # tracked type + acquire!(pool, UInt8, 15) # untracked fallback + @test others_n_active(pool, UInt8) == 2 -@testset "17. Realistic workload: nested function calls with fallback types" begin - pool = AdaptiveArrayPool() + _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) + @test others_n_active(pool, UInt8) == 1 # parent's UInt8 preserved + @test pool.float64.n_active == 0 # tracked type also cleaned - function inner_compute!(pool) - a = acquire!(pool, UInt8, 100) - b = acquire!(pool, Float16, 50) - a .= 0x01 - b .= Float16(2.0) - sum(a) + sum(b) + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 end - function middle_compute!(pool) - checkpoint!(pool) - try - x = acquire!(pool, Float64, 10) - x .= 1.0 - result = inner_compute!(pool) - return sum(x) + result - finally - rewind!(pool) + # ============================================================================== + # 17. Stress Test: Simulated Realistic Workload + # ============================================================================== + + @testset "17. Realistic workload: nested function calls with fallback types" begin + pool = AdaptiveArrayPool() + + function inner_compute!(pool) + a = acquire!(pool, UInt8, 100) + b = acquire!(pool, Float16, 50) + a .= 0x01 + b .= Float16(2.0) + sum(a) + sum(b) end - end - # Outer scope - for _ in 1:100 - checkpoint!(pool) - try - r = middle_compute!(pool) - @test r ≈ 10.0 + 100.0 + 100.0 # 10 Float64 + 100 UInt8 + 50 Float16 - finally - rewind!(pool) + function middle_compute!(pool) + checkpoint!(pool) + try + x = acquire!(pool, Float64, 10) + x .= 1.0 + result = inner_compute!(pool) + return sum(x) + result + finally + rewind!(pool) + end end - end - # After all iterations: no leaks - @test pool.float64.n_active == 0 - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 + # Outer scope + for _ in 1:100 + checkpoint!(pool) + try + r = middle_compute!(pool) + @test r ≈ 10.0 + 100.0 + 100.0 # 10 Float64 + 100 UInt8 + 50 Float16 + finally + rewind!(pool) + end + end - # Backing vectors: should not have grown - @test length(pool.others[UInt8].vectors) == 1 - @test length(pool.others[Float16].vectors) == 1 -end + # After all iterations: no leaks + @test pool.float64.n_active == 0 + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 -@testset "17b. @with_pool stress: 500 iterations with multiple fallback types" begin - # Warmup cycle to populate task-local pool (may already have entries from prior tests) - @with_pool pool begin - acquire!(pool, UInt8, 10) - acquire!(pool, Float16, 20) - acquire!(pool, Int16, 30) + # Backing vectors: should not have grown + @test length(pool.others[UInt8].vectors) == 1 + @test length(pool.others[Float16].vectors) == 1 end - pool = AdaptiveArrayPools.get_task_local_pool() - u8_baseline = length(pool.others[UInt8].vectors) - f16_baseline = length(pool.others[Float16].vectors) - i16_baseline = length(pool.others[Int16].vectors) - - for _ in 1:500 + @testset "17b. @with_pool stress: 500 iterations with multiple fallback types" begin + # Warmup cycle to populate task-local pool (may already have entries from prior tests) @with_pool pool begin acquire!(pool, UInt8, 10) acquire!(pool, Float16, 20) acquire!(pool, Int16, 30) end - end - pool = AdaptiveArrayPools.get_task_local_pool() - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 - @test others_n_active(pool, Int16) == 0 + pool = AdaptiveArrayPools.get_task_local_pool() + u8_baseline = length(pool.others[UInt8].vectors) + f16_baseline = length(pool.others[Float16].vectors) + i16_baseline = length(pool.others[Int16].vectors) - # Backing vectors should not have grown beyond warmup baseline - @test length(pool.others[UInt8].vectors) == u8_baseline - @test length(pool.others[Float16].vectors) == f16_baseline - @test length(pool.others[Int16].vectors) == i16_baseline -end - -# ============================================================================== -# 18. Memory Leak Canary: Total Pool Size Stability -# ============================================================================== - -@testset "18. Total others pool size doesn't grow unbounded" begin - pool = AdaptiveArrayPool() - - # Warmup - checkpoint!(pool) - acquire!(pool, UInt8, 1000) - acquire!(pool, Float16, 1000) - rewind!(pool) - - # Measure baseline memory footprint - function total_backing_bytes(pool) - total = 0 - for (T, tp) in pool.others - for v in tp.vectors - total += sizeof(v) + for _ in 1:500 + @with_pool pool begin + acquire!(pool, UInt8, 10) + acquire!(pool, Float16, 20) + acquire!(pool, Int16, 30) end end - total + + pool = AdaptiveArrayPools.get_task_local_pool() + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 + @test others_n_active(pool, Int16) == 0 + + # Backing vectors should not have grown beyond warmup baseline + @test length(pool.others[UInt8].vectors) == u8_baseline + @test length(pool.others[Float16].vectors) == f16_baseline + @test length(pool.others[Int16].vectors) == i16_baseline end - baseline_bytes = total_backing_bytes(pool) + # ============================================================================== + # 18. Memory Leak Canary: Total Pool Size Stability + # ============================================================================== - # Run many cycles - for _ in 1:500 + @testset "18. Total others pool size doesn't grow unbounded" begin + pool = AdaptiveArrayPool() + + # Warmup checkpoint!(pool) acquire!(pool, UInt8, 1000) acquire!(pool, Float16, 1000) rewind!(pool) + + # Measure baseline memory footprint + function total_backing_bytes(pool) + total = 0 + for (T, tp) in pool.others + for v in tp.vectors + total += sizeof(v) + end + end + total + end + + baseline_bytes = total_backing_bytes(pool) + + # Run many cycles + for _ in 1:500 + checkpoint!(pool) + acquire!(pool, UInt8, 1000) + acquire!(pool, Float16, 1000) + rewind!(pool) + end + + final_bytes = total_backing_bytes(pool) + @test final_bytes == baseline_bytes # No growth end - final_bytes = total_backing_bytes(pool) - @test final_bytes == baseline_bytes # No growth -end + # ============================================================================== + # 19. reset! and empty! Properly Handle Fallback Types + # ============================================================================== -# ============================================================================== -# 19. reset! and empty! Properly Handle Fallback Types -# ============================================================================== + @testset "19. reset! clears fallback n_active but preserves vectors" begin + pool = AdaptiveArrayPool() -@testset "19. reset! clears fallback n_active but preserves vectors" begin - pool = AdaptiveArrayPool() + checkpoint!(pool) + acquire!(pool, UInt8, 100) + acquire!(pool, Float16, 200) + # Don't rewind — simulate leaked state - checkpoint!(pool) - acquire!(pool, UInt8, 100) - acquire!(pool, Float16, 200) - # Don't rewind — simulate leaked state + reset!(pool) - reset!(pool) + @test others_n_active(pool, UInt8) == 0 + @test others_n_active(pool, Float16) == 0 + @test pool._current_depth == 1 + # Vectors should be preserved for reuse + @test length(pool.others[UInt8].vectors) == 1 + @test length(pool.others[Float16].vectors) == 1 + end - @test others_n_active(pool, UInt8) == 0 - @test others_n_active(pool, Float16) == 0 - @test pool._current_depth == 1 - # Vectors should be preserved for reuse - @test length(pool.others[UInt8].vectors) == 1 - @test length(pool.others[Float16].vectors) == 1 -end + @testset "19b. empty! clears fallback types completely" begin + pool = AdaptiveArrayPool() -@testset "19b. empty! clears fallback types completely" begin - pool = AdaptiveArrayPool() + checkpoint!(pool) + acquire!(pool, UInt8, 100) + acquire!(pool, Float16, 200) + rewind!(pool) - checkpoint!(pool) - acquire!(pool, UInt8, 100) - acquire!(pool, Float16, 200) - rewind!(pool) + empty!(pool) - empty!(pool) + @test isempty(pool.others) + @test pool._current_depth == 1 + end - @test isempty(pool.others) - @test pool._current_depth == 1 -end + # ============================================================================== + # 20. Edge Case: Acquire Zero-Length Array of Fallback Type + # ============================================================================== -# ============================================================================== -# 20. Edge Case: Acquire Zero-Length Array of Fallback Type -# ============================================================================== + @testset "20. Zero-length fallback array acquire/rewind" begin + pool = AdaptiveArrayPool() -@testset "20. Zero-length fallback array acquire/rewind" begin - pool = AdaptiveArrayPool() + checkpoint!(pool) + v = acquire!(pool, UInt8, 0) + @test length(v) == 0 + @test others_n_active(pool, UInt8) == 1 - checkpoint!(pool) - v = acquire!(pool, UInt8, 0) - @test length(v) == 0 - @test others_n_active(pool, UInt8) == 1 + rewind!(pool) + @test others_n_active(pool, UInt8) == 0 + end - rewind!(pool) - @test others_n_active(pool, UInt8) == 0 -end + # ============================================================================== + # 21. Parametric Dual-Like Type: Basic Reclamation + # ============================================================================== + # ForwardDiff.Dual{Tag{f}, V, N} — each unique parameterization is a DIFFERENT + # concrete type, creating separate pool.others entries. This tests that pool + # correctly handles multiple parametric variants of the same "family" of types. -# ============================================================================== -# 21. Parametric Dual-Like Type: Basic Reclamation -# ============================================================================== -# ForwardDiff.Dual{Tag{f}, V, N} — each unique parameterization is a DIFFERENT -# concrete type, creating separate pool.others entries. This tests that pool -# correctly handles multiple parametric variants of the same "family" of types. + @testset "21. Dual-like parametric type: basic acquire/rewind" begin + pool = AdaptiveArrayPool() -@testset "21. Dual-like parametric type: basic acquire/rewind" begin - pool = AdaptiveArrayPool() + checkpoint!(pool) + v = acquire!(pool, Dual_f1_11, 10) + @test eltype(v) == Dual_f1_11 + @test others_n_active(pool, Dual_f1_11) == 1 - checkpoint!(pool) - v = acquire!(pool, Dual_f1_11, 10) - @test eltype(v) == Dual_f1_11 - @test others_n_active(pool, Dual_f1_11) == 1 + rewind!(pool) + @test others_n_active(pool, Dual_f1_11) == 0 + end - rewind!(pool) - @test others_n_active(pool, Dual_f1_11) == 0 -end + @testset "21b. Dual-like: different param variants are separate pool entries" begin + pool = AdaptiveArrayPool() -@testset "21b. Dual-like: different param variants are separate pool entries" begin - pool = AdaptiveArrayPool() + checkpoint!(pool) + # Three different Dual variants — each gets its own IdDict entry + acquire!(pool, Dual_f1_11, 10) + acquire!(pool, Dual_f1_4, 20) + acquire!(pool, Dual_f2_11, 30) - checkpoint!(pool) - # Three different Dual variants — each gets its own IdDict entry - acquire!(pool, Dual_f1_11, 10) - acquire!(pool, Dual_f1_4, 20) - acquire!(pool, Dual_f2_11, 30) + @test length(pool.others) == 3 + @test others_n_active(pool, Dual_f1_11) == 1 + @test others_n_active(pool, Dual_f1_4) == 1 + @test others_n_active(pool, Dual_f2_11) == 1 - @test length(pool.others) == 3 - @test others_n_active(pool, Dual_f1_11) == 1 - @test others_n_active(pool, Dual_f1_4) == 1 - @test others_n_active(pool, Dual_f2_11) == 1 + rewind!(pool) + @test others_n_active(pool, Dual_f1_11) == 0 + @test others_n_active(pool, Dual_f1_4) == 0 + @test others_n_active(pool, Dual_f2_11) == 0 + end - rewind!(pool) - @test others_n_active(pool, Dual_f1_11) == 0 - @test others_n_active(pool, Dual_f1_4) == 0 - @test others_n_active(pool, Dual_f2_11) == 0 -end + # ============================================================================== + # 22. Parametric Dual-Like: Nested Scopes (Simulates ForwardDiff Chunk Processing) + # ============================================================================== + # ForwardDiff.gradient processes data in chunks, calling the function N times. + # Each call creates a @with_pool scope. The pool must correctly rewind Dual + # arrays created during each chunk evaluation. -# ============================================================================== -# 22. Parametric Dual-Like: Nested Scopes (Simulates ForwardDiff Chunk Processing) -# ============================================================================== -# ForwardDiff.gradient processes data in chunks, calling the function N times. -# Each call creates a @with_pool scope. The pool must correctly rewind Dual -# arrays created during each chunk evaluation. + @testset "22. Dual-like: simulated ForwardDiff chunk processing" begin + pool = AdaptiveArrayPool() + n_chunks = 11 # Like processing 121 elements in chunks of 11 -@testset "22. Dual-like: simulated ForwardDiff chunk processing" begin - pool = AdaptiveArrayPool() - n_chunks = 11 # Like processing 121 elements in chunks of 11 + for chunk in 1:n_chunks + checkpoint!(pool) + # Simulates what happens inside cubic_interp when called with Dual data + partials = acquire!(pool, Dual_f1_11, 44) # like (4, 11) partials array + workspace = acquire!(pool, Dual_f1_11, 11) # temporary workspace + @test others_n_active(pool, Dual_f1_11) == 2 - for chunk in 1:n_chunks - checkpoint!(pool) - # Simulates what happens inside cubic_interp when called with Dual data - partials = acquire!(pool, Dual_f1_11, 44) # like (4, 11) partials array - workspace = acquire!(pool, Dual_f1_11, 11) # temporary workspace - @test others_n_active(pool, Dual_f1_11) == 2 + rewind!(pool) + @test others_n_active(pool, Dual_f1_11) == 0 + end - rewind!(pool) + # After all chunks: zero leak @test others_n_active(pool, Dual_f1_11) == 0 + @test length(pool.others[Dual_f1_11].vectors) == 2 # reuses 2 backing vectors end - # After all chunks: zero leak - @test others_n_active(pool, Dual_f1_11) == 0 - @test length(pool.others[Dual_f1_11].vectors) == 2 # reuses 2 backing vectors -end + @testset "22b. Dual-like: simulated nested @with_pool in chunk processing" begin + pool = AdaptiveArrayPool() -@testset "22b. Dual-like: simulated nested @with_pool in chunk processing" begin - pool = AdaptiveArrayPool() + for chunk in 1:11 + # Outer scope: oneshot function + checkpoint!(pool) + partials = acquire!(pool, Dual_f1_11, 44) - for chunk in 1:11 - # Outer scope: oneshot function - checkpoint!(pool) - partials = acquire!(pool, Dual_f1_11, 44) + # Inner scope: solver function (nested @with_pool) + checkpoint!(pool) + m = acquire!(pool, Dual_f1_11, 11) + @test others_n_active(pool, Dual_f1_11) == 2 + rewind!(pool) + @test others_n_active(pool, Dual_f1_11) == 1 # only partials - # Inner scope: solver function (nested @with_pool) - checkpoint!(pool) - m = acquire!(pool, Dual_f1_11, 11) - @test others_n_active(pool, Dual_f1_11) == 2 - rewind!(pool) - @test others_n_active(pool, Dual_f1_11) == 1 # only partials + rewind!(pool) + @test others_n_active(pool, Dual_f1_11) == 0 + end - rewind!(pool) @test others_n_active(pool, Dual_f1_11) == 0 end - @test others_n_active(pool, Dual_f1_11) == 0 -end + # ============================================================================== + # 23. Parametric Dual-Like: Lazy Mode (Macro-Generated Path) + # ============================================================================== -# ============================================================================== -# 23. Parametric Dual-Like: Lazy Mode (Macro-Generated Path) -# ============================================================================== + @testset "23. Dual-like: lazy checkpoint/rewind" begin + pool = AdaptiveArrayPool() -@testset "23. Dual-like: lazy checkpoint/rewind" begin - pool = AdaptiveArrayPool() + _lazy_checkpoint!(pool) + acquire!(pool, Dual_f1_11, 44) + acquire!(pool, Dual_f1_11, 11) + @test others_n_active(pool, Dual_f1_11) == 2 + @test pool._touched_has_others[pool._current_depth] == true - _lazy_checkpoint!(pool) - acquire!(pool, Dual_f1_11, 44) - acquire!(pool, Dual_f1_11, 11) - @test others_n_active(pool, Dual_f1_11) == 2 - @test pool._touched_has_others[pool._current_depth] == true + _lazy_rewind!(pool) + @test others_n_active(pool, Dual_f1_11) == 0 + end - _lazy_rewind!(pool) - @test others_n_active(pool, Dual_f1_11) == 0 -end + @testset "23b. Dual-like: lazy mode with pre-existing Dual pool" begin + pool = AdaptiveArrayPool() -@testset "23b. Dual-like: lazy mode with pre-existing Dual pool" begin - pool = AdaptiveArrayPool() + # Pre-populate Dual pool (simulates warmup call) + checkpoint!(pool) + acquire!(pool, Dual_f1_11, 10) + rewind!(pool) + @test others_n_active(pool, Dual_f1_11) == 0 - # Pre-populate Dual pool (simulates warmup call) - checkpoint!(pool) - acquire!(pool, Dual_f1_11, 10) - rewind!(pool) - @test others_n_active(pool, Dual_f1_11) == 0 + # Lazy scope — pre-existing Dual pool must be eagerly checkpointed + _lazy_checkpoint!(pool) + acquire!(pool, Dual_f1_11, 44) + @test others_n_active(pool, Dual_f1_11) == 1 - # Lazy scope — pre-existing Dual pool must be eagerly checkpointed - _lazy_checkpoint!(pool) - acquire!(pool, Dual_f1_11, 44) - @test others_n_active(pool, Dual_f1_11) == 1 + _lazy_rewind!(pool) + @test others_n_active(pool, Dual_f1_11) == 0 + end - _lazy_rewind!(pool) - @test others_n_active(pool, Dual_f1_11) == 0 -end + @testset "23c. Dual-like: lazy nested, Dual acquired only in inner scope" begin + pool = AdaptiveArrayPool() -@testset "23c. Dual-like: lazy nested, Dual acquired only in inner scope" begin - pool = AdaptiveArrayPool() + # Outer: lazy, acquires Float64 only + _lazy_checkpoint!(pool) + acquire!(pool, Float64, 10) - # Outer: lazy, acquires Float64 only - _lazy_checkpoint!(pool) - acquire!(pool, Float64, 10) + # Inner: lazy, acquires Dual (new type created inside nested lazy scope) + _lazy_checkpoint!(pool) + acquire!(pool, Dual_f1_11, 44) + @test others_n_active(pool, Dual_f1_11) == 1 - # Inner: lazy, acquires Dual (new type created inside nested lazy scope) - _lazy_checkpoint!(pool) - acquire!(pool, Dual_f1_11, 44) - @test others_n_active(pool, Dual_f1_11) == 1 + _lazy_rewind!(pool) + @test others_n_active(pool, Dual_f1_11) == 0 - _lazy_rewind!(pool) - @test others_n_active(pool, Dual_f1_11) == 0 + _lazy_rewind!(pool) + @test pool.float64.n_active == 0 + end - _lazy_rewind!(pool) - @test pool.float64.n_active == 0 -end + # ============================================================================== + # 24. Parametric Dual-Like: Typed-Lazy (use_typed=true Macro Path) + # ============================================================================== + # This simulates the MOST LIKELY leak scenario: + # The macro extracts Float64 as the tracked type, but at runtime the data is + # Dual-typed. The typed-lazy path must correctly handle untracked Dual types. -# ============================================================================== -# 24. Parametric Dual-Like: Typed-Lazy (use_typed=true Macro Path) -# ============================================================================== -# This simulates the MOST LIKELY leak scenario: -# The macro extracts Float64 as the tracked type, but at runtime the data is -# Dual-typed. The typed-lazy path must correctly handle untracked Dual types. - -@testset "24. Dual-like: typed-lazy with Dual as untracked type" begin - pool = AdaptiveArrayPool() - - # Typed-lazy tracking Float64 — Dual is untracked (goes through has_others) - _typed_lazy_checkpoint!(pool, Float64) - acquire!(pool, Float64, 10) # tracked type - acquire!(pool, Dual_f1_11, 44) # untracked Dual - @test others_n_active(pool, Dual_f1_11) == 1 - @test pool._touched_has_others[pool._current_depth] == true - - _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) - @test pool.float64.n_active == 0 - @test others_n_active(pool, Dual_f1_11) == 0 -end + @testset "24. Dual-like: typed-lazy with Dual as untracked type" begin + pool = AdaptiveArrayPool() -@testset "24b. Dual-like: typed-lazy nested, parent has Dual, child adds more" begin - pool = AdaptiveArrayPool() + # Typed-lazy tracking Float64 — Dual is untracked (goes through has_others) + _typed_lazy_checkpoint!(pool, Float64) + acquire!(pool, Float64, 10) # tracked type + acquire!(pool, Dual_f1_11, 44) # untracked Dual + @test others_n_active(pool, Dual_f1_11) == 1 + @test pool._touched_has_others[pool._current_depth] == true - # Pre-populate Dual pool - checkpoint!(pool) - acquire!(pool, Dual_f1_11, 10) - rewind!(pool) + _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) + @test pool.float64.n_active == 0 + @test others_n_active(pool, Dual_f1_11) == 0 + end - # Parent: full checkpoint, acquires Dual - checkpoint!(pool) - acquire!(pool, Dual_f1_11, 5) - @test others_n_active(pool, Dual_f1_11) == 1 + @testset "24b. Dual-like: typed-lazy nested, parent has Dual, child adds more" begin + pool = AdaptiveArrayPool() - # Child: typed-lazy (Float64), helper acquires more Dual - _typed_lazy_checkpoint!(pool, Float64) - acquire!(pool, Float64, 10) - acquire!(pool, Dual_f1_11, 44) - @test others_n_active(pool, Dual_f1_11) == 2 + # Pre-populate Dual pool + checkpoint!(pool) + acquire!(pool, Dual_f1_11, 10) + rewind!(pool) - _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) - @test others_n_active(pool, Dual_f1_11) == 1 # parent's preserved + # Parent: full checkpoint, acquires Dual + checkpoint!(pool) + acquire!(pool, Dual_f1_11, 5) + @test others_n_active(pool, Dual_f1_11) == 1 - rewind!(pool) - @test others_n_active(pool, Dual_f1_11) == 0 -end + # Child: typed-lazy (Float64), helper acquires more Dual + _typed_lazy_checkpoint!(pool, Float64) + acquire!(pool, Float64, 10) + acquire!(pool, Dual_f1_11, 44) + @test others_n_active(pool, Dual_f1_11) == 2 -@testset "24c. Dual-like: typed-lazy with Dual as TRACKED type via eltype" begin - pool = AdaptiveArrayPool() + _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) + @test others_n_active(pool, Dual_f1_11) == 1 # parent's preserved - # Simulates: @with_pool pool function f(y::Vector{Dual}) - # z = similar!(pool, y) → macro extracts eltype(y) = Dual as tracked type - # Since Dual is a fallback type, _tracked_mask_for_types(Dual) == UInt16(0) - # The _can_use_typed_path check becomes: - # touched_mask & ~0 == 0 → always true IF no has_others set - # But _checkpoint!(pool, Dual) does checkpoint the Dual pool. + rewind!(pool) + @test others_n_active(pool, Dual_f1_11) == 0 + end - # This tests the typed path where the only tracked type is a fallback type - checkpoint!(pool, Dual_f1_11) - acquire!(pool, Dual_f1_11, 44) - acquire!(pool, Dual_f1_11, 11) - @test others_n_active(pool, Dual_f1_11) == 2 + @testset "24c. Dual-like: typed-lazy with Dual as TRACKED type via eltype" begin + pool = AdaptiveArrayPool() - rewind!(pool, Dual_f1_11) - @test others_n_active(pool, Dual_f1_11) == 0 -end + # Simulates: @with_pool pool function f(y::Vector{Dual}) + # z = similar!(pool, y) → macro extracts eltype(y) = Dual as tracked type + # Since Dual is a fallback type, _tracked_mask_for_types(Dual) == UInt16(0) + # The _can_use_typed_path check becomes: + # touched_mask & ~0 == 0 → always true IF no has_others set + # But _checkpoint!(pool, Dual) does checkpoint the Dual pool. -# ============================================================================== -# 25. Dual-Like: Stress Test — Simulates Full ForwardDiff.gradient Pipeline -# ============================================================================== -# Simulates: ForwardDiff.gradient(f, z) where f uses cubic_interp -# ForwardDiff calls f(z_dual) multiple times (one per chunk) -# Each call: @with_pool → acquire Dual arrays → rewind + # This tests the typed path where the only tracked type is a fallback type + checkpoint!(pool, Dual_f1_11) + acquire!(pool, Dual_f1_11, 44) + acquire!(pool, Dual_f1_11, 11) + @test others_n_active(pool, Dual_f1_11) == 2 -@testset "25. Dual-like: full gradient simulation stress test" begin - pool = AdaptiveArrayPool() - n_chunks = 11 + rewind!(pool, Dual_f1_11) + @test others_n_active(pool, Dual_f1_11) == 0 + end - function simulate_cubic_interp_dual!(pool) - # Outer oneshot scope - checkpoint!(pool) - try - partials = acquire!(pool, Dual_f1_11, 4 * 11) - workspace = acquire!(pool, Float64, 10) # spacing (Float64, not Dual) + # ============================================================================== + # 25. Dual-Like: Stress Test — Simulates Full ForwardDiff.gradient Pipeline + # ============================================================================== + # Simulates: ForwardDiff.gradient(f, z) where f uses cubic_interp + # ForwardDiff calls f(z_dual) multiple times (one per chunk) + # Each call: @with_pool → acquire Dual arrays → rewind - # Inner solver scope - checkpoint!(pool) - try - m = acquire!(pool, Dual_f1_11, 11) - # solve... - finally - rewind!(pool) - end + @testset "25. Dual-like: full gradient simulation stress test" begin + pool = AdaptiveArrayPool() + n_chunks = 11 - # Inner differentiation scope + function simulate_cubic_interp_dual!(pool) + # Outer oneshot scope checkpoint!(pool) try - line = acquire!(pool, Dual_f1_11, 11) - dline = acquire!(pool, Dual_f1_11, 11) - # compute derivatives... + partials = acquire!(pool, Dual_f1_11, 4 * 11) + workspace = acquire!(pool, Float64, 10) # spacing (Float64, not Dual) + + # Inner solver scope + checkpoint!(pool) + try + m = acquire!(pool, Dual_f1_11, 11) + # solve... + finally + rewind!(pool) + end + + # Inner differentiation scope + checkpoint!(pool) + try + line = acquire!(pool, Dual_f1_11, 11) + dline = acquire!(pool, Dual_f1_11, 11) + # compute derivatives... + finally + rewind!(pool) + end + + return nothing finally rewind!(pool) end - - return nothing - finally - rewind!(pool) end - end - - for chunk in 1:n_chunks - simulate_cubic_interp_dual!(pool) - end - # After all chunks: zero leak - @test pool.float64.n_active == 0 - @test others_n_active(pool, Dual_f1_11) == 0 + for chunk in 1:n_chunks + simulate_cubic_interp_dual!(pool) + end - # Checkpoint stack must be clean - if haskey(pool.others, Dual_f1_11) - dual_pool = pool.others[Dual_f1_11] - @test length(dual_pool._checkpoint_depths) == 1 # sentinel only - @test dual_pool._checkpoint_depths[1] == 0 # sentinel value - end -end + # After all chunks: zero leak + @test pool.float64.n_active == 0 + @test others_n_active(pool, Dual_f1_11) == 0 -@testset "25b. Dual-like: @with_pool stress with Dual types" begin - # Warmup - @with_pool pool begin - acquire!(pool, Dual_f1_11, 44) + # Checkpoint stack must be clean + if haskey(pool.others, Dual_f1_11) + dual_pool = pool.others[Dual_f1_11] + @test length(dual_pool._checkpoint_depths) == 1 # sentinel only + @test dual_pool._checkpoint_depths[1] == 0 # sentinel value + end end - tl_pool = AdaptiveArrayPools.get_task_local_pool() - baseline_vecs = length(tl_pool.others[Dual_f1_11].vectors) - - for _ in 1:200 + @testset "25b. Dual-like: @with_pool stress with Dual types" begin + # Warmup @with_pool pool begin acquire!(pool, Dual_f1_11, 44) - acquire!(pool, Dual_f1_11, 11) end + + tl_pool = AdaptiveArrayPools.get_task_local_pool() + baseline_vecs = length(tl_pool.others[Dual_f1_11].vectors) + + for _ in 1:200 + @with_pool pool begin + acquire!(pool, Dual_f1_11, 44) + acquire!(pool, Dual_f1_11, 11) + end + end + + tl_pool = AdaptiveArrayPools.get_task_local_pool() + @test others_n_active(tl_pool, Dual_f1_11) == 0 + @test length(tl_pool.others[Dual_f1_11].vectors) <= baseline_vecs + 1 end - tl_pool = AdaptiveArrayPools.get_task_local_pool() - @test others_n_active(tl_pool, Dual_f1_11) == 0 - @test length(tl_pool.others[Dual_f1_11].vectors) <= baseline_vecs + 1 -end + # ============================================================================== + # 26. Dual-Like: New Dual Type Created Mid-Scope (First-Touch Scenario) + # ============================================================================== + # When ForwardDiff first calls f(z_dual), the Dual type doesn't exist in pool.others yet. + # get_typed_pool! auto-checkpoints new types (pushes sentinel n_active=0). + # This tests that the auto-checkpoint + rewind cycle is correct for first-ever encounter. -# ============================================================================== -# 26. Dual-Like: New Dual Type Created Mid-Scope (First-Touch Scenario) -# ============================================================================== -# When ForwardDiff first calls f(z_dual), the Dual type doesn't exist in pool.others yet. -# get_typed_pool! auto-checkpoints new types (pushes sentinel n_active=0). -# This tests that the auto-checkpoint + rewind cycle is correct for first-ever encounter. - -@testset "26. Dual-like: first-touch auto-checkpoint in lazy mode" begin - pool = AdaptiveArrayPool() - # Pool has never seen Dual_f2_11 before - @test !haskey(pool.others, Dual_f2_11) - - _lazy_checkpoint!(pool) - # First-ever acquire of this Dual variant - v = acquire!(pool, Dual_f2_11, 10) - @test haskey(pool.others, Dual_f2_11) - @test others_n_active(pool, Dual_f2_11) == 1 - - _lazy_rewind!(pool) - @test others_n_active(pool, Dual_f2_11) == 0 -end + @testset "26. Dual-like: first-touch auto-checkpoint in lazy mode" begin + pool = AdaptiveArrayPool() + # Pool has never seen Dual_f2_11 before + @test !haskey(pool.others, Dual_f2_11) -@testset "26b. Dual-like: first-touch in typed-lazy mode" begin - pool = AdaptiveArrayPool() - @test !haskey(pool.others, Dual_f2_11) + _lazy_checkpoint!(pool) + # First-ever acquire of this Dual variant + v = acquire!(pool, Dual_f2_11, 10) + @test haskey(pool.others, Dual_f2_11) + @test others_n_active(pool, Dual_f2_11) == 1 - _typed_lazy_checkpoint!(pool, Float64) - v = acquire!(pool, Dual_f2_11, 10) - @test others_n_active(pool, Dual_f2_11) == 1 + _lazy_rewind!(pool) + @test others_n_active(pool, Dual_f2_11) == 0 + end - _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) - @test others_n_active(pool, Dual_f2_11) == 0 -end + @testset "26b. Dual-like: first-touch in typed-lazy mode" begin + pool = AdaptiveArrayPool() + @test !haskey(pool.others, Dual_f2_11) -@testset "26c. Dual-like: first-touch in typed (only) checkpoint — tracked fallback" begin - pool = AdaptiveArrayPool() - @test !haskey(pool.others, Dual_f2_11) + _typed_lazy_checkpoint!(pool, Float64) + v = acquire!(pool, Dual_f2_11, 10) + @test others_n_active(pool, Dual_f2_11) == 1 - # checkpoint!(pool, Dual_f2_11) creates the pool entry via get_typed_pool! - # AND pushes checkpoint for it - checkpoint!(pool, Dual_f2_11) - v = acquire!(pool, Dual_f2_11, 10) - @test others_n_active(pool, Dual_f2_11) == 1 + _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) + @test others_n_active(pool, Dual_f2_11) == 0 + end - rewind!(pool, Dual_f2_11) - @test others_n_active(pool, Dual_f2_11) == 0 -end + @testset "26c. Dual-like: first-touch in typed (only) checkpoint — tracked fallback" begin + pool = AdaptiveArrayPool() + @test !haskey(pool.others, Dual_f2_11) -# ============================================================================== -# 27. Dual-Like: Mixed Fixed + Multiple Dual Variants (Type Explosion) -# ============================================================================== + # checkpoint!(pool, Dual_f2_11) creates the pool entry via get_typed_pool! + # AND pushes checkpoint for it + checkpoint!(pool, Dual_f2_11) + v = acquire!(pool, Dual_f2_11, 10) + @test others_n_active(pool, Dual_f2_11) == 1 -@testset "27. Type explosion: multiple Dual variants + fixed types" begin - pool = AdaptiveArrayPool() - - checkpoint!(pool) - acquire!(pool, Float64, 10) # fixed slot - acquire!(pool, Int32, 5) # fixed slot - acquire!(pool, Dual_f1_11, 44) # fallback Dual variant 1 - acquire!(pool, Dual_f1_4, 16) # fallback Dual variant 2 - acquire!(pool, Dual_f2_11, 33) # fallback Dual variant 3 - acquire!(pool, UInt8, 20) # fallback primitive - - @test pool.float64.n_active == 1 - @test pool.int32.n_active == 1 - @test others_n_active(pool, Dual_f1_11) == 1 - @test others_n_active(pool, Dual_f1_4) == 1 - @test others_n_active(pool, Dual_f2_11) == 1 - @test others_n_active(pool, UInt8) == 1 - - rewind!(pool) - - @test pool.float64.n_active == 0 - @test pool.int32.n_active == 0 - @test others_n_active(pool, Dual_f1_11) == 0 - @test others_n_active(pool, Dual_f1_4) == 0 - @test others_n_active(pool, Dual_f2_11) == 0 - @test others_n_active(pool, UInt8) == 0 -end + rewind!(pool, Dual_f2_11) + @test others_n_active(pool, Dual_f2_11) == 0 + end -# ============================================================================== -# 28. Dual-Like: _acquire_impl! Bypass (Macro Transform Path) -# ============================================================================== -# When @with_pool transforms acquire! → _acquire_impl!, _record_type_touch! -# is bypassed. This tests that fallback types still get properly rewound -# even when type touch recording is skipped. - -@testset "28. _acquire_impl! bypass: fallback type with typed checkpoint" begin - pool = AdaptiveArrayPool() - using AdaptiveArrayPools: _acquire_impl! - - # Simulate typed path: checkpoint specific type, use _acquire_impl! directly - # checkpoint!(pool, Dual) creates the pool entry via get_typed_pool! which - # now sets has_others=true at creation time - checkpoint!(pool, Dual_f1_11) - # _acquire_impl! does NOT call _record_type_touch! - v = _acquire_impl!(pool, Dual_f1_11, 10) - @test others_n_active(pool, Dual_f1_11) == 1 - - rewind!(pool, Dual_f1_11) - @test others_n_active(pool, Dual_f1_11) == 0 -end + # ============================================================================== + # 27. Dual-Like: Mixed Fixed + Multiple Dual Variants (Type Explosion) + # ============================================================================== -@testset "28b. _acquire_impl! bypass: typed-lazy fallback to lazy rewind" begin - pool = AdaptiveArrayPool() - using AdaptiveArrayPools: _acquire_impl! + @testset "27. Type explosion: multiple Dual variants + fixed types" begin + pool = AdaptiveArrayPool() - # Typed-lazy tracking Float64, Dual acquired via _acquire_impl! (no touch) - _typed_lazy_checkpoint!(pool, Float64) + checkpoint!(pool) + acquire!(pool, Float64, 10) # fixed slot + acquire!(pool, Int32, 5) # fixed slot + acquire!(pool, Dual_f1_11, 44) # fallback Dual variant 1 + acquire!(pool, Dual_f1_4, 16) # fallback Dual variant 2 + acquire!(pool, Dual_f2_11, 33) # fallback Dual variant 3 + acquire!(pool, UInt8, 20) # fallback primitive + + @test pool.float64.n_active == 1 + @test pool.int32.n_active == 1 + @test others_n_active(pool, Dual_f1_11) == 1 + @test others_n_active(pool, Dual_f1_4) == 1 + @test others_n_active(pool, Dual_f2_11) == 1 + @test others_n_active(pool, UInt8) == 1 - # Tracked type via _acquire_impl! - _acquire_impl!(pool, Float64, 10) + rewind!(pool) - # Untracked Dual via _acquire_impl! — NO touch recording! - # But get_typed_pool! now sets _touched_has_others when creating new fallback type - _acquire_impl!(pool, Dual_f1_11, 44) - @test others_n_active(pool, Dual_f1_11) == 1 + @test pool.float64.n_active == 0 + @test pool.int32.n_active == 0 + @test others_n_active(pool, Dual_f1_11) == 0 + @test others_n_active(pool, Dual_f1_4) == 0 + @test others_n_active(pool, Dual_f2_11) == 0 + @test others_n_active(pool, UInt8) == 0 + end - # has_others should now be true (set by get_typed_pool! on first creation) - @test pool._touched_has_others[pool._current_depth] == true + # ============================================================================== + # 28. Dual-Like: _acquire_impl! Bypass (Macro Transform Path) + # ============================================================================== + # When @with_pool transforms acquire! → _acquire_impl!, _record_type_touch! + # is bypassed. This tests that fallback types still get properly rewound + # even when type touch recording is skipped. + + @testset "28. _acquire_impl! bypass: fallback type with typed checkpoint" begin + pool = AdaptiveArrayPool() + using AdaptiveArrayPools: _acquire_impl! + + # Simulate typed path: checkpoint specific type, use _acquire_impl! directly + # checkpoint!(pool, Dual) creates the pool entry via get_typed_pool! which + # now sets has_others=true at creation time + checkpoint!(pool, Dual_f1_11) + # _acquire_impl! does NOT call _record_type_touch! + v = _acquire_impl!(pool, Dual_f1_11, 10) + @test others_n_active(pool, Dual_f1_11) == 1 - _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) + rewind!(pool, Dual_f1_11) + @test others_n_active(pool, Dual_f1_11) == 0 + end - # Fix: get_typed_pool! sets has_others → rewind iterates pool.others → n_active restored - @test others_n_active(pool, Dual_f1_11) == 0 -end + @testset "28b. _acquire_impl! bypass: typed-lazy fallback to lazy rewind" begin + pool = AdaptiveArrayPool() + using AdaptiveArrayPools: _acquire_impl! -# ============================================================================== -# 29. Repeated typed checkpoint with existing fallback + extra touched types -# ============================================================================== -# Reproduces the real-world ForwardDiff.gradient scenario: -# - @with_pool extracts Tv (Dual) as static type → use_typed=true -# - Macro transforms acquire!(pool, Tv, ...) → _acquire_impl!(pool, Tv, ...) -# - Helper function (not in @with_pool body) acquires Float64 via normal acquire! -# → _record_type_touch! sets touched_type_masks for Float64 -# - At rewind: _can_use_typed_path returns false (Float64 was extra-touched) -# → falls to _typed_lazy_rewind! which checks has_others -# - BUG: has_others was false on 2nd+ calls because get_typed_pool! closure only -# runs for NEW types. checkpoint!(pool, Dual) always pushed has_others=false. -# - FIX: checkpoint!(pool, types...) now pushes has_others=true when any type -# is a fallback (compile-time check via _fixed_slot_bit). - -@testset "29. Repeated typed checkpoint: existing fallback + extra touched types" begin - pool = AdaptiveArrayPool() - using AdaptiveArrayPools: _acquire_impl!, _typed_lazy_checkpoint!, _typed_lazy_rewind!, - _tracked_mask_for_types, _can_use_typed_path - - # Simulate 3 iterations of ForwardDiff.gradient calling cubic_interp - for iter in 1:3 - # 1. Typed checkpoint for Dual (the macro fast path if _can_use_typed_path) - # On iter=1: Dual is new → get_typed_pool! creates it - # On iter≥2: Dual exists → get_typed_pool! returns immediately - if _can_use_typed_path(pool, _tracked_mask_for_types(Dual_f1_11)) - checkpoint!(pool, Dual_f1_11) - else - _typed_lazy_checkpoint!(pool, Dual_f1_11) - end + # Typed-lazy tracking Float64, Dual acquired via _acquire_impl! (no touch) + _typed_lazy_checkpoint!(pool, Float64) - # 2. Helper function acquires Float64 via normal acquire! (NOT transformed) - # This sets touched_type_masks for Float64, causing _can_use_typed_path=false at rewind - acquire!(pool, Float64, 10) + # Tracked type via _acquire_impl! + _acquire_impl!(pool, Float64, 10) - # 3. _acquire_impl! for Dual (macro-transformed, no _record_type_touch!) + # Untracked Dual via _acquire_impl! — NO touch recording! + # But get_typed_pool! now sets _touched_has_others when creating new fallback type _acquire_impl!(pool, Dual_f1_11, 44) @test others_n_active(pool, Dual_f1_11) == 1 - # 4. Rewind — same conditional as the macro generates - if _can_use_typed_path(pool, _tracked_mask_for_types(Dual_f1_11)) - rewind!(pool, Dual_f1_11) - else - _typed_lazy_rewind!(pool, _tracked_mask_for_types(Dual_f1_11)) - end + # has_others should now be true (set by get_typed_pool! on first creation) + @test pool._touched_has_others[pool._current_depth] == true + + _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64)) - # Key assertion: Dual must be rewound on EVERY iteration, not just the first + # Fix: get_typed_pool! sets has_others → rewind iterates pool.others → n_active restored @test others_n_active(pool, Dual_f1_11) == 0 - @test pool.float64.n_active == 0 end -end -@testset "29b. Repeated typed checkpoint: multiple Dual variants + helpers" begin - pool = AdaptiveArrayPool() - using AdaptiveArrayPools: _acquire_impl!, _unsafe_acquire_impl!, - _typed_lazy_checkpoint!, _typed_lazy_rewind!, - _tracked_mask_for_types, _can_use_typed_path + # ============================================================================== + # 29. Repeated typed checkpoint with existing fallback + extra touched types + # ============================================================================== + # Reproduces the real-world ForwardDiff.gradient scenario: + # - @with_pool extracts Tv (Dual) as static type → use_typed=true + # - Macro transforms acquire!(pool, Tv, ...) → _acquire_impl!(pool, Tv, ...) + # - Helper function (not in @with_pool body) acquires Float64 via normal acquire! + # → _record_type_touch! sets touched_type_masks for Float64 + # - At rewind: _can_use_typed_path returns false (Float64 was extra-touched) + # → falls to _typed_lazy_rewind! which checks has_others + # - BUG: has_others was false on 2nd+ calls because get_typed_pool! closure only + # runs for NEW types. checkpoint!(pool, Dual) always pushed has_others=false. + # - FIX: checkpoint!(pool, types...) now pushes has_others=true when any type + # is a fallback (compile-time check via _fixed_slot_bit). + + @testset "29. Repeated typed checkpoint: existing fallback + extra touched types" begin + pool = AdaptiveArrayPool() + using AdaptiveArrayPools: _acquire_impl!, _typed_lazy_checkpoint!, _typed_lazy_rewind!, + _tracked_mask_for_types, _can_use_typed_path + + # Simulate 3 iterations of ForwardDiff.gradient calling cubic_interp + for iter in 1:3 + # 1. Typed checkpoint for Dual (the macro fast path if _can_use_typed_path) + # On iter=1: Dual is new → get_typed_pool! creates it + # On iter≥2: Dual exists → get_typed_pool! returns immediately + if _can_use_typed_path(pool, _tracked_mask_for_types(Dual_f1_11)) + checkpoint!(pool, Dual_f1_11) + else + _typed_lazy_checkpoint!(pool, Dual_f1_11) + end + + # 2. Helper function acquires Float64 via normal acquire! (NOT transformed) + # This sets touched_type_masks for Float64, causing _can_use_typed_path=false at rewind + acquire!(pool, Float64, 10) + + # 3. _acquire_impl! for Dual (macro-transformed, no _record_type_touch!) + _acquire_impl!(pool, Dual_f1_11, 44) + @test others_n_active(pool, Dual_f1_11) == 1 - for iter in 1:5 - # Simulate @with_pool with Tv=Dual_f1_11, Tg=Float64 - tracked_mask = _tracked_mask_for_types(Dual_f1_11, Float64) + # 4. Rewind — same conditional as the macro generates + if _can_use_typed_path(pool, _tracked_mask_for_types(Dual_f1_11)) + rewind!(pool, Dual_f1_11) + else + _typed_lazy_rewind!(pool, _tracked_mask_for_types(Dual_f1_11)) + end - if _can_use_typed_path(pool, tracked_mask) - checkpoint!(pool, Dual_f1_11, Float64) - else - _typed_lazy_checkpoint!(pool, Dual_f1_11, Float64) + # Key assertion: Dual must be rewound on EVERY iteration, not just the first + @test others_n_active(pool, Dual_f1_11) == 0 + @test pool.float64.n_active == 0 end + end - # Outer scope: multi-dim Dual acquire (unsafe_acquire, macro-transformed) - _unsafe_acquire_impl!(pool, Dual_f1_11, 4, 11, 11) - @test others_n_active(pool, Dual_f1_11) == 1 + @testset "29b. Repeated typed checkpoint: multiple Dual variants + helpers" begin + pool = AdaptiveArrayPool() + using AdaptiveArrayPools: _acquire_impl!, _unsafe_acquire_impl!, + _typed_lazy_checkpoint!, _typed_lazy_rewind!, + _tracked_mask_for_types, _can_use_typed_path - # Helper function: normal acquire! for Float64 + Int32 (extra touched types) - acquire!(pool, Float64, 20) - acquire!(pool, Int32, 5) + for iter in 1:5 + # Simulate @with_pool with Tv=Dual_f1_11, Tg=Float64 + tracked_mask = _tracked_mask_for_types(Dual_f1_11, Float64) - # Inner scope: nested @with_pool for solver - if _can_use_typed_path(pool, tracked_mask) - checkpoint!(pool, Dual_f1_11, Float64) - else - _typed_lazy_checkpoint!(pool, Dual_f1_11, Float64) - end - _acquire_impl!(pool, Dual_f1_11, 11) - @test others_n_active(pool, Dual_f1_11) == 2 # outer + inner - _acquire_impl!(pool, Float64, 11) - - # Inner rewind - if _can_use_typed_path(pool, tracked_mask) - rewind!(pool, Dual_f1_11, Float64) - else - _typed_lazy_rewind!(pool, tracked_mask) - end - @test others_n_active(pool, Dual_f1_11) == 1 # back to outer's count + if _can_use_typed_path(pool, tracked_mask) + checkpoint!(pool, Dual_f1_11, Float64) + else + _typed_lazy_checkpoint!(pool, Dual_f1_11, Float64) + end - # Outer rewind - if _can_use_typed_path(pool, tracked_mask) - rewind!(pool, Dual_f1_11, Float64) - else - _typed_lazy_rewind!(pool, tracked_mask) - end + # Outer scope: multi-dim Dual acquire (unsafe_acquire, macro-transformed) + _unsafe_acquire_impl!(pool, Dual_f1_11, 4, 11, 11) + @test others_n_active(pool, Dual_f1_11) == 1 - @test others_n_active(pool, Dual_f1_11) == 0 - @test pool.float64.n_active == 0 - @test pool.int32.n_active == 0 + # Helper function: normal acquire! for Float64 + Int32 (extra touched types) + acquire!(pool, Float64, 20) + acquire!(pool, Int32, 5) + + # Inner scope: nested @with_pool for solver + if _can_use_typed_path(pool, tracked_mask) + checkpoint!(pool, Dual_f1_11, Float64) + else + _typed_lazy_checkpoint!(pool, Dual_f1_11, Float64) + end + _acquire_impl!(pool, Dual_f1_11, 11) + @test others_n_active(pool, Dual_f1_11) == 2 # outer + inner + _acquire_impl!(pool, Float64, 11) + + # Inner rewind + if _can_use_typed_path(pool, tracked_mask) + rewind!(pool, Dual_f1_11, Float64) + else + _typed_lazy_rewind!(pool, tracked_mask) + end + @test others_n_active(pool, Dual_f1_11) == 1 # back to outer's count + + # Outer rewind + if _can_use_typed_path(pool, tracked_mask) + rewind!(pool, Dual_f1_11, Float64) + else + _typed_lazy_rewind!(pool, tracked_mask) + end + + @test others_n_active(pool, Dual_f1_11) == 0 + @test pool.float64.n_active == 0 + @test pool.int32.n_active == 0 + end end -end end # top-level @testset diff --git a/test/test_macro_internals.jl b/test/test_macro_internals.jl index ca3faeb5..fe62228a 100644 --- a/test/test_macro_internals.jl +++ b/test/test_macro_internals.jl @@ -81,7 +81,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke # Assignment in function call (should NOT be captured) @testset "function call with kwarg" begin - expr = :(foo(x, y=1)) + expr = :(foo(x, y = 1)) locals = _extract_local_assignments(expr) # y=1 inside function call is a kwarg, not an assignment # This depends on Julia parsing - in some cases it might be captured @@ -1565,4 +1565,4 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke @test pool.int64.n_active == 0 end -end # Macro Internals \ No newline at end of file +end # Macro Internals diff --git a/test/test_macros.jl b/test/test_macros.jl index 606c3f41..eaafc2bd 100644 --- a/test/test_macros.jl +++ b/test/test_macros.jl @@ -256,4 +256,4 @@ import AdaptiveArrayPools: checkpoint!, rewind! MAYBE_POOLING_ENABLED[] = true end -end # Macro System \ No newline at end of file +end # Macro System diff --git a/test/test_multidimensional.jl b/test/test_multidimensional.jl index dca6f290..6e330d25 100644 --- a/test/test_multidimensional.jl +++ b/test/test_multidimensional.jl @@ -37,7 +37,7 @@ using AdaptiveArrayPools: checkpoint!, rewind! # Without pool (fallback) mat_alloc = acquire!(DISABLED_CPU, Float64, 10, 10) - @test mat_alloc isa Array{Float64,2} + @test mat_alloc isa Array{Float64, 2} @test size(mat_alloc) == (10, 10) end @@ -265,4 +265,4 @@ using AdaptiveArrayPools: checkpoint!, rewind! @test alloc == 0 end -end # Multi-dimensional Arrays \ No newline at end of file +end # Multi-dimensional Arrays diff --git a/test/test_nway_cache.jl b/test/test_nway_cache.jl index cd1c2c76..33cb64ea 100644 --- a/test/test_nway_cache.jl +++ b/test/test_nway_cache.jl @@ -130,8 +130,10 @@ end pool = AdaptiveArrayPool() function test_nd_many_patterns!(p) - dims_list = ((2, 50), (5, 20), (10, 10), (20, 5), (50, 2), - (1, 100), (100, 1), (4, 25), (25, 4), (8, 13)) + dims_list = ( + (2, 50), (5, 20), (10, 10), (20, 5), (50, 2), + (1, 100), (100, 1), (4, 25), (25, 4), (8, 13), + ) for _ in 1:50 for dims in dims_list @with_pool p begin diff --git a/test/test_reshape.jl b/test/test_reshape.jl index 1316d9be..5ccb29e0 100644 --- a/test/test_reshape.jl +++ b/test/test_reshape.jl @@ -390,7 +390,9 @@ end # Warmup (compile + cache) - for _ in 1:4; _test_reshape_func_alloc(ext); end + for _ in 1:4 + _test_reshape_func_alloc(ext) + end alloc = @allocated _test_reshape_func_alloc(ext) println(" @with_pool function (acquire+reshape+zeros!): $alloc bytes") @@ -415,7 +417,9 @@ function _measure_maybe_reshape(data, enabled) MAYBE_POOLING_ENABLED[] = enabled - for _ in 1:4; _test_maybe_reshape_alloc(data); end + for _ in 1:4 + _test_maybe_reshape_alloc(data) + end return @allocated _test_maybe_reshape_alloc(data) end @@ -428,7 +432,7 @@ _measure_maybe_reshape(ext, false) # Measure - alloc_pooled = _measure_maybe_reshape(ext, true) + alloc_pooled = _measure_maybe_reshape(ext, true) alloc_unpooled = _measure_maybe_reshape(ext, false) println(" @maybe_with_pool pooled: $alloc_pooled bytes") diff --git a/test/test_state.jl b/test/test_state.jl index 4254c87d..ce186686 100644 --- a/test/test_state.jl +++ b/test/test_state.jl @@ -863,7 +863,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke v_bool .= true l3_results[] = ( bool_n_active = pool.bool.n_active, - depth = pool._current_depth + depth = pool._current_depth, ) end @@ -885,7 +885,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke l2_results[] = ( int64_n_active = pool.int64.n_active, float32_n_active = pool.float32.n_active, - l3_bool_after = pool.bool.n_active + l3_bool_after = pool.bool.n_active, ) end @@ -962,7 +962,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke bool_after_l3 = p.bool.n_active, # Cleaned by L3 complexf64_after_l3 = p.complexf64.n_active, # Cleaned by L3 l3_bool_was = l3_active[1], - l3_cf64_was = l3_active[2] + l3_cf64_was = l3_active[2], ) end @@ -1028,7 +1028,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke v_bool .= true l3_results[] = ( bool_n_active = pool.bool.n_active, - depth = pool._current_depth + depth = pool._current_depth, ) end @@ -1050,7 +1050,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke l2_results[] = ( int64_n_active = p.int64.n_active, float32_n_active = p.float32.n_active, - l3_bool_after = p.bool.n_active + l3_bool_after = p.bool.n_active, ) end @@ -1182,7 +1182,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke l3_complexf64 = l3_active[3], bool_after = p.bool.n_active, int32_after = p.int32.n_active, - complexf64_after = p.complexf64.n_active + complexf64_after = p.complexf64.n_active, ) end @@ -1489,20 +1489,20 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke using AdaptiveArrayPools: _fixed_slot_bit, Bit # Each fixed slot returns a unique nonzero bit - @test _fixed_slot_bit(Float64) == UInt16(1) << 0 - @test _fixed_slot_bit(Float32) == UInt16(1) << 1 - @test _fixed_slot_bit(Int64) == UInt16(1) << 2 - @test _fixed_slot_bit(Int32) == UInt16(1) << 3 + @test _fixed_slot_bit(Float64) == UInt16(1) << 0 + @test _fixed_slot_bit(Float32) == UInt16(1) << 1 + @test _fixed_slot_bit(Int64) == UInt16(1) << 2 + @test _fixed_slot_bit(Int32) == UInt16(1) << 3 @test _fixed_slot_bit(ComplexF64) == UInt16(1) << 4 @test _fixed_slot_bit(ComplexF32) == UInt16(1) << 5 - @test _fixed_slot_bit(Bool) == UInt16(1) << 6 - @test _fixed_slot_bit(Bit) == UInt16(1) << 7 + @test _fixed_slot_bit(Bool) == UInt16(1) << 6 + @test _fixed_slot_bit(Bit) == UInt16(1) << 7 # Non-fixed-slot types return 0 - @test _fixed_slot_bit(UInt8) == UInt16(0) - @test _fixed_slot_bit(UInt16) == UInt16(0) - @test _fixed_slot_bit(Float16) == UInt16(0) - @test _fixed_slot_bit(String) == UInt16(0) + @test _fixed_slot_bit(UInt8) == UInt16(0) + @test _fixed_slot_bit(UInt16) == UInt16(0) + @test _fixed_slot_bit(Float16) == UInt16(0) + @test _fixed_slot_bit(String) == UInt16(0) # All 8 bits are unique (no collisions) bits = [_fixed_slot_bit(T) for T in (Float64, Float32, Int64, Int32, ComplexF64, ComplexF32, Bool, Bit)] @@ -1745,8 +1745,8 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke # causing false negatives: the typed fast path is rejected even when only # tracked types were touched. using AdaptiveArrayPools: _can_use_typed_path, _tracked_mask_for_types, - _lazy_checkpoint!, _lazy_rewind!, _LAZY_MODE_BIT, _TYPED_LAZY_BIT, - _acquire_impl! + _lazy_checkpoint!, _lazy_rewind!, _LAZY_MODE_BIT, _TYPED_LAZY_BIT, + _acquire_impl! # --- Case 1: _LAZY_MODE_BIT (bit 15) should be ignored --- pool = AdaptiveArrayPool() @@ -2024,14 +2024,14 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke checkpoint!(pool) depth = pool._current_depth # = 2 - @test pool.float64._checkpoint_depths[end] == depth - @test pool.float32._checkpoint_depths[end] == depth - @test pool.int64._checkpoint_depths[end] == depth - @test pool.int32._checkpoint_depths[end] == depth + @test pool.float64._checkpoint_depths[end] == depth + @test pool.float32._checkpoint_depths[end] == depth + @test pool.int64._checkpoint_depths[end] == depth + @test pool.int32._checkpoint_depths[end] == depth @test pool.complexf64._checkpoint_depths[end] == depth @test pool.complexf32._checkpoint_depths[end] == depth - @test pool.bool._checkpoint_depths[end] == depth - @test pool.bits._checkpoint_depths[end] == depth + @test pool.bool._checkpoint_depths[end] == depth + @test pool.bits._checkpoint_depths[end] == depth rewind!(pool) end @@ -2117,10 +2117,10 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke depth = pool._current_depth # = 2 # No typed pool should have an eager checkpoint at this depth - @test pool.float64._checkpoint_depths[end] < depth - @test pool.float32._checkpoint_depths[end] < depth - @test pool.int64._checkpoint_depths[end] < depth - @test pool.bool._checkpoint_depths[end] < depth + @test pool.float64._checkpoint_depths[end] < depth + @test pool.float32._checkpoint_depths[end] < depth + @test pool.int64._checkpoint_depths[end] < depth + @test pool.bool._checkpoint_depths[end] < depth # But depth metadata IS updated @test pool._current_depth == 2 @@ -2148,7 +2148,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke # First acquire triggers lazy checkpoint for Float64 only acquire!(pool, Float64, 5) @test pool.float64._checkpoint_depths[end] == depth # NOW checkpointed - @test pool.float32._checkpoint_depths[end] < depth # Float32 untouched + @test pool.float32._checkpoint_depths[end] < depth # Float32 untouched rewind!(pool) @test pool.float64.n_active == 0 @@ -2439,4 +2439,4 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke end end -end # State Management \ No newline at end of file +end # State Management diff --git a/test/test_task_local_pool.jl b/test/test_task_local_pool.jl index c408578d..61078f52 100644 --- a/test/test_task_local_pool.jl +++ b/test/test_task_local_pool.jl @@ -230,4 +230,4 @@ empty!(pool) end -end # Task-local Pool \ No newline at end of file +end # Task-local Pool diff --git a/test/test_utils.jl b/test/test_utils.jl index ebb32264..163adf86 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -2,7 +2,7 @@ import AdaptiveArrayPools: _validate_pool_return # Helper macro to capture stdout (must be defined before use) macro capture_out(expr) - quote + return quote local old_stdout = stdout local rd, wr = redirect_stdout() try @@ -197,7 +197,7 @@ end end @testset "Base.show for TypedPool & BitTypedPool" begin - import AdaptiveArrayPools: TypedPool, BitTypedPool + import AdaptiveArrayPools: TypedPool, BitTypedPool # Empty TypedPool - compact show tp_empty = TypedPool{Float64}() @@ -573,4 +573,4 @@ end POOL_DEBUG[] = old_debug end -end # Utilities and Debugging \ No newline at end of file +end # Utilities and Debugging diff --git a/test/test_zero_allocation.jl b/test/test_zero_allocation.jl index 7beab686..a1349e1c 100644 --- a/test/test_zero_allocation.jl +++ b/test/test_zero_allocation.jl @@ -11,7 +11,7 @@ # 3. Verify: loop has 0 bytes allocation after warmup @testset "Zero-allocation Patterns" begin - + # ============================================================================== # Pattern 1: acquire! only (SubArray) - N-D matrices # ============================================================================== @@ -410,4 +410,4 @@ end end -end # Zero-allocation Patterns \ No newline at end of file +end # Zero-allocation Patterns