Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .github/workflows/Documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,6 @@ jobs:

- uses: julia-actions/cache@v2

- name: Replace git@github.com with https in Package.toml files
run: |
find ~/.julia/registries/FuseRegistry -type f -name 'Package.toml' -exec sed -i 's|git@github.com:|https://project-torrey-pines:${{secrets.PTP_READ_TOKEN}}@github.com/|g' {} +

- name: Install dependencies
run: |
julia --project=docs -e '
Expand Down
3 changes: 3 additions & 0 deletions ext/AdaptiveArrayPoolsCUDAExt/dispatch.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ using AdaptiveArrayPools: allocate_vector, wrap_array, get_typed_pool!
if p._current_depth > 1
push!(tp._checkpoint_n_active, 0) # n_active starts at 0
push!(tp._checkpoint_depths, p._current_depth)
# Signal that a fallback type was touched so lazy/typed-lazy rewind
# iterates pool.others (same fix as CPU get_typed_pool!)
@inbounds p._touched_has_others[p._current_depth] = true
end
tp
end::CuTypedPool{T}
Expand Down
5 changes: 3 additions & 2 deletions ext/AdaptiveArrayPoolsCUDAExt/state.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ end
@inline function AdaptiveArrayPools.checkpoint!(pool::CuAdaptiveArrayPool, ::Type{T}) where {T}
# Typed checkpoint for the CUDA pool: enters a new depth, pushes the per-depth
# tracking entries, then checkpoints the typed pool for `T`. Returns `nothing`.
pool._current_depth += 1
push!(pool._touched_type_masks, UInt16(0))
# NOTE(review): this block is a rendered diff, so the old and new versions of the
# has-others push are shown together. The `false` push appears to be the
# pre-change line and the `_fixed_slot_bit` push its replacement — confirm that
# only one entry per depth is pushed in the actual file.
push!(pool._touched_has_others, false)
# Flag is true when `_fixed_slot_bit(T)` is zero. Presumably that means `T` has
# no fixed slot and lives in the fallback `others` storage — verify against
# `_fixed_slot_bit`.
push!(pool._touched_has_others, AdaptiveArrayPools._fixed_slot_bit(T) == UInt16(0))
# Record the typed pool's state for `T` at the depth just entered.
_checkpoint_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, T), pool._current_depth)
nothing
end
Expand All @@ -70,11 +70,12 @@ end
push!(unique_indices, i)
end
end
has_any_fallback = any(i -> AdaptiveArrayPools._fixed_slot_bit(types[i].parameters[1]) == UInt16(0), unique_indices)
checkpoint_exprs = [:(_checkpoint_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices]
quote
pool._current_depth += 1
push!(pool._touched_type_masks, UInt16(0))
push!(pool._touched_has_others, false)
push!(pool._touched_has_others, $has_any_fallback)
$(checkpoint_exprs...)
nothing
end
Expand Down
33 changes: 29 additions & 4 deletions src/state.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ After warmup, this function has **zero allocation**.
See also: [`rewind!`](@ref), [`@with_pool`](@ref)
"""
function checkpoint!(pool::AdaptiveArrayPool)

# Increment depth and initialize type touch tracking state
pool._current_depth += 1
push!(pool._touched_type_masks, UInt16(0))
Expand Down Expand Up @@ -43,9 +44,13 @@ Also updates _current_depth and bitmask state for type touch tracking.
~77% faster than full checkpoint! when only one type is used.
"""
@inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where T

# Enter a new depth and push the per-depth type-touch tracking entries.
pool._current_depth += 1
push!(pool._touched_type_masks, UInt16(0))
# NOTE(review): this block is a rendered diff, so the old and new versions of the
# has-others push are shown together. The `false` push below appears to be the
# pre-change line, superseded by the `_fixed_slot_bit` push that follows —
# confirm that only one entry per depth is pushed in the actual file.
push!(pool._touched_has_others, false)
# Push true when T is a fallback type (non-fixed-slot) so that
# _typed_lazy_rewind! iterates pool.others even if _acquire_impl!
# (which bypasses _record_type_touch!) is the only acquire path.
push!(pool._touched_has_others, _fixed_slot_bit(T) == UInt16(0))
# Record the typed pool's state for `T` at the depth just entered.
_checkpoint_typed_pool!(get_typed_pool!(pool, T), pool._current_depth)
nothing
end
Expand All @@ -57,6 +62,7 @@ Save state for multiple specific types. Uses @generated for zero-overhead
compile-time unrolling. Increments _current_depth once for all types.
"""
@generated function checkpoint!(pool::AdaptiveArrayPool, types::Type...)

# Deduplicate types at compile time (e.g., Float64, Float64 → Float64)
seen = Set{Any}()
unique_indices = Int[]
Expand All @@ -66,20 +72,30 @@ compile-time unrolling. Increments _current_depth once for all types.
push!(unique_indices, i)
end
end
# Check at compile time if any type is a fallback (non-fixed-slot).
# If so, push has_others=true so _typed_lazy_rewind! iterates pool.others
# even when _acquire_impl! (bypassing _record_type_touch!) is used.
has_any_fallback = any(i -> _fixed_slot_bit(types[i].parameters[1]) == UInt16(0), unique_indices)
checkpoint_exprs = [:(_checkpoint_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices]
quote
pool._current_depth += 1
push!(pool._touched_type_masks, UInt16(0))
push!(pool._touched_has_others, false)
push!(pool._touched_has_others, $has_any_fallback)
$(checkpoint_exprs...)
nothing
end
end

# Internal helper for checkpoint (works for any AbstractTypedPool)
@inline function _checkpoint_typed_pool!(tp::AbstractTypedPool, depth::Int)
# Saves `tp.n_active` and `depth` onto the typed pool's two checkpoint stacks
# (`_checkpoint_n_active`, `_checkpoint_depths`). Returns `nothing`.
#
# NOTE(review): this block is a rendered diff. The two unguarded pushes directly
# below look like the pre-change body, and the guarded `if` block further down
# looks like their replacement. If both were kept, the last recorded depth would
# already equal `depth` and the guard could never fire — confirm that only the
# guarded form exists in the actual file.
push!(tp._checkpoint_n_active, tp.n_active)
push!(tp._checkpoint_depths, depth)

# Guard: skip if already checkpointed at this depth (prevents double-push
# when get_typed_pool! auto-checkpoints a new fallback type and then
# checkpoint!(pool, types...) calls _checkpoint_typed_pool! for the same type).
# The `@inbounds` read of `[end]` assumes `_checkpoint_depths` is never empty
# (presumably a sentinel entry is always present) — TODO confirm.
if @inbounds(tp._checkpoint_depths[end]) != depth
push!(tp._checkpoint_n_active, tp.n_active)
push!(tp._checkpoint_depths, depth)
end
nothing
end

Expand All @@ -100,6 +116,7 @@ new `others` entries created during the scope (n_active starts at 0 = sentinel).
Performance: ~2ns vs ~540ns for full `checkpoint!`.
"""
@inline function _lazy_checkpoint!(pool::AdaptiveArrayPool)

pool._current_depth += 1
# _LAZY_MODE_BIT = lazy mode flag (bits 0–7 are fixed-slot type bits)
push!(pool._touched_type_masks, _LAZY_MODE_BIT)
Expand Down Expand Up @@ -133,6 +150,7 @@ automatically delegates to `reset!` to safely clear all n_active counters.
See also: [`checkpoint!`](@ref), [`reset!`](@ref), [`@with_pool`](@ref)
"""
function rewind!(pool::AdaptiveArrayPool)

cur_depth = pool._current_depth

# Safety guard: at global scope (depth=1), no checkpoint to rewind to
Expand Down Expand Up @@ -166,6 +184,7 @@ Restore state for a specific type only.
Also updates _current_depth and bitmask state.
"""
@inline function rewind!(pool::AdaptiveArrayPool, ::Type{T}) where T

# Safety guard: at global scope (depth=1), delegate to reset!
if pool._current_depth == 1
reset!(get_typed_pool!(pool, T))
Expand All @@ -185,6 +204,7 @@ Restore state for multiple specific types in reverse order.
Decrements _current_depth once after all types are rewound.
"""
@generated function rewind!(pool::AdaptiveArrayPool, types::Type...)

# Deduplicate types at compile time (e.g., Float64, Float64 → Float64)
seen = Set{Any}()
unique_indices = Int[]
Expand Down Expand Up @@ -213,6 +233,7 @@ end
# Internal helper for rewind with orphan cleanup (works for any AbstractTypedPool)
# Uses 1-based sentinel pattern: no isempty checks needed (sentinel [0] guarantees non-empty)
@inline function _rewind_typed_pool!(tp::AbstractTypedPool, current_depth::Int)

# 1. Orphaned Checkpoints Cleanup
# If there are checkpoints from deeper scopes (depth > current), pop them first.
# This happens when a nested scope did full checkpoint but typed rewind,
Expand Down Expand Up @@ -250,6 +271,7 @@ Called directly from the macro-generated `finally` clause as a single function c
(matching the structure of `_lazy_checkpoint!` for symmetry and performance).
"""
@inline function _lazy_rewind!(pool::AdaptiveArrayPool)

d = pool._current_depth
bits = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK
_selective_rewind_fixed_slots!(pool, bits)
Expand Down Expand Up @@ -281,6 +303,7 @@ lazy first-touch checkpoint for each extra type on first acquire, ensuring Case
checkpoint!(pool, types...)
d = pool._current_depth
@inbounds pool._touched_type_masks[d] |= _TYPED_LAZY_BIT

# Eagerly snapshot pre-existing others entries — mirrors _lazy_checkpoint!.
# _record_type_touch! cannot lazy-checkpoint others types (b==0 branch, no per-type bit).
# Without this, a helper that re-acquires an already-active others type triggers Case B
Expand Down Expand Up @@ -311,6 +334,7 @@ statically-tracked set). Rewinds only pools whose bits are set in
guaranteed by the `_TYPED_LAZY_BIT` mode set in `_typed_lazy_checkpoint!`.
"""
@inline function _typed_lazy_rewind!(pool::AdaptiveArrayPool, tracked_mask::UInt16)

d = pool._current_depth
touched = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK
combined = tracked_mask | touched
Expand Down Expand Up @@ -339,6 +363,7 @@ Unset bits are skipped entirely: for pools that were acquired without a matching
checkpoint, `_rewind_typed_pool!` Case B safely restores from the parent checkpoint.
"""
@inline function _selective_rewind_fixed_slots!(pool::AdaptiveArrayPool, mask::UInt16)

d = pool._current_depth
_has_bit(mask, Float64) && _rewind_typed_pool!(pool.float64, d)
_has_bit(mask, Float32) && _rewind_typed_pool!(pool.float32, d)
Expand Down
5 changes: 5 additions & 0 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,11 @@ end
if p._current_depth > 1
push!(tp._checkpoint_n_active, 0) # n_active starts at 0
push!(tp._checkpoint_depths, p._current_depth)
# Signal that a fallback type was touched so lazy/typed-lazy rewind
# iterates pool.others. Without this, _acquire_impl! (which bypasses
# _record_type_touch!) would leave has_others=false, causing the
# rewind to skip pool.others entirely and leak this new type's n_active.
@inbounds p._touched_has_others[p._current_depth] = true
end
tp
end::TypedPool{T}
Expand Down
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ else
include("test_bitarray.jl")
include("test_coverage.jl")
include("test_allocation.jl")
include("test_fallback_reclamation.jl")

# CUDA extension tests (auto-detect, skip with TEST_CUDA=false)
if get(ENV, "TEST_CUDA", "true") != "false"
Expand Down
Loading