diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 00000000..25f76fd4
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,2 @@
+# Initial Runic formatting
+4f29bf67e8be8e82da2deabd8236aaa9d95b781e
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 1d4b1ca5..84e069f6 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -41,10 +41,12 @@ jobs:
       - uses: julia-actions/julia-runtest@v1
 
       - uses: julia-actions/julia-processcoverage@v1
+        if: matrix.version == '1.x' && matrix.os == 'ubuntu-latest'
         with:
           directories: src
 
       - uses: codecov/codecov-action@v4
+        if: matrix.version == '1.x' && matrix.os == 'ubuntu-latest'
         with:
           files: lcov.info
           token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/.github/workflows/Runic.yml b/.github/workflows/Runic.yml
new file mode 100644
index 00000000..35534a50
--- /dev/null
+++ b/.github/workflows/Runic.yml
@@ -0,0 +1,15 @@
+name: Runic
+on:
+  push:
+    branches: [master]
+  pull_request:
+jobs:
+  runic:
+    name: Runic
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: "1"
+      - uses: fredrikekre/runic-action@v1
diff --git a/docs/make.jl b/docs/make.jl
index 88b3b074..feb85de7 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -12,7 +12,7 @@ function write_if_changed(path::String, content::String)
     if isfile(path) && read(path, String) == content
         return  # Content unchanged, skip write
     end
-    write(path, content)
+    return write(path, content)
 end
 
 # ============================================
diff --git a/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl b/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl
index 437d5163..272fcf09 100644
--- a/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl
+++ b/ext/AdaptiveArrayPoolsCUDAExt/acquire.jl
@@ -28,8 +28,8 @@
 # ==============================================================================
 
 using AdaptiveArrayPools: get_view!, get_nd_view!, get_nd_array!, allocate_vector, safe_prod,
-                          _record_type_touch!, _fixed_slot_bit, _checkpoint_typed_pool!,
-                          _MODE_BITS_MASK
+    _record_type_touch!, _fixed_slot_bit, _checkpoint_typed_pool!,
+    _MODE_BITS_MASK
 
 """
     get_view!(tp::CuTypedPool{T}, n::Int) -> CuVector{T}
@@ -205,5 +205,5 @@ end
         end
         @inbounds pool._touched_type_masks[depth] = current_mask | b
     end
-    nothing
+    return nothing
 end
diff --git a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
index 3c54a119..a9803372 100644
--- a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
+++ b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
@@ -37,61 +37,61 @@ Default element type for disabled CUDA pools (matches CUDA.zeros() default).
 AdaptiveArrayPools.default_eltype(::DisabledPool{:cuda}) = Float32
 
 # --- zeros! for DisabledPool{:cuda} ---
-@inline AdaptiveArrayPools.zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.zeros(T, dims...)
-@inline AdaptiveArrayPools.zeros!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
-@inline AdaptiveArrayPools.zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.zeros(T, dims...)
-@inline AdaptiveArrayPools.zeros!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.zeros(T, dims...)
+@inline AdaptiveArrayPools.zeros!(p::DisabledPool{:cuda}, dims::Vararg{Int, N}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CUDA.zeros(T, dims...)
+@inline AdaptiveArrayPools.zeros!(p::DisabledPool{:cuda}, dims::NTuple{N, Int}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
 
 # --- ones! for DisabledPool{:cuda} ---
-@inline AdaptiveArrayPools.ones!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.ones(T, dims...)
-@inline AdaptiveArrayPools.ones!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
-@inline AdaptiveArrayPools.ones!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.ones(T, dims...)
-@inline AdaptiveArrayPools.ones!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.ones!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.ones(T, dims...)
+@inline AdaptiveArrayPools.ones!(p::DisabledPool{:cuda}, dims::Vararg{Int, N}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.ones!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CUDA.ones(T, dims...)
+@inline AdaptiveArrayPools.ones!(p::DisabledPool{:cuda}, dims::NTuple{N, Int}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
 
 # --- similar! for DisabledPool{:cuda} ---
 @inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x)
 @inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}) where {T} = CUDA.similar(x, T)
-@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, dims::Vararg{Int,N}) where {N} = CUDA.similar(x, dims...)
-@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.similar(x, T, dims...)
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, dims::Vararg{Int, N}) where {N} = CUDA.similar(x, dims...)
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.similar(x, T, dims...)
 # Fallback for non-CuArray inputs (creates CuArray from AbstractArray)
 @inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x))
 @inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}) where {T} = CuArray{T}(undef, size(x))
-@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = CuArray{eltype(x)}(undef, dims)
-@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T}(undef, dims)
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, dims::Vararg{Int, N}) where {N} = CuArray{eltype(x)}(undef, dims)
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CuArray{T}(undef, dims)
 
 # --- unsafe_zeros! for DisabledPool{:cuda} ---
-@inline AdaptiveArrayPools.unsafe_zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.zeros(T, dims...)
-@inline AdaptiveArrayPools.unsafe_zeros!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
-@inline AdaptiveArrayPools.unsafe_zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.zeros(T, dims...)
-@inline AdaptiveArrayPools.unsafe_zeros!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.unsafe_zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.zeros(T, dims...)
+@inline AdaptiveArrayPools.unsafe_zeros!(p::DisabledPool{:cuda}, dims::Vararg{Int, N}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.unsafe_zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CUDA.zeros(T, dims...)
+@inline AdaptiveArrayPools.unsafe_zeros!(p::DisabledPool{:cuda}, dims::NTuple{N, Int}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
 
 # --- unsafe_ones! for DisabledPool{:cuda} ---
-@inline AdaptiveArrayPools.unsafe_ones!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.ones(T, dims...)
-@inline AdaptiveArrayPools.unsafe_ones!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
-@inline AdaptiveArrayPools.unsafe_ones!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.ones(T, dims...)
-@inline AdaptiveArrayPools.unsafe_ones!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.unsafe_ones!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.ones(T, dims...)
+@inline AdaptiveArrayPools.unsafe_ones!(p::DisabledPool{:cuda}, dims::Vararg{Int, N}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.unsafe_ones!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CUDA.ones(T, dims...)
+@inline AdaptiveArrayPools.unsafe_ones!(p::DisabledPool{:cuda}, dims::NTuple{N, Int}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
 
 # --- unsafe_similar! for DisabledPool{:cuda} ---
 @inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x)
 @inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}) where {T} = CUDA.similar(x, T)
-@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, dims::Vararg{Int,N}) where {N} = CUDA.similar(x, dims...)
-@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.similar(x, T, dims...)
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, dims::Vararg{Int, N}) where {N} = CUDA.similar(x, dims...)
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CUDA.similar(x, T, dims...)
 # Fallback for non-CuArray inputs
 @inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x))
 @inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}) where {T} = CuArray{T}(undef, size(x))
-@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = CuArray{eltype(x)}(undef, dims)
-@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T}(undef, dims)
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, dims::Vararg{Int, N}) where {N} = CuArray{eltype(x)}(undef, dims)
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CuArray{T}(undef, dims)
 
 # --- acquire! for DisabledPool{:cuda} ---
 @inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, n::Int) where {T} = CuVector{T}(undef, n)
-@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T,N}(undef, dims)
-@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CuArray{T,N}(undef, dims)
+@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CuArray{T, N}(undef, dims)
+@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CuArray{T, N}(undef, dims)
 @inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x)
 @inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x))
 
 # --- unsafe_acquire! for DisabledPool{:cuda} ---
 @inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, n::Int) where {T} = CuVector{T}(undef, n)
-@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T,N}(undef, dims)
-@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CuArray{T,N}(undef, dims)
+@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = CuArray{T, N}(undef, dims)
+@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = CuArray{T, N}(undef, dims)
 @inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x)
 @inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x))
diff --git a/ext/AdaptiveArrayPoolsCUDAExt/dispatch.jl b/ext/AdaptiveArrayPoolsCUDAExt/dispatch.jl
index 58e04806..f5ff39a9 100644
--- a/ext/AdaptiveArrayPoolsCUDAExt/dispatch.jl
+++ b/ext/AdaptiveArrayPoolsCUDAExt/dispatch.jl
@@ -10,7 +10,7 @@ using AdaptiveArrayPools: allocate_vector, wrap_array, get_typed_pool!
 # ==============================================================================
 
 @inline AdaptiveArrayPools.allocate_vector(
-    ::AbstractTypedPool{T,CuVector{T}}, n::Int
+    ::AbstractTypedPool{T, CuVector{T}}, n::Int
 ) where {T} = CuVector{T}(undef, n)
 
 # ==============================================================================
@@ -20,8 +20,8 @@ using AdaptiveArrayPools: allocate_vector, wrap_array, get_typed_pool!
 # GPU uses reshape which returns CuArray{T,N} via GPUArrays derive()
 # (NOT ReshapedArray like CPU - this is simpler for GPU kernels)
 @inline AdaptiveArrayPools.wrap_array(
-    ::AbstractTypedPool{T,CuVector{T}}, flat_view, dims::NTuple{N,Int}
-) where {T,N} = reshape(flat_view, dims)
+    ::AbstractTypedPool{T, CuVector{T}}, flat_view, dims::NTuple{N, Int}
+) where {T, N} = reshape(flat_view, dims)
 
 # ==============================================================================
 # get_typed_pool! Dispatches for CuAdaptiveArrayPool
@@ -39,7 +39,7 @@ using AdaptiveArrayPools: allocate_vector, wrap_array, get_typed_pool!
 
 # Slow path: rare types via IdDict (with checkpoint correction!)
 @inline function AdaptiveArrayPools.get_typed_pool!(p::CuAdaptiveArrayPool, ::Type{T}) where {T}
-    get!(p.others, T) do
+    return get!(p.others, T) do
         tp = CuTypedPool{T}()
         # CRITICAL: Match CPU behavior - auto-checkpoint new pool if inside @with_pool scope
         # Without this, rewind! would corrupt state for dynamically-created pools
diff --git a/ext/AdaptiveArrayPoolsCUDAExt/state.jl b/ext/AdaptiveArrayPoolsCUDAExt/state.jl
index 5a546c8e..46cd616c 100644
--- a/ext/AdaptiveArrayPoolsCUDAExt/state.jl
+++ b/ext/AdaptiveArrayPoolsCUDAExt/state.jl
@@ -6,8 +6,8 @@
 # AbstractTypedPool, so they work for CuTypedPool automatically.
 
 using AdaptiveArrayPools: checkpoint!, rewind!, reset!,
-                          _checkpoint_typed_pool!, _rewind_typed_pool!, _has_bit,
-                          _LAZY_MODE_BIT, _TYPED_LAZY_BIT, _TYPE_BITS_MASK
+    _checkpoint_typed_pool!, _rewind_typed_pool!, _has_bit,
+    _LAZY_MODE_BIT, _TYPED_LAZY_BIT, _TYPE_BITS_MASK
 
 # ==============================================================================
 # GPU Fixed Slot Iteration
@@ -20,7 +20,7 @@ Apply `f` to each fixed slot CuTypedPool. Zero allocation via compile-time unrol
 """
 @generated function AdaptiveArrayPools.foreach_fixed_slot(f::F, pool::CuAdaptiveArrayPool) where {F}
     exprs = [:(f(getfield(pool, $(QuoteNode(field))))) for field in GPU_FIXED_SLOT_FIELDS]
-    quote
+    return quote
         Base.@_inline_meta
         $(exprs...)
         nothing
@@ -57,7 +57,7 @@ end
     push!(pool._touched_type_masks, UInt16(0))
     push!(pool._touched_has_others, AdaptiveArrayPools._fixed_slot_bit(T) == UInt16(0))
     _checkpoint_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, T), pool._current_depth)
-    nothing
+    return nothing
 end
 
 # Type-specific checkpoint (multiple types)
@@ -72,7 +72,7 @@ end
     end
     has_any_fallback = any(i -> AdaptiveArrayPools._fixed_slot_bit(types[i].parameters[1]) == UInt16(0), unique_indices)
     checkpoint_exprs = [:(_checkpoint_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices]
-    quote
+    return quote
         pool._current_depth += 1
         push!(pool._touched_type_masks, UInt16(0))
         push!(pool._touched_has_others, $has_any_fallback)
@@ -121,7 +121,7 @@ end
     pop!(pool._touched_type_masks)
     pop!(pool._touched_has_others)
     pool._current_depth -= 1
-    nothing
+    return nothing
 end
 
 # Type-specific rewind (multiple types)
@@ -136,7 +136,7 @@ end
     end
     rewind_exprs = [:(_rewind_typed_pool!(AdaptiveArrayPools.get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in reverse(unique_indices)]
     reset_exprs = [:(reset!(AdaptiveArrayPools.get_typed_pool!(pool, types[$i]))) for i in unique_indices]
-    quote
+    return quote
         if pool._current_depth == 1
             $(reset_exprs...)
             return nothing
@@ -175,19 +175,19 @@ end
         @inbounds pool._touched_has_others[depth] = true
     end
     # Float16 uses lazy first-touch via bit 7 in _record_type_touch! — no eager checkpoint needed.
-    nothing
+    return nothing
 end
 
 @inline function AdaptiveArrayPools._lazy_rewind!(pool::CuAdaptiveArrayPool)
     d = pool._current_depth
     mask = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK
-    _has_bit(mask, Float64)    && _rewind_typed_pool!(pool.float64,    d)
-    _has_bit(mask, Float32)    && _rewind_typed_pool!(pool.float32,    d)
-    _has_bit(mask, Int64)      && _rewind_typed_pool!(pool.int64,      d)
-    _has_bit(mask, Int32)      && _rewind_typed_pool!(pool.int32,      d)
+    _has_bit(mask, Float64)    && _rewind_typed_pool!(pool.float64, d)
+    _has_bit(mask, Float32)    && _rewind_typed_pool!(pool.float32, d)
+    _has_bit(mask, Int64)      && _rewind_typed_pool!(pool.int64, d)
+    _has_bit(mask, Int32)      && _rewind_typed_pool!(pool.int32, d)
     _has_bit(mask, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d)
     _has_bit(mask, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d)
-    _has_bit(mask, Bool)       && _rewind_typed_pool!(pool.bool,       d)
+    _has_bit(mask, Bool)       && _rewind_typed_pool!(pool.bool, d)
     # Bit 7: Float16 (CUDA reassignment — _fixed_slot_bit(Float16)==0, must use explicit bit check)
     mask & _cuda_float16_bit() != 0 && _rewind_typed_pool!(pool.float16, d)
     if @inbounds(pool._touched_has_others[d])
@@ -198,7 +198,7 @@ end
     pop!(pool._touched_type_masks)
     pop!(pool._touched_has_others)
     pool._current_depth -= 1
-    nothing
+    return nothing
 end
 
 # ==============================================================================
@@ -221,7 +221,7 @@ end
         @inbounds pool._touched_has_others[d] = true
     end
     # Float16 uses lazy first-touch via bit 7 in _record_type_touch! — no eager checkpoint needed.
-    nothing
+    return nothing
 end
 
 # _typed_lazy_rewind!: selective rewind of (tracked | touched) mask.
@@ -232,13 +232,13 @@ end
     d = pool._current_depth
     touched = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK
     combined = tracked_mask | touched
-    _has_bit(combined, Float64)    && _rewind_typed_pool!(pool.float64,    d)
-    _has_bit(combined, Float32)    && _rewind_typed_pool!(pool.float32,    d)
-    _has_bit(combined, Int64)      && _rewind_typed_pool!(pool.int64,      d)
-    _has_bit(combined, Int32)      && _rewind_typed_pool!(pool.int32,      d)
+    _has_bit(combined, Float64)    && _rewind_typed_pool!(pool.float64, d)
+    _has_bit(combined, Float32)    && _rewind_typed_pool!(pool.float32, d)
+    _has_bit(combined, Int64)      && _rewind_typed_pool!(pool.int64, d)
+    _has_bit(combined, Int32)      && _rewind_typed_pool!(pool.int32, d)
     _has_bit(combined, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d)
     _has_bit(combined, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d)
-    _has_bit(combined, Bool)       && _rewind_typed_pool!(pool.bool,       d)
+    _has_bit(combined, Bool)       && _rewind_typed_pool!(pool.bool, d)
     # Float16: bit 7 is set by _record_type_touch! on first touch (lazy first-touch).
     # Also rewind when Float16 was a *tracked* type in the macro: _typed_lazy_checkpoint!
     # calls checkpoint!(pool, Float16) which pushes a checkpoint at depth d, but _acquire_impl!
@@ -257,7 +257,7 @@ end
     pop!(pool._touched_type_masks)
     pop!(pool._touched_has_others)
     pool._current_depth -= 1
-    nothing
+    return nothing
 end
 
 # ==============================================================================
@@ -288,7 +288,7 @@ end
 # Type-specific reset
 @inline function AdaptiveArrayPools.reset!(pool::CuAdaptiveArrayPool, ::Type{T}) where {T}
     reset!(AdaptiveArrayPools.get_typed_pool!(pool, T))
-    pool
+    return pool
 end
 
 # ==============================================================================
diff --git a/ext/AdaptiveArrayPoolsCUDAExt/types.jl b/ext/AdaptiveArrayPoolsCUDAExt/types.jl
index a3673f23..4e5dd9fc 100644
--- a/ext/AdaptiveArrayPoolsCUDAExt/types.jl
+++ b/ext/AdaptiveArrayPoolsCUDAExt/types.jl
@@ -49,7 +49,7 @@ mutable struct CuTypedPool{T} <: AbstractTypedPool{T, CuVector{T}}
 end
 
 function CuTypedPool{T}() where {T}
-    CuTypedPool{T}(
+    return CuTypedPool{T}(
         CuVector{T}[],      # vectors
         Any[],              # views (N-way flat cache)
         Any[],              # view_dims
@@ -121,7 +121,7 @@ end
 
 function CuAdaptiveArrayPool()
     dev = CUDA.device()
-    CuAdaptiveArrayPool(
+    return CuAdaptiveArrayPool(
         CuTypedPool{Float32}(),
         CuTypedPool{Float64}(),
         CuTypedPool{Float16}(),
diff --git a/ext/AdaptiveArrayPoolsCUDAExt/utils.jl b/ext/AdaptiveArrayPoolsCUDAExt/utils.jl
index a8a5a424..c8c22556 100644
--- a/ext/AdaptiveArrayPoolsCUDAExt/utils.jl
+++ b/ext/AdaptiveArrayPoolsCUDAExt/utils.jl
@@ -13,14 +13,14 @@ using AdaptiveArrayPools: pool_stats, foreach_fixed_slot
 
 Print statistics for a CUDA typed pool.
 """
-function AdaptiveArrayPools.pool_stats(tp::CuTypedPool{T}; io::IO=stdout, indent::Int=0, name::String="") where {T}
+function AdaptiveArrayPools.pool_stats(tp::CuTypedPool{T}; io::IO = stdout, indent::Int = 0, name::String = "") where {T}
     prefix = " "^indent
     type_name = isempty(name) ? string(T) : name
 
     n_arrays = length(tp.vectors)
     if n_arrays == 0
-        printstyled(io, prefix, type_name, color=:cyan)
-        printstyled(io, " (empty)\n", color=:dark_gray)
+        printstyled(io, prefix, type_name, color = :cyan)
+        printstyled(io, " (empty)\n", color = :dark_gray)
         return
     end
 
@@ -32,20 +32,20 @@ function AdaptiveArrayPools.pool_stats(tp::CuTypedPool{T}; io::IO=stdout, indent
     cpu_str = Base.format_bytes(cpu_bytes)
 
     # Header
-    printstyled(io, prefix, type_name, color=:cyan)
-    printstyled(io, " [GPU]", color=:green)
+    printstyled(io, prefix, type_name, color = :cyan)
+    printstyled(io, " [GPU]", color = :green)
     println(io)
 
     # Stats
-    printstyled(io, prefix, "  slots: ", color=:dark_gray)
-    printstyled(io, n_arrays, color=:blue)
-    printstyled(io, " (active: ", color=:dark_gray)
-    printstyled(io, tp.n_active, color=:blue)
-    printstyled(io, ")\n", color=:dark_gray)
-
-    printstyled(io, prefix, "  elements: ", color=:dark_gray)
-    printstyled(io, total_elements, color=:blue)
-    printstyled(io, " ($gpu_str GPU + $cpu_str CPU)\n", color=:dark_gray)
+    printstyled(io, prefix, "  slots: ", color = :dark_gray)
+    printstyled(io, n_arrays, color = :blue)
+    printstyled(io, " (active: ", color = :dark_gray)
+    printstyled(io, tp.n_active, color = :blue)
+    printstyled(io, ")\n", color = :dark_gray)
+
+    printstyled(io, prefix, "  elements: ", color = :dark_gray)
+    printstyled(io, total_elements, color = :blue)
+    return printstyled(io, " ($gpu_str GPU + $cpu_str CPU)\n", color = :dark_gray)
 end
 
 # ==============================================================================
@@ -57,12 +57,12 @@ end
 
 Print statistics for a CUDA adaptive array pool.
 """
-function AdaptiveArrayPools.pool_stats(pool::CuAdaptiveArrayPool; io::IO=stdout)
+function AdaptiveArrayPools.pool_stats(pool::CuAdaptiveArrayPool; io::IO = stdout)
     # Header with device info
-    printstyled(io, "CuAdaptiveArrayPool", bold=true, color=:green)
-    printstyled(io, " (device ", color=:dark_gray)
-    printstyled(io, pool.device_id, color=:blue)
-    printstyled(io, ")\n", color=:dark_gray)
+    printstyled(io, "CuAdaptiveArrayPool", bold = true, color = :green)
+    printstyled(io, " (device ", color = :dark_gray)
+    printstyled(io, pool.device_id, color = :blue)
+    printstyled(io, ")\n", color = :dark_gray)
 
     has_content = false
 
@@ -71,18 +71,18 @@ function AdaptiveArrayPools.pool_stats(pool::CuAdaptiveArrayPool; io::IO=stdout)
         if !isempty(tp.vectors)
             has_content = true
             T = typeof(tp).parameters[1]
-            pool_stats(tp; io, indent=2, name="$T (fixed)")
+            pool_stats(tp; io, indent = 2, name = "$T (fixed)")
         end
     end
 
     # Fallback types
     for (T, tp) in pool.others
         has_content = true
-        pool_stats(tp; io, indent=2, name="$T (fallback)")
+        pool_stats(tp; io, indent = 2, name = "$T (fallback)")
     end
 
     if !has_content
-        printstyled(io, "  (empty)\n", color=:dark_gray)
+        printstyled(io, "  (empty)\n", color = :dark_gray)
     end
     return nothing
 end
@@ -94,7 +94,7 @@ end
 # Compact one-line show
 function Base.show(io::IO, tp::CuTypedPool{T}) where {T}
     n_vectors = length(tp.vectors)
-    if n_vectors == 0
+    return if n_vectors == 0
         print(io, "CuTypedPool{$T}(empty)")
     else
         total = sum(length(v) for v in tp.vectors)
@@ -104,7 +104,7 @@ end
 
 # Multi-line show
 function Base.show(io::IO, ::MIME"text/plain", tp::CuTypedPool{T}) where {T}
-    pool_stats(tp; io, name="CuTypedPool{$T}")
+    return pool_stats(tp; io, name = "CuTypedPool{$T}")
 end
 
 # ==============================================================================
@@ -131,10 +131,10 @@ function Base.show(io::IO, pool::CuAdaptiveArrayPool)
         total_active[] += tp.n_active
     end
 
-    print(io, "CuAdaptiveArrayPool(device=$(pool.device_id), types=$(n_types[]), slots=$(total_vectors[]), active=$(total_active[]))")
+    return print(io, "CuAdaptiveArrayPool(device=$(pool.device_id), types=$(n_types[]), slots=$(total_vectors[]), active=$(total_active[]))")
 end
 
 # Multi-line show
 function Base.show(io::IO, ::MIME"text/plain", pool::CuAdaptiveArrayPool)
-    pool_stats(pool; io)
+    return pool_stats(pool; io)
 end
diff --git a/src/acquire.jl b/src/acquire.jl
index 6d9dea15..f1dbdeb9 100644
--- a/src/acquire.jl
+++ b/src/acquire.jl
@@ -3,13 +3,15 @@
 # ==============================================================================
 
 # Allocate a new vector (dispatch point for extensions)
-@inline allocate_vector(::AbstractTypedPool{T,Vector{T}}, n::Int) where {T} =
+@inline allocate_vector(::AbstractTypedPool{T, Vector{T}}, n::Int) where {T} =
     Vector{T}(undef, n)
 
 # Wrap flat view into N-D array (dispatch point for extensions)
-@inline function wrap_array(::AbstractTypedPool{T,Vector{T}},
-                            flat_view, dims::NTuple{N,Int}) where {T,N}
-    unsafe_wrap(Array{T,N}, pointer(flat_view), dims)
+@inline function wrap_array(
+        ::AbstractTypedPool{T, Vector{T}},
+        flat_view, dims::NTuple{N, Int}
+    ) where {T, N}
+    return unsafe_wrap(Array{T, N}, pointer(flat_view), dims)
 end
 
 # ==============================================================================
@@ -53,7 +55,7 @@ end
 
 @inline function _check_pool_growth(tp::AbstractTypedPool, idx::Int)
     # Warn at every power of 2 from 512 onward (512, 1024, 2048, …)
-    if idx >= 512 && (idx & (idx - 1)) == 0
+    return if idx >= 512 && (idx & (idx - 1)) == 0
         _warn_pool_growing(tp, idx)
     end
 end
@@ -140,7 +142,7 @@ Zero-allocation reshape using `setfield!`-based wrapper reuse (Julia 1.11+).
   reuses cached `Array{T,N}` wrapper with `setfield!(:ref, :size)` pointing to `A`'s memory.
   Automatically reclaimed on `rewind!` via `n_active` restoration.
 """
-@inline function _reshape_impl!(pool::AdaptiveArrayPool, A::Array{T,M}, dims::NTuple{N,Int}) where {T,M,N}
+@inline function _reshape_impl!(pool::AdaptiveArrayPool, A::Array{T, M}, dims::NTuple{N, Int}) where {T, M, N}
     # Reject negative dimensions (match Base.reshape behavior)
     for d in dims
         d < 0 && throw(ArgumentError("invalid Array dimensions"))
@@ -148,8 +150,11 @@ Zero-allocation reshape using `setfield!`-based wrapper reuse (Julia 1.11+).
 
     # Validate before claiming slot
     total_len = safe_prod(dims)
-    length(A) == total_len || throw(DimensionMismatch(
-        "new dimensions $(dims) must be consistent with array length $(length(A))"))
+    length(A) == total_len || throw(
+        DimensionMismatch(
+            "new dimensions $(dims) must be consistent with array length $(length(A))"
+        )
+    )
 
     # 0-D reshape: rare edge case, delegate to Base (nd_wrappers is 1-indexed by N)
     N == 0 && return reshape(A, dims)
@@ -169,7 +174,7 @@ Zero-allocation reshape using `setfield!`-based wrapper reuse (Julia 1.11+).
     if wrappers !== nothing && slot <= length(wrappers)
         wrapper = @inbounds wrappers[slot]
         if wrapper !== nothing
-            arr = wrapper::Array{T,N}
+            arr = wrapper::Array{T, N}
             setfield!(arr, :ref, getfield(A, :ref))
             setfield!(arr, :size, dims)
             return arr
@@ -177,7 +182,7 @@ Zero-allocation reshape using `setfield!`-based wrapper reuse (Julia 1.11+).
     end
 
     # Cache miss (first call per slot+N): create wrapper, cache forever
-    arr = Array{T,N}(undef, ntuple(_ -> 0, Val(N)))
+    arr = Array{T, N}(undef, ntuple(_ -> 0, Val(N)))
     setfield!(arr, :ref, getfield(A, :ref))
     setfield!(arr, :size, dims)
     _store_nd_wrapper!(tp, N, slot, arr)
@@ -202,7 +207,7 @@ function _store_nd_wrapper!(tp::AbstractTypedPool, N::Int, slot::Int, wrapper)
     if N > length(tp.nd_wrappers)
         old_len = length(tp.nd_wrappers)
         resize!(tp.nd_wrappers, N)
-        for i in (old_len+1):N
+        for i in (old_len + 1):N
             @inbounds tp.nd_wrappers[i] = nothing
         end
     end
@@ -213,12 +218,12 @@ function _store_nd_wrapper!(tp::AbstractTypedPool, N::Int, slot::Int, wrapper)
     elseif slot > length(wrappers)
         old_len = length(wrappers)
         resize!(wrappers, slot)
-        for i in (old_len+1):slot
+        for i in (old_len + 1):slot
             @inbounds wrappers[i] = nothing
         end
     end
     @inbounds wrappers[slot] = wrapper
-    nothing
+    return nothing
 end
 
 """
@@ -295,7 +300,7 @@ For non-fixed-slot types, sets `_touched_has_others` flag.
     else
         @inbounds pool._touched_type_masks[depth] |= b
     end
-    nothing
+    return nothing
 end
 
 # CPU-specific override: adds lazy first-touch checkpoint in lazy mode
@@ -323,7 +328,7 @@ end
         end
         @inbounds pool._touched_type_masks[depth] = current_mask | b
     end
-    nothing
+    return nothing
 end
 
 # ==============================================================================
@@ -348,7 +353,7 @@ end
 end
 
 @inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
-    _acquire_impl!(pool, T, dims...)
+    return _acquire_impl!(pool, T, dims...)
 end
 
 # Similar-style
@@ -415,19 +420,19 @@ See also: [`unsafe_acquire!`](@ref) for native array access.
 """
 @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T}
     _record_type_touch!(pool, T)
-    _acquire_impl!(pool, T, n)
+    return _acquire_impl!(pool, T, n)
 end
 
 # Multi-dimensional support (zero-allocation with N-D cache)
 @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _acquire_impl!(pool, T, dims...)
+    return _acquire_impl!(pool, T, dims...)
 end
 
 # Tuple support: allows acquire!(pool, T, size(A)) where size(A) returns NTuple{N,Int}
 @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
     _record_type_touch!(pool, T)
-    _acquire_impl!(pool, T, dims...)
+    return _acquire_impl!(pool, T, dims...)
 end
 
 # Similar-style convenience methods
@@ -447,7 +452,7 @@ end
 """
 @inline function acquire!(pool::AbstractArrayPool, x::AbstractArray)
     _record_type_touch!(pool, eltype(x))
-    _acquire_impl!(pool, eltype(x), size(x))
+    return _acquire_impl!(pool, eltype(x), size(x))
 end
 
 # ==============================================================================
@@ -502,18 +507,18 @@ See also: [`acquire!`](@ref) for view-based access.
 """
 @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T}
     _record_type_touch!(pool, T)
-    _unsafe_acquire_impl!(pool, T, n)
+    return _unsafe_acquire_impl!(pool, T, n)
 end
 
 @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _unsafe_acquire_impl!(pool, T, dims...)
+    return _unsafe_acquire_impl!(pool, T, dims...)
 end
 
 # Tuple support
 @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
     _record_type_touch!(pool, T)
-    _unsafe_acquire_impl!(pool, T, dims)
+    return _unsafe_acquire_impl!(pool, T, dims)
 end
 
 # Similar-style convenience methods
@@ -533,7 +538,7 @@ end
 """
 @inline function unsafe_acquire!(pool::AbstractArrayPool, x::AbstractArray)
     _record_type_touch!(pool, eltype(x))
-    _unsafe_acquire_impl!(pool, eltype(x), size(x))
+    return _unsafe_acquire_impl!(pool, eltype(x), size(x))
 end
 
 # ==============================================================================
@@ -570,14 +575,14 @@ const _acquire_array_impl! = _unsafe_acquire_impl!
 
 # --- acquire! for DisabledPool{:cpu} ---
 @inline acquire!(::DisabledPool{:cpu}, ::Type{T}, n::Int) where {T} = Vector{T}(undef, n)
-@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = Array{T,N}(undef, dims)
-@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims)
+@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = Array{T, N}(undef, dims)
+@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, dims)
 @inline acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x)
 
 # --- unsafe_acquire! for DisabledPool{:cpu} ---
 @inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, n::Int) where {T} = Vector{T}(undef, n)
-@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = Array{T,N}(undef, dims)
-@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = Array{T, N}(undef, dims)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, dims)
 @inline unsafe_acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x)
 
 # --- Generic DisabledPool fallbacks (unknown backend → error) ---
@@ -588,11 +593,11 @@ const _acquire_array_impl! = _unsafe_acquire_impl!
 # Called when: USE_POOLING=true + @maybe_with_pool + MAYBE_POOLING_ENABLED[]=false
 # Explicit overloads for proper inlining (especially important for CUDA backend).
 @inline _acquire_impl!(p::DisabledPool, ::Type{T}, n::Int) where {T} = acquire!(p, T, n)
-@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = acquire!(p, T, dims...)
-@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = acquire!(p, T, dims)
+@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = acquire!(p, T, dims...)
+@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = acquire!(p, T, dims)
 @inline _acquire_impl!(p::DisabledPool, x::AbstractArray) = acquire!(p, x)
 
 @inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, n::Int) where {T} = unsafe_acquire!(p, T, n)
-@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_acquire!(p, T, dims...)
-@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_acquire!(p, T, dims)
+@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = unsafe_acquire!(p, T, dims...)
+@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = unsafe_acquire!(p, T, dims)
 @inline _unsafe_acquire_impl!(p::DisabledPool, x::AbstractArray) = unsafe_acquire!(p, x)
diff --git a/src/bitarray.jl b/src/bitarray.jl
index 8e9e4ab3..8f789b58 100644
--- a/src/bitarray.jl
+++ b/src/bitarray.jl
@@ -47,7 +47,7 @@ Uses `setfield!`-based wrapper reuse — unlimited dim patterns, 0-alloc after w
 The returned BitArray is only valid within the `@with_pool` scope.
 Do NOT use after the scope ends (use-after-free risk).
 """
-function get_bitarray!(tp::BitTypedPool, dims::NTuple{N,Int}) where {N}
+function get_bitarray!(tp::BitTypedPool, dims::NTuple{N, Int}) where {N}
     total_len = safe_prod(dims)
     tp.n_active += 1
     idx = tp.n_active
@@ -121,11 +121,11 @@ end
     return _unsafe_acquire_impl!(pool, Bit, n)
 end
 
-@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N}
+@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N}
     return _unsafe_acquire_impl!(pool, Bit, dims...)
 end
 
-@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N}
+@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N}
     return _unsafe_acquire_impl!(pool, Bit, dims...)
 end
 
@@ -139,12 +139,12 @@ end
     return get_bitarray!(tp, n)
 end
 
-@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N}
+@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N}
     tp = get_typed_pool!(pool, Bit)::BitTypedPool
     return get_bitarray!(tp, dims)
 end
 
-@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N}
+@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N}
     tp = get_typed_pool!(pool, Bit)::BitTypedPool
     return get_bitarray!(tp, dims)
 end
@@ -155,10 +155,10 @@ end
 
 # --- acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) ---
 @inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n)
-@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims)
-@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims)
+@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = BitArray{N}(undef, dims)
+@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = BitArray{N}(undef, dims)
 
 # --- unsafe_acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) ---
 @inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n)
-@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims)
-@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = BitArray{N}(undef, dims)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = BitArray{N}(undef, dims)
diff --git a/src/convenience.jl b/src/convenience.jl
index 84312e51..ada5e828 100644
--- a/src/convenience.jl
+++ b/src/convenience.jl
@@ -42,52 +42,52 @@ end
 
 See also: [`ones!`](@ref), [`similar!`](@ref), [`acquire!`](@ref)
 """
-@inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _zeros_impl!(pool, T, dims...)
+    return _zeros_impl!(pool, T, dims...)
 end
 
-@inline function zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+@inline function zeros!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
     _record_type_touch!(pool, default_eltype(pool))
-    _zeros_impl!(pool, default_eltype(pool), dims...)
+    return _zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
-@inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
+@inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
     _record_type_touch!(pool, T)
-    _zeros_impl!(pool, T, dims...)
+    return _zeros_impl!(pool, T, dims...)
 end
 
-@inline function zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+@inline function zeros!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N}
     _record_type_touch!(pool, default_eltype(pool))
-    _zeros_impl!(pool, default_eltype(pool), dims...)
+    return _zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Internal implementation (for macro transformation)
-@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     arr = _acquire_impl!(pool, T, dims...)
     fill!(arr, zero(T))
-    arr
+    return arr
 end
 
 # Default type overload for macro transformation (uses default_eltype for backend flexibility)
-@inline function _zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
-    _zeros_impl!(pool, default_eltype(pool), dims...)
+@inline function _zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
+    return _zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 # NTuple overloads for macro transformation (handles zeros!(pool, T, size(x)) form)
-@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
-    _zeros_impl!(pool, T, dims...)
+@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
+    return _zeros_impl!(pool, T, dims...)
 end
 
-@inline function _zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
-    _zeros_impl!(pool, default_eltype(pool), dims...)
+@inline function _zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N}
+    return _zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Bit type specialization: zeros!(pool, Bit, ...) delegates to falses!(pool, ...)
-@inline zeros!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = falses!(pool, dims...)
-@inline zeros!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = falses!(pool, dims)
-@inline _zeros_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = _falses_impl!(pool, dims...)
-@inline _zeros_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = _falses_impl!(pool, dims)
+@inline zeros!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = falses!(pool, dims...)
+@inline zeros!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = falses!(pool, dims)
+@inline _zeros_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = _falses_impl!(pool, dims...)
+@inline _zeros_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = _falses_impl!(pool, dims)
 
 # ==============================================================================
 # ones! - Acquire one-initialized arrays from pool
@@ -115,52 +115,52 @@ end
 
 See also: [`zeros!`](@ref), [`similar!`](@ref), [`acquire!`](@ref)
 """
-@inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _ones_impl!(pool, T, dims...)
+    return _ones_impl!(pool, T, dims...)
 end
 
-@inline function ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+@inline function ones!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
     _record_type_touch!(pool, default_eltype(pool))
-    _ones_impl!(pool, default_eltype(pool), dims...)
+    return _ones_impl!(pool, default_eltype(pool), dims...)
 end
 
-@inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
+@inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
     _record_type_touch!(pool, T)
-    _ones_impl!(pool, T, dims...)
+    return _ones_impl!(pool, T, dims...)
 end
 
-@inline function ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+@inline function ones!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N}
     _record_type_touch!(pool, default_eltype(pool))
-    _ones_impl!(pool, default_eltype(pool), dims...)
+    return _ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Internal implementation (for macro transformation)
-@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     arr = _acquire_impl!(pool, T, dims...)
     fill!(arr, one(T))
-    arr
+    return arr
 end
 
 # Default type overload for macro transformation (uses default_eltype for backend flexibility)
-@inline function _ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
-    _ones_impl!(pool, default_eltype(pool), dims...)
+@inline function _ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
+    return _ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 # NTuple overloads for macro transformation (handles ones!(pool, T, size(x)) form)
-@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
-    _ones_impl!(pool, T, dims...)
+@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
+    return _ones_impl!(pool, T, dims...)
 end
 
-@inline function _ones_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
-    _ones_impl!(pool, default_eltype(pool), dims...)
+@inline function _ones_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N}
+    return _ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Bit type specialization: ones!(pool, Bit, ...) delegates to trues!(pool, ...)
-@inline ones!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = trues!(pool, dims...)
-@inline ones!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = trues!(pool, dims)
-@inline _ones_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = _trues_impl!(pool, dims...)
-@inline _ones_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = _trues_impl!(pool, dims)
+@inline ones!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = trues!(pool, dims...)
+@inline ones!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = trues!(pool, dims)
+@inline _ones_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = _trues_impl!(pool, dims...)
+@inline _ones_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = _trues_impl!(pool, dims)
 
 # ==============================================================================
 # trues! - Acquire BitArray filled with true from pool
@@ -185,22 +185,22 @@ end
 
 See also: [`falses!`](@ref), [`ones!`](@ref), [`acquire!`](@ref)
 """
-@inline function trues!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+@inline function trues!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
     _record_type_touch!(pool, Bit)
-    _trues_impl!(pool, dims...)
+    return _trues_impl!(pool, dims...)
 end
-@inline function trues!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+@inline function trues!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N}
     _record_type_touch!(pool, Bit)
-    _trues_impl!(pool, dims...)
+    return _trues_impl!(pool, dims...)
 end
 
 # Internal implementation (for macro transformation)
-@inline function _trues_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+@inline function _trues_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
     arr = _acquire_impl!(pool, Bit, dims...)
     fill!(arr, true)
-    arr
+    return arr
 end
-@inline _trues_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} = _trues_impl!(pool, dims...)
+@inline _trues_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} = _trues_impl!(pool, dims...)
 
 # ==============================================================================
 # falses! - Acquire BitArray filled with false from pool
@@ -225,22 +225,22 @@ end
 
 See also: [`trues!`](@ref), [`zeros!`](@ref), [`acquire!`](@ref)
 """
-@inline function falses!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+@inline function falses!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
     _record_type_touch!(pool, Bit)
-    _falses_impl!(pool, dims...)
+    return _falses_impl!(pool, dims...)
 end
-@inline function falses!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+@inline function falses!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N}
     _record_type_touch!(pool, Bit)
-    _falses_impl!(pool, dims...)
+    return _falses_impl!(pool, dims...)
 end
 
 # Internal implementation (for macro transformation)
-@inline function _falses_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+@inline function _falses_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
     arr = _acquire_impl!(pool, Bit, dims...)
     fill!(arr, false)
-    arr
+    return arr
 end
-@inline _falses_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} = _falses_impl!(pool, dims...)
+@inline _falses_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N} = _falses_impl!(pool, dims...)
 
 # ==============================================================================
 # similar! - Acquire arrays with same type/size as template
@@ -274,39 +274,39 @@ See also: [`zeros!`](@ref), [`ones!`](@ref), [`acquire!`](@ref)
 """
 @inline function similar!(pool::AbstractArrayPool, x::AbstractArray)
     _record_type_touch!(pool, eltype(x))
-    _similar_impl!(pool, x)
+    return _similar_impl!(pool, x)
 end
 
 @inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T}
     _record_type_touch!(pool, T)
-    _similar_impl!(pool, x, T)
+    return _similar_impl!(pool, x, T)
 end
 
-@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N}
+@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int, N}) where {N}
     _record_type_touch!(pool, eltype(x))
-    _similar_impl!(pool, x, dims...)
+    return _similar_impl!(pool, x, dims...)
 end
 
-@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _similar_impl!(pool, x, T, dims...)
+    return _similar_impl!(pool, x, T, dims...)
 end
 
 # Internal implementation (for macro transformation)
 @inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray)
-    _acquire_impl!(pool, eltype(x), size(x))
+    return _acquire_impl!(pool, eltype(x), size(x))
 end
 
 @inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T}
-    _acquire_impl!(pool, T, size(x))
+    return _acquire_impl!(pool, T, size(x))
 end
 
-@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N}
-    _acquire_impl!(pool, eltype(x), dims...)
+@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int, N}) where {N}
+    return _acquire_impl!(pool, eltype(x), dims...)
 end
 
-@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
-    _acquire_impl!(pool, T, dims...)
+@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
+    return _acquire_impl!(pool, T, dims...)
 end
 
 # ==============================================================================
@@ -348,26 +348,26 @@ end
 
 See also: [`acquire!`](@ref), [`similar!`](@ref)
 """
-@inline function reshape!(pool::AbstractArrayPool, A::AbstractArray{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function reshape!(pool::AbstractArrayPool, A::AbstractArray{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _reshape_impl!(pool, A, dims)
+    return _reshape_impl!(pool, A, dims)
 end
 
-@inline function reshape!(pool::AbstractArrayPool, A::AbstractArray{T}, dims::NTuple{N,Int}) where {T,N}
+@inline function reshape!(pool::AbstractArrayPool, A::AbstractArray{T}, dims::NTuple{N, Int}) where {T, N}
     _record_type_touch!(pool, T)
-    _reshape_impl!(pool, A, dims)
+    return _reshape_impl!(pool, A, dims)
 end
 
 # Internal implementation (fallback: delegates to Base.reshape)
-@inline function _reshape_impl!(::AbstractArrayPool, A::AbstractArray, dims::NTuple{N,Int}) where {N}
+@inline function _reshape_impl!(::AbstractArrayPool, A::AbstractArray, dims::NTuple{N, Int}) where {N}
     for d in dims
         d < 0 && throw(ArgumentError("invalid Array dimensions"))
     end
-    reshape(A, dims)
+    return reshape(A, dims)
 end
 
 # Vararg forwarding (macro transforms reshape!(pool, A, 3, 4) → _reshape_impl!(pool, A, 3, 4))
-@inline _reshape_impl!(pool::AbstractArrayPool, A::AbstractArray, dims::Vararg{Int,N}) where {N} =
+@inline _reshape_impl!(pool::AbstractArrayPool, A::AbstractArray, dims::Vararg{Int, N}) where {N} =
     _reshape_impl!(pool, A, dims)
 
 # ==============================================================================
@@ -396,45 +396,45 @@ end
 
 See also: [`unsafe_ones!`](@ref), [`zeros!`](@ref), [`unsafe_acquire!`](@ref)
 """
-@inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _unsafe_zeros_impl!(pool, T, dims...)
+    return _unsafe_zeros_impl!(pool, T, dims...)
 end
 
-@inline function unsafe_zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+@inline function unsafe_zeros!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
     _record_type_touch!(pool, default_eltype(pool))
-    _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
+    return _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
-@inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
+@inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
     _record_type_touch!(pool, T)
-    _unsafe_zeros_impl!(pool, T, dims...)
+    return _unsafe_zeros_impl!(pool, T, dims...)
 end
 
-@inline function unsafe_zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+@inline function unsafe_zeros!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N}
     _record_type_touch!(pool, default_eltype(pool))
-    _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
+    return _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Internal implementation (for macro transformation)
-@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     arr = _unsafe_acquire_impl!(pool, T, dims...)
     fill!(arr, zero(T))
-    arr
+    return arr
 end
 
 # Default type overload for macro transformation (uses default_eltype for backend flexibility)
-@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
-    _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
+@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
+    return _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 # NTuple overloads for macro transformation (handles unsafe_zeros!(pool, T, size(x)) form)
-@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
-    _unsafe_zeros_impl!(pool, T, dims...)
+@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
+    return _unsafe_zeros_impl!(pool, T, dims...)
 end
 
-@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
-    _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
+@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N}
+    return _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 # ==============================================================================
@@ -463,45 +463,45 @@ end
 
 See also: [`unsafe_zeros!`](@ref), [`ones!`](@ref), [`unsafe_acquire!`](@ref)
 """
-@inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _unsafe_ones_impl!(pool, T, dims...)
+    return _unsafe_ones_impl!(pool, T, dims...)
 end
 
-@inline function unsafe_ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+@inline function unsafe_ones!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
     _record_type_touch!(pool, default_eltype(pool))
-    _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
+    return _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
 end
 
-@inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
+@inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
     _record_type_touch!(pool, T)
-    _unsafe_ones_impl!(pool, T, dims...)
+    return _unsafe_ones_impl!(pool, T, dims...)
 end
 
-@inline function unsafe_ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+@inline function unsafe_ones!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N}
     _record_type_touch!(pool, default_eltype(pool))
-    _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
+    return _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Internal implementation (for macro transformation)
-@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     arr = _unsafe_acquire_impl!(pool, T, dims...)
     fill!(arr, one(T))
-    arr
+    return arr
 end
 
 # Default type overload for macro transformation (uses default_eltype for backend flexibility)
-@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
-    _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
+@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int, N}) where {N}
+    return _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 # NTuple overloads for macro transformation (handles unsafe_ones!(pool, T, size(x)) form)
-@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
-    _unsafe_ones_impl!(pool, T, dims...)
+@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
+    return _unsafe_ones_impl!(pool, T, dims...)
 end
 
-@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
-    _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
+@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, dims::NTuple{N, Int}) where {N}
+    return _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 # ==============================================================================
@@ -535,39 +535,39 @@ See also: [`similar!`](@ref), [`unsafe_acquire!`](@ref)
 """
 @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray)
     _record_type_touch!(pool, eltype(x))
-    _unsafe_similar_impl!(pool, x)
+    return _unsafe_similar_impl!(pool, x)
 end
 
 @inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T}
     _record_type_touch!(pool, T)
-    _unsafe_similar_impl!(pool, x, T)
+    return _unsafe_similar_impl!(pool, x, T)
 end
 
-@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N}
+@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int, N}) where {N}
     _record_type_touch!(pool, eltype(x))
-    _unsafe_similar_impl!(pool, x, dims...)
+    return _unsafe_similar_impl!(pool, x, dims...)
 end
 
-@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _unsafe_similar_impl!(pool, x, T, dims...)
+    return _unsafe_similar_impl!(pool, x, T, dims...)
 end
 
 # Internal implementation (for macro transformation)
 @inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray)
-    _unsafe_acquire_impl!(pool, eltype(x), size(x))
+    return _unsafe_acquire_impl!(pool, eltype(x), size(x))
 end
 
 @inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T}
-    _unsafe_acquire_impl!(pool, T, size(x))
+    return _unsafe_acquire_impl!(pool, T, size(x))
 end
 
-@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N}
-    _unsafe_acquire_impl!(pool, eltype(x), dims...)
+@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int, N}) where {N}
+    return _unsafe_acquire_impl!(pool, eltype(x), dims...)
 end
 
-@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
-    _unsafe_acquire_impl!(pool, T, dims...)
+@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
+    return _unsafe_acquire_impl!(pool, T, dims...)
 end
 
 # ==============================================================================
@@ -593,7 +593,7 @@ end
 
 function Base.showerror(io::IO, e::BackendNotLoadedError)
     print(io, "Backend :$(e.backend) is not available. ")
-    if e.backend == :cuda
+    return if e.backend == :cuda
         print(io, "Make sure CUDA.jl is loaded: `using CUDA`")
     else
         print(io, "Make sure the appropriate backend package is loaded.")
@@ -619,56 +619,56 @@ default_eltype(::DisabledPool{:cpu}) = Float64
 end
 
 # --- zeros! for DisabledPool{:cpu} ---
-@inline zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...)
-@inline zeros!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = zeros(default_eltype(p), dims...)
-@inline zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...)
-@inline zeros!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = zeros(default_eltype(p), dims...)
+@inline zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = zeros(T, dims...)
+@inline zeros!(p::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = zeros(default_eltype(p), dims...)
+@inline zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = zeros(T, dims...)
+@inline zeros!(p::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = zeros(default_eltype(p), dims...)
 
 # --- ones! for DisabledPool{:cpu} ---
-@inline ones!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...)
-@inline ones!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = ones(default_eltype(p), dims...)
-@inline ones!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...)
-@inline ones!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = ones(default_eltype(p), dims...)
+@inline ones!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = ones(T, dims...)
+@inline ones!(p::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = ones(default_eltype(p), dims...)
+@inline ones!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = ones(T, dims...)
+@inline ones!(p::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = ones(default_eltype(p), dims...)
 
 # --- zeros!/ones! for DisabledPool{:cpu} with Bit type (returns BitArray) ---
-@inline zeros!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = falses(dims...)
-@inline zeros!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = falses(dims...)
-@inline ones!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = trues(dims...)
-@inline ones!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = trues(dims...)
+@inline zeros!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = falses(dims...)
+@inline zeros!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = falses(dims...)
+@inline ones!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = trues(dims...)
+@inline ones!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = trues(dims...)
 
 # --- trues!/falses! for DisabledPool{:cpu} ---
-@inline trues!(::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = trues(dims...)
-@inline trues!(::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = trues(dims...)
-@inline falses!(::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = falses(dims...)
-@inline falses!(::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = falses(dims...)
+@inline trues!(::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = trues(dims...)
+@inline trues!(::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = trues(dims...)
+@inline falses!(::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = falses(dims...)
+@inline falses!(::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = falses(dims...)
 
 # --- similar! for DisabledPool{:cpu} ---
 @inline similar!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x)
 @inline similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}) where {T} = similar(x, T)
-@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...)
-@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...)
+@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, dims::Vararg{Int, N}) where {N} = similar(x, dims...)
+@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = similar(x, T, dims...)
 
 # --- reshape! for DisabledPool{:cpu} ---
-@inline reshape!(::DisabledPool{:cpu}, A::AbstractArray, dims::Vararg{Int,N}) where {N} = reshape(A, dims...)
-@inline reshape!(::DisabledPool{:cpu}, A::AbstractArray, dims::NTuple{N,Int}) where {N} = reshape(A, dims)
+@inline reshape!(::DisabledPool{:cpu}, A::AbstractArray, dims::Vararg{Int, N}) where {N} = reshape(A, dims...)
+@inline reshape!(::DisabledPool{:cpu}, A::AbstractArray, dims::NTuple{N, Int}) where {N} = reshape(A, dims)
 
 # --- unsafe_zeros! for DisabledPool{:cpu} ---
-@inline unsafe_zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...)
-@inline unsafe_zeros!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = zeros(default_eltype(p), dims...)
-@inline unsafe_zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...)
-@inline unsafe_zeros!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = zeros(default_eltype(p), dims...)
+@inline unsafe_zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = zeros(T, dims...)
+@inline unsafe_zeros!(p::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = zeros(default_eltype(p), dims...)
+@inline unsafe_zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = zeros(T, dims...)
+@inline unsafe_zeros!(p::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = zeros(default_eltype(p), dims...)
 
 # --- unsafe_ones! for DisabledPool{:cpu} ---
-@inline unsafe_ones!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...)
-@inline unsafe_ones!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = ones(default_eltype(p), dims...)
-@inline unsafe_ones!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...)
-@inline unsafe_ones!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = ones(default_eltype(p), dims...)
+@inline unsafe_ones!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = ones(T, dims...)
+@inline unsafe_ones!(p::DisabledPool{:cpu}, dims::Vararg{Int, N}) where {N} = ones(default_eltype(p), dims...)
+@inline unsafe_ones!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = ones(T, dims...)
+@inline unsafe_ones!(p::DisabledPool{:cpu}, dims::NTuple{N, Int}) where {N} = ones(default_eltype(p), dims...)
 
 # --- unsafe_similar! for DisabledPool{:cpu} ---
 @inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x)
 @inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}) where {T} = similar(x, T)
-@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...)
-@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...)
+@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, dims::Vararg{Int, N}) where {N} = similar(x, dims...)
+@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = similar(x, T, dims...)
 
 # --- Generic DisabledPool fallbacks (unknown backend → error) ---
 @inline zeros!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B)
@@ -691,48 +691,48 @@ end
 # Explicit overloads for proper inlining (especially important for CUDA backend).
 
 # --- _zeros_impl! ---
-@inline _zeros_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros!(p, T, dims...)
-@inline _zeros_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = zeros!(p, dims...)
-@inline _zeros_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros!(p, T, dims)
-@inline _zeros_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = zeros!(p, dims)
+@inline _zeros_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = zeros!(p, T, dims...)
+@inline _zeros_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = zeros!(p, dims...)
+@inline _zeros_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = zeros!(p, T, dims)
+@inline _zeros_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = zeros!(p, dims)
 
 # --- _ones_impl! ---
-@inline _ones_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones!(p, T, dims...)
-@inline _ones_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = ones!(p, dims...)
-@inline _ones_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones!(p, T, dims)
-@inline _ones_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = ones!(p, dims)
+@inline _ones_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = ones!(p, T, dims...)
+@inline _ones_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = ones!(p, dims...)
+@inline _ones_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = ones!(p, T, dims)
+@inline _ones_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = ones!(p, dims)
 
 # --- _trues_impl! ---
-@inline _trues_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = trues!(p, dims...)
-@inline _trues_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = trues!(p, dims)
+@inline _trues_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = trues!(p, dims...)
+@inline _trues_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = trues!(p, dims)
 
 # --- _falses_impl! ---
-@inline _falses_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = falses!(p, dims...)
-@inline _falses_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = falses!(p, dims)
+@inline _falses_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = falses!(p, dims...)
+@inline _falses_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = falses!(p, dims)
 
 # --- _similar_impl! ---
 @inline _similar_impl!(p::DisabledPool, x::AbstractArray) = similar!(p, x)
 @inline _similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}) where {T} = similar!(p, x, T)
-@inline _similar_impl!(p::DisabledPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar!(p, x, dims...)
-@inline _similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar!(p, x, T, dims...)
+@inline _similar_impl!(p::DisabledPool, x::AbstractArray, dims::Vararg{Int, N}) where {N} = similar!(p, x, dims...)
+@inline _similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = similar!(p, x, T, dims...)
 
 # --- _reshape_impl! ---
-@inline _reshape_impl!(p::DisabledPool, A::AbstractArray, dims::NTuple{N,Int}) where {N} = reshape!(p, A, dims)
+@inline _reshape_impl!(p::DisabledPool, A::AbstractArray, dims::NTuple{N, Int}) where {N} = reshape!(p, A, dims)
 
 # --- _unsafe_zeros_impl! ---
-@inline _unsafe_zeros_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_zeros!(p, T, dims...)
-@inline _unsafe_zeros_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = unsafe_zeros!(p, dims...)
-@inline _unsafe_zeros_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_zeros!(p, T, dims)
-@inline _unsafe_zeros_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = unsafe_zeros!(p, dims)
+@inline _unsafe_zeros_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = unsafe_zeros!(p, T, dims...)
+@inline _unsafe_zeros_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = unsafe_zeros!(p, dims...)
+@inline _unsafe_zeros_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = unsafe_zeros!(p, T, dims)
+@inline _unsafe_zeros_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = unsafe_zeros!(p, dims)
 
 # --- _unsafe_ones_impl! ---
-@inline _unsafe_ones_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_ones!(p, T, dims...)
-@inline _unsafe_ones_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = unsafe_ones!(p, dims...)
-@inline _unsafe_ones_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_ones!(p, T, dims)
-@inline _unsafe_ones_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = unsafe_ones!(p, dims)
+@inline _unsafe_ones_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = unsafe_ones!(p, T, dims...)
+@inline _unsafe_ones_impl!(p::DisabledPool, dims::Vararg{Int, N}) where {N} = unsafe_ones!(p, dims...)
+@inline _unsafe_ones_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = unsafe_ones!(p, T, dims)
+@inline _unsafe_ones_impl!(p::DisabledPool, dims::NTuple{N, Int}) where {N} = unsafe_ones!(p, dims)
 
 # --- _unsafe_similar_impl! ---
 @inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray) = unsafe_similar!(p, x)
 @inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}) where {T} = unsafe_similar!(p, x, T)
-@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} = unsafe_similar!(p, x, dims...)
-@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_similar!(p, x, T, dims...)
+@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, dims::Vararg{Int, N}) where {N} = unsafe_similar!(p, x, dims...)
+@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = unsafe_similar!(p, x, T, dims...)
diff --git a/src/legacy/acquire.jl b/src/legacy/acquire.jl
index eb47ab03..1d94dfee 100644
--- a/src/legacy/acquire.jl
+++ b/src/legacy/acquire.jl
@@ -3,13 +3,15 @@
 # ==============================================================================
 
 # Allocate a new vector (dispatch point for extensions)
-@inline allocate_vector(::AbstractTypedPool{T,Vector{T}}, n::Int) where {T} =
+@inline allocate_vector(::AbstractTypedPool{T, Vector{T}}, n::Int) where {T} =
     Vector{T}(undef, n)
 
 # Wrap flat view into N-D array (dispatch point for extensions)
-@inline function wrap_array(::AbstractTypedPool{T,Vector{T}},
-                            flat_view, dims::NTuple{N,Int}) where {T,N}
-    unsafe_wrap(Array{T,N}, pointer(flat_view), dims)
+@inline function wrap_array(
+        ::AbstractTypedPool{T, Vector{T}},
+        flat_view, dims::NTuple{N, Int}
+    ) where {T, N}
+    return unsafe_wrap(Array{T, N}, pointer(flat_view), dims)
 end
 
 # ==============================================================================
@@ -185,7 +187,7 @@ For non-fixed-slot types, sets `_touched_has_others` flag.
     else
         @inbounds pool._touched_type_masks[depth] |= b
     end
-    nothing
+    return nothing
 end
 
 # CPU-specific override: adds lazy first-touch checkpoint in lazy mode
@@ -213,7 +215,7 @@ end
         end
         @inbounds pool._touched_type_masks[depth] = current_mask | b
     end
-    nothing
+    return nothing
 end
 
 # ==============================================================================
@@ -238,7 +240,7 @@ end
 end
 
 @inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
-    _acquire_impl!(pool, T, dims...)
+    return _acquire_impl!(pool, T, dims...)
 end
 
 # Similar-style
@@ -305,19 +307,19 @@ See also: [`unsafe_acquire!`](@ref) for native array access.
 """
 @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T}
     _record_type_touch!(pool, T)
-    _acquire_impl!(pool, T, n)
+    return _acquire_impl!(pool, T, n)
 end
 
 # Multi-dimensional support (zero-allocation with N-D cache)
 @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _acquire_impl!(pool, T, dims...)
+    return _acquire_impl!(pool, T, dims...)
 end
 
 # Tuple support: allows acquire!(pool, T, size(A)) where size(A) returns NTuple{N,Int}
 @inline function acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
     _record_type_touch!(pool, T)
-    _acquire_impl!(pool, T, dims...)
+    return _acquire_impl!(pool, T, dims...)
 end
 
 # Similar-style convenience methods
@@ -337,7 +339,7 @@ end
 """
 @inline function acquire!(pool::AbstractArrayPool, x::AbstractArray)
     _record_type_touch!(pool, eltype(x))
-    _acquire_impl!(pool, eltype(x), size(x))
+    return _acquire_impl!(pool, eltype(x), size(x))
 end
 
 # ==============================================================================
@@ -383,18 +385,18 @@ See also: [`acquire!`](@ref) for view-based access.
 """
 @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, n::Int) where {T}
     _record_type_touch!(pool, T)
-    _unsafe_acquire_impl!(pool, T, n)
+    return _unsafe_acquire_impl!(pool, T, n)
 end
 
 @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
     _record_type_touch!(pool, T)
-    _unsafe_acquire_impl!(pool, T, dims...)
+    return _unsafe_acquire_impl!(pool, T, dims...)
 end
 
 # Tuple support
 @inline function unsafe_acquire!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
     _record_type_touch!(pool, T)
-    _unsafe_acquire_impl!(pool, T, dims)
+    return _unsafe_acquire_impl!(pool, T, dims)
 end
 
 # Similar-style convenience methods
@@ -414,7 +416,7 @@ end
 """
 @inline function unsafe_acquire!(pool::AbstractArrayPool, x::AbstractArray)
     _record_type_touch!(pool, eltype(x))
-    _unsafe_acquire_impl!(pool, eltype(x), size(x))
+    return _unsafe_acquire_impl!(pool, eltype(x), size(x))
 end
 
 # ==============================================================================
@@ -451,14 +453,14 @@ const _acquire_array_impl! = _unsafe_acquire_impl!
 
 # --- acquire! for DisabledPool{:cpu} ---
 @inline acquire!(::DisabledPool{:cpu}, ::Type{T}, n::Int) where {T} = Vector{T}(undef, n)
-@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = Array{T,N}(undef, dims)
-@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims)
+@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = Array{T, N}(undef, dims)
+@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, dims)
 @inline acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x)
 
 # --- unsafe_acquire! for DisabledPool{:cpu} ---
 @inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, n::Int) where {T} = Vector{T}(undef, n)
-@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = Array{T,N}(undef, dims)
-@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = Array{T, N}(undef, dims)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, dims)
 @inline unsafe_acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x)
 
 # --- Generic DisabledPool fallbacks (unknown backend → error) ---
@@ -469,11 +471,11 @@ const _acquire_array_impl! = _unsafe_acquire_impl!
 # Called when: USE_POOLING=true + @maybe_with_pool + MAYBE_POOLING_ENABLED[]=false
 # Explicit overloads for proper inlining (especially important for CUDA backend).
 @inline _acquire_impl!(p::DisabledPool, ::Type{T}, n::Int) where {T} = acquire!(p, T, n)
-@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = acquire!(p, T, dims...)
-@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = acquire!(p, T, dims)
+@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = acquire!(p, T, dims...)
+@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = acquire!(p, T, dims)
 @inline _acquire_impl!(p::DisabledPool, x::AbstractArray) = acquire!(p, x)
 
 @inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, n::Int) where {T} = unsafe_acquire!(p, T, n)
-@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_acquire!(p, T, dims...)
-@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_acquire!(p, T, dims)
+@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int, N}) where {T, N} = unsafe_acquire!(p, T, dims...)
+@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N, Int}) where {T, N} = unsafe_acquire!(p, T, dims)
 @inline _unsafe_acquire_impl!(p::DisabledPool, x::AbstractArray) = unsafe_acquire!(p, x)
diff --git a/src/legacy/bitarray.jl b/src/legacy/bitarray.jl
index 97731f68..7964e8cb 100644
--- a/src/legacy/bitarray.jl
+++ b/src/legacy/bitarray.jl
@@ -66,7 +66,7 @@ Uses N-way set-associative cache with up to CACHE_WAYS patterns per slot.
 The returned BitArray is only valid within the `@with_pool` scope.
 Do NOT use after the scope ends (use-after-free risk).
 """
-function get_bitarray!(tp::BitTypedPool, dims::NTuple{N,Int}) where {N}
+function get_bitarray!(tp::BitTypedPool, dims::NTuple{N, Int}) where {N}
     total_len = safe_prod(dims)
     tp.n_active += 1
     idx = tp.n_active
@@ -118,7 +118,7 @@ function get_bitarray!(tp::BitTypedPool, dims::NTuple{N,Int}) where {N}
         @inbounds cached_ptr = tp.nd_ptrs[cache_idx]
 
         # Must check isa FIRST for type stability (avoids boxing in == comparison)
-        if cached_dims isa NTuple{N,Int} && cached_ptr == current_ptr
+        if cached_dims isa NTuple{N, Int} && cached_ptr == current_ptr
             if cached_dims == dims
                 # Exact match - return cached BitArray directly (0 alloc)
                 return @inbounds tp.nd_arrays[cache_idx]::BitArray{N}
@@ -162,11 +162,11 @@ end
     return _unsafe_acquire_impl!(pool, Bit, n)
 end
 
-@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N}
+@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N}
     return _unsafe_acquire_impl!(pool, Bit, dims...)
 end
 
-@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N}
+@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N}
     return _unsafe_acquire_impl!(pool, Bit, dims...)
 end
 
@@ -180,12 +180,12 @@ end
     return get_bitarray!(tp, n)
 end
 
-@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N}
+@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int, N}) where {N}
     tp = get_typed_pool!(pool, Bit)::BitTypedPool
     return get_bitarray!(tp, dims)
 end
 
-@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N}
+@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N, Int}) where {N}
     tp = get_typed_pool!(pool, Bit)::BitTypedPool
     return get_bitarray!(tp, dims)
 end
@@ -196,10 +196,10 @@ end
 
 # --- acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) ---
 @inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n)
-@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims)
-@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims)
+@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = BitArray{N}(undef, dims)
+@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = BitArray{N}(undef, dims)
 
 # --- unsafe_acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) ---
 @inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n)
-@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims)
-@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int, N}) where {N} = BitArray{N}(undef, dims)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N, Int}) where {N} = BitArray{N}(undef, dims)
diff --git a/src/legacy/state.jl b/src/legacy/state.jl
index 1b512104..71366b14 100644
--- a/src/legacy/state.jl
+++ b/src/legacy/state.jl
@@ -49,7 +49,7 @@ Also updates _current_depth and bitmask state for type touch tracking.
 
 ~77% faster than full checkpoint! when only one type is used.
 """
-@inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where T
+@inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where {T}
 
     pool._current_depth += 1
     push!(pool._touched_type_masks, UInt16(0))
@@ -58,7 +58,7 @@ Also updates _current_depth and bitmask state for type touch tracking.
     # (which bypasses _record_type_touch!) is the only acquire path.
     push!(pool._touched_has_others, _fixed_slot_bit(T) == UInt16(0))
     _checkpoint_typed_pool!(get_typed_pool!(pool, T), pool._current_depth)
-    nothing
+    return nothing
 end
 
 """
@@ -83,7 +83,7 @@ compile-time unrolling. Increments _current_depth once for all types.
     # even when _acquire_impl! (bypassing _record_type_touch!) is used.
     has_any_fallback = any(i -> _fixed_slot_bit(types[i].parameters[1]) == UInt16(0), unique_indices)
     checkpoint_exprs = [:(_checkpoint_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices]
-    quote
+    return quote
         pool._current_depth += 1
         push!(pool._touched_type_masks, UInt16(0))
         push!(pool._touched_has_others, $has_any_fallback)
@@ -102,7 +102,7 @@ end
         push!(tp._checkpoint_n_active, tp.n_active)
         push!(tp._checkpoint_depths, depth)
     end
-    nothing
+    return nothing
 end
 
 """
@@ -134,7 +134,7 @@ Performance: ~2ns vs ~540ns for full `checkpoint!`.
         _checkpoint_typed_pool!(p, depth)
         @inbounds pool._touched_has_others[depth] = true
     end
-    nothing
+    return nothing
 end
 
 # ==============================================================================
@@ -189,7 +189,7 @@ end
 Restore state for a specific type only.
 Also updates _current_depth and bitmask state.
 """
-@inline function rewind!(pool::AdaptiveArrayPool, ::Type{T}) where T
+@inline function rewind!(pool::AdaptiveArrayPool, ::Type{T}) where {T}
 
     # Safety guard: at global scope (depth=1), delegate to reset!
     if pool._current_depth == 1
@@ -200,7 +200,7 @@ Also updates _current_depth and bitmask state.
     pop!(pool._touched_type_masks)
     pop!(pool._touched_has_others)
     pool._current_depth -= 1
-    nothing
+    return nothing
 end
 
 """
@@ -222,7 +222,7 @@ Decrements _current_depth once after all types are rewound.
     end
     rewind_exprs = [:(_rewind_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in reverse(unique_indices)]
     reset_exprs = [:(reset!(get_typed_pool!(pool, types[$i]))) for i in unique_indices]
-    quote
+    return quote
         # Safety guard: at global scope (depth=1), delegate to reset!
         if pool._current_depth == 1
             $(reset_exprs...)
@@ -262,7 +262,7 @@ end
         # - If sentinel (_checkpoint_n_active=[0]), restores to n_active=0
         tp.n_active = @inbounds tp._checkpoint_n_active[end]
     end
-    nothing
+    return nothing
 end
 
 """
@@ -278,7 +278,7 @@ Called directly from the macro-generated `finally` clause as a single function c
 """
 @inline function _lazy_rewind!(pool::AdaptiveArrayPool)
 
-    d    = pool._current_depth
+    d = pool._current_depth
     bits = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK
     _selective_rewind_fixed_slots!(pool, bits)
     if @inbounds(pool._touched_has_others[d])
@@ -289,7 +289,7 @@ Called directly from the macro-generated `finally` clause as a single function c
     pop!(pool._touched_type_masks)
     pop!(pool._touched_has_others)
     pool._current_depth -= 1
-    nothing
+    return nothing
 end
 
 """
@@ -326,7 +326,7 @@ lazy first-touch checkpoint for each extra type on first acquire, ensuring Case
         end
         @inbounds pool._touched_has_others[d] = true
     end
-    nothing
+    return nothing
 end
 
 """
@@ -353,7 +353,7 @@ guaranteed by the `_TYPED_LAZY_BIT` mode set in `_typed_lazy_checkpoint!`.
     pop!(pool._touched_type_masks)
     pop!(pool._touched_has_others)
     pool._current_depth -= 1
-    nothing
+    return nothing
 end
 
 """
@@ -368,15 +368,15 @@ before passing the mask (e.g. `mask & _TYPE_BITS_MASK`).
 @inline function _selective_rewind_fixed_slots!(pool::AdaptiveArrayPool, mask::UInt16)
 
     d = pool._current_depth
-    _has_bit(mask, Float64)    && _rewind_typed_pool!(pool.float64,    d)
-    _has_bit(mask, Float32)    && _rewind_typed_pool!(pool.float32,    d)
-    _has_bit(mask, Int64)      && _rewind_typed_pool!(pool.int64,      d)
-    _has_bit(mask, Int32)      && _rewind_typed_pool!(pool.int32,      d)
+    _has_bit(mask, Float64)    && _rewind_typed_pool!(pool.float64, d)
+    _has_bit(mask, Float32)    && _rewind_typed_pool!(pool.float32, d)
+    _has_bit(mask, Int64)      && _rewind_typed_pool!(pool.int64, d)
+    _has_bit(mask, Int32)      && _rewind_typed_pool!(pool.int32, d)
     _has_bit(mask, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d)
     _has_bit(mask, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d)
-    _has_bit(mask, Bool)       && _rewind_typed_pool!(pool.bool,       d)
-    _has_bit(mask, Bit)        && _rewind_typed_pool!(pool.bits,       d)
-    nothing
+    _has_bit(mask, Bool)       && _rewind_typed_pool!(pool.bool, d)
+    _has_bit(mask, Bit)        && _rewind_typed_pool!(pool.bits, d)
+    return nothing
 end
 
 # ==============================================================================
@@ -559,9 +559,9 @@ to sentinel state while preserving allocated vectors.
 
 See also: [`reset!(::AdaptiveArrayPool)`](@ref), [`rewind!`](@ref)
 """
-@inline function reset!(pool::AdaptiveArrayPool, ::Type{T}) where T
+@inline function reset!(pool::AdaptiveArrayPool, ::Type{T}) where {T}
     reset!(get_typed_pool!(pool, T))
-    pool
+    return pool
 end
 
 """
@@ -574,7 +574,7 @@ See also: [`reset!(::AdaptiveArrayPool)`](@ref), [`rewind!`](@ref)
 """
 @generated function reset!(pool::AdaptiveArrayPool, types::Type...)
     reset_exprs = [:(reset!(get_typed_pool!(pool, types[$i]))) for i in 1:length(types)]
-    quote
+    return quote
         $(reset_exprs...)
         pool
     end
diff --git a/src/legacy/types.jl b/src/legacy/types.jl
index b056ddbb..8856f9e4 100644
--- a/src/legacy/types.jl
+++ b/src/legacy/types.jl
@@ -70,7 +70,7 @@ end
 
 Abstract base for type-specific memory pools.
 """
-abstract type AbstractTypedPool{T, V<:AbstractVector{T}} end
+abstract type AbstractTypedPool{T, V <: AbstractVector{T}} end
 
 """
     AbstractArrayPool
@@ -324,10 +324,10 @@ const FIXED_SLOT_FIELDS = (:float64, :float32, :int64, :int32, :complexf64, :com
 # Bits 0-7: fixed-slot type touch tracking (one bit per type)
 # Bits 14-15: mode flags set during checkpoint to control lazy behavior
 
-const _LAZY_MODE_BIT   = UInt16(0x8000)  # bit 15: lazy (dynamic-selective) checkpoint mode
-const _TYPED_LAZY_BIT  = UInt16(0x4000)  # bit 14: typed lazy-fallback mode
-const _MODE_BITS_MASK  = UInt16(0xC000)  # bits 14-15: all mode flags
-const _TYPE_BITS_MASK  = UInt16(0x00FF)  # bits 0-7: fixed-slot type bits
+const _LAZY_MODE_BIT = UInt16(0x8000)  # bit 15: lazy (dynamic-selective) checkpoint mode
+const _TYPED_LAZY_BIT = UInt16(0x4000)  # bit 14: typed lazy-fallback mode
+const _MODE_BITS_MASK = UInt16(0xC000)  # bits 14-15: all mode flags
+const _TYPE_BITS_MASK = UInt16(0x00FF)  # bits 0-7: fixed-slot type bits
 
 # ==============================================================================
 # Fixed-Slot Bit Mapping (for type touch tracking)
@@ -335,15 +335,15 @@ const _TYPE_BITS_MASK  = UInt16(0x00FF)  # bits 0-7: fixed-slot type bits
 # Maps each fixed-slot type to a unique bit in a UInt16 bitmask.
 # Bit ordering matches FIXED_SLOT_FIELDS. Non-fixed types return UInt16(0).
 
-@inline _fixed_slot_bit(::Type{Float64})    = UInt16(1) << 0
-@inline _fixed_slot_bit(::Type{Float32})    = UInt16(1) << 1
-@inline _fixed_slot_bit(::Type{Int64})      = UInt16(1) << 2
-@inline _fixed_slot_bit(::Type{Int32})      = UInt16(1) << 3
+@inline _fixed_slot_bit(::Type{Float64}) = UInt16(1) << 0
+@inline _fixed_slot_bit(::Type{Float32}) = UInt16(1) << 1
+@inline _fixed_slot_bit(::Type{Int64}) = UInt16(1) << 2
+@inline _fixed_slot_bit(::Type{Int32}) = UInt16(1) << 3
 @inline _fixed_slot_bit(::Type{ComplexF64}) = UInt16(1) << 4
 @inline _fixed_slot_bit(::Type{ComplexF32}) = UInt16(1) << 5
-@inline _fixed_slot_bit(::Type{Bool})       = UInt16(1) << 6
-@inline _fixed_slot_bit(::Type{Bit})        = UInt16(1) << 7
-@inline _fixed_slot_bit(::Type)             = UInt16(0)  # non-fixed-slot → triggers has_others
+@inline _fixed_slot_bit(::Type{Bool}) = UInt16(1) << 6
+@inline _fixed_slot_bit(::Type{Bit}) = UInt16(1) << 7
+@inline _fixed_slot_bit(::Type) = UInt16(0)  # non-fixed-slot → triggers has_others
 
 # Check whether a type's bit is set in a bitmask (e.g. _touched_type_masks or combined).
 @inline _has_bit(mask::UInt16, ::Type{T}) where {T} = (mask & _fixed_slot_bit(T)) != 0
@@ -379,7 +379,7 @@ mutable struct AdaptiveArrayPool <: AbstractArrayPool
 end
 
 function AdaptiveArrayPool()
-    AdaptiveArrayPool(
+    return AdaptiveArrayPool(
         TypedPool{Float64}(),
         TypedPool{Float32}(),
         TypedPool{Int64}(),
@@ -411,7 +411,7 @@ end
 
 # Slow Path: rare types via IdDict
 @inline function get_typed_pool!(p::AdaptiveArrayPool, ::Type{T}) where {T}
-    get!(p.others, T) do
+    return get!(p.others, T) do
         tp = TypedPool{T}()
         # If inside a checkpoint scope (_current_depth > 1 means inside @with_pool),
         # auto-checkpoint the new pool to prevent issues on rewind
@@ -439,7 +439,7 @@ Apply `f` to each fixed slot TypedPool. Zero allocation via compile-time unrolli
 """
 @generated function foreach_fixed_slot(f::F, pool::AdaptiveArrayPool) where {F}
     exprs = [:(f(getfield(pool, $(QuoteNode(field))))) for field in FIXED_SLOT_FIELDS]
-    quote
+    return quote
         Base.@_inline_meta
         $(exprs...)
         nothing
diff --git a/src/macros.jl b/src/macros.jl
index 752c13ef..7605abc8 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -23,7 +23,7 @@ achieving zero overhead compared to Dict-based registry.
 @inline _get_pool_for_backend(::Val{:cpu}) = get_task_local_pool()
 
 # Fallback with helpful error message (marked @noinline to keep hot path fast)
-@noinline function _get_pool_for_backend(::Val{B}) where B
+@noinline function _get_pool_for_backend(::Val{B}) where {B}
     error("Pool backend :$B is not available. Load the extension first (e.g., `using CUDA` for :cuda).")
 end
 
@@ -106,22 +106,22 @@ end
 ```
 """
 macro with_pool(pool_name, expr)
-    _generate_pool_code(pool_name, expr, true; source=__source__)
+    return _generate_pool_code(pool_name, expr, true; source = __source__)
 end
 
 macro with_pool(expr)
     pool_name = gensym(:pool)
-    _generate_pool_code(pool_name, expr, true; source=__source__)
+    return _generate_pool_code(pool_name, expr, true; source = __source__)
 end
 
 # Backend-specific variants: @with_pool :cuda pool begin ... end
 macro with_pool(backend::QuoteNode, pool_name, expr)
-    _generate_pool_code_with_backend(backend.value, pool_name, expr, true; source=__source__)
+    return _generate_pool_code_with_backend(backend.value, pool_name, expr, true; source = __source__)
 end
 
 macro with_pool(backend::QuoteNode, expr)
     pool_name = gensym(:pool)
-    _generate_pool_code_with_backend(backend.value, pool_name, expr, true; source=__source__)
+    return _generate_pool_code_with_backend(backend.value, pool_name, expr, true; source = __source__)
 end
 
 """
@@ -153,22 +153,22 @@ end
 ```
 """
 macro maybe_with_pool(pool_name, expr)
-    _generate_pool_code(pool_name, expr, false; source=__source__)
+    return _generate_pool_code(pool_name, expr, false; source = __source__)
 end
 
 macro maybe_with_pool(expr)
     pool_name = gensym(:pool)
-    _generate_pool_code(pool_name, expr, false; source=__source__)
+    return _generate_pool_code(pool_name, expr, false; source = __source__)
 end
 
 # Backend-specific variants: @maybe_with_pool :cuda pool begin ... end
 macro maybe_with_pool(backend::QuoteNode, pool_name, expr)
-    _generate_pool_code_with_backend(backend.value, pool_name, expr, false; source=__source__)
+    return _generate_pool_code_with_backend(backend.value, pool_name, expr, false; source = __source__)
 end
 
 macro maybe_with_pool(backend::QuoteNode, expr)
     pool_name = gensym(:pool)
-    _generate_pool_code_with_backend(backend.value, pool_name, expr, false; source=__source__)
+    return _generate_pool_code_with_backend(backend.value, pool_name, expr, false; source = __source__)
 end
 
 # ==============================================================================
@@ -182,7 +182,7 @@ Generate expression for DisabledPool singleton based on backend.
 Used when pooling is disabled to preserve backend context.
 """
 function _disabled_pool_expr(backend::Symbol)
-    if backend == :cpu
+    return if backend == :cpu
         :($DISABLED_CPU)
     else
         :($(DisabledPool{backend}()))
@@ -232,7 +232,7 @@ Ensure body has a LineNumberNode pointing to user source at the top level.
 
 Returns a new Expr to avoid mutating the original AST.
 """
-function _ensure_body_has_toplevel_lnn(body, source::Union{LineNumberNode,Nothing})
+function _ensure_body_has_toplevel_lnn(body, source::Union{LineNumberNode, Nothing})
     source === nothing && return body
     # Don't clobber valid file info with :none from REPL/eval
     source.file === :none && return body
@@ -275,7 +275,7 @@ Scans first few args to handle Expr(:meta, ...) from @inline etc.
 If source.file === :none (REPL/eval), don't clobber valid file LNNs.
 Modifies expr in-place and returns it.
 """
-function _fix_try_body_lnn!(expr, source::Union{LineNumberNode,Nothing})
+function _fix_try_body_lnn!(expr, source::Union{LineNumberNode, Nothing})
     source === nothing && return expr
     # Don't clobber valid file info with :none from REPL/eval
     source.file === :none && return expr
@@ -307,7 +307,7 @@ end
 # Internal: Code Generation
 # ==============================================================================
 
-function _generate_pool_code(pool_name, expr, force_enable; source::Union{LineNumberNode,Nothing}=nothing)
+function _generate_pool_code(pool_name, expr, force_enable; source::Union{LineNumberNode, Nothing} = nothing)
     # Compile-time check: if pooling disabled, use DisabledPool to preserve backend context
     if !USE_POOLING
         disabled_pool = _disabled_pool_expr(:cpu)
@@ -402,7 +402,7 @@ Uses `_get_pool_for_backend(Val{backend}())` for zero-overhead dispatch.
 
 Includes type-specific checkpoint/rewind optimization (same as regular @with_pool).
 """
-function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, force_enable::Bool; source::Union{LineNumberNode,Nothing}=nothing)
+function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, force_enable::Bool; source::Union{LineNumberNode, Nothing} = nothing)
     # Compile-time check: if pooling disabled, use DisabledPool to preserve backend context
     if !USE_POOLING
         disabled_pool = _disabled_pool_expr(backend)
@@ -515,7 +515,7 @@ end
 Generate function code for a specific backend (e.g., :cuda).
 Wraps the function body with pool getter, checkpoint, try-finally, rewind.
 """
-function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, func_def, disable_pooling::Bool; source::Union{LineNumberNode,Nothing}=nothing)
+function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, func_def, disable_pooling::Bool; source::Union{LineNumberNode, Nothing} = nothing)
     def_head = func_def.head
     call_expr = func_def.args[1]
     body = func_def.args[2]
@@ -572,7 +572,7 @@ function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, f
     return Expr(def_head, esc(call_expr), new_body)
 end
 
-function _generate_function_pool_code(pool_name, func_def, force_enable, disable_pooling, backend::Symbol=:cpu; source::Union{LineNumberNode,Nothing}=nothing)
+function _generate_function_pool_code(pool_name, func_def, force_enable, disable_pooling, backend::Symbol = :cpu; source::Union{LineNumberNode, Nothing} = nothing)
     def_head = func_def.head
     call_expr = func_def.args[1]
     body = func_def.args[2]
@@ -670,14 +670,14 @@ These cannot be used for typed checkpoint since they're defined after checkpoint
 
 Detects patterns like: `T = eltype(x)`, `local T = ...`, etc.
 """
-function _extract_local_assignments(expr, locals=Set{Symbol}())
+function _extract_local_assignments(expr, locals = Set{Symbol}())
     if expr isa Expr
         if expr.head == :(=) && length(expr.args) >= 1
             lhs = expr.args[1]
             # Simple assignment: T = ...
             if lhs isa Symbol
                 push!(locals, lhs)
-            # Typed assignment: T::Type = ...
+                # Typed assignment: T::Type = ...
             elseif Meta.isexpr(lhs, :(::)) && length(lhs.args) >= 1 && lhs.args[1] isa Symbol
                 push!(locals, lhs.args[1])
             end
@@ -720,7 +720,7 @@ Handles various forms:
 - `similar!(pool, x)`: generates `eltype(x)` expression
 - `similar!(pool, x, Type, ...)`: extracts Type
 """
-function _extract_acquire_types(expr, target_pool, types=Set{Any}())
+function _extract_acquire_types(expr, target_pool, types = Set{Any}())
     if expr isa Expr
         # Match: function calls with pool argument
         if expr.head == :call && length(expr.args) >= 3
@@ -752,10 +752,10 @@ function _extract_acquire_types(expr, target_pool, types=Set{Any}())
                         # acquire!(pool, x) - similar-style form
                         push!(types, Expr(:call, :eltype, expr.args[3]))
                     end
-                # trues!/falses! (always uses Bit type)
+                    # trues!/falses! (always uses Bit type)
                 elseif fn in (:trues!, :falses!) || fn_name in (:trues!, :falses!)
                     push!(types, :Bit)
-                # zeros!/ones!/unsafe_zeros!/unsafe_ones!
+                    # zeros!/ones!/unsafe_zeros!/unsafe_ones!
                 elseif fn in (:zeros!, :ones!, :unsafe_zeros!, :unsafe_ones!) || fn_name in (:zeros!, :ones!, :unsafe_zeros!, :unsafe_ones!)
                     if nargs >= 3
                         third_arg = expr.args[3]
@@ -768,7 +768,7 @@ function _extract_acquire_types(expr, target_pool, types=Set{Any}())
                             push!(types, Expr(:call, :default_eltype, target_pool))
                         end
                     end
-                # similar!/unsafe_similar!
+                    # similar!/unsafe_similar!
                 elseif fn in (:similar!, :unsafe_similar!) || fn_name in (:similar!, :unsafe_similar!)
                     if nargs == 3
                         # similar!(pool, x) - same type as x
@@ -783,7 +783,7 @@ function _extract_acquire_types(expr, target_pool, types=Set{Any}())
                             push!(types, Expr(:call, :eltype, expr.args[3]))
                         end
                     end
-                # reshape!
+                    # reshape!
                 elseif fn in (:reshape!,) || fn_name in (:reshape!,)
                     # reshape!(pool, A, dims...) — extract eltype(A) from second arg
                     if nargs >= 3
@@ -859,7 +859,7 @@ Filter types for typed checkpoint/rewind generation.
 Type parameters (T, S from `where` clause) resolve to concrete types at runtime.
 Local variables (T = eltype(x)) are defined after checkpoint! and cannot be used.
 """
-function _filter_static_types(types, local_vars=Set{Symbol}())
+function _filter_static_types(types, local_vars = Set{Symbol}())
     static_types = Any[]
     has_dynamic = false
 
@@ -926,7 +926,7 @@ function _generate_typed_checkpoint_call(pool_expr, types)
     else
         escaped_types = [esc(t) for t in types]
         typed_call = :($checkpoint!($pool_expr, $(escaped_types...)))
-        lazy_call  = :($_typed_lazy_checkpoint!($pool_expr, $(escaped_types...)))
+        lazy_call = :($_typed_lazy_checkpoint!($pool_expr, $(escaped_types...)))
         return quote
             if $_can_use_typed_path($pool_expr, $_tracked_mask_for_types($(escaped_types...)))
                 $typed_call
@@ -951,9 +951,13 @@ function _generate_typed_rewind_call(pool_expr, types)
         return :($rewind!($pool_expr))       # fallback for direct external calls (unreachable via macro)
     else
         escaped_types = [esc(t) for t in types]
-        typed_call     = :($rewind!($pool_expr, $(escaped_types...)))
-        selective_call = :($_typed_lazy_rewind!($pool_expr,
-                              $_tracked_mask_for_types($(escaped_types...))))
+        typed_call = :($rewind!($pool_expr, $(escaped_types...)))
+        selective_call = :(
+            $_typed_lazy_rewind!(
+                $pool_expr,
+                $_tracked_mask_for_types($(escaped_types...))
+            )
+        )
         return quote
             if $_can_use_typed_path($pool_expr, $_tracked_mask_for_types($(escaped_types...)))
                 $typed_call
diff --git a/src/state.jl b/src/state.jl
index b591b520..ec172b4e 100644
--- a/src/state.jl
+++ b/src/state.jl
@@ -43,7 +43,7 @@ Also updates _current_depth and bitmask state for type touch tracking.
 
 ~77% faster than full checkpoint! when only one type is used.
 """
-@inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where T
+@inline function checkpoint!(pool::AdaptiveArrayPool, ::Type{T}) where {T}
 
     pool._current_depth += 1
     push!(pool._touched_type_masks, UInt16(0))
@@ -52,7 +52,7 @@ Also updates _current_depth and bitmask state for type touch tracking.
     # (which bypasses _record_type_touch!) is the only acquire path.
     push!(pool._touched_has_others, _fixed_slot_bit(T) == UInt16(0))
     _checkpoint_typed_pool!(get_typed_pool!(pool, T), pool._current_depth)
-    nothing
+    return nothing
 end
 
 """
@@ -77,7 +77,7 @@ compile-time unrolling. Increments _current_depth once for all types.
     # even when _acquire_impl! (bypassing _record_type_touch!) is used.
     has_any_fallback = any(i -> _fixed_slot_bit(types[i].parameters[1]) == UInt16(0), unique_indices)
     checkpoint_exprs = [:(_checkpoint_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices]
-    quote
+    return quote
         pool._current_depth += 1
         push!(pool._touched_type_masks, UInt16(0))
         push!(pool._touched_has_others, $has_any_fallback)
@@ -96,7 +96,7 @@ end
         push!(tp._checkpoint_n_active, tp.n_active)
         push!(tp._checkpoint_depths, depth)
     end
-    nothing
+    return nothing
 end
 
 """
@@ -128,7 +128,7 @@ Performance: ~2ns vs ~540ns for full `checkpoint!`.
         _checkpoint_typed_pool!(p, depth)
         @inbounds pool._touched_has_others[depth] = true
     end
-    nothing
+    return nothing
 end
 
 # ==============================================================================
@@ -183,7 +183,7 @@ end
 Restore state for a specific type only.
 Also updates _current_depth and bitmask state.
 """
-@inline function rewind!(pool::AdaptiveArrayPool, ::Type{T}) where T
+@inline function rewind!(pool::AdaptiveArrayPool, ::Type{T}) where {T}
 
     # Safety guard: at global scope (depth=1), delegate to reset!
     if pool._current_depth == 1
@@ -194,7 +194,7 @@ Also updates _current_depth and bitmask state.
     pop!(pool._touched_type_masks)
     pop!(pool._touched_has_others)
     pool._current_depth -= 1
-    nothing
+    return nothing
 end
 
 """
@@ -216,7 +216,7 @@ Decrements _current_depth once after all types are rewound.
     end
     rewind_exprs = [:(_rewind_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in reverse(unique_indices)]
     reset_exprs = [:(reset!(get_typed_pool!(pool, types[$i]))) for i in unique_indices]
-    quote
+    return quote
         # Safety guard: at global scope (depth=1), delegate to reset!
         if pool._current_depth == 1
             $(reset_exprs...)
@@ -256,7 +256,7 @@ end
         # - If sentinel (_checkpoint_n_active=[0]), restores to n_active=0
         tp.n_active = @inbounds tp._checkpoint_n_active[end]
     end
-    nothing
+    return nothing
 end
 
 """
@@ -272,7 +272,7 @@ Called directly from the macro-generated `finally` clause as a single function c
 """
 @inline function _lazy_rewind!(pool::AdaptiveArrayPool)
 
-    d    = pool._current_depth
+    d = pool._current_depth
     bits = @inbounds(pool._touched_type_masks[d]) & _TYPE_BITS_MASK
     _selective_rewind_fixed_slots!(pool, bits)
     if @inbounds(pool._touched_has_others[d])
@@ -283,7 +283,7 @@ Called directly from the macro-generated `finally` clause as a single function c
     pop!(pool._touched_type_masks)
     pop!(pool._touched_has_others)
     pool._current_depth -= 1
-    nothing
+    return nothing
 end
 
 """
@@ -320,7 +320,7 @@ lazy first-touch checkpoint for each extra type on first acquire, ensuring Case
         end
         @inbounds pool._touched_has_others[d] = true
     end
-    nothing
+    return nothing
 end
 
 """
@@ -347,7 +347,7 @@ guaranteed by the `_TYPED_LAZY_BIT` mode set in `_typed_lazy_checkpoint!`.
     pop!(pool._touched_type_masks)
     pop!(pool._touched_has_others)
     pool._current_depth -= 1
-    nothing
+    return nothing
 end
 
 """
@@ -365,15 +365,15 @@ checkpoint, `_rewind_typed_pool!` Case B safely restores from the parent checkpo
 @inline function _selective_rewind_fixed_slots!(pool::AdaptiveArrayPool, mask::UInt16)
 
     d = pool._current_depth
-    _has_bit(mask, Float64)    && _rewind_typed_pool!(pool.float64,    d)
-    _has_bit(mask, Float32)    && _rewind_typed_pool!(pool.float32,    d)
-    _has_bit(mask, Int64)      && _rewind_typed_pool!(pool.int64,      d)
-    _has_bit(mask, Int32)      && _rewind_typed_pool!(pool.int32,      d)
+    _has_bit(mask, Float64)    && _rewind_typed_pool!(pool.float64, d)
+    _has_bit(mask, Float32)    && _rewind_typed_pool!(pool.float32, d)
+    _has_bit(mask, Int64)      && _rewind_typed_pool!(pool.int64, d)
+    _has_bit(mask, Int32)      && _rewind_typed_pool!(pool.int32, d)
     _has_bit(mask, ComplexF64) && _rewind_typed_pool!(pool.complexf64, d)
     _has_bit(mask, ComplexF32) && _rewind_typed_pool!(pool.complexf32, d)
-    _has_bit(mask, Bool)       && _rewind_typed_pool!(pool.bool,       d)
-    _has_bit(mask, Bit)        && _rewind_typed_pool!(pool.bits,       d)
-    nothing
+    _has_bit(mask, Bool)       && _rewind_typed_pool!(pool.bool, d)
+    _has_bit(mask, Bit)        && _rewind_typed_pool!(pool.bits, d)
+    return nothing
 end
 
 # ==============================================================================
@@ -548,9 +548,9 @@ to sentinel state while preserving allocated vectors.
 
 See also: [`reset!(::AdaptiveArrayPool)`](@ref), [`rewind!`](@ref)
 """
-@inline function reset!(pool::AdaptiveArrayPool, ::Type{T}) where T
+@inline function reset!(pool::AdaptiveArrayPool, ::Type{T}) where {T}
     reset!(get_typed_pool!(pool, T))
-    pool
+    return pool
 end
 
 """
@@ -563,7 +563,7 @@ See also: [`reset!(::AdaptiveArrayPool)`](@ref), [`rewind!`](@ref)
 """
 @generated function reset!(pool::AdaptiveArrayPool, types::Type...)
     reset_exprs = [:(reset!(get_typed_pool!(pool, types[$i]))) for i in 1:length(types)]
-    quote
+    return quote
         $(reset_exprs...)
         pool
     end
diff --git a/src/task_local_pool.jl b/src/task_local_pool.jl
index cd7940cc..25f7d50e 100644
--- a/src/task_local_pool.jl
+++ b/src/task_local_pool.jl
@@ -63,14 +63,14 @@ ensuring thread safety without locks.
     # 1. Fast Path: Try to get existing pool
     # get(dict, key, default) is optimized in Julia Base
     pool = get(task_local_storage(), _POOL_KEY, nothing)
-    
+
     if pool === nothing
         # 2. Slow Path: Create and store new pool
         # This branch is rarely taken (only once per Task)
         pool = AdaptiveArrayPool()
         task_local_storage(_POOL_KEY, pool)
     end
-    
+
     return pool::AdaptiveArrayPool
 end
 
@@ -96,4 +96,4 @@ Returns the dictionary of all CUDA pools for the current task (one per device).
 
 Requires CUDA.jl to be loaded. Throws an error if CUDA extension is not available.
 """
-function get_task_local_cuda_pools end
\ No newline at end of file
+function get_task_local_cuda_pools end
diff --git a/src/types.jl b/src/types.jl
index c2a01c9b..130479b1 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -70,7 +70,7 @@ end
 
 Abstract base for type-specific memory pools.
 """
-abstract type AbstractTypedPool{T, V<:AbstractVector{T}} end
+abstract type AbstractTypedPool{T, V <: AbstractVector{T}} end
 
 """
     AbstractArrayPool
@@ -339,10 +339,10 @@ const FIXED_SLOT_FIELDS = (:float64, :float32, :int64, :int32, :complexf64, :com
 # Bits 0-7: fixed-slot type touch tracking (one bit per type)
 # Bits 14-15: mode flags set during checkpoint to control lazy behavior
 
-const _LAZY_MODE_BIT   = UInt16(0x8000)  # bit 15: lazy (dynamic-selective) checkpoint mode
-const _TYPED_LAZY_BIT  = UInt16(0x4000)  # bit 14: typed lazy-fallback mode
-const _MODE_BITS_MASK  = UInt16(0xC000)  # bits 14-15: all mode flags
-const _TYPE_BITS_MASK  = UInt16(0x00FF)  # bits 0-7: fixed-slot type bits
+const _LAZY_MODE_BIT = UInt16(0x8000)  # bit 15: lazy (dynamic-selective) checkpoint mode
+const _TYPED_LAZY_BIT = UInt16(0x4000)  # bit 14: typed lazy-fallback mode
+const _MODE_BITS_MASK = UInt16(0xC000)  # bits 14-15: all mode flags
+const _TYPE_BITS_MASK = UInt16(0x00FF)  # bits 0-7: fixed-slot type bits
 
 # ==============================================================================
 # Fixed-Slot Bit Mapping (for type touch tracking)
@@ -350,15 +350,15 @@ const _TYPE_BITS_MASK  = UInt16(0x00FF)  # bits 0-7: fixed-slot type bits
 # Maps each fixed-slot type to a unique bit in a UInt16 bitmask.
 # Bit ordering matches FIXED_SLOT_FIELDS. Non-fixed types return UInt16(0).
 
-@inline _fixed_slot_bit(::Type{Float64})    = UInt16(1) << 0
-@inline _fixed_slot_bit(::Type{Float32})    = UInt16(1) << 1
-@inline _fixed_slot_bit(::Type{Int64})      = UInt16(1) << 2
-@inline _fixed_slot_bit(::Type{Int32})      = UInt16(1) << 3
+@inline _fixed_slot_bit(::Type{Float64}) = UInt16(1) << 0
+@inline _fixed_slot_bit(::Type{Float32}) = UInt16(1) << 1
+@inline _fixed_slot_bit(::Type{Int64}) = UInt16(1) << 2
+@inline _fixed_slot_bit(::Type{Int32}) = UInt16(1) << 3
 @inline _fixed_slot_bit(::Type{ComplexF64}) = UInt16(1) << 4
 @inline _fixed_slot_bit(::Type{ComplexF32}) = UInt16(1) << 5
-@inline _fixed_slot_bit(::Type{Bool})       = UInt16(1) << 6
-@inline _fixed_slot_bit(::Type{Bit})        = UInt16(1) << 7
-@inline _fixed_slot_bit(::Type)             = UInt16(0)  # non-fixed-slot → triggers has_others
+@inline _fixed_slot_bit(::Type{Bool}) = UInt16(1) << 6
+@inline _fixed_slot_bit(::Type{Bit}) = UInt16(1) << 7
+@inline _fixed_slot_bit(::Type) = UInt16(0)  # non-fixed-slot → triggers has_others
 
 # Check whether a type's bit is set in a bitmask (e.g. _touched_type_masks or combined).
 @inline _has_bit(mask::UInt16, ::Type{T}) where {T} = (mask & _fixed_slot_bit(T)) != 0
@@ -394,7 +394,7 @@ mutable struct AdaptiveArrayPool <: AbstractArrayPool
 end
 
 function AdaptiveArrayPool()
-    AdaptiveArrayPool(
+    return AdaptiveArrayPool(
         TypedPool{Float64}(),
         TypedPool{Float32}(),
         TypedPool{Int64}(),
@@ -426,7 +426,7 @@ end
 
 # Slow Path: rare types via IdDict
 @inline function get_typed_pool!(p::AdaptiveArrayPool, ::Type{T}) where {T}
-    get!(p.others, T) do
+    return get!(p.others, T) do
         tp = TypedPool{T}()
         # If inside a checkpoint scope (_current_depth > 1 means inside @with_pool),
         # auto-checkpoint the new pool to prevent issues on rewind
@@ -454,7 +454,7 @@ Apply `f` to each fixed slot TypedPool. Zero allocation via compile-time unrolli
 """
 @generated function foreach_fixed_slot(f::F, pool::AdaptiveArrayPool) where {F}
     exprs = [:(f(getfield(pool, $(QuoteNode(field))))) for field in FIXED_SLOT_FIELDS]
-    quote
+    return quote
         Base.@_inline_meta
         $(exprs...)
         nothing
diff --git a/src/utils.jl b/src/utils.jl
index ee6c1147..b528c2df 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -50,7 +50,7 @@ function _validate_pool_return(val, pool::AdaptiveArrayPool)
     end
 
     # 3. Check raw Array (from unsafe_acquire!)
-    if val isa Array
+    return if val isa Array
         _check_pointer_overlap(val, pool)
     end
 end
@@ -61,7 +61,7 @@ function _check_pointer_overlap(arr::Array, pool::AdaptiveArrayPool)
     arr_len = length(arr) * sizeof(eltype(arr))
     arr_end = arr_ptr + arr_len
 
-    check_overlap = function(tp)
+    check_overlap = function (tp)
         for v in tp.vectors
             v_ptr = UInt(pointer(v))
             v_len = length(v) * sizeof(eltype(v))
@@ -70,6 +70,7 @@ function _check_pointer_overlap(arr::Array, pool::AdaptiveArrayPool)
                 error("Safety Violation: The function returned an Array backed by pool memory. This is unsafe as the memory will be reclaimed. Please return a copy (collect) or a scalar.")
             end
         end
+        return
     end
 
     # Check fixed slots
@@ -81,6 +82,7 @@ function _check_pointer_overlap(arr::Array, pool::AdaptiveArrayPool)
     for tp in values(pool.others)
         check_overlap(tp)
     end
+    return
 end
 
 # Check if BitArray chunks overlap with the pool's BitTypedPool storage
@@ -123,14 +125,14 @@ _count_label(::BitTypedPool) = "bits"
 
 Print statistics for a TypedPool or BitTypedPool.
 """
-function pool_stats(tp::AbstractTypedPool; io::IO=stdout, indent::Int=0, name::String="")
+function pool_stats(tp::AbstractTypedPool; io::IO = stdout, indent::Int = 0, name::String = "")
     prefix = " "^indent
     type_name = isempty(name) ? _default_type_name(tp) : name
 
     n_arrays = length(tp.vectors)
     if n_arrays == 0
-        printstyled(io, prefix, type_name, color=:cyan)
-        printstyled(io, " (empty)\n", color=:dark_gray)
+        printstyled(io, prefix, type_name, color = :cyan)
+        printstyled(io, " (empty)\n", color = :dark_gray)
         return
     end
 
@@ -139,19 +141,19 @@ function pool_stats(tp::AbstractTypedPool; io::IO=stdout, indent::Int=0, name::S
     bytes_str = Base.format_bytes(total_bytes)
 
     # Header
-    printstyled(io, prefix, type_name, color=:cyan)
+    printstyled(io, prefix, type_name, color = :cyan)
     println(io)
 
     # Stats
-    printstyled(io, prefix, "  slots: ", color=:dark_gray)
-    printstyled(io, n_arrays, color=:blue)
-    printstyled(io, " (active: ", color=:dark_gray)
-    printstyled(io, tp.n_active, color=:blue)
-    printstyled(io, ")\n", color=:dark_gray)
-
-    printstyled(io, prefix, "  ", _count_label(tp), ": ", color=:dark_gray)
-    printstyled(io, total_count, color=:blue)
-    printstyled(io, " ($bytes_str)\n", color=:dark_gray)
+    printstyled(io, prefix, "  slots: ", color = :dark_gray)
+    printstyled(io, n_arrays, color = :blue)
+    printstyled(io, " (active: ", color = :dark_gray)
+    printstyled(io, tp.n_active, color = :blue)
+    printstyled(io, ")\n", color = :dark_gray)
+
+    printstyled(io, prefix, "  ", _count_label(tp), ": ", color = :dark_gray)
+    printstyled(io, total_count, color = :blue)
+    printstyled(io, " ($bytes_str)\n", color = :dark_gray)
     return nothing
 end
 
@@ -169,9 +171,9 @@ pool = AdaptiveArrayPool()
 end
 ```
 """
-function pool_stats(pool::AdaptiveArrayPool; io::IO=stdout)
+function pool_stats(pool::AdaptiveArrayPool; io::IO = stdout)
     # Header
-    printstyled(io, "AdaptiveArrayPool", bold=true, color=:white)
+    printstyled(io, "AdaptiveArrayPool", bold = true, color = :white)
     println(io)
 
     has_content = false
@@ -181,18 +183,18 @@ function pool_stats(pool::AdaptiveArrayPool; io::IO=stdout)
         if !isempty(tp.vectors)
             has_content = true
             name = _default_type_name(tp) * " (fixed)"
-            pool_stats(tp; io, indent=2, name)
+            pool_stats(tp; io, indent = 2, name)
         end
     end
 
     # Fallback types
     for (T, tp) in pool.others
         has_content = true
-        pool_stats(tp; io, indent=2, name="$T (fallback)")
+        pool_stats(tp; io, indent = 2, name = "$T (fallback)")
     end
 
     if !has_content
-        printstyled(io, "  (empty)\n", color=:dark_gray)
+        printstyled(io, "  (empty)\n", color = :dark_gray)
     end
     return nothing
 end
@@ -210,7 +212,7 @@ Print statistics for all task-local pools (CPU and CUDA if loaded).
 end
 ```
 """
-function pool_stats(; io::IO=stdout)
+function pool_stats(; io::IO = stdout)
     pool_stats(:cpu; io)
     # Show CUDA pools if extension is loaded and pools exist
     try
@@ -227,8 +229,8 @@ end
 
 Print statistics for the CPU task-local pool only.
 """
-pool_stats(::Val{:cpu}; io::IO=stdout) = pool_stats(get_task_local_pool(); io)
-pool_stats(s::Symbol; io::IO=stdout) = pool_stats(Val(s); io)
+pool_stats(::Val{:cpu}; io::IO = stdout) = pool_stats(get_task_local_pool(); io)
+pool_stats(s::Symbol; io::IO = stdout) = pool_stats(Val(s); io)
 
 """
     pool_stats(:cuda; io::IO=stdout)
@@ -236,7 +238,7 @@ pool_stats(s::Symbol; io::IO=stdout) = pool_stats(Val(s); io)
 Print statistics for CUDA task-local pools.
 Requires CUDA.jl to be loaded.
 """
-function pool_stats(::Val{:cuda}; io::IO=stdout)
+function pool_stats(::Val{:cuda}; io::IO = stdout)
     pools = get_task_local_cuda_pools()  # Throws MethodError if extension not loaded
     for pool in values(pools)
         pool_stats(pool; io)
@@ -256,7 +258,7 @@ _show_type_name(::BitTypedPool) = "BitTypedPool"
 function Base.show(io::IO, tp::AbstractTypedPool)
     name = _show_type_name(tp)
     n_vectors = length(tp.vectors)
-    if n_vectors == 0
+    return if n_vectors == 0
         print(io, "$name(empty)")
     else
         total = sum(length(v) for v in tp.vectors)
@@ -267,7 +269,7 @@ end
 
 # Multi-line show for all AbstractTypedPool
 function Base.show(io::IO, ::MIME"text/plain", tp::AbstractTypedPool)
-    pool_stats(tp; io, name=_show_type_name(tp))
+    return pool_stats(tp; io, name = _show_type_name(tp))
 end
 
 # Compact one-line show for AdaptiveArrayPool
@@ -290,10 +292,10 @@ function Base.show(io::IO, pool::AdaptiveArrayPool)
         total_active[] += tp.n_active
     end
 
-    print(io, "AdaptiveArrayPool(types=$(n_types[]), slots=$(total_vectors[]), active=$(total_active[]))")
+    return print(io, "AdaptiveArrayPool(types=$(n_types[]), slots=$(total_vectors[]), active=$(total_active[]))")
 end
 
 # Multi-line show for AdaptiveArrayPool
 function Base.show(io::IO, ::MIME"text/plain", pool::AdaptiveArrayPool)
-    pool_stats(pool; io)
+    return pool_stats(pool; io)
 end
diff --git a/test/cuda/test_allocation.jl b/test/cuda/test_allocation.jl
index a1047057..7bee5bd7 100644
--- a/test/cuda/test_allocation.jl
+++ b/test/cuda/test_allocation.jl
@@ -42,7 +42,7 @@
                 v1 = acquire!(p, Float32, 100)
                 v2 = acquire!(p, Float32, 200)
                 v3 = acquire!(p, Float32, 300)
-                v1 .= 1f0; v2 .= 2f0; v3 .= 3f0
+                v1 .= 1.0f0; v2 .= 2.0f0; v3 .= 3.0f0
             end
         end
 
@@ -260,7 +260,7 @@ end
                 v32 = acquire!(p, Float32, 100)
                 v64 = acquire!(p, Float64, 100)
                 vi32 = acquire!(p, Int32, 100)
-                v32 .= 1f0; v64 .= 2.0; vi32 .= 3
+                v32 .= 1.0f0; v64 .= 2.0; vi32 .= 3
             end
         end
 
diff --git a/test/cuda/test_convenience.jl b/test/cuda/test_convenience.jl
index 8bb7858b..f3d09daf 100644
--- a/test/cuda/test_convenience.jl
+++ b/test/cuda/test_convenience.jl
@@ -9,14 +9,14 @@
         @test all(v .== 0.0f0)
 
         m = zeros!(pool, 3, 4)
-        @test m isa CuArray{Float32,2}
+        @test m isa CuArray{Float32, 2}
         @test size(m) == (3, 4)
         @test all(m .== 0.0f0)
 
         # Tuple form
         dims = (2, 3)
         t = zeros!(pool, dims)
-        @test t isa CuArray{Float32,2}
+        @test t isa CuArray{Float32, 2}
         @test size(t) == dims
     end
 
@@ -40,14 +40,14 @@
         @test all(v .== 1.0f0)
 
         m = ones!(pool, 3, 4)
-        @test m isa CuArray{Float32,2}
+        @test m isa CuArray{Float32, 2}
         @test size(m) == (3, 4)
         @test all(m .== 1.0f0)
 
         # Tuple form
         dims = (2, 3)
         t = ones!(pool, dims)
-        @test t isa CuArray{Float32,2}
+        @test t isa CuArray{Float32, 2}
         @test size(t) == dims
     end
 
@@ -65,40 +65,40 @@
         # Float32 template
         template32 = CUDA.rand(Float32, 5, 5)
         v = similar!(pool, template32)
-        @test v isa CuArray{Float32,2}
+        @test v isa CuArray{Float32, 2}
         @test size(v) == (5, 5)
 
         # Float64 template
         template64 = CUDA.rand(Float64, 3, 4)
         v64 = similar!(pool, template64)
-        @test v64 isa CuArray{Float64,2}
+        @test v64 isa CuArray{Float64, 2}
         @test size(v64) == (3, 4)
 
         # Different type
         v_int = similar!(pool, template32, Int32)
-        @test v_int isa CuArray{Int32,2}
+        @test v_int isa CuArray{Int32, 2}
         @test size(v_int) == (5, 5)
 
         # Different dims
         v_dims = similar!(pool, template32, 10)
-        @test v_dims isa CuArray{Float32,1}
+        @test v_dims isa CuArray{Float32, 1}
         @test length(v_dims) == 10
 
         # Different type and dims
         v_both = similar!(pool, template32, Float64, 2, 3)
-        @test v_both isa CuArray{Float64,2}
+        @test v_both isa CuArray{Float64, 2}
         @test size(v_both) == (2, 3)
     end
 
     @testset "unsafe_zeros! default type is Float32" begin
         v = unsafe_zeros!(pool, 10)
-        @test v isa CuArray{Float32,1}
+        @test v isa CuArray{Float32, 1}
         @test !(v isa SubArray)  # Raw array, not view
         @test length(v) == 10
         @test all(v .== 0.0f0)
 
         m = unsafe_zeros!(pool, 3, 4)
-        @test m isa CuArray{Float32,2}
+        @test m isa CuArray{Float32, 2}
         @test !(m isa SubArray)
         @test size(m) == (3, 4)
         @test all(m .== 0.0f0)
@@ -106,7 +106,7 @@
         # Tuple form
         dims = (2, 3)
         t = unsafe_zeros!(pool, dims)
-        @test t isa CuArray{Float32,2}
+        @test t isa CuArray{Float32, 2}
         @test size(t) == dims
     end
 
@@ -119,13 +119,13 @@
 
     @testset "unsafe_ones! default type is Float32" begin
         v = unsafe_ones!(pool, 10)
-        @test v isa CuArray{Float32,1}
+        @test v isa CuArray{Float32, 1}
         @test !(v isa SubArray)
         @test length(v) == 10
         @test all(v .== 1.0f0)
 
         m = unsafe_ones!(pool, 3, 4)
-        @test m isa CuArray{Float32,2}
+        @test m isa CuArray{Float32, 2}
         @test !(m isa SubArray)
         @test size(m) == (3, 4)
         @test all(m .== 1.0f0)
@@ -133,7 +133,7 @@
         # Tuple form
         dims = (2, 3)
         t = unsafe_ones!(pool, dims)
-        @test t isa CuArray{Float32,2}
+        @test t isa CuArray{Float32, 2}
         @test size(t) == dims
     end
 
@@ -148,32 +148,32 @@
         # Float32 template
         template32 = CUDA.rand(Float32, 5, 5)
         v = unsafe_similar!(pool, template32)
-        @test v isa CuArray{Float32,2}
+        @test v isa CuArray{Float32, 2}
         @test !(v isa SubArray)
         @test size(v) == (5, 5)
 
         # Float64 template
         template64 = CUDA.rand(Float64, 3, 4)
         v64 = unsafe_similar!(pool, template64)
-        @test v64 isa CuArray{Float64,2}
+        @test v64 isa CuArray{Float64, 2}
         @test !(v64 isa SubArray)
         @test size(v64) == (3, 4)
 
         # Different type
         v_int = unsafe_similar!(pool, template32, Int32)
-        @test v_int isa CuArray{Int32,2}
+        @test v_int isa CuArray{Int32, 2}
         @test !(v_int isa SubArray)
         @test size(v_int) == (5, 5)
 
         # Different dims
         v_dims = unsafe_similar!(pool, template32, 10)
-        @test v_dims isa CuArray{Float32,1}
+        @test v_dims isa CuArray{Float32, 1}
         @test !(v_dims isa SubArray)
         @test length(v_dims) == 10
 
         # Different type and dims
         v_both = unsafe_similar!(pool, template32, Float64, 2, 3)
-        @test v_both isa CuArray{Float64,2}
+        @test v_both isa CuArray{Float64, 2}
         @test !(v_both isa SubArray)
         @test size(v_both) == (2, 3)
     end
diff --git a/test/cuda/test_disabled_pool.jl b/test/cuda/test_disabled_pool.jl
index 6c7da69a..aebbe9a8 100644
--- a/test/cuda/test_disabled_pool.jl
+++ b/test/cuda/test_disabled_pool.jl
@@ -24,7 +24,7 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
         @test all(v1 .== 0.0f0)
 
         v2 = zeros!(DISABLED_CUDA, Float64, 5, 5)
-        @test v2 isa CuArray{Float64,2}
+        @test v2 isa CuArray{Float64, 2}
         @test size(v2) == (5, 5)
         @test all(v2 .== 0.0)
 
@@ -34,16 +34,16 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
         @test length(v3) == 8
 
         v4 = zeros!(DISABLED_CUDA, 3, 4)
-        @test v4 isa CuArray{Float32,2}
+        @test v4 isa CuArray{Float32, 2}
         @test size(v4) == (3, 4)
 
         # Tuple dims
         v5 = zeros!(DISABLED_CUDA, Float32, (2, 3, 4))
-        @test v5 isa CuArray{Float32,3}
+        @test v5 isa CuArray{Float32, 3}
         @test size(v5) == (2, 3, 4)
 
         v6 = zeros!(DISABLED_CUDA, (5, 6))
-        @test v6 isa CuArray{Float32,2}
+        @test v6 isa CuArray{Float32, 2}
         @test size(v6) == (5, 6)
     end
 
@@ -55,7 +55,7 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
         @test all(v1 .== 1.0f0)
 
         v2 = ones!(DISABLED_CUDA, Float64, 5, 5)
-        @test v2 isa CuArray{Float64,2}
+        @test v2 isa CuArray{Float64, 2}
         @test size(v2) == (5, 5)
         @test all(v2 .== 1.0)
 
@@ -65,16 +65,16 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
         @test all(v3 .== 1.0f0)
 
         v4 = ones!(DISABLED_CUDA, 3, 4)
-        @test v4 isa CuArray{Float32,2}
+        @test v4 isa CuArray{Float32, 2}
         @test size(v4) == (3, 4)
 
         # Tuple dims
         v5 = ones!(DISABLED_CUDA, Float32, (2, 3))
-        @test v5 isa CuArray{Float32,2}
+        @test v5 isa CuArray{Float32, 2}
         @test size(v5) == (2, 3)
 
         v6 = ones!(DISABLED_CUDA, (4, 5))
-        @test v6 isa CuArray{Float32,2}
+        @test v6 isa CuArray{Float32, 2}
         @test size(v6) == (4, 5)
     end
 
@@ -90,11 +90,11 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
         @test length(v2) == 10
 
         v3 = similar!(DISABLED_CUDA, template, 5, 5)
-        @test v3 isa CuArray{Float32,2}
+        @test v3 isa CuArray{Float32, 2}
         @test size(v3) == (5, 5)
 
         v4 = similar!(DISABLED_CUDA, template, Float64, 3, 4)
-        @test v4 isa CuArray{Float64,2}
+        @test v4 isa CuArray{Float64, 2}
         @test size(v4) == (3, 4)
     end
 
@@ -110,11 +110,11 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
         @test length(v2) == 8
 
         v3 = similar!(DISABLED_CUDA, cpu_template, 4, 4)
-        @test v3 isa CuArray{Float64,2}
+        @test v3 isa CuArray{Float64, 2}
         @test size(v3) == (4, 4)
 
         v4 = similar!(DISABLED_CUDA, cpu_template, Int32, 2, 3)
-        @test v4 isa CuArray{Int32,2}
+        @test v4 isa CuArray{Int32, 2}
         @test size(v4) == (2, 3)
     end
 
@@ -124,7 +124,7 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
         @test all(v1 .== 0.0f0)
 
         v2 = unsafe_zeros!(DISABLED_CUDA, Float64, 5, 5)
-        @test v2 isa CuArray{Float64,2}
+        @test v2 isa CuArray{Float64, 2}
         @test size(v2) == (5, 5)
 
         # Without type
@@ -133,11 +133,11 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
 
         # Tuple dims
         v4 = unsafe_zeros!(DISABLED_CUDA, Float32, (3, 4))
-        @test v4 isa CuArray{Float32,2}
+        @test v4 isa CuArray{Float32, 2}
         @test size(v4) == (3, 4)
 
         v5 = unsafe_zeros!(DISABLED_CUDA, (2, 3))
-        @test v5 isa CuArray{Float32,2}
+        @test v5 isa CuArray{Float32, 2}
     end
 
     @testset "unsafe_ones!" begin
@@ -146,7 +146,7 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
         @test all(v1 .== 1.0f0)
 
         v2 = unsafe_ones!(DISABLED_CUDA, Float64, 5, 5)
-        @test v2 isa CuArray{Float64,2}
+        @test v2 isa CuArray{Float64, 2}
         @test size(v2) == (5, 5)
 
         # Without type
@@ -155,10 +155,10 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
 
         # Tuple dims
         v4 = unsafe_ones!(DISABLED_CUDA, Float32, (3, 4))
-        @test v4 isa CuArray{Float32,2}
+        @test v4 isa CuArray{Float32, 2}
 
         v5 = unsafe_ones!(DISABLED_CUDA, (2, 3))
-        @test v5 isa CuArray{Float32,2}
+        @test v5 isa CuArray{Float32, 2}
     end
 
     @testset "unsafe_similar! with CuArray input" begin
@@ -171,10 +171,10 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
         @test v2 isa CuVector{Float64}
 
         v3 = unsafe_similar!(DISABLED_CUDA, template, 5, 5)
-        @test v3 isa CuArray{Float32,2}
+        @test v3 isa CuArray{Float32, 2}
 
         v4 = unsafe_similar!(DISABLED_CUDA, template, Float64, 3, 4)
-        @test v4 isa CuArray{Float64,2}
+        @test v4 isa CuArray{Float64, 2}
     end
 
     @testset "unsafe_similar! with AbstractArray input (CPU->GPU)" begin
@@ -187,10 +187,10 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
         @test v2 isa CuVector{Float32}
 
         v3 = unsafe_similar!(DISABLED_CUDA, cpu_template, 4, 4)
-        @test v3 isa CuArray{Float64,2}
+        @test v3 isa CuArray{Float64, 2}
 
         v4 = unsafe_similar!(DISABLED_CUDA, cpu_template, Int32, 2, 3)
-        @test v4 isa CuArray{Int32,2}
+        @test v4 isa CuArray{Int32, 2}
     end
 
     @testset "acquire!" begin
@@ -201,12 +201,12 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
 
         # Type + vararg dims
         v2 = acquire!(DISABLED_CUDA, Float64, 5, 5)
-        @test v2 isa CuArray{Float64,2}
+        @test v2 isa CuArray{Float64, 2}
         @test size(v2) == (5, 5)
 
         # Type + tuple dims
         v3 = acquire!(DISABLED_CUDA, Float32, (3, 4, 5))
-        @test v3 isa CuArray{Float32,3}
+        @test v3 isa CuArray{Float32, 3}
         @test size(v3) == (3, 4, 5)
 
         # CuArray template
@@ -230,12 +230,12 @@ using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_e
 
         # Type + vararg dims
         v2 = unsafe_acquire!(DISABLED_CUDA, Float64, 5, 5)
-        @test v2 isa CuArray{Float64,2}
+        @test v2 isa CuArray{Float64, 2}
         @test size(v2) == (5, 5)
 
         # Type + tuple dims
         v3 = unsafe_acquire!(DISABLED_CUDA, Float32, (3, 4, 5))
-        @test v3 isa CuArray{Float32,3}
+        @test v3 isa CuArray{Float32, 3}
         @test size(v3) == (3, 4, 5)
 
         # CuArray template
diff --git a/test/cuda/test_display.jl b/test/cuda/test_display.jl
index 354e3954..b23032b3 100644
--- a/test/cuda/test_display.jl
+++ b/test/cuda/test_display.jl
@@ -3,7 +3,7 @@
 
 # Helper macro to capture stdout
 macro capture_out(expr)
-    quote
+    return quote
         local old_stdout = stdout
         local rd, wr = redirect_stdout()
         try
@@ -182,10 +182,10 @@ end
         reset!(pool)
 
         # pool_stats should return nothing
-        result = pool_stats(pool; io=devnull)
+        result = pool_stats(pool; io = devnull)
         @test result === nothing
 
-        result = pool_stats(:cuda; io=devnull)
+        result = pool_stats(:cuda; io = devnull)
         @test result === nothing
     end
 
diff --git a/test/cuda/test_extension.jl b/test/cuda/test_extension.jl
index a2bfcdb9..c4cc3797 100644
--- a/test/cuda/test_extension.jl
+++ b/test/cuda/test_extension.jl
@@ -57,7 +57,7 @@ end
         vec = CUDA.zeros(Float32, 50)
         flat_view = view(vec, 1:50)
         wrapped = AdaptiveArrayPools.wrap_array(tp, flat_view, (10, 5))
-        @test wrapped isa CuArray{Float32,2}
+        @test wrapped isa CuArray{Float32, 2}
         @test size(wrapped) == (10, 5)
     end
 
@@ -121,13 +121,15 @@ end
 
     @testset "get_task_local_cuda_pools before pool creation" begin
         # Test in a fresh task where no pool exists yet
-        result = fetch(Threads.@spawn begin
-            # Call get_task_local_cuda_pools() FIRST (before get_task_local_cuda_pool)
-            pools = get_task_local_cuda_pools()
-            @test pools isa Dict{Int, CuAdaptiveArrayPool}
-            @test isempty(pools)  # No pools created yet
-            true
-        end)
+        result = fetch(
+            Threads.@spawn begin
+                # Call get_task_local_cuda_pools() FIRST (before get_task_local_cuda_pool)
+                pools = get_task_local_cuda_pools()
+                @test pools isa Dict{Int, CuAdaptiveArrayPool}
+                @test isempty(pools)  # No pools created yet
+                true
+            end
+        )
         @test result == true
     end
 
@@ -449,7 +451,7 @@ end
     @testset "unsafe_acquire!" begin
         result = @with_pool :cuda pool begin
             A = unsafe_acquire!(pool, Float32, 100)
-            @test A isa CuArray{Float32,1}
+            @test A isa CuArray{Float32, 1}
             A .= 2.0f0
             sum(A)
         end
@@ -490,12 +492,12 @@ end
         pool = CuAdaptiveArrayPool()
 
         v = unsafe_acquire!(pool, Float32, 100)
-        @test v isa CuArray{Float32,1}
+        @test v isa CuArray{Float32, 1}
 
         A = unsafe_acquire!(pool, Float64, 10, 10)
-        @test A isa CuArray{Float64,2}
+        @test A isa CuArray{Float64, 2}
 
         B = unsafe_acquire!(pool, Int32, (5, 5))
-        @test B isa CuArray{Int32,2}
+        @test B isa CuArray{Int32, 2}
     end
 end
diff --git a/test/test_allocation.jl b/test/test_allocation.jl
index b17631bf..3666da80 100644
--- a/test/test_allocation.jl
+++ b/test/test_allocation.jl
@@ -1,20 +1,20 @@
 @with_pool pool function foo()
-	float64_vec = acquire!(pool, Float64, 10)
-	float32_vec = acquire!(pool, Float32, 10)
+    float64_vec = acquire!(pool, Float64, 10)
+    float32_vec = acquire!(pool, Float32, 10)
 
-	float64_mat = acquire!(pool, Float64, 10, 10)
-	float32_mat = acquire!(pool, Float32, 10, 10)
+    float64_mat = acquire!(pool, Float64, 10, 10)
+    float32_mat = acquire!(pool, Float32, 10, 10)
 
-	bv = acquire!(pool, Bit, 100)
-	ba2 = acquire!(pool, Bit, 10, 10)
-	ba3 = acquire!(pool, Bit, 5, 5, 4)
+    bv = acquire!(pool, Bit, 100)
+    ba2 = acquire!(pool, Bit, 10, 10)
+    ba3 = acquire!(pool, Bit, 5, 5, 4)
 
-	tt1 = trues!(pool, 256)
-	tt2 = ones!(pool, Bit, 10, 20)
-	ff1 = falses!(pool, 100, 5)
-	ff2 = zeros!(pool, Bit, 100)
+    tt1 = trues!(pool, 256)
+    tt2 = ones!(pool, Bit, 10, 20)
+    ff1 = falses!(pool, 100, 5)
+    ff2 = zeros!(pool, Bit, 100)
 
-	C = similar!(pool, tt1)
+    C = similar!(pool, tt1)
 end
 
 
@@ -35,4 +35,4 @@ end
     alloc3 = @allocated foo()
     @test alloc2 == 0
     @test alloc3 == 0
-end
\ No newline at end of file
+end
diff --git a/test/test_aqua.jl b/test/test_aqua.jl
index 5b3233a3..3fbe4e9d 100644
--- a/test/test_aqua.jl
+++ b/test/test_aqua.jl
@@ -1,5 +1,5 @@
 using Aqua, Test
 
 @testset "Aqua.jl" begin
-    Aqua.test_all( AdaptiveArrayPools )
+    Aqua.test_all(AdaptiveArrayPools)
 end
diff --git a/test/test_backend_macro_expansion.jl b/test/test_backend_macro_expansion.jl
index 9721cb05..1ac6fdc1 100644
--- a/test/test_backend_macro_expansion.jl
+++ b/test/test_backend_macro_expansion.jl
@@ -215,7 +215,7 @@
         end
 
         @testset "Where clause preserved" begin
-            expr = @macroexpand @with_pool :cuda pool function generic_func(x::Vector{T}) where T
+            expr = @macroexpand @with_pool :cuda pool function generic_func(x::Vector{T}) where {T}
                 v = acquire!(pool, T, length(x))
                 return sum(v)
             end
@@ -379,7 +379,7 @@
             expr = @macroexpand @with_pool :cuda pool function complex_func(
                     x::AbstractArray{T},
                     y::AbstractArray{S};
-                    tol::Float64 = 1e-6
+                    tol::Float64 = 1.0e-6
                 ) where {T <: Real, S <: Real}
                 v = acquire!(pool, T, size(x))
                 return sum(v)
diff --git a/test/test_convenience.jl b/test/test_convenience.jl
index 2b243d07..cdadaa6a 100644
--- a/test/test_convenience.jl
+++ b/test/test_convenience.jl
@@ -328,7 +328,7 @@
             # 1D with explicit type
             result = @with_pool pool begin
                 v = unsafe_zeros!(pool, Float64, size(x1d))
-                @test v isa Array{Float64,1}
+                @test v isa Array{Float64, 1}
                 @test length(v) == 10
                 @test all(v .== 0.0)
                 sum(v)
@@ -338,7 +338,7 @@
             # 1D without type
             result = @with_pool pool begin
                 v = unsafe_zeros!(pool, size(x1d))
-                @test v isa Array{Float64,1}
+                @test v isa Array{Float64, 1}
                 @test eltype(v) == Float64
                 sum(v)
             end
@@ -347,7 +347,7 @@
             # 2D with explicit type
             result = @with_pool pool begin
                 m = unsafe_zeros!(pool, Float32, size(x2d))
-                @test m isa Array{Float32,2}
+                @test m isa Array{Float32, 2}
                 @test size(m) == (5, 8)
                 sum(m)
             end
@@ -361,7 +361,7 @@
             # 1D with explicit type
             result = @with_pool pool begin
                 v = unsafe_ones!(pool, Float64, size(x1d))
-                @test v isa Array{Float64,1}
+                @test v isa Array{Float64, 1}
                 @test length(v) == 10
                 @test all(v .== 1.0)
                 sum(v)
@@ -371,7 +371,7 @@
             # 1D without type
             result = @with_pool pool begin
                 v = unsafe_ones!(pool, size(x1d))
-                @test v isa Array{Float64,1}
+                @test v isa Array{Float64, 1}
                 @test eltype(v) == Float64
                 sum(v)
             end
@@ -380,7 +380,7 @@
             # 2D with explicit type
             result = @with_pool pool begin
                 m = unsafe_ones!(pool, Float32, size(x2d))
-                @test m isa Array{Float32,2}
+                @test m isa Array{Float32, 2}
                 @test size(m) == (5, 8)
                 sum(m)
             end
@@ -424,7 +424,7 @@
 
         @testset "returns raw array (not view)" begin
             v = unsafe_zeros!(pool, Float64, 10)
-            @test v isa Array{Float64,1}
+            @test v isa Array{Float64, 1}
             @test !(v isa SubArray)
             @test length(v) == 10
             @test all(v .== 0.0)
@@ -432,7 +432,7 @@
 
         @testset "default type (Float64)" begin
             v = unsafe_zeros!(pool, 10)
-            @test v isa Array{Float64,1}
+            @test v isa Array{Float64, 1}
             @test !(v isa SubArray)
             @test eltype(v) == Float64
             @test all(v .== 0.0)
@@ -440,7 +440,7 @@
 
         @testset "multi-dimensional" begin
             m = unsafe_zeros!(pool, Float64, 3, 4)
-            @test m isa Array{Float64,2}
+            @test m isa Array{Float64, 2}
             @test !(m isa SubArray)
             @test size(m) == (3, 4)
             @test all(m .== 0.0)
@@ -469,7 +469,7 @@
 
         @testset "returns raw array (not view)" begin
             v = unsafe_ones!(pool, Float64, 10)
-            @test v isa Array{Float64,1}
+            @test v isa Array{Float64, 1}
             @test !(v isa SubArray)
             @test length(v) == 10
             @test all(v .== 1.0)
@@ -477,7 +477,7 @@
 
         @testset "default type (Float64)" begin
             v = unsafe_ones!(pool, 10)
-            @test v isa Array{Float64,1}
+            @test v isa Array{Float64, 1}
             @test !(v isa SubArray)
             @test eltype(v) == Float64
             @test all(v .== 1.0)
@@ -485,7 +485,7 @@
 
         @testset "multi-dimensional" begin
             m = unsafe_ones!(pool, Float64, 3, 4)
-            @test m isa Array{Float64,2}
+            @test m isa Array{Float64, 2}
             @test !(m isa SubArray)
             @test size(m) == (3, 4)
             @test all(m .== 1.0)
@@ -516,28 +516,28 @@
 
         @testset "returns raw array (not view)" begin
             v = unsafe_similar!(pool, template)
-            @test v isa Array{Float64,2}
+            @test v isa Array{Float64, 2}
             @test !(v isa SubArray)
             @test size(v) == size(template)
         end
 
         @testset "different type" begin
             v = unsafe_similar!(pool, template, Float32)
-            @test v isa Array{Float32,2}
+            @test v isa Array{Float32, 2}
             @test !(v isa SubArray)
             @test size(v) == size(template)
         end
 
         @testset "different size" begin
             v = unsafe_similar!(pool, template, 5, 5)
-            @test v isa Array{Float64,2}
+            @test v isa Array{Float64, 2}
             @test !(v isa SubArray)
             @test size(v) == (5, 5)
         end
 
         @testset "different type and size" begin
             v = unsafe_similar!(pool, template, Int32, 3, 4)
-            @test v isa Array{Int32,2}
+            @test v isa Array{Int32, 2}
             @test !(v isa SubArray)
             @test size(v) == (3, 4)
         end
@@ -556,7 +556,7 @@
         @testset "unsafe_zeros! in macro" begin
             result = @with_pool pool begin
                 v = unsafe_zeros!(pool, Float64, 100)
-                @test v isa Array{Float64,1}
+                @test v isa Array{Float64, 1}
                 @test !(v isa SubArray)
                 v .+= 1.0
                 sum(v)
@@ -567,7 +567,7 @@
         @testset "unsafe_ones! in macro" begin
             result = @with_pool pool begin
                 v = unsafe_ones!(pool, Float64, 50)
-                @test v isa Array{Float64,1}
+                @test v isa Array{Float64, 1}
                 @test !(v isa SubArray)
                 sum(v)
             end
@@ -578,7 +578,7 @@
             template = rand(10)
             result = @with_pool pool begin
                 v = unsafe_similar!(pool, template)
-                @test v isa Array{Float64,1}
+                @test v isa Array{Float64, 1}
                 @test !(v isa SubArray)
                 v .= 2.0
                 sum(v)
diff --git a/test/test_coverage.jl b/test/test_coverage.jl
index 5262b422..047bad51 100644
--- a/test/test_coverage.jl
+++ b/test/test_coverage.jl
@@ -49,26 +49,26 @@
 
         # acquire! with vararg dims
         v = acquire!(pool, Float32, 3, 3)
-        @test v isa Array{Float32,2}
+        @test v isa Array{Float32, 2}
 
         # acquire! with tuple dims
         v = acquire!(pool, Float32, (2, 2))
-        @test v isa Array{Float32,2}
+        @test v isa Array{Float32, 2}
 
         # acquire! with similar
         template = rand(Int32, 4, 4)
         v = acquire!(pool, template)
-        @test v isa Array{Int32,2}
+        @test v isa Array{Int32, 2}
 
         # unsafe_acquire! variants
         v = unsafe_acquire!(pool, Float32, 3, 3)
-        @test v isa Array{Float32,2}
+        @test v isa Array{Float32, 2}
 
         v = unsafe_acquire!(pool, Float32, (2, 2))
-        @test v isa Array{Float32,2}
+        @test v isa Array{Float32, 2}
 
         v = unsafe_acquire!(pool, template)
-        @test v isa Array{Int32,2}
+        @test v isa Array{Int32, 2}
     end
 
     @testset "BackendNotLoadedError" begin
@@ -90,14 +90,46 @@
 
         # Test that errors are thrown for unknown backend
         fake_pool = DisabledPool{:fake_backend}()
-        @test try zeros!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
-        @test try ones!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
-        @test try similar!(fake_pool, rand(3)); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
-        @test try unsafe_zeros!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
-        @test try unsafe_ones!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
-        @test try unsafe_similar!(fake_pool, rand(3)); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
-        @test try acquire!(fake_pool, Float64, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
-        @test try unsafe_acquire!(fake_pool, Float64, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
+        @test try
+            zeros!(fake_pool, 10); false
+        catch e
+            e isa AdaptiveArrayPools.BackendNotLoadedError
+        end
+        @test try
+            ones!(fake_pool, 10); false
+        catch e
+            e isa AdaptiveArrayPools.BackendNotLoadedError
+        end
+        @test try
+            similar!(fake_pool, rand(3)); false
+        catch e
+            e isa AdaptiveArrayPools.BackendNotLoadedError
+        end
+        @test try
+            unsafe_zeros!(fake_pool, 10); false
+        catch e
+            e isa AdaptiveArrayPools.BackendNotLoadedError
+        end
+        @test try
+            unsafe_ones!(fake_pool, 10); false
+        catch e
+            e isa AdaptiveArrayPools.BackendNotLoadedError
+        end
+        @test try
+            unsafe_similar!(fake_pool, rand(3)); false
+        catch e
+            e isa AdaptiveArrayPools.BackendNotLoadedError
+        end
+        @test try
+            acquire!(fake_pool, Float64, 10); false
+        catch e
+            e isa AdaptiveArrayPools.BackendNotLoadedError
+        end
+        @test try
+            unsafe_acquire!(fake_pool, Float64, 10); false
+        catch e
+            e isa AdaptiveArrayPools.BackendNotLoadedError
+        end
     end
 
     @testset "_impl! delegators for DisabledPool" begin
@@ -237,7 +269,12 @@
         @test AdaptiveArrayPools._is_function_def(:(function foo() end)) == true
         @test AdaptiveArrayPools._is_function_def(:(foo(x) = x + 1)) == true
         @test AdaptiveArrayPools._is_function_def(:(x = 1)) == false
-        @test AdaptiveArrayPools._is_function_def(:(begin; end)) == false
+        @test AdaptiveArrayPools._is_function_def(
+            :(
+                begin
+                end
+            )
+        ) == false
 
         # Test _filter_static_types
         types = Set{Any}([Float64, Int64])
@@ -392,7 +429,11 @@
 
     @testset "_generate_function_pool_code" begin
         # Test function code generation with disable_pooling=true
-        func_expr = :(function bar(x) x + 1 end)
+        func_expr = :(
+            function bar(x)
+                x + 1
+            end
+        )
         result = AdaptiveArrayPools._generate_function_pool_code(:pool, func_expr, true, true, :cpu)
         @test result isa Expr
         @test result.head == :function
@@ -410,7 +451,11 @@
 
     @testset "_generate_function_pool_code_with_backend" begin
         # Test function code generation with backend
-        func_expr = :(function compute(x) x + 1 end)
+        func_expr = :(
+            function compute(x)
+                x + 1
+            end
+        )
 
         # With disable_pooling=true
         result1 = AdaptiveArrayPools._generate_function_pool_code_with_backend(:cpu, :pool, func_expr, true)
diff --git a/test/test_disabled_pooling.jl b/test/test_disabled_pooling.jl
index cc926212..a531fd78 100644
--- a/test/test_disabled_pooling.jl
+++ b/test/test_disabled_pooling.jl
@@ -73,7 +73,7 @@
         println("  Subprocess failed: ", e)
         false
     finally
-        rm(test_file; force=true)
+        rm(test_file; force = true)
     end
 
     @test result == true
diff --git a/test/test_fallback_reclamation.jl b/test/test_fallback_reclamation.jl
index 05b578ed..36e959e7 100644
--- a/test/test_fallback_reclamation.jl
+++ b/test/test_fallback_reclamation.jl
@@ -12,17 +12,17 @@ using AdaptiveArrayPools: get_typed_pool!, _lazy_checkpoint!, _lazy_rewind!,
 
 """Get n_active for a fallback type (0 if type not in pool.others)."""
 function others_n_active(pool, ::Type{T}) where {T}
-    haskey(pool.others, T) ? pool.others[T].n_active : 0
+    return haskey(pool.others, T) ? pool.others[T].n_active : 0
 end
 
 """Get checkpoint stack length for a fallback type."""
 function others_stack_len(pool, ::Type{T}) where {T}
-    haskey(pool.others, T) ? length(pool.others[T]._checkpoint_depths) : 0
+    return haskey(pool.others, T) ? length(pool.others[T]._checkpoint_depths) : 0
 end
 
 """Get all n_active values for pool.others entries."""
 function all_others_n_active(pool)
-    Dict(T => tp.n_active for (T, tp) in pool.others)
+    return Dict(T => tp.n_active for (T, tp) in pool.others)
 end
 
 # ==============================================================================
@@ -40,7 +40,7 @@ end
 #   - Different Tag/N params = different concrete types = separate pool entries
 #   - Each unique Dual variant occupies its own IdDict slot in pool.others
 struct FakeTag{F} end
-struct FakeDual{Tag, V<:Real, N}
+struct FakeDual{Tag, V <: Real, N}
     value::V
     partials::NTuple{N, V}
 end
@@ -48,7 +48,7 @@ FakeDual{Tag, V, N}(v::V) where {Tag, V, N} = FakeDual{Tag, V, N}(v, ntuple(_ ->
 
 # Type aliases for readability
 const Dual_f1_11 = FakeDual{FakeTag{:f1}, Float64, 11}
-const Dual_f1_4  = FakeDual{FakeTag{:f1}, Float64, 4}
+const Dual_f1_4 = FakeDual{FakeTag{:f1}, Float64, 4}
 const Dual_f2_11 = FakeDual{FakeTag{:f2}, Float64, 11}
 
 # ==============================================================================
@@ -57,1464 +57,1464 @@ const Dual_f2_11 = FakeDual{FakeTag{:f2}, Float64, 11}
 
 @testset "Fallback Reclamation" begin
 
-@testset "1. Multiple distinct fallback types in single scope" begin
-    pool = AdaptiveArrayPool()
+    @testset "1. Multiple distinct fallback types in single scope" begin
+        pool = AdaptiveArrayPool()
 
-    checkpoint!(pool)
-    v1 = acquire!(pool, UInt8, 10)
-    v2 = acquire!(pool, Float16, 20)
-    v3 = acquire!(pool, Int16, 30)
+        checkpoint!(pool)
+        v1 = acquire!(pool, UInt8, 10)
+        v2 = acquire!(pool, Float16, 20)
+        v3 = acquire!(pool, Int16, 30)
 
-    @test others_n_active(pool, UInt8) == 1
-    @test others_n_active(pool, Float16) == 1
-    @test others_n_active(pool, Int16) == 1
+        @test others_n_active(pool, UInt8) == 1
+        @test others_n_active(pool, Float16) == 1
+        @test others_n_active(pool, Int16) == 1
 
-    rewind!(pool)
+        rewind!(pool)
 
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
-    @test others_n_active(pool, Int16) == 0
-end
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
+        @test others_n_active(pool, Int16) == 0
+    end
 
-@testset "1b. Multiple arrays per fallback type" begin
-    pool = AdaptiveArrayPool()
+    @testset "1b. Multiple arrays per fallback type" begin
+        pool = AdaptiveArrayPool()
 
-    checkpoint!(pool)
-    for _ in 1:5
-        acquire!(pool, UInt8, 10)
-        acquire!(pool, Float16, 20)
+        checkpoint!(pool)
+        for _ in 1:5
+            acquire!(pool, UInt8, 10)
+            acquire!(pool, Float16, 20)
+        end
+        @test others_n_active(pool, UInt8) == 5
+        @test others_n_active(pool, Float16) == 5
+
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
     end
-    @test others_n_active(pool, UInt8) == 5
-    @test others_n_active(pool, Float16) == 5
 
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
-end
+    # ==============================================================================
+    # 2. Deeply Nested Scopes (3+ levels) with Fallback Types
+    # ==============================================================================
 
-# ==============================================================================
-# 2. Deeply Nested Scopes (3+ levels) with Fallback Types
-# ==============================================================================
+    @testset "2. Deep nesting (5 levels) with fallback types" begin
+        pool = AdaptiveArrayPool()
+        fallback_types = [UInt8, Float16, Int16, UInt16, Int8]
 
-@testset "2. Deep nesting (5 levels) with fallback types" begin
-    pool = AdaptiveArrayPool()
-    fallback_types = [UInt8, Float16, Int16, UInt16, Int8]
+        # Acquire one per type at each depth level
+        for depth_level in 1:5
+            checkpoint!(pool)
+            v = acquire!(pool, fallback_types[depth_level], 10 * depth_level)
+            @test others_n_active(pool, fallback_types[depth_level]) == 1
+        end
 
-    # Acquire one per type at each depth level
-    for depth_level in 1:5
-        checkpoint!(pool)
-        v = acquire!(pool, fallback_types[depth_level], 10 * depth_level)
-        @test others_n_active(pool, fallback_types[depth_level]) == 1
-    end
+        # All 5 types active
+        for (i, T) in enumerate(fallback_types)
+            @test others_n_active(pool, T) == 1
+        end
 
-    # All 5 types active
-    for (i, T) in enumerate(fallback_types)
-        @test others_n_active(pool, T) == 1
+        # Unwind — each type should revert as we go
+        for depth_level in 5:-1:1
+            rewind!(pool)
+            @test others_n_active(pool, fallback_types[depth_level]) == 0
+        end
     end
 
-    # Unwind — each type should revert as we go
-    for depth_level in 5:-1:1
-        rewind!(pool)
-        @test others_n_active(pool, fallback_types[depth_level]) == 0
-    end
-end
+    @testset "2b. Same fallback type across nested depths" begin
+        pool = AdaptiveArrayPool()
 
-@testset "2b. Same fallback type across nested depths" begin
-    pool = AdaptiveArrayPool()
+        # Level 1: acquire 1 UInt8
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 10)
+        @test others_n_active(pool, UInt8) == 1
 
-    # Level 1: acquire 1 UInt8
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    @test others_n_active(pool, UInt8) == 1
+        # Level 2: acquire 2 more
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 20)
+        acquire!(pool, UInt8, 30)
+        @test others_n_active(pool, UInt8) == 3
 
-    # Level 2: acquire 2 more
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 20)
-    acquire!(pool, UInt8, 30)
-    @test others_n_active(pool, UInt8) == 3
+        # Level 3: acquire 1 more
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 40)
+        @test others_n_active(pool, UInt8) == 4
 
-    # Level 3: acquire 1 more
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 40)
-    @test others_n_active(pool, UInt8) == 4
+        # Unwind level 3
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 3
 
-    # Unwind level 3
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 3
+        # Unwind level 2
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 1
 
-    # Unwind level 2
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 1
+        # Unwind level 1
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-    # Unwind level 1
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+    # ==============================================================================
+    # 3. @with_pool Macro with Fallback Types
+    # ==============================================================================
 
-# ==============================================================================
-# 3. @with_pool Macro with Fallback Types
-# ==============================================================================
+    @testset "3. @with_pool macro with fallback types" begin
+        # The macro uses lazy checkpoint/rewind (use_typed=false path) when
+        # types aren't statically extractable
+        function helper_fallback!(pool)
+            acquire!(pool, UInt8, 50)
+            acquire!(pool, Float16, 50)
+        end
 
-@testset "3. @with_pool macro with fallback types" begin
-    # The macro uses lazy checkpoint/rewind (use_typed=false path) when
-    # types aren't statically extractable
-    function helper_fallback!(pool)
-        acquire!(pool, UInt8, 50)
-        acquire!(pool, Float16, 50)
-    end
+        result = @with_pool pool begin
+            helper_fallback!(pool)
+            others_n_active(pool, UInt8)
+        end
+        @test result == 1
 
-    result = @with_pool pool begin
-        helper_fallback!(pool)
-        others_n_active(pool, UInt8)
+        # After scope: verify task-local pool rewound fallback allocations
+        task_pool = AdaptiveArrayPools.get_task_local_pool()
+        @test others_n_active(task_pool, UInt8) == 0
+        @test others_n_active(task_pool, Float16) == 0
     end
-    @test result == 1
-
-    # After scope: verify task-local pool rewound fallback allocations
-    task_pool = AdaptiveArrayPools.get_task_local_pool()
-    @test others_n_active(task_pool, UInt8) == 0
-    @test others_n_active(task_pool, Float16) == 0
-end
 
-@testset "3b. @with_pool with static fallback type" begin
-    # acquire!(pool, UInt8, ...) — UInt8 is not a fixed slot, so macro
-    # goes through lazy path (since _fixed_slot_bit(UInt8) == 0)
-    result = @with_pool pool begin
-        v = acquire!(pool, UInt8, 100)
-        length(v)
+    @testset "3b. @with_pool with static fallback type" begin
+        # acquire!(pool, UInt8, ...) — UInt8 is not a fixed slot, so macro
+        # goes through lazy path (since _fixed_slot_bit(UInt8) == 0)
+        result = @with_pool pool begin
+            v = acquire!(pool, UInt8, 100)
+            length(v)
+        end
+        @test result == 100
     end
-    @test result == 100
-end
 
-@testset "3c. Nested @with_pool with fallback types" begin
-    result = @with_pool p1 begin
-        a = acquire!(p1, UInt8, 10)
-        inner = @with_pool p2 begin
-            b = acquire!(p2, UInt8, 20)
-            others_n_active(p2, UInt8)
+    @testset "3c. Nested @with_pool with fallback types" begin
+        result = @with_pool p1 begin
+            a = acquire!(p1, UInt8, 10)
+            inner = @with_pool p2 begin
+                b = acquire!(p2, UInt8, 20)
+                others_n_active(p2, UInt8)
+            end
+            # After inner scope rewinds, the task-local pool's UInt8 n_active
+            # should be back to 1 (only 'a' from outer scope)
+            (inner, others_n_active(p1, UInt8))
         end
-        # After inner scope rewinds, the task-local pool's UInt8 n_active
-        # should be back to 1 (only 'a' from outer scope)
-        (inner, others_n_active(p1, UInt8))
+        @test result[1] == 2   # inner scope had 2 (1 from outer + 1 from inner)
+        @test result[2] == 1   # after inner rewind, back to 1
     end
-    @test result[1] == 2   # inner scope had 2 (1 from outer + 1 from inner)
-    @test result[2] == 1   # after inner rewind, back to 1
-end
 
-# ==============================================================================
-# 4. Lazy Mode with Fallback Types
-# ==============================================================================
+    # ==============================================================================
+    # 4. Lazy Mode with Fallback Types
+    # ==============================================================================
 
-@testset "4. Lazy checkpoint/rewind with fallback types" begin
-    pool = AdaptiveArrayPool()
+    @testset "4. Lazy checkpoint/rewind with fallback types" begin
+        pool = AdaptiveArrayPool()
 
-    _lazy_checkpoint!(pool)
-    v1 = acquire!(pool, UInt8, 10)
-    v2 = acquire!(pool, Float16, 20)
+        _lazy_checkpoint!(pool)
+        v1 = acquire!(pool, UInt8, 10)
+        v2 = acquire!(pool, Float16, 20)
 
-    @test others_n_active(pool, UInt8) == 1
-    @test others_n_active(pool, Float16) == 1
-    @test pool._touched_has_others[pool._current_depth] == true
+        @test others_n_active(pool, UInt8) == 1
+        @test others_n_active(pool, Float16) == 1
+        @test pool._touched_has_others[pool._current_depth] == true
 
-    _lazy_rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
-end
+        _lazy_rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
+    end
 
-@testset "4b. Lazy mode: pre-existing others get eagerly checkpointed" begin
-    pool = AdaptiveArrayPool()
+    @testset "4b. Lazy mode: pre-existing others get eagerly checkpointed" begin
+        pool = AdaptiveArrayPool()
 
-    # Pre-populate at global scope
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    @test others_n_active(pool, UInt8) == 1
-    rewind!(pool)
+        # Pre-populate at global scope
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 10)
+        @test others_n_active(pool, UInt8) == 1
+        rewind!(pool)
 
-    # UInt8 pool exists now with n_active=0
-    @test haskey(pool.others, UInt8)
-    @test others_n_active(pool, UInt8) == 0
+        # UInt8 pool exists now with n_active=0
+        @test haskey(pool.others, UInt8)
+        @test others_n_active(pool, UInt8) == 0
 
-    # Now use lazy mode — pre-existing UInt8 should be eagerly checkpointed
-    _lazy_checkpoint!(pool)
-    acquire!(pool, UInt8, 20)
-    @test others_n_active(pool, UInt8) == 1
+        # Now use lazy mode — pre-existing UInt8 should be eagerly checkpointed
+        _lazy_checkpoint!(pool)
+        acquire!(pool, UInt8, 20)
+        @test others_n_active(pool, UInt8) == 1
 
-    _lazy_rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+        _lazy_rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-@testset "4c. Lazy mode: new fallback type created during scope" begin
-    pool = AdaptiveArrayPool()
+    @testset "4c. Lazy mode: new fallback type created during scope" begin
+        pool = AdaptiveArrayPool()
 
-    _lazy_checkpoint!(pool)
-    # Int16 doesn't exist yet — created inside lazy scope
-    v = acquire!(pool, Int16, 10)
-    @test others_n_active(pool, Int16) == 1
+        _lazy_checkpoint!(pool)
+        # Int16 doesn't exist yet — created inside lazy scope
+        v = acquire!(pool, Int16, 10)
+        @test others_n_active(pool, Int16) == 1
 
-    _lazy_rewind!(pool)
-    # Should revert to 0 (auto-checkpoint sentinel covers new types)
-    @test others_n_active(pool, Int16) == 0
-end
+        _lazy_rewind!(pool)
+        # Should revert to 0 (auto-checkpoint sentinel covers new types)
+        @test others_n_active(pool, Int16) == 0
+    end
 
-# ==============================================================================
-# 5. Typed-Lazy Mode with Fallback Types
-# ==============================================================================
+    # ==============================================================================
+    # 5. Typed-Lazy Mode with Fallback Types
+    # ==============================================================================
 
-@testset "5. Typed-lazy checkpoint/rewind with fallback types" begin
-    pool = AdaptiveArrayPool()
+    @testset "5. Typed-lazy checkpoint/rewind with fallback types" begin
+        pool = AdaptiveArrayPool()
 
-    # Pre-populate a fallback type
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
+        # Pre-populate a fallback type
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 10)
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
 
-    # Parent acquires UInt8
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 5)
-    @test others_n_active(pool, UInt8) == 1
+        # Parent acquires UInt8
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 5)
+        @test others_n_active(pool, UInt8) == 1
 
-    # Child typed-lazy scope tracking Float64
-    _typed_lazy_checkpoint!(pool, Float64)
-    acquire!(pool, UInt8, 15)  # Helper touches fallback type
-    @test others_n_active(pool, UInt8) == 2
+        # Child typed-lazy scope tracking Float64
+        _typed_lazy_checkpoint!(pool, Float64)
+        acquire!(pool, UInt8, 15)  # Helper touches fallback type
+        @test others_n_active(pool, UInt8) == 2
 
-    _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
-    # Must restore parent's n_active=1 (NOT 0)
-    @test others_n_active(pool, UInt8) == 1
+        _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
+        # Must restore parent's n_active=1 (NOT 0)
+        @test others_n_active(pool, UInt8) == 1
 
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-@testset "5b. Typed-lazy with new fallback type in child scope" begin
-    pool = AdaptiveArrayPool()
+    @testset "5b. Typed-lazy with new fallback type in child scope" begin
+        pool = AdaptiveArrayPool()
 
-    checkpoint!(pool)
-    # Parent doesn't use any fallback types
+        checkpoint!(pool)
+        # Parent doesn't use any fallback types
 
-    _typed_lazy_checkpoint!(pool, Float64)
-    # Child creates new fallback type
-    acquire!(pool, Int16, 10)
-    @test others_n_active(pool, Int16) == 1
+        _typed_lazy_checkpoint!(pool, Float64)
+        # Child creates new fallback type
+        acquire!(pool, Int16, 10)
+        @test others_n_active(pool, Int16) == 1
 
-    _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
-    @test others_n_active(pool, Int16) == 0
+        _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
+        @test others_n_active(pool, Int16) == 0
 
-    rewind!(pool)
-end
+        rewind!(pool)
+    end
 
-# ==============================================================================
-# 6. Mixed Fixed + Fallback Types
-# ==============================================================================
+    # ==============================================================================
+    # 6. Mixed Fixed + Fallback Types
+    # ==============================================================================
 
-@testset "6. Mixed fixed and fallback types rewind correctly" begin
-    pool = AdaptiveArrayPool()
+    @testset "6. Mixed fixed and fallback types rewind correctly" begin
+        pool = AdaptiveArrayPool()
 
-    checkpoint!(pool)
-    f64_v = acquire!(pool, Float64, 10)
-    u8_v = acquire!(pool, UInt8, 20)
-    i32_v = acquire!(pool, Int32, 30)
-    f16_v = acquire!(pool, Float16, 40)
+        checkpoint!(pool)
+        f64_v = acquire!(pool, Float64, 10)
+        u8_v = acquire!(pool, UInt8, 20)
+        i32_v = acquire!(pool, Int32, 30)
+        f16_v = acquire!(pool, Float16, 40)
 
-    @test pool.float64.n_active == 1
-    @test pool.int32.n_active == 1
-    @test others_n_active(pool, UInt8) == 1
-    @test others_n_active(pool, Float16) == 1
+        @test pool.float64.n_active == 1
+        @test pool.int32.n_active == 1
+        @test others_n_active(pool, UInt8) == 1
+        @test others_n_active(pool, Float16) == 1
 
-    rewind!(pool)
+        rewind!(pool)
 
-    @test pool.float64.n_active == 0
-    @test pool.int32.n_active == 0
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
-end
+        @test pool.float64.n_active == 0
+        @test pool.int32.n_active == 0
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
+    end
 
-@testset "6b. Lazy mode: mixed fixed + fallback" begin
-    pool = AdaptiveArrayPool()
+    @testset "6b. Lazy mode: mixed fixed + fallback" begin
+        pool = AdaptiveArrayPool()
 
-    _lazy_checkpoint!(pool)
-    acquire!(pool, Float64, 10)
-    acquire!(pool, UInt8, 20)
-    acquire!(pool, Int32, 30)
-    acquire!(pool, Float16, 40)
+        _lazy_checkpoint!(pool)
+        acquire!(pool, Float64, 10)
+        acquire!(pool, UInt8, 20)
+        acquire!(pool, Int32, 30)
+        acquire!(pool, Float16, 40)
 
-    @test pool.float64.n_active == 1
-    @test pool.int32.n_active == 1
-    @test others_n_active(pool, UInt8) == 1
-    @test others_n_active(pool, Float16) == 1
+        @test pool.float64.n_active == 1
+        @test pool.int32.n_active == 1
+        @test others_n_active(pool, UInt8) == 1
+        @test others_n_active(pool, Float16) == 1
 
-    _lazy_rewind!(pool)
+        _lazy_rewind!(pool)
 
-    @test pool.float64.n_active == 0
-    @test pool.int32.n_active == 0
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
-end
+        @test pool.float64.n_active == 0
+        @test pool.int32.n_active == 0
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
+    end
 
-# ==============================================================================
-# 7. Fallback Type Rewind → Re-acquire Cycle
-# ==============================================================================
+    # ==============================================================================
+    # 7. Fallback Type Rewind → Re-acquire Cycle
+    # ==============================================================================
 
-@testset "7. Rewind then re-acquire fallback type reuses memory" begin
-    pool = AdaptiveArrayPool()
-
-    # First cycle
-    checkpoint!(pool)
-    v1 = acquire!(pool, UInt8, 100)
-    v1 .= 0x42
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-
-    # Second cycle — should reuse existing backing vector
-    checkpoint!(pool)
-    v2 = acquire!(pool, UInt8, 100)
-    @test others_n_active(pool, UInt8) == 1
-    # The backing vector should be reused (same object)
-    @test parent(v1) === parent(v2)
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+    @testset "7. Rewind then re-acquire fallback type reuses memory" begin
+        pool = AdaptiveArrayPool()
 
-# ==============================================================================
-# 8. Checkpoint Stack Invariants (No Stack Leak)
-# ==============================================================================
+        # First cycle
+        checkpoint!(pool)
+        v1 = acquire!(pool, UInt8, 100)
+        v1 .= 0x42
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
 
-@testset "8. Full checkpoint: no stack leak over many iterations" begin
-    pool = AdaptiveArrayPool()
+        # Second cycle — should reuse existing backing vector
+        checkpoint!(pool)
+        v2 = acquire!(pool, UInt8, 100)
+        @test others_n_active(pool, UInt8) == 1
+        # The backing vector should be reused (same object)
+        @test parent(v1) === parent(v2)
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-    # Pre-populate
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    rewind!(pool)
+    # ==============================================================================
+    # 8. Checkpoint Stack Invariants (No Stack Leak)
+    # ==============================================================================
 
-    uint8_pool = pool.others[UInt8]
-    initial_stack_len = length(uint8_pool._checkpoint_depths)
+    @testset "8. Full checkpoint: no stack leak over many iterations" begin
+        pool = AdaptiveArrayPool()
 
-    for _ in 1:100
+        # Pre-populate
         checkpoint!(pool)
         acquire!(pool, UInt8, 10)
         rewind!(pool)
-    end
 
-    @test length(uint8_pool._checkpoint_depths) == initial_stack_len
-    @test others_n_active(pool, UInt8) == 0
-end
+        uint8_pool = pool.others[UInt8]
+        initial_stack_len = length(uint8_pool._checkpoint_depths)
 
-@testset "8b. Lazy checkpoint: no stack leak over many iterations" begin
-    pool = AdaptiveArrayPool()
+        for _ in 1:100
+            checkpoint!(pool)
+            acquire!(pool, UInt8, 10)
+            rewind!(pool)
+        end
 
-    # Pre-populate
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    rewind!(pool)
+        @test length(uint8_pool._checkpoint_depths) == initial_stack_len
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-    uint8_pool = pool.others[UInt8]
-    initial_stack_len = length(uint8_pool._checkpoint_depths)
+    @testset "8b. Lazy checkpoint: no stack leak over many iterations" begin
+        pool = AdaptiveArrayPool()
 
-    for _ in 1:100
-        _lazy_checkpoint!(pool)
+        # Pre-populate
+        checkpoint!(pool)
         acquire!(pool, UInt8, 10)
-        _lazy_rewind!(pool)
-    end
+        rewind!(pool)
 
-    @test length(uint8_pool._checkpoint_depths) == initial_stack_len
-    @test others_n_active(pool, UInt8) == 0
-end
+        uint8_pool = pool.others[UInt8]
+        initial_stack_len = length(uint8_pool._checkpoint_depths)
 
-@testset "8c. Typed-lazy checkpoint: no stack leak over many iterations" begin
-    pool = AdaptiveArrayPool()
+        for _ in 1:100
+            _lazy_checkpoint!(pool)
+            acquire!(pool, UInt8, 10)
+            _lazy_rewind!(pool)
+        end
 
-    # Pre-populate
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    rewind!(pool)
+        @test length(uint8_pool._checkpoint_depths) == initial_stack_len
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-    uint8_pool = pool.others[UInt8]
-    initial_stack_len = length(uint8_pool._checkpoint_depths)
+    @testset "8c. Typed-lazy checkpoint: no stack leak over many iterations" begin
+        pool = AdaptiveArrayPool()
 
-    for _ in 1:100
-        _typed_lazy_checkpoint!(pool, Float64)
+        # Pre-populate
+        checkpoint!(pool)
         acquire!(pool, UInt8, 10)
-        _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
-    end
+        rewind!(pool)
 
-    @test length(uint8_pool._checkpoint_depths) == initial_stack_len
-    @test others_n_active(pool, UInt8) == 0
-end
+        uint8_pool = pool.others[UInt8]
+        initial_stack_len = length(uint8_pool._checkpoint_depths)
 
-@testset "8d. @with_pool macro: no stack leak over many iterations" begin
-    pool_ref = Ref{AdaptiveArrayPool}()
+        for _ in 1:100
+            _typed_lazy_checkpoint!(pool, Float64)
+            acquire!(pool, UInt8, 10)
+            _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
+        end
 
-    # Pre-populate
-    @with_pool pool begin
-        acquire!(pool, UInt8, 10)
-        pool_ref[] = pool
+        @test length(uint8_pool._checkpoint_depths) == initial_stack_len
+        @test others_n_active(pool, UInt8) == 0
     end
 
-    uint8_pool = pool_ref[].others[UInt8]
-    initial_stack_len = length(uint8_pool._checkpoint_depths)
+    @testset "8d. @with_pool macro: no stack leak over many iterations" begin
+        pool_ref = Ref{AdaptiveArrayPool}()
 
-    for _ in 1:100
+        # Pre-populate
         @with_pool pool begin
             acquire!(pool, UInt8, 10)
+            pool_ref[] = pool
         end
-    end
-
-    @test length(uint8_pool._checkpoint_depths) == initial_stack_len
-    @test uint8_pool.n_active == 0
-end
 
-# ==============================================================================
-# 9. n_active Monotonicity (Memory Leak Detection)
-# ==============================================================================
+        uint8_pool = pool_ref[].others[UInt8]
+        initial_stack_len = length(uint8_pool._checkpoint_depths)
 
-@testset "9. n_active doesn't grow over repeated checkpoint/rewind cycles" begin
-    pool = AdaptiveArrayPool()
-    fallback_types = [UInt8, Float16, Int16]
+        for _ in 1:100
+            @with_pool pool begin
+                acquire!(pool, UInt8, 10)
+            end
+        end
 
-    # Pre-populate all types
-    checkpoint!(pool)
-    for T in fallback_types
-        acquire!(pool, T, 10)
+        @test length(uint8_pool._checkpoint_depths) == initial_stack_len
+        @test uint8_pool.n_active == 0
     end
-    rewind!(pool)
 
-    # Run 200 iterations — n_active should always return to 0
-    for iter in 1:200
+    # ==============================================================================
+    # 9. n_active Monotonicity (Memory Leak Detection)
+    # ==============================================================================
+
+    @testset "9. n_active doesn't grow over repeated checkpoint/rewind cycles" begin
+        pool = AdaptiveArrayPool()
+        fallback_types = [UInt8, Float16, Int16]
+
+        # Pre-populate all types
         checkpoint!(pool)
         for T in fallback_types
             acquire!(pool, T, 10)
         end
         rewind!(pool)
 
-        for T in fallback_types
-            n = others_n_active(pool, T)
-            if n != 0
-                @test n == 0  # Will show which type leaked
-                @info "LEAK DETECTED" iteration=iter type=T n_active=n
-                break
+        # Run 200 iterations — n_active should always return to 0
+        for iter in 1:200
+            checkpoint!(pool)
+            for T in fallback_types
+                acquire!(pool, T, 10)
             end
-        end
-    end
-
-    # Final check
-    for T in fallback_types
-        @test others_n_active(pool, T) == 0
-    end
-end
+            rewind!(pool)
 
-@testset "9b. Lazy mode: n_active doesn't grow over iterations" begin
-    pool = AdaptiveArrayPool()
+            for T in fallback_types
+                n = others_n_active(pool, T)
+                if n != 0
+                    @test n == 0  # Will show which type leaked
+                    @info "LEAK DETECTED" iteration = iter type = T n_active = n
+                    break
+                end
+            end
+        end
 
-    for iter in 1:200
-        _lazy_checkpoint!(pool)
-        acquire!(pool, UInt8, 10)
-        acquire!(pool, Float16, 20)
-        _lazy_rewind!(pool)
+        # Final check
+        for T in fallback_types
+            @test others_n_active(pool, T) == 0
+        end
     end
 
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
-end
+    @testset "9b. Lazy mode: n_active doesn't grow over iterations" begin
+        pool = AdaptiveArrayPool()
 
-@testset "9c. @with_pool: n_active doesn't grow over iterations" begin
-    for iter in 1:200
-        @with_pool pool begin
+        for iter in 1:200
+            _lazy_checkpoint!(pool)
             acquire!(pool, UInt8, 10)
             acquire!(pool, Float16, 20)
+            _lazy_rewind!(pool)
         end
-    end
 
-    # Verify task-local pool is clean
-    pool = AdaptiveArrayPools.get_task_local_pool()
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
-end
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
+    end
 
-# ==============================================================================
-# 10. Backing Vector Count Stability (Pool Growth Detection)
-# ==============================================================================
+    @testset "9c. @with_pool: n_active doesn't grow over iterations" begin
+        for iter in 1:200
+            @with_pool pool begin
+                acquire!(pool, UInt8, 10)
+                acquire!(pool, Float16, 20)
+            end
+        end
 
-@testset "10. Pool vectors don't grow over checkpoint/rewind cycles" begin
-    pool = AdaptiveArrayPool()
+        # Verify task-local pool is clean
+        pool = AdaptiveArrayPools.get_task_local_pool()
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
+    end
 
-    # Warmup: first cycle creates backing vectors
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 100)
-    acquire!(pool, UInt8, 200)
-    acquire!(pool, Float16, 50)
-    rewind!(pool)
+    # ==============================================================================
+    # 10. Backing Vector Count Stability (Pool Growth Detection)
+    # ==============================================================================
 
-    # Record vector counts after warmup
-    u8_vec_count = length(pool.others[UInt8].vectors)
-    f16_vec_count = length(pool.others[Float16].vectors)
+    @testset "10. Pool vectors don't grow over checkpoint/rewind cycles" begin
+        pool = AdaptiveArrayPool()
 
-    # Run many iterations — vector count should stay stable
-    for _ in 1:100
+        # Warmup: first cycle creates backing vectors
         checkpoint!(pool)
         acquire!(pool, UInt8, 100)
         acquire!(pool, UInt8, 200)
         acquire!(pool, Float16, 50)
         rewind!(pool)
-    end
 
-    @test length(pool.others[UInt8].vectors) == u8_vec_count
-    @test length(pool.others[Float16].vectors) == f16_vec_count
-end
+        # Record vector counts after warmup
+        u8_vec_count = length(pool.others[UInt8].vectors)
+        f16_vec_count = length(pool.others[Float16].vectors)
 
-@testset "10b. Lazy mode: pool vectors don't grow" begin
-    pool = AdaptiveArrayPool()
+        # Run many iterations — vector count should stay stable
+        for _ in 1:100
+            checkpoint!(pool)
+            acquire!(pool, UInt8, 100)
+            acquire!(pool, UInt8, 200)
+            acquire!(pool, Float16, 50)
+            rewind!(pool)
+        end
 
-    # Warmup
-    _lazy_checkpoint!(pool)
-    acquire!(pool, UInt8, 100)
-    acquire!(pool, Float16, 50)
-    _lazy_rewind!(pool)
+        @test length(pool.others[UInt8].vectors) == u8_vec_count
+        @test length(pool.others[Float16].vectors) == f16_vec_count
+    end
 
-    u8_vec_count = length(pool.others[UInt8].vectors)
-    f16_vec_count = length(pool.others[Float16].vectors)
+    @testset "10b. Lazy mode: pool vectors don't grow" begin
+        pool = AdaptiveArrayPool()
 
-    for _ in 1:100
+        # Warmup
         _lazy_checkpoint!(pool)
         acquire!(pool, UInt8, 100)
         acquire!(pool, Float16, 50)
         _lazy_rewind!(pool)
-    end
 
-    @test length(pool.others[UInt8].vectors) == u8_vec_count
-    @test length(pool.others[Float16].vectors) == f16_vec_count
-end
-
-# ==============================================================================
-# 11. unsafe_acquire! with Fallback Types
-# ==============================================================================
+        u8_vec_count = length(pool.others[UInt8].vectors)
+        f16_vec_count = length(pool.others[Float16].vectors)
 
-@testset "11. unsafe_acquire! with fallback types" begin
-    pool = AdaptiveArrayPool()
-
-    checkpoint!(pool)
-    v = unsafe_acquire!(pool, UInt8, 10)
-    @test v isa Array{UInt8, 1}
-    @test length(v) == 10
-    @test others_n_active(pool, UInt8) == 1
+        for _ in 1:100
+            _lazy_checkpoint!(pool)
+            acquire!(pool, UInt8, 100)
+            acquire!(pool, Float16, 50)
+            _lazy_rewind!(pool)
+        end
 
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+        @test length(pool.others[UInt8].vectors) == u8_vec_count
+        @test length(pool.others[Float16].vectors) == f16_vec_count
+    end
 
-@testset "11b. unsafe_acquire! N-D with fallback types" begin
-    pool = AdaptiveArrayPool()
+    # ==============================================================================
+    # 11. unsafe_acquire! with Fallback Types
+    # ==============================================================================
 
-    checkpoint!(pool)
-    m = unsafe_acquire!(pool, UInt8, 3, 4)
-    @test m isa Array{UInt8, 2}
-    @test size(m) == (3, 4)
-    @test others_n_active(pool, UInt8) == 1
+    @testset "11. unsafe_acquire! with fallback types" begin
+        pool = AdaptiveArrayPool()
 
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+        checkpoint!(pool)
+        v = unsafe_acquire!(pool, UInt8, 10)
+        @test v isa Array{UInt8, 1}
+        @test length(v) == 10
+        @test others_n_active(pool, UInt8) == 1
 
-# ==============================================================================
-# 12. Convenience Functions with Fallback Types
-# ==============================================================================
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-@testset "12. zeros!/ones!/similar! with fallback types" begin
-    pool = AdaptiveArrayPool()
+    @testset "11b. unsafe_acquire! N-D with fallback types" begin
+        pool = AdaptiveArrayPool()
 
-    checkpoint!(pool)
-    z = zeros!(pool, UInt8, 10)
-    @test all(z .== 0)
-    @test others_n_active(pool, UInt8) == 1
+        checkpoint!(pool)
+        m = unsafe_acquire!(pool, UInt8, 3, 4)
+        @test m isa Array{UInt8, 2}
+        @test size(m) == (3, 4)
+        @test others_n_active(pool, UInt8) == 1
 
-    o = ones!(pool, UInt8, 10)
-    @test all(o .== 1)
-    @test others_n_active(pool, UInt8) == 2
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-    src = UInt8[1, 2, 3]
-    s = similar!(pool, src)
-    @test length(s) == 3
-    @test others_n_active(pool, UInt8) == 3
+    # ==============================================================================
+    # 12. Convenience Functions with Fallback Types
+    # ==============================================================================
 
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+    @testset "12. zeros!/ones!/similar! with fallback types" begin
+        pool = AdaptiveArrayPool()
 
-# ==============================================================================
-# 13. Exception Safety
-# ==============================================================================
+        checkpoint!(pool)
+        z = zeros!(pool, UInt8, 10)
+        @test all(z .== 0)
+        @test others_n_active(pool, UInt8) == 1
 
-@testset "13. Exception during fallback acquire doesn't leak" begin
-    pool = AdaptiveArrayPool()
+        o = ones!(pool, UInt8, 10)
+        @test all(o .== 1)
+        @test others_n_active(pool, UInt8) == 2
 
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    @test others_n_active(pool, UInt8) == 1
+        src = UInt8[1, 2, 3]
+        s = similar!(pool, src)
+        @test length(s) == 3
+        @test others_n_active(pool, UInt8) == 3
 
-    try
-        checkpoint!(pool)
-        acquire!(pool, UInt8, 20)
-        @test others_n_active(pool, UInt8) == 2
-        error("simulated failure")
-    catch
         rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
     end
 
-    @test others_n_active(pool, UInt8) == 1
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+    # ==============================================================================
+    # 13. Exception Safety
+    # ==============================================================================
 
-@testset "13b. @with_pool exception safety with fallback types" begin
-    try
-        @with_pool pool begin
-            acquire!(pool, UInt8, 10)
-            acquire!(pool, Float16, 20)
+    @testset "13. Exception during fallback acquire doesn't leak" begin
+        pool = AdaptiveArrayPool()
+
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 10)
+        @test others_n_active(pool, UInt8) == 1
+
+        try
+            checkpoint!(pool)
+            acquire!(pool, UInt8, 20)
+            @test others_n_active(pool, UInt8) == 2
             error("simulated failure")
+        catch
+            rewind!(pool)
         end
-    catch
-    end
-
-    # After exception + rewind via finally, pool should be clean
-    pool = AdaptiveArrayPools.get_task_local_pool()
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
-end
 
-# ==============================================================================
-# 14. Depth Tracking Consistency
-# ==============================================================================
+        @test others_n_active(pool, UInt8) == 1
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-@testset "14. _current_depth returns to 1 after cleanup" begin
-    pool = AdaptiveArrayPool()
-    @test pool._current_depth == 1
+    @testset "13b. @with_pool exception safety with fallback types" begin
+        try
+            @with_pool pool begin
+                acquire!(pool, UInt8, 10)
+                acquire!(pool, Float16, 20)
+                error("simulated failure")
+            end
+        catch
+        end
 
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    @test pool._current_depth == 2
+        # After exception + rewind via finally, pool should be clean
+        pool = AdaptiveArrayPools.get_task_local_pool()
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
+    end
 
-    checkpoint!(pool)
-    acquire!(pool, Float16, 20)
-    @test pool._current_depth == 3
+    # ==============================================================================
+    # 14. Depth Tracking Consistency
+    # ==============================================================================
 
-    rewind!(pool)
-    @test pool._current_depth == 2
+    @testset "14. _current_depth returns to 1 after cleanup" begin
+        pool = AdaptiveArrayPool()
+        @test pool._current_depth == 1
 
-    rewind!(pool)
-    @test pool._current_depth == 1
-end
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 10)
+        @test pool._current_depth == 2
 
-@testset "14b. _touched_has_others stack cleaned properly" begin
-    pool = AdaptiveArrayPool()
-    @test length(pool._touched_has_others) == 1  # sentinel
+        checkpoint!(pool)
+        acquire!(pool, Float16, 20)
+        @test pool._current_depth == 3
 
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    @test length(pool._touched_has_others) == 2
-    @test pool._touched_has_others[2] == true
+        rewind!(pool)
+        @test pool._current_depth == 2
 
-    rewind!(pool)
-    @test length(pool._touched_has_others) == 1  # back to sentinel
-end
+        rewind!(pool)
+        @test pool._current_depth == 1
+    end
 
-# ==============================================================================
-# 15. Custom Struct Types as Fallback
-# ==============================================================================
+    @testset "14b. _touched_has_others stack cleaned properly" begin
+        pool = AdaptiveArrayPool()
+        @test length(pool._touched_has_others) == 1  # sentinel
 
-@testset "15. Custom struct type as fallback" begin
-    pool = AdaptiveArrayPool()
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 10)
+        @test length(pool._touched_has_others) == 2
+        @test pool._touched_has_others[2] == true
 
-    checkpoint!(pool)
-    v = acquire!(pool, MyTestElement, 5)
-    @test v isa SubArray
-    @test length(v) == 5
-    @test eltype(v) == MyTestElement
-    @test others_n_active(pool, MyTestElement) == 1
+        rewind!(pool)
+        @test length(pool._touched_has_others) == 1  # back to sentinel
+    end
 
-    rewind!(pool)
-    @test others_n_active(pool, MyTestElement) == 0
-end
+    # ==============================================================================
+    # 15. Custom Struct Types as Fallback
+    # ==============================================================================
 
-@testset "15b. Custom struct: repeated cycles don't leak" begin
-    pool = AdaptiveArrayPool()
+    @testset "15. Custom struct type as fallback" begin
+        pool = AdaptiveArrayPool()
 
-    for _ in 1:50
         checkpoint!(pool)
-        acquire!(pool, MyTestElement, 10)
+        v = acquire!(pool, MyTestElement, 5)
+        @test v isa SubArray
+        @test length(v) == 5
+        @test eltype(v) == MyTestElement
+        @test others_n_active(pool, MyTestElement) == 1
+
         rewind!(pool)
+        @test others_n_active(pool, MyTestElement) == 0
     end
 
-    @test others_n_active(pool, MyTestElement) == 0
-    @test length(pool.others[MyTestElement].vectors) == 1  # reuses single backing
-end
-
-# ==============================================================================
-# 16. Full Mode ↔ Lazy Mode Transitions with Fallback
-# ==============================================================================
+    @testset "15b. Custom struct: repeated cycles don't leak" begin
+        pool = AdaptiveArrayPool()
 
-@testset "16. Parent full checkpoint, child lazy, fallback touched" begin
-    pool = AdaptiveArrayPool()
+        for _ in 1:50
+            checkpoint!(pool)
+            acquire!(pool, MyTestElement, 10)
+            rewind!(pool)
+        end
 
-    # Pre-populate UInt8
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    rewind!(pool)
+        @test others_n_active(pool, MyTestElement) == 0
+        @test length(pool.others[MyTestElement].vectors) == 1  # reuses single backing
+    end
 
-    # Parent: full checkpoint, acquires UInt8
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 5)
-    @test others_n_active(pool, UInt8) == 1
+    # ==============================================================================
+    # 16. Full Mode ↔ Lazy Mode Transitions with Fallback
+    # ==============================================================================
 
-    # Child: lazy checkpoint, touches same fallback type
-    _lazy_checkpoint!(pool)
-    acquire!(pool, UInt8, 15)
-    @test others_n_active(pool, UInt8) == 2
+    @testset "16. Parent full checkpoint, child lazy, fallback touched" begin
+        pool = AdaptiveArrayPool()
 
-    _lazy_rewind!(pool)
-    @test others_n_active(pool, UInt8) == 1  # parent's UInt8 preserved
+        # Pre-populate UInt8
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 10)
+        rewind!(pool)
 
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+        # Parent: full checkpoint, acquires UInt8
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 5)
+        @test others_n_active(pool, UInt8) == 1
 
-@testset "16b. Parent lazy, child full checkpoint, fallback touched" begin
-    pool = AdaptiveArrayPool()
+        # Child: lazy checkpoint, touches same fallback type
+        _lazy_checkpoint!(pool)
+        acquire!(pool, UInt8, 15)
+        @test others_n_active(pool, UInt8) == 2
 
-    _lazy_checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    @test others_n_active(pool, UInt8) == 1
+        _lazy_rewind!(pool)
+        @test others_n_active(pool, UInt8) == 1  # parent's UInt8 preserved
 
-    # Child: full checkpoint
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 20)
-    @test others_n_active(pool, UInt8) == 2
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 1
+    @testset "16b. Parent lazy, child full checkpoint, fallback touched" begin
+        pool = AdaptiveArrayPool()
 
-    _lazy_rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+        _lazy_checkpoint!(pool)
+        acquire!(pool, UInt8, 10)
+        @test others_n_active(pool, UInt8) == 1
 
-@testset "16c. Parent full, child typed-lazy (Float64), helper touches fallback" begin
-    pool = AdaptiveArrayPool()
+        # Child: full checkpoint
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 20)
+        @test others_n_active(pool, UInt8) == 2
 
-    # Pre-populate
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 10)
-    rewind!(pool)
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 1
 
-    # Parent full checkpoint
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 5)
-    @test others_n_active(pool, UInt8) == 1
+        _lazy_rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-    # Child typed-lazy tracking Float64, helper acquires UInt8
-    _typed_lazy_checkpoint!(pool, Float64)
-    acquire!(pool, Float64, 10)  # tracked type
-    acquire!(pool, UInt8, 15)    # untracked fallback
-    @test others_n_active(pool, UInt8) == 2
+    @testset "16c. Parent full, child typed-lazy (Float64), helper touches fallback" begin
+        pool = AdaptiveArrayPool()
 
-    _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
-    @test others_n_active(pool, UInt8) == 1  # parent's UInt8 preserved
-    @test pool.float64.n_active == 0  # tracked type also cleaned
+        # Pre-populate
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 10)
+        rewind!(pool)
 
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+        # Parent full checkpoint
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 5)
+        @test others_n_active(pool, UInt8) == 1
 
-# ==============================================================================
-# 17. Stress Test: Simulated Realistic Workload
-# ==============================================================================
+        # Child typed-lazy tracking Float64, helper acquires UInt8
+        _typed_lazy_checkpoint!(pool, Float64)
+        acquire!(pool, Float64, 10)  # tracked type
+        acquire!(pool, UInt8, 15)    # untracked fallback
+        @test others_n_active(pool, UInt8) == 2
 
-@testset "17. Realistic workload: nested function calls with fallback types" begin
-    pool = AdaptiveArrayPool()
+        _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
+        @test others_n_active(pool, UInt8) == 1  # parent's UInt8 preserved
+        @test pool.float64.n_active == 0  # tracked type also cleaned
 
-    function inner_compute!(pool)
-        a = acquire!(pool, UInt8, 100)
-        b = acquire!(pool, Float16, 50)
-        a .= 0x01
-        b .= Float16(2.0)
-        sum(a) + sum(b)
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
     end
 
-    function middle_compute!(pool)
-        checkpoint!(pool)
-        try
-            x = acquire!(pool, Float64, 10)
-            x .= 1.0
-            result = inner_compute!(pool)
-            return sum(x) + result
-        finally
-            rewind!(pool)
+    # ==============================================================================
+    # 17. Stress Test: Simulated Realistic Workload
+    # ==============================================================================
+
+    @testset "17. Realistic workload: nested function calls with fallback types" begin
+        pool = AdaptiveArrayPool()
+
+        function inner_compute!(pool)
+            a = acquire!(pool, UInt8, 100)
+            b = acquire!(pool, Float16, 50)
+            a .= 0x01
+            b .= Float16(2.0)
+            sum(a) + sum(b)
         end
-    end
 
-    # Outer scope
-    for _ in 1:100
-        checkpoint!(pool)
-        try
-            r = middle_compute!(pool)
-            @test r ≈ 10.0 + 100.0 + 100.0  # 10 Float64 + 100 UInt8 + 50 Float16
-        finally
-            rewind!(pool)
+        function middle_compute!(pool)
+            checkpoint!(pool)
+            try
+                x = acquire!(pool, Float64, 10)
+                x .= 1.0
+                result = inner_compute!(pool)
+                return sum(x) + result
+            finally
+                rewind!(pool)
+            end
         end
-    end
 
-    # After all iterations: no leaks
-    @test pool.float64.n_active == 0
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
+        # Outer scope
+        for _ in 1:100
+            checkpoint!(pool)
+            try
+                r = middle_compute!(pool)
+                @test r ≈ 10.0 + 100.0 + 100.0  # 10 Float64 + 100 UInt8 + 50 Float16
+            finally
+                rewind!(pool)
+            end
+        end
 
-    # Backing vectors: should not have grown
-    @test length(pool.others[UInt8].vectors) == 1
-    @test length(pool.others[Float16].vectors) == 1
-end
+        # After all iterations: no leaks
+        @test pool.float64.n_active == 0
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
 
-@testset "17b. @with_pool stress: 500 iterations with multiple fallback types" begin
-    # Warmup cycle to populate task-local pool (may already have entries from prior tests)
-    @with_pool pool begin
-        acquire!(pool, UInt8, 10)
-        acquire!(pool, Float16, 20)
-        acquire!(pool, Int16, 30)
+        # Backing vectors: should not have grown
+        @test length(pool.others[UInt8].vectors) == 1
+        @test length(pool.others[Float16].vectors) == 1
     end
 
-    pool = AdaptiveArrayPools.get_task_local_pool()
-    u8_baseline = length(pool.others[UInt8].vectors)
-    f16_baseline = length(pool.others[Float16].vectors)
-    i16_baseline = length(pool.others[Int16].vectors)
-
-    for _ in 1:500
+    @testset "17b. @with_pool stress: 500 iterations with multiple fallback types" begin
+        # Warmup cycle to populate task-local pool (may already have entries from prior tests)
         @with_pool pool begin
             acquire!(pool, UInt8, 10)
             acquire!(pool, Float16, 20)
             acquire!(pool, Int16, 30)
         end
-    end
 
-    pool = AdaptiveArrayPools.get_task_local_pool()
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
-    @test others_n_active(pool, Int16) == 0
+        pool = AdaptiveArrayPools.get_task_local_pool()
+        u8_baseline = length(pool.others[UInt8].vectors)
+        f16_baseline = length(pool.others[Float16].vectors)
+        i16_baseline = length(pool.others[Int16].vectors)
 
-    # Backing vectors should not have grown beyond warmup baseline
-    @test length(pool.others[UInt8].vectors) == u8_baseline
-    @test length(pool.others[Float16].vectors) == f16_baseline
-    @test length(pool.others[Int16].vectors) == i16_baseline
-end
-
-# ==============================================================================
-# 18. Memory Leak Canary: Total Pool Size Stability
-# ==============================================================================
-
-@testset "18. Total others pool size doesn't grow unbounded" begin
-    pool = AdaptiveArrayPool()
-
-    # Warmup
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 1000)
-    acquire!(pool, Float16, 1000)
-    rewind!(pool)
-
-    # Measure baseline memory footprint
-    function total_backing_bytes(pool)
-        total = 0
-        for (T, tp) in pool.others
-            for v in tp.vectors
-                total += sizeof(v)
+        for _ in 1:500
+            @with_pool pool begin
+                acquire!(pool, UInt8, 10)
+                acquire!(pool, Float16, 20)
+                acquire!(pool, Int16, 30)
             end
         end
-        total
+
+        pool = AdaptiveArrayPools.get_task_local_pool()
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
+        @test others_n_active(pool, Int16) == 0
+
+        # Backing vectors should not have grown beyond warmup baseline
+        @test length(pool.others[UInt8].vectors) == u8_baseline
+        @test length(pool.others[Float16].vectors) == f16_baseline
+        @test length(pool.others[Int16].vectors) == i16_baseline
     end
 
-    baseline_bytes = total_backing_bytes(pool)
+    # ==============================================================================
+    # 18. Memory Leak Canary: Total Pool Size Stability
+    # ==============================================================================
 
-    # Run many cycles
-    for _ in 1:500
+    @testset "18. Total others pool size doesn't grow unbounded" begin
+        pool = AdaptiveArrayPool()
+
+        # Warmup
         checkpoint!(pool)
         acquire!(pool, UInt8, 1000)
         acquire!(pool, Float16, 1000)
         rewind!(pool)
+
+        # Measure baseline memory footprint
+        function total_backing_bytes(pool)
+            total = 0
+            for (T, tp) in pool.others
+                for v in tp.vectors
+                    total += sizeof(v)
+                end
+            end
+            total
+        end
+
+        baseline_bytes = total_backing_bytes(pool)
+
+        # Run many cycles
+        for _ in 1:500
+            checkpoint!(pool)
+            acquire!(pool, UInt8, 1000)
+            acquire!(pool, Float16, 1000)
+            rewind!(pool)
+        end
+
+        final_bytes = total_backing_bytes(pool)
+        @test final_bytes == baseline_bytes  # No growth
     end
 
-    final_bytes = total_backing_bytes(pool)
-    @test final_bytes == baseline_bytes  # No growth
-end
+    # ==============================================================================
+    # 19. reset! and empty! Properly Handle Fallback Types
+    # ==============================================================================
 
-# ==============================================================================
-# 19. reset! and empty! Properly Handle Fallback Types
-# ==============================================================================
+    @testset "19. reset! clears fallback n_active but preserves vectors" begin
+        pool = AdaptiveArrayPool()
 
-@testset "19. reset! clears fallback n_active but preserves vectors" begin
-    pool = AdaptiveArrayPool()
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 100)
+        acquire!(pool, Float16, 200)
+        # Don't rewind — simulate leaked state
 
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 100)
-    acquire!(pool, Float16, 200)
-    # Don't rewind — simulate leaked state
+        reset!(pool)
 
-    reset!(pool)
+        @test others_n_active(pool, UInt8) == 0
+        @test others_n_active(pool, Float16) == 0
+        @test pool._current_depth == 1
+        # Vectors should be preserved for reuse
+        @test length(pool.others[UInt8].vectors) == 1
+        @test length(pool.others[Float16].vectors) == 1
+    end
 
-    @test others_n_active(pool, UInt8) == 0
-    @test others_n_active(pool, Float16) == 0
-    @test pool._current_depth == 1
-    # Vectors should be preserved for reuse
-    @test length(pool.others[UInt8].vectors) == 1
-    @test length(pool.others[Float16].vectors) == 1
-end
+    @testset "19b. empty! clears fallback types completely" begin
+        pool = AdaptiveArrayPool()
 
-@testset "19b. empty! clears fallback types completely" begin
-    pool = AdaptiveArrayPool()
+        checkpoint!(pool)
+        acquire!(pool, UInt8, 100)
+        acquire!(pool, Float16, 200)
+        rewind!(pool)
 
-    checkpoint!(pool)
-    acquire!(pool, UInt8, 100)
-    acquire!(pool, Float16, 200)
-    rewind!(pool)
+        empty!(pool)
 
-    empty!(pool)
+        @test isempty(pool.others)
+        @test pool._current_depth == 1
+    end
 
-    @test isempty(pool.others)
-    @test pool._current_depth == 1
-end
+    # ==============================================================================
+    # 20. Edge Case: Acquire Zero-Length Array of Fallback Type
+    # ==============================================================================
 
-# ==============================================================================
-# 20. Edge Case: Acquire Zero-Length Array of Fallback Type
-# ==============================================================================
+    @testset "20. Zero-length fallback array acquire/rewind" begin
+        pool = AdaptiveArrayPool()
 
-@testset "20. Zero-length fallback array acquire/rewind" begin
-    pool = AdaptiveArrayPool()
+        checkpoint!(pool)
+        v = acquire!(pool, UInt8, 0)
+        @test length(v) == 0
+        @test others_n_active(pool, UInt8) == 1
 
-    checkpoint!(pool)
-    v = acquire!(pool, UInt8, 0)
-    @test length(v) == 0
-    @test others_n_active(pool, UInt8) == 1
+        rewind!(pool)
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-    rewind!(pool)
-    @test others_n_active(pool, UInt8) == 0
-end
+    # ==============================================================================
+    # 21. Parametric Dual-Like Type: Basic Reclamation
+    # ==============================================================================
+    # ForwardDiff.Dual{Tag{f}, V, N} — each unique parameterization is a DIFFERENT
+    # concrete type, creating separate pool.others entries. This tests that pool
+    # correctly handles multiple parametric variants of the same "family" of types.
 
-# ==============================================================================
-# 21. Parametric Dual-Like Type: Basic Reclamation
-# ==============================================================================
-# ForwardDiff.Dual{Tag{f}, V, N} — each unique parameterization is a DIFFERENT
-# concrete type, creating separate pool.others entries. This tests that pool
-# correctly handles multiple parametric variants of the same "family" of types.
+    @testset "21. Dual-like parametric type: basic acquire/rewind" begin
+        pool = AdaptiveArrayPool()
 
-@testset "21. Dual-like parametric type: basic acquire/rewind" begin
-    pool = AdaptiveArrayPool()
+        checkpoint!(pool)
+        v = acquire!(pool, Dual_f1_11, 10)
+        @test eltype(v) == Dual_f1_11
+        @test others_n_active(pool, Dual_f1_11) == 1
 
-    checkpoint!(pool)
-    v = acquire!(pool, Dual_f1_11, 10)
-    @test eltype(v) == Dual_f1_11
-    @test others_n_active(pool, Dual_f1_11) == 1
+        rewind!(pool)
+        @test others_n_active(pool, Dual_f1_11) == 0
+    end
 
-    rewind!(pool)
-    @test others_n_active(pool, Dual_f1_11) == 0
-end
+    @testset "21b. Dual-like: different param variants are separate pool entries" begin
+        pool = AdaptiveArrayPool()
 
-@testset "21b. Dual-like: different param variants are separate pool entries" begin
-    pool = AdaptiveArrayPool()
+        checkpoint!(pool)
+        # Three different Dual variants — each gets its own IdDict entry
+        acquire!(pool, Dual_f1_11, 10)
+        acquire!(pool, Dual_f1_4, 20)
+        acquire!(pool, Dual_f2_11, 30)
 
-    checkpoint!(pool)
-    # Three different Dual variants — each gets its own IdDict entry
-    acquire!(pool, Dual_f1_11, 10)
-    acquire!(pool, Dual_f1_4, 20)
-    acquire!(pool, Dual_f2_11, 30)
+        @test length(pool.others) == 3
+        @test others_n_active(pool, Dual_f1_11) == 1
+        @test others_n_active(pool, Dual_f1_4) == 1
+        @test others_n_active(pool, Dual_f2_11) == 1
 
-    @test length(pool.others) == 3
-    @test others_n_active(pool, Dual_f1_11) == 1
-    @test others_n_active(pool, Dual_f1_4) == 1
-    @test others_n_active(pool, Dual_f2_11) == 1
+        rewind!(pool)
+        @test others_n_active(pool, Dual_f1_11) == 0
+        @test others_n_active(pool, Dual_f1_4) == 0
+        @test others_n_active(pool, Dual_f2_11) == 0
+    end
 
-    rewind!(pool)
-    @test others_n_active(pool, Dual_f1_11) == 0
-    @test others_n_active(pool, Dual_f1_4) == 0
-    @test others_n_active(pool, Dual_f2_11) == 0
-end
+    # ==============================================================================
+    # 22. Parametric Dual-Like: Nested Scopes (Simulates ForwardDiff Chunk Processing)
+    # ==============================================================================
+    # ForwardDiff.gradient processes data in chunks, calling the function N times.
+    # Each call creates a @with_pool scope. The pool must correctly rewind Dual
+    # arrays created during each chunk evaluation.
 
-# ==============================================================================
-# 22. Parametric Dual-Like: Nested Scopes (Simulates ForwardDiff Chunk Processing)
-# ==============================================================================
-# ForwardDiff.gradient processes data in chunks, calling the function N times.
-# Each call creates a @with_pool scope. The pool must correctly rewind Dual
-# arrays created during each chunk evaluation.
+    @testset "22. Dual-like: simulated ForwardDiff chunk processing" begin
+        pool = AdaptiveArrayPool()
+        n_chunks = 11  # Like processing 121 elements in chunks of 11
 
-@testset "22. Dual-like: simulated ForwardDiff chunk processing" begin
-    pool = AdaptiveArrayPool()
-    n_chunks = 11  # Like processing 121 elements in chunks of 11
+        for chunk in 1:n_chunks
+            checkpoint!(pool)
+            # Simulates what happens inside cubic_interp when called with Dual data
+            partials = acquire!(pool, Dual_f1_11, 44)   # like (4, 11) partials array
+            workspace = acquire!(pool, Dual_f1_11, 11)   # temporary workspace
+            @test others_n_active(pool, Dual_f1_11) == 2
 
-    for chunk in 1:n_chunks
-        checkpoint!(pool)
-        # Simulates what happens inside cubic_interp when called with Dual data
-        partials = acquire!(pool, Dual_f1_11, 44)   # like (4, 11) partials array
-        workspace = acquire!(pool, Dual_f1_11, 11)   # temporary workspace
-        @test others_n_active(pool, Dual_f1_11) == 2
+            rewind!(pool)
+            @test others_n_active(pool, Dual_f1_11) == 0
+        end
 
-        rewind!(pool)
+        # After all chunks: zero leak
         @test others_n_active(pool, Dual_f1_11) == 0
+        @test length(pool.others[Dual_f1_11].vectors) == 2  # reuses 2 backing vectors
     end
 
-    # After all chunks: zero leak
-    @test others_n_active(pool, Dual_f1_11) == 0
-    @test length(pool.others[Dual_f1_11].vectors) == 2  # reuses 2 backing vectors
-end
+    @testset "22b. Dual-like: simulated nested @with_pool in chunk processing" begin
+        pool = AdaptiveArrayPool()
 
-@testset "22b. Dual-like: simulated nested @with_pool in chunk processing" begin
-    pool = AdaptiveArrayPool()
+        for chunk in 1:11
+            # Outer scope: oneshot function
+            checkpoint!(pool)
+            partials = acquire!(pool, Dual_f1_11, 44)
 
-    for chunk in 1:11
-        # Outer scope: oneshot function
-        checkpoint!(pool)
-        partials = acquire!(pool, Dual_f1_11, 44)
+            # Inner scope: solver function (nested @with_pool)
+            checkpoint!(pool)
+            m = acquire!(pool, Dual_f1_11, 11)
+            @test others_n_active(pool, Dual_f1_11) == 2
+            rewind!(pool)
+            @test others_n_active(pool, Dual_f1_11) == 1  # only partials
 
-        # Inner scope: solver function (nested @with_pool)
-        checkpoint!(pool)
-        m = acquire!(pool, Dual_f1_11, 11)
-        @test others_n_active(pool, Dual_f1_11) == 2
-        rewind!(pool)
-        @test others_n_active(pool, Dual_f1_11) == 1  # only partials
+            rewind!(pool)
+            @test others_n_active(pool, Dual_f1_11) == 0
+        end
 
-        rewind!(pool)
         @test others_n_active(pool, Dual_f1_11) == 0
     end
 
-    @test others_n_active(pool, Dual_f1_11) == 0
-end
+    # ==============================================================================
+    # 23. Parametric Dual-Like: Lazy Mode (Macro-Generated Path)
+    # ==============================================================================
 
-# ==============================================================================
-# 23. Parametric Dual-Like: Lazy Mode (Macro-Generated Path)
-# ==============================================================================
+    @testset "23. Dual-like: lazy checkpoint/rewind" begin
+        pool = AdaptiveArrayPool()
 
-@testset "23. Dual-like: lazy checkpoint/rewind" begin
-    pool = AdaptiveArrayPool()
+        _lazy_checkpoint!(pool)
+        acquire!(pool, Dual_f1_11, 44)
+        acquire!(pool, Dual_f1_11, 11)
+        @test others_n_active(pool, Dual_f1_11) == 2
+        @test pool._touched_has_others[pool._current_depth] == true
 
-    _lazy_checkpoint!(pool)
-    acquire!(pool, Dual_f1_11, 44)
-    acquire!(pool, Dual_f1_11, 11)
-    @test others_n_active(pool, Dual_f1_11) == 2
-    @test pool._touched_has_others[pool._current_depth] == true
+        _lazy_rewind!(pool)
+        @test others_n_active(pool, Dual_f1_11) == 0
+    end
 
-    _lazy_rewind!(pool)
-    @test others_n_active(pool, Dual_f1_11) == 0
-end
+    @testset "23b. Dual-like: lazy mode with pre-existing Dual pool" begin
+        pool = AdaptiveArrayPool()
 
-@testset "23b. Dual-like: lazy mode with pre-existing Dual pool" begin
-    pool = AdaptiveArrayPool()
+        # Pre-populate Dual pool (simulates warmup call)
+        checkpoint!(pool)
+        acquire!(pool, Dual_f1_11, 10)
+        rewind!(pool)
+        @test others_n_active(pool, Dual_f1_11) == 0
 
-    # Pre-populate Dual pool (simulates warmup call)
-    checkpoint!(pool)
-    acquire!(pool, Dual_f1_11, 10)
-    rewind!(pool)
-    @test others_n_active(pool, Dual_f1_11) == 0
+        # Lazy scope — pre-existing Dual pool must be eagerly checkpointed
+        _lazy_checkpoint!(pool)
+        acquire!(pool, Dual_f1_11, 44)
+        @test others_n_active(pool, Dual_f1_11) == 1
 
-    # Lazy scope — pre-existing Dual pool must be eagerly checkpointed
-    _lazy_checkpoint!(pool)
-    acquire!(pool, Dual_f1_11, 44)
-    @test others_n_active(pool, Dual_f1_11) == 1
+        _lazy_rewind!(pool)
+        @test others_n_active(pool, Dual_f1_11) == 0
+    end
 
-    _lazy_rewind!(pool)
-    @test others_n_active(pool, Dual_f1_11) == 0
-end
+    @testset "23c. Dual-like: lazy nested, Dual acquired only in inner scope" begin
+        pool = AdaptiveArrayPool()
 
-@testset "23c. Dual-like: lazy nested, Dual acquired only in inner scope" begin
-    pool = AdaptiveArrayPool()
+        # Outer: lazy, acquires Float64 only
+        _lazy_checkpoint!(pool)
+        acquire!(pool, Float64, 10)
 
-    # Outer: lazy, acquires Float64 only
-    _lazy_checkpoint!(pool)
-    acquire!(pool, Float64, 10)
+        # Inner: lazy, acquires Dual (new type created inside nested lazy scope)
+        _lazy_checkpoint!(pool)
+        acquire!(pool, Dual_f1_11, 44)
+        @test others_n_active(pool, Dual_f1_11) == 1
 
-    # Inner: lazy, acquires Dual (new type created inside nested lazy scope)
-    _lazy_checkpoint!(pool)
-    acquire!(pool, Dual_f1_11, 44)
-    @test others_n_active(pool, Dual_f1_11) == 1
+        _lazy_rewind!(pool)
+        @test others_n_active(pool, Dual_f1_11) == 0
 
-    _lazy_rewind!(pool)
-    @test others_n_active(pool, Dual_f1_11) == 0
+        _lazy_rewind!(pool)
+        @test pool.float64.n_active == 0
+    end
 
-    _lazy_rewind!(pool)
-    @test pool.float64.n_active == 0
-end
+    # ==============================================================================
+    # 24. Parametric Dual-Like: Typed-Lazy (use_typed=true Macro Path)
+    # ==============================================================================
+    # This simulates the MOST LIKELY leak scenario:
+    # The macro extracts Float64 as the tracked type, but at runtime the data is
+    # Dual-typed. The typed-lazy path must correctly handle untracked Dual types.
 
-# ==============================================================================
-# 24. Parametric Dual-Like: Typed-Lazy (use_typed=true Macro Path)
-# ==============================================================================
-# This simulates the MOST LIKELY leak scenario:
-# The macro extracts Float64 as the tracked type, but at runtime the data is
-# Dual-typed. The typed-lazy path must correctly handle untracked Dual types.
-
-@testset "24. Dual-like: typed-lazy with Dual as untracked type" begin
-    pool = AdaptiveArrayPool()
-
-    # Typed-lazy tracking Float64 — Dual is untracked (goes through has_others)
-    _typed_lazy_checkpoint!(pool, Float64)
-    acquire!(pool, Float64, 10)       # tracked type
-    acquire!(pool, Dual_f1_11, 44)    # untracked Dual
-    @test others_n_active(pool, Dual_f1_11) == 1
-    @test pool._touched_has_others[pool._current_depth] == true
-
-    _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
-    @test pool.float64.n_active == 0
-    @test others_n_active(pool, Dual_f1_11) == 0
-end
+    @testset "24. Dual-like: typed-lazy with Dual as untracked type" begin
+        pool = AdaptiveArrayPool()
 
-@testset "24b. Dual-like: typed-lazy nested, parent has Dual, child adds more" begin
-    pool = AdaptiveArrayPool()
+        # Typed-lazy tracking Float64 — Dual is untracked (goes through has_others)
+        _typed_lazy_checkpoint!(pool, Float64)
+        acquire!(pool, Float64, 10)       # tracked type
+        acquire!(pool, Dual_f1_11, 44)    # untracked Dual
+        @test others_n_active(pool, Dual_f1_11) == 1
+        @test pool._touched_has_others[pool._current_depth] == true
 
-    # Pre-populate Dual pool
-    checkpoint!(pool)
-    acquire!(pool, Dual_f1_11, 10)
-    rewind!(pool)
+        _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
+        @test pool.float64.n_active == 0
+        @test others_n_active(pool, Dual_f1_11) == 0
+    end
 
-    # Parent: full checkpoint, acquires Dual
-    checkpoint!(pool)
-    acquire!(pool, Dual_f1_11, 5)
-    @test others_n_active(pool, Dual_f1_11) == 1
+    @testset "24b. Dual-like: typed-lazy nested, parent has Dual, child adds more" begin
+        pool = AdaptiveArrayPool()
 
-    # Child: typed-lazy (Float64), helper acquires more Dual
-    _typed_lazy_checkpoint!(pool, Float64)
-    acquire!(pool, Float64, 10)
-    acquire!(pool, Dual_f1_11, 44)
-    @test others_n_active(pool, Dual_f1_11) == 2
+        # Pre-populate Dual pool
+        checkpoint!(pool)
+        acquire!(pool, Dual_f1_11, 10)
+        rewind!(pool)
 
-    _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
-    @test others_n_active(pool, Dual_f1_11) == 1  # parent's preserved
+        # Parent: full checkpoint, acquires Dual
+        checkpoint!(pool)
+        acquire!(pool, Dual_f1_11, 5)
+        @test others_n_active(pool, Dual_f1_11) == 1
 
-    rewind!(pool)
-    @test others_n_active(pool, Dual_f1_11) == 0
-end
+        # Child: typed-lazy (Float64), helper acquires more Dual
+        _typed_lazy_checkpoint!(pool, Float64)
+        acquire!(pool, Float64, 10)
+        acquire!(pool, Dual_f1_11, 44)
+        @test others_n_active(pool, Dual_f1_11) == 2
 
-@testset "24c. Dual-like: typed-lazy with Dual as TRACKED type via eltype" begin
-    pool = AdaptiveArrayPool()
+        _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
+        @test others_n_active(pool, Dual_f1_11) == 1  # parent's preserved
 
-    # Simulates: @with_pool pool function f(y::Vector{Dual})
-    #   z = similar!(pool, y)  → macro extracts eltype(y) = Dual as tracked type
-    # Since Dual is a fallback type, _tracked_mask_for_types(Dual) == UInt16(0)
-    # The _can_use_typed_path check becomes:
-    #   touched_mask & ~0 == 0 → always true IF no has_others set
-    # But _checkpoint!(pool, Dual) does checkpoint the Dual pool.
+        rewind!(pool)
+        @test others_n_active(pool, Dual_f1_11) == 0
+    end
 
-    # This tests the typed path where the only tracked type is a fallback type
-    checkpoint!(pool, Dual_f1_11)
-    acquire!(pool, Dual_f1_11, 44)
-    acquire!(pool, Dual_f1_11, 11)
-    @test others_n_active(pool, Dual_f1_11) == 2
+    @testset "24c. Dual-like: typed-lazy with Dual as TRACKED type via eltype" begin
+        pool = AdaptiveArrayPool()
 
-    rewind!(pool, Dual_f1_11)
-    @test others_n_active(pool, Dual_f1_11) == 0
-end
+        # Simulates: @with_pool pool function f(y::Vector{Dual})
+        #   z = similar!(pool, y)  → macro extracts eltype(y) = Dual as tracked type
+        # Since Dual is a fallback type, _tracked_mask_for_types(Dual) == UInt16(0)
+        # The _can_use_typed_path check becomes:
+        #   touched_mask & ~0 == 0 → true only if no fixed-slot type was touched and has_others is not set
+        # But _checkpoint!(pool, Dual) does checkpoint the Dual pool.
 
-# ==============================================================================
-# 25. Dual-Like: Stress Test — Simulates Full ForwardDiff.gradient Pipeline
-# ==============================================================================
-# Simulates: ForwardDiff.gradient(f, z) where f uses cubic_interp
-# ForwardDiff calls f(z_dual) multiple times (one per chunk)
-# Each call: @with_pool → acquire Dual arrays → rewind
+        # This tests the typed path where the only tracked type is a fallback type
+        checkpoint!(pool, Dual_f1_11)
+        acquire!(pool, Dual_f1_11, 44)
+        acquire!(pool, Dual_f1_11, 11)
+        @test others_n_active(pool, Dual_f1_11) == 2
 
-@testset "25. Dual-like: full gradient simulation stress test" begin
-    pool = AdaptiveArrayPool()
-    n_chunks = 11
+        rewind!(pool, Dual_f1_11)
+        @test others_n_active(pool, Dual_f1_11) == 0
+    end
 
-    function simulate_cubic_interp_dual!(pool)
-        # Outer oneshot scope
-        checkpoint!(pool)
-        try
-            partials = acquire!(pool, Dual_f1_11, 4 * 11)
-            workspace = acquire!(pool, Float64, 10)  # spacing (Float64, not Dual)
+    # ==============================================================================
+    # 25. Dual-Like: Stress Test — Simulates Full ForwardDiff.gradient Pipeline
+    # ==============================================================================
+    # Simulates: ForwardDiff.gradient(f, z) where f uses cubic_interp
+    # ForwardDiff calls f(z_dual) multiple times (one per chunk)
+    # Each call: @with_pool → acquire Dual arrays → rewind
 
-            # Inner solver scope
-            checkpoint!(pool)
-            try
-                m = acquire!(pool, Dual_f1_11, 11)
-                # solve...
-            finally
-                rewind!(pool)
-            end
+    @testset "25. Dual-like: full gradient simulation stress test" begin
+        pool = AdaptiveArrayPool()
+        n_chunks = 11
 
-            # Inner differentiation scope
+        function simulate_cubic_interp_dual!(pool)
+            # Outer oneshot scope
             checkpoint!(pool)
             try
-                line = acquire!(pool, Dual_f1_11, 11)
-                dline = acquire!(pool, Dual_f1_11, 11)
-                # compute derivatives...
+                partials = acquire!(pool, Dual_f1_11, 4 * 11)
+                workspace = acquire!(pool, Float64, 10)  # spacing (Float64, not Dual)
+
+                # Inner solver scope
+                checkpoint!(pool)
+                try
+                    m = acquire!(pool, Dual_f1_11, 11)
+                    # solve...
+                finally
+                    rewind!(pool)
+                end
+
+                # Inner differentiation scope
+                checkpoint!(pool)
+                try
+                    line = acquire!(pool, Dual_f1_11, 11)
+                    dline = acquire!(pool, Dual_f1_11, 11)
+                    # compute derivatives...
+                finally
+                    rewind!(pool)
+                end
+
+                return nothing
             finally
                 rewind!(pool)
             end
-
-            return nothing
-        finally
-            rewind!(pool)
         end
-    end
-
-    for chunk in 1:n_chunks
-        simulate_cubic_interp_dual!(pool)
-    end
 
-    # After all chunks: zero leak
-    @test pool.float64.n_active == 0
-    @test others_n_active(pool, Dual_f1_11) == 0
+        for chunk in 1:n_chunks
+            simulate_cubic_interp_dual!(pool)
+        end
 
-    # Checkpoint stack must be clean
-    if haskey(pool.others, Dual_f1_11)
-        dual_pool = pool.others[Dual_f1_11]
-        @test length(dual_pool._checkpoint_depths) == 1  # sentinel only
-        @test dual_pool._checkpoint_depths[1] == 0  # sentinel value
-    end
-end
+        # After all chunks: zero leak
+        @test pool.float64.n_active == 0
+        @test others_n_active(pool, Dual_f1_11) == 0
 
-@testset "25b. Dual-like: @with_pool stress with Dual types" begin
-    # Warmup
-    @with_pool pool begin
-        acquire!(pool, Dual_f1_11, 44)
+        # Checkpoint stack must be clean
+        if haskey(pool.others, Dual_f1_11)
+            dual_pool = pool.others[Dual_f1_11]
+            @test length(dual_pool._checkpoint_depths) == 1  # sentinel only
+            @test dual_pool._checkpoint_depths[1] == 0  # sentinel value
+        end
     end
 
-    tl_pool = AdaptiveArrayPools.get_task_local_pool()
-    baseline_vecs = length(tl_pool.others[Dual_f1_11].vectors)
-
-    for _ in 1:200
+    @testset "25b. Dual-like: @with_pool stress with Dual types" begin
+        # Warmup
         @with_pool pool begin
             acquire!(pool, Dual_f1_11, 44)
-            acquire!(pool, Dual_f1_11, 11)
         end
+
+        tl_pool = AdaptiveArrayPools.get_task_local_pool()
+        baseline_vecs = length(tl_pool.others[Dual_f1_11].vectors)
+
+        for _ in 1:200
+            @with_pool pool begin
+                acquire!(pool, Dual_f1_11, 44)
+                acquire!(pool, Dual_f1_11, 11)
+            end
+        end
+
+        tl_pool = AdaptiveArrayPools.get_task_local_pool()
+        @test others_n_active(tl_pool, Dual_f1_11) == 0
+        @test length(tl_pool.others[Dual_f1_11].vectors) <= baseline_vecs + 1
     end
 
-    tl_pool = AdaptiveArrayPools.get_task_local_pool()
-    @test others_n_active(tl_pool, Dual_f1_11) == 0
-    @test length(tl_pool.others[Dual_f1_11].vectors) <= baseline_vecs + 1
-end
+    # ==============================================================================
+    # 26. Dual-Like: New Dual Type Created Mid-Scope (First-Touch Scenario)
+    # ==============================================================================
+    # When ForwardDiff first calls f(z_dual), the Dual type doesn't exist in pool.others yet.
+    # get_typed_pool! auto-checkpoints new types (pushes sentinel n_active=0).
+    # This tests that the auto-checkpoint + rewind cycle is correct for first-ever encounter.
 
-# ==============================================================================
-# 26. Dual-Like: New Dual Type Created Mid-Scope (First-Touch Scenario)
-# ==============================================================================
-# When ForwardDiff first calls f(z_dual), the Dual type doesn't exist in pool.others yet.
-# get_typed_pool! auto-checkpoints new types (pushes sentinel n_active=0).
-# This tests that the auto-checkpoint + rewind cycle is correct for first-ever encounter.
-
-@testset "26. Dual-like: first-touch auto-checkpoint in lazy mode" begin
-    pool = AdaptiveArrayPool()
-    # Pool has never seen Dual_f2_11 before
-    @test !haskey(pool.others, Dual_f2_11)
-
-    _lazy_checkpoint!(pool)
-    # First-ever acquire of this Dual variant
-    v = acquire!(pool, Dual_f2_11, 10)
-    @test haskey(pool.others, Dual_f2_11)
-    @test others_n_active(pool, Dual_f2_11) == 1
-
-    _lazy_rewind!(pool)
-    @test others_n_active(pool, Dual_f2_11) == 0
-end
+    @testset "26. Dual-like: first-touch auto-checkpoint in lazy mode" begin
+        pool = AdaptiveArrayPool()
+        # Pool has never seen Dual_f2_11 before
+        @test !haskey(pool.others, Dual_f2_11)
 
-@testset "26b. Dual-like: first-touch in typed-lazy mode" begin
-    pool = AdaptiveArrayPool()
-    @test !haskey(pool.others, Dual_f2_11)
+        _lazy_checkpoint!(pool)
+        # First-ever acquire of this Dual variant
+        v = acquire!(pool, Dual_f2_11, 10)
+        @test haskey(pool.others, Dual_f2_11)
+        @test others_n_active(pool, Dual_f2_11) == 1
 
-    _typed_lazy_checkpoint!(pool, Float64)
-    v = acquire!(pool, Dual_f2_11, 10)
-    @test others_n_active(pool, Dual_f2_11) == 1
+        _lazy_rewind!(pool)
+        @test others_n_active(pool, Dual_f2_11) == 0
+    end
 
-    _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
-    @test others_n_active(pool, Dual_f2_11) == 0
-end
+    @testset "26b. Dual-like: first-touch in typed-lazy mode" begin
+        pool = AdaptiveArrayPool()
+        @test !haskey(pool.others, Dual_f2_11)
 
-@testset "26c. Dual-like: first-touch in typed (only) checkpoint — tracked fallback" begin
-    pool = AdaptiveArrayPool()
-    @test !haskey(pool.others, Dual_f2_11)
+        _typed_lazy_checkpoint!(pool, Float64)
+        v = acquire!(pool, Dual_f2_11, 10)
+        @test others_n_active(pool, Dual_f2_11) == 1
 
-    # checkpoint!(pool, Dual_f2_11) creates the pool entry via get_typed_pool!
-    # AND pushes checkpoint for it
-    checkpoint!(pool, Dual_f2_11)
-    v = acquire!(pool, Dual_f2_11, 10)
-    @test others_n_active(pool, Dual_f2_11) == 1
+        _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
+        @test others_n_active(pool, Dual_f2_11) == 0
+    end
 
-    rewind!(pool, Dual_f2_11)
-    @test others_n_active(pool, Dual_f2_11) == 0
-end
+    @testset "26c. Dual-like: first-touch in typed (only) checkpoint — tracked fallback" begin
+        pool = AdaptiveArrayPool()
+        @test !haskey(pool.others, Dual_f2_11)
 
-# ==============================================================================
-# 27. Dual-Like: Mixed Fixed + Multiple Dual Variants (Type Explosion)
-# ==============================================================================
+        # checkpoint!(pool, Dual_f2_11) creates the pool entry via get_typed_pool!
+        # AND pushes a checkpoint for it
+        checkpoint!(pool, Dual_f2_11)
+        v = acquire!(pool, Dual_f2_11, 10)
+        @test others_n_active(pool, Dual_f2_11) == 1
 
-@testset "27. Type explosion: multiple Dual variants + fixed types" begin
-    pool = AdaptiveArrayPool()
-
-    checkpoint!(pool)
-    acquire!(pool, Float64, 10)       # fixed slot
-    acquire!(pool, Int32, 5)          # fixed slot
-    acquire!(pool, Dual_f1_11, 44)    # fallback Dual variant 1
-    acquire!(pool, Dual_f1_4, 16)     # fallback Dual variant 2
-    acquire!(pool, Dual_f2_11, 33)    # fallback Dual variant 3
-    acquire!(pool, UInt8, 20)         # fallback primitive
-
-    @test pool.float64.n_active == 1
-    @test pool.int32.n_active == 1
-    @test others_n_active(pool, Dual_f1_11) == 1
-    @test others_n_active(pool, Dual_f1_4) == 1
-    @test others_n_active(pool, Dual_f2_11) == 1
-    @test others_n_active(pool, UInt8) == 1
-
-    rewind!(pool)
-
-    @test pool.float64.n_active == 0
-    @test pool.int32.n_active == 0
-    @test others_n_active(pool, Dual_f1_11) == 0
-    @test others_n_active(pool, Dual_f1_4) == 0
-    @test others_n_active(pool, Dual_f2_11) == 0
-    @test others_n_active(pool, UInt8) == 0
-end
+        rewind!(pool, Dual_f2_11)
+        @test others_n_active(pool, Dual_f2_11) == 0
+    end
 
-# ==============================================================================
-# 28. Dual-Like: _acquire_impl! Bypass (Macro Transform Path)
-# ==============================================================================
-# When @with_pool transforms acquire! → _acquire_impl!, _record_type_touch!
-# is bypassed. This tests that fallback types still get properly rewound
-# even when type touch recording is skipped.
-
-@testset "28. _acquire_impl! bypass: fallback type with typed checkpoint" begin
-    pool = AdaptiveArrayPool()
-    using AdaptiveArrayPools: _acquire_impl!
-
-    # Simulate typed path: checkpoint specific type, use _acquire_impl! directly
-    # checkpoint!(pool, Dual) creates the pool entry via get_typed_pool! which
-    # now sets has_others=true at creation time
-    checkpoint!(pool, Dual_f1_11)
-    # _acquire_impl! does NOT call _record_type_touch!
-    v = _acquire_impl!(pool, Dual_f1_11, 10)
-    @test others_n_active(pool, Dual_f1_11) == 1
-
-    rewind!(pool, Dual_f1_11)
-    @test others_n_active(pool, Dual_f1_11) == 0
-end
+    # ==============================================================================
+    # 27. Dual-Like: Mixed Fixed + Multiple Dual Variants (Type Explosion)
+    # ==============================================================================
 
-@testset "28b. _acquire_impl! bypass: typed-lazy fallback to lazy rewind" begin
-    pool = AdaptiveArrayPool()
-    using AdaptiveArrayPools: _acquire_impl!
+    @testset "27. Type explosion: multiple Dual variants + fixed types" begin
+        pool = AdaptiveArrayPool()
 
-    # Typed-lazy tracking Float64, Dual acquired via _acquire_impl! (no touch)
-    _typed_lazy_checkpoint!(pool, Float64)
+        checkpoint!(pool)
+        acquire!(pool, Float64, 10)       # fixed slot
+        acquire!(pool, Int32, 5)          # fixed slot
+        acquire!(pool, Dual_f1_11, 44)    # fallback Dual variant 1
+        acquire!(pool, Dual_f1_4, 16)     # fallback Dual variant 2
+        acquire!(pool, Dual_f2_11, 33)    # fallback Dual variant 3
+        acquire!(pool, UInt8, 20)         # fallback primitive
+
+        @test pool.float64.n_active == 1
+        @test pool.int32.n_active == 1
+        @test others_n_active(pool, Dual_f1_11) == 1
+        @test others_n_active(pool, Dual_f1_4) == 1
+        @test others_n_active(pool, Dual_f2_11) == 1
+        @test others_n_active(pool, UInt8) == 1
 
-    # Tracked type via _acquire_impl!
-    _acquire_impl!(pool, Float64, 10)
+        rewind!(pool)
 
-    # Untracked Dual via _acquire_impl! — NO touch recording!
-    # But get_typed_pool! now sets _touched_has_others when creating new fallback type
-    _acquire_impl!(pool, Dual_f1_11, 44)
-    @test others_n_active(pool, Dual_f1_11) == 1
+        @test pool.float64.n_active == 0
+        @test pool.int32.n_active == 0
+        @test others_n_active(pool, Dual_f1_11) == 0
+        @test others_n_active(pool, Dual_f1_4) == 0
+        @test others_n_active(pool, Dual_f2_11) == 0
+        @test others_n_active(pool, UInt8) == 0
+    end
 
-    # has_others should now be true (set by get_typed_pool! on first creation)
-    @test pool._touched_has_others[pool._current_depth] == true
+    # ==============================================================================
+    # 28. Dual-Like: _acquire_impl! Bypass (Macro Transform Path)
+    # ==============================================================================
+    # When @with_pool transforms acquire! → _acquire_impl!, _record_type_touch!
+    # is bypassed. This tests that fallback types still get properly rewound
+    # even when type touch recording is skipped.
+
+    @testset "28. _acquire_impl! bypass: fallback type with typed checkpoint" begin
+        pool = AdaptiveArrayPool()
+        using AdaptiveArrayPools: _acquire_impl!
+
+        # Simulate typed path: checkpoint specific type, use _acquire_impl! directly
+        # checkpoint!(pool, Dual) creates the pool entry via get_typed_pool! which
+        # now sets has_others=true at creation time
+        checkpoint!(pool, Dual_f1_11)
+        # _acquire_impl! does NOT call _record_type_touch!
+        v = _acquire_impl!(pool, Dual_f1_11, 10)
+        @test others_n_active(pool, Dual_f1_11) == 1
 
-    _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
+        rewind!(pool, Dual_f1_11)
+        @test others_n_active(pool, Dual_f1_11) == 0
+    end
 
-    # Fix: get_typed_pool! sets has_others → rewind iterates pool.others → n_active restored
-    @test others_n_active(pool, Dual_f1_11) == 0
-end
+    @testset "28b. _acquire_impl! bypass: typed-lazy fallback to lazy rewind" begin
+        pool = AdaptiveArrayPool()
+        using AdaptiveArrayPools: _acquire_impl!
 
-# ==============================================================================
-# 29. Repeated typed checkpoint with existing fallback + extra touched types
-# ==============================================================================
-# Reproduces the real-world ForwardDiff.gradient scenario:
-# - @with_pool extracts Tv (Dual) as static type → use_typed=true
-# - Macro transforms acquire!(pool, Tv, ...) → _acquire_impl!(pool, Tv, ...)
-# - Helper function (not in @with_pool body) acquires Float64 via normal acquire!
-#   → _record_type_touch! sets touched_type_masks for Float64
-# - At rewind: _can_use_typed_path returns false (Float64 was extra-touched)
-#   → falls to _typed_lazy_rewind! which checks has_others
-# - BUG: has_others was false on 2nd+ calls because get_typed_pool! closure only
-#   runs for NEW types. checkpoint!(pool, Dual) always pushed has_others=false.
-# - FIX: checkpoint!(pool, types...) now pushes has_others=true when any type
-#   is a fallback (compile-time check via _fixed_slot_bit).
-
-@testset "29. Repeated typed checkpoint: existing fallback + extra touched types" begin
-    pool = AdaptiveArrayPool()
-    using AdaptiveArrayPools: _acquire_impl!, _typed_lazy_checkpoint!, _typed_lazy_rewind!,
-                              _tracked_mask_for_types, _can_use_typed_path
-
-    # Simulate 3 iterations of ForwardDiff.gradient calling cubic_interp
-    for iter in 1:3
-        # 1. Typed checkpoint for Dual (the macro fast path if _can_use_typed_path)
-        #    On iter=1: Dual is new → get_typed_pool! creates it
-        #    On iter≥2: Dual exists → get_typed_pool! returns immediately
-        if _can_use_typed_path(pool, _tracked_mask_for_types(Dual_f1_11))
-            checkpoint!(pool, Dual_f1_11)
-        else
-            _typed_lazy_checkpoint!(pool, Dual_f1_11)
-        end
+        # Typed-lazy tracking Float64, Dual acquired via _acquire_impl! (no touch)
+        _typed_lazy_checkpoint!(pool, Float64)
 
-        # 2. Helper function acquires Float64 via normal acquire! (NOT transformed)
-        #    This sets touched_type_masks for Float64, causing _can_use_typed_path=false at rewind
-        acquire!(pool, Float64, 10)
+        # Tracked type via _acquire_impl!
+        _acquire_impl!(pool, Float64, 10)
 
-        # 3. _acquire_impl! for Dual (macro-transformed, no _record_type_touch!)
+        # Untracked Dual via _acquire_impl! — NO touch recording!
+        # But get_typed_pool! now sets _touched_has_others when creating a new fallback type
         _acquire_impl!(pool, Dual_f1_11, 44)
         @test others_n_active(pool, Dual_f1_11) == 1
 
-        # 4. Rewind — same conditional as the macro generates
-        if _can_use_typed_path(pool, _tracked_mask_for_types(Dual_f1_11))
-            rewind!(pool, Dual_f1_11)
-        else
-            _typed_lazy_rewind!(pool, _tracked_mask_for_types(Dual_f1_11))
-        end
+        # has_others should now be true (set by get_typed_pool! on first creation)
+        @test pool._touched_has_others[pool._current_depth] == true
+
+        _typed_lazy_rewind!(pool, _tracked_mask_for_types(Float64))
 
-        # Key assertion: Dual must be rewound on EVERY iteration, not just the first
+        # Fix: get_typed_pool! sets has_others → rewind iterates pool.others → n_active restored
         @test others_n_active(pool, Dual_f1_11) == 0
-        @test pool.float64.n_active == 0
     end
-end
 
-@testset "29b. Repeated typed checkpoint: multiple Dual variants + helpers" begin
-    pool = AdaptiveArrayPool()
-    using AdaptiveArrayPools: _acquire_impl!, _unsafe_acquire_impl!,
-                              _typed_lazy_checkpoint!, _typed_lazy_rewind!,
-                              _tracked_mask_for_types, _can_use_typed_path
+    # ==============================================================================
+    # 29. Repeated typed checkpoint with existing fallback + extra touched types
+    # ==============================================================================
+    # Reproduces the real-world ForwardDiff.gradient scenario:
+    # - @with_pool extracts Tv (Dual) as static type → use_typed=true
+    # - Macro transforms acquire!(pool, Tv, ...) → _acquire_impl!(pool, Tv, ...)
+    # - Helper function (not in @with_pool body) acquires Float64 via normal acquire!
+    #   → _record_type_touch! sets touched_type_masks for Float64
+    # - At rewind: _can_use_typed_path returns false (Float64 was extra-touched)
+    #   → falls to _typed_lazy_rewind! which checks has_others
+    # - BUG: has_others was false on 2nd+ calls because get_typed_pool! closure only
+    #   runs for NEW types. checkpoint!(pool, Dual) always pushed has_others=false.
+    # - FIX: checkpoint!(pool, types...) now pushes has_others=true when any type
+    #   is a fallback (compile-time check via _fixed_slot_bit).
+
+    @testset "29. Repeated typed checkpoint: existing fallback + extra touched types" begin
+        pool = AdaptiveArrayPool()
+        using AdaptiveArrayPools: _acquire_impl!, _typed_lazy_checkpoint!, _typed_lazy_rewind!,
+            _tracked_mask_for_types, _can_use_typed_path
+
+        # Simulate 3 iterations of ForwardDiff.gradient calling cubic_interp
+        for iter in 1:3
+            # 1. Typed checkpoint for Dual (the macro fast path if _can_use_typed_path)
+            #    On iter=1: Dual is new → get_typed_pool! creates it
+            #    On iter≥2: Dual exists → get_typed_pool! returns immediately
+            if _can_use_typed_path(pool, _tracked_mask_for_types(Dual_f1_11))
+                checkpoint!(pool, Dual_f1_11)
+            else
+                _typed_lazy_checkpoint!(pool, Dual_f1_11)
+            end
+
+            # 2. Helper function acquires Float64 via normal acquire! (NOT transformed)
+            #    This sets touched_type_masks for Float64, causing _can_use_typed_path=false at rewind
+            acquire!(pool, Float64, 10)
+
+            # 3. _acquire_impl! for Dual (macro-transformed, no _record_type_touch!)
+            _acquire_impl!(pool, Dual_f1_11, 44)
+            @test others_n_active(pool, Dual_f1_11) == 1
 
-    for iter in 1:5
-        # Simulate @with_pool with Tv=Dual_f1_11, Tg=Float64
-        tracked_mask = _tracked_mask_for_types(Dual_f1_11, Float64)
+            # 4. Rewind — same conditional as the macro generates
+            if _can_use_typed_path(pool, _tracked_mask_for_types(Dual_f1_11))
+                rewind!(pool, Dual_f1_11)
+            else
+                _typed_lazy_rewind!(pool, _tracked_mask_for_types(Dual_f1_11))
+            end
 
-        if _can_use_typed_path(pool, tracked_mask)
-            checkpoint!(pool, Dual_f1_11, Float64)
-        else
-            _typed_lazy_checkpoint!(pool, Dual_f1_11, Float64)
+            # Key assertion: Dual must be rewound on EVERY iteration, not just the first
+            @test others_n_active(pool, Dual_f1_11) == 0
+            @test pool.float64.n_active == 0
         end
+    end
 
-        # Outer scope: multi-dim Dual acquire (unsafe_acquire, macro-transformed)
-        _unsafe_acquire_impl!(pool, Dual_f1_11, 4, 11, 11)
-        @test others_n_active(pool, Dual_f1_11) == 1
+    @testset "29b. Repeated typed checkpoint: multiple Dual variants + helpers" begin
+        pool = AdaptiveArrayPool()
+        using AdaptiveArrayPools: _acquire_impl!, _unsafe_acquire_impl!,
+            _typed_lazy_checkpoint!, _typed_lazy_rewind!,
+            _tracked_mask_for_types, _can_use_typed_path
 
-        # Helper function: normal acquire! for Float64 + Int32 (extra touched types)
-        acquire!(pool, Float64, 20)
-        acquire!(pool, Int32, 5)
+        for iter in 1:5
+            # Simulate @with_pool with Tv=Dual_f1_11, Tg=Float64
+            tracked_mask = _tracked_mask_for_types(Dual_f1_11, Float64)
 
-        # Inner scope: nested @with_pool for solver
-        if _can_use_typed_path(pool, tracked_mask)
-            checkpoint!(pool, Dual_f1_11, Float64)
-        else
-            _typed_lazy_checkpoint!(pool, Dual_f1_11, Float64)
-        end
-        _acquire_impl!(pool, Dual_f1_11, 11)
-        @test others_n_active(pool, Dual_f1_11) == 2  # outer + inner
-        _acquire_impl!(pool, Float64, 11)
-
-        # Inner rewind
-        if _can_use_typed_path(pool, tracked_mask)
-            rewind!(pool, Dual_f1_11, Float64)
-        else
-            _typed_lazy_rewind!(pool, tracked_mask)
-        end
-        @test others_n_active(pool, Dual_f1_11) == 1  # back to outer's count
+            if _can_use_typed_path(pool, tracked_mask)
+                checkpoint!(pool, Dual_f1_11, Float64)
+            else
+                _typed_lazy_checkpoint!(pool, Dual_f1_11, Float64)
+            end
 
-        # Outer rewind
-        if _can_use_typed_path(pool, tracked_mask)
-            rewind!(pool, Dual_f1_11, Float64)
-        else
-            _typed_lazy_rewind!(pool, tracked_mask)
-        end
+            # Outer scope: multi-dim Dual acquire (unsafe_acquire, macro-transformed)
+            _unsafe_acquire_impl!(pool, Dual_f1_11, 4, 11, 11)
+            @test others_n_active(pool, Dual_f1_11) == 1
 
-        @test others_n_active(pool, Dual_f1_11) == 0
-        @test pool.float64.n_active == 0
-        @test pool.int32.n_active == 0
+            # Helper function: normal acquire! for Float64 + Int32 (extra touched types)
+            acquire!(pool, Float64, 20)
+            acquire!(pool, Int32, 5)
+
+            # Inner scope: nested @with_pool for solver
+            if _can_use_typed_path(pool, tracked_mask)
+                checkpoint!(pool, Dual_f1_11, Float64)
+            else
+                _typed_lazy_checkpoint!(pool, Dual_f1_11, Float64)
+            end
+            _acquire_impl!(pool, Dual_f1_11, 11)
+            @test others_n_active(pool, Dual_f1_11) == 2  # outer + inner
+            _acquire_impl!(pool, Float64, 11)
+
+            # Inner rewind
+            if _can_use_typed_path(pool, tracked_mask)
+                rewind!(pool, Dual_f1_11, Float64)
+            else
+                _typed_lazy_rewind!(pool, tracked_mask)
+            end
+            @test others_n_active(pool, Dual_f1_11) == 1  # back to outer's count
+
+            # Outer rewind
+            if _can_use_typed_path(pool, tracked_mask)
+                rewind!(pool, Dual_f1_11, Float64)
+            else
+                _typed_lazy_rewind!(pool, tracked_mask)
+            end
+
+            @test others_n_active(pool, Dual_f1_11) == 0
+            @test pool.float64.n_active == 0
+            @test pool.int32.n_active == 0
+        end
     end
-end
 
 end  # top-level @testset
diff --git a/test/test_macro_internals.jl b/test/test_macro_internals.jl
index ca3faeb5..fe62228a 100644
--- a/test/test_macro_internals.jl
+++ b/test/test_macro_internals.jl
@@ -81,7 +81,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
 
             # Assignment in function call (should NOT be captured)
             @testset "function call with kwarg" begin
-                expr = :(foo(x, y=1))
+                expr = :(foo(x, y = 1))
                 locals = _extract_local_assignments(expr)
                 # y=1 inside function call is a kwarg, not an assignment
                 # This depends on Julia parsing - in some cases it might be captured
@@ -1565,4 +1565,4 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
         @test pool.int64.n_active == 0
     end
 
-end # Macro Internals
\ No newline at end of file
+end # Macro Internals
diff --git a/test/test_macros.jl b/test/test_macros.jl
index 606c3f41..eaafc2bd 100644
--- a/test/test_macros.jl
+++ b/test/test_macros.jl
@@ -256,4 +256,4 @@ import AdaptiveArrayPools: checkpoint!, rewind!
         MAYBE_POOLING_ENABLED[] = true
     end
 
-end # Macro System
\ No newline at end of file
+end # Macro System
diff --git a/test/test_multidimensional.jl b/test/test_multidimensional.jl
index dca6f290..6e330d25 100644
--- a/test/test_multidimensional.jl
+++ b/test/test_multidimensional.jl
@@ -37,7 +37,7 @@ using AdaptiveArrayPools: checkpoint!, rewind!
 
         # Without pool (fallback)
         mat_alloc = acquire!(DISABLED_CPU, Float64, 10, 10)
-        @test mat_alloc isa Array{Float64,2}
+        @test mat_alloc isa Array{Float64, 2}
         @test size(mat_alloc) == (10, 10)
     end
 
@@ -265,4 +265,4 @@ using AdaptiveArrayPools: checkpoint!, rewind!
         @test alloc == 0
     end
 
-end # Multi-dimensional Arrays
\ No newline at end of file
+end # Multi-dimensional Arrays
diff --git a/test/test_nway_cache.jl b/test/test_nway_cache.jl
index cd1c2c76..33cb64ea 100644
--- a/test/test_nway_cache.jl
+++ b/test/test_nway_cache.jl
@@ -130,8 +130,10 @@ end
         pool = AdaptiveArrayPool()
 
         function test_nd_many_patterns!(p)
-            dims_list = ((2, 50), (5, 20), (10, 10), (20, 5), (50, 2),
-                         (1, 100), (100, 1), (4, 25), (25, 4), (8, 13))
+            dims_list = (
+                (2, 50), (5, 20), (10, 10), (20, 5), (50, 2),
+                (1, 100), (100, 1), (4, 25), (25, 4), (8, 13),
+            )
             for _ in 1:50
                 for dims in dims_list
                     @with_pool p begin
diff --git a/test/test_reshape.jl b/test/test_reshape.jl
index 1316d9be..5ccb29e0 100644
--- a/test/test_reshape.jl
+++ b/test/test_reshape.jl
@@ -390,7 +390,9 @@
             end
 
             # Warmup (compile + cache)
-            for _ in 1:4; _test_reshape_func_alloc(ext); end
+            for _ in 1:4
+                _test_reshape_func_alloc(ext)
+            end
 
             alloc = @allocated _test_reshape_func_alloc(ext)
             println("  @with_pool function (acquire+reshape+zeros!): $alloc bytes")
@@ -415,7 +417,9 @@
 
             function _measure_maybe_reshape(data, enabled)
                 MAYBE_POOLING_ENABLED[] = enabled
-                for _ in 1:4; _test_maybe_reshape_alloc(data); end
+                for _ in 1:4
+                    _test_maybe_reshape_alloc(data)
+                end
                 return @allocated _test_maybe_reshape_alloc(data)
             end
 
@@ -428,7 +432,7 @@
                 _measure_maybe_reshape(ext, false)
 
                 # Measure
-                alloc_pooled   = _measure_maybe_reshape(ext, true)
+                alloc_pooled = _measure_maybe_reshape(ext, true)
                 alloc_unpooled = _measure_maybe_reshape(ext, false)
 
                 println("  @maybe_with_pool pooled:   $alloc_pooled bytes")
diff --git a/test/test_state.jl b/test/test_state.jl
index 4254c87d..ce186686 100644
--- a/test/test_state.jl
+++ b/test/test_state.jl
@@ -863,7 +863,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
             v_bool .= true
             l3_results[] = (
                 bool_n_active = pool.bool.n_active,
-                depth = pool._current_depth
+                depth = pool._current_depth,
             )
         end
 
@@ -885,7 +885,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
             l2_results[] = (
                 int64_n_active = pool.int64.n_active,
                 float32_n_active = pool.float32.n_active,
-                l3_bool_after = pool.bool.n_active
+                l3_bool_after = pool.bool.n_active,
             )
         end
 
@@ -962,7 +962,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
                 bool_after_l3 = p.bool.n_active,      # Cleaned by L3
                 complexf64_after_l3 = p.complexf64.n_active,  # Cleaned by L3
                 l3_bool_was = l3_active[1],
-                l3_cf64_was = l3_active[2]
+                l3_cf64_was = l3_active[2],
             )
         end
 
@@ -1028,7 +1028,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
             v_bool .= true
             l3_results[] = (
                 bool_n_active = pool.bool.n_active,
-                depth = pool._current_depth
+                depth = pool._current_depth,
             )
         end
 
@@ -1050,7 +1050,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
             l2_results[] = (
                 int64_n_active = p.int64.n_active,
                 float32_n_active = p.float32.n_active,
-                l3_bool_after = p.bool.n_active
+                l3_bool_after = p.bool.n_active,
             )
         end
 
@@ -1182,7 +1182,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
                 l3_complexf64 = l3_active[3],
                 bool_after = p.bool.n_active,
                 int32_after = p.int32.n_active,
-                complexf64_after = p.complexf64.n_active
+                complexf64_after = p.complexf64.n_active,
             )
         end
 
@@ -1489,20 +1489,20 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
         using AdaptiveArrayPools: _fixed_slot_bit, Bit
 
         # Each fixed slot returns a unique nonzero bit
-        @test _fixed_slot_bit(Float64)    == UInt16(1) << 0
-        @test _fixed_slot_bit(Float32)    == UInt16(1) << 1
-        @test _fixed_slot_bit(Int64)      == UInt16(1) << 2
-        @test _fixed_slot_bit(Int32)      == UInt16(1) << 3
+        @test _fixed_slot_bit(Float64) == UInt16(1) << 0
+        @test _fixed_slot_bit(Float32) == UInt16(1) << 1
+        @test _fixed_slot_bit(Int64) == UInt16(1) << 2
+        @test _fixed_slot_bit(Int32) == UInt16(1) << 3
         @test _fixed_slot_bit(ComplexF64) == UInt16(1) << 4
         @test _fixed_slot_bit(ComplexF32) == UInt16(1) << 5
-        @test _fixed_slot_bit(Bool)       == UInt16(1) << 6
-        @test _fixed_slot_bit(Bit)        == UInt16(1) << 7
+        @test _fixed_slot_bit(Bool) == UInt16(1) << 6
+        @test _fixed_slot_bit(Bit) == UInt16(1) << 7
 
         # Non-fixed-slot types return 0
-        @test _fixed_slot_bit(UInt8)    == UInt16(0)
-        @test _fixed_slot_bit(UInt16)   == UInt16(0)
-        @test _fixed_slot_bit(Float16)  == UInt16(0)
-        @test _fixed_slot_bit(String)   == UInt16(0)
+        @test _fixed_slot_bit(UInt8) == UInt16(0)
+        @test _fixed_slot_bit(UInt16) == UInt16(0)
+        @test _fixed_slot_bit(Float16) == UInt16(0)
+        @test _fixed_slot_bit(String) == UInt16(0)
 
         # All 8 bits are unique (no collisions)
         bits = [_fixed_slot_bit(T) for T in (Float64, Float32, Int64, Int32, ComplexF64, ComplexF32, Bool, Bit)]
@@ -1745,8 +1745,8 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
         # causing false negatives: the typed fast path is rejected even when only
         # tracked types were touched.
         using AdaptiveArrayPools: _can_use_typed_path, _tracked_mask_for_types,
-              _lazy_checkpoint!, _lazy_rewind!, _LAZY_MODE_BIT, _TYPED_LAZY_BIT,
-              _acquire_impl!
+            _lazy_checkpoint!, _lazy_rewind!, _LAZY_MODE_BIT, _TYPED_LAZY_BIT,
+            _acquire_impl!
 
         # --- Case 1: _LAZY_MODE_BIT (bit 15) should be ignored ---
         pool = AdaptiveArrayPool()
@@ -2024,14 +2024,14 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
         checkpoint!(pool)
         depth = pool._current_depth  # = 2
 
-        @test pool.float64._checkpoint_depths[end]    == depth
-        @test pool.float32._checkpoint_depths[end]    == depth
-        @test pool.int64._checkpoint_depths[end]      == depth
-        @test pool.int32._checkpoint_depths[end]      == depth
+        @test pool.float64._checkpoint_depths[end] == depth
+        @test pool.float32._checkpoint_depths[end] == depth
+        @test pool.int64._checkpoint_depths[end] == depth
+        @test pool.int32._checkpoint_depths[end] == depth
         @test pool.complexf64._checkpoint_depths[end] == depth
         @test pool.complexf32._checkpoint_depths[end] == depth
-        @test pool.bool._checkpoint_depths[end]       == depth
-        @test pool.bits._checkpoint_depths[end]       == depth
+        @test pool.bool._checkpoint_depths[end] == depth
+        @test pool.bits._checkpoint_depths[end] == depth
 
         rewind!(pool)
     end
@@ -2117,10 +2117,10 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
             depth = pool._current_depth  # = 2
 
             # No typed pool should have an eager checkpoint at this depth
-            @test pool.float64._checkpoint_depths[end]    < depth
-            @test pool.float32._checkpoint_depths[end]    < depth
-            @test pool.int64._checkpoint_depths[end]      < depth
-            @test pool.bool._checkpoint_depths[end]       < depth
+            @test pool.float64._checkpoint_depths[end] < depth
+            @test pool.float32._checkpoint_depths[end] < depth
+            @test pool.int64._checkpoint_depths[end] < depth
+            @test pool.bool._checkpoint_depths[end] < depth
 
             # But depth metadata IS updated
             @test pool._current_depth == 2
@@ -2148,7 +2148,7 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
             # First acquire triggers lazy checkpoint for Float64 only
             acquire!(pool, Float64, 5)
             @test pool.float64._checkpoint_depths[end] == depth  # NOW checkpointed
-            @test pool.float32._checkpoint_depths[end] <  depth  # Float32 untouched
+            @test pool.float32._checkpoint_depths[end] < depth  # Float32 untouched
 
             rewind!(pool)
             @test pool.float64.n_active == 0
@@ -2439,4 +2439,4 @@ import AdaptiveArrayPools: _typed_lazy_checkpoint!, _typed_lazy_rewind!, _tracke
         end
     end
 
-end # State Management
\ No newline at end of file
+end # State Management
diff --git a/test/test_task_local_pool.jl b/test/test_task_local_pool.jl
index c408578d..61078f52 100644
--- a/test/test_task_local_pool.jl
+++ b/test/test_task_local_pool.jl
@@ -230,4 +230,4 @@
         empty!(pool)
     end
 
-end # Task-local Pool
\ No newline at end of file
+end # Task-local Pool
diff --git a/test/test_utils.jl b/test/test_utils.jl
index ebb32264..163adf86 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -2,7 +2,7 @@ import AdaptiveArrayPools: _validate_pool_return
 
 # Helper macro to capture stdout (must be defined before use)
 macro capture_out(expr)
-    quote
+    return quote
         local old_stdout = stdout
         local rd, wr = redirect_stdout()
         try
@@ -197,7 +197,7 @@ end
     end
 
     @testset "Base.show for TypedPool & BitTypedPool" begin
-        import AdaptiveArrayPools: TypedPool, BitTypedPool 
+        import AdaptiveArrayPools: TypedPool, BitTypedPool
 
         # Empty TypedPool - compact show
         tp_empty = TypedPool{Float64}()
@@ -573,4 +573,4 @@ end
         POOL_DEBUG[] = old_debug
     end
 
-end # Utilities and Debugging
\ No newline at end of file
+end # Utilities and Debugging
diff --git a/test/test_zero_allocation.jl b/test/test_zero_allocation.jl
index 7beab686..a1349e1c 100644
--- a/test/test_zero_allocation.jl
+++ b/test/test_zero_allocation.jl
@@ -11,7 +11,7 @@
 # 3. Verify: loop has 0 bytes allocation after warmup
 
 @testset "Zero-allocation Patterns" begin
-    
+
     # ==============================================================================
     # Pattern 1: acquire! only (SubArray) - N-D matrices
     # ==============================================================================
@@ -410,4 +410,4 @@
         end
     end
 
-end # Zero-allocation Patterns
\ No newline at end of file
+end # Zero-allocation Patterns