|
16 | 16 |
|
17 | 17 | //================================================================================ |
18 | 18 | // this file has been auto-generated, do not modify its contents! |
19 | | -// date: 2024-07-24 15:35:29.178410 |
20 | | -// git hash: 986ca557aa59f869d68fe1e7184c2228517ea52d |
| 19 | +// date: 2024-09-23 14:12:25.024358 |
| 20 | +// git hash: 3a88b56a57cce5e1f3365aa6e8efb76a14f7f865 |
21 | 21 | //================================================================================ |
22 | 22 |
|
23 | 23 | #ifndef KERNEL_FLOAT_MACROS_H |
|
85 | 85 |
|
86 | 86 | #define KERNEL_FLOAT_MAX_ALIGNMENT (32) |
87 | 87 |
|
88 | | -#ifndef KERNEL_FLOAT_FAST_MATH |
| 88 | +#if KERNEL_FLOAT_FAST_MATH |
89 | 89 | #define KERNEL_FLOAT_POLICY ::kernel_float::fast_policy; |
90 | 90 | #endif |
91 | 91 |
|
@@ -424,7 +424,6 @@ struct alignas(Alignment) aligned_array<T, 1, Alignment> { |
424 | 424 | }; |
425 | 425 |
|
426 | 426 | template<typename T, size_t Alignment> |
427 | | - |
428 | 427 | struct aligned_array<T, 0, Alignment> { |
429 | 428 | KERNEL_FLOAT_INLINE |
430 | 429 | T* data() { |
@@ -807,19 +806,23 @@ namespace detail { |
807 | 806 | template<typename Policy, typename F, size_t N, typename Output, typename... Args> |
808 | 807 | struct map_policy_impl { |
809 | 808 | static constexpr size_t packet_size = preferred_vector_size<Output>::value; |
| 809 | + static constexpr size_t remainder = N % packet_size; |
810 | 810 |
|
811 | 811 | KERNEL_FLOAT_INLINE static void call(F fun, Output* output, const Args*... args) { |
812 | 812 | if constexpr (N / packet_size > 0) { |
813 | 813 | #pragma unroll |
814 | | - for (size_t i = 0; i < N - N % packet_size; i += packet_size) { |
815 | | - Policy::template type<F, N, Output, Args...>::call(fun, output + i, (args + i)...); |
| 814 | + for (size_t i = 0; i < N - remainder; i += packet_size) { |
| 815 | + Policy::template type<F, packet_size, Output, Args...>::call( |
| 816 | + fun, |
| 817 | + output + i, |
| 818 | + (args + i)...); |
816 | 819 | } |
817 | 820 | } |
818 | 821 |
|
819 | | - if constexpr (N % packet_size > 0) { |
| 822 | + if constexpr (remainder > 0) { |
820 | 823 | #pragma unroll |
821 | | - for (size_t i = N - N % packet_size; i < N; i++) { |
822 | | - Policy::template type<F, N, Output, Args...>::call(fun, output + i, (args + i)...); |
| 824 | + for (size_t i = N - remainder; i < N; i++) { |
| 825 | + Policy::template type<F, 1, Output, Args...>::call(fun, output + i, (args + i)...); |
823 | 826 | } |
824 | 827 | } |
825 | 828 | } |
|
0 commit comments