1616
1717// ================================================================================
1818// this file has been auto-generated, do not modify its contents!
19- // date: 2024-04-22 13:28:09.684538
20- // git hash: fd4eadfbb0c8597276a6c12f972038cd1baff985
19+ // date: 2024-04-26 10:06:43.573011
20+ // git hash: a9c7d752a7329ae5187e3e9362a2b47c9f38371a
2121// ================================================================================
2222
2323#ifndef KERNEL_FLOAT_MACROS_H
// Invokes `F` with the remaining arguments: `KERNEL_FLOAT_CALL(F, a, b)` expands to `F(a, b)`.
// NOTE: there must be no whitespace between the macro name and `(`; otherwise the
// preprocessor treats this as an object-like macro and the parameter list becomes
// part of the replacement text.
#define KERNEL_FLOAT_CALL(F, ...) F(__VA_ARGS__)
7373
7474// TODO: check if this way is supported across all compilers
// KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT)
//
// Evaluates to PTR. When the builtin path is enabled, PTR is additionally cast to
// `TYPE*` and passed through `__builtin_assume_aligned` with the given ALIGNMENT.
//
// The builtin path is currently switched off by the `&& 0` below, so every
// configuration expands to plain `(PTR)`.
//
// The `__has_builtin(...)` test sits in a nested `#if` on purpose: on compilers that
// do not define `__has_builtin`, the outer condition is false and the nested directive
// is skipped without being evaluated, so it cannot cause a preprocessing error.
#if defined(__has_builtin) && 0  // Seems that `__builtin_assume_aligned` leads to segfaults
    #if __has_builtin(__builtin_assume_aligned)
        #define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) \
            static_cast<TYPE*>(__builtin_assume_aligned(static_cast<TYPE*>(PTR), (ALIGNMENT)))
    #else
        #define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) (PTR)
    #endif
#else
    #define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) (PTR)
#endif
@@ -4321,8 +4324,8 @@ KERNEL_FLOAT_FP8_CAST(double)
43214324namespace kernel_float {
43224325KERNEL_FLOAT_DEFINE_PROMOTED_TYPE (__half, __nv_fp8_e4m3)
43234326KERNEL_FLOAT_DEFINE_PROMOTED_TYPE (__half, __nv_fp8_e5m2)
4324- KERNEL_FLOAT_FP8_CAST (__half)
43254327
4328+ KERNEL_FLOAT_FP8_CAST (__half)
43264329KERNEL_FLOAT_FP8_CAST2 (__half, __nv_fp8_e4m3, __NV_E4M3)
43274330KERNEL_FLOAT_FP8_CAST2 (__half, __nv_fp8_e5m2, __NV_E5M2)
43284331
@@ -4335,8 +4338,8 @@ KERNEL_FLOAT_FP8_CAST2(__half, __nv_fp8_e5m2, __NV_E5M2)
43354338namespace kernel_float {
43364339KERNEL_FLOAT_DEFINE_PROMOTED_TYPE (__nv_bfloat16, __nv_fp8_e4m3)
43374340KERNEL_FLOAT_DEFINE_PROMOTED_TYPE (__nv_bfloat16, __nv_fp8_e5m2)
4338- KERNEL_FLOAT_FP8_CAST (__nv_bfloat16)
43394341
4342+ KERNEL_FLOAT_FP8_CAST (__nv_bfloat16)
43404343KERNEL_FLOAT_FP8_CAST2 (__nv_bfloat16, __nv_fp8_e4m3, __NV_E4M3)
43414344KERNEL_FLOAT_FP8_CAST2 (__nv_bfloat16, __nv_fp8_e5m2, __NV_E5M2)
43424345} // namespace kernel_float
0 commit comments