@@ -70,6 +70,95 @@ int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c) JL_NOTSAFEPOINT;
7070static void jl_longjmp_in_ctx (int sig , void * _ctx , jl_jmp_buf jmpbuf );
7171
7272#if !defined(_OS_DARWIN_ )
73+ extern void jl_fake_signal_return (void );
74+ // Create a trampoline function that does the stack manipulations for jl_call_in_ctx/jl_call_in_state
75+ // The callee-saved registers still may get smashed (by the cdecl fptr), since we didn't explicitly copy all of the
76+ // state to the stack (to build a real sigreturn frame).
77+ #if (defined(_OS_LINUX_ ) || defined(_OS_FREEBSD_ ) || defined(_OS_OPENBSD_ )) && defined(_CPU_X86_64_ )
// x86-64 trampoline `jl_fake_signal_return`.
// Entered with %rdi pointing at four consecutive 8-byte slots: pc, sp, fptr, arg.
// It copies pc and sp onto the current stack, describes them with CFI so an unwinder
// can continue into the interrupted frame, then calls fptr with %rdi reloaded from
// the arg slot. Control is not expected to come back from fptr.
78+ __asm__(
79+ " .type jl_fake_signal_return, @function\n"
80+ "jl_fake_signal_return:\n"
81+ " .cfi_startproc\n"
82+ " .cfi_signal_frame\n"
83+ // Mark as end of stack until frame is set up (no rule yet for recovering the caller's rip/rsp)
84+ " .cfi_undefined %rip\n"
85+ " .cfi_undefined %rsp\n"
86+ // rdi points to signal_ctx_pc in ptls (followed by signal_ctx_sp, signal_ctx_fptr, signal_ctx_arg at +8, +16, +24)
87+ " pushq (%rdi)\n" // push pc (signal_ctx_pc)
88+ " pushq 8(%rdi)\n" // push sp (signal_ctx_sp)
89+ // stack layout: [sp, pc] (pc at higher address, like return address after call)
90+ " .cfi_def_cfa %rsp, 8\n" // CFA = address of the pc slot
91+ " .cfi_offset %rip, 0\n" // previous %rip at CFA+0 (pc slot at rsp+8)
92+ " .cfi_offset %rsp, -8\n" // previous %rsp at CFA-8 (sp slot at rsp+0)
93+ " pushq 16(%rdi)\n" // push fptr (signal_ctx_fptr)
94+ " .cfi_def_cfa %rsp, 16\n" // rsp moved down by 8; CFA still names the pc slot
95+ " movq 24(%rdi), %rdi\n" // restore original rdi from signal_ctx_arg (the pointer in rdi is gone after this)
96+ " subq $8, %rsp\n" // pad: 3 pushes + 8 = 32 bytes, so rsp keeps 16-byte alignment if it had it on entry
97+ " .cfi_def_cfa %rsp, 24\n" // rsp moved down by 8 again; CFA still names the pc slot
98+ " callq *8(%rsp)\n" // call fptr (its slot sits just above the 8-byte pad)
99+ " ud2\n" // traps if fptr ever returns; not expected to be reached
100+ " .cfi_endproc\n"
101+ " .size jl_fake_signal_return, .-jl_fake_signal_return\n"
102+ );
103+
104+ #elif (defined(_OS_LINUX_ ) || defined(_OS_FREEBSD_ )) && defined(_CPU_X86_ )
// 32-bit x86 trampoline `jl_fake_signal_return`.
// Entered with %eax pointing at four consecutive 4-byte slots: pc, sp, fptr, arg.
// It copies pc and sp onto the current stack, describes them with CFI so an unwinder
// can continue into the interrupted frame, then calls fptr with %eax reloaded from
// the arg slot. Control is not expected to come back from fptr.
105+ __asm__(
106+ " .type jl_fake_signal_return, @function\n"
107+ "jl_fake_signal_return:\n"
108+ " .cfi_startproc\n"
109+ " .cfi_signal_frame\n"
110+ // Mark as end of stack until frame is set up
// NOTE(review): the register is given as DWARF number 1, which on i386 is presumably %ecx rather than
// %eip; the x86-64 variant names %rip and %rsp explicitly. Confirm this is the intended register.
111+ " .cfi_undefined 1\n"
112+ // eax points to signal_ctx_pc in ptls (followed by signal_ctx_sp, signal_ctx_fptr, signal_ctx_arg at +4, +8, +12)
113+ " pushl (%eax)\n" // push pc (signal_ctx_pc)
114+ " pushl 4(%eax)\n" // push sp (signal_ctx_sp)
115+ // stack layout: [sp, pc] (pc at higher address, like return address after call)
116+ " .cfi_def_cfa %esp, 4\n" // CFA = address of the pc slot
117+ " .cfi_offset %eip, 0\n" // previous %eip at CFA+0 (pc slot at esp+4)
118+ " .cfi_offset %esp, -4\n" // previous %esp at CFA-4 (sp slot at esp+0)
119+ " pushl 8(%eax)\n" // push fptr (signal_ctx_fptr)
120+ " .cfi_def_cfa %esp, 8\n" // esp moved down by 4; CFA still names the pc slot
121+ " movl 12(%eax), %eax\n" // restore original eax from signal_ctx_arg (the pointer in eax is gone after this)
122+ " subl $4, %esp\n" // pad: 3 pushes + 4 = 16 bytes, so esp keeps 16-byte alignment if it had it on entry
123+ " .cfi_def_cfa %esp, 12\n" // esp moved down by 4 again; CFA still names the pc slot
124+ " calll *4(%esp)\n" // call fptr (its slot sits just above the 4-byte pad)
125+ " ud2\n" // traps if fptr ever returns; not expected to be reached
126+ " .cfi_endproc\n"
127+ " .size jl_fake_signal_return, .-jl_fake_signal_return\n"
128+ );
129+ #elif (defined(_OS_LINUX_ ) || defined(_OS_FREEBSD_ )) && defined(_CPU_AARCH64_ )
// AArch64 trampoline `jl_fake_signal_return`.
// Entered with x0 pointing at four consecutive 8-byte slots: pc, sp, fptr, arg.
// It stores sp and pc as a pair on the current stack, describes them with CFI so an
// unwinder can continue into the interrupted frame, then branches to fptr with x0
// reloaded from the arg slot. x1 and x2 are used as scratch and are not restored.
130+ __asm__(
131+ " .type jl_fake_signal_return, @function\n"
132+ "jl_fake_signal_return:\n"
133+ " .cfi_startproc\n"
134+ " .cfi_signal_frame\n"
135+ // Mark as end of stack until frame is set up
// NOTE(review): the register is given as DWARF number 1, which on AArch64 is presumably x1 rather than
// lr (x30); the rules below refer to lr and sp by name. Confirm this is the intended register.
136+ " .cfi_undefined 1\n"
137+ // x0 points to signal_ctx_pc in ptls (followed by signal_ctx_sp, signal_ctx_fptr, signal_ctx_arg at +8, +16, +24)
138+ " ldp x1, x2, [x0]\n" // load pc (x1) and sp (x2)
139+ " stp x2, x1, [sp, #-16]!\n" // push sp and pc (sp at lower addr, pc at higher addr); sp drops by 16 in one step
140+ // stack layout: [sp, pc] (pc at higher address, like return address after call)
141+ " .cfi_def_cfa sp, 16\n" // CFA = sp value before the 16-byte push
142+ " .cfi_offset lr, -8\n" // previous lr (pc) at CFA-8 (pc slot at sp+8)
143+ " .cfi_offset sp, -16\n" // previous sp at CFA-16 (sp slot at sp+0)
144+ // This is not quite valid, since the AArch64 DWARF spec lacks the ability to define how to restore the LR register correctly,
145+ // so normally libunwind implementations on linux detect this function specially and hack around the invalid info:
146+ // https://github.com/llvm/llvm-project/commit/c82deed6764cbc63966374baf9721331901ca958
147+ " ldp x1, x2, [x0, #16]\n" // load fptr (x1) and saved x0 (x2)
148+ " mov x0, x2\n" // restore original x0 (the pointer in x0 is gone after this)
149+ " blr x1\n" // call fptr
150+ " brk #1\n" // traps if fptr ever returns; not expected to be reached
151+ " .cfi_endproc\n"
152+ " .size jl_fake_signal_return, .-jl_fake_signal_return\n"
153+ );
154+ #else
// Fallback definition of jl_fake_signal_return for targets that match none of the
// assembly variants in the preceding #if/#elif branches: it takes no arguments and
// simply aborts the process if it is ever called.
155+ extern void JL_NORETURN jl_fake_signal_return (void )
156+ {
157+ CFI_NORETURN // macro defined outside this view; presumably emits unwind info marking this frame as never returning — confirm
158+ abort (); // terminate immediately; there is no trampoline behaviour to provide here
159+ }
160+ #endif
161+
73162static inline uintptr_t jl_get_rsp_from_ctx (const void * _ctx )
74163{
75164#if defined(_OS_LINUX_ ) && defined(_CPU_X86_64_ )
@@ -123,46 +212,79 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
123212 // will not be part of the validation...
124213 uintptr_t rsp = jl_get_rsp_from_ctx (_ctx );
125214 rsp = (rsp - 256 ) & ~(uintptr_t )15 ; // redzone and re-alignment
215+ assert (rsp % 16 == 0 );
126216#if defined(_OS_LINUX_ ) && defined(_CPU_X86_64_ )
127217 ucontext_t * ctx = (ucontext_t * )_ctx ;
128- rsp -= sizeof (void * );
129- * (uintptr_t * )rsp = 0 ;
130- ctx -> uc_mcontext .gregs [REG_RSP ] = rsp ;
131- ctx -> uc_mcontext .gregs [REG_RIP ] = (uintptr_t )fptr ;
218+ // Save context in ptls for stack unwinding
219+ ptls -> signal_ctx_pc = ctx -> uc_mcontext .gregs [REG_RIP ];
220+ ptls -> signal_ctx_sp = ctx -> uc_mcontext .gregs [REG_RSP ];
221+ ptls -> signal_ctx_fptr = fptr ;
222+ ptls -> signal_ctx_arg = ctx -> uc_mcontext .gregs [REG_RDI ];
223+ ctx -> uc_mcontext .gregs [REG_RSP ] = rsp ; // set stack pointer
224+ ctx -> uc_mcontext .gregs [REG_RDI ] = (uintptr_t )& ptls -> signal_ctx_pc ; // first arg points to signal_ctx
225+ ctx -> uc_mcontext .gregs [REG_RIP ] = (uintptr_t )& jl_fake_signal_return ; // "call" jl_fake_signal_return
132226#elif defined(_OS_FREEBSD_ ) && defined(_CPU_X86_64_ )
133227 ucontext_t * ctx = (ucontext_t * )_ctx ;
134- rsp -= sizeof (void * );
135- * (uintptr_t * )rsp = 0 ;
136- ctx -> uc_mcontext .mc_rsp = rsp ;
137- ctx -> uc_mcontext .mc_rip = (uintptr_t )fptr ;
228+ // Save context in ptls for stack unwinding
229+ ptls -> signal_ctx_pc = ctx -> uc_mcontext .mc_rip ;
230+ ptls -> signal_ctx_sp = ctx -> uc_mcontext .mc_rsp ;
231+ ptls -> signal_ctx_fptr = fptr ;
232+ ptls -> signal_ctx_arg = ctx -> uc_mcontext .mc_rdi ;
233+ ctx -> uc_mcontext .mc_rsp = rsp ; // set stack pointer
234+ ctx -> uc_mcontext .mc_rdi = (uintptr_t )& ptls -> signal_ctx_pc ; // first arg points to signal_ctx
235+ ctx -> uc_mcontext .mc_rip = (uintptr_t )& jl_fake_signal_return ; // "call" jl_fake_signal_return
138236#elif defined(_OS_LINUX_ ) && defined(_CPU_X86_ )
139237 ucontext_t * ctx = (ucontext_t * )_ctx ;
140- rsp -= sizeof (void * );
141- * (uintptr_t * )rsp = 0 ;
142- ctx -> uc_mcontext .gregs [REG_ESP ] = rsp ;
143- ctx -> uc_mcontext .gregs [REG_EIP ] = (uintptr_t )fptr ;
238+ // Save context in ptls for stack unwinding
239+ ptls -> signal_ctx_pc = ctx -> uc_mcontext .gregs [REG_EIP ];
240+ ptls -> signal_ctx_sp = ctx -> uc_mcontext .gregs [REG_ESP ];
241+ ptls -> signal_ctx_fptr = fptr ;
242+ ptls -> signal_ctx_arg = ctx -> uc_mcontext .gregs [REG_EAX ];
243+ ctx -> uc_mcontext .gregs [REG_ESP ] = rsp ; // set stack pointer
244+ ctx -> uc_mcontext .gregs [REG_EAX ] = (uintptr_t )& ptls -> signal_ctx_pc ; // set eax to point to signal_ctx
245+ ctx -> uc_mcontext .gregs [REG_EIP ] = (uintptr_t )& jl_fake_signal_return ; // "call" jl_fake_signal_return
144246#elif defined(_OS_FREEBSD_ ) && defined(_CPU_X86_ )
145247 ucontext_t * ctx = (ucontext_t * )_ctx ;
146- rsp -= sizeof (void * );
147- * (uintptr_t * )rsp = 0 ;
148- ctx -> uc_mcontext .mc_esp = rsp ;
149- ctx -> uc_mcontext .mc_eip = (uintptr_t )fptr ;
248+ // Save context in ptls for stack unwinding
249+ ptls -> signal_ctx_pc = ctx -> uc_mcontext .mc_eip ;
250+ ptls -> signal_ctx_sp = ctx -> uc_mcontext .mc_esp ;
251+ ptls -> signal_ctx_fptr = fptr ;
252+ ptls -> signal_ctx_arg = ctx -> uc_mcontext .mc_eax ;
253+ ctx -> uc_mcontext .mc_esp = rsp ; // set stack pointer
254+ ctx -> uc_mcontext .mc_eax = (uintptr_t )& ptls -> signal_ctx_pc ; // set eax to point to signal_ctx
255+ ctx -> uc_mcontext .mc_eip = (uintptr_t )& jl_fake_signal_return ; // "call" jl_fake_signal_return
150256#elif defined(_OS_OPENBSD_ ) && defined(_CPU_X86_64_ )
151257 struct sigcontext * ctx = (struct sigcontext * )_ctx ;
152- rsp -= sizeof (void * );
153- * (uintptr_t * )rsp = 0 ;
154- ctx -> sc_rsp = rsp ;
155- ctx -> sc_rip = fptr ;
258+ // Save context in ptls for stack unwinding
259+ ptls -> signal_ctx_pc = ctx -> sc_rip ;
260+ ptls -> signal_ctx_sp = ctx -> sc_rsp ;
261+ ptls -> signal_ctx_fptr = fptr ;
262+ ptls -> signal_ctx_arg = ctx -> sc_rdi ;
263+ ctx -> sc_rsp = rsp ; // set stack pointer
264+ ctx -> sc_rdi = (uintptr_t )& ptls -> signal_ctx_pc ; // first arg points to signal_ctx
265+ ctx -> sc_rip = (uintptr_t )& jl_fake_signal_return ; // "call" jl_fake_signal_return
156266#elif defined(_OS_LINUX_ ) && defined(_CPU_AARCH64_ )
157267 ucontext_t * ctx = (ucontext_t * )_ctx ;
158- ctx -> uc_mcontext .sp = rsp ;
159- ctx -> uc_mcontext .regs [29 ] = 0 ; // Clear link register (x29)
160- ctx -> uc_mcontext .pc = (uintptr_t )fptr ;
268+ // Save context in ptls for stack unwinding
269+ ptls -> signal_ctx_pc = (uintptr_t )ctx -> uc_mcontext .pc ;
270+ ptls -> signal_ctx_sp = ctx -> uc_mcontext .sp ;
271+ ptls -> signal_ctx_fptr = fptr ;
272+ ptls -> signal_ctx_arg = ctx -> uc_mcontext .regs [0 ];
273+ ctx -> uc_mcontext .sp = rsp ; // sp
274+ ctx -> uc_mcontext .regs [0 ] = (uintptr_t )& ptls -> signal_ctx_pc ; // first arg points to signal_ctx
275+ ctx -> uc_mcontext .pc = (uint64_t )& jl_fake_signal_return ; // pc
276+ ctx -> uc_mcontext .regs [30 ] = 0 ; // clear lr (x30)
161277#elif defined(_OS_FREEBSD_ ) && defined(_CPU_AARCH64_ )
162278 ucontext_t * ctx = (ucontext_t * )_ctx ;
163- ctx -> uc_mcontext .mc_gpregs .gp_sp = rsp ;
164- ctx -> uc_mcontext .mc_gpregs .gp_x [29 ] = 0 ; // Clear link register (x29)
165- ctx -> uc_mcontext .mc_gpregs .gp_elr = (uintptr_t )fptr ;
279+ // Save context in ptls for stack unwinding
280+ ptls -> signal_ctx_pc = ctx -> uc_mcontext .mc_gpregs .gp_elr ;
281+ ptls -> signal_ctx_sp = ctx -> uc_mcontext .mc_gpregs .gp_sp ;
282+ ptls -> signal_ctx_fptr = fptr ;
283+ ptls -> signal_ctx_arg = ctx -> uc_mcontext .mc_gpregs .gp_x [0 ];
284+ ctx -> uc_mcontext .mc_gpregs .gp_sp = rsp ; // set stack pointer
285+ ctx -> uc_mcontext .mc_gpregs .gp_x [0 ] = (uintptr_t )& ptls -> signal_ctx_pc ; // first arg points to signal_ctx
286+ ctx -> uc_mcontext .mc_gpregs .gp_elr = (uintptr_t )& jl_fake_signal_return ; // pc
287+ ctx -> uc_mcontext .mc_gpregs .gp_lr = 0 ; // clear lr (x30)
166288#elif defined(_OS_LINUX_ ) && defined(_CPU_ARM_ )
167289 ucontext_t * ctx = (ucontext_t * )_ctx ;
168290 uintptr_t target = (uintptr_t )fptr ;
@@ -549,9 +671,8 @@ static void jl_try_deliver_sigint(void)
549671// Write only by signal handling thread, read only by main thread
550672// no sync necessary.
551673static int thread0_exit_signo = 0 ;
552- static void JL_NORETURN jl_exit_thread0_cb (void )
674+ static void jl_exit_thread0_cb (void )
553675{
554- CFI_NORETURN
555676 jl_atomic_fetch_add (& jl_gc_disable_counter , -1 );
556677 jl_fprint_critical_error (ios_safe_stderr , thread0_exit_signo , 0 , NULL , jl_current_task );
557678 jl_atexit_hook (128 );
0 commit comments