Skip to content

Commit c5eff45

Browse files
committed
Make the Windows job-reap test diagnosable and align its server shape
The job-handle-close reap test failed on all windows-latest legs with a silent 15s accept timeout: the grandchild's connect-back never arrived, and xdist swallows the server's stderr, so the logs could not say which link of the spawn chain broke. Reshape the server to the form the passing cancellation test already proves works on Windows CI: after spawning the child, the server itself connects back to the liveness listener, then parks on stdin. The test now accepts two connections, so a failure distinguishes "server never ran its connect line" from "child never connected". Capture the server's stderr via errlog into a temp file and attach it to every failure message, and wrap the child spawn in a try/except that reports to stderr before re-raising. The contract is unchanged: both sockets must close after a graceful exit (server FIN, child killed by the job-handle close), with returncode 0 and the escalation seam untouched.
1 parent 6b7fa82 commit c5eff45

1 file changed

Lines changed: 66 additions & 30 deletions

File tree

tests/transports/stdio/test_windows.py

Lines changed: 66 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import asyncio
1717
import sys
1818
from contextlib import AsyncExitStack
19+
from pathlib import Path
1920

2021
import anyio
2122
import anyio.abc
@@ -39,6 +40,7 @@
3940

4041

4142
async def test_a_gracefully_exited_servers_child_is_reaped_when_the_job_handle_closes( # pragma: no cover
43+
tmp_path: Path,
4244
spawned_processes: list[anyio.abc.Process | FallbackProcess],
4345
terminate_calls: list[anyio.abc.Process | FallbackProcess],
4446
) -> None:
@@ -54,44 +56,78 @@ async def test_a_gracefully_exited_servers_child_is_reaped_when_the_job_handle_c
5456
`terminate_calls == []` is the load-bearing distinction: it proves the child died
5557
through the graceful path's job-handle close and not through the escalation's
5658
`TerminateJobObject` — the two kills are indistinguishable on the socket.
59+
60+
The server connects back too (not just the child), and its stderr is captured
61+
through `errlog`: a failure then says *which* process never arrived and what the
62+
server printed, instead of one silent timeout — xdist swallows subprocess stderr
63+
on CI, so without the capture a broken spawn chain is undiagnosable there.
5764
"""
5865
async with AsyncExitStack() as stack:
5966
sock, port = await open_liveness_listener()
6067
stack.push_async_callback(sock.aclose)
6168

6269
child = connect_back_script(port)
63-
# The server hands its inherited Job membership to a child, then exits as
64-
# soon as its stdin closes — the well-behaved graceful path, so the
65-
# escalation never runs. The child inherits membership because the SDK
66-
# assigns the server to the Job synchronously after the spawn returns,
67-
# while the server's interpreter is still cold-starting — long before it
68-
# can Popen the child (job membership is inherited at CreateProcess, never
69-
# acquired retroactively).
70-
server = f"import subprocess, sys\nsubprocess.Popen([sys.executable, '-c', {child!r}])\nsys.stdin.read()\n"
70+
# The server spawns a child (its Popen failure, if any, is surfaced on
71+
# stderr), connects back itself, then exits as soon as its stdin closes —
72+
# the well-behaved graceful path, so the escalation never runs. The child
73+
# inherits Job membership because the SDK assigns the server to the Job
74+
# synchronously after the spawn returns, while the server's interpreter is
75+
# still cold-starting — long before it can Popen the child (job membership
76+
# is inherited at CreateProcess, never acquired retroactively).
77+
server = (
78+
f"import socket, subprocess, sys\n"
79+
f"try:\n"
80+
f" subprocess.Popen([sys.executable, '-c', {child!r}])\n"
81+
f"except BaseException as exc:\n"
82+
f" print(exc, file=sys.stderr, flush=True)\n"
83+
f" raise\n"
84+
f"s = socket.create_connection(('127.0.0.1', {port}))\n"
85+
f"s.sendall(b'alive')\n"
86+
f"sys.stdin.read()\n"
87+
)
7188
server_params = StdioServerParameters(command=sys.executable, args=["-c", server])
7289

73-
# The bound covers two Python interpreter cold starts on a loaded runner;
74-
# a healthy run takes well under a second.
75-
with anyio.fail_after(15.0):
76-
async with stdio_client(server_params):
77-
stream = await accept_alive(sock)
78-
stack.push_async_callback(stream.aclose)
79-
80-
# The child connected (so it joined the Job) and the context has fully
81-
# exited (so the job handle is closed). KILL_ON_JOB_CLOSE must have killed
82-
# the child: its socket closes abruptly. The `spawned_processes` recording
83-
# is load-bearing here beyond observability: `_process_jobs` is weak-keyed,
84-
# and the recorded strong reference pins the process object (and with it
85-
# the job-handle entry) across this assertion window — without it, a GC
86-
# between context exit and this assert could close the handle itself and
87-
# mask a regression in the deterministic close.
88-
await assert_stream_closed(stream)
89-
90-
leader = spawned_processes[0]
91-
# The graceful path: the server exited on stdin closure with code 0, and
92-
# the tree-termination escalation was never invoked.
93-
assert leader.returncode == 0
94-
assert terminate_calls == []
90+
with (tmp_path / "errlog.txt").open("w+", encoding="utf-8") as errlog:
91+
92+
def server_stderr() -> str:
93+
errlog.seek(0)
94+
return errlog.read()
95+
96+
try:
97+
# The bound covers two Python interpreter cold starts on a loaded
98+
# runner; a healthy run takes well under a second.
99+
with anyio.fail_after(15.0):
100+
async with stdio_client(server_params, errlog=errlog):
101+
# The server and child race to connect; accept both,
102+
# order-agnostic (accept_alive verifies each banner).
103+
streams: list[anyio.abc.SocketStream] = []
104+
for _ in range(2):
105+
stream = await accept_alive(sock)
106+
stack.push_async_callback(stream.aclose)
107+
streams.append(stream)
108+
except TimeoutError:
109+
pytest.fail(f"a liveness connection never arrived; server stderr: {server_stderr()!r}")
110+
111+
# Both peers connected and the context has fully exited, closing the
112+
# job handle. KILL_ON_JOB_CLOSE must have killed the child, and the
113+
# server died with its graceful exit: both sockets close. The
114+
# `spawned_processes` recording is load-bearing here beyond
115+
# observability: `_process_jobs` is weak-keyed, and the recorded strong
116+
# reference pins the process object (and with it the job-handle entry)
117+
# across this assertion window — without it, a GC between context exit
118+
# and this assert could close the handle itself and mask a regression
119+
# in the deterministic close.
120+
try:
121+
for stream in streams:
122+
await assert_stream_closed(stream)
123+
except TimeoutError:
124+
pytest.fail(f"a socket stayed open after shutdown; server stderr: {server_stderr()!r}")
125+
126+
leader = spawned_processes[0]
127+
# The graceful path: the server exited on stdin closure with code 0,
128+
# and the tree-termination escalation was never invoked.
129+
assert leader.returncode == 0, server_stderr()
130+
assert terminate_calls == [], server_stderr()
95131

96132

97133
# Overrides the suite-wide plain-"asyncio" anyio_backend fixture for this test only:

0 commit comments

Comments
 (0)