Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/69582.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed ``salt-run manage.status``/``manage.up``/``manage.down`` reporting every targeted minion as up because synthesized ``no_return`` rows from ``LocalClient.get_cli_event_returns`` were being counted as successful returns.
13 changes: 12 additions & 1 deletion salt/runners/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,21 @@ def _ping(tgt, tgt_type, timeout, gather_job_timeout):
tgt,
tgt_type,
gather_job_timeout=gather_job_timeout,
# ``get_cli_event_returns`` defaults ``expect_minions`` to True so
# the ``salt`` CLI emits per-target timeout rows. The manage
# runner derives ``up`` / ``down`` from actual returns and
# treating the synthesized ``no_return`` rows as returns would
# report every targeted minion as up (issue #69582).
expect_minions=False,
):

if fn_ret:
for mid, _ in fn_ret.items():
for mid, min_ret in fn_ret.items():
# Defense in depth: drop synthesized timeout rows in case
# a caller passes ``expect_minions=True`` through kwargs
# or a future change re-enables them by default.
if isinstance(min_ret, dict) and min_ret.get("out") == "no_return":
continue
log.debug("minion '%s' returned from ping", mid)
returned.add(mid)

Expand Down
155 changes: 155 additions & 0 deletions tests/pytests/integration/runners/test_manage_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
"""
Integration regression tests for ``salt-run manage.status`` /
``manage.up`` / ``manage.down``.

The bug fixed by this module (issue #69582): ``manage._ping`` was
treating the synthesized ``no_return`` rows yielded by
``LocalClient.get_cli_event_returns`` (now emitted by default with
``expect_minions=True``) as successful returns. That made
``manage.status`` report every targeted minion as up and
``manage.down`` return nothing, even when a known-accepted minion was
not running.

These tests exercise the real runner against live salt-minions plus a
fake accepted-but-absent minion key, so the bug reproduces end to end
without mocks.
"""

import logging
import os
import shutil

import pytest

import salt.utils.path

log = logging.getLogger(__name__)

pytestmark = [
pytest.mark.slow_test,
pytest.mark.windows_whitelisted,
]


FAKE_DOWN_MINION_ID = "down-minion-69582"


@pytest.fixture
def fake_down_minion(salt_master, salt_minion):
"""
Make the master treat ``down-minion-69582`` as an accepted minion by
cloning ``salt_minion``'s public key under the fake id. No daemon
will respond on that id so any targeting call should time it out --
which is exactly what ``manage.down`` should surface.
"""
accepted = salt.utils.path.join(
salt_master.config["pki_dir"], "minions", salt_minion.id
)
fake = salt.utils.path.join(
salt_master.config["pki_dir"], "minions", FAKE_DOWN_MINION_ID
)
shutil.copyfile(accepted, fake)
try:
yield FAKE_DOWN_MINION_ID
finally:
try:
os.remove(fake)
except FileNotFoundError:
pass
except OSError as exc:
log.error(
"Failed to remove %s, this may affect other tests: %s",
fake,
exc,
)


def _no_return_marker_in(result):
"""
Verify the runner never emitted a synthesized ``no_return`` payload
in a slot that should hold a minion id. This is the original
user-visible symptom of #69582: ``manage.up`` was returning entries
whose ids were actually timeout placeholders.
"""
if isinstance(result, dict):
values = list(result.values())
keys = list(result.keys())
else:
values = list(result)
keys = list(result)
for slot in keys + values:
if isinstance(slot, str) and "no_return" in slot.lower():
return True
if isinstance(slot, dict) and slot.get("out") == "no_return":
return True
return False


def test_manage_status_partitions_live_and_down_minions(
salt_run_cli, salt_minion, salt_sub_minion, fake_down_minion
):
"""
``salt-run manage.status`` must list live minions in ``up`` and the
fake accepted-but-absent minion in ``down``.

Regression for https://github.com/saltstack/salt/issues/69582
where the result was ``{"up": [<everything>], "down": []}``.
"""
ret = salt_run_cli.run(
"manage.status", "timeout=5", "gather_job_timeout=10", _timeout=120
)
assert ret.returncode == 0, ret
data = ret.data
assert isinstance(data, dict), data
assert set(data.keys()) == {"up", "down"}, data
up = data["up"] or []
down = data["down"] or []
assert salt_minion.id in up, data
assert salt_sub_minion.id in up, data
assert fake_down_minion in down, data
assert fake_down_minion not in up, data
assert salt_minion.id not in down, data
assert salt_sub_minion.id not in down, data
assert not _no_return_marker_in(up), data
assert not _no_return_marker_in(down), data


def test_manage_up_excludes_down_minions(
salt_run_cli, salt_minion, salt_sub_minion, fake_down_minion
):
"""
``salt-run manage.up`` must omit minions that never returned.
Before the fix the fake accepted-but-absent minion was reported as
up because the synthesized ``no_return`` row was counted as a
successful return.
"""
ret = salt_run_cli.run(
"manage.up", "timeout=5", "gather_job_timeout=10", _timeout=120
)
assert ret.returncode == 0, ret
data = ret.data
assert isinstance(data, list), data
assert salt_minion.id in data, data
assert salt_sub_minion.id in data, data
assert fake_down_minion not in data, data
assert not _no_return_marker_in(data), data


def test_manage_down_includes_unresponsive_minions(
salt_run_cli, salt_minion, salt_sub_minion, fake_down_minion
):
"""
``salt-run manage.down`` must list the fake accepted-but-absent
minion. Before the fix it returned an empty list regardless of how
many accepted minions were offline.
"""
ret = salt_run_cli.run(
"manage.down", "timeout=5", "gather_job_timeout=10", _timeout=120
)
assert ret.returncode == 0, ret
data = ret.data
assert isinstance(data, list), data
assert fake_down_minion in data, data
assert salt_minion.id not in data, data
assert salt_sub_minion.id not in data, data
assert not _no_return_marker_in(data), data
130 changes: 130 additions & 0 deletions tests/pytests/unit/runners/test_manage_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
"""
Regression tests for the ``manage.status`` / ``manage.up`` / ``manage.down``
runner behavior.

The bug fixed by this module: ``manage._ping`` iterates the events yielded by
``LocalClient.get_cli_event_returns`` and adds every minion id it sees to the
``returned`` set. When ``get_cli_event_returns`` is called with
``expect_minions=True`` it also yields synthesized ``no_return`` entries for
minions that did **not** respond. Treating those synthesized entries as
returns made ``manage.status`` report every minion as up and never list any
minion as down (issue #69582).
"""

from contextlib import contextmanager

import pytest

from salt.runners import manage
from tests.support.mock import MagicMock, patch


@pytest.fixture
def configure_loader_modules():
return {
manage: {
"__opts__": {
"conf_file": "/etc/salt/master",
"timeout": 5,
"gather_job_timeout": 10,
},
},
}


class _FakeClient:
"""Stand-in for ``salt.client.LocalClient`` exposing the surface the
``manage._ping`` helper uses."""

def __init__(self, minions, events):
self._minions = minions
self._events = events

def __enter__(self):
return self

def __exit__(self, exc_type, exc, tb):
return False

def run_job(self, *args, **kwargs): # pylint: disable=unused-argument
return {"jid": "20260627000000000000", "minions": list(self._minions)}

def _get_timeout(self, timeout):
return timeout

def get_cli_event_returns(self, *args, **kwargs): # pylint: disable=unused-argument
yield from self._events


@contextmanager
def _patched_client(fake_client):
@contextmanager
def _factory(*args, **kwargs): # pylint: disable=unused-argument
yield fake_client

with patch("salt.client.get_local_client", _factory):
yield


def test_manage_status_reports_unresponsive_minions_as_down():
"""
``manage.status`` must distinguish actual returns from the synthetic
``no_return`` failure events that ``LocalClient.get_cli_event_returns``
emits when ``expect_minions`` is True.

Issue: https://github.com/saltstack/salt/issues/69582
"""
targeted = ["minion-up", "minion-down"]
events = [
# Real return from the responsive minion.
{"minion-up": {"ret": True, "retcode": 0, "out": "highstate"}},
# Synthetic ``no_return`` row emitted by ``get_cli_event_returns``
# for a minion that did not respond in time.
{
"minion-down": {
"out": "no_return",
"ret": "Minion did not return. [No response]",
"retcode": 2,
},
},
]
fake = _FakeClient(targeted, events)
with _patched_client(fake):
result = manage.status(output=False)
assert result == {"up": ["minion-up"], "down": ["minion-down"]}


def test_manage_down_reports_unresponsive_minions():
"""``manage.down`` should include minions that fail to return."""
targeted = ["minion-up", "minion-down"]
events = [
{"minion-up": {"ret": True, "retcode": 0, "out": "highstate"}},
{
"minion-down": {
"out": "no_return",
"ret": "Minion did not return. [No response]",
"retcode": 2,
},
},
]
fake = _FakeClient(targeted, events)
with _patched_client(fake), patch("salt.wheel.Wheel", MagicMock()):
assert manage.down() == ["minion-down"]


def test_manage_up_excludes_unresponsive_minions():
"""``manage.up`` should not include minions that fail to return."""
targeted = ["minion-up", "minion-down"]
events = [
{"minion-up": {"ret": True, "retcode": 0, "out": "highstate"}},
{
"minion-down": {
"out": "no_return",
"ret": "Minion did not return. [Not connected]",
"retcode": 2,
},
},
]
fake = _FakeClient(targeted, events)
with _patched_client(fake):
assert manage.up() == ["minion-up"]
Loading