Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions cmd/stepsecurity-dev-machine-guard/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/step-security/dev-machine-guard/internal/device"
"github.com/step-security/dev-machine-guard/internal/executor"
"github.com/step-security/dev-machine-guard/internal/featuregate"
"github.com/step-security/dev-machine-guard/internal/heartbeat"
"github.com/step-security/dev-machine-guard/internal/launchd"
"github.com/step-security/dev-machine-guard/internal/output"
"github.com/step-security/dev-machine-guard/internal/paths"
Expand Down Expand Up @@ -240,6 +241,10 @@ func main() {
config.ShowConfigure()

case "send-telemetry":
// Stamp the local heartbeat first — before the enterprise gate and
// the singleton lock inside telemetry.Run — so even runs that bail at
// the gate or die during startup leave an on-disk "I started" record.
writeHeartbeat("send-telemetry", log)
if !config.IsEnterpriseMode() {
log.Error("Enterprise configuration not found. Run '%s configure' or download the script from your StepSecurity dashboard.", os.Args[0])
os.Exit(1)
Expand Down Expand Up @@ -597,6 +602,17 @@ func findLegacyLeftovers(legacy string) []string {
// state and reconciles local hook installation to match. Silent no-op
// in community mode (enterprise config missing) — the existing scan
// path stays unaffected. Failures are logged but never crash main.

// writeHeartbeat stamps last-run.json with this run's start metadata.
//
// command is the subcommand being run (for example "send-telemetry") and is
// recorded verbatim. The invocation method comes from
// telemetry.DetectInvocationMethod, so the heartbeat can tell a scheduled
// fire from a manual run.
//
// The write is wholly best-effort: any error returned by heartbeat.Write
// (read-only home, unwritable install dir, ...) is logged at debug level and
// otherwise ignored, so it never affects the run.
func writeHeartbeat(command string, log *progress.Logger) {
	// Resolve the destination once so the write and the log line agree.
	target := paths.HeartbeatFile()
	if err := heartbeat.Write(target, command, telemetry.DetectInvocationMethod()); err != nil {
		log.Debug("heartbeat: failed to write %s: %v", target, err)
	}
}

func runHookStateReconcile(exec executor.Executor, log *progress.Logger) {
if !featuregate.IsEnabled(featuregate.FeatureAIAgentHooks) {
log.Debug("hook-state reconcile: skipped (feature gated)")
Expand Down
137 changes: 137 additions & 0 deletions internal/heartbeat/heartbeat.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// Package heartbeat writes a small last-run.json "I started" breadcrumb to
// the install dir at the very top of a telemetry run — before the
// enterprise-config gate and before the singleton lock is acquired.
//
// Why this exists, separate from agent.error.log and scan-state.json: those
// only appear once a run gets far enough to log a line or finish an upload.
// Several failure modes never reach that point — a process killed mid-startup
// (e.g. the Windows GUI-launcher teardown), a run that fails the enterprise
// gate, a lock it can never acquire. The heartbeat captures "this binary
// started at time T, pid P, triggered by X" independent of any of that, so a
// stale file means "the agent isn't being invoked at all" (scheduler not
// firing — battery policy, missing task) while a fresh file alongside missing
// server-side telemetry means "the agent runs but dies/fails before upload."
//
// The write is durable against the abrupt termination it is meant to record:
// marshal to a temp sibling, fsync, then atomically rename over last-run.json
// (same pattern as internal/state). A kill at any point leaves either the
// previous heartbeat or the new one — never a truncated file.
package heartbeat

import (
"encoding/json"
"os"
"path/filepath"
"runtime"
"time"

"github.com/step-security/dev-machine-guard/internal/buildinfo"
)

const (
	// SchemaVersion identifies the on-disk layout of last-run.json. It is
	// bumped whenever Record changes in an incompatible way; a reader that
	// sees a different version treats the file as "no usable heartbeat"
	// instead of reporting an error.
	SchemaVersion = 1

	// Filename is the basename of the heartbeat file inside the install
	// dir. It is exported so callers and diagnostics can refer to it
	// without repeating the literal.
	Filename = "last-run.json"
)

// Record is the JSON envelope stored in last-run.json. It is a point-in-time
// stamp saying that a run began, and it intentionally holds start-of-run
// facts only; the outcome of a run is tracked elsewhere (scan-state.json's
// LastSuccessfulExecutionID and agent.error.log).
type Record struct {
	// SchemaVersion is the format version this record was written with.
	SchemaVersion int `json:"schema_version"`
	// WrittenAt is the moment the record was stamped.
	WrittenAt time.Time `json:"written_at"`
	// PID is the process ID of the run that wrote the record.
	PID int `json:"pid"`
	// AgentVersion is the version string of the agent binary.
	AgentVersion string `json:"agent_version"`
	// Command is the subcommand that started the run, e.g. "send-telemetry".
	Command string `json:"command"`
	// InvocationMethod distinguishes a scheduler footprint from a manual
	// run; see telemetry.DetectInvocationMethod.
	InvocationMethod string `json:"invocation_method"`
	// OS is the operating system the run executed on.
	OS string `json:"os"`
}

// Write stamps last-run.json at path with this run's start metadata. An empty
// path is a no-op returning nil — callers pass paths.HeartbeatFile(), which is
// "" when the install dir is disabled (--install-dir=""), and treat the
// heartbeat as off in that case. Best-effort: callers should log a write error
// at debug/warn and continue, never fail the run on it.
func Write(path, command, invocationMethod string) error {
if path == "" {
return nil
}
rec := Record{
SchemaVersion: SchemaVersion,
WrittenAt: time.Now().UTC(),
PID: os.Getpid(),
AgentVersion: buildinfo.Version,
Command: command,
InvocationMethod: invocationMethod,
OS: runtime.GOOS,
}
return writeRecord(path, rec)
}

// Load reads and decodes the heartbeat file at path.
//
// It returns (nil, nil) for the expected "no usable heartbeat" cases: an
// empty path, a file that does not exist, or a record whose schema version
// differs from SchemaVersion. Any other read failure, and any JSON decoding
// failure, is returned as a non-nil error together with a nil record. On
// success the decoded record is returned.
//
// Load is exposed for diagnostics and for any future fleet view that folds
// the last-run summary into the telemetry payload.
func Load(path string) (*Record, error) {
	if path == "" {
		return nil, nil
	}

	raw, readErr := os.ReadFile(filepath.Clean(path))
	switch {
	case readErr == nil:
		// Fall through to decoding.
	case os.IsNotExist(readErr):
		return nil, nil
	default:
		return nil, readErr
	}

	rec := new(Record)
	if err := json.Unmarshal(raw, rec); err != nil {
		return nil, err
	}
	if rec.SchemaVersion == SchemaVersion {
		return rec, nil
	}
	return nil, nil
}

// writeRecord persists rec to path atomically: it marshals the record, writes
// it to a temp sibling in the same directory, fsyncs and closes that file,
// and then renames it over path. The parent directory is created if missing.
//
// The rename is attempted directly over any existing file. os.Rename is
// documented to replace an existing non-directory destination, so removing
// the old heartbeat first is unnecessary — and doing so unconditionally would
// open a window in which a kill leaves no heartbeat at all. Only if the
// direct rename fails does writeRecord fall back to removing the destination
// and retrying once.
//
// On any failure the temp file is removed and the error is returned.
func writeRecord(path string, rec Record) error {
	data, err := json.MarshalIndent(rec, "", " ")
	if err != nil {
		return err
	}

	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0o750); err != nil {
		return err
	}

	// The temp file lives next to the destination so the final rename stays
	// within one directory.
	tmp, err := os.CreateTemp(dir, ".last-run-*.tmp")
	if err != nil {
		return err
	}
	tmpPath := tmp.Name()

	// discard removes the temp file (best-effort) and passes cause through.
	discard := func(cause error) error {
		_ = os.Remove(tmpPath)
		return cause
	}

	if _, err := tmp.Write(data); err != nil {
		_ = tmp.Close()
		return discard(err)
	}
	if err := tmp.Sync(); err != nil {
		_ = tmp.Close()
		return discard(err)
	}
	if err := tmp.Close(); err != nil {
		return discard(err)
	}

	renameErr := os.Rename(tmpPath, path)
	if renameErr == nil {
		return nil
	}

	// Fallback for a destination the rename could not replace in place:
	// remove it and retry once. This path is not atomic, which is why it is
	// only taken after the direct rename has already failed.
	if rmErr := os.Remove(path); rmErr == nil {
		if renameErr = os.Rename(tmpPath, path); renameErr == nil {
			return nil
		}
	}
	return discard(renameErr)
}
117 changes: 117 additions & 0 deletions internal/heartbeat/heartbeat_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package heartbeat

import (
"os"
"path/filepath"
"testing"
"time"

"github.com/step-security/dev-machine-guard/internal/buildinfo"
)

// TestWriteThenLoadRoundTrips writes a heartbeat and checks that Load returns
// a record carrying every field Write was expected to stamp.
func TestWriteThenLoadRoundTrips(t *testing.T) {
	target := filepath.Join(t.TempDir(), "last-run.json")

	// Lower bound for WrittenAt, taken before the write with a second of slack.
	earliest := time.Now().Add(-time.Second)
	if err := Write(target, "send-telemetry", "install"); err != nil {
		t.Fatalf("Write: %v", err)
	}

	got, err := Load(target)
	switch {
	case err != nil:
		t.Fatalf("Load: %v", err)
	case got == nil:
		t.Fatal("Load returned nil record after Write")
	}

	if got.SchemaVersion != SchemaVersion {
		t.Errorf("SchemaVersion = %d, want %d", got.SchemaVersion, SchemaVersion)
	}
	if got.PID != os.Getpid() {
		t.Errorf("PID = %d, want %d", got.PID, os.Getpid())
	}
	if got.Command != "send-telemetry" {
		t.Errorf("Command = %q, want send-telemetry", got.Command)
	}
	if got.InvocationMethod != "install" {
		t.Errorf("InvocationMethod = %q, want install", got.InvocationMethod)
	}
	if got.AgentVersion != buildinfo.Version {
		t.Errorf("AgentVersion = %q, want %q", got.AgentVersion, buildinfo.Version)
	}
	if got.OS == "" {
		t.Error("OS is empty")
	}

	tooEarly := got.WrittenAt.Before(earliest)
	if tooEarly || got.WrittenAt.After(time.Now().Add(time.Second)) {
		t.Errorf("WrittenAt %v not within the test window", got.WrittenAt)
	}
}

// TestWriteEmptyPathIsNoop checks that an empty path turns Write into a
// successful no-op.
func TestWriteEmptyPathIsNoop(t *testing.T) {
	err := Write("", "send-telemetry", "one_time")
	if err == nil {
		return
	}
	t.Fatalf("Write(\"\") should be a no-op, got %v", err)
}

// TestWriteOverwritesPreviousRun writes two heartbeats to the same path and
// checks that the second replaces the first without leaving temp files.
func TestWriteOverwritesPreviousRun(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "last-run.json")

	// The second write must replace the file produced by the first.
	writes := []struct {
		label, command, method string
	}{
		{"first Write", "send-telemetry", "one_time"},
		{"second Write", "install", "install"},
	}
	for _, w := range writes {
		if err := Write(path, w.command, w.method); err != nil {
			t.Fatalf("%s: %v", w.label, err)
		}
	}

	rec, err := Load(path)
	if err != nil {
		t.Fatalf("Load: %v", err)
	}
	if rec == nil || rec.Command != "install" {
		t.Fatalf("second Write did not take; got %+v", rec)
	}

	// The temp-file-then-rename sequence must not leave ".tmp" siblings behind.
	entries, err := os.ReadDir(filepath.Dir(path))
	if err != nil {
		t.Fatalf("ReadDir: %v", err)
	}
	for i := range entries {
		name := entries[i].Name()
		if filepath.Ext(name) != ".tmp" {
			continue
		}
		t.Errorf("leftover temp file: %s", name)
	}
}

// TestLoadMissingFileReturnsNilNil checks that a nonexistent heartbeat file
// yields neither a record nor an error.
func TestLoadMissingFileReturnsNilNil(t *testing.T) {
	missing := filepath.Join(t.TempDir(), "does-not-exist.json")

	got, loadErr := Load(missing)
	if loadErr != nil {
		t.Fatalf("Load of missing file should not error, got %v", loadErr)
	}
	if got == nil {
		return
	}
	t.Errorf("expected nil record for missing file, got %+v", got)
}

// TestLoadSchemaMismatchReturnsNil checks that a record written with a
// different schema version is reported as "no heartbeat" rather than an error.
func TestLoadSchemaMismatchReturnsNil(t *testing.T) {
	target := filepath.Join(t.TempDir(), "last-run.json")
	payload := []byte(`{"schema_version":999,"pid":1}`)
	if err := os.WriteFile(target, payload, 0o600); err != nil {
		t.Fatal(err)
	}

	got, loadErr := Load(target)
	if loadErr != nil {
		t.Fatalf("Load: %v", loadErr)
	}
	if got == nil {
		return
	}
	t.Errorf("expected nil for schema mismatch, got %+v", got)
}

// TestLoadEmptyPathReturnsNilNil checks that Load treats an empty path as
// "heartbeat disabled" and returns (nil, nil).
func TestLoadEmptyPathReturnsNilNil(t *testing.T) {
	got, loadErr := Load("")
	if got == nil && loadErr == nil {
		return
	}
	t.Errorf("Load(\"\") = (%+v, %v), want (nil, nil)", got, loadErr)
}
11 changes: 11 additions & 0 deletions internal/paths/paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,14 @@ func ScanStateFile() string {
}
return filepath.Join(home, "scan-state.json")
}

// HeartbeatFile reports where last-run.json lives: the file named
// "last-run.json" directly under Home(). When Home() is "" (disabled) it
// returns "" as well, and callers must read that as "heartbeat unavailable"
// and skip the write — the same contract as ScanStateFile.
func HeartbeatFile() string {
	if home := Home(); home != "" {
		return filepath.Join(home, "last-run.json")
	}
	return ""
}
Loading