Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,4 @@ deploy/scripts/licence-*-private.pem

# Claude Code local config
.claude/
.worktrees
18 changes: 16 additions & 2 deletions apps/docs/docs/features/alerts.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ When an alert instance is created or transitions state, a **notification** is ge

## Creating Alert Rules

1. Navigate to **Alerts → Rules**
2. Click **New Rule**
1. Open a host and select **Monitoring → Alerts**
2. Click **Add Rule**
3. Configure:

| Field | Description |
Expand All @@ -44,6 +44,20 @@ When an alert instance is created or transitions state, a **notification** is ge

4. Click **Save**

## Global Metric Defaults

Global alert defaults are metric threshold templates. They are not evaluated for
any host until an administrator applies them to that host.

Administrators can apply those defaults when needed:
- On a host's **Alerts** tab, **Use Metric Defaults** replaces that host's
host-level metric threshold rules with the current global defaults.
- On **Administration → Monitoring**, **Apply to Hosts** replaces host-level
metric threshold rules across all hosts with the current global defaults.

These actions only replace metric threshold rules. Check, certificate, Docker,
silence, and notification settings are left unchanged.

---

## Silencing
Expand Down
26 changes: 22 additions & 4 deletions apps/ingest/internal/handlers/terminal_ws.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"net"
"net/http"
"net/url"
"strconv"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -51,6 +52,7 @@ type wsMessage struct {
Code int32 `json:"exit_code,omitempty"`
Token string `json:"token,omitempty"`
Password string `json:"password,omitempty"`
Port uint32 `json:"port,omitempty"`
}

func (h *TerminalWSHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -84,6 +86,12 @@ func (h *TerminalWSHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
conn.Close(websocket.StatusPolicyViolation, "invalid authentication")
return
}
sshPort, err := terminalSSHPort(authMsg.Port)
if err != nil {
writeWS(ctx, conn, wsMessage{Type: "error", Msg: "Invalid SSH port"})
conn.Close(websocket.StatusPolicyViolation, "invalid port")
return
}

tokenSum := sha256.Sum256([]byte(authMsg.Token))
info, err := queries.ValidateAndActivateTerminalSession(ctx, h.pool, sessionID, hex.EncodeToString(tokenSum[:]))
Expand Down Expand Up @@ -113,8 +121,8 @@ func (h *TerminalWSHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
return
}

slog.Info("terminal ws: opening SSH session", "session_id", sessionID, "host_id", info.HostID, "username", info.Username)
sshClient, sshSession, stdin, stdout, err := h.openSSHSession(ctx, info.HostID, info.Host, info.Username, authMsg.Password)
slog.Info("terminal ws: opening SSH session", "session_id", sessionID, "host_id", info.HostID, "username", info.Username, "port", sshPort)
sshClient, sshSession, stdin, stdout, err := h.openSSHSession(ctx, info.HostID, info.Host, info.Username, authMsg.Password, sshPort)
authMsg.Password = ""
if err != nil {
slog.Warn("terminal ws: SSH connection failed", "session_id", sessionID, "host_id", info.HostID, "username", info.Username, "err", err)
Expand Down Expand Up @@ -261,7 +269,7 @@ func terminalWSAcceptOptions(trustedOrigins []string) (*websocket.AcceptOptions,
}, nil
}

func (h *TerminalWSHandler) openSSHSession(ctx context.Context, hostID, host, username, password string) (*ssh.Client, *ssh.Session, io.WriteCloser, io.Reader, error) {
func (h *TerminalWSHandler) openSSHSession(ctx context.Context, hostID, host, username, password, port string) (*ssh.Client, *ssh.Session, io.WriteCloser, io.Reader, error) {
config := &ssh.ClientConfig{
User: username,
Auth: []ssh.AuthMethod{
Expand All @@ -280,7 +288,7 @@ func (h *TerminalWSHandler) openSSHSession(ctx context.Context, hostID, host, us
Timeout: 30 * time.Second,
}

address := net.JoinHostPort(host, "22")
address := net.JoinHostPort(host, port)
type dialResult struct {
client *ssh.Client
err error
Expand Down Expand Up @@ -352,6 +360,16 @@ func terminalRemoteAddr(remoteAddr string) string {
return remoteAddr
}

// terminalSSHPort turns the numeric SSH port supplied by the client into the
// decimal string used when building the dial address.
//
// A zero port selects the default "22". A port above 65535 yields an empty
// string and a non-nil error. Any other value is returned in base 10.
func terminalSSHPort(port uint32) (string, error) {
	const (
		defaultPort = "22"
		maxPort     = 65535
	)

	switch {
	case port == 0:
		// Zero means the caller did not pick a port.
		return defaultPort, nil
	case port > maxPort:
		return "", fmt.Errorf("SSH port %d is out of range", port)
	default:
		return strconv.FormatUint(uint64(port), 10), nil
	}
}

func isSSHAuthenticationFailure(err error) bool {
var authErr *ssh.ServerAuthError
return errors.As(err, &authErr)
Expand Down
26 changes: 26 additions & 0 deletions apps/ingest/internal/handlers/terminal_ws_security_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,32 @@ func TestTerminalRemoteAddrNormalisesHostPort(t *testing.T) {
}
}

// TestTerminalSSHPortDefaultsToTwentyTwo checks that a zero port resolves to
// the string "22" without an error.
func TestTerminalSSHPortDefaultsToTwentyTwo(t *testing.T) {
	const want = "22"

	port, err := terminalSSHPort(0)
	switch {
	case err != nil:
		t.Fatalf("terminalSSHPort(0) error = %v", err)
	case port != want:
		t.Fatalf("terminalSSHPort(0) = %q, want 22", port)
	}
}

// TestTerminalSSHPortAllowsCustomPort checks that an in-range, non-zero port
// is returned as its decimal string without an error.
func TestTerminalSSHPortAllowsCustomPort(t *testing.T) {
	const want = "2222"

	port, err := terminalSSHPort(2222)
	switch {
	case err != nil:
		t.Fatalf("terminalSSHPort(2222) error = %v", err)
	case port != want:
		t.Fatalf("terminalSSHPort(2222) = %q, want 2222", port)
	}
}

// TestTerminalSSHPortRejectsOutOfRangePort checks that a port one above the
// 16-bit maximum produces an error.
func TestTerminalSSHPortRejectsOutOfRangePort(t *testing.T) {
	_, err := terminalSSHPort(65536)
	if err != nil {
		return
	}
	t.Fatal("expected terminalSSHPort(65536) to reject the port")
}

func TestIsSSHAuthenticationFailure(t *testing.T) {
if !isSSHAuthenticationFailure(&ssh.ServerAuthError{}) {
t.Fatal("expected ssh.ServerAuthError to count as an authentication failure")
Expand Down
57 changes: 43 additions & 14 deletions apps/web/app/(dashboard)/hosts/[id]/alerts-tab.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import { useState } from 'react'
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
import { formatDistanceToNow, format } from 'date-fns'
import { Bell, Plus, Trash2, VolumeX, VolumeOff } from 'lucide-react'
import { format } from 'date-fns'
import { Bell, Plus, RotateCcw, Trash2, VolumeX, VolumeOff } from 'lucide-react'
import { useForm, Controller } from 'react-hook-form'
import { zodResolver } from '@hookform/resolvers/zod'
import { z } from 'zod'
Expand Down Expand Up @@ -41,6 +41,7 @@ import {
createAlertRule,
updateAlertRule,
deleteAlertRule,
replaceHostMetricAlertsWithGlobalDefaults,
getAlertInstances,
getActiveSilencesForHost,
createSilence,
Expand All @@ -49,7 +50,7 @@ import {
import { getChecksWithHistory } from '@/lib/actions/checks'
import { getCertificates } from '@/lib/actions/certificates'
import { getHostDockerContainers } from '@/lib/actions/docker-containers'
import type { AlertRule, AlertSeverity, AlertSilence } from '@/lib/db/schema'
import type { AlertRule, AlertSeverity } from '@/lib/db/schema'

// ─── Form schema (flat — validation applied per conditionType in onSubmit) ─────

Expand Down Expand Up @@ -159,13 +160,11 @@ const silenceFormSchema = z.object({
type SilenceFormValues = z.infer<typeof silenceFormSchema>

function AddSilenceDialog({
scopeId,
hostId,
open,
onOpenChange,
onSuccess,
}: {
scopeId: string
hostId: string
open: boolean
onOpenChange: (v: boolean) => void
Expand Down Expand Up @@ -719,6 +718,7 @@ export function AlertsTab({ scopeId, hostId }: Props) {
const qc = useQueryClient()
const [addDialogOpen, setAddDialogOpen] = useState(false)
const [addSilenceOpen, setAddSilenceOpen] = useState(false)
const [replaceMetricDefaultsError, setReplaceMetricDefaultsError] = useState<string | null>(null)

const { data: allRules = [] } = useQuery({
queryKey: ['alert-rules', scopeId, hostId],
Expand Down Expand Up @@ -757,6 +757,21 @@ export function AlertsTab({ scopeId, hostId }: Props) {
onSuccess: () => qc.invalidateQueries({ queryKey: ['alert-rules', scopeId, hostId] }),
})

// Mutation behind the "Use Metric Defaults" button: calls
// replaceHostMetricAlertsWithGlobalDefaults for this host and keeps the inline
// error message state in sync with the outcome.
const replaceMetricDefaultsMutation = useMutation({
mutationFn: async () => {
const result = await replaceHostMetricAlertsWithGlobalDefaults(hostId)
// A result object carrying an `error` key is turned into a thrown Error so
// that it is routed to onError below.
if ('error' in result) throw new Error(result.error)
return result
},
// Clear any message left over from a previous attempt before starting.
onMutate: () => setReplaceMetricDefaultsError(null),
// Refetch this host's alert rules after a successful replacement.
onSuccess: () => qc.invalidateQueries({ queryKey: ['alert-rules', scopeId, hostId] }),
onError: (error) => {
// Show the thrown message when available, otherwise a generic fallback.
setReplaceMetricDefaultsError(
error instanceof Error ? error.message : 'Failed to replace metric alert rules',
)
},
})

const deleteSilenceMutation = useMutation({
mutationFn: (silenceId: string) => deleteSilence(silenceId),
onSuccess: () => qc.invalidateQueries({ queryKey: ['silences-active', scopeId, hostId] }),
Expand Down Expand Up @@ -810,13 +825,23 @@ export function AlertsTab({ scopeId, hostId }: Props) {
)}

{/* Host-specific rules */}
<Card>
<CardHeader className="flex flex-row items-start justify-between">
<Card data-testid="host-alert-rules-card">
<CardHeader className="flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between">
<div>
<CardTitle className="text-base">Alert Rules</CardTitle>
<CardDescription className="mt-1">Rules that apply specifically to this host</CardDescription>
</div>
<div className="flex items-center gap-2 shrink-0">
<div className="flex flex-wrap items-center gap-2 shrink-0">
<Button
size="sm"
variant="outline"
onClick={() => replaceMetricDefaultsMutation.mutate()}
disabled={replaceMetricDefaultsMutation.isPending}
data-testid="host-alerts-replace-metrics-with-defaults"
>
<RotateCcw className="size-3.5 mr-1" />
Use Metric Defaults
</Button>
<Button size="sm" variant="outline" onClick={() => setAddSilenceOpen(true)}>
<VolumeX className="size-3.5 mr-1" />
Silence Host
Expand All @@ -828,6 +853,11 @@ export function AlertsTab({ scopeId, hostId }: Props) {
</div>
</CardHeader>
<CardContent>
{replaceMetricDefaultsError != null && (
<p className="text-sm text-red-600 pb-3" data-testid="host-alerts-replace-metrics-error">
{replaceMetricDefaultsError}
</p>
)}
{hostRules.length === 0 ? (
<p className="text-sm text-muted-foreground py-4 text-center">
No rules for this host yet. Add one to start alerting.
Expand Down Expand Up @@ -881,14 +911,14 @@ export function AlertsTab({ scopeId, hostId }: Props) {
</CardContent>
</Card>

{/* Global default rules (read-only) — these also apply to this host */}
{/* Global default rules (read-only) — templates available for this host */}
<Card>
<CardHeader>
<div>
<CardTitle className="text-base">Instance-wide Default Rules</CardTitle>
<CardTitle className="text-base">Global Metric Defaults</CardTitle>
<CardDescription className="mt-1">
These rules apply to <strong>all hosts</strong> in your instance and are
evaluated in addition to the host-specific rules above.{' '}
These defaults are not evaluated for this host until you apply them with{' '}
<strong>Use Metric Defaults</strong>.{' '}
<a href="/settings/monitoring" className="underline underline-offset-2">
Manage in Administration → Monitoring
</a>
Expand All @@ -899,7 +929,7 @@ export function AlertsTab({ scopeId, hostId }: Props) {
<CardContent>
{globalDefaults.length === 0 ? (
<p className="text-sm text-muted-foreground py-4 text-center">
No instance-wide default rules configured.
No global metric defaults configured.
</p>
) : (
<Table>
Expand Down Expand Up @@ -940,7 +970,6 @@ export function AlertsTab({ scopeId, hostId }: Props) {
onSuccess={() => qc.invalidateQueries({ queryKey: ['alert-rules', scopeId, hostId] })}
/>
<AddSilenceDialog
scopeId={scopeId}
hostId={hostId}
open={addSilenceOpen}
onOpenChange={setAddSilenceOpen}
Expand Down
Loading
Loading