Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions internal/container/agent_profile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,31 @@ func TestBuildEnvInjectionScript_QuotesValuesForSourcing(t *testing.T) {
}
}

func TestBuildEnvInjectionScript_ChownsEnvFileToDirOwner(t *testing.T) {
// Sluice's docker exec runs as the image's USER (root for the
// upstream openclaw and hermes images), so the awk rename and
// heredoc append leave the file root-owned. The agent runtime
// runs as a non-root user, so without a chown back to the dir
// owner the agent cannot run `hermes claw migrate` or in-agent
// secret edits. Verify the script emits a chown step.
for _, hasValues := range []bool{true, false} {
envMap := map[string]string{}
if hasValues {
envMap["KEY"] = "value"
}
script, err := BuildEnvInjectionScript(envMap, false, true)
if err != nil {
t.Fatalf("hasValues=%v: build script: %v", hasValues, err)
}
if !strings.Contains(script, `stat -c '%u:%g'`) {
t.Errorf("hasValues=%v: script must stat the parent dir to derive owner: %s", hasValues, script)
}
if !strings.Contains(script, `chown "$DIR_OWNER" "$ENV_FILE"`) {
t.Errorf("hasValues=%v: script must chown the env file to dir owner: %s", hasValues, script)
}
}
}

func TestBuildEnvInjectionScript_RejectsNewlineInValue(t *testing.T) {
// A newline in the value would split the env-file entry across two
// lines. The second line would either be silently lost or interpreted
Expand Down
95 changes: 67 additions & 28 deletions internal/container/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,37 +295,76 @@ func BuildEnvInjectionScriptForProfile(profile *AgentProfile, envMap map[string]
break
}
}
if !hasContent {
return script.String(), nil
}

// Append the new block via a quoted-tag heredoc. The single quotes
// around the tag (`<<'TAG'`) tell the shell to perform NO expansion
// on the body — every byte we write between the heredoc start and
// the end tag lands in the file verbatim. That lets us emit
// `KEY='value'` lines whose contents are safe under both shell
// `source` and dotenv parsing.
script.WriteString(fmt.Sprintf(" && cat >> \"$ENV_FILE\" <<'%s'\n", envHeredocTag))
script.WriteString(EnvBlockBegin)
script.WriteString("\n")
for _, k := range keys {
v := envMap[k]
if v == "" {
continue
// Match the env file's ownership to its parent directory. The
// docker exec runs the script as the image's USER (typically
// root for agent containers), so both the awk rename above and
// the heredoc `cat >>` leave the file root-owned. The agent
// runtime runs as a non-root user (UID 10000 for the upstream
// hermes image, configurable via HERMES_UID); a root-owned env
// file blocks agent-side write paths like `hermes claw migrate`
// or in-agent secret edits with EACCES. Inheriting the parent
// directory's owner is portable across agents because each
// agent's entrypoint chowns its data dir to its runtime user
// before sluice ever exec's in. The chown is best-effort: a
// read-only env file still satisfies sluice's own usage, so a
// stat or chown failure does not abort the script.
//
// Stat flag portability: GNU stat (Linux containers like
// alpine, debian) uses `-c`, BSD stat (macOS guests booted via
// the tart backend) uses `-f`. The two `stat` calls are run as
// fallbacks under a single subshell so whichever userland is
// in the agent's container resolves the directory owner.
const chownStep = ` && { DIR_OWNER=$(stat -c '%u:%g' "$(dirname "$ENV_FILE")" 2>/dev/null` +
` || stat -f '%u:%g' "$(dirname "$ENV_FILE")" 2>/dev/null);` +
` [ -n "$DIR_OWNER" ] && chown "$DIR_OWNER" "$ENV_FILE" 2>/dev/null;` +
` true; }`

if hasContent {
// Append the new block via a quoted-tag heredoc. The single
// quotes around the tag (`<<'TAG'`) tell the shell to perform
// NO expansion on the body. Every byte between the heredoc
// start and the end tag lands in the file verbatim, so we
// can emit KEY='value' lines whose contents are safe under
// both shell source and dotenv parsing.
//
// The chownStep is appended to the SAME line as the heredoc
// `<<` operator (before the heredoc body) because shell will
// not let a `&&` at the start of a new line attach to a
// command whose heredoc body is in between. With this
// placement, the parser sees one logical line:
// cat >> file <<TAG && { chown ...; }
// whose heredoc body follows. The `&&` short-circuits if
// cat fails (disk full, permissions, etc.) so we do not
// chown a file that did not get the new managed block.
script.WriteString(fmt.Sprintf(" && cat >> \"$ENV_FILE\" <<'%s'%s\n", envHeredocTag, chownStep))
script.WriteString(EnvBlockBegin)
script.WriteString("\n")
for _, k := range keys {
v := envMap[k]
if v == "" {
continue
}
// Escape embedded single quotes via the four-character
// idiom (apostrophe, backslash, apostrophe, apostrophe).
// Wrapped in single quotes, the result is one
// well-formed dotenv/shell string with no expansion.
escaped := strings.ReplaceAll(v, "'", `'\''`)
script.WriteString(k)
script.WriteString("='")
script.WriteString(escaped)
script.WriteString("'\n")
}
// Escape embedded single quotes: 'value' -> 'val'\''ue'.
// The result, wrapped in single quotes, is one well-formed
// dotenv/shell string with no expansion.
escaped := strings.ReplaceAll(v, "'", `'\''`)
script.WriteString(k)
script.WriteString("='")
script.WriteString(escaped)
script.WriteString("'\n")
script.WriteString(EnvBlockEnd)
script.WriteString("\n")
script.WriteString(envHeredocTag)
script.WriteString("\n")
} else {
// No managed-block emission. The awk pre-pass still
// rewrote the file (removing any prior block), so we
// still want to fix ownership of the result.
script.WriteString(chownStep)
}
script.WriteString(EnvBlockEnd)
script.WriteString("\n")
script.WriteString(envHeredocTag)
script.WriteString("\n")

return script.String(), nil
}
Expand Down
157 changes: 152 additions & 5 deletions internal/proxy/addon.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"log"
"net"
"net/http"
"runtime/debug"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -127,6 +128,14 @@ type SluiceAddon struct {
// time.Sleep-based synchronization. Nil in production.
persistDone chan struct{}

// responsePanicHook is a test injection point for the Response
// handler's deferred recover. When non-nil it is invoked between
// the OAuth swap and the DLP scan, so a test can force the
// downstream-of-OAuth panic shape we observed in production
// without having to engineer a malformed Flow that triggers a
// real nil deref. Always nil in production.
responsePanicHook func()

// auditLog, when non-nil, receives per-request deny/inject events.
auditLog *audit.FileLogger

Expand Down Expand Up @@ -547,11 +556,53 @@ func (a *SluiceAddon) injectHeaders(f *mitmproxy.Flow, host string, port int) {
}
defer secret.Release()

f.Request.Header.Set(binding.Header, binding.FormatValue(secret.String()))
f.Request.Header.Set(binding.Header, binding.FormatValue(extractInjectableSecret(a.oauthIndex.Load(), binding.Credential, secret.String())))
log.Printf("[ADDON-INJECT] injected header %q for %s:%d (credential %q)",
binding.Header, host, port, binding.Credential)
}

// extractInjectableSecret returns the value to substitute into a binding's
// `{value}` template.
//
// Static credentials are plain strings stored as-is in the vault; the
// value to inject is the string itself. OAuth credentials are
// JSON-marshalled OAuthCredential structs (access_token + refresh_token
// + token_url + expires_at); the value to inject is just the
// access_token, so a binding like `Authorization: Bearer {value}`
// produces `Bearer <jwt>` rather than `Bearer {"access_token":...}`.
//
// We dispatch on the credential's metadata type (looked up via the
// supplied OAuthIndex, populated from credential_meta on startup and
// SIGHUP) rather than inferring from the secret's JSON shape. Shape
// inference would mis-handle a static credential whose value happens
// to be OAuth-shaped JSON. The credential_meta table is the single
// source of truth for cred_type elsewhere in sluice; the injection
// path follows the same rule.
//
// If the metadata says oauth but parsing fails (corrupted vault
// entry, schema drift, etc.) we fall back to the raw secret. That
// preserves the previous behavior on broken state instead of
// returning an empty string and silently producing `Bearer ` headers.
//
// A nil index (no oauth credentials registered yet, or the QUIC
// path running before UpdateOAuthIndex fires) means every
// credential is treated as static.
func extractInjectableSecret(idx *OAuthIndex, credName, secret string) string {
if idx == nil || !idx.Has(credName) {
return secret
}
cred, err := vault.ParseOAuth([]byte(secret))
if err != nil || cred == nil || cred.AccessToken == "" {
// Generic [INJECT] prefix because both the HTTP/1+2 and the
// HTTP/3 (QUIC) header-injection paths share this helper.
// An [ADDON-INJECT] tag would mislead a reader who saw the
// line in a deployment that uses HTTP/3 exclusively.
log.Printf("[INJECT] credential %q registered as oauth but vault payload not parseable; injecting raw secret", credName)
return secret
}
return cred.AccessToken
}
Comment thread
nnemirovsky marked this conversation as resolved.

// Request performs Pass 2 (scoped phantom replacement) and Pass 3 (strip
// unbound phantoms) on the fully-buffered request body, headers, URL query,
// and URL path. Called by go-mitmproxy after the request body has been read
Expand Down Expand Up @@ -647,12 +698,52 @@ func (a *SluiceAddon) StreamRequestModifier(f *mitmproxy.Flow, in io.Reader) io.
// redact patterns (see SetRedactRules) so credential strings in upstream
// responses are scrubbed before being relayed to the agent.
func (a *SluiceAddon) Response(f *mitmproxy.Flow) {
// Top-level recover so a panic inside any sub-step (OAuth swap,
// DLP scan, future hooks) cannot escape into go-mitmproxy's
// generic recover, which abandons the response body and leaves
// the agent reading an empty stream. We log the full stack so
// the underlying bug can be diagnosed later, but the response
// continues with whatever state f.Response was in at the time
Comment thread
nnemirovsky marked this conversation as resolved.
// of the panic. Real tokens cannot leak: processOAuthResponseIfMatching
// has its own snapshot/rollback, and any panic in DLP runs AFTER
// OAuth swap (so tokens are already phantoms by then).
defer func() {
if r := recover(); r != nil {
host := "unknown"
method := ""
if f != nil && f.Request != nil {
if f.Request.URL != nil {
host = f.Request.URL.Host
}
method = f.Request.Method
}
log.Printf("[ADDON] PANIC in Response handler for %s %s: %v\n%s", method, host, r, debug.Stack())
}
}()
Comment thread
nnemirovsky marked this conversation as resolved.

Comment thread
nnemirovsky marked this conversation as resolved.
// Nil-flow guard. The deferred recover above dereferences f to
// build the log line; without this early return, a nil flow
// (which go-mitmproxy never produces in practice but tests can)
// would hit the recover path on every call. Mirror what
// StreamResponseModifier does so both entry points handle nil
// flows uniformly.
if f == nil {
return
}
if f.Response == nil || f.Request == nil {
return
}

a.processOAuthResponseIfMatching(f)

// Test-only panic injection. Always nil in production. Lets a
// regression test exercise the deferred recover above without
// having to construct a Flow that triggers a real downstream
// nil deref.
if a.responsePanicHook != nil {
a.responsePanicHook()
}

// Outbound DLP: scan response body and headers for credential
// patterns that should not reach the agent. Runs after OAuth
// processing so real tokens are already swapped to phantoms.
Expand Down Expand Up @@ -711,10 +802,56 @@ func (a *SluiceAddon) processOAuthResponseIfMatching(f *mitmproxy.Flow) {
// emit a single WARNING per client connection so operators notice the
// gap without log spam from multi-chunk streams. The dedup state lives
// on dlpStreamWarned, scoped to the client connection id.
func (a *SluiceAddon) StreamResponseModifier(f *mitmproxy.Flow, in io.Reader) io.Reader {
if f.Request == nil {
return in
}
func (a *SluiceAddon) StreamResponseModifier(f *mitmproxy.Flow, in io.Reader) (out io.Reader) {
// Default to passing the input through unchanged; named return
// lets the deferred recover ensure we always hand SOMETHING
// usable back to go-mitmproxy on a panic, instead of letting
// the panic escape into mitmproxy's outer recover (which
// abandons the response body entirely).
out = in

// Defensive nil-input guard up front, BEFORE the flow checks
// below. If both `f` and `in` are nil (rare but possible in tests
// or on an unusual go-mitmproxy code path), the f-nil early
// return below would otherwise hand back a nil io.Reader, which
// the proxy's downstream copy would nil-deref on. http.NoBody
// keeps the response well-framed (zero bytes) and the panic is
// avoided regardless of what `f` looks like.
if in == nil {
out = http.NoBody
return out
}

if f == nil || f.Request == nil {
return out
}
Comment thread
nnemirovsky marked this conversation as resolved.

// Known-safe fallback for the panic recover. Set ONLY after the
// OAuth phantom swap has produced a clean buffer that contains
// no real tokens. Critically: we never assign the raw upstream
// bytes here, because a matched OAuth token-endpoint response
// contains real access and refresh tokens, and a panic between
// io.ReadAll and a successful swapOAuthTokens would otherwise
// leak those tokens straight to the agent. If the panic fires
// before safeFallback is set, the recover hands back http.NoBody
// instead. The agent sees an empty 2xx token body and surfaces
// the failure as a parse error, which is the strictly safer
// outcome compared to leaking a real bearer.
var safeFallback []byte
defer func() {
if r := recover(); r != nil {
host := "unknown"
if f.Request != nil && f.Request.URL != nil {
host = f.Request.URL.Host
}
log.Printf("[ADDON] PANIC in StreamResponseModifier for %s: %v\n%s", host, r, debug.Stack())
if safeFallback != nil {
out = bytes.NewReader(safeFallback)
} else {
out = http.NoBody
}
}
Comment thread
nnemirovsky marked this conversation as resolved.
}()

// Warn when DLP rules are configured but the response is streamed.
// Dedupe by client connection id so we emit at most one warning per
Expand Down Expand Up @@ -782,10 +919,20 @@ func (a *SluiceAddon) StreamResponseModifier(f *mitmproxy.Flow, in io.Reader) io

modified, err := a.swapOAuthTokens(body, contentType, credName)
if err != nil {
// The body did not parse as an OAuth token response. This is
// usually an HTML error page from a misconfigured token
// endpoint, not a credentials envelope, so passing it through
// is the historical behavior. We deliberately do NOT set
// safeFallback here: if a later panic somehow fires while
// returning, the recover defaults to http.NoBody rather than
// leaking whatever this body contains.
log.Printf("[ADDON-OAUTH] stream token parse error for credential %q: %v", credName, err)
return bytes.NewReader(body)
}

// Swap completed. The modified buffer is phantom-only and safe to
// hand back on a late panic.
safeFallback = modified
return bytes.NewReader(modified)
}

Expand Down
Loading
Loading