Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions src/agent/core/interfaces/i-sandbox-service.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import type { ValidatedHarnessConfig } from '../../infra/agent/agent-schemas.js'
import type { HarnessModuleBuilder } from '../../infra/harness/harness-module-builder.js'
import type { HarnessOutcomeRecorder } from '../../infra/harness/harness-outcome-recorder.js'
import type { ISearchKnowledgeService } from '../../infra/sandbox/tools-sdk.js'
import type { SessionManager } from '../../infra/session/session-manager.js'
import type { EnvironmentContext } from '../domain/environment/types.js'
import type { HarnessLoadResult } from '../domain/harness/types.js'
import type { REPLResult, SandboxConfig } from '../domain/sandbox/types.js'
import type { IContentGenerator } from './i-content-generator.js'
import type { ICurateService } from './i-curate-service.js'
import type { IFileSystem } from './i-file-system.js'
import type { IHarnessStore } from './i-harness-store.js'
import type { ILogger } from './i-logger.js'
import type { ISwarmCoordinator } from './i-swarm-coordinator.js'

Expand Down Expand Up @@ -46,6 +49,26 @@ export interface ISandboxService {
*/
executeCode(code: string, sessionId: string, config?: SandboxConfig): Promise<REPLResult>

/**
* Load the latest `HarnessVersion` for `(projectId, commandType)` and
* register the resulting module on `sessionId`. Future `executeCode`
* calls inject `harness.*` into the sandbox context when a module is
* loaded. Never throws — every failure mode is encoded in the
* returned `HarnessLoadResult`.
*
* First production consumer: Phase 5's `AgentLLMService` session-start
* hook.
*
* @param sessionId - Session identifier the loaded module is registered on
* @param projectId - Project identifier (composite-key partition)
* @param commandType - Harness command type scope
* @returns A `HarnessLoadResult` describing whether a module was loaded;
* failures are reported through this value rather than by rejection
*/
loadHarness(
sessionId: string,
projectId: string,
commandType: 'chat' | 'curate' | 'query',
): Promise<HarnessLoadResult>

/**
* Set the content generator for parallel LLM operations (mapExtract).
* When set, sandboxes will have access to `tools.curation.mapExtract()`.
Expand Down Expand Up @@ -85,6 +108,15 @@ export interface ISandboxService {
*/
setHarnessConfig?(config: ValidatedHarnessConfig): void

/**
* Wire in the AutoHarness V2 module builder. `loadHarness` uses this
* to evaluate the `HarnessVersion.code` string returned by the store
* into a callable module.
*
* Declared optional: implementations of this interface are not
* required to provide it, so callers holding only the interface type
* must check for its presence before calling.
*
* @param builder - Module builder instance
*/
setHarnessModuleBuilder?(builder: HarnessModuleBuilder): void

/**
* Wire in the AutoHarness V2 outcome recorder for fire-and-forget
* recording on every `executeCode` call.
Expand All @@ -94,6 +126,15 @@ export interface ISandboxService {
*/
setHarnessOutcomeRecorder?(recorder: HarnessOutcomeRecorder, logger?: ILogger): void

/**
* Wire in the AutoHarness V2 storage interface. `loadHarness` calls
* `store.getLatest(projectId, commandType)` to find the version to
* evaluate.
*
* Declared optional: implementations of this interface are not
* required to provide it, so callers holding only the interface type
* must check for its presence before calling.
*
* @param store - Harness storage instance
*/
setHarnessStore?(store: IHarnessStore): void

/**
* Set a variable in a session's sandbox.
* If the sandbox doesn't exist yet, the variable is buffered and injected
Expand Down
12 changes: 11 additions & 1 deletion src/agent/infra/agent/service-initializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { createBlobStorage } from '../blob/blob-storage-factory.js'
import { EnvironmentContextBuilder } from '../environment/environment-context-builder.js'
import { AgentEventBus, SessionEventBus } from '../events/event-emitter.js'
import { FileSystemService } from '../file-system/file-system-service.js'
import { HarnessOutcomeRecorder, HarnessStore } from '../harness/index.js'
import { HarnessModuleBuilder, HarnessOutcomeRecorder, HarnessStore } from '../harness/index.js'
import { AgentLLMService } from '../llm/agent-llm-service.js'
import { CompactionService } from '../llm/context/compaction/compaction-service.js'
import { EscalatedCompressionStrategy } from '../llm/context/compression/escalated-compression.js'
Expand Down Expand Up @@ -263,6 +263,16 @@ export async function createCipherAgentServices(
)
sandboxService.setHarnessOutcomeRecorder(harnessOutcomeRecorder, logger)

// Phase 3 Task 3.3: wire the module builder + store into the sandbox so
// `SandboxService.loadHarness(...)` can evaluate stored versions into
// callable `harness.*` namespaces. No consumer calls `loadHarness` yet
// in v1.0 — Phase 5's mode-selector + AgentLLMService hook is the first.
const harnessModuleBuilder = new HarnessModuleBuilder(
logger.withSource('HarnessModuleBuilder'),
)
sandboxService.setHarnessModuleBuilder(harnessModuleBuilder)
sandboxService.setHarnessStore(harnessStore)

// 6c. Swarm coordinator — try to load config and build providers.
// Missing config → fail-open (no swarm). Invalid config → warn but continue.
let swarmCoordinator: SwarmCoordinator | undefined
Expand Down
1 change: 1 addition & 0 deletions src/agent/infra/harness/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
* from '.../infra/harness'` without reaching into individual files.
*/

export {HarnessModuleBuilder} from './harness-module-builder.js'
export {HarnessOutcomeRecorder} from './harness-outcome-recorder.js'
export {HarnessStore} from './harness-store.js'
202 changes: 201 additions & 1 deletion src/agent/infra/sandbox/sandbox-service.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
import type { EnvironmentContext } from '../../core/domain/environment/types.js'
import type { ProjectType } from '../../core/domain/harness/types.js'
import type {
HarnessContext,
HarnessLoadResult,
HarnessMeta,
HarnessModule,
ProjectType,
} from '../../core/domain/harness/types.js'
import type { REPLResult, SandboxConfig } from '../../core/domain/sandbox/types.js'
import type { IContentGenerator } from '../../core/interfaces/i-content-generator.js'
import type { ICurateService } from '../../core/interfaces/i-curate-service.js'
import type { IFileSystem } from '../../core/interfaces/i-file-system.js'
import type { IHarnessStore } from '../../core/interfaces/i-harness-store.js'
import type { ILogger } from '../../core/interfaces/i-logger.js'
import type { ISandboxService } from '../../core/interfaces/i-sandbox-service.js'
import type { ISwarmCoordinator } from '../../core/interfaces/i-swarm-coordinator.js'
import type { ValidatedHarnessConfig } from '../agent/agent-schemas.js'
import type { HarnessModuleBuilder } from '../harness/harness-module-builder.js'
import type { HarnessOutcomeRecorder } from '../harness/harness-outcome-recorder.js'
import type { SessionManager } from '../session/session-manager.js'
import type { ISearchKnowledgeService, ToolsSDK } from './tools-sdk.js'
Expand All @@ -17,6 +25,19 @@ import {CurateResultCollector} from './curate-result-collector.js'
import { LocalSandbox } from './local-sandbox.js'
import { createToolsSDK } from './tools-sdk.js'

/**
* Per-session harness state captured after a successful `loadHarness`.
* Holds the callable module + the metadata the template declared, so
* subsequent `executeCode` calls can inject `harness.*` into the
* sandbox context without re-invoking the module builder.
*/
interface SessionHarnessState {
/** Command type the harness was loaded for (the `commandType` argument of `loadHarness`). */
readonly commandType: 'chat' | 'curate' | 'query'
/** Result of `module.meta()`, captured once when the harness was loaded. */
readonly meta: HarnessMeta
/** Module returned by the harness module builder for the loaded version. */
readonly module: HarnessModule
/** Project type as returned by `resolveProjectType()` at load time. */
readonly projectType: ProjectType
}

/**
* Sandbox service implementation.
* Manages sandbox instances tied to agent sessions.
Expand All @@ -34,8 +55,12 @@ export class SandboxService implements ISandboxService {
private fileSystem?: IFileSystem
/** AutoHarness V2 config block, wired in before any session is created. */
private harnessConfig?: ValidatedHarnessConfig
/** AutoHarness V2 module builder — evaluates harness code per session. */
private harnessModuleBuilder?: HarnessModuleBuilder
/** AutoHarness V2 outcome recorder — fire-and-forget from executeCode. */
private harnessOutcomeRecorder?: HarnessOutcomeRecorder
/** AutoHarness V2 storage — reads latest HarnessVersion on loadHarness. */
private harnessStore?: IHarnessStore
/** Current harness version ID per session, populated by Phase 3 loadHarness. */
private harnessVersionIdBySession = new Map<string, string>()
/** Logger for defensive .catch on fire-and-forget record calls. */
Expand All @@ -48,6 +73,8 @@ export class SandboxService implements ISandboxService {
private sandboxes = new Map<string, LocalSandbox>()
/** Search knowledge service for Tools SDK */
private searchKnowledgeService?: ISearchKnowledgeService
/** Per-session harness state after loadHarness; drives harness.* injection. */
private sessionHarnessStates = new Map<string, SessionHarnessState>()
/** Session manager for sub-agent delegation via tools.agentQuery() */
private sessionManager?: SessionManager
/** Swarm coordinator for cross-provider query and store */
Expand All @@ -59,6 +86,7 @@ export class SandboxService implements ISandboxService {
async cleanup(): Promise<void> {
this.harnessOutcomeRecorder?.cleanup()
this.harnessVersionIdBySession.clear()
this.sessionHarnessStates.clear()
this.sandboxes.clear()
this.sandboxCommandTypes.clear()
this.pendingVariables.clear()
Expand All @@ -72,6 +100,7 @@ export class SandboxService implements ISandboxService {
async clearSession(sessionId: string): Promise<void> {
this.harnessOutcomeRecorder?.clearSession(sessionId)
this.harnessVersionIdBySession.delete(sessionId)
this.sessionHarnessStates.delete(sessionId)
this.sandboxes.delete(sessionId)
this.sandboxCommandTypes.delete(sessionId)
this.pendingVariables.delete(sessionId)
Expand Down Expand Up @@ -139,6 +168,15 @@ export class SandboxService implements ISandboxService {
this.pendingVariables.delete(sessionId)
}

// Inject harness.* namespace if a harness module is loaded for this
// session (via loadHarness()). When no harness is loaded, the
// sandbox context has no `harness` entry and user code runs
// against raw `tools.*` orchestration.
const harnessNs = this.buildHarnessNamespace(sessionId)
if (harnessNs !== undefined) {
initialContext.harness = harnessNs
}

// Build per-session ToolsSDK (includes agentQuery bound to this sessionId)
const sessionToolsSDK = this.buildToolsSDK(sessionId, config?.commandType)

Expand Down Expand Up @@ -198,6 +236,82 @@ export class SandboxService implements ISandboxService {
return result
}

/**
 * Load the latest harness version for `(projectId, commandType)` and
 * register it on `sessionId` so future `executeCode` calls inject
 * `harness.*` into the sandbox context.
 *
 * Failures are encoded in the returned `HarnessLoadResult` instead of
 * being thrown: a disabled/unwired harness, a missing version, and a
 * failing store lookup all yield `{loaded: false, reason: 'no-version'}`,
 * and a builder failure is returned as-is. A `{loaded: false}` result
 * leaves the session untouched; the sandbox continues with raw
 * `tools.*` orchestration.
 *
 * On `{loaded: true}`, the method also populates
 * `harnessVersionIdBySession` so the outcome recorder can attribute
 * outcomes to the loaded version, and — if the session's sandbox
 * already exists — injects `harness.*` into its context immediately.
 *
 * Harness mode is hardcoded to the Phase 3 "assisted" baseline;
 * Phase 5's `HarnessModeSelector` will layer mode gating on top.
 *
 * NOTE(review): `module.meta()` is invoked here before any session
 * state is written. This assumes the builder only returns modules
 * whose `meta()` does not throw — confirm against the builder.
 *
 * @param sessionId - Session the loaded module is registered on
 * @param projectId - Project identifier passed to the store lookup
 * @param commandType - Harness command type passed to the store lookup
 * @returns The load result; never a rejection for store/builder failures
 */
async loadHarness(
  sessionId: string,
  projectId: string,
  commandType: 'chat' | 'curate' | 'query',
): Promise<HarnessLoadResult> {
  const {harnessModuleBuilder, harnessStore} = this

  // Deliberate: three distinct conditions (admin-disabled,
  // store not wired, builder not wired) collapse into the same
  // 'no-version' result for v1.0. `HarnessLoadResult` doesn't
  // distinguish 'disabled' / 'not-configured' / 'no-version' as
  // separate reasons because no consumer yet needs to branch on
  // them — Phase 5's mode selector is the first real caller and
  // will add variants if the downstream telemetry needs them.
  // Keep this conflation intentional, not accidental.
  if (
    this.harnessConfig?.enabled !== true ||
    harnessStore === undefined ||
    harnessModuleBuilder === undefined
  ) {
    return {loaded: false, reason: 'no-version'}
  }

  // The store read is the one awaited call in this method; guard it so
  // a storage failure is reported through the result rather than as a
  // rejected promise, matching the documented "never throws" contract.
  let version: Awaited<ReturnType<IHarnessStore['getLatest']>>
  try {
    version = await harnessStore.getLatest(projectId, commandType)
  } catch (error: unknown) {
    this.logger?.warn('SandboxService.loadHarness: store lookup failed', {
      commandType,
      error: error instanceof Error ? error.message : String(error),
      projectId,
    })
    // Folded into 'no-version' alongside the other not-available cases above.
    return {loaded: false, reason: 'no-version'}
  }

  if (version === undefined) {
    return {loaded: false, reason: 'no-version'}
  }

  const result = harnessModuleBuilder.build(version)
  if (!result.loaded) {
    this.logger?.warn('SandboxService.loadHarness: builder returned failure', {
      commandType,
      projectId,
      reason: result.reason,
      versionId: version.id,
    })
    return result
  }

  // The state object (including `meta()`) is fully evaluated before
  // either map is written, so a failure here leaves the session untouched.
  this.sessionHarnessStates.set(sessionId, {
    commandType,
    meta: result.module.meta(),
    module: result.module,
    projectType: this.resolveProjectType(),
  })
  this.harnessVersionIdBySession.set(sessionId, result.version.id)

  // If the sandbox already exists, inject now. Otherwise `executeCode`
  // picks up the namespace at sandbox-creation time via the
  // `buildHarnessNamespace` check in the creation block.
  //
  // `buildHarnessNamespace` only returns `undefined` when no state is
  // registered for `sessionId` — we just set it above, so the result
  // is guaranteed non-undefined here. No need to re-guard.
  const sandbox = this.sandboxes.get(sessionId)
  if (sandbox !== undefined) {
    sandbox.updateContext({harness: this.buildHarnessNamespace(sessionId)})
  }

  return result
}

/**
* Set the content generator for parallel LLM operations (mapExtract).
* When set, new sandboxes will have access to `tools.curation.mapExtract()`.
Expand Down Expand Up @@ -256,6 +370,15 @@ export class SandboxService implements ISandboxService {
this.harnessConfig = config
}

/**
* Wire in the AutoHarness V2 module builder. `loadHarness` uses this
* to evaluate the `HarnessVersion.code` string returned by the store
* into a callable module.
*
* Until a builder has been set, `loadHarness` returns
* `{loaded: false, reason: 'no-version'}` without consulting the store.
*
* @param builder - Module builder instance kept on this service
*/
setHarnessModuleBuilder(builder: HarnessModuleBuilder): void {
this.harnessModuleBuilder = builder
}

/**
* Wire in the AutoHarness V2 outcome recorder. When set, every
* `executeCode` call fire-and-forgets a `recorder.record(...)` with the
Expand All @@ -269,6 +392,15 @@ export class SandboxService implements ISandboxService {
this.logger = logger
}

/**
* Wire in the AutoHarness V2 storage interface. `loadHarness` calls
* `store.getLatest(projectId, commandType)` to find the version to
* evaluate.
*
* Until a store has been set, `loadHarness` returns
* `{loaded: false, reason: 'no-version'}`.
*
* @param store - Harness storage instance kept on this service
*/
setHarnessStore(store: IHarnessStore): void {
this.harnessStore = store
}

/**
* Set a variable in a session's sandbox.
* If the sandbox doesn't exist yet, the variable is buffered and injected
Expand Down Expand Up @@ -326,6 +458,74 @@ export class SandboxService implements ISandboxService {
this.invalidateSandboxes()
}

/**
 * Build the `harness.*` namespace for a session, or `undefined` if
 * no harness is loaded for `sessionId`.
 *
 * The namespace always exposes `meta()`, which returns the metadata
 * captured at load time without re-invoking the module. `curate()` and
 * `query()` are exposed only when the loaded module defines them; both
 * take no arguments and pass the module function a freshly built
 * `HarnessContext` on every call, so the tool bindings reflect the
 * services wired at call time.
 *
 * The `env` values (`commandType`, `projectType`, `workingDirectory`)
 * are read once when the namespace is built, not per call.
 *
 * @param sessionId - Session whose loaded harness state is looked up
 * @returns The namespace object, or `undefined` when no state is registered
 */
private buildHarnessNamespace(sessionId: string): Record<string, unknown> | undefined {
  const state = this.sessionHarnessStates.get(sessionId)
  if (state === undefined) return undefined

  const {commandType, meta, module, projectType} = state
  // Falls back to an empty string when no environment context is set.
  const workingDirectory = this.environmentContext?.workingDirectory ?? ''

  const buildCtx = (): HarnessContext => ({
    // Phase 3 placeholder: a fresh signal per call. Phase 5's
    // `AgentLLMService` hook will thread the session's real abort
    // signal through so `ctx.abort` propagates user cancellation.
    abort: new AbortController().signal,
    env: {commandType, projectType, workingDirectory},
    tools: this.buildHarnessTools(),
  })

  const ns: Record<string, unknown> = {
    meta: (): HarnessMeta => meta,
  }

  // Bind each optional entry point to a local const so the closure
  // keeps the narrowed (defined) function type.
  if (module.curate !== undefined) {
    const curateFn = module.curate
    ns.curate = async (): Promise<unknown> => curateFn(buildCtx())
  }

  if (module.query !== undefined) {
    const queryFn = module.query
    ns.query = async (): Promise<unknown> => queryFn(buildCtx())
  }

  return ns
}

/**
 * Assemble the `tools` member of a `HarnessContext`.
 *
 * Two functions are exposed, `curate` and `readFile`, each forwarding
 * its arguments unchanged to the curate service / file system that
 * were wired on this service when this method ran. If the backing
 * service is missing, the function rejects with an `Error` naming the
 * missing dependency, so harness code sees an ordinary runtime error
 * instead of a silent no-op.
 *
 * @returns The tool surface handed to harness code
 */
private buildHarnessTools(): HarnessContext['tools'] {
  // Snapshot both dependencies once; the closures below use these copies.
  const {curateService: curator, fileSystem: files} = this

  const tools: HarnessContext['tools'] = {
    curate: async (operations, options) => {
      if (curator !== undefined) {
        return curator.curate(operations, options)
      }

      throw new Error('harness.ctx.tools.curate: no curate service wired')
    },
    readFile: async (filePath, options) => {
      if (files !== undefined) {
        return files.readFile(filePath, options)
      }

      throw new Error('harness.ctx.tools.readFile: no file system wired')
    },
  }

  return tools
}

/**
* Build a Tools SDK instance for a specific session.
* Includes `agentQuery` bound to the session's ID for sub-agent delegation.
Expand Down
Loading
Loading