diff --git a/.changeset/friendlier-errors.md b/.changeset/friendlier-errors.md new file mode 100644 index 0000000000..cafb9f724e --- /dev/null +++ b/.changeset/friendlier-errors.md @@ -0,0 +1,8 @@ +--- +"@workflow/core": patch +"@workflow/errors": patch +"@workflow/builders": patch +"@workflow/utils": patch +--- + +Friendlier workflow error messages. New `SerializationError`, `WorkflowBuildError`, and structured context-violation classes (e.g. `NotInWorkflowContextError`) with actionable hints and docs links applied to user-facing throw sites; `FatalError.is()` recognizes any error with `fatal: true` so context violations and serialization failures now fail fast instead of burning retry attempts. Runtime logs are namespaced under `[workflow-sdk]` and gain `errorAttribution` (`user` vs `sdk`) plus class-aware hints; `Ansi` helpers moved to a new `@workflow/errors/ansi` subpath so consumers that only use the error classes don't pull `chalk` into their bundle. Adds a `@workflow/core/describe-error` subpath so CLI / web observability renderers can derive the same user-vs-SDK framing from persisted failure events. diff --git a/.changeset/pretty-log-format.md b/.changeset/pretty-log-format.md new file mode 100644 index 0000000000..24767d1d2d --- /dev/null +++ b/.changeset/pretty-log-format.md @@ -0,0 +1,16 @@ +--- +"@workflow/core": patch +"@workflow/errors": patch +--- + +Replace `util.inspect`'s default object dump for runtime structured-log metadata with an opinionated, workflow-aware formatter (`packages/core/src/log-format.ts`). The runtime logger now composes `[workflow-sdk] ` + stack + a compact, color-coded metadata block — passed to `console.error` / `console.warn` as a single string — instead of letting Node quote-escape multi-line stacks and paragraph hints inside an object dump. + +Highlights of the new format: + +- `wrun_…` / `step_…` ULIDs render with their parsed friendly name (`add (./workflows/1_simple)`) using the existing `parseStepName` / `parseWorkflowName` utilities. +- Color-coded attribution badge (`user error` red, `sdk error` magenta) paired with the error class in bold. +- `hint` renders as a clean paragraph under `hint:` instead of a backslash-`\n`-escaped string. +- Redundant fields (`errorStack`, plus `errorMessage` when the parent message already includes it) are dropped to avoid double-printing. +- Unknown fields fall through as a sorted `key value` tail so we never silently drop log information. + +Side-effect: `@workflow/errors/ansi` gains `bold`, `red`, `magenta` helpers used by the formatter. The `web` / `web-shared` packages don't consume stderr — they read structured event payloads from the World event log — so the change is presentation-only at the runtime layer. diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 28e159c706..e39c7a8f31 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -449,6 +449,12 @@ jobs: with: install-dependencies: 'false' build-packages: 'false' + # This job never runs `pnpm install`, so the pnpm store path + # never exists. The post-job `actions/setup-node@v4` cache-save + # then fails with "Path Validation Error" and red-X's the job. + # Disable the cache to keep the matrix step the only failure + # surface. + cache-pnpm: 'false' - id: set-matrix run: | diff --git a/packages/builders/src/base-builder.ts b/packages/builders/src/base-builder.ts index 5e28dc09b5..760a5b2779 100644 --- a/packages/builders/src/base-builder.ts +++ b/packages/builders/src/base-builder.ts @@ -2,6 +2,7 @@ import { randomUUID } from 'node:crypto'; import { mkdir, readFile, realpath, rename, writeFile } from 'node:fs/promises'; import { basename, dirname, join, relative, resolve } from 'node:path'; import { promisify } from 'node:util'; +import { WorkflowBuildError } from '@workflow/errors'; import { pluralize } from '@workflow/utils'; import chalk from 'chalk'; import enhancedResolveOriginal from 'enhanced-resolve'; @@ -343,8 +344,11 @@ export abstract class BaseBuilder { } if (throwOnError) { - throw new Error( - `Build failed during ${phase}:\n${errorMessages.join('\n')}` + throw new WorkflowBuildError( + `Build failed during ${phase}:\n${errorMessages.join('\n')}`, + { + hint: `Review the esbuild errors above — they come from the ${phase} bundle. Fix the offending source files and re-run the build.`, + } ); } } @@ -421,13 +425,12 @@ export abstract class BaseBuilder { dirname(outfile), 'workflow/internal/builtins' ).catch((err) => { - throw new Error( - [ - chalk.red('Failed to resolve built-in steps sources.'), - `${chalk.yellow.bold('hint:')} run \`${chalk.cyan.italic('npm install workflow')}\` to resolve this issue.`, - '', - `Caused by: ${chalk.red(String(err))}`, - ].join('\n') + throw new WorkflowBuildError( + `Failed to resolve built-in steps sources.\n\nCaused by: ${String(err)}`, + { + hint: 'run `pnpm install workflow` to resolve this issue.', + cause: err, + } ); }); @@ -856,7 +859,9 @@ export abstract class BaseBuilder { !interimBundle.outputFiles || interimBundle.outputFiles.length === 0 ) { - throw new Error('No output files generated from esbuild'); + throw new WorkflowBuildError('No output files generated from esbuild', { + hint: 'This usually indicates a misconfigured entry point or an empty workflow directory. Check that your workflow files contain a `"use workflow"` or `"use step"` directive.', + }); } // Serde compliance warnings: check if workflow bundle has Node.js imports diff --git a/packages/core/package.json b/packages/core/package.json index 412da1b8d7..20f09eb728 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -64,6 +64,10 @@ "types": "./dist/encryption.d.ts", "default": "./dist/encryption.js" }, + "./describe-error": { + "types": "./dist/describe-error.d.ts", + "default": "./dist/describe-error.js" + }, "./_workflow": "./dist/workflow/index.js" }, "scripts": { diff --git a/packages/core/src/capture-stack.ts b/packages/core/src/capture-stack.ts new file mode 100644 index 0000000000..a17fb73d1a --- /dev/null +++ b/packages/core/src/capture-stack.ts @@ -0,0 +1,26 @@ +/** + * V8-only (Node, Bun, Chrome, Deno). Rewrites `err.stack` so the top frame is + * the caller of `stackStartFn` instead of the framework function that threw. + * Without this, terminal overlays (Next.js, Turbopack, VS Code) render the + * code frame at our `throw` site inside `@workflow/core`, which is useless + * to the user. + * + * No-op on engines that don't expose `Error.captureStackTrace` — the stack + * degrades gracefully to the default behavior. + * + * Kept in its own tiny module so callers that can't participate in the + * `context-errors.ts` ↔ `workflow/get-workflow-metadata.ts` import cycle can + * still pull in the helper without pulling in the full error classes. + */ +export function redirectStackToCaller( + err: Error, + // biome-ignore lint/complexity/noBannedTypes: signature matches Error.captureStackTrace + stackStartFn: Function +): void { + const capture = ( + Error as unknown as { + captureStackTrace?: (target: object, fn: Function) => void; + } + ).captureStackTrace; + capture?.(err, stackStartFn); +} diff --git a/packages/core/src/context-errors.test.ts b/packages/core/src/context-errors.test.ts new file mode 100644 index 0000000000..9dd1b60963 --- /dev/null +++ b/packages/core/src/context-errors.test.ts @@ -0,0 +1,242 @@ +import { FatalError } from '@workflow/errors'; +import { inspect } from 'node:util'; +import { afterEach, describe, expect, it } from 'vitest'; +import { + NotInStepContextError, + NotInWorkflowContextError, + NotInWorkflowOrStepContextError, + throwNotInWorkflowContext, + UnavailableInWorkflowContextError, +} from './context-errors.js'; +import { + WORKFLOW_CONTEXT_SYMBOL, + type WorkflowMetadata, +} from './workflow/get-workflow-metadata.js'; + +// These tests assert on the plain-text form of the messages. In a TTY chalk +// would add color, but vitest runs without a TTY so chalk is level=0 and +// the styling helpers are pass-throughs. Snapshots therefore match the raw +// structure we care about (╰▶ / ├▶ tree + labels + docs URL). + +describe('NotInWorkflowContextError', () => { + it('frames the function name and docs link', () => { + const err = new NotInWorkflowContextError( + 'createHook()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/create-hook' + ); + expect(err.name).toBe('NotInWorkflowContextError'); + expect(err.message).toMatchInlineSnapshot(` + "\`createHook()\` can only be called inside a workflow function + ╰▶ docs: https://workflow-sdk.dev/docs/api-reference/workflow/create-hook" + `); + }); + + it('does not expose functionName as an enumerable own property', () => { + // Regression: `readonly functionName` as a constructor param-property used + // to leak through util.inspect (Next.js error overlay, Node's default + // error formatter). Keep this invariant so the terminal output stays + // clean. + const err = new NotInWorkflowContextError( + 'createHook()', + 'https://example.com/docs' + ); + expect(Object.keys(err)).not.toContain('functionName'); + expect((err as any).functionName).toBeUndefined(); + }); +}); + +describe('NotInStepContextError', () => { + it('uses "step function" phrasing', () => { + const err = new NotInStepContextError( + 'getStepMetadata()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/get-step-metadata' + ); + expect(err.message).toContain('can only be called inside a step function'); + expect(err.message).toContain( + 'docs: https://workflow-sdk.dev/docs/api-reference/workflow/get-step-metadata' + ); + }); +}); + +describe('NotInWorkflowOrStepContextError', () => { + it('uses "workflow or step function" phrasing', () => { + const err = new NotInWorkflowOrStepContextError( + 'getWorkflowMetadata()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/get-workflow-metadata' + ); + expect(err.message).toContain( + 'can only be called inside a workflow or step function' + ); + }); +}); + +describe('UnavailableInWorkflowContextError', () => { + afterEach(() => { + delete (globalThis as any)[WORKFLOW_CONTEXT_SYMBOL]; + }); + + it('names the workflow when a context is active', () => { + (globalThis as any)[WORKFLOW_CONTEXT_SYMBOL] = { + workflowName: 'workflow//./src/workflows/example.ts//myWorkflow', + } as WorkflowMetadata; + + const err = new UnavailableInWorkflowContextError( + 'resumeHook()', + 'https://workflow-sdk.dev/docs/api-reference/workflow-api/resume-hook' + ); + expect(err.message).toContain('cannot be called from a workflow context'); + expect(err.message).toContain( + 'workflow//./src/workflows/example.ts//myWorkflow' + ); + }); + + it('falls back to a generic phrasing when no context is present', () => { + const err = new UnavailableInWorkflowContextError( + 'resumeHook()', + 'https://workflow-sdk.dev/docs/api-reference/workflow-api/resume-hook' + ); + expect(err.message).toContain('from a workflow context'); + }); +}); + +describe('plain .message / lazy pretty rendering', () => { + it('.message contains no ANSI escape bytes', () => { + // The user's structured logs, log drains, and CBOR event payloads all + // read `err.message` as a string. ANSI bytes leaking into them produced + // unreadable `\x1B[...m` noise in JSON. Keep `.message` plain. + const err = new NotInWorkflowContextError( + 'createHook()', + 'https://example.com/docs' + ); + // biome-ignore lint/suspicious/noControlCharactersInRegex: ANSI check + expect(err.message).not.toMatch(/\x1B\[/); + }); + + it('.stack contains no ANSI escape bytes', () => { + const err = new NotInWorkflowContextError( + 'createHook()', + 'https://example.com/docs' + ); + // biome-ignore lint/suspicious/noControlCharactersInRegex: ANSI check + expect(err.stack ?? '').not.toMatch(/\x1B\[/); + }); + + it('util.inspect(err) reveals the pretty framed form', () => { + // Node prints uncaught / logged errors via util.inspect. The pretty + // (framed) output belongs on the render path, not in stored state. + const err = new NotInWorkflowContextError( + 'createHook()', + 'https://example.com/docs' + ); + const out = inspect(err); + expect(out).toContain('NotInWorkflowContextError:'); + expect(out).toContain('createHook()'); + expect(out).toContain('can only be called inside a workflow function'); + expect(out).toContain('╰▶'); + expect(out).toContain('docs:'); + }); + + it('util.inspect(err) does not duplicate framed detail lines', () => { + // Regression: `.message` is multi-line (`title\n╰▶ docs: …`), so V8's + // `.stack` reads `Name: messageLine1\nmessageLine2\n at …`. Slicing + // only the first line of stack glued the framed-detail tail of the + // message onto the prepended pretty form and rendered every `╰▶ docs:` + // line twice. Now we slice past all message lines. + const out = inspect( + new NotInWorkflowContextError('createHook()', 'https://example.com/docs') + ); + // Multi-detail variants would also duplicate every detail; the docs + // line is the canonical case. + expect(out).not.toMatch(/╰▶ docs:.*\n.*╰▶ docs:/s); + // ╰▶ should appear exactly once for the single-detail error. + const occurrences = (out.match(/╰▶ docs:/g) ?? []).length; + expect(occurrences).toBe(1); + }); + + it('err.toString() also returns the pretty framed form', () => { + const err = new NotInWorkflowContextError( + 'createHook()', + 'https://example.com/docs' + ); + expect(err.toString()).toContain('NotInWorkflowContextError:'); + expect(err.toString()).toContain('╰▶'); + }); +}); + +describe('FatalError.is() gate', () => { + // The step handler uses FatalError.is() to decide retry vs bubble-up. + // Context-violation errors can't succeed on retry — they signal the + // user called a workflow-only API from the wrong context — so burning + // three retry attempts just produces duplicated log output. + it.each([ + [ + 'NotInWorkflowContextError', + () => + new NotInWorkflowContextError('createHook()', 'https://example.com'), + ], + [ + 'NotInStepContextError', + () => + new NotInStepContextError('getStepMetadata()', 'https://example.com'), + ], + [ + 'NotInWorkflowOrStepContextError', + () => + new NotInWorkflowOrStepContextError( + 'getWorkflowMetadata()', + 'https://example.com' + ), + ], + [ + 'UnavailableInWorkflowContextError', + () => + new UnavailableInWorkflowContextError( + 'resumeHook()', + 'https://example.com' + ), + ], + ])('%s satisfies FatalError.is', (_name, make) => { + expect(FatalError.is(make())).toBe(true); + }); +}); + +describe('throw helpers redirect the stack to the caller', () => { + // V8-only. Skip silently on engines without Error.captureStackTrace. + const hasCaptureStackTrace = + typeof (Error as unknown as { captureStackTrace?: unknown }) + .captureStackTrace === 'function'; + + it.skipIf(!hasCaptureStackTrace)( + 'throwNotInWorkflowContext: top stack frame is the caller, not the framework function', + () => { + function frameworkGate() { + throwNotInWorkflowContext( + 'frameworkGate()', + 'https://example.com/docs', + frameworkGate + ); + } + + function userCallSite() { + frameworkGate(); + } + + try { + userCallSite(); + } catch (err) { + const stack = (err as Error).stack ?? ''; + // The first "at ..." frame should reference userCallSite, not + // frameworkGate or throwNotInWorkflowContext. + const firstFrame = stack + .split('\n') + .find((l) => l.trim().startsWith('at ')); + expect(firstFrame).toBeDefined(); + expect(firstFrame).toContain('userCallSite'); + expect(firstFrame).not.toContain('frameworkGate'); + expect(firstFrame).not.toContain('throwNotInWorkflowContext'); + return; + } + throw new Error('expected throwNotInWorkflowContext to throw'); + } + ); +}); diff --git a/packages/core/src/context-errors.ts b/packages/core/src/context-errors.ts new file mode 100644 index 0000000000..1e6ae89b36 --- /dev/null +++ b/packages/core/src/context-errors.ts @@ -0,0 +1,137 @@ +import { redirectStackToCaller } from './capture-stack.js'; +import { + ContextViolationError, + type Detail, + type DocsUrl, + NotInStepContextError, + NotInWorkflowContextError, + NotInWorkflowOrStepContextError, +} from './context-violation-error.js'; +import { + WORKFLOW_CONTEXT_SYMBOL, + type WorkflowMetadata, +} from './workflow/get-workflow-metadata.js'; + +// Re-export the structural base + subclasses so the public surface is a +// single import point. The base + simpler subclasses live in +// `context-violation-error.ts` because `get-workflow-metadata.ts` needs to +// throw one without creating an import cycle with this file. +export { + ContextViolationError, + NotInStepContextError, + NotInWorkflowContextError, + NotInWorkflowOrStepContextError, +}; + +/** + * Thrown when an API that MUST NOT run inside a workflow function is called + * from one (e.g. `resumeHook()`, which would cause determinism issues). + * The message names the specific workflow that made the offending call. + */ +export class UnavailableInWorkflowContextError extends ContextViolationError { + name = 'UnavailableInWorkflowContextError'; + + constructor(functionName: string, docsUrl: DocsUrl) { + const ctx = (globalThis as any)[WORKFLOW_CONTEXT_SYMBOL] as + | WorkflowMetadata + | undefined; + const workflowName = ctx?.workflowName; + + // Apply dim styling to `workflow/` / `step/` prefixes in the qualified + // name so the part the user named stands out. + const nameSegs = (() => { + if (!workflowName) return null; + const m = workflowName.match(/^(workflow\/|step\/)(.*)$/); + if (m) return [{ dim: m[1] }, { text: m[2] }] as const; + return [{ text: workflowName }] as const; + })(); + + const contextLine: Detail = nameSegs + ? { + type: 'plain', + segments: [ + { text: 'this call was made from the ' }, + ...nameSegs, + { text: ' workflow context.' }, + ], + } + : { + type: 'plain', + segments: [{ text: 'this call was made from a workflow context.' }], + }; + + super({ + title: [ + { code: functionName }, + { text: ' cannot be called from a workflow context.' }, + ], + details: [ + { + type: 'plain', + segments: [ + { + text: 'calling this in a workflow context can cause determinism issues.', + }, + ], + }, + contextLine, + { type: 'docs', url: docsUrl }, + ], + }); + } +} + +/** + * Throw a {@link NotInWorkflowContextError} whose stack trace points at the + * user code that called `stackStartFn`, not at our framework internals. + * + * Prefer this over `throw new NotInWorkflowContextError(...)` so tooling + * (Next.js error overlay, VS Code terminal linkifier, Sentry, etc.) shows + * the user's call site as the relevant frame. + */ +export function throwNotInWorkflowContext( + functionName: string, + docsUrl: DocsUrl, + // biome-ignore lint/complexity/noBannedTypes: matches Error.captureStackTrace + stackStartFn: Function +): never { + const err = new NotInWorkflowContextError(functionName, docsUrl); + redirectStackToCaller(err, stackStartFn); + throw err; +} + +/** See {@link throwNotInWorkflowContext}. */ +export function throwNotInStepContext( + functionName: string, + docsUrl: DocsUrl, + // biome-ignore lint/complexity/noBannedTypes: matches Error.captureStackTrace + stackStartFn: Function +): never { + const err = new NotInStepContextError(functionName, docsUrl); + redirectStackToCaller(err, stackStartFn); + throw err; +} + +/** See {@link throwNotInWorkflowContext}. */ +export function throwNotInWorkflowOrStepContext( + functionName: string, + docsUrl: DocsUrl, + // biome-ignore lint/complexity/noBannedTypes: matches Error.captureStackTrace + stackStartFn: Function +): never { + const err = new NotInWorkflowOrStepContextError(functionName, docsUrl); + redirectStackToCaller(err, stackStartFn); + throw err; +} + +/** See {@link throwNotInWorkflowContext}. */ +export function throwUnavailableInWorkflowContext( + functionName: string, + docsUrl: DocsUrl, + // biome-ignore lint/complexity/noBannedTypes: matches Error.captureStackTrace + stackStartFn: Function +): never { + const err = new UnavailableInWorkflowContextError(functionName, docsUrl); + redirectStackToCaller(err, stackStartFn); + throw err; +} diff --git a/packages/core/src/context-violation-error.ts b/packages/core/src/context-violation-error.ts new file mode 100644 index 0000000000..b1190ff563 --- /dev/null +++ b/packages/core/src/context-violation-error.ts @@ -0,0 +1,188 @@ +import * as Ansi from '@workflow/errors/ansi'; + +/** + * A `docs:` line URL. The leading protocol is part of the type so call sites + * can't accidentally pass a protocol-relative or bare path. + */ +export type DocsUrl = `https://${string}`; + +const INSPECT_CUSTOM = Symbol.for('nodejs.util.inspect.custom'); + +/** + * Structured data for a framed error. The base class takes this and renders + * it to plain text (for `.message` / `.stack` / structured logs) or to an + * ANSI-framed string (for terminal display via `util.inspect` / `toString`). + * + * Keeping the pieces structured means we never have to strip ANSI back out + * once it's in the message — we just don't put it there in the first place. + */ +export interface FramedContent { + /** Headline. `{ code: 'foo()' }` segments render as backticked inline code. */ + readonly title: readonly Segment[]; + /** One framed branch per entry. The last uses `╰▶`, others use `├▶`. */ + readonly details: readonly Detail[]; +} + +export type Segment = + | { readonly text: string } + | { readonly code: string } + | { readonly dim: string }; + +export type Detail = + | { readonly type: 'plain'; readonly segments: readonly Segment[] } + | { readonly type: 'docs'; readonly url: DocsUrl }; + +function renderSegmentPlain(s: Segment): string { + if ('code' in s) return `\`${s.code}\``; + if ('dim' in s) return s.dim; + return s.text; +} + +function renderSegmentPretty(s: Segment): string { + if ('code' in s) return Ansi.code(s.code); + if ('dim' in s) return Ansi.dim(s.dim); + return s.text; +} + +function renderDetailPlain(d: Detail): string { + if (d.type === 'docs') return `docs: ${d.url}`; + return d.segments.map(renderSegmentPlain).join(''); +} + +function renderDetailPretty(d: Detail): string { + if (d.type === 'docs') return Ansi.docs(d.url); + return d.segments.map(renderSegmentPretty).join(''); +} + +export function renderPlain(c: FramedContent): string { + // Mimic `Ansi.frame` structure so `.message` is still readable in logs + // even without the color. + const title = c.title.map(renderSegmentPlain).join(''); + const lines = [title]; + c.details.forEach((detail, index) => { + const isLast = index === c.details.length - 1; + const first = isLast ? '╰▶ ' : '├▶ '; + const cont = isLast ? ' ' : '│ '; + const raw = renderDetailPlain(detail).split('\n'); + raw.forEach((line, i) => lines.push(`${i === 0 ? first : cont}${line}`)); + }); + return lines.join('\n'); +} + +export function renderPretty(c: FramedContent): string { + const title = c.title.map(renderSegmentPretty).join(''); + return Ansi.frame(title, c.details.map(renderDetailPretty)); +} + +/** + * Base class for structured context-violation errors. + * + * Design notes: + * + * - `.message` is **plain text** (no ANSI escape bytes). Structured logs, + * log drains, CBOR-serialized event data, and anything else that reads + * `err.message` / `err.stack` as a string gets clean output — no mojibake + * in JSON, no `\x1B[...m` noise in Vercel logs. + * + * - The ANSI-framed version is rendered **lazily** via `toString()` and + * `[util.inspect.custom]`. When the error is thrown and Node prints it + * via `util.inspect`, the user sees the colored, framed box. When it's + * attached to a structured log field, the consumer sees plain text. + * + * - `fatal = true` marks these as non-retryable. Calling `createHook()` + * from a step function will never succeed no matter how many retries — + * burning attempts just produces duplicated log output. The runtime's + * `FatalError.is(err)` gate recognizes any error with `fatal: true`. + */ +export abstract class ContextViolationError extends Error { + /** Non-retryable — see class doc. */ + readonly fatal = true; + + readonly #content: FramedContent; + + constructor(content: FramedContent) { + super(renderPlain(content)); + this.#content = content; + } + + /** + * `console.log(err)` and most Node internals route through `util.inspect`, + * which respects this symbol. Returning a custom string here means the + * thrown error prints as a pretty frame in the terminal while `.message` + * and `.stack` stay plain. + */ + [INSPECT_CUSTOM](): string { + const pretty = renderPretty(this.#content); + // `stack` starts with `${name}: ${message}\n at ...`. Our message is + // multi-line (`title\n╰▶ docs: …`), so slicing only the first line glues + // the framed-detail tail of the message onto the prepended pretty form + // and renders every detail line twice. Slice past *all* message lines. + const messageLineCount = this.message.split('\n').length; + const tail = (this.stack ?? '') + .split('\n') + .slice(messageLineCount) + .join('\n'); + return tail + ? `${this.name}: ${pretty}\n${tail}` + : `${this.name}: ${pretty}`; + } + + toString(): string { + return `${this.name}: ${renderPretty(this.#content)}`; + } +} + +/** + * Thrown when an API that must run inside a workflow function is called + * from outside a workflow context (e.g. from a step function or from + * regular application code). + */ +export class NotInWorkflowContextError extends ContextViolationError { + name = 'NotInWorkflowContextError'; + + constructor(functionName: string, docsUrl: DocsUrl) { + super({ + title: [ + { code: functionName }, + { text: ' can only be called inside a workflow function' }, + ], + details: [{ type: 'docs', url: docsUrl }], + }); + } +} + +/** + * Thrown when an API that must run inside a step function is called from + * outside a step context. + */ +export class NotInStepContextError extends ContextViolationError { + name = 'NotInStepContextError'; + + constructor(functionName: string, docsUrl: DocsUrl) { + super({ + title: [ + { code: functionName }, + { text: ' can only be called inside a step function' }, + ], + details: [{ type: 'docs', url: docsUrl }], + }); + } +} + +/** + * Thrown when an API that must run inside either a workflow or step function + * is called from regular application code. + */ +export class NotInWorkflowOrStepContextError extends ContextViolationError { + name = 'NotInWorkflowOrStepContextError'; + + constructor(functionName: string, docsUrl: DocsUrl) { + super({ + title: [ + { code: functionName }, + { text: ' can only be called inside a workflow or step function' }, + ], + details: [{ type: 'docs', url: docsUrl }], + }); + } +} diff --git a/packages/core/src/create-hook.ts b/packages/core/src/create-hook.ts index ed32889f66..05ea4cc84d 100644 --- a/packages/core/src/create-hook.ts +++ b/packages/core/src/create-hook.ts @@ -1,3 +1,4 @@ +import { throwNotInWorkflowContext } from './context-errors.js'; import type { Serializable } from './schemas.js'; /** @@ -177,8 +178,10 @@ export interface WebhookOptions */ // @ts-expect-error `options` is here for types/docs export function createHook(options?: HookOptions): Hook { - throw new Error( - '`createHook()` can only be called inside a workflow function' + throwNotInWorkflowContext( + 'createHook()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/create-hook', + createHook ); } @@ -197,7 +200,9 @@ export function createWebhook( // @ts-expect-error `options` is here for types/docs options?: WebhookOptions ): Webhook | Webhook { - throw new Error( - '`createWebhook()` can only be called inside a workflow function' + throwNotInWorkflowContext( + 'createWebhook()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/create-webhook', + createWebhook ); } diff --git a/packages/core/src/define-hook.test.ts b/packages/core/src/define-hook.test.ts index d8e8fd5120..20659c1cc7 100644 --- a/packages/core/src/define-hook.test.ts +++ b/packages/core/src/define-hook.test.ts @@ -102,14 +102,9 @@ describe('defineHook', () => { comment: string; }) ).rejects.toThrowErrorMatchingInlineSnapshot(` - [Error: [ - { - "message": "Invalid input: expected boolean at \\"approved\\"" - }, - { - "message": "Invalid input: expected string at \\"comment\\"" - } - ]] + [Error: Hook payload did not match the defined schema: + Invalid input: expected boolean at "approved" + Invalid input: expected string at "comment"] `); }); }); diff --git a/packages/core/src/define-hook.ts b/packages/core/src/define-hook.ts index 0111e70e40..bd4442f653 100644 --- a/packages/core/src/define-hook.ts +++ b/packages/core/src/define-hook.ts @@ -1,5 +1,6 @@ import type { StandardSchemaV1 } from '@standard-schema/spec'; import type { Hook as HookEntity } from '@workflow/world'; +import { throwNotInWorkflowContext } from './context-errors.js'; import type { Hook, HookOptions } from './create-hook.js'; import { resumeHook } from './runtime/resume-hook.js'; @@ -72,12 +73,19 @@ export function defineHook({ }: { schema?: StandardSchemaV1; } = {}): TypedHook { + function create(_options?: HookOptions): Hook { + // NOTE: `create` is referenced by name (not `this.create`) so the stack + // strip still works if the caller destructured the hook (`const { create } + // = defineHook(); create()`) — in that case `this` is undefined. + throwNotInWorkflowContext( + 'defineHook().create()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/define-hook', + create + ); + } + return { - create(_options?: HookOptions): Hook { - throw new Error( - '`defineHook().create()` can only be called inside a workflow function.' - ); - }, + create, async resume(token: string, payload: TInput): Promise { if (!schema?.['~standard']) { return await resumeHook(token, payload); @@ -90,7 +98,21 @@ export function defineHook({ // if the `issues` field exists, the validation failed if (result.issues) { - throw new Error(JSON.stringify(result.issues, null, 2)); + const lines = result.issues.map((issue) => { + const path = issue.path + ?.map((segment) => + typeof segment === 'object' && segment !== null + ? String((segment as { key: PropertyKey }).key) + : String(segment) + ) + .join('.'); + return path + ? ` at "${path}": ${issue.message}` + : ` ${issue.message}`; + }); + throw new Error( + `Hook payload did not match the defined schema:\n${lines.join('\n')}` + ); } return await resumeHook(token, result.value); diff --git a/packages/core/src/describe-error.test.ts b/packages/core/src/describe-error.test.ts new file mode 100644 index 0000000000..252566246c --- /dev/null +++ b/packages/core/src/describe-error.test.ts @@ -0,0 +1,252 @@ +import { + RUN_ERROR_CODES, + SerializationError, + StepNotRegisteredError, + WorkflowRuntimeError, +} from '@workflow/errors'; +import { describe, expect, test } from 'vitest'; +import { + NotInStepContextError, + NotInWorkflowContextError, +} from './context-errors.js'; +import { describeError, describeRunError } from './describe-error.js'; + +describe('describeError', () => { + test('plain user errors are attributed to the user with no hint', () => { + const result = describeError(new Error('something user code did')); + expect(result.attribution).toBe('user'); + expect(result.errorCode).toBe(RUN_ERROR_CODES.USER_ERROR); + expect(result.hint).toBeUndefined(); + }); + + test('non-Error throws are attributed to the user', () => { + expect(describeError('string').attribution).toBe('user'); + expect(describeError(undefined).attribution).toBe('user'); + expect(describeError(null).attribution).toBe('user'); + expect(describeError({ oops: true }).attribution).toBe('user'); + }); + + test('SerializationError is attributed to the user with a hint', () => { + const result = describeError( + new SerializationError('Failed to serialize step arguments') + ); + expect(result.attribution).toBe('user'); + expect(result.hint).toContain('serialized'); + }); + + test('context-violation errors are attributed to the user', () => { + const workflowOnly = describeError( + new NotInWorkflowContextError( + 'createHook', + 'hooks: https://example.com/hooks' + ) + ); + expect(workflowOnly.attribution).toBe('user'); + expect(workflowOnly.hint).toContain('wrong context'); + + const stepOnly = describeError( + new NotInStepContextError( + 'respondWith', + 'webhook responses: https://example.com/webhook' + ) + ); + expect(stepOnly.attribution).toBe('user'); + expect(stepOnly.hint).toContain('wrong context'); + }); + + test('WorkflowRuntimeError is attributed to the SDK', () => { + const result = describeError( + new WorkflowRuntimeError('corrupted event log') + ); + expect(result.attribution).toBe('sdk'); + expect(result.errorCode).toBe(RUN_ERROR_CODES.RUNTIME_ERROR); + expect(result.hint).toContain('internal workflow SDK error'); + }); + + test('StepNotRegisteredError (subclass of WorkflowRuntimeError) is attributed to the SDK', () => { + const result = describeError(new StepNotRegisteredError('missingStep')); + expect(result.attribution).toBe('sdk'); + expect(result.errorCode).toBe(RUN_ERROR_CODES.RUNTIME_ERROR); + }); + + test('REPLAY_TIMEOUT via precomputed errorCode is attributed to the SDK', () => { + const result = describeError(undefined, RUN_ERROR_CODES.REPLAY_TIMEOUT); + expect(result.attribution).toBe('sdk'); + expect(result.errorCode).toBe(RUN_ERROR_CODES.REPLAY_TIMEOUT); + expect(result.hint).toContain('replay took too long'); + }); + + test('MAX_DELIVERIES_EXCEEDED via precomputed errorCode is attributed to the SDK', () => { + const result = describeError( + undefined, + RUN_ERROR_CODES.MAX_DELIVERIES_EXCEEDED + ); + expect(result.attribution).toBe('sdk'); + expect(result.errorCode).toBe(RUN_ERROR_CODES.MAX_DELIVERIES_EXCEEDED); + expect(result.hint).toContain('max-delivery budget'); + }); + + test('precomputed errorCode wins over classifyRunError when both are provided', () => { + // A plain Error would classify as USER_ERROR, but passing REPLAY_TIMEOUT + // explicitly overrides that — useful for callers that know the failure + // category from the surrounding runtime context. + const result = describeError( + new Error('something'), + RUN_ERROR_CODES.REPLAY_TIMEOUT + ); + expect(result.errorCode).toBe(RUN_ERROR_CODES.REPLAY_TIMEOUT); + expect(result.attribution).toBe('sdk'); + }); +}); + +describe('describeRunError', () => { + test('plain user error event fields are attributed to the user with no hint', () => { + const result = describeRunError({ + errorCode: RUN_ERROR_CODES.USER_ERROR, + errorName: 'Error', + }); + expect(result.attribution).toBe('user'); + expect(result.hint).toBeUndefined(); + }); + + test('SerializationError by name is attributed to the user with a hint', () => { + const result = describeRunError({ + errorCode: RUN_ERROR_CODES.USER_ERROR, + errorName: 'SerializationError', + }); + expect(result.attribution).toBe('user'); + expect(result.hint).toContain('serialized'); + }); + + test('context-violation error names are attributed to the user', () => { + const result = describeRunError({ + errorCode: RUN_ERROR_CODES.USER_ERROR, + errorName: 'NotInWorkflowContextError', + }); + expect(result.attribution).toBe('user'); + expect(result.hint).toContain('wrong context'); + }); + + test('WorkflowRuntimeError name is attributed to the SDK', () => { + const result = describeRunError({ + errorCode: RUN_ERROR_CODES.RUNTIME_ERROR, + errorName: 'WorkflowRuntimeError', + }); + expect(result.attribution).toBe('sdk'); + expect(result.hint).toContain('internal workflow SDK error'); + }); + + test('REPLAY_TIMEOUT errorCode is attributed to the SDK', () => { + const result = describeRunError({ + errorCode: RUN_ERROR_CODES.REPLAY_TIMEOUT, + }); + expect(result.attribution).toBe('sdk'); + expect(result.hint).toContain('replay took too long'); + }); + + test('MAX_DELIVERIES_EXCEEDED errorCode is attributed to the SDK', () => { + const result = describeRunError({ + errorCode: RUN_ERROR_CODES.MAX_DELIVERIES_EXCEEDED, + }); + expect(result.attribution).toBe('sdk'); + expect(result.hint).toContain('max-delivery budget'); + }); + + test('RUNTIME_ERROR code without errorName still lands as SDK', () => { + const result = describeRunError({ + errorCode: RUN_ERROR_CODES.RUNTIME_ERROR, + }); + expect(result.attribution).toBe('sdk'); + expect(result.hint).toContain('internal workflow SDK error'); + }); + + test('missing errorCode defaults to USER_ERROR', () => { + const result = describeRunError({}); + expect(result.attribution).toBe('user'); + expect(result.errorCode).toBe(RUN_ERROR_CODES.USER_ERROR); + expect(result.hint).toBeUndefined(); + }); +}); + +/** + * Snapshot tests for the full `describeError` / `describeRunError` payload + * shape. These act as a regression gate on the exact strings that feed into + * log metadata fields (`errorAttribution`, `hint`) and UI attribution — we + * don't want a reworded hint to silently change what users see in their + * logs + docs links. + */ +describe('describeError — payload shape snapshots', () => { + test('plain user Error payload', () => { + expect(describeError(new Error('boom'))).toMatchInlineSnapshot(` + { + "attribution": "user", + "errorCode": "USER_ERROR", + } + `); + }); + + test('SerializationError payload', () => { + expect( + describeError(new SerializationError('boom')) + ).toMatchInlineSnapshot(` + { + "attribution": "user", + "errorCode": "USER_ERROR", + "hint": "A value passed across a workflow/step boundary could not be serialized. See the error message for the offending path and the Learn More link for details.", + } + `); + }); + + test('context-violation error payload', () => { + expect( + describeError( + new NotInWorkflowContextError( + 'createHook', + 'https://workflow-sdk.dev/docs/api-reference/workflow/create-hook' + ) + ) + ).toMatchInlineSnapshot(` + { + "attribution": "user", + "errorCode": "USER_ERROR", + "hint": "A workflow-only or step-only API was called from the wrong context. The error message includes the exact API and how to move the call.", + } + `); + }); + + test('WorkflowRuntimeError payload', () => { + expect( + describeError(new WorkflowRuntimeError('internal invariant')) + ).toMatchInlineSnapshot(` + { + "attribution": "sdk", + "errorCode": "RUNTIME_ERROR", + "hint": "This is an internal workflow SDK error, not a bug in your code. If it keeps happening, please report it with the stack trace and the runId.", + } + `); + }); + + test('REPLAY_TIMEOUT via precomputed errorCode payload', () => { + expect( + describeError(undefined, RUN_ERROR_CODES.REPLAY_TIMEOUT) + ).toMatchInlineSnapshot(` + { + "attribution": "sdk", + "errorCode": "REPLAY_TIMEOUT", + "hint": "The workflow replay took too long. This usually means the event log is unusually large or the workflow function is doing heavy synchronous work between step boundaries.", + } + `); + }); + + test('MAX_DELIVERIES_EXCEEDED via precomputed errorCode payload', () => { + expect( + describeError(undefined, RUN_ERROR_CODES.MAX_DELIVERIES_EXCEEDED) + ).toMatchInlineSnapshot(` + { + "attribution": "sdk", + "errorCode": "MAX_DELIVERIES_EXCEEDED", + "hint": "The workflow queue exceeded its max-delivery budget. This usually indicates a persistent runtime failure — check the most recent stack traces for the underlying cause.", + } + `); + }); +}); diff --git a/packages/core/src/describe-error.ts b/packages/core/src/describe-error.ts new file mode 100644 index 0000000000..420419a1ee --- /dev/null +++ b/packages/core/src/describe-error.ts @@ -0,0 +1,195 @@ +import { + RUN_ERROR_CODES, + type RunErrorCode, + SerializationError, + WorkflowRuntimeError, +} from '@workflow/errors'; +import { classifyRunError } from './classify-error.js'; +import { + NotInStepContextError, + NotInWorkflowContextError, + NotInWorkflowOrStepContextError, + UnavailableInWorkflowContextError, +} from './context-errors.js'; + +/** + * Attribution of a workflow/step failure for presentation. + * + * - `user`: the error came from customer code (a step or workflow function + * threw, or a value they passed across a boundary wasn't serializable). + * - `sdk`: the SDK produced the error itself — an internal invariant broke, + * or a runtime guard rejected the call. These should be rare; when they + * happen we want to frame the terminal output as "this is us, not you." + */ +export type ErrorAttribution = 'user' | 'sdk'; + +export interface ErrorDescription { + attribution: ErrorAttribution; + errorCode: RunErrorCode; + /** + * Short, class-aware hint to help a user understand what the error means. + * Only set for well-known SDK error classes (SerializationError, + * WorkflowRuntimeError, context-violation errors); `undefined` for plain + * user errors, where the stack is already the most useful thing to show. + */ + hint?: string; +} + +/** + * Error signal fields carried on persisted failure events (e.g. + * `run_failed` / `step_failed`). The shape is intentionally loose: + * + * - `errorCode` is typed as `string` rather than `RunErrorCode` because + * the value comes from stored JSON/CBOR and may predate the current + * enum — callers should not narrow on it blindly. Values that don't + * match a known `RUN_ERROR_CODES` entry fall through to USER_ERROR. + * - `errorName` is the thrown `Error#name`. It is not universally + * persisted today; callers that have access to it (either via an + * in-memory throw or a richer payload) can pass it in to sharpen + * the attribution and hint. When absent, `describeRunError` still + * returns a sensible attribution from `errorCode` alone. + */ +export interface PersistedErrorSignal { + errorCode?: string; + errorName?: string; +} + +const CONTEXT_ERROR_NAMES = new Set([ + 'NotInWorkflowContextError', + 'NotInStepContextError', + 'NotInWorkflowOrStepContextError', + 'UnavailableInWorkflowContextError', +]); + +function isContextViolationError(err: unknown): boolean { + return ( + err instanceof NotInWorkflowContextError || + err instanceof NotInStepContextError || + err instanceof NotInWorkflowOrStepContextError || + err instanceof UnavailableInWorkflowContextError + ); +} + +const SERIALIZATION_ERROR_HINT = + 'A value passed across a workflow/step boundary could not be serialized. See the error message for the offending path and the Learn More link for details.'; +const CONTEXT_ERROR_HINT = + 'A workflow-only or step-only API was called from the wrong context. The error message includes the exact API and how to move the call.'; +const RUNTIME_ERROR_HINT = + 'This is an internal workflow SDK error, not a bug in your code. If it keeps happening, please report it with the stack trace and the runId.'; +const REPLAY_TIMEOUT_HINT = + 'The workflow replay took too long. This usually means the event log is unusually large or the workflow function is doing heavy synchronous work between step boundaries.'; +const MAX_DELIVERIES_HINT = + 'The workflow queue exceeded its max-delivery budget. This usually indicates a persistent runtime failure — check the most recent stack traces for the underlying cause.'; + +function normalizeErrorCode(code: string | undefined): RunErrorCode { + // Values read back from persisted events are `string | undefined` — we + // only trust codes that match a known entry in `RUN_ERROR_CODES`. + const known = Object.values(RUN_ERROR_CODES) as readonly string[]; + if (code && known.includes(code)) { + return code as RunErrorCode; + } + return RUN_ERROR_CODES.USER_ERROR; +} + +/** + * Data-driven variant of {@link describeError} that works from persisted + * event fields instead of a live `Error` instance. Intended for CLI/web + * renderers that read failure events and no longer have the original + * thrown object. + */ +export function describeRunError( + signal: PersistedErrorSignal +): ErrorDescription { + const errorCode = normalizeErrorCode(signal.errorCode); + const name = signal.errorName; + + if (name === 'SerializationError') { + return { attribution: 'user', errorCode, hint: SERIALIZATION_ERROR_HINT }; + } + if (name && CONTEXT_ERROR_NAMES.has(name)) { + return { attribution: 'user', errorCode, hint: CONTEXT_ERROR_HINT }; + } + if (name === 'WorkflowRuntimeError' || name === 'StepNotRegisteredError') { + return { attribution: 'sdk', errorCode, hint: RUNTIME_ERROR_HINT }; + } + if (errorCode === RUN_ERROR_CODES.REPLAY_TIMEOUT) { + return { attribution: 'sdk', errorCode, hint: REPLAY_TIMEOUT_HINT }; + } + if (errorCode === RUN_ERROR_CODES.MAX_DELIVERIES_EXCEEDED) { + return { attribution: 'sdk', errorCode, hint: MAX_DELIVERIES_HINT }; + } + if (errorCode === RUN_ERROR_CODES.RUNTIME_ERROR) { + return { attribution: 'sdk', errorCode, hint: RUNTIME_ERROR_HINT }; + } + return { attribution: 'user', errorCode }; +} + +/** + * Describe an error for user-facing presentation. Purely informational — + * does not change any persisted event data or error classification used by + * the runtime. + * + * The attribution here is more nuanced than `classifyRunError`: + * + * - `SerializationError` is technically raised by the SDK, but it almost + * always points at something the caller did (passed a non-serializable + * value, didn't register a class). We attribute it to the user. + * - Context-violation errors (`NotInWorkflowContextError`, etc.) likewise + * describe a user mistake. + * - `WorkflowRuntimeError` (and subclasses like `StepNotRegisteredError`) + * indicates an internal SDK invariant broke — surface that as `sdk`. + * + * @param err The error value thrown by the workflow / step. + * @param errorCode Optional precomputed error code. Callers that already + * know the code (e.g. `REPLAY_TIMEOUT` or `MAX_DELIVERIES_EXCEEDED`, which + * `classifyRunError` can't derive from the error alone) should pass it so + * the attribution and hint reflect the actual failure category. + */ +export function describeError( + err: unknown, + errorCode?: RunErrorCode +): ErrorDescription { + const effectiveCode = errorCode ?? classifyRunError(err); + + if (SerializationError.is(err)) { + return { + attribution: 'user', + errorCode: effectiveCode, + hint: SERIALIZATION_ERROR_HINT, + }; + } + + if (isContextViolationError(err)) { + return { + attribution: 'user', + errorCode: effectiveCode, + hint: CONTEXT_ERROR_HINT, + }; + } + + if (err instanceof WorkflowRuntimeError) { + return { + attribution: 'sdk', + errorCode: effectiveCode, + hint: RUNTIME_ERROR_HINT, + }; + } + + if (effectiveCode === RUN_ERROR_CODES.REPLAY_TIMEOUT) { + return { + attribution: 'sdk', + errorCode: effectiveCode, + hint: REPLAY_TIMEOUT_HINT, + }; + } + + if (effectiveCode === RUN_ERROR_CODES.MAX_DELIVERIES_EXCEEDED) { + return { + attribution: 'sdk', + errorCode: effectiveCode, + hint: MAX_DELIVERIES_HINT, + }; + } + + return { attribution: 'user', errorCode: effectiveCode }; +} diff --git a/packages/core/src/encryption.ts b/packages/core/src/encryption.ts index 730e4ee115..368eacb269 100644 --- a/packages/core/src/encryption.ts +++ b/packages/core/src/encryption.ts @@ -1,3 +1,5 @@ +import { WorkflowRuntimeError } from '@workflow/errors'; + /** * Browser-compatible AES-256-GCM encryption module. * @@ -36,7 +38,7 @@ const KEY_LENGTH = 32; // bytes (AES-256) */ export async function importKey(raw: Uint8Array) { if (raw.byteLength !== KEY_LENGTH) { - throw new Error( + throw new WorkflowRuntimeError( `Encryption key must be exactly ${KEY_LENGTH} bytes, got ${raw.byteLength}` ); } @@ -82,7 +84,7 @@ export async function decrypt( ): Promise { const minLength = NONCE_LENGTH + TAG_LENGTH / 8; // nonce + auth tag if (data.byteLength < minLength) { - throw new Error( + throw new WorkflowRuntimeError( `Encrypted data too short: expected at least ${minLength} bytes, got ${data.byteLength}` ); } diff --git a/packages/core/src/flushable-stream.ts b/packages/core/src/flushable-stream.ts index 22751c8020..c9fc87e4da 100644 --- a/packages/core/src/flushable-stream.ts +++ b/packages/core/src/flushable-stream.ts @@ -1,3 +1,4 @@ +import { WorkflowRuntimeError } from '@workflow/errors'; import { type PromiseWithResolvers, withResolvers } from '@workflow/utils'; /** @@ -223,7 +224,7 @@ export async function flushablePipe( const readResult = await Promise.race([ reader.read(), writer.closed.then(() => { - throw new Error('Writable stream closed prematurely'); + throw new WorkflowRuntimeError('Writable stream closed prematurely'); }), ]); diff --git a/packages/core/src/global.ts b/packages/core/src/global.ts index 3dd5c52ac8..00f8ab0fd0 100644 --- a/packages/core/src/global.ts +++ b/packages/core/src/global.ts @@ -1,3 +1,4 @@ +import { WorkflowRuntimeError } from '@workflow/errors'; import { pluralize } from '@workflow/utils'; import type { Serializable } from './schemas.js'; @@ -125,5 +126,7 @@ export class WorkflowSuspension extends Error { } export function ENOTSUP(): never { - throw new Error('Not supported in workflow functions'); + throw new WorkflowRuntimeError( + 'This API is not available inside a workflow function. Workflow functions run in a deterministic VM; move the call to a step function for full Node.js access.' + ); } diff --git a/packages/core/src/log-format.test.ts b/packages/core/src/log-format.test.ts new file mode 100644 index 0000000000..9bb0bf9b5f --- /dev/null +++ b/packages/core/src/log-format.test.ts @@ -0,0 +1,122 @@ +import { describe, expect, test } from 'vitest'; +import { formatLogMetadata } from './log-format.js'; + +// chalk respects FORCE_COLOR=0 (which vitest doesn't set, but the runner +// has no TTY so chalk's level is 0 → ANSI helpers pass-through). The +// snapshots below match the plain-text structural form, which is what +// log drains and CI logs see. + +describe('formatLogMetadata', () => { + test('returns null for empty metadata', () => { + expect(formatLogMetadata('msg', undefined)).toBeNull(); + expect(formatLogMetadata('msg', {})).toBeNull(); + }); + + test('renders the canonical step-fatal payload', () => { + const out = formatLogMetadata( + 'Step add (./workflows/x) threw a FatalError — bubbling up to parent workflow', + { + workflowRunId: 'wrun_01ABC', + stepId: 'step_01XYZ', + stepName: 'step//./workflows/x//add', + errorAttribution: 'user', + errorName: 'NotInWorkflowContextError', + errorMessage: + '`createHook()` can only be called inside a workflow function', + hint: 'A workflow-only or step-only API was called from the wrong context. The error message includes the exact API and how to move the call.', + } + ); + expect(out).toMatchInlineSnapshot(` + " user error · NotInWorkflowContextError + run wrun_01ABC + step step_01XYZ · add (./workflows/x) + hint: A workflow-only or step-only API was called from the wrong context. The error message includes the exact API and how to move the call." + `); + }); + + test('renders the hit-max-retries payload with attempt + retryCount', () => { + const out = formatLogMetadata( + 'Step add (./workflows/x) hit max retries — bubbling error', + { + workflowRunId: 'wrun_01ABC', + workflowName: 'workflow//./workflows/x//myWorkflow', + stepId: 'step_01XYZ', + stepName: 'step//./workflows/x//add', + attempt: 4, + retryCount: 3, + errorAttribution: 'user', + errorName: 'Error', + errorMessage: 'Transient failure', + } + ); + expect(out).toMatchInlineSnapshot(` + " user error · Error + run wrun_01ABC · myWorkflow (./workflows/x) + step step_01XYZ · add (./workflows/x) + retry 4 attempts · 3 retries" + `); + }); + + test('renders sdk-attributed errors with the sdk badge', () => { + const out = formatLogMetadata( + 'Workflow myFlow failed due to an SDK runtime error', + { + errorCode: 'RUNTIME_ERROR', + errorAttribution: 'sdk', + errorName: 'WorkflowRuntimeError', + errorMessage: 'corrupted event log', + hint: 'This is an internal workflow SDK error.', + } + ); + expect(out).toMatchInlineSnapshot(` + " sdk error · WorkflowRuntimeError + code RUNTIME_ERROR + hint: This is an internal workflow SDK error." + `); + }); + + test('drops errorMessage when the parent message already includes it', () => { + // Important: avoids double-printing the same string in the stack and + // in the metadata block. + const errorMessage = 'thing went wrong'; + const out = formatLogMetadata(`Step foo threw\nError: ${errorMessage}`, { + errorAttribution: 'user', + errorName: 'Error', + errorMessage, + }); + expect(out).not.toContain(`message`); + expect(out).toMatchInlineSnapshot(`" user error · Error"`); + }); + + test('omits errorStack always (the parent message owns the stack)', () => { + const out = formatLogMetadata('msg', { + errorStack: 'Error: ...\n at foo (...)\n ...', + errorName: 'Error', + errorAttribution: 'user', + }); + expect(out).not.toContain('errorStack'); + expect(out).not.toContain('at foo'); + }); + + test('falls back gracefully on machine names it cannot parse', () => { + const out = formatLogMetadata('msg', { + workflowRunId: 'wrun_X', + workflowName: 'not-a-machine-name', + }); + // Should still emit the row — never silently drop info. + expect(out).toContain('wrun_X'); + }); + + test('renders unknown fields as a sorted key/value tail', () => { + const out = formatLogMetadata('msg', { + zoo: 'last', + apple: 'first', + banana: 42, + }); + expect(out).toMatchInlineSnapshot(` + " apple first + banana 42 + zoo last" + `); + }); +}); diff --git a/packages/core/src/log-format.ts b/packages/core/src/log-format.ts new file mode 100644 index 0000000000..53305343e1 --- /dev/null +++ b/packages/core/src/log-format.ts @@ -0,0 +1,212 @@ +import * as Ansi from '@workflow/errors/ansi'; +import { + formatStepName, + formatWorkflowName, + parseStepName, + parseWorkflowName, +} from '@workflow/utils'; + +/** + * Pretty-format a structured-log metadata object for human consumption on + * stderr. Designed to replace `util.inspect`'s default object dump for + * `console.error('[workflow-sdk] msg', metadata)`-style calls — that form + * works fine for small ad-hoc objects but produces a noisy, quote-escaped + * blob when applied to the structured-error metadata that workflow runtime + * logs emit (multi-line stack strings, hint paragraphs, parsed-name machine + * tags). + * + * The pretty form: + * + * - Renders well-known IDs (`workflowRunId`, `stepId`) with their parsed + * friendly names alongside the raw ID so users can copy the ID for + * lookup *and* see at a glance which workflow / step it refers to. + * - Drops fields that would just duplicate what's already in the log + * message — `errorMessage` when the message string already contains + * it, `errorStack` always (it should be in the message; we own the + * framing). + * - Color-codes attribution (`user error` red, `sdk error` magenta) so + * ownership is visually distinct. + * - Renders `hint` as a multi-line wrapped block under `hint:` so + * paragraph-length hints don't get backslash-escaped onto one line. + * - Aligns key/value pairs in two dim-padded columns. + * + * Important: web/web-shared do NOT consume stderr — they read CBOR/JSON + * event payloads from the World event log. Changing the stderr format is + * therefore a presentation-only change. The same metadata is also emitted + * as structured OTel span events from the logger itself for backends that + * want JSON-shaped data. + * + * Returns `null` when there's nothing useful to render (no surviving + * fields after redundancy stripping); callers can then skip the trailing + * block entirely instead of printing an empty separator. + */ +export function formatLogMetadata( + message: string, + metadata: Record | undefined +): string | null { + if (!metadata || Object.keys(metadata).length === 0) return null; + + // Drop fields that the message already encodes. We render framings and + // stacks into the message string itself in step-handler / runtime, so + // repeating them as `errorStack: '...'` or `errorMessage: '...'` would + // be pure noise. + const redundant = new Set(); + redundant.add('errorStack'); + if ( + typeof metadata.errorMessage === 'string' && + message.includes(metadata.errorMessage as string) + ) { + redundant.add('errorMessage'); + } + + // Pull well-known fields out for special-cased rendering. Anything not + // matched here flows into the trailing key/value block as-is. + const wellKnown = new Set([ + 'workflowRunId', + 'workflowName', + 'stepId', + 'stepName', + 'errorAttribution', + 'errorCode', + 'errorName', + 'errorMessage', + 'errorStack', + 'hint', + 'attempt', + 'retryCount', + ]); + + const lines: string[] = []; + + // Header: error class + attribution badge. Skips when neither is set + // (e.g. info logs that just carry context). + const errorName = pickString(metadata, 'errorName'); + const attribution = pickString(metadata, 'errorAttribution'); + if (errorName || attribution) { + const badge = attribution + ? attribution === 'sdk' + ? Ansi.magenta(`sdk error`) + : Ansi.red(`user error`) + : ''; + const cls = errorName ? Ansi.bold(errorName) : ''; + const sep = badge && cls ? Ansi.dim(' · ') : ''; + lines.push(` ${badge}${sep}${cls}`); + } + + // ID + parsed name pairs. Display the raw ULID-shaped ID (users copy + // these into URLs and the inspect CLI) alongside the parsed friendly + // name so they don't have to mentally decode `step//./workflows/x//y`. + const runId = pickString(metadata, 'workflowRunId'); + const wfName = pickString(metadata, 'workflowName'); + if (runId) { + lines.push(formatIdRow('run', runId, wfName, formatWorkflowName)); + } else if (wfName) { + lines.push(formatIdRow('run', null, wfName, formatWorkflowName)); + } + + const stepId = pickString(metadata, 'stepId'); + const stepName = pickString(metadata, 'stepName'); + if (stepId || stepName) { + lines.push(formatIdRow('step', stepId, stepName, formatStepName)); + } + + // Retry-loop metadata, when present (only on the hit-max-retries log). + if (metadata.attempt !== undefined || metadata.retryCount !== undefined) { + const a = metadata.attempt; + const r = metadata.retryCount; + if (a !== undefined && r !== undefined) { + lines.push( + ` ${kvKey('retry')} ${a} ${Ansi.dim('attempts ·')} ${r} ${Ansi.dim('retries')}` + ); + } else if (a !== undefined) { + lines.push(` ${kvKey('retry')} ${a} ${Ansi.dim('attempts')}`); + } + } + + // errorCode lives next to attribution conceptually; render it on its own + // dim line right after the badge if it adds info beyond the name. + const errorCode = pickString(metadata, 'errorCode'); + if (errorCode && errorCode !== errorName) { + lines.push(` ${kvKey('code')} ${Ansi.dim(errorCode)}`); + } + + // Hint: paragraph-shaped, render dimmed under its own key so the + // continuation reads clearly. We trust the hint to already be plain + // text (we ban ANSI in error messages elsewhere). + const hint = pickString(metadata, 'hint'); + if (hint) { + lines.push(` ${Ansi.hint(hint)}`); + } + + // Pass-through for fields we don't know about — render them as + // `key: value` in the trailing block so we never silently drop info. + // Sort for stable output (helpful for snapshot tests). + const passThrough = Object.entries(metadata) + .filter( + ([k, v]) => + !wellKnown.has(k) && !redundant.has(k) && v !== undefined && v !== null + ) + .sort(([a], [b]) => a.localeCompare(b)); + for (const [k, v] of passThrough) { + lines.push(` ${kvKey(k)} ${formatPassthroughValue(v)}`); + } + + return lines.length ? lines.join('\n') : null; +} + +function pickString( + metadata: Record, + key: string +): string | null { + const v = metadata[key]; + return typeof v === 'string' && v.length > 0 ? v : null; +} + +function kvKey(key: string): string { + // Right-pad to a consistent column width so values line up vertically. + return Ansi.dim(key.padEnd(6)); +} + +function formatIdRow( + label: string, + id: string | null, + name: string | null, + formatName: (n: string) => string +): string { + // Compact form: `run wrun_01KPYR1H596… · simple (./workflows/1_simple)` + const idCell = id ? id : Ansi.dim('—'); + // Only render the parsed name when parse succeeds and adds info beyond + // the raw ID. Falls back silently otherwise. + const parsed = name + ? label === 'run' + ? parseWorkflowName(name) + : parseStepName(name) + : null; + const nameCell = parsed + ? `${Ansi.dim('·')} ${formatName(name as string)}` + : ''; + return ` ${kvKey(label)} ${idCell}${nameCell ? ' ' + nameCell : ''}`; +} + +function formatPassthroughValue(v: unknown): string { + if (typeof v === 'string') { + // Multi-line strings: indent continuation lines so they line up under + // the key column. Single-line stays as-is. + if (v.includes('\n')) { + return v + .split('\n') + .map((line, i) => (i === 0 ? line : ` ${line}`)) + .join('\n'); + } + return v; + } + if (typeof v === 'number' || typeof v === 'boolean') return String(v); + // Objects / arrays: JSON-stringify compactly. Unlike util.inspect this + // doesn't quote-escape multi-line strings inside them, but for the + // structured metadata we emit (small POJOs) it's the right trade-off. + try { + return JSON.stringify(v); + } catch { + return String(v); + } +} diff --git a/packages/core/src/logger.test.ts b/packages/core/src/logger.test.ts new file mode 100644 index 0000000000..5bdee13995 --- /dev/null +++ b/packages/core/src/logger.test.ts @@ -0,0 +1,170 @@ +import { afterEach, beforeEach, describe, expect, test, vi } from 'vitest'; +import { runtimeLogger } from './logger.js'; + +describe('logger', () => { + let errorSpy: ReturnType; + let warnSpy: ReturnType; + + beforeEach(() => { + errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + }); + + afterEach(() => { + errorSpy.mockRestore(); + warnSpy.mockRestore(); + }); + + // The logger composes `[workflow-sdk] \n` + // into a single string argument and passes it to `console.error` / + // `console.warn`. This avoids `util.inspect` quoting multi-line stacks + // and paragraph hints inside an object dump. See `./log-format.ts`. + test('error logs go to console.error with [workflow-sdk] prefix and unknown fields fall through', () => { + runtimeLogger.error('boom', { foo: 'bar' }); + expect(errorSpy).toHaveBeenCalledTimes(1); + expect(errorSpy.mock.calls[0]).toHaveLength(1); + expect(errorSpy.mock.calls[0][0]).toContain('[workflow-sdk] boom'); + expect(errorSpy.mock.calls[0][0]).toContain('foo'); + expect(errorSpy.mock.calls[0][0]).toContain('bar'); + }); + + test('warn logs go to console.warn with [workflow-sdk] prefix', () => { + runtimeLogger.warn('watch out', { foo: 'bar' }); + expect(warnSpy).toHaveBeenCalledTimes(1); + expect(warnSpy.mock.calls[0][0]).toContain('[workflow-sdk] watch out'); + expect(warnSpy.mock.calls[0][0]).toContain('foo'); + }); + + test('info and debug do not print to console by default', () => { + runtimeLogger.info('quiet'); + runtimeLogger.debug('quieter'); + expect(errorSpy).not.toHaveBeenCalled(); + expect(warnSpy).not.toHaveBeenCalled(); + }); + + test('child() merges parent metadata into every call', () => { + const child = runtimeLogger.child({ workflowRunId: 'run-1' }); + child.error('boom', { stepId: 'step-1' }); + const out = errorSpy.mock.calls[0][0] as string; + expect(out).toContain('[workflow-sdk] boom'); + expect(out).toContain('run-1'); + expect(out).toContain('step-1'); + }); + + test('call-site metadata wins over child metadata on conflict', () => { + const child = runtimeLogger.child({ workflowRunId: 'parent-id' }); + child.error('boom', { workflowRunId: 'override' }); + const out = errorSpy.mock.calls[0][0] as string; + expect(out).toContain('override'); + expect(out).not.toContain('parent-id'); + }); + + test('child can be chained', () => { + const runLogger = runtimeLogger.child({ workflowRunId: 'run-1' }); + const stepLogger = runLogger.child({ stepId: 'step-1' }); + stepLogger.error('boom'); + const out = errorSpy.mock.calls[0][0] as string; + expect(out).toContain('run-1'); + expect(out).toContain('step-1'); + }); + + test('forRun attaches workflowRunId and workflowName', () => { + // Production passes machine-form names like `workflow//./module//fn`, + // which the formatter renders as `fn (./module)`. + const runLogger = runtimeLogger.forRun( + 'run-1', + 'workflow//./src/jobs//myWorkflow' + ); + runLogger.error('boom'); + const out = errorSpy.mock.calls[0][0] as string; + expect(out).toContain('run-1'); + expect(out).toContain('myWorkflow (./src/jobs)'); + }); + + test('forRun without workflowName omits the key', () => { + const runLogger = runtimeLogger.forRun('run-1'); + runLogger.error('boom'); + const out = errorSpy.mock.calls[0][0] as string; + expect(out).toContain('run-1'); + }); + + test('forRun accepts extra metadata', () => { + const runLogger = runtimeLogger.forRun('run-1', 'myWorkflow', { + stepId: 'step-1', + }); + runLogger.error('boom'); + const out = errorSpy.mock.calls[0][0] as string; + expect(out).toContain('run-1'); + expect(out).toContain('step-1'); + }); + + test('no metadata: only the prefix line is emitted', () => { + runtimeLogger.error('boom'); + expect(errorSpy).toHaveBeenCalledTimes(1); + expect(errorSpy.mock.calls[0][0]).toBe('[workflow-sdk] boom'); + }); + + /** + * Snapshot tests for the exact shape of runtime log output. These act as + * regression gates on what users see in their log drains, so that + * refactors of the logger don't accidentally change field ordering, the + * prefix, or whether metadata is merged. + */ + describe('shape snapshots', () => { + test('scoped logger emits the canonical step-failure call signature', () => { + const log = runtimeLogger.forRun('wrun_123', 'workflow//my-wf').child({ + stepId: 'step_456', + stepName: 'step//my-step', + }); + + log.error('Step "step//my-step" threw a FatalError', { + errorAttribution: 'user', + errorName: 'FatalError', + errorMessage: 'boom', + hint: 'Move the call to a step function.', + }); + + expect(errorSpy.mock.calls).toMatchInlineSnapshot(` + [ + [ + "[workflow-sdk] Step "step//my-step" threw a FatalError + user error · FatalError + run wrun_123 + step step_456 + hint: Move the call to a step function.", + ], + ] + `); + }); + + test('hit-max-retries style call signature', () => { + const log = runtimeLogger.forRun('wrun_abc', 'workflow//main').child({ + stepId: 'step_xyz', + stepName: 'step//doWork', + }); + + log.error( + 'Step "step//doWork" hit max retries — bubbling error thrown by your step to the parent workflow', + { + attempt: 4, + retryCount: 3, + errorAttribution: 'user', + errorName: 'Error', + errorMessage: 'Transient failure', + } + ); + + expect(errorSpy.mock.calls).toMatchInlineSnapshot(` + [ + [ + "[workflow-sdk] Step "step//doWork" hit max retries — bubbling error thrown by your step to the parent workflow + user error · Error + run wrun_abc + step step_xyz + retry 4 attempts · 3 retries", + ], + ] + `); + }); + }); +}); diff --git a/packages/core/src/logger.ts b/packages/core/src/logger.ts index d0a0ae1d5b..5e8a2f53fa 100644 --- a/packages/core/src/logger.ts +++ b/packages/core/src/logger.ts @@ -1,42 +1,101 @@ import debug from 'debug'; +import { formatLogMetadata } from './log-format.js'; import { getActiveSpan } from './telemetry.js'; -function createLogger(namespace: string) { +type LogMetadata = Record; + +type LogFn = (message: string, metadata?: LogMetadata) => void; + +export interface Logger { + debug: LogFn; + info: LogFn; + warn: LogFn; + error: LogFn; + /** + * Returns a child logger that merges the given metadata into every call. + * Useful for attaching stable context (e.g. `workflowRunId`, `workflowName`, + * `stepId`) so callers don't have to repeat it on every log. + * + * Call-site metadata wins on conflict, so children can still override. + */ + child: (metadata: LogMetadata) => Logger; + /** + * Convenience child logger for a workflow run. Equivalent to + * `logger.child({ workflowRunId, workflowName })`, but centralized so all + * runtime code structures run metadata consistently. + */ + forRun: ( + workflowRunId: string, + workflowName?: string, + extra?: LogMetadata + ) => Logger; +} + +function createLogger(namespace: string): Logger { const baseDebug = debug(`workflow:${namespace}`); - const logger = (level: string) => { - const levelDebug = baseDebug.extend(level); - - return (message: string, metadata?: Record) => { - // Always output error/warn to console so users see critical issues - // debug/info only output when DEBUG env var is set - if (level === 'error') { - console.error(`[Workflow] ${message}`, metadata ?? ''); - } else if (level === 'warn') { - console.warn(`[Workflow] ${message}`, metadata ?? ''); - } - - // Also log to debug library for verbose output when DEBUG is enabled - levelDebug(message, metadata); - - if (levelDebug.enabled) { - getActiveSpan() - .then((span) => { - span?.addEvent(`${level}.${namespace}`, { message, ...metadata }); - }) - .catch(() => { - // Silently ignore telemetry errors - }); - } + const build = (parentMetadata: LogMetadata): Logger => { + const logger = (level: string): LogFn => { + const levelDebug = baseDebug.extend(level); + + return (message, metadata) => { + const hasParent = Object.keys(parentMetadata).length > 0; + const hasCallSite = metadata && Object.keys(metadata).length > 0; + const merged = + hasParent || hasCallSite + ? { ...parentMetadata, ...(metadata ?? {}) } + : undefined; + + // Always output error/warn to console so users see critical issues. + // debug/info only output when DEBUG env var is set. + // + // Render the metadata as a single pretty string and pass it as the + // sole second argument so the runtime's `console.error` / `util.inspect` + // doesn't quote-escape multi-line stacks or paragraph hints inside a + // JSON-y object dump. See `./log-format.ts` for the reasoning + format. + if (level === 'error' || level === 'warn') { + const prefix = `[workflow-sdk] ${message}`; + const tail = formatLogMetadata(message, merged); + const out = level === 'error' ? console.error : console.warn; + if (tail) { + out(`${prefix}\n${tail}`); + } else { + out(prefix); + } + } + + // Also log to debug library for verbose output when DEBUG is enabled + levelDebug(message, merged); + + if (levelDebug.enabled) { + getActiveSpan() + .then((span) => { + span?.addEvent(`${level}.${namespace}`, { message, ...merged }); + }) + .catch(() => { + // Silently ignore telemetry errors + }); + } + }; }; - }; - return { - debug: logger('debug'), - info: logger('info'), - warn: logger('warn'), - error: logger('error'), + return { + debug: logger('debug'), + info: logger('info'), + warn: logger('warn'), + error: logger('error'), + child: (metadata) => build({ ...parentMetadata, ...metadata }), + forRun: (workflowRunId, workflowName, extra) => + build({ + ...parentMetadata, + workflowRunId, + ...(workflowName !== undefined ? { workflowName } : {}), + ...(extra ?? {}), + }), + }; }; + + return build({}); } export const stepLogger = createLogger('step'); diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index 3af03f4fed..684b0ca101 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -4,7 +4,10 @@ import { RunExpiredError, WorkflowRuntimeError, } from '@workflow/errors'; -import { parseWorkflowName } from '@workflow/utils/parse-name'; +import { + formatWorkflowName, + parseWorkflowName, +} from '@workflow/utils/parse-name'; import { type Event, SPEC_VERSION_CURRENT, @@ -12,6 +15,7 @@ import { type WorkflowRun, } from '@workflow/world'; import { classifyRunError } from './classify-error.js'; +import { describeError } from './describe-error.js'; import { importKey } from './encryption.js'; import { WorkflowSuspension } from './global.js'; import { runtimeLogger } from './logger.js'; @@ -137,10 +141,25 @@ export function workflowEntrypoint( // of the workflow events, etc. We simply attempt to mark the run as failed // and if that fails, the message is still consumed but with adequate logging // that an error occurred preventing us from failing the run. + // Scoped logger for this run — attaches runId/workflowName to every + // log line and child loggers below, so callers don't repeat it. + const runLogger = runtimeLogger.forRun(runId, workflowName); + if (metadata.attempt > MAX_QUEUE_DELIVERIES) { - runtimeLogger.error( + const maxDeliveriesDescription = describeError( + undefined, + RUN_ERROR_CODES.MAX_DELIVERIES_EXCEEDED + ); + runLogger.error( `Workflow handler exceeded max deliveries (${metadata.attempt}/${MAX_QUEUE_DELIVERIES})`, - { workflowRunId: runId, workflowName, attempt: metadata.attempt } + { + attempt: metadata.attempt, + errorCode: maxDeliveriesDescription.errorCode, + errorAttribution: maxDeliveriesDescription.attribution, + ...(maxDeliveriesDescription.hint + ? { hint: maxDeliveriesDescription.hint } + : {}), + } ); try { const world = await getWorld(); @@ -163,16 +182,17 @@ export function workflowEntrypoint( // Run already finished, consume the message silently return; } - runtimeLogger.error( + runLogger.error( `Failed to mark run as failed after ${metadata.attempt} delivery attempts. ` + `A persistent error is preventing the run from being terminated. ` + `The run will remain in its current state until manually resolved. ` + `This is most likely due to a persistent outage of the workflow backend ` + `or a bug in the workflow runtime and should be reported to the Workflow team.`, { - workflowRunId: runId, - error: err instanceof Error ? err.message : String(err), attempt: metadata.attempt, + errorName: err instanceof Error ? err.name : 'UnknownError', + errorMessage: err instanceof Error ? err.message : String(err), + errorStack: err instanceof Error ? err.stack : undefined, } ); } @@ -188,19 +208,38 @@ export function workflowEntrypoint( let replayTimeout: NodeJS.Timeout | undefined; if (process.env.VERCEL_URL !== undefined) { replayTimeout = setTimeout(async () => { - runtimeLogger.error('Workflow replay exceeded timeout', { - workflowRunId: runId, - timeoutMs: REPLAY_TIMEOUT_MS, - attempt: metadata.attempt, - maxRetries: REPLAY_TIMEOUT_MAX_RETRIES, - }); - // Allow a few retries before permanently failing the run. // On early attempts, just exit so the queue retries the message. if (metadata.attempt <= REPLAY_TIMEOUT_MAX_RETRIES) { + runLogger.warn( + 'Workflow replay exceeded timeout but will be re-attempted (attempt < maxRetries)', + { + timeoutMs: REPLAY_TIMEOUT_MS, + attempt: metadata.attempt, + maxRetries: REPLAY_TIMEOUT_MAX_RETRIES, + } + ); process.exit(1); } + const replayTimeoutDescription = describeError( + undefined, + RUN_ERROR_CODES.REPLAY_TIMEOUT + ); + runLogger.error( + 'Workflow replay exceeded timeout and max retries exceeded. Failing the run', + { + timeoutMs: REPLAY_TIMEOUT_MS, + attempt: metadata.attempt, + maxRetries: REPLAY_TIMEOUT_MAX_RETRIES, + errorCode: replayTimeoutDescription.errorCode, + errorAttribution: replayTimeoutDescription.attribution, + ...(replayTimeoutDescription.hint + ? { hint: replayTimeoutDescription.hint } + : {}), + } + ); + try { const world = await getWorld(); await world.events.create( @@ -217,8 +256,19 @@ export function workflowEntrypoint( }, { requestId } ); - } catch { - // Best effort — process exits regardless + } catch (err) { + // Best effort — process exits regardless. Surface why so + // operators can diagnose repeat timeouts against the backend. + runLogger.warn( + 'Unable to mark run as failed. The queue will continue to retry', + { + attempt: metadata.attempt, + errorName: err instanceof Error ? err.name : 'UnknownError', + errorMessage: + err instanceof Error ? err.message : String(err), + errorStack: err instanceof Error ? err.stack : undefined, + } + ); } // Note that this also prevents the runtime from acking the queue message, // so the queue will call back once, after which a 410 will get it to exit early. @@ -327,15 +377,28 @@ export function workflowEntrypoint( // completed/failed/cancelled during setup. // RunExpiredError: run already in terminal state. // In both cases, skip processing this message. - runtimeLogger.info( + runLogger.info( 'Run already finished during setup, skipping', - { workflowRunId: runId, message: err.message } + { errorName: err.name, errorMessage: err.message } ); return; } else if (err instanceof WorkflowRuntimeError) { - runtimeLogger.error( - 'Fatal runtime error during workflow setup', - { workflowRunId: runId, error: err.message } + // Include the stack directly in the message so it + // surfaces in stdout even when structured logging is + // being flattened (e.g. Vercel log drain). + const description = describeError(err); + runLogger.error( + `Fatal runtime error during workflow setup\n${err.stack}`, + { + errorCode: description.errorCode, + errorAttribution: description.attribution, + errorName: err.name, + errorMessage: err.message, + errorStack: err.stack, + ...(description.hint + ? { hint: description.hint } + : {}), + } ); try { await world.events.create( @@ -377,10 +440,9 @@ export function workflowEntrypoint( if (workflowRun.status !== 'running') { // Workflow has already completed or failed, so we can skip it - runtimeLogger.info( + runLogger.info( 'Workflow already completed or failed, skipping', { - workflowRunId: runId, status: workflowRun.status, } ); @@ -440,8 +502,9 @@ export function workflowEntrypoint( events.push(result.event!); } catch (err) { if (EntityConflictError.is(err)) { - runtimeLogger.info('Wait already completed, skipping', { - workflowRunId: runId, + // Another replay/worker already recorded wait_completed + // for the same correlationId — idempotent, ignore. + runLogger.info('Wait already completed, skipping', { correlationId: waitEvent.correlationId, }); continue; @@ -482,13 +545,12 @@ export function workflowEntrypoint( // WorkflowSuspension is normal control flow — not an error if (WorkflowSuspension.is(err)) { const suspensionMessage = buildWorkflowSuspensionMessage( - runId, err.stepCount, err.hookCount, err.waitCount ); if (suspensionMessage) { - runtimeLogger.debug(suspensionMessage); + runLogger.debug(suspensionMessage); } const result = await handleSuspension({ @@ -537,13 +599,25 @@ export function workflowEntrypoint( // internal issue (corrupted event log, missing data); // everything else is a user code error. const errorCode = classifyRunError(err); - - runtimeLogger.error('Error while running workflow', { - workflowRunId: runId, - errorCode, - errorName, - errorStack, - }); + const description = describeError(err, errorCode); + const friendlyWorkflow = formatWorkflowName(workflowName); + const framing = + description.attribution === 'sdk' + ? `Workflow ${friendlyWorkflow} failed due to an SDK runtime error` + : `Workflow ${friendlyWorkflow} threw`; + + // Use the stack as the primary message so it shows up + // in flattened logs without structured metadata. + runLogger.error( + `${framing}\n${errorStack || 'Unknown error encountered in workflow'}`, + { + errorCode, + errorAttribution: description.attribution, + errorName, + errorMessage, + ...(description.hint ? { hint: description.hint } : {}), + } + ); // Fail the workflow run via event (event-sourced architecture) try { @@ -567,11 +641,14 @@ export function workflowEntrypoint( EntityConflictError.is(failErr) || RunExpiredError.is(failErr) ) { - runtimeLogger.info( + // Run already transitioned to a terminal state via + // another path (duplicate delivery, cancellation). + // Nothing to do — just drop the failure event. + runLogger.info( 'Tried failing workflow run, but run has already finished.', { - workflowRunId: runId, - message: failErr.message, + errorName: failErr.name, + errorMessage: failErr.message, } ); span?.setAttributes({ @@ -616,11 +693,13 @@ export function workflowEntrypoint( EntityConflictError.is(err) || RunExpiredError.is(err) ) { - runtimeLogger.info( + // Run already completed/failed elsewhere — idempotent, + // drop the completion event. + runLogger.info( 'Tried completing workflow run, but run has already finished.', { - workflowRunId: runId, - message: err.message, + errorName: err.name, + errorMessage: err.message, } ); return; diff --git a/packages/core/src/runtime/step-handler.test.ts b/packages/core/src/runtime/step-handler.test.ts index b2fd7673c1..251d85debd 100644 --- a/packages/core/src/runtime/step-handler.test.ts +++ b/packages/core/src/runtime/step-handler.test.ts @@ -28,12 +28,19 @@ const { }, mockEventsCreate: vi.fn(), mockQueue: vi.fn().mockResolvedValue({ messageId: 'msg_test' }), - mockRuntimeLogger: { - warn: vi.fn(), - debug: vi.fn(), - info: vi.fn(), - error: vi.fn(), - }, + mockRuntimeLogger: (() => { + const logger = { + warn: vi.fn(), + debug: vi.fn(), + info: vi.fn(), + error: vi.fn(), + forRun: vi.fn(), + child: vi.fn(), + }; + logger.forRun.mockReturnValue(logger); + logger.child.mockReturnValue(logger); + return logger; + })(), mockStepLogger: { warn: vi.fn(), debug: vi.fn(), @@ -284,10 +291,17 @@ describe('step-handler 409 handling', () => { expect(mockRuntimeLogger.info).toHaveBeenCalledWith( 'Tried completing step, but step has already finished.', expect.objectContaining({ - workflowRunId: 'wrun_test123', - stepId: 'step_abc', + errorName: 'EntityConflictError', + errorMessage: expect.stringContaining('already completed'), }) ); + // Workflow/step context is attached via the scoped logger (forRun), + // not repeated in every log call. + expect(mockRuntimeLogger.forRun).toHaveBeenCalledWith( + 'wrun_test123', + expect.any(String), + expect.objectContaining({ stepId: 'step_abc' }) + ); // Should NOT have queued a workflow continuation expect(mockQueueMessage).not.toHaveBeenCalled(); }); @@ -333,10 +347,15 @@ describe('step-handler 409 handling', () => { expect(mockRuntimeLogger.info).toHaveBeenCalledWith( 'Tried failing step, but step has already finished.', expect.objectContaining({ - workflowRunId: 'wrun_test123', - stepId: 'step_abc', + errorName: 'EntityConflictError', + errorMessage: expect.stringContaining('already completed'), }) ); + expect(mockRuntimeLogger.forRun).toHaveBeenCalledWith( + 'wrun_test123', + expect.any(String), + expect.objectContaining({ stepId: 'step_abc' }) + ); }); }); @@ -379,10 +398,15 @@ describe('step-handler 409 handling', () => { expect(mockRuntimeLogger.info).toHaveBeenCalledWith( 'Tried failing step, but step has already finished.', expect.objectContaining({ - workflowRunId: 'wrun_test123', - stepId: 'step_abc', + errorName: 'EntityConflictError', + errorMessage: expect.stringContaining('already completed'), }) ); + expect(mockRuntimeLogger.forRun).toHaveBeenCalledWith( + 'wrun_test123', + expect.any(String), + expect.objectContaining({ stepId: 'step_abc' }) + ); // Step function should NOT have been called (pre-execution guard) expect(mockStepFn).not.toHaveBeenCalled(); }); @@ -428,10 +452,15 @@ describe('step-handler 409 handling', () => { expect(mockRuntimeLogger.info).toHaveBeenCalledWith( 'Tried retrying step, but step has already finished.', expect.objectContaining({ - workflowRunId: 'wrun_test123', - stepId: 'step_abc', + errorName: 'EntityConflictError', + errorMessage: expect.stringContaining('already completed'), }) ); + expect(mockRuntimeLogger.forRun).toHaveBeenCalledWith( + 'wrun_test123', + expect.any(String), + expect.objectContaining({ stepId: 'step_abc' }) + ); }); it('should re-throw non-409 errors from step_retrying', async () => { @@ -560,7 +589,12 @@ describe('step-handler max deliveries', () => { expect(mockQueueMessage).toHaveBeenCalled(); expect(mockRuntimeLogger.error).toHaveBeenCalledWith( expect.stringContaining('exceeded max deliveries'), - expect.objectContaining({ workflowRunId: 'wrun_test123' }) + expect.objectContaining({ attempt: MAX_QUEUE_DELIVERIES + 1 }) + ); + expect(mockRuntimeLogger.forRun).toHaveBeenCalledWith( + 'wrun_test123', + expect.any(String), + expect.objectContaining({ stepId: 'step_abc' }) ); }); @@ -719,11 +753,139 @@ describe('step-handler step not found', () => { expect(mockRuntimeLogger.info).toHaveBeenCalledWith( 'Tried failing step for missing function, but step has already finished.', expect.objectContaining({ - workflowRunId: 'wrun_test123', - stepId: 'step_abc', + errorName: 'EntityConflictError', + errorMessage: expect.stringContaining('Step already completed'), }) ); + expect(mockRuntimeLogger.forRun).toHaveBeenCalledWith( + 'wrun_test123', + expect.any(String), + expect.objectContaining({ stepName: 'missingStep' }) + ); // Should NOT re-queue the workflow since step was already resolved expect(mockQueueMessage).not.toHaveBeenCalled(); }); }); + +/** + * Regression gate for the most user-visible behavior change in this PR: + * fatal user errors (`FatalError`, `ContextViolationError`, + * `SerializationError`) should produce exactly one `step_failed` event + * — no retries — while a non-fatal user `Error` should retry up to + * `maxRetries`. Asserting on the live retry-loop wiring catches the + * silent-regression case where someone removes `fatal = true` later + * and the unit-level FatalError.is() tests stay green. + */ +describe('step-handler fatal vs retryable behavior', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.mocked(getStepFunction).mockReturnValue(mockStepFn); + vi.mocked(normalizeUnknownError).mockImplementation( + async (err: unknown) => ({ + message: err instanceof Error ? err.message : String(err), + name: err instanceof Error ? err.name : 'UnknownError', + stack: err instanceof Error ? err.stack : undefined, + }) + ); + vi.mocked(getErrorName).mockImplementation((err: unknown) => + err instanceof Error ? err.name : 'UnknownError' + ); + vi.mocked(getErrorStack).mockImplementation((err: unknown) => + err instanceof Error ? (err.stack ?? '') : '' + ); + mockQueueMessage.mockResolvedValue(undefined); + vi.mocked(getWorld).mockResolvedValue({ + events: { create: mockEventsCreate }, + queue: mockQueue, + getEncryptionKeyForRun: vi.fn().mockResolvedValue(undefined), + } as any); + mockEventsCreate.mockReset().mockResolvedValue({ + step: { + stepId: 'step_abc', + status: 'running', + attempt: 1, + startedAt: new Date(), + input: [], + }, + event: {}, + }); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('emits exactly one step_failed and does not re-queue when the step throws an error with fatal=true', async () => { + // Simulates a `ContextViolationError` / `SerializationError` — + // both opt into the no-retry path via a `fatal: true` own property + // that `FatalError.is()` recognizes. + class FatalUserError extends Error { + readonly fatal = true; + name = 'FatalUserError'; + } + mockStepFn.mockReset().mockRejectedValue(new FatalUserError('boom')); + mockStepFn.maxRetries = 3; + + await capturedHandler(createMessage(), createMetadata('myStep')); + + const stepFailedCalls = mockEventsCreate.mock.calls.filter( + ([, event]) => event.eventType === 'step_failed' + ); + expect(stepFailedCalls).toHaveLength(1); + // The retry path uses `step_retrying`; the fatal path skips it. + const stepRetryingCalls = mockEventsCreate.mock.calls.filter( + ([, event]) => event.eventType === 'step_retrying' + ); + expect(stepRetryingCalls).toHaveLength(0); + }); + + it('schedules a retry (and does not fail the step) on the first attempt of a non-fatal Error', async () => { + mockStepFn + .mockReset() + .mockRejectedValue(new Error('Transient failure, will succeed later')); + mockStepFn.maxRetries = 3; + + await capturedHandler( + createMessage(), + createMetadata('myStep', { attempt: 1 }) + ); + + // Non-fatal first attempt: re-queue via step_retrying, no terminal failure. + const stepRetryingCalls = mockEventsCreate.mock.calls.filter( + ([, event]) => event.eventType === 'step_retrying' + ); + expect(stepRetryingCalls).toHaveLength(1); + const stepFailedCalls = mockEventsCreate.mock.calls.filter( + ([, event]) => event.eventType === 'step_failed' + ); + expect(stepFailedCalls).toHaveLength(0); + }); + + it('emits step_failed once the non-fatal retry budget is exhausted', async () => { + mockStepFn.mockReset().mockRejectedValue(new Error('Transient failure')); + mockStepFn.maxRetries = 3; + // Final attempt: total attempts = maxRetries + 1. + mockEventsCreate.mockReset().mockResolvedValueOnce({ + step: { + stepId: 'step_abc', + status: 'running', + attempt: 4, + startedAt: new Date(), + input: [], + }, + event: {}, + }); + // Subsequent emissions (e.g. step_failed) get a generic ack. + mockEventsCreate.mockResolvedValue({ event: {} }); + + await capturedHandler( + createMessage(), + createMetadata('myStep', { attempt: 4 }) + ); + + const stepFailedCalls = mockEventsCreate.mock.calls.filter( + ([, event]) => event.eventType === 'step_failed' + ); + expect(stepFailedCalls).toHaveLength(1); + }); +}); diff --git a/packages/core/src/runtime/step-handler.ts b/packages/core/src/runtime/step-handler.ts index f38184f293..fdba936359 100644 --- a/packages/core/src/runtime/step-handler.ts +++ b/packages/core/src/runtime/step-handler.ts @@ -10,9 +10,10 @@ import { WorkflowRuntimeError, WorkflowWorldError, } from '@workflow/errors'; -import { pluralize } from '@workflow/utils'; +import { formatStepName, pluralize } from '@workflow/utils'; import { getPort } from '@workflow/utils/get-port'; import { SPEC_VERSION_CURRENT, StepInvokePayloadSchema } from '@workflow/world'; +import { describeError } from '../describe-error.js'; import { importKey } from '../encryption.js'; import { runtimeLogger, stepLogger } from '../logger.js'; import { getStepFunction } from '../private.js'; @@ -82,13 +83,19 @@ const stepHandler = (worldHandlers: WorldHandlers) => // of the step details, etc. We simply attempt to mark the step as failed // and enqueue the workflow once, and if either of those fails, the message // is still consumed but with adequate logging that an error occurred. + // Scoped logger for this step invocation — attaches run/step context to + // every log line below so callers don't repeat it. + const stepNameFromQueue = metadata.queueName.slice('__wkf_step_'.length); + const stepRuntimeLogger = runtimeLogger.forRun( + workflowRunId, + workflowName, + { stepId, stepName: stepNameFromQueue } + ); + if (metadata.attempt > MAX_QUEUE_DELIVERIES) { - runtimeLogger.error( + stepRuntimeLogger.error( `Step handler exceeded max deliveries (${metadata.attempt}/${MAX_QUEUE_DELIVERIES})`, { - workflowRunId, - stepId, - stepName: metadata.queueName.slice('__wkf_step_'.length), attempt: metadata.attempt, } ); @@ -118,17 +125,17 @@ const stepHandler = (worldHandlers: WorldHandlers) => } // Can't even mark the step as failed. Consume the message to stop // further retries. The run will remain in its current state. - runtimeLogger.error( + stepRuntimeLogger.error( `Failed to mark step as failed after ${metadata.attempt} delivery attempts. ` + `A persistent error is preventing the step from being terminated. ` + `The run will remain in its current state until manually resolved. ` + `This is most likely due to a persistent outage of the workflow backend ` + `or a bug in the workflow runtime and should be reported to the Workflow team.`, { - workflowRunId, - stepId, attempt: metadata.attempt, - error: err instanceof Error ? err.message : String(err), + errorName: err instanceof Error ? err.name : 'UnknownError', + errorMessage: err instanceof Error ? err.message : String(err), + errorStack: err instanceof Error ? err.stack : undefined, } ); } @@ -205,7 +212,7 @@ const stepHandler = (worldHandlers: WorldHandlers) => 1, typeof err.retryAfter === 'number' ? err.retryAfter : 1 ); - runtimeLogger.info( + stepRuntimeLogger.info( 'Throttled again on retry, deferring to queue', { retryAfterSeconds: retryRetryAfter, @@ -214,19 +221,22 @@ const stepHandler = (worldHandlers: WorldHandlers) => return { timeoutSeconds: retryRetryAfter }; } if (RunExpiredError.is(err)) { - runtimeLogger.info( - `Workflow run "${workflowRunId}" has already completed, skipping step "${stepId}": ${err.message}` + // Expected when a run is cancelled while a step is in-flight. + stepRuntimeLogger.info( + 'Workflow run has already completed, skipping step', + { errorName: err.name, errorMessage: err.message } ); return; } if (EntityConflictError.is(err)) { - runtimeLogger.debug( + // Step already in a terminal state — another worker finished + // it or it was retried to completion. Re-enqueue the parent + // workflow so it can observe the outcome. + stepRuntimeLogger.debug( 'Step in terminal state, re-enqueuing workflow', { - stepName, - stepId, - workflowRunId, - error: err.message, + errorName: err.name, + errorMessage: err.message, } ); span?.setAttributes({ @@ -258,11 +268,9 @@ const stepHandler = (worldHandlers: WorldHandlers) => 'delay.reason': 'retry_after_not_reached', 'delay.timeout_seconds': timeoutSeconds, }); - runtimeLogger.debug( + stepRuntimeLogger.debug( 'Step retryAfter timestamp not yet reached', { - stepName, - stepId, retryAfterSeconds: err.retryAfter, timeoutSeconds, } @@ -273,9 +281,7 @@ const stepHandler = (worldHandlers: WorldHandlers) => throw err; } - runtimeLogger.debug('Step execution details', { - stepName, - stepId: step.stepId, + stepRuntimeLogger.debug('Step execution details', { status: step.status, attempt: step.attempt, }); @@ -291,13 +297,12 @@ const stepHandler = (worldHandlers: WorldHandlers) => if (!stepFn || typeof stepFn !== 'function') { const err = new StepNotRegisteredError(stepName); - runtimeLogger.error( + stepRuntimeLogger.error( 'Step function not registered, failing step (not run)', { - workflowRunId, - stepName, - stepId, - error: err.message, + errorName: err.name, + errorMessage: err.message, + errorStack: err.stack, } ); @@ -319,13 +324,13 @@ const stepHandler = (worldHandlers: WorldHandlers) => ); } catch (stepFailErr) { if (EntityConflictError.is(stepFailErr)) { - runtimeLogger.info( + // Step already transitioned to a terminal state — duplicate + // delivery or concurrent cancellation. Drop silently. + stepRuntimeLogger.info( 'Tried failing step for missing function, but step has already finished.', { - workflowRunId, - stepId, - stepName, - message: stepFailErr.message, + errorName: stepFailErr.name, + errorMessage: stepFailErr.message, } ); return; @@ -363,11 +368,16 @@ const stepHandler = (worldHandlers: WorldHandlers) => if (step.attempt > maxRetries + 1) { const retryCount = step.attempt - 1; const errorMessage = `Step "${stepName}" exceeded max retries (${retryCount} ${pluralize('retry', 'retries', retryCount)})`; - stepLogger.error('Step exceeded max retries', { - workflowRunId, - stepName, - retryCount, - }); + stepLogger.error( + `Step ${formatStepName(stepName)} exceeded max retries (${retryCount} ${pluralize('retry', 'retries', retryCount)})`, + { + workflowRunId, + workflowName, + stepId, + stepName, + retryCount, + } + ); // Fail the step via event (event-sourced architecture) try { await world.events.create( @@ -385,13 +395,13 @@ const stepHandler = (worldHandlers: WorldHandlers) => ); } catch (err) { if (EntityConflictError.is(err)) { - runtimeLogger.info( + // Step already transitioned to a terminal state — duplicate + // delivery or concurrent completion. Drop silently. + stepRuntimeLogger.info( 'Tried failing step, but step has already finished.', { - workflowRunId, - stepId, - stepName, - message: err.message, + errorName: err.name, + errorMessage: err.message, } ); return; @@ -425,10 +435,8 @@ const stepHandler = (worldHandlers: WorldHandlers) => if (!step.startedAt) { const errorMessage = `Step "${stepId}" has no "startedAt" timestamp`; - runtimeLogger.error('Fatal runtime error during step setup', { - workflowRunId, - stepId, - error: errorMessage, + stepRuntimeLogger.error('Fatal runtime error during step setup', { + errorMessage, }); try { await world.events.create( @@ -537,6 +545,40 @@ const stepHandler = (worldHandlers: WorldHandlers) => ...Attribute.QueueExecutionTimeMs(executionTimeMs), }); + // --- Dehydrate (serialize) the step's return value --- + // A non-serializable return value is a user-code bug, not an + // infrastructure failure. Route it through the same step-failure + // path as a thrown error so SerializationError (which is marked + // `fatal: true`) short-circuits the retry loop instead of + // bubbling as an HTTP 500 and burning through all 4 queue + // deliveries on a guaranteed-to-fail message. + if (!userCodeFailed) { + try { + result = await trace( + 'step.dehydrate', + {}, + async (dehydrateSpan) => { + const startTime = Date.now(); + const dehydrated = await dehydrateStepReturnValue( + result, + workflowRunId, + encryptionKey, + ops + ); + const durationMs = Date.now() - startTime; + dehydrateSpan?.setAttributes({ + ...Attribute.QueueSerializeTimeMs(durationMs), + ...Attribute.StepResultType(typeof dehydrated), + }); + return dehydrated; + } + ); + } catch (err) { + userCodeError = err; + userCodeFailed = true; + } + } + // --- Handle user code errors --- if (userCodeFailed) { const err = userCodeError; @@ -589,12 +631,26 @@ const stepHandler = (worldHandlers: WorldHandlers) => }); if (isFatal) { + const description = describeError(err); + const friendlyStep = formatStepName(stepName); + const framing = + description.attribution === 'sdk' + ? `Step ${friendlyStep} failed with a FatalError from the SDK runtime — bubbling up to parent workflow` + : `Step ${friendlyStep} threw a FatalError — bubbling up to parent workflow`; + // Mirror the workflow-level log formatting: put the framing + + // stack into the message so console.error renders the stack + // inline, and keep the metadata object small with only the + // structured fields that log drains want to index. stepLogger.error( - 'Encountered FatalError while executing step, bubbling up to parent workflow', + `${framing}\n${normalizedStack || normalizedError.message}`, { workflowRunId, + stepId, stepName, - errorStack: normalizedStack, + errorAttribution: description.attribution, + errorName: normalizedError.name, + errorMessage: normalizedError.message, + ...(description.hint ? { hint: description.hint } : {}), } ); // Fail the step via event (event-sourced architecture) @@ -614,13 +670,12 @@ const stepHandler = (worldHandlers: WorldHandlers) => ); } catch (stepFailErr) { if (EntityConflictError.is(stepFailErr)) { - runtimeLogger.info( + // Step already in terminal state — idempotent. + stepRuntimeLogger.info( 'Tried failing step, but step has already finished.', { - workflowRunId, - stepId, - stepName, - message: stepFailErr.message, + errorName: stepFailErr.name, + errorMessage: stepFailErr.message, } ); return; @@ -647,14 +702,25 @@ const stepHandler = (worldHandlers: WorldHandlers) => if (currentAttempt >= maxRetries + 1) { // Max retries reached const retryCount = step.attempt - 1; + const description = describeError(err); + const friendlyStep = formatStepName(stepName); + const framing = + description.attribution === 'sdk' + ? `Step ${friendlyStep} hit max retries on an SDK runtime error — bubbling to parent workflow` + : `Step ${friendlyStep} hit max retries — bubbling error thrown by your step to the parent workflow`; stepLogger.error( - 'Max retries reached, bubbling error to parent workflow', + `${framing}\n${normalizedStack || normalizedError.message}`, { workflowRunId, + workflowName, + stepId, stepName, attempt: step.attempt, retryCount, - errorStack: normalizedStack, + errorAttribution: description.attribution, + errorName: normalizedError.name, + errorMessage: normalizedError.message, + ...(description.hint ? { hint: description.hint } : {}), } ); const errorMessage = `Step "${stepName}" failed after ${maxRetries} ${pluralize('retry', 'retries', maxRetries)}: ${normalizedError.message}`; @@ -675,13 +741,12 @@ const stepHandler = (worldHandlers: WorldHandlers) => ); } catch (stepFailErr) { if (EntityConflictError.is(stepFailErr)) { - runtimeLogger.info( + // Step already in terminal state — idempotent. + stepRuntimeLogger.info( 'Tried failing step, but step has already finished.', { - workflowRunId, - stepId, - stepName, - message: stepFailErr.message, + errorName: stepFailErr.name, + errorMessage: stepFailErr.message, } ); return; @@ -700,16 +765,24 @@ const stepHandler = (worldHandlers: WorldHandlers) => 'Encountered RetryableError, step will be retried', { workflowRunId, + workflowName, + stepId, stepName, attempt: currentAttempt, - message: err.message, + errorName: err.name, + errorMessage: err.message, + errorStack: normalizedStack, } ); } else { stepLogger.info('Encountered Error, step will be retried', { workflowRunId, + workflowName, + stepId, stepName, attempt: currentAttempt, + errorName: normalizedError.name, + errorMessage: normalizedError.message, errorStack: normalizedStack, }); } @@ -734,13 +807,12 @@ const stepHandler = (worldHandlers: WorldHandlers) => ); } catch (stepRetryErr) { if (EntityConflictError.is(stepRetryErr)) { - runtimeLogger.info( + // Step already in terminal state — idempotent. + stepRuntimeLogger.info( 'Tried retrying step, but step has already finished.', { - workflowRunId, - stepId, - stepName, - message: stepRetryErr.message, + errorName: stepRetryErr.name, + errorMessage: stepRetryErr.message, } ); return; @@ -787,30 +859,13 @@ const stepHandler = (worldHandlers: WorldHandlers) => // --- Infrastructure: complete the step --- // Errors here (network failures, server errors) propagate to the // queue handler for automatic retry. - - // NOTE: None of the code from this point is guaranteed to run - // Since the step might fail or cause a function timeout and the process might be SIGKILL'd - // The workflow runtime must be resilient to the below code not executing on a failed step - result = await trace( - 'step.dehydrate', - {}, - async (dehydrateSpan) => { - const startTime = Date.now(); - const dehydrated = await dehydrateStepReturnValue( - result, - workflowRunId, - encryptionKey, - ops - ); - const durationMs = Date.now() - startTime; - dehydrateSpan?.setAttributes({ - ...Attribute.QueueSerializeTimeMs(durationMs), - ...Attribute.StepResultType(typeof dehydrated), - }); - return dehydrated; - } - ); - + // + // NOTE: None of the code from this point is guaranteed to run. + // Since the step might fail or cause a function timeout and the + // process might be SIGKILL'd, the workflow runtime must be + // resilient to the below code not executing on a failed step. + // (Dehydration already happened above and is accounted for in the + // userCodeFailed path.) waitUntil( Promise.all(ops).catch((err) => { // Ignore expected client disconnect errors (e.g., browser refresh during streaming) @@ -839,13 +894,12 @@ const stepHandler = (worldHandlers: WorldHandlers) => ) .catch((err: unknown) => { if (EntityConflictError.is(err)) { - runtimeLogger.info( + // Step already in terminal state — idempotent. + stepRuntimeLogger.info( 'Tried completing step, but step has already finished.', { - workflowRunId, - stepId, - stepName, - message: err.message, + errorName: err.name, + errorMessage: err.message, } ); stepCompleted409 = true; diff --git a/packages/core/src/serialization.test.ts b/packages/core/src/serialization.test.ts index b44c34b679..8b0f399eb2 100644 --- a/packages/core/src/serialization.test.ts +++ b/packages/core/src/serialization.test.ts @@ -2154,7 +2154,9 @@ describe('step function serialization', () => { expect(err).toBeDefined(); expect(err?.message).toContain('Step function "nonExistentStep" not found'); - expect(err?.message).toContain('Make sure the step function is registered'); + expect(err?.message).toContain( + 'Make sure the step file is included in your build' + ); }); it('should dehydrate step function passed as argument to a step', async () => { diff --git a/packages/core/src/serialization.ts b/packages/core/src/serialization.ts index 321958cacf..74ea7f736d 100644 --- a/packages/core/src/serialization.ts +++ b/packages/core/src/serialization.ts @@ -1,4 +1,4 @@ -import { WorkflowRuntimeError } from '@workflow/errors'; +import { SerializationError, WorkflowRuntimeError } from '@workflow/errors'; import { parse, stringify } from 'devalue'; import { monotonicFactory } from 'ulid'; import { @@ -107,6 +107,25 @@ export function getStreamType(stream: ReadableStream): 'bytes' | undefined { */ const FRAME_HEADER_SIZE = 4; +/** + * The mode-specific serializers (`./serialization/{client,step,workflow}.ts`) + * already throw a `SerializationError` whose `.cause` is the underlying + * devalue / serde failure. The outer dehydrate/hydrate wrappers want to + * re-frame that error with a more specific context label (e.g. "workflow + * arguments" instead of generic "workflow value"), so they unwrap the + * inner SerializationError and reformat with the original cause. Errors + * that aren't already SerializationError flow through unchanged. + */ +function unwrapSerializationCause(error: unknown): unknown { + if (error instanceof SerializationError && error.cause !== undefined) { + return error.cause; + } + if (error instanceof WorkflowRuntimeError && error.cause !== undefined) { + return error.cause; + } + return error; +} + export function getSerializeStream( reducers: Partial, cryptoKey: EncryptionKeyParam @@ -148,11 +167,12 @@ export function getSerializeStream( frame.set(prefixed, FRAME_HEADER_SIZE); controller.enqueue(frame); } catch (error) { + const { message, hint } = formatSerializationError( + 'stream chunk', + error + ); controller.error( - new WorkflowRuntimeError( - formatSerializationError('stream chunk', error), - { slug: 'serialization-failed', cause: error } - ) + new SerializationError(message, { hint, cause: error }) ); } }, @@ -279,7 +299,7 @@ export class WorkflowServerReadableStream extends ReadableStream { constructor(runId: string, name: string, startIndex?: number) { if (typeof name !== 'string' || name.length === 0) { - throw new Error(`"name" is required, got "${name}"`); + throw new WorkflowRuntimeError(`"name" is required, got "${name}"`); } super({ // @ts-expect-error Not sure why TypeScript is complaining about this @@ -326,10 +346,12 @@ const STREAM_FLUSH_INTERVAL_MS = 10; export class WorkflowServerWritableStream extends WritableStream { constructor(runId: string, name: string) { if (typeof runId !== 'string') { - throw new Error(`"runId" must be a string, got "${typeof runId}"`); + throw new WorkflowRuntimeError( + `"runId" must be a string, got "${typeof runId}"` + ); } if (typeof name !== 'string' || name.length === 0) { - throw new Error(`"name" is required, got "${name}"`); + throw new WorkflowRuntimeError(`"name" is required, got "${name}"`); } const worldPromise = getWorld(); @@ -538,7 +560,12 @@ export function getExternalReducers( // Stream must not be locked when passing across execution boundary if (value.locked) { - throw new Error('ReadableStream is locked'); + throw new SerializationError( + 'ReadableStream is locked and cannot be passed across a workflow boundary.', + { + hint: 'Pass the stream before calling .getReader() / .pipeThrough() / .pipeTo(), or tee it with .tee() and pass one of the branches.', + } + ); } const streamId = ((global as any)[STABLE_ULID] || defaultUlid)(); @@ -608,7 +635,7 @@ export function getWorkflowReducers( const name = value[STREAM_NAME_SYMBOL]; if (!name) { - throw new Error('ReadableStream `name` is not set'); + throw new WorkflowRuntimeError('ReadableStream `name` is not set'); } const s: SerializableSpecial['ReadableStream'] = { name }; const type = value[STREAM_TYPE_SYMBOL]; @@ -619,7 +646,7 @@ export function getWorkflowReducers( if (!(value instanceof global.WritableStream)) return false; const name = value[STREAM_NAME_SYMBOL]; if (!name) { - throw new Error('WritableStream `name` is not set'); + throw new WorkflowRuntimeError('WritableStream `name` is not set'); } return { name }; }, @@ -649,7 +676,12 @@ function getStepReducers( // Stream must not be locked when passing across execution boundary if (value.locked) { - throw new Error('ReadableStream is locked'); + throw new SerializationError( + 'ReadableStream is locked and cannot be passed across a workflow boundary.', + { + hint: 'Pass the stream before calling .getReader() / .pipeThrough() / .pipeTo(), or tee it with .tee() and pass one of the branches.', + } + ); } // Check if the stream already has the name symbol set, in which case @@ -741,16 +773,22 @@ export function getExternalRevivers( // StepFunction should not be returned from workflows to clients StepFunction: () => { - throw new Error( - 'Step functions cannot be deserialized in client context. Step functions should not be returned from workflows.' + throw new SerializationError( + 'Step functions cannot be deserialized in client context. Step functions should not be returned from workflows.', + { + hint: 'A step function reference reached the client. Return a serializable value (e.g. the step result) instead of the step itself.', + } ); }, WorkflowFunction: (value) => Object.assign( () => { - throw new Error( - 'Workflow functions cannot be called directly. Use start() to invoke them.' + throw new SerializationError( + 'Workflow functions cannot be called directly. Use start() to invoke them.', + { + hint: 'Wrap the workflow with `start(workflowFn, { ... })` from `workflow` to begin a run instead of invoking it like a normal function.', + } ); }, { workflowId: value.workflowId } @@ -875,8 +913,11 @@ export function getWorkflowRevivers( (value as any)[WEBHOOK_RESPONSE_WRITABLE] = responseWritable; delete value.responseWritable; (value as any).respondWith = () => { - throw new Error( - '`respondWith()` must be called from within a step function' + throw new SerializationError( + '`respondWith()` must be called from within a step function.', + { + hint: 'Move the `respondWith(...)` call inside a `"use step"` function — it cannot be invoked from a workflow context.', + } ); }; } @@ -887,8 +928,11 @@ export function getWorkflowRevivers( WorkflowFunction: (value) => Object.assign( () => { - throw new Error( - 'Workflow functions cannot be called directly. Use start() to invoke them.' + throw new SerializationError( + 'Workflow functions cannot be called directly. Use start() to invoke them.', + { + hint: 'Wrap the workflow with `start(workflowFn, { ... })` from `workflow` to begin a run instead of invoking it like a normal function.', + } ); }, { workflowId: value.workflowId } @@ -958,8 +1002,11 @@ function getStepRevivers( const stepFn = getStepFunction(stepId); if (!stepFn) { - throw new Error( - `Step function "${stepId}" not found. Make sure the step function is registered.` + throw new SerializationError( + `Step function "${stepId}" not found. Make sure the step function is registered.`, + { + hint: 'Make sure the step file is included in your build (i.e. it is listed in the workflow manifest), and that the SWC plugin is configured for the file.', + } ); } @@ -971,7 +1018,7 @@ function getStepRevivers( const currentContext = contextStorage.getStore(); if (!currentContext) { - throw new Error( + throw new WorkflowRuntimeError( 'Cannot call step function with closure variables outside step context' ); } @@ -1009,8 +1056,11 @@ function getStepRevivers( WorkflowFunction: (value) => Object.assign( () => { - throw new Error( - 'Workflow functions cannot be called directly. Use start() to invoke them.' + throw new SerializationError( + 'Workflow functions cannot be called directly. Use start() to invoke them.', + { + hint: 'Wrap the workflow with `start(workflowFn, { ... })` from `workflow` to begin a run instead of invoking it like a normal function.', + } ); }, { workflowId: value.workflowId } @@ -1180,16 +1230,12 @@ export async function dehydrateWorkflowArguments( ), }); } catch (error) { - throw new WorkflowRuntimeError( - formatSerializationError( - 'workflow arguments', - error instanceof WorkflowRuntimeError ? error.cause : error - ), - { - slug: 'serialization-failed', - cause: error instanceof WorkflowRuntimeError ? error.cause : error, - } + const cause = unwrapSerializationCause(error); + const { message, hint } = formatSerializationError( + 'workflow arguments', + cause ); + throw new SerializationError(message, { hint, cause }); } } @@ -1233,16 +1279,12 @@ export async function dehydrateWorkflowReturnValue( extraReducers: getStreamAndRequestReducers(getWorkflowReducers(global)), }); } catch (error) { - throw new WorkflowRuntimeError( - formatSerializationError( - 'workflow return value', - error instanceof WorkflowRuntimeError ? error.cause : error - ), - { - slug: 'serialization-failed', - cause: error instanceof WorkflowRuntimeError ? error.cause : error, - } + const cause = unwrapSerializationCause(error); + const { message, hint } = formatSerializationError( + 'workflow return value', + cause ); + throw new SerializationError(message, { hint, cause }); } } @@ -1290,16 +1332,9 @@ export async function dehydrateStepArguments( extraReducers: getStreamAndRequestReducers(getWorkflowReducers(global)), }); } catch (error) { - throw new WorkflowRuntimeError( - formatSerializationError( - 'step arguments', - error instanceof WorkflowRuntimeError ? error.cause : error - ), - { - slug: 'serialization-failed', - cause: error instanceof WorkflowRuntimeError ? error.cause : error, - } - ); + const cause = unwrapSerializationCause(error); + const { message, hint } = formatSerializationError('step arguments', cause); + throw new SerializationError(message, { hint, cause }); } } @@ -1348,16 +1383,12 @@ export async function dehydrateStepReturnValue( ), }); } catch (error) { - throw new WorkflowRuntimeError( - formatSerializationError( - 'step return value', - error instanceof WorkflowRuntimeError ? error.cause : error - ), - { - slug: 'serialization-failed', - cause: error instanceof WorkflowRuntimeError ? error.cause : error, - } + const cause = unwrapSerializationCause(error); + const { message, hint } = formatSerializationError( + 'step return value', + cause ); + throw new SerializationError(message, { hint, cause }); } } diff --git a/packages/core/src/serialization/client.ts b/packages/core/src/serialization/client.ts index 33530c67bf..f6f5ca4704 100644 --- a/packages/core/src/serialization/client.ts +++ b/packages/core/src/serialization/client.ts @@ -5,7 +5,7 @@ * arguments) and when receiving workflow return values. Supports encryption. */ -import { WorkflowRuntimeError } from '@workflow/errors'; +import { SerializationError } from '@workflow/errors'; import type { CodecOptions } from './codec.js'; import { devalueCodec } from './codec-devalue.js'; import { @@ -33,10 +33,8 @@ export async function serialize( ) as Uint8Array; return encryptData(prefixed, encryptionKey); } catch (error) { - throw new WorkflowRuntimeError( - formatSerializationError('client value', error), - { slug: 'serialization-failed', cause: error } - ); + const { message, hint } = formatSerializationError('client value', error); + throw new SerializationError(message, { hint, cause: error }); } } diff --git a/packages/core/src/serialization/errors.ts b/packages/core/src/serialization/errors.ts index 41ae625889..56a43c0b37 100644 --- a/packages/core/src/serialization/errors.ts +++ b/packages/core/src/serialization/errors.ts @@ -3,6 +3,11 @@ * * Used by the mode-specific serializers (workflow, step, client) to * produce consistent error messages with devalue path information. + * + * Returns a `{ message, hint }` pair so callers can throw a + * `SerializationError(message, { hint, cause })` and have the hint flow + * through the standard friendly-errors framing instead of being baked + * into the message string. */ import { DevalueError } from 'devalue'; @@ -16,18 +21,18 @@ import { runtimeLogger } from '../logger.js'; export function formatSerializationError( context: string, error: unknown -): string { +): { message: string; hint: string } { const verb = context.includes('return value') ? 'returning' : 'passing'; let message = `Failed to serialize ${context}`; if (error instanceof DevalueError && error.path) { message += ` at path "${error.path}"`; } - message += `. Ensure you're ${verb} serializable types (plain objects, arrays, primitives, Date, RegExp, Map, Set).`; + const hint = `Ensure you're ${verb} serializable types (plain objects, arrays, primitives, Date, RegExp, Map, Set).`; if (error instanceof DevalueError && error.value !== undefined) { runtimeLogger.error('Serialization failed', { context, problematicValue: error.value, }); } - return message; + return { message, hint }; } diff --git a/packages/core/src/serialization/step.ts b/packages/core/src/serialization/step.ts index 960ba2dbbe..9df26dae52 100644 --- a/packages/core/src/serialization/step.ts +++ b/packages/core/src/serialization/step.ts @@ -5,7 +5,7 @@ * deserializing step arguments. Supports encryption as a composable layer. */ -import { WorkflowRuntimeError } from '@workflow/errors'; +import { SerializationError } from '@workflow/errors'; import type { CodecOptions } from './codec.js'; import { devalueCodec } from './codec-devalue.js'; import { @@ -33,10 +33,8 @@ export async function serialize( ) as Uint8Array; return encryptData(prefixed, encryptionKey); } catch (error) { - throw new WorkflowRuntimeError( - formatSerializationError('step value', error), - { slug: 'serialization-failed', cause: error } - ); + const { message, hint } = formatSerializationError('step value', error); + throw new SerializationError(message, { hint, cause: error }); } } diff --git a/packages/core/src/serialization/workflow.ts b/packages/core/src/serialization/workflow.ts index 5b3fb4e449..344fec0151 100644 --- a/packages/core/src/serialization/workflow.ts +++ b/packages/core/src/serialization/workflow.ts @@ -10,7 +10,7 @@ * inside the sandboxed VM. */ -import { WorkflowRuntimeError } from '@workflow/errors'; +import { SerializationError } from '@workflow/errors'; import type { CodecOptions } from './codec.js'; import { devalueCodec } from './codec-devalue.js'; import { formatSerializationError } from './errors.js'; @@ -32,10 +32,8 @@ export function serialize(value: unknown, options?: CodecOptions): Uint8Array { payload ) as Uint8Array; } catch (error) { - throw new WorkflowRuntimeError( - formatSerializationError('workflow value', error), - { slug: 'serialization-failed', cause: error } - ); + const { message, hint } = formatSerializationError('workflow value', error); + throw new SerializationError(message, { hint, cause: error }); } } diff --git a/packages/core/src/sleep.ts b/packages/core/src/sleep.ts index 7d3e0d7ab8..8c14c196a6 100644 --- a/packages/core/src/sleep.ts +++ b/packages/core/src/sleep.ts @@ -1,4 +1,5 @@ import type { StringValue } from 'ms'; +import { throwNotInWorkflowContext } from './context-errors.js'; import { WORKFLOW_SLEEP } from './symbols.js'; /** @@ -39,7 +40,11 @@ export async function sleep(param: StringValue | Date | number): Promise { // Inside the workflow VM, the sleep function is stored in the globalThis object behind a symbol const sleepFn = (globalThis as any)[WORKFLOW_SLEEP]; if (!sleepFn) { - throw new Error('`sleep()` can only be called inside a workflow function'); + throwNotInWorkflowContext( + 'sleep()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/sleep', + sleep + ); } return sleepFn(param); } diff --git a/packages/core/src/step/get-closure-vars.ts b/packages/core/src/step/get-closure-vars.ts index a5d1d5193d..9c81b7e5dc 100644 --- a/packages/core/src/step/get-closure-vars.ts +++ b/packages/core/src/step/get-closure-vars.ts @@ -1,3 +1,4 @@ +import { WorkflowRuntimeError } from '@workflow/errors'; import { contextStorage } from './context-storage.js'; /** @@ -10,7 +11,7 @@ import { contextStorage } from './context-storage.js'; export function __private_getClosureVars(): Record { const ctx = contextStorage.getStore(); if (!ctx) { - throw new Error( + throw new WorkflowRuntimeError( 'Closure variables can only be accessed inside a step function' ); } diff --git a/packages/core/src/step/get-step-metadata.ts b/packages/core/src/step/get-step-metadata.ts index cac32343cd..e2f87318cb 100644 --- a/packages/core/src/step/get-step-metadata.ts +++ b/packages/core/src/step/get-step-metadata.ts @@ -1,3 +1,4 @@ +import { throwNotInStepContext } from '../context-errors.js'; import { contextStorage } from './context-storage.js'; export interface StepMetadata { @@ -47,8 +48,10 @@ export interface StepMetadata { export function getStepMetadata(): StepMetadata { const ctx = contextStorage.getStore(); if (!ctx) { - throw new Error( - '`getStepMetadata()` can only be called inside a step function' + throwNotInStepContext( + 'getStepMetadata()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/get-step-metadata', + getStepMetadata ); } return ctx.stepMetadata; diff --git a/packages/core/src/step/get-workflow-metadata.ts b/packages/core/src/step/get-workflow-metadata.ts index 59368a0570..22e0578bf9 100644 --- a/packages/core/src/step/get-workflow-metadata.ts +++ b/packages/core/src/step/get-workflow-metadata.ts @@ -1,3 +1,4 @@ +import { throwNotInWorkflowOrStepContext } from '../context-errors.js'; import type { WorkflowMetadata } from '../workflow/get-workflow-metadata.js'; import { contextStorage } from './context-storage.js'; @@ -9,8 +10,10 @@ export type { WorkflowMetadata }; export function getWorkflowMetadata(): WorkflowMetadata { const ctx = contextStorage.getStore(); if (!ctx) { - throw new Error( - '`getWorkflowMetadata()` can only be called inside a workflow or step function' + throwNotInWorkflowOrStepContext( + 'getWorkflowMetadata()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/get-workflow-metadata', + getWorkflowMetadata ); } return ctx.workflowMetadata; diff --git a/packages/core/src/step/writable-stream.ts b/packages/core/src/step/writable-stream.ts index 093a5d9c58..5c250522fd 100644 --- a/packages/core/src/step/writable-stream.ts +++ b/packages/core/src/step/writable-stream.ts @@ -1,3 +1,4 @@ +import { throwNotInWorkflowOrStepContext } from '../context-errors.js'; import { createFlushableState, flushablePipe, @@ -37,8 +38,10 @@ export function getWritable( ): WritableStream { const ctx = contextStorage.getStore(); if (!ctx) { - throw new Error( - '`getWritable()` can only be called inside a workflow or step function' + throwNotInWorkflowOrStepContext( + 'getWritable()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/get-writable', + getWritable ); } diff --git a/packages/core/src/util.test.ts b/packages/core/src/util.test.ts index 3cd5c9e64e..169e1bc0cc 100644 --- a/packages/core/src/util.test.ts +++ b/packages/core/src/util.test.ts @@ -3,115 +3,113 @@ import { describe, expect, it } from 'vitest'; import { buildWorkflowSuspensionMessage, getWorkflowRunStreamId } from './util'; describe('buildWorkflowSuspensionMessage', () => { - const runId = 'test-run-123'; - it('should return null when both counts are zero', () => { - const result = buildWorkflowSuspensionMessage(runId, 0, 0, 0); + const result = buildWorkflowSuspensionMessage(0, 0, 0); expect(result).toBeNull(); }); it('should handle single step', () => { - const result = buildWorkflowSuspensionMessage(runId, 1, 0, 0); + const result = buildWorkflowSuspensionMessage(1, 0, 0); expect(result).toBe( - `[Workflows] "${runId}" - 1 step to be enqueued\n Workflow will suspend and resume when steps are completed` + `1 step to be enqueued\n Workflow will suspend and resume when steps are completed` ); }); it('should handle multiple steps', () => { - const result = buildWorkflowSuspensionMessage(runId, 3, 0, 0); + const result = buildWorkflowSuspensionMessage(3, 0, 0); expect(result).toBe( - `[Workflows] "${runId}" - 3 steps to be enqueued\n Workflow will suspend and resume when steps are completed` + `3 steps to be enqueued\n Workflow will suspend and resume when steps are completed` ); }); it('should handle single hook', () => { - const result = buildWorkflowSuspensionMessage(runId, 0, 1, 0); + const result = buildWorkflowSuspensionMessage(0, 1, 0); expect(result).toBe( - `[Workflows] "${runId}" - 1 hook to be enqueued\n Workflow will suspend and resume when hooks are received` + `1 hook to be enqueued\n Workflow will suspend and resume when hooks are received` ); }); it('should handle multiple hooks', () => { - const result = buildWorkflowSuspensionMessage(runId, 0, 2, 0); + const result = buildWorkflowSuspensionMessage(0, 2, 0); expect(result).toBe( - `[Workflows] "${runId}" - 2 hooks to be enqueued\n Workflow will suspend and resume when hooks are received` + `2 hooks to be enqueued\n Workflow will suspend and resume when hooks are received` ); }); it('should handle single step and single hook', () => { - const result = buildWorkflowSuspensionMessage(runId, 1, 1, 0); + const result = buildWorkflowSuspensionMessage(1, 1, 0); expect(result).toBe( - `[Workflows] "${runId}" - 1 step and 1 hook to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received` + `1 step and 1 hook to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received` ); }); it('should handle multiple steps and single hook', () => { - const result = buildWorkflowSuspensionMessage(runId, 5, 1, 0); + const result = buildWorkflowSuspensionMessage(5, 1, 0); expect(result).toBe( - `[Workflows] "${runId}" - 5 steps and 1 hook to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received` + `5 steps and 1 hook to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received` ); }); it('should handle single step and multiple hooks', () => { - const result = buildWorkflowSuspensionMessage(runId, 1, 3, 0); + const result = buildWorkflowSuspensionMessage(1, 3, 0); expect(result).toBe( - `[Workflows] "${runId}" - 1 step and 3 hooks to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received` + `1 step and 3 hooks to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received` ); }); it('should handle multiple steps and multiple hooks', () => { - const result = buildWorkflowSuspensionMessage(runId, 4, 2, 0); + const result = buildWorkflowSuspensionMessage(4, 2, 0); expect(result).toBe( - `[Workflows] "${runId}" - 4 steps and 2 hooks to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received` + `4 steps and 2 hooks to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received` ); }); it('should handle large numbers correctly', () => { - const result = buildWorkflowSuspensionMessage(runId, 100, 50, 0); + const result = buildWorkflowSuspensionMessage(100, 50, 0); expect(result).toBe( - `[Workflows] "${runId}" - 100 steps and 50 hooks to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received` + `100 steps and 50 hooks to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received` ); }); it('should handle single wait without steps or hooks', () => { - const result = buildWorkflowSuspensionMessage(runId, 0, 0, 1); + const result = buildWorkflowSuspensionMessage(0, 0, 1); expect(result).toBe( - `[Workflows] "${runId}" - 1 timer to be enqueued\n Workflow will suspend and resume when timers have elapsed` + `1 timer to be enqueued\n Workflow will suspend and resume when timers have elapsed` ); }); it('should handle multiple waits without steps or hooks', () => { - const result = buildWorkflowSuspensionMessage(runId, 0, 0, 2); + const result = buildWorkflowSuspensionMessage(0, 0, 2); expect(result).toBe( - `[Workflows] "${runId}" - 2 timers to be enqueued\n Workflow will suspend and resume when timers have elapsed` + `2 timers to be enqueued\n Workflow will suspend and resume when timers have elapsed` ); }); it('should handle hooks and waits without steps', () => { - const result = buildWorkflowSuspensionMessage(runId, 0, 1, 1); + const result = buildWorkflowSuspensionMessage(0, 1, 1); expect(result).toBe( - `[Workflows] "${runId}" - 1 hook and 1 timer to be enqueued\n Workflow will suspend and resume when hooks are received and timers have elapsed` + `1 hook and 1 timer to be enqueued\n Workflow will suspend and resume when hooks are received and timers have elapsed` ); }); it('should handle steps and waits without hooks', () => { - const result = buildWorkflowSuspensionMessage(runId, 1, 0, 1); + const result = buildWorkflowSuspensionMessage(1, 0, 1); expect(result).toBe( - `[Workflows] "${runId}" - 1 step and 1 timer to be enqueued\n Workflow will suspend and resume when steps are completed and timers have elapsed` + `1 step and 1 timer to be enqueued\n Workflow will suspend and resume when steps are completed and timers have elapsed` ); }); it('should handle steps, hooks, and waits', () => { - const result = buildWorkflowSuspensionMessage(runId, 1, 1, 1); + const result = buildWorkflowSuspensionMessage(1, 1, 1); expect(result).toBe( - `[Workflows] "${runId}" - 1 step and 1 hook and 1 timer to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received and timers have elapsed` + `1 step and 1 hook and 1 timer to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received and timers have elapsed` ); }); it('should handle multiple waits with steps and hooks', () => { - const result = buildWorkflowSuspensionMessage(runId, 2, 1, 3); + const result = buildWorkflowSuspensionMessage(2, 1, 3); expect(result).toBe( - `[Workflows] "${runId}" - 2 steps and 1 hook and 3 timers to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received and timers have elapsed` + `2 steps and 1 hook and 3 timers to be enqueued\n Workflow will suspend and resume when steps are completed and hooks are received and timers have elapsed` ); }); }); diff --git a/packages/core/src/util.ts b/packages/core/src/util.ts index 3bd65618f2..0a0fdc4b28 100644 --- a/packages/core/src/util.ts +++ b/packages/core/src/util.ts @@ -2,15 +2,15 @@ import { waitUntil } from '@vercel/functions'; import { pluralize } from '@workflow/utils'; /** - * Builds a workflow suspension log message based on the counts of steps, hooks, and waits. - * @param runId - The workflow run ID + * Builds a workflow suspension log message based on the counts of steps, + * hooks, and waits. The structured logger attaches the run context, so the + * message itself only describes what's being enqueued. * @param stepCount - Number of steps to be enqueued * @param hookCount - Number of hooks to be enqueued * @param waitCount - Number of waits to be enqueued * @returns The formatted log message or null if all counts are 0 */ export function buildWorkflowSuspensionMessage( - runId: string, stepCount: number, hookCount: number, waitCount: number @@ -42,7 +42,7 @@ export function buildWorkflowSuspensionMessage( } const resumeMsg = resumeMsgParts.join(' and '); - return `[Workflows] "${runId}" - ${parts.join(' and ')} to be enqueued\n Workflow will suspend and resume when ${resumeMsg}`; + return `${parts.join(' and ')} to be enqueued\n Workflow will suspend and resume when ${resumeMsg}`; } /** diff --git a/packages/core/src/vm/index.test.ts b/packages/core/src/vm/index.test.ts index 8361fa63ca..0381b60f63 100644 --- a/packages/core/src/vm/index.test.ts +++ b/packages/core/src/vm/index.test.ts @@ -167,7 +167,8 @@ describe('createContext', () => { } expect(err).toBeDefined(); expect(err).toBeInstanceOf(Error); - expect(err?.message).toEqual('Not implemented'); + expect(err?.message).toContain('crypto.subtle.generateKey()'); + expect(err?.message).toContain('not available inside a workflow function'); }); it('should call `onWorkflowError` when a workflow error occurs', async () => { diff --git a/packages/core/src/vm/index.ts b/packages/core/src/vm/index.ts index a0df4a75c1..c0003bdc39 100644 --- a/packages/core/src/vm/index.ts +++ b/packages/core/src/vm/index.ts @@ -1,4 +1,5 @@ import { runInContext, createContext as vmCreateContext } from 'node:vm'; +import { WorkflowRuntimeError } from '@workflow/errors'; import seedrandom from 'seedrandom'; import { installUint8ArrayBase64 } from './uint8array-base64.js'; import { createRandomUUID } from './uuid.js'; @@ -72,7 +73,9 @@ export function createContext(options: CreateContextOptions) { get(target, prop) { if (prop === 'generateKey') { return () => { - throw new Error('Not implemented'); + throw new WorkflowRuntimeError( + '`crypto.subtle.generateKey()` is not available inside a workflow function. Move key generation to a step function where full Node.js crypto is available.' + ); }; } else if (prop === 'digest') { return boundDigest; diff --git a/packages/core/src/workflow.ts b/packages/core/src/workflow.ts index 520cba020c..29318d46b6 100644 --- a/packages/core/src/workflow.ts +++ b/packages/core/src/workflow.ts @@ -92,7 +92,7 @@ export async function runWorkflow( const startedAt = workflowRun.startedAt; if (!startedAt) { - throw new Error( + throw new WorkflowRuntimeError( `Workflow run "${workflowRun.runId}" has no "startedAt" timestamp (should not happen)` ); } diff --git a/packages/core/src/workflow/create-hook.ts b/packages/core/src/workflow/create-hook.ts index 4e604628f3..1d018f68b7 100644 --- a/packages/core/src/workflow/create-hook.ts +++ b/packages/core/src/workflow/create-hook.ts @@ -1,3 +1,4 @@ +import { throwNotInWorkflowContext } from '../context-errors.js'; import type { Hook, HookOptions, @@ -14,8 +15,10 @@ export function createHook(options?: HookOptions): Hook { WORKFLOW_CREATE_HOOK ] as typeof createHook; if (!createHookFn) { - throw new Error( - '`createHook()` can only be called inside a workflow function' + throwNotInWorkflowContext( + 'createHook()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/create-hook', + createHook ); } return createHookFn(options); diff --git a/packages/core/src/workflow/define-hook.ts b/packages/core/src/workflow/define-hook.ts index ad463c745f..bdc2624d06 100644 --- a/packages/core/src/workflow/define-hook.ts +++ b/packages/core/src/workflow/define-hook.ts @@ -1,4 +1,5 @@ import type { Hook as HookEntity } from '@workflow/world'; +import { throwUnavailableInWorkflowContext } from '../context-errors.js'; import type { Hook, HookOptions } from '../create-hook.js'; import { createHook } from './create-hook.js'; @@ -6,15 +7,23 @@ import { createHook } from './create-hook.js'; * NOTE: This is the implementation of `defineHook()` that is used in workflow contexts. */ export function defineHook() { + function resume( + _token: string, + _payload: TInput + ): Promise { + // Referenced by name (not `this.resume`) so the stack strip works even + // if the caller destructured the hook. + throwUnavailableInWorkflowContext( + 'defineHook().resume()', + 'https://workflow-sdk.dev/docs/api-reference/workflow-api/resume-hook', + resume + ); + } + return { create(options?: HookOptions): Hook { return createHook(options); }, - - resume(_token: string, _payload: TInput): Promise { - throw new Error( - '`defineHook().resume()` can only be called from external contexts (e.g. API routes).' - ); - }, + resume, }; } diff --git a/packages/core/src/workflow/get-workflow-metadata.ts b/packages/core/src/workflow/get-workflow-metadata.ts index c89bb06cd2..b4c08949ae 100644 --- a/packages/core/src/workflow/get-workflow-metadata.ts +++ b/packages/core/src/workflow/get-workflow-metadata.ts @@ -1,3 +1,6 @@ +import { redirectStackToCaller } from '../capture-stack.js'; +import { NotInWorkflowOrStepContextError } from '../context-violation-error.js'; + export interface WorkflowMetadata { /** * The name of the workflow. @@ -36,12 +39,22 @@ export const WORKFLOW_CONTEXT_SYMBOL = /* @__PURE__ */ Symbol.for('WORKFLOW_CONTEXT'); export function getWorkflowMetadata(): WorkflowMetadata { - // Inside the workflow VM, the context is stored in the globalThis object behind a symbol + // Inside the workflow VM, the context is stored in the globalThis object + // behind a symbol. const ctx = (globalThis as any)[WORKFLOW_CONTEXT_SYMBOL] as WorkflowMetadata; if (!ctx) { - throw new Error( - '`getWorkflowMetadata()` can only be called inside a workflow or step function' + // Use the shared `NotInWorkflowOrStepContextError` — it lives in + // `context-violation-error.ts` specifically so this file can throw it + // without creating a module-init cycle (the full `context-errors.ts` + // depends on this file's `WORKFLOW_CONTEXT_SYMBOL`). + const err = new NotInWorkflowOrStepContextError( + 'getWorkflowMetadata()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/get-workflow-metadata' ); + // Redirect the stack to the caller so terminal overlays (Next.js, + // Turbopack, VS Code) point at the user's code rather than this frame. + redirectStackToCaller(err, getWorkflowMetadata); + throw err; } return ctx; } diff --git a/packages/core/src/workflow/index.ts b/packages/core/src/workflow/index.ts index 61cc317491..73b92f2fcb 100644 --- a/packages/core/src/workflow/index.ts +++ b/packages/core/src/workflow/index.ts @@ -1,3 +1,7 @@ +import { + throwNotInStepContext, + throwUnavailableInWorkflowContext, +} from '../context-errors.js'; import type { StepMetadata } from '../step/get-step-metadata.js'; export { @@ -15,12 +19,16 @@ export { getWritable } from './writable-stream.js'; // workflows can't use these functions, but we still need to provide // the export so bundling doesn't fail when step and workflow are in same file export function getStepMetadata(): StepMetadata { - throw new Error( - '`getStepMetadata()` can only be called inside a step function' + throwNotInStepContext( + 'getStepMetadata()', + 'https://workflow-sdk.dev/docs/api-reference/workflow/get-step-metadata', + getStepMetadata ); } export function resumeHook() { - throw new Error( - '`resumeHook()` can only be called from outside a workflow function' + throwUnavailableInWorkflowContext( + 'resumeHook()', + 'https://workflow-sdk.dev/docs/api-reference/workflow-api/resume-hook', + resumeHook ); } diff --git a/packages/errors/__mocks__/chalk.ts b/packages/errors/__mocks__/chalk.ts new file mode 100644 index 0000000000..bfcda5b23b --- /dev/null +++ b/packages/errors/__mocks__/chalk.ts @@ -0,0 +1,26 @@ +// Mock implementation of the 'chalk' library for testing purposes. +// This mock wraps text in HTML-like tags to indicate styles because +// terminal styling is unreadable in test snapshots. + +import type { ChalkInstance } from 'chalk'; + +const short = new Map([ + ['italic', 'i'], + ['bold', 'b'], +]); + +function createChalkMock(currentModifiers: string[] = []): ChalkInstance { + return new Proxy(() => {}, { + get(_, prop: string) { + return createChalkMock([...currentModifiers, short.get(prop) || prop]); + }, + apply(_target, _thisArg, [text]) { + return currentModifiers.reduceRight((acc, mod) => { + const tag = String(mod); + return `<${tag}>${acc}`; + }, text as string); + }, + }) as ChalkInstance; +} + +export default createChalkMock(); diff --git a/packages/errors/package.json b/packages/errors/package.json index 9bd7c6562e..bdf01531d8 100644 --- a/packages/errors/package.json +++ b/packages/errors/package.json @@ -20,22 +20,29 @@ ".": { "types": "./dist/index.d.ts", "default": "./dist/index.js" + }, + "./ansi": { + "types": "./dist/ansi.d.ts", + "default": "./dist/ansi.js" } }, "scripts": { "build": "tsc", "dev": "tsc --watch", "clean": "tsc --build --clean && rm -rf dist", + "test": "vitest run src", "typecheck": "tsc --noEmit" }, "devDependencies": { "@types/ms": "2.1.0", "@types/node": "catalog:", "@workflow/tsconfig": "workspace:*", - "@workflow/world": "workspace:*" + "@workflow/world": "workspace:*", + "vitest": "catalog:" }, "dependencies": { "@workflow/utils": "workspace:*", + "chalk": "5.6.2", "ms": "2.1.3" } } diff --git a/packages/errors/src/ansi.test.ts b/packages/errors/src/ansi.test.ts new file mode 100644 index 0000000000..147c0b4dac --- /dev/null +++ b/packages/errors/src/ansi.test.ts @@ -0,0 +1,104 @@ +import { describe, expect, it, vi } from 'vitest'; +import * as Ansi from './ansi.js'; + +// Render ANSI styles as HTML-like tags in snapshots so they're readable. +vi.mock('chalk'); + +describe('Ansi.frame', () => { + it('renders a single-line title with no contents', () => { + expect(Ansi.frame('something went wrong', [])).toMatchInlineSnapshot( + `"something went wrong"` + ); + }); + + it('renders a single content line with ╰▶', () => { + expect( + Ansi.frame('something went wrong', ['here is why']) + ).toMatchInlineSnapshot(` + "something went wrong + ╰▶ here is why" + `); + }); + + it('renders multiple contents with ├▶ and ╰▶', () => { + expect( + Ansi.frame('something went wrong', ['first reason', 'second reason']) + ).toMatchInlineSnapshot(` + "something went wrong + ├▶ first reason + ╰▶ second reason" + `); + }); + + it('indents continuation lines under their branch', () => { + expect( + Ansi.frame('title', ['first\nwith two lines', 'last\nalso two lines']) + ).toMatchInlineSnapshot(` + "title + ├▶ first + │ with two lines + ╰▶ last + also two lines" + `); + }); +}); + +describe('Ansi.code', () => { + it('wraps a token in dim backticks and italics', () => { + expect(Ansi.code('fn()')).toMatchInlineSnapshot( + `"\`fn()\`"` + ); + }); +}); + +describe('Ansi.hint / note / help / docs', () => { + it('renders a hint line', () => { + expect(Ansi.hint('try reloading')).toMatchInlineSnapshot( + `"hint: try reloading"` + ); + }); + + it('renders a note line', () => { + expect( + Ansi.note(['read more:', 'https://example.com']) + ).toMatchInlineSnapshot( + `"note: read more:\nhttps://example.com"` + ); + }); + + it('renders a help line', () => { + expect(Ansi.help('run `wf inspect run run_123`')).toMatchInlineSnapshot( + `"help: run \`wf inspect run run_123\`"` + ); + }); + + it('renders a docs line', () => { + expect( + Ansi.docs('https://workflow-sdk.dev/docs/api-reference/workflow/sleep') + ).toMatchInlineSnapshot( + `"docs: https://workflow-sdk.dev/docs/api-reference/workflow/sleep"` + ); + }); +}); + +describe('Ansi.inline', () => { + it('underlines a single token on a single line', () => { + const out = Ansi.inline`function ${{ text: 'hello', explain: 'name not allowed' }}()`; + expect(out).toMatchInlineSnapshot(` + "function hello() + ──┬── + ╰▶ name not allowed" + `); + }); + + it('preserves subsequent lines unchanged', () => { + const out = Ansi.inline`const ${{ text: 'x', explain: 'unused' }} = 1 +const y = 2`; + expect(out).toMatchInlineSnapshot(` + "const x = 1 + ┬ + ╰▶ unused + const y = 2" + `); + }); +}); diff --git a/packages/errors/src/ansi.ts b/packages/errors/src/ansi.ts new file mode 100644 index 0000000000..fc526d3d5a --- /dev/null +++ b/packages/errors/src/ansi.ts @@ -0,0 +1,276 @@ +import chalk from 'chalk'; + +/** + * Helpers for composing structured, human-friendly error messages. + * + * The goal is to make errors *actionable*: say what happened, explain why, + * and give the user a way out. Rendering uses ANSI colors when the terminal + * supports them (chalk auto-detects) and falls back to plain text elsewhere. + * + * Typical usage: + * + * ```ts + * throw new Error( + * Ansi.frame( + * `${Ansi.code(fnName)} can only be called inside a workflow function`, + * [Ansi.note(`Read more about creating hooks: https://...`)] + * ) + * ); + * ``` + * + * Renders as: + * + * ``` + * `createHook()` can only be called inside a workflow function + * ╰▶ note: Read more about creating hooks: https://... + * ``` + */ + +const styles = { + info: chalk.blue, + help: chalk.cyan, + warn: chalk.yellow, + error: chalk.red, +}; + +/** A "help:" line — use for the primary suggested fix. */ +export function help(messages: string | string[]): string { + const message = Array.isArray(messages) ? messages.join('\n') : messages; + return styles.help(`${chalk.bold('help:')} ${message}`); +} + +/** A "hint:" line — use for supplementary context or suggestions. */ +export function hint(messages: string | string[]): string { + const message = Array.isArray(messages) ? messages.join('\n') : messages; + return styles.info(`${chalk.bold('hint:')} ${message}`); +} + +/** A "note:" line — use for informational context. */ +export function note(messages: string | string[]): string { + const message = Array.isArray(messages) ? messages.join('\n') : messages; + return styles.info(`${chalk.bold('note:')} ${message}`); +} + +/** A "docs:" line — use for a single documentation URL. */ +export function docs(url: string): string { + return styles.info(`${chalk.bold('docs:')} ${url}`); +} + +/** Render an inline code token (italicized, dim backticks). */ +export function code(str: string): string { + return chalk.italic(`${chalk.dim('`')}${str}${chalk.dim('`')}`); +} + +/** Apply dim styling to a string (used for de-emphasizing separators). */ +export function dim(str: string): string { + return chalk.dim(str); +} + +/** Bold styling (used for emphasizing class names in headers). */ +export function bold(str: string): string { + return chalk.bold(str); +} + +/** Red styling (used for the user-error attribution badge). */ +export function red(str: string): string { + return chalk.red(str); +} + +/** Magenta styling (used for the SDK-error attribution badge). */ +export function magenta(str: string): string { + return chalk.magenta(str); +} + +/** + * Frame a title with one or more continuation lines, drawn with + * box-drawing characters. The last content uses `╰▶`, others use `├▶`. + * Multi-line contents are indented under their branch. + * + * @example + * frame('Something went wrong', ['why it happened', hint('how to fix it')]) + * // Something went wrong + * // ├▶ why it happened + * // ╰▶ hint: how to fix it + */ +export function frame(title: string, contents: string[]): string { + const result = [title]; + + contents.forEach((content, index) => { + const lines = content.split('\n'); + const isLastContent = index === contents.length - 1; + + const firstLinePrefix = isLastContent ? '╰▶ ' : '├▶ '; + const continuationPrefix = isLastContent ? ' ' : '│ '; + + const framedLines = lines.map((line, lineIndex) => { + const prefix = lineIndex === 0 ? firstLinePrefix : continuationPrefix; + return `${prefix}${line}`; + }); + + result.push(...framedLines); + }); + + return result.join('\n'); +} + +interface Explain { + text: string; + explain: string; + /** adds ansi coloring */ + color?: (s: string) => string; +} + +type Explainish = + | Explain + | [text: string, explain: string, opts?: { color: Explain['color'] }]; + +type Marker = { + startCol: number; + endCol: number; + explain: string; + color?: (s: string) => string; +}; + +const identity = (s: string) => s; + +function getMarkerMidpoint(marker: Marker): number { + const textLen = marker.endCol - marker.startCol; + return marker.startCol + Math.floor(textLen / 2); +} + +function buildUnderline(markers: Marker[]): string { + const parts: string[] = []; + let pos = 0; + for (const marker of markers) { + // Treat zero-length markers as length 1 so we always emit a `┬` anchor + // for the explanation line and avoid a negative `String.repeat` count. + const textLen = Math.max(1, marker.endCol - marker.startCol); + const midPoint = Math.floor(textLen / 2); + + if (marker.startCol > pos) { + parts.push(' '.repeat(marker.startCol - pos)); + pos = marker.startCol; + } + const leftFill = '─'.repeat(midPoint); + const rightFill = '─'.repeat(Math.max(0, textLen - midPoint - 1)); + const segment = `${leftFill}┬${rightFill}`; + const colorFn = marker.color ?? identity; + parts.push(colorFn(segment)); + pos += textLen; + } + return parts.join(''); +} + +function buildExplanationLine( + marker: Marker, + midCol: number, + remainingMids: number[], + isOnlyMarker: boolean +): string { + let line = '╰'; + let pos = midCol + 1; + + for (const nextMid of remainingMids) { + while (pos < nextMid) { + line += '─'; + pos++; + } + line += '┼'; + pos++; + } + + const arrow = isOnlyMarker ? '▶ ' : '─▶ '; + line += arrow + marker.explain; + + const colorFn = marker.color ?? identity; + return ' '.repeat(midCol) + colorFn(line); +} + +/** + * Tagged template for underlining tokens in a source string and annotating + * them with explanations. Useful for pointing out offending tokens in + * user-authored code. + * + * @example + * inline`function ${{ text: 'hello', explain: 'name not allowed' }}() { + * return 666 + * }` + * // function hello() { + * // ──┬── + * // ╰▶ name not allowed + * // return 666 + * // } + */ +export function inline( + text: TemplateStringsArray, + ...values: Explainish[] +): string { + const resultLines: string[] = []; + let currentLine = ''; + let currentLineVisualLen = 0; + let pendingMarkers: Marker[] = []; + + const flushLine = () => { + resultLines.push(currentLine); + if (pendingMarkers.length === 0) { + currentLine = ''; + currentLineVisualLen = 0; + return; + } + + const markerMids = pendingMarkers.map(getMarkerMidpoint); + + resultLines.push(buildUnderline(pendingMarkers)); + + for (let i = 0; i < pendingMarkers.length; i++) { + const line = buildExplanationLine( + pendingMarkers[i], + markerMids[i], + markerMids.slice(i + 1), + pendingMarkers.length === 1 + ); + resultLines.push(line); + } + + pendingMarkers = []; + currentLine = ''; + currentLineVisualLen = 0; + }; + + for (let i = 0; i < text.length; i++) { + const segment = text[i]; + const lines = segment.split('\n'); + + for (let j = 0; j < lines.length; j++) { + if (j > 0) { + flushLine(); + } + currentLine += lines[j]; + currentLineVisualLen += lines[j].length; + } + + if (i < values.length) { + const val = values[i]; + const value: Explain = !Array.isArray(val) + ? val + : { text: val[0], explain: val[1], ...val[2] }; + const startCol = currentLineVisualLen; + const colorFn = value.color ?? ((s: string) => s); + currentLine += colorFn(value.text); + currentLineVisualLen += value.text.length; + const endCol = currentLineVisualLen; + pendingMarkers.push({ + startCol, + endCol, + explain: value.explain, + color: value.color, + }); + } + } + + if (currentLine || pendingMarkers.length > 0) { + flushLine(); + } + + return resultLines.join('\n'); +} diff --git a/packages/errors/src/build-error.test.ts b/packages/errors/src/build-error.test.ts new file mode 100644 index 0000000000..ad58dda707 --- /dev/null +++ b/packages/errors/src/build-error.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, test } from 'vitest'; +import { WorkflowBuildError, WorkflowError } from './index.js'; + +describe('WorkflowBuildError', () => { + test('sets the name and extends WorkflowError', () => { + const err = new WorkflowBuildError('boom'); + expect(err.name).toBe('WorkflowBuildError'); + expect(err).toBeInstanceOf(WorkflowError); + expect(err).toBeInstanceOf(WorkflowBuildError); + }); + + test('appends hint with a blank line separator', () => { + const err = new WorkflowBuildError('Build failed during steps', { + hint: 'run `pnpm install workflow` and try again', + }); + expect(err.hint).toBe('run `pnpm install workflow` and try again'); + expect(err.message).toMatchInlineSnapshot(` + "Build failed during steps + + run \`pnpm install workflow\` and try again" + `); + }); + + test('preserves cause for debugging', () => { + const cause = new TypeError('underlying esbuild failure'); + const err = new WorkflowBuildError('boom', { cause }); + expect(err.cause).toBe(cause); + }); + + test('WorkflowBuildError.is discriminates by name', () => { + const err = new WorkflowBuildError('boom'); + const other = new Error('boom'); + expect(WorkflowBuildError.is(err)).toBe(true); + expect(WorkflowBuildError.is(other)).toBe(false); + expect(WorkflowBuildError.is(null)).toBe(false); + }); +}); diff --git a/packages/errors/src/fatal-error.test.ts b/packages/errors/src/fatal-error.test.ts new file mode 100644 index 0000000000..91d3321b42 --- /dev/null +++ b/packages/errors/src/fatal-error.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from 'vitest'; +import { FatalError } from './index.js'; + +describe('FatalError.is', () => { + it('returns true for direct FatalError instances', () => { + expect(FatalError.is(new FatalError('boom'))).toBe(true); + }); + + it('returns true for any error with fatal: true', () => { + // The runtime uses `FatalError.is()` as its non-retry gate. Structured + // error classes that aren't direct subclasses (e.g. context-violation + // errors) opt in via a `fatal: true` own property — otherwise the + // step handler would burn three retry attempts on an error that will + // never succeed, producing a wall of duplicated log output. + class ContextViolation extends Error { + fatal = true; + name = 'ContextViolation'; + } + expect(FatalError.is(new ContextViolation())).toBe(true); + }); + + it('returns false for plain errors', () => { + expect(FatalError.is(new Error('boom'))).toBe(false); + }); + + it('returns false for non-Error values', () => { + expect(FatalError.is('boom')).toBe(false); + expect(FatalError.is(null)).toBe(false); + expect(FatalError.is(undefined)).toBe(false); + expect(FatalError.is({ fatal: true })).toBe(false); + }); + + it('returns false when fatal is not strictly true', () => { + // Defensive: we intentionally check `=== true`, not truthy, so an + // error with `fatal: 1` or `fatal: 'yes'` doesn't accidentally flip + // the retry gate. + class Weird extends Error { + fatal: unknown = 1; + } + expect(FatalError.is(new Weird())).toBe(false); + }); +}); diff --git a/packages/errors/src/index.ts b/packages/errors/src/index.ts index 572a70cd16..18868221d4 100644 --- a/packages/errors/src/index.ts +++ b/packages/errors/src/index.ts @@ -2,6 +2,11 @@ import { parseDurationToDate } from '@workflow/utils'; import type { StructuredError } from '@workflow/world'; import type { StringValue } from 'ms'; +// Note: `Ansi` helpers live under the `@workflow/errors/ansi` subpath so the +// main entry point doesn't pull `chalk` (and its ESM machinery) into every +// consumer — most places that `import from '@workflow/errors'` only want the +// error classes and never render framed messages. + const BASE_URL = 'https://workflow-sdk.dev/err'; /** @@ -71,7 +76,15 @@ export class WorkflowError extends Error { ? `${message}\n\nLearn more: ${BASE_URL}/${options.slug}` : message; super(msgDocs, { cause: options?.cause }); - this.cause = options?.cause; + // Only set `cause` when actually provided. Assigning `undefined` + // unconditionally makes `cause` an enumerable own property, which + // pollutes `util.inspect(err)` output with `{ cause: undefined, … }` + // on every no-cause subclass. The `super(...)` call above already + // conditionally sets non-enumerable `.cause` when `options.cause` + // is provided. + if (options?.cause !== undefined) { + this.cause = options.cause; + } if (options?.cause instanceof Error) { this.stack = `${this.stack}\nCaused by: ${options.cause.stack}`; @@ -217,6 +230,91 @@ export class WorkflowRuntimeError extends WorkflowError { } } +interface WorkflowBuildErrorOptions extends ErrorOptions { + /** + * An optional actionable hint appended to the main message, explaining how + * the user can resolve the failure. Shown after a blank line. + */ + hint?: string; +} + +/** + * Thrown when the workflow build pipeline (esbuild, SWC transform, file + * discovery, bundler integration) fails in a way the user can act on. + * + * This is distinct from `WorkflowRuntimeError` (which is raised at runtime + * by the workflow engine) — `WorkflowBuildError` fires during `pnpm build`, + * `next build`, or equivalent, before any workflow has started executing. + * + * Prefer attaching a short, actionable `hint` (e.g. `run \`pnpm install workflow\``) + * as plain text — the rendering layer is responsible for any styling or + * "hint:" label. Keeping `hint` plain keeps it useful in non-TTY contexts + * (CI logs, structured error serialization) where ANSI escapes are noise. + */ +export class WorkflowBuildError extends WorkflowError { + readonly hint?: string; + + constructor(message: string, options?: WorkflowBuildErrorOptions) { + const body = options?.hint ? `${message}\n\n${options.hint}` : message; + super(body, { cause: options?.cause }); + this.name = 'WorkflowBuildError'; + this.hint = options?.hint; + } + + static is(value: unknown): value is WorkflowBuildError { + return isError(value) && value.name === 'WorkflowBuildError'; + } +} + +interface SerializationErrorOptions extends ErrorOptions { + /** + * An optional actionable hint appended to the main message, explaining how + * the user can resolve the failure (e.g. "register the class with…" or + * "move this call inside a step"). + */ + hint?: string; +} + +/** + * Thrown when a value cannot be serialized into or deserialized out of the + * workflow event log. + * + * This usually indicates a user-facing mistake: passing a non-serializable + * value (class without `WORKFLOW_SERIALIZE`, locked stream, direct workflow + * function reference) into a step boundary, or an unregistered class + * returning from a step. + * + * Internal invariants (corrupted buffers, unknown format bytes) should use + * `WorkflowRuntimeError` instead — this class is scoped to things the user + * can fix in their own code. + */ +export class SerializationError extends WorkflowError { + readonly hint?: string; + /** + * Serialization errors are deterministic — if a step returns a non-POJO, + * replaying the step will always produce the same non-serializable value. + * Retrying is guaranteed to fail, so these errors are surfaced as fatal + * and skip the step-retry loop. `FatalError.is()` recognizes any error + * with `fatal: true` (see `packages/errors/src/index.ts`), so no other + * wiring is required for user-thrown SerializationErrors. + */ + readonly fatal = true; + + constructor(message: string, options?: SerializationErrorOptions) { + const body = options?.hint ? `${message}\n\n${options.hint}` : message; + super(body, { + slug: ERROR_SLUGS.SERIALIZATION_FAILED, + cause: options?.cause, + }); + this.name = 'SerializationError'; + this.hint = options?.hint; + } + + static is(value: unknown): value is SerializationError { + return isError(value) && value.name === 'SerializationError'; + } +} + /** * Thrown when a step function is not registered in the current deployment. * @@ -543,6 +641,13 @@ export class RunNotSupportedError extends WorkflowError { * A fatal error is an error that cannot be retried. * It will cause the step to fail and the error will * be bubbled up to the workflow logic. + * + * Any error can opt into the non-retry behavior by setting a `fatal: true` + * own property. This is how structured error classes that aren't direct + * `FatalError` subclasses (e.g. context-violation errors) signal to the + * step handler that retrying will never help — the user's code is calling + * a workflow-only API from the wrong context, or similar — and burning + * retry attempts just produces a wall of duplicated log output. */ export class FatalError extends Error { fatal = true; @@ -553,7 +658,9 @@ export class FatalError extends Error { } static is(value: unknown): value is FatalError { - return isError(value) && value.name === 'FatalError'; + if (!isError(value)) return false; + if (value.name === 'FatalError') return true; + return (value as { fatal?: unknown }).fatal === true; } } diff --git a/packages/errors/src/serialization-error.test.ts b/packages/errors/src/serialization-error.test.ts new file mode 100644 index 0000000000..49a25db7a8 --- /dev/null +++ b/packages/errors/src/serialization-error.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, test } from 'vitest'; +import { FatalError, SerializationError, WorkflowError } from './index.js'; + +describe('SerializationError', () => { + test('sets the name and extends WorkflowError', () => { + const err = new SerializationError('boom'); + expect(err.name).toBe('SerializationError'); + expect(err).toBeInstanceOf(WorkflowError); + expect(err).toBeInstanceOf(SerializationError); + }); + + test('includes the serialization-failed docs link', () => { + const err = new SerializationError('boom'); + expect(err.message).toContain('boom'); + expect(err.message).toContain( + 'https://workflow-sdk.dev/err/serialization-failed' + ); + }); + + test('appends hint before the docs link', () => { + const err = new SerializationError('boom', { + hint: 'Register the class with WORKFLOW_SERIALIZE.', + }); + expect(err.hint).toBe('Register the class with WORKFLOW_SERIALIZE.'); + expect(err.message).toMatchInlineSnapshot(` + "boom + + Register the class with WORKFLOW_SERIALIZE. + + Learn more: https://workflow-sdk.dev/err/serialization-failed" + `); + }); + + test('preserves cause for debugging', () => { + const cause = new TypeError('underlying'); + const err = new SerializationError('boom', { cause }); + expect(err.cause).toBe(cause); + }); + + test('SerializationError.is discriminates by name', () => { + const err = new SerializationError('boom'); + const other = new Error('boom'); + expect(SerializationError.is(err)).toBe(true); + expect(SerializationError.is(other)).toBe(false); + expect(SerializationError.is(null)).toBe(false); + }); + + test('is fatal — FatalError.is() short-circuits retry loop', () => { + // Serialization failures are deterministic. Retrying a step that + // returned a non-POJO will produce the same error on every attempt, + // so the step handler should not burn the retry budget. We opt in + // via a `fatal: true` own property that FatalError.is() recognizes. + const err = new SerializationError('boom'); + expect(err.fatal).toBe(true); + expect(FatalError.is(err)).toBe(true); + }); +}); diff --git a/packages/utils/src/index.ts b/packages/utils/src/index.ts index 5aff0af4e5..7128e83132 100644 --- a/packages/utils/src/index.ts +++ b/packages/utils/src/index.ts @@ -1,5 +1,7 @@ export { pluralize } from './pluralize.js'; export { + formatStepName, + formatWorkflowName, parseClassName, parseStepName, parseWorkflowName, diff --git a/packages/utils/src/parse-name.test.ts b/packages/utils/src/parse-name.test.ts index 6a1f5bfd9e..e87a3aa4f1 100644 --- a/packages/utils/src/parse-name.test.ts +++ b/packages/utils/src/parse-name.test.ts @@ -1,5 +1,11 @@ import { describe, expect, test } from 'vitest'; -import { parseClassName, parseStepName, parseWorkflowName } from './parse-name'; +import { + formatStepName, + formatWorkflowName, + parseClassName, + parseStepName, + parseWorkflowName, +} from './parse-name'; describe('parseWorkflowName', () => { test('should parse a valid workflow name with relative path', () => { @@ -219,3 +225,38 @@ describe('parseClassName', () => { expect(parseClassName('workflow//path//fn')).toBeNull(); }); }); + +describe('formatStepName / formatWorkflowName', () => { + test('renders a step with relative path as "shortName (moduleSpecifier)"', () => { + expect(formatStepName('step//./workflows/1_simple//add')).toBe( + 'add (./workflows/1_simple)' + ); + }); + + test('renders a workflow with relative path the same way', () => { + expect(formatWorkflowName('workflow//./workflows/1_simple//simple')).toBe( + 'simple (./workflows/1_simple)' + ); + }); + + test('renders a step with module specifier', () => { + expect(formatStepName('step//@myorg/tasks@2.0.0//processOrder')).toBe( + 'processOrder (@myorg/tasks@2.0.0)' + ); + }); + + test('nested functions use the leaf name', () => { + expect( + formatStepName('step//./workflows/order//processOrder/innerStep') + ).toBe('innerStep (./workflows/order)'); + }); + + test('falls back to the raw name when the format is unrecognized', () => { + // Never silently drop information — if parsing fails the caller still + // gets something back that identifies the entity. + expect(formatStepName('something-weird')).toBe('something-weird'); + expect(formatWorkflowName('step//wrong-tag//fn')).toBe( + 'step//wrong-tag//fn' + ); + }); +}); diff --git a/packages/utils/src/parse-name.ts b/packages/utils/src/parse-name.ts index 879b6284b8..9be071d93b 100644 --- a/packages/utils/src/parse-name.ts +++ b/packages/utils/src/parse-name.ts @@ -87,3 +87,33 @@ export function parseStepName(name: string) { export function parseClassName(name: string) { return parseName('class', name); } + +/** + * Human-friendly single-line rendering of a step or workflow name for log + * messages. Parses the machine name (`step//./workflows/1_simple//add`) and + * renders it as `add (./workflows/1_simple)` so users see the short function + * name and the source module specifier without the internal `//` syntax. + * + * Falls back to the raw name if parsing fails (e.g. older name formats or + * user-provided strings we don't recognize) so logs never silently drop + * information. + */ +export function formatStepName(name: string): string { + return formatParsedName(parseStepName(name), name); +} + +export function formatWorkflowName(name: string): string { + return formatParsedName(parseWorkflowName(name), name); +} + +function formatParsedName( + parsed: { + shortName: string; + moduleSpecifier: string; + functionName: string; + } | null, + fallback: string +): string { + if (!parsed) return fallback; + return `${parsed.shortName} (${parsed.moduleSpecifier})`; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7b9b92fd3f..f699fd2e1c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -680,6 +680,9 @@ importers: '@workflow/utils': specifier: workspace:* version: link:../utils + chalk: + specifier: 5.6.2 + version: 5.6.2 ms: specifier: 2.1.3 version: 2.1.3 @@ -696,6 +699,9 @@ importers: '@workflow/world': specifier: workspace:* version: link:../world + vitest: + specifier: 'catalog:' + version: 4.0.18(@opentelemetry/api@1.9.1)(@types/node@22.19.0)(jiti@2.6.1)(jsdom@26.1.0)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3) packages/nest: dependencies: @@ -22873,6 +22879,14 @@ snapshots: optionalDependencies: vite: 6.4.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3) + '@vitest/mocker@4.0.18(vite@7.3.2(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3))': + dependencies: + '@vitest/spy': 4.0.18 + estree-walker: 3.0.3 + magic-string: 0.30.21 + optionalDependencies: + vite: 7.3.2(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3) + '@vitest/mocker@4.0.18(vite@7.3.2(@types/node@24.6.2)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3))': dependencies: '@vitest/spy': 4.0.18 @@ -31834,7 +31848,7 @@ snapshots: vitest@4.0.18(@opentelemetry/api@1.9.0)(@types/node@22.19.0)(jiti@2.6.1)(jsdom@26.1.0)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3): dependencies: '@vitest/expect': 4.0.18 - '@vitest/mocker': 4.0.18(vite@7.3.2(@types/node@24.6.2)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3)) + '@vitest/mocker': 4.0.18(vite@7.3.2(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3)) '@vitest/pretty-format': 4.0.18 '@vitest/runner': 4.0.18 '@vitest/snapshot': 4.0.18 @@ -31873,7 +31887,7 @@ snapshots: vitest@4.0.18(@opentelemetry/api@1.9.1)(@types/node@22.19.0)(jiti@2.6.1)(jsdom@26.1.0)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3): dependencies: '@vitest/expect': 4.0.18 - '@vitest/mocker': 4.0.18(vite@7.3.2(@types/node@24.6.2)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3)) + '@vitest/mocker': 4.0.18(vite@7.3.2(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.3)) '@vitest/pretty-format': 4.0.18 '@vitest/runner': 4.0.18 '@vitest/snapshot': 4.0.18