From 6d4b6969cd96596fea43808e6cddbdd70c029b8d Mon Sep 17 00:00:00 2001 From: Khaliq Date: Fri, 8 May 2026 20:06:18 +0200 Subject: [PATCH 1/2] feat(workflows): make reliability repair-aware by default --- .github/workflows/workflow-reliability.yml | 56 ++++ .../completed/2026-05/traj_bdrlknyl8twj.json | 53 ++++ .../completed/2026-05/traj_bdrlknyl8twj.md | 31 ++ .trajectories/index.json | 9 +- packages/sdk/src/workflows/README.md | 2 +- .../workflow-reliability-contract.test.ts | 183 ++++++++++++ .../workflow-reliability-e2e.test.ts | 248 ++++++++++++++++ packages/sdk/src/workflows/builder.ts | 24 +- packages/sdk/src/workflows/runner.ts | 264 +++++++++++++----- packages/sdk/src/workflows/schema.json | 2 +- packages/sdk/src/workflows/types.ts | 2 +- 11 files changed, 802 insertions(+), 72 deletions(-) create mode 100644 .github/workflows/workflow-reliability.yml create mode 100644 .trajectories/completed/2026-05/traj_bdrlknyl8twj.json create mode 100644 .trajectories/completed/2026-05/traj_bdrlknyl8twj.md create mode 100644 packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts diff --git a/.github/workflows/workflow-reliability.yml b/.github/workflows/workflow-reliability.yml new file mode 100644 index 000000000..7818db432 --- /dev/null +++ b/.github/workflows/workflow-reliability.yml @@ -0,0 +1,56 @@ +name: Workflow Reliability + +on: + pull_request: + branches: [main] + paths: + - '.github/workflows/workflow-reliability.yml' + - 'packages/sdk/src/workflows/**' + - 'packages/sdk/src/__tests__/**' + - 'packages/sdk/package.json' + - 'packages/workflow-types/**' + - 'package-lock.json' + - 'package.json' + push: + branches: [main] + paths: + - '.github/workflows/workflow-reliability.yml' + - 'packages/sdk/src/workflows/**' + - 'packages/sdk/src/__tests__/**' + - 'packages/sdk/package.json' + - 'packages/workflow-types/**' + - 'package-lock.json' + - 'package.json' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: 
true + +jobs: + workflow-reliability: + name: SDK Workflow Reliability + runs-on: ubuntu-latest + env: + NPM_CONFIG_FUND: false + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + cache: 'npm' + + - name: Install dependencies + run: npm ci + + - name: Typecheck SDK workflows + run: npm --prefix packages/sdk run check + + - name: Run workflow reliability contract matrix + run: | + npx vitest run --root packages/sdk --config vitest.config.ts \ + src/workflows/__tests__/workflow-reliability-contract.test.ts \ + src/workflows/__tests__/workflow-reliability-e2e.test.ts diff --git a/.trajectories/completed/2026-05/traj_bdrlknyl8twj.json b/.trajectories/completed/2026-05/traj_bdrlknyl8twj.json new file mode 100644 index 000000000..83278b2e1 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_bdrlknyl8twj.json @@ -0,0 +1,53 @@ +{ + "id": "traj_bdrlknyl8twj", + "version": 1, + "task": { + "title": "Add workflow reliability defaults and E2E matrix" + }, + "status": "completed", + "startedAt": "2026-05-08T17:54:45.069Z", + "completedAt": "2026-05-08T18:05:37.305Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-05-08T18:02:02.075Z" + } + ], + "chapters": [ + { + "id": "chap_sqrkpwofov15", + "title": "Work", + "agentName": "default", + "startedAt": "2026-05-08T18:02:02.075Z", + "endedAt": "2026-05-08T18:05:37.305Z", + "events": [ + { + "ts": 1778263322077, + "type": "decision", + "content": "Made retry-mode workflows repair-aware by default: Made retry-mode workflows repair-aware by default", + "raw": { + "question": "Made retry-mode workflows repair-aware by default", + "chosen": "Made retry-mode workflows repair-aware by default", + "alternatives": [], + "reasoning": "Workflow reliability is now a product contract: SDK builder workflows and raw runner configs with agents get bounded repair retries unless callers explicitly choose 
fail-fast, continue, or repairRetries: 0. Agent/artifact failures now invoke repair before retrying, not only deterministic gates." + }, + "significance": "high" + } + ] + } + ], + "retrospective": { + "summary": "Added Relay workflow reliability defaults, repairable builder presets, agent-step repair before retry, API-agent verification through the normal agent loop, worktree-step validation, a dedicated reliability CI job, and contract/E2E coverage for malformed artifacts, child INVALID_ARTIFACT recovery, deterministic gate repair, fan-out isolation, master-child, worktree-backed, deterministic-only, and agent-plus-gate workflow shapes.", + "approach": "Standard approach", + "confidence": 0.9 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-workflow-reliability-defaults", + "tags": [], + "_trace": { + "startRef": "0e536f46028fb008342efc0908342408984b37d0", + "endRef": "0e536f46028fb008342efc0908342408984b37d0" + } +} \ No newline at end of file diff --git a/.trajectories/completed/2026-05/traj_bdrlknyl8twj.md b/.trajectories/completed/2026-05/traj_bdrlknyl8twj.md new file mode 100644 index 000000000..4d1c6b046 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_bdrlknyl8twj.md @@ -0,0 +1,31 @@ +# Trajectory: Add workflow reliability defaults and E2E matrix + +> **Status:** ✅ Completed +> **Confidence:** 90% +> **Started:** May 8, 2026 at 07:54 PM +> **Completed:** May 8, 2026 at 08:05 PM + +--- + +## Summary + +Added Relay workflow reliability defaults, repairable builder presets, agent-step repair before retry, API-agent verification through the normal agent loop, worktree-step validation, a dedicated reliability CI job, and contract/E2E coverage for malformed artifacts, child INVALID_ARTIFACT recovery, deterministic gate repair, fan-out isolation, master-child, worktree-backed, deterministic-only, and agent-plus-gate workflow shapes. 
+ +**Approach:** Standard approach + +--- + +## Key Decisions + +### Made retry-mode workflows repair-aware by default +- **Chose:** Made retry-mode workflows repair-aware by default +- **Reasoning:** Workflow reliability is now a product contract: SDK builder workflows and raw runner configs with agents get bounded repair retries unless callers explicitly choose fail-fast, continue, or repairRetries: 0. Agent/artifact failures now invoke repair before retrying, not only deterministic gates. + +--- + +## Chapters + +### 1. Work +*Agent: default* + +- Made retry-mode workflows repair-aware by default: Made retry-mode workflows repair-aware by default diff --git a/.trajectories/index.json b/.trajectories/index.json index 3a4581d6c..16fca31e4 100644 --- a/.trajectories/index.json +++ b/.trajectories/index.json @@ -1,6 +1,6 @@ { "version": 1, - "lastUpdated": "2026-05-08T15:51:38.996Z", + "lastUpdated": "2026-05-08T18:05:37.419Z", "trajectories": { "traj_1775914133873_35667beb": { "title": "fix-sdk-build-resolution-workflow", @@ -282,6 +282,13 @@ "startedAt": "2026-05-08T15:50:35.978Z", "completedAt": "2026-05-08T15:51:38.854Z", "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-repairable-workflows/.trajectories/completed/2026-05/traj_vkozdglobkyg.json" + }, + "traj_bdrlknyl8twj": { + "title": "Add workflow reliability defaults and E2E matrix", + "status": "completed", + "startedAt": "2026-05-08T17:54:45.069Z", + "completedAt": "2026-05-08T18:05:37.305Z", + "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-workflow-reliability-defaults/.trajectories/completed/2026-05/traj_bdrlknyl8twj.json" } } } \ No newline at end of file diff --git a/packages/sdk/src/workflows/README.md b/packages/sdk/src/workflows/README.md index dd41b9262..ec15bd395 100644 --- a/packages/sdk/src/workflows/README.md +++ b/packages/sdk/src/workflows/README.md @@ -371,7 +371,7 @@ errorHandling: notifyChannel: alerts ``` -When `errorHandling.strategy: retry` includes an explicit 
`repairRetries` budget, deterministic step or verification gate failures are treated as repairable work before terminal failure. The runner chooses `errorHandling.repairAgent` when set, otherwise it uses the step's owning/upstream agent when possible, then falls back to the best available workflow agent. The selected agent gets the failed command, working directory, exit information, and captured output, then the deterministic gate is retried. +Retry-mode workflows are repair-aware by default: when `errorHandling` is omitted or uses `strategy: retry`, and the workflow defines at least one agent, the runner applies a repair budget that defaults to `maxRetries` (or 2 when neither `repairRetries` nor `maxRetries` is set). Deterministic step failures, verification gate failures, and malformed agent artifacts are treated as repairable work before terminal failure. The runner chooses `errorHandling.repairAgent` when set, otherwise it uses the step's owning/upstream agent when possible, then falls back to the best available workflow agent. The selected agent gets the failed command or agent output, working directory, exit information, and captured evidence, then the failed gate or step is retried. Use `repairRetries: 0`, `strategy: fail-fast`, or `strategy: continue` when a workflow intentionally should not invoke repair agents. 
## Built-in Templates diff --git a/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts b/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts index 715b9b4d9..b77cae1c1 100644 --- a/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts +++ b/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts @@ -3,6 +3,7 @@ import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; import os from 'node:os'; import path from 'node:path'; +import { workflow } from '../builder.js'; import { WorkflowRunner, type WorkflowDb } from '../runner.js'; import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../types.js'; @@ -60,6 +61,65 @@ function baseConfig(overrides: Partial = {}): RelayYamlConfig { } describe('workflow reliability contract', () => { + it('makes SDK builder workflows repairable by default', () => { + const config = workflow('default-reliable') + .agent('fixer', { cli: 'claude', role: 'implementation engineer' }) + .step('verify', { type: 'deterministic', command: 'npm test' }) + .toConfig(); + + expect(config.errorHandling).toMatchObject({ + strategy: 'retry', + maxRetries: 2, + retryDelayMs: 1000, + repairRetries: 2, + }); + }); + + it('offers reliable and repairable presets for workflow authors', () => { + const reliable = workflow('reliable') + .agent('fixer', { cli: 'claude', role: 'implementation engineer' }) + .step('verify', { type: 'deterministic', command: 'npm test' }) + .reliable({ repairAgent: 'fixer', repairRetries: 3 }) + .toConfig(); + const repairable = workflow('repairable') + .agent('fixer', { cli: 'claude', role: 'implementation engineer' }) + .step('verify', { type: 'deterministic', command: 'npm test' }) + .repairable({ maxRetries: 4 }) + .toConfig(); + + expect(reliable.errorHandling).toMatchObject({ + strategy: 'retry', + maxRetries: 3, + repairAgent: 'fixer', + repairRetries: 3, + }); + 
expect(repairable.errorHandling).toMatchObject({ + strategy: 'retry', + maxRetries: 4, + repairRetries: 4, + }); + }); + + it('applies repair-aware defaults to raw runner configs with agents', async () => { + const executeDeterministicStep = vi + .fn() + .mockResolvedValueOnce({ output: 'missing artifact', exitCode: 1 }) + .mockResolvedValueOnce({ output: 'artifact exists', exitCode: 0 }); + const executeAgentStep = vi.fn(async () => 'created artifact'); + const runner = new WorkflowRunner({ + db: makeDb(), + workspaceId: 'ws-test', + cwd: process.cwd(), + executor: { executeDeterministicStep, executeAgentStep }, + }); + + const run = await runner.execute(baseConfig(), 'default'); + + expect(run.status, run.error).toBe('completed'); + expect(executeAgentStep).toHaveBeenCalledTimes(1); + expect(executeDeterministicStep).toHaveBeenCalledTimes(2); + }); + it('routes repairable deterministic failures through a repair agent before retrying', async () => { const executeDeterministicStep = vi .fn() @@ -380,6 +440,129 @@ describe('workflow reliability contract', () => { } }); + it('repairs malformed agent artifacts before retrying the agent step', async () => { + const executeAgentStep = vi.fn(async (step) => { + if (step.name.includes('-repair-')) return 'patched artifact instructions'; + if ((executeAgentStep as any).mock.calls.filter(([s]: any[]) => s.name === 'write-artifact').length === 1) { + return 'plain prose without required metadata'; + } + return 'artifact complete\nRICKY_MASTER_CHILD_RUN_VERIFIED'; + }); + const runner = new WorkflowRunner({ + db: makeDb(), + workspaceId: 'ws-test', + cwd: process.cwd(), + executor: { executeAgentStep }, + }); + + const run = await runner.execute( + baseConfig({ + workflows: [ + { + name: 'default', + steps: [ + { + name: 'write-artifact', + agent: 'fixer', + task: 'Write a structured workflow artifact.', + verification: { + type: 'output_contains', + value: 'RICKY_MASTER_CHILD_RUN_VERIFIED', + }, + }, + ], + }, + ], + }), + 
'default' + ); + + expect(run.status, run.error).toBe('completed'); + expect(executeAgentStep).toHaveBeenCalledTimes(3); + expect((executeAgentStep as any).mock.calls[1][0]).toMatchObject({ name: 'write-artifact-repair-1' }); + expect((executeAgentStep as any).mock.calls[1][2]).toContain('invalid artifact'); + }); + + it('repairs child INVALID_ARTIFACT failures instead of stopping the master at attempt one', async () => { + const executeAgentStep = vi.fn(async (step) => { + if (step.name.includes('-repair-')) return 'repaired child workflow artifact'; + const childAttempts = (executeAgentStep as any).mock.calls.filter( + ([s]: any[]) => s.name === 'run-update-config-2' + ).length; + if (childAttempts === 1) { + return 'Execution: blocked — INVALID_ARTIFACT at final-hard-validation'; + } + return 'Execution: success — run child-fixed\nRICKY_MASTER_CHILD_RUN_VERIFIED'; + }); + const runner = new WorkflowRunner({ + db: makeDb(), + workspaceId: 'ws-test', + cwd: process.cwd(), + executor: { executeAgentStep }, + }); + + const run = await runner.execute( + baseConfig({ + workflows: [ + { + name: 'default', + steps: [ + { + name: 'run-update-config-2', + agent: 'fixer', + task: 'Run the child workflow and return structured evidence.', + verification: { + type: 'output_contains', + value: 'RICKY_MASTER_CHILD_RUN_VERIFIED', + }, + }, + { + name: 'final-signoff', + type: 'deterministic', + command: 'true', + dependsOn: ['run-update-config-2'], + }, + ], + }, + ], + }), + 'default' + ); + + expect(run.status, run.error).toBe('completed'); + expect(executeAgentStep).toHaveBeenCalledTimes(3); + expect((executeAgentStep as any).mock.calls[1][2]).toContain('INVALID_ARTIFACT'); + }); + + it('keeps retrying the failed gate when a repair agent returns an unusable fix', async () => { + const executeDeterministicStep = vi + .fn() + .mockResolvedValueOnce({ output: 'INVALID_ARTIFACT', exitCode: 1 }) + .mockResolvedValueOnce({ output: 'still INVALID_ARTIFACT', exitCode: 1 }) + 
.mockResolvedValueOnce({ output: 'artifact valid', exitCode: 0 }); + const executeAgentStep = vi + .fn() + .mockResolvedValueOnce('malformed repair response without fenced artifact') + .mockResolvedValueOnce('valid repair response with metadata'); + const runner = new WorkflowRunner({ + db: makeDb(), + workspaceId: 'ws-test', + cwd: process.cwd(), + executor: { executeDeterministicStep, executeAgentStep }, + }); + + const run = await runner.execute( + baseConfig({ + errorHandling: { strategy: 'retry', repairRetries: 2, retryDelayMs: 1, repairAgent: 'fixer' }, + }), + 'default' + ); + + expect(run.status, run.error).toBe('completed'); + expect(executeAgentStep).toHaveBeenCalledTimes(2); + expect(executeDeterministicStep).toHaveBeenCalledTimes(3); + }); + it('does not run repair agents for fail-fast workflows even when agents are present', async () => { const executeDeterministicStep = vi.fn(async () => ({ output: 'hard failure', exitCode: 1 })); const executeAgentStep = vi.fn(async () => 'unexpected repair'); diff --git a/packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts b/packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts new file mode 100644 index 000000000..6a222fdac --- /dev/null +++ b/packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts @@ -0,0 +1,248 @@ +import { describe, expect, it } from 'vitest'; +import { execSync } from 'node:child_process'; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; + +import { WorkflowRunner } from '../runner.js'; +import type { AgentDefinition, RelayYamlConfig, WorkflowStep } from '../types.js'; + +const CHECK_MARKER = + 'node -e "const fs=require(\'fs\');const v=fs.readFileSync(\'marker.txt\',\'utf8\').trim();if(v!==\'fixed\'){console.log(\'marker=\'+v);process.exit(1)}console.log(\'ok\')"'; + +function baseConfig( + name: string, + pattern: RelayYamlConfig['swarm']['pattern'], + steps: 
NonNullable[number]['steps'] +): RelayYamlConfig { + return { + version: '1', + name, + swarm: { pattern }, + agents: [ + { + name: 'fixer', + cli: 'claude', + role: 'implementation engineer', + interactive: false, + }, + ], + workflows: [{ name: 'default', steps }], + trajectories: false, + }; +} + +function makeWorkspace(): string { + const cwd = mkdtempSync(path.join(os.tmpdir(), 'relay-workflow-reliability-e2e-')); + writeFileSync(path.join(cwd, 'marker.txt'), 'broken\n'); + return cwd; +} + +async function runReliabilityWorkflow(config: RelayYamlConfig, cwd = makeWorkspace()) { + const callsByStep = new Map(); + const executeAgentStep = async ( + step: WorkflowStep, + _agent: AgentDefinition, + resolvedTask: string + ): Promise => { + const count = (callsByStep.get(step.name) ?? 0) + 1; + callsByStep.set(step.name, count); + + if (step.name.includes('-repair-')) { + writeFileSync(path.join(step.cwd ?? cwd, 'marker.txt'), 'fixed\n'); + return `repair complete for ${step.name}`; + } + + if (/invalid[- ]artifact/i.test(step.name) && count === 1) { + return 'Execution: blocked — INVALID_ARTIFACT at final-hard-validation'; + } + + if (/child/i.test(resolvedTask) && count === 1) { + return 'Execution: blocked — INVALID_ARTIFACT at final-hard-validation'; + } + + return `Execution: success\nRICKY_MASTER_CHILD_RUN_VERIFIED\n${resolvedTask.slice(0, 80)}`; + }; + + const runner = new WorkflowRunner({ + workspaceId: 'ws-e2e', + cwd, + executor: { executeAgentStep }, + }); + + try { + const run = await runner.execute(config, 'default'); + return { run, callsByStep, cwd }; + } finally { + rmSync(cwd, { recursive: true, force: true }); + } +} + +describe('workflow reliability e2e shapes', () => { + it('repairs a failing deterministic gate in a pipeline workflow', async () => { + const { run, callsByStep } = await runReliabilityWorkflow( + baseConfig('reliable-pipeline', 'pipeline', [ + { name: 'prepare', agent: 'fixer', task: 'Prepare inputs.' 
}, + { + name: 'verify', + type: 'deterministic', + command: CHECK_MARKER, + dependsOn: ['prepare'], + captureOutput: true, + }, + ]) + ); + + expect(run.status, run.error).toBe('completed'); + expect(callsByStep.has('verify-repair-1')).toBe(true); + }); + + it('repairs a failing deterministic gate in a DAG workflow', async () => { + const { run, callsByStep } = await runReliabilityWorkflow( + baseConfig('reliable-dag', 'dag', [ + { name: 'backend', agent: 'fixer', task: 'Prepare backend evidence.' }, + { name: 'frontend', agent: 'fixer', task: 'Prepare frontend evidence.' }, + { + name: 'integrated-validation', + type: 'deterministic', + command: CHECK_MARKER, + dependsOn: ['backend', 'frontend'], + captureOutput: true, + }, + ]) + ); + + expect(run.status, run.error).toBe('completed'); + expect(callsByStep.has('integrated-validation-repair-1')).toBe(true); + }); + + it('keeps fan-out siblings isolated while repairing the failed branch gate', async () => { + const { run, callsByStep } = await runReliabilityWorkflow( + baseConfig('reliable-fan-out', 'fan-out', [ + { + name: 'branch-a-validation', + type: 'deterministic', + command: CHECK_MARKER, + captureOutput: true, + }, + { + name: 'branch-b-validation', + type: 'deterministic', + command: 'node -e "console.log(\'branch-b-ok\')"', + captureOutput: true, + }, + { + name: 'merge', + agent: 'fixer', + task: 'Merge {{steps.branch-a-validation.output}} and {{steps.branch-b-validation.output}}.', + dependsOn: ['branch-a-validation', 'branch-b-validation'], + }, + ]) + ); + + expect(run.status, run.error).toBe('completed'); + expect(callsByStep.has('branch-a-validation-repair-1')).toBe(true); + expect(callsByStep.has('branch-b-validation-repair-1')).toBe(false); + }); + + it('repairs child workflow INVALID_ARTIFACT output before master final validation', async () => { + const { run, callsByStep } = await runReliabilityWorkflow( + baseConfig('reliable-master-child', 'hierarchical', [ + { + name: 'run-child-workflow', + 
agent: 'fixer', + task: 'Run child workflow and return RICKY_MASTER_CHILD_RUN_VERIFIED.', + verification: { + type: 'output_contains', + value: 'RICKY_MASTER_CHILD_RUN_VERIFIED', + }, + }, + { + name: 'master-final-validation', + type: 'deterministic', + command: CHECK_MARKER, + dependsOn: ['run-child-workflow'], + captureOutput: true, + }, + ]) + ); + + expect(run.status, run.error).toBe('completed'); + expect(callsByStep.has('run-child-workflow-repair-1')).toBe(true); + expect(callsByStep.has('master-final-validation-repair-1')).toBe(false); + }); + + it('repairs a deterministic-only workflow with a configured repair agent', async () => { + const { run, callsByStep } = await runReliabilityWorkflow( + baseConfig('reliable-deterministic-only', 'pipeline', [ + { + name: 'verify-only', + type: 'deterministic', + command: CHECK_MARKER, + captureOutput: true, + }, + ]) + ); + + expect(run.status, run.error).toBe('completed'); + expect(callsByStep.has('verify-only-repair-1')).toBe(true); + }); + + it('repairs agent artifact retries and then passes deterministic validation', async () => { + const { run, callsByStep } = await runReliabilityWorkflow( + baseConfig('reliable-agent-plus-gates', 'pipeline', [ + { + name: 'invalid-artifact-author', + agent: 'fixer', + task: 'Produce structured artifact metadata.', + verification: { + type: 'output_contains', + value: 'RICKY_MASTER_CHILD_RUN_VERIFIED', + }, + }, + { + name: 'verify-artifact', + type: 'deterministic', + command: CHECK_MARKER, + dependsOn: ['invalid-artifact-author'], + captureOutput: true, + }, + ]) + ); + + expect(run.status, run.error).toBe('completed'); + expect(callsByStep.has('invalid-artifact-author-repair-1')).toBe(true); + expect(callsByStep.has('verify-artifact-repair-1')).toBe(false); + }); + + it('repairs validation inside a git worktree-backed workflow', async () => { + const cwd = makeWorkspace(); + execSync('git init -q', { cwd }); + execSync('git config user.email test@example.com', { cwd }); + 
execSync('git config user.name "Relay Test"', { cwd }); + execSync('git add marker.txt && git commit -q -m init', { cwd }); + + const { run, callsByStep } = await runReliabilityWorkflow( + baseConfig('reliable-worktree', 'pipeline', [ + { + name: 'make-worktree', + type: 'worktree', + branch: 'reliability-worktree-test', + path: 'child-worktree', + }, + { + name: 'verify-in-worktree', + type: 'deterministic', + command: CHECK_MARKER, + cwd: 'child-worktree', + dependsOn: ['make-worktree'], + captureOutput: true, + }, + ]), + cwd + ); + + expect(run.status, run.error).toBe('completed'); + expect(callsByStep.has('verify-in-worktree-repair-1')).toBe(true); + }); +}); diff --git a/packages/sdk/src/workflows/builder.ts b/packages/sdk/src/workflows/builder.ts index 5343ef08f..6756b4141 100644 --- a/packages/sdk/src/workflows/builder.ts +++ b/packages/sdk/src/workflows/builder.ts @@ -98,6 +98,8 @@ export interface ErrorOptions { repairRetries?: number; } +export interface ReliabilityOptions extends ErrorOptions {} + export interface WorkflowRunOptions { /** Run a specific workflow by name (default: first). */ workflow?: string; @@ -373,6 +375,25 @@ export class WorkflowBuilder { return this; } + /** + * Opt into the product reliability contract: repairable workflow failures get + * routed through an agent and retried before the workflow is allowed to fail. + */ + repairable(options: ReliabilityOptions = {}): this { + return this.onError('retry', { + maxRetries: options.maxRetries ?? options.repairRetries ?? 2, + retryDelayMs: options.retryDelayMs ?? 1000, + notifyChannel: options.notifyChannel, + repairAgent: options.repairAgent, + repairRetries: options.repairRetries ?? options.maxRetries ?? 2, + }); + } + + /** Alias for `.repairable()` for workflow authors who think in product terms. 
*/ + reliable(options: ReliabilityOptions = {}): this { + return this.repairable(options); + } + private validateBuilderState(): void { const hasAgentSteps = this._steps.some((s) => s.type !== 'deterministic' && s.type !== 'worktree'); if (hasAgentSteps && this._agents.length === 0) { @@ -429,7 +450,8 @@ export class WorkflowBuilder { config.errorHandling = this._errorHandling ?? { strategy: 'retry', maxRetries: 2, - retryDelayMs: 10_000, + retryDelayMs: 1000, + repairRetries: 2, }; if (this._coordination !== undefined) config.coordination = this._coordination; if (this._state !== undefined) config.state = this._state; diff --git a/packages/sdk/src/workflows/runner.ts b/packages/sdk/src/workflows/runner.ts index 74513b63c..93a125730 100644 --- a/packages/sdk/src/workflows/runner.ts +++ b/packages/sdk/src/workflows/runner.ts @@ -397,11 +397,28 @@ interface DeterministicRepairContext { exitSignal?: string; } +interface AgentStepRepairContext { + step: WorkflowStep; + agentDef: AgentDefinition; + attempt: number; + maxRetries: number; + cwd: string; + error: string; + output: string; + exitCode?: number; + exitSignal?: string; + completionReason?: WorkflowStepCompletionReason; +} + type DiagnosticVerificationCheck = VerificationCheck & { diagnosticAgent?: string; diagnosticTimeout?: number; }; +const DEFAULT_WORKFLOW_MAX_RETRIES = 2; +const DEFAULT_WORKFLOW_REPAIR_RETRIES = 2; +const DEFAULT_WORKFLOW_RETRY_DELAY_MS = 1000; + interface ChannelEvidenceOptions { stepName?: string; sender?: string; @@ -2080,6 +2097,35 @@ export class WorkflowRunner { return config; } + private applyReliabilityDefaults(config: RelayYamlConfig): RelayYamlConfig { + const existing = config.errorHandling; + if (existing?.strategy === 'fail-fast' || existing?.strategy === 'continue') { + return config; + } + + const hasRepairAgentCandidate = (config.agents ?? []).length > 0; + const maxRetries = + existing?.maxRetries ?? + existing?.repairRetries ?? + (existing ? 
DEFAULT_WORKFLOW_MAX_RETRIES : DEFAULT_WORKFLOW_MAX_RETRIES); + const repairRetries = + existing?.repairRetries ?? + (hasRepairAgentCandidate + ? existing?.maxRetries ?? DEFAULT_WORKFLOW_REPAIR_RETRIES + : existing?.repairRetries); + + return { + ...config, + errorHandling: { + ...existing, + strategy: 'retry', + maxRetries, + retryDelayMs: existing?.retryDelayMs ?? DEFAULT_WORKFLOW_RETRY_DELAY_MS, + ...(repairRetries !== undefined ? { repairRetries } : {}), + }, + }; + } + /** Validate a config object against the RelayYamlConfig shape. */ validateConfig(config: unknown, source = ''): asserts config is RelayYamlConfig { if (typeof config !== 'object' || config === null) { @@ -2511,6 +2557,10 @@ export class WorkflowRunner { if (typeof s.command !== 'string') { throw new Error(`${source}: deterministic step "${s.name}" must have a "command" field`); } + } else if (s.type === 'worktree') { + if (typeof s.branch !== 'string') { + throw new Error(`${source}: worktree step "${s.name}" must have a "branch" string field`); + } } else if (s.type === 'integration') { // Integration steps require integration and action if (typeof s.integration !== 'string') { @@ -2791,8 +2841,9 @@ export class WorkflowRunner { // Validate config (catches cycles, missing deps, invalid steps, etc.) 
this.validateConfig(resolved); + const runtimeConfig = this.applyReliabilityDefaults(resolved); - const permissionResult = this.validatePermissions(resolved.agents, resolved.permission_profiles); + const permissionResult = this.validatePermissions(runtimeConfig.agents, runtimeConfig.permission_profiles); if (permissionResult.errors.length > 0) { throw new Error(`Permission validation failed:\n ${permissionResult.errors.join('\n ')}`); } @@ -2801,7 +2852,7 @@ export class WorkflowRunner { } // Resolve and validate named paths from the top-level `paths` config - const pathResult = this.resolvePathDefinitions(resolved.paths, this.cwd); + const pathResult = this.resolvePathDefinitions(runtimeConfig.paths, this.cwd); if (pathResult.errors.length > 0) { throw new Error(`Path validation failed:\n ${pathResult.errors.join('\n ')}`); } @@ -2812,7 +2863,7 @@ export class WorkflowRunner { } } - const workflows = resolved.workflows ?? []; + const workflows = runtimeConfig.workflows ?? []; const workflow = workflowName ? workflows.find((w) => w.name === workflowName) : workflows[0]; @@ -2834,9 +2885,9 @@ export class WorkflowRunner { id: runId, workspaceId: this.workspaceId, workflowName: resolvedWorkflow.name, - pattern: resolved.swarm.pattern, + pattern: runtimeConfig.swarm.pattern, status: 'pending', - config: resolved, + config: runtimeConfig, startedAt: now, createdAt: now, updatedAt: now, @@ -2921,7 +2972,7 @@ export class WorkflowRunner { return this.runWorkflowCore({ run, workflow: resolvedWorkflow, - config: resolved, + config: runtimeConfig, stepStates, isResume: false, }); @@ -2954,7 +3005,7 @@ export class WorkflowRunner { throw new Error(`Run "${runId}" is in status "${run.status}" and cannot be resumed`); } - const resolvedConfig = vars ? this.resolveVariables(run.config, vars) : run.config; + const resolvedConfig = this.applyReliabilityDefaults(vars ? 
this.resolveVariables(run.config, vars) : run.config); // Resolve path definitions (same as execute()) so workdir lookups work on resume const pathResult = this.resolvePathDefinitions(resolvedConfig.paths, this.cwd); @@ -3680,7 +3731,7 @@ export class WorkflowRunner { const repairRetries = errorHandling?.strategy === 'retry' ? errorHandling.repairRetries ?? 0 : 0; const repairAgent = repairRetries > 0 - ? this.resolveDeterministicRepairAgent(step, stepStates, agentMap, errorHandling) + ? this.resolveWorkflowRepairAgent(step, stepStates, agentMap, errorHandling) : undefined; const maxRetries = step.retries ?? errorHandling?.maxRetries ?? (repairAgent ? repairRetries : 0); const retryDelay = errorHandling?.retryDelayMs ?? 1000; @@ -3911,7 +3962,7 @@ export class WorkflowRunner { } } - private resolveDeterministicRepairAgent( + private resolveWorkflowRepairAgent( step: WorkflowStep, stepStates: Map, agentMap: Map, @@ -4058,6 +4109,105 @@ export class WorkflowRunner { ); } + private async runAgentStepRepairAgent(context: AgentStepRepairContext): Promise { + const repairAgent: AgentDefinition = { + ...context.agentDef, + interactive: false, + }; + const repairPrompt = this.buildAgentStepRepairPrompt(context); + const repairStep: WorkflowStep = { + name: `${context.step.name}-repair-${context.attempt}`, + type: 'agent', + agent: repairAgent.name, + task: repairPrompt, + cwd: context.cwd, + workdir: undefined, + retries: 0, + }; + const timeoutMs = + repairAgent.constraints?.timeoutMs ?? context.step.timeoutMs ?? 
this.currentConfig?.swarm?.timeoutMs; + + this.log( + `[${context.step.name}] Agent step failed; asking "${repairAgent.name}" to repair before retry ${context.attempt + 1}/${context.maxRetries + 1}` + ); + this.postToChannel( + `**[${context.step.name}]** Agent step failed; assigning repair to \`${repairAgent.name}\`` + ); + this.recordStepToolSideEffect(context.step.name, { + type: 'custom', + detail: `Assigned agent-step repair to ${repairAgent.name}`, + raw: { + repairAgent: repairAgent.name, + attempt: context.attempt, + maxRetries: context.maxRetries, + completionReason: context.completionReason, + exitCode: context.exitCode, + exitSignal: context.exitSignal, + }, + }); + + try { + this.ensureBudgetAllowsSpawn(context.step.name, repairAgent.name); + let repairOutput: string; + if (this.executor) { + repairOutput = await this.executor.executeAgentStep(repairStep, repairAgent, repairPrompt, timeoutMs); + } else if (repairAgent.cli === 'api') { + repairOutput = await executeApiStep( + repairAgent.constraints?.model ?? 'claude-sonnet-4-20250514', + repairPrompt, + { + envSecrets: this.envSecrets, + skills: repairAgent.skills, + defaultMaxTokens: repairAgent.constraints?.maxTokens, + } + ); + } else { + const result = await this.execNonInteractive(repairAgent, repairStep, timeoutMs); + repairOutput = result.output; + } + + this.recordStepToolSideEffect(context.step.name, { + type: 'custom', + detail: `Repair agent ${repairAgent.name} completed before agent retry`, + raw: { repairAgent: repairAgent.name, output: repairOutput.slice(0, 1000) }, + }); + } catch (error) { + if (error instanceof BudgetExceededError || this.abortController?.signal.aborted) { + throw error; + } + const message = error instanceof Error ? 
error.message : String(error); + this.log(`[${context.step.name}] Repair agent "${repairAgent.name}" failed: ${message}`); + this.postToChannel( + `**[${context.step.name}]** Repair agent \`${repairAgent.name}\` failed; retrying agent step anyway` + ); + this.recordStepToolSideEffect(context.step.name, { + type: 'custom', + detail: `Repair agent ${repairAgent.name} failed before agent retry: ${message}`, + raw: { repairAgent: repairAgent.name, error: message }, + }); + } + } + + private buildAgentStepRepairPrompt(context: AgentStepRepairContext): string { + const output = context.output.trim(); + const clippedOutput = output.length > 4000 ? output.slice(-4000) : output; + const task = (context.step.task ?? '').trim(); + const clippedTask = task.length > 3000 ? task.slice(0, 3000) : task; + return ( + `A workflow agent step failed or produced an invalid artifact. Repair the repository, workflow state, or step instructions so the step can succeed on the next retry.\n\n` + + `Step: ${context.step.name}\n` + + `Working directory: ${context.cwd}\n` + + `Completion reason: ${context.completionReason ?? 'unknown'}\n` + + `Failure:\n${context.error}\n` + + `Exit code: ${context.exitCode ?? 'unknown'}\n` + + `Exit signal: ${context.exitSignal ?? 'none'}\n\n` + + `Step task:\n${clippedTask || '(no task captured)'}\n\n` + + `Previous output:\n${clippedOutput || '(no output captured)'}\n\n` + + `Repair only what is needed for this step to produce the required artifact or evidence. ` + + `Preserve unrelated user changes. After making the fix, report the files changed and why the retry should pass.` + ); + } + /** * Execute a worktree step (git worktree setup). * Fast, reliable, $0 LLM cost. @@ -4329,63 +4479,7 @@ export class WorkflowRunner { } const specialistDef = WorkflowRunner.resolveAgentDef(rawAgentDef); - // API-mode agents: execute via direct API call instead of spawning a PTY/subprocess. 
- if (specialistDef.cli === 'api') { - this.ensureBudgetAllowsSpawn(step.name, agentName); - const stepOutputContext = this.buildStepOutputContext(stepStates, runId); - const resolvedTask = this.interpolateStepTask(step.task ?? '', stepOutputContext); - - state.row.status = 'running'; - state.row.startedAt = new Date().toISOString(); - await this.db.updateStep(state.row.id, { - status: 'running', - startedAt: state.row.startedAt, - updatedAt: new Date().toISOString(), - }); - this.emit({ type: 'step:started', runId, stepName: step.name }); - this.postToChannel(`**[${step.name}]** Started (api)`); - - try { - const output = await executeApiStep( - specialistDef.constraints?.model ?? 'claude-sonnet-4-20250514', - resolvedTask, - { - envSecrets: this.envSecrets, - skills: specialistDef.skills, - defaultMaxTokens: specialistDef.constraints?.maxTokens, - } - ); - - state.row.status = 'completed'; - state.row.output = output; - state.row.completedAt = new Date().toISOString(); - await this.db.updateStep(state.row.id, { - status: 'completed', - output, - completedAt: state.row.completedAt, - updatedAt: new Date().toISOString(), - }); - await this.persistStepOutput(runId, step.name, output); - this.emit({ type: 'step:completed', runId, stepName: step.name, output }); - } catch (apiError) { - const errorMessage = apiError instanceof Error ? 
apiError.message : String(apiError); - state.row.status = 'failed'; - state.row.error = errorMessage; - state.row.completedAt = new Date().toISOString(); - await this.db.updateStep(state.row.id, { - status: 'failed', - error: errorMessage, - completedAt: state.row.completedAt, - updatedAt: new Date().toISOString(), - }); - this.emit({ type: 'step:failed', runId, stepName: step.name, error: errorMessage }); - this.postToChannel(`**[${step.name}]** Failed (api): ${errorMessage}`); - throw apiError; - } - return; - } - - const usesOwnerFlow = specialistDef.interactive !== false; + const usesOwnerFlow = specialistDef.cli !== 'api' && specialistDef.interactive !== false; const currentPattern = this.currentConfig?.swarm?.pattern ?? ''; const isHubPattern = WorkflowRunner.HUB_PATTERNS.has(currentPattern); const usesAutoHardening = @@ -4413,6 +4507,11 @@ export class WorkflowRunner { ownerDef.constraints?.timeoutMs ?? specialistDef.constraints?.timeoutMs ?? this.currentConfig?.swarm?.timeoutMs; + const repairRetries = errorHandling?.strategy === 'retry' ? (errorHandling.repairRetries ?? 0) : 0; + const repairAgent = + repairRetries > 0 + ? this.resolveWorkflowRepairAgent(step, stepStates, agentMap, errorHandling) + : undefined; let lastError: string | undefined; let lastExitCode: number | undefined; @@ -4455,6 +4554,20 @@ export class WorkflowRunner { updatedAt: new Date().toISOString(), }); await this.trajectory?.stepRetrying(step, attempt, maxRetries); + if (repairAgent && attempt <= repairRetries) { + await this.runAgentStepRepairAgent({ + step, + agentDef: repairAgent, + attempt, + maxRetries, + cwd: lastEffectiveCwd ?? this.resolveEffectiveCwd(step, specialistDef), + error: lastError ?? 'Unknown error', + output: this.lastFailedStepOutput.get(step.name) ?? 
'', + exitCode: lastExitCode, + exitSignal: lastExitSignal, + completionReason: lastCompletionReason, + }); + } await this.delay(retryDelay); } @@ -4602,7 +4715,21 @@ export class WorkflowRunner { // executors still take precedence. See process-backend-executor.ts. const spawnResult = this.executor ? await this.executor.executeAgentStep(resolvedStep, effectiveOwner, ownerTask, timeoutMs) - : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs, { + : effectiveOwner.cli === 'api' + ? { + output: await executeApiStep( + effectiveOwner.constraints?.model ?? 'claude-sonnet-4-20250514', + ownerTask, + { + envSecrets: this.envSecrets, + skills: effectiveOwner.skills, + defaultMaxTokens: effectiveOwner.constraints?.maxTokens, + } + ), + exitCode: 0, + promptTaskText: ownerTask, + } + : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs, { retryAttempt: attempt, evidenceStepName: step.name, evidenceRole: usesOwnerFlow ? 'owner' : 'specialist', @@ -4784,6 +4911,9 @@ export class WorkflowRunner { } catch (err) { lastError = err instanceof Error ? err.message : String(err); lastCompletionReason = err instanceof WorkflowCompletionError ? err.completionReason : undefined; + if (stepOutputForDiagnostic) { + this.lastFailedStepOutput.set(step.name, stepOutputForDiagnostic); + } const diagnosticVerification = step.verification as DiagnosticVerificationCheck | undefined; if ( err instanceof WorkflowCompletionError && diff --git a/packages/sdk/src/workflows/schema.json b/packages/sdk/src/workflows/schema.json index 6a1df2a61..c04325384 100644 --- a/packages/sdk/src/workflows/schema.json +++ b/packages/sdk/src/workflows/schema.json @@ -932,7 +932,7 @@ "repairRetries": { "type": "integer", "minimum": 0, - "description": "Retry budget for deterministic gate repair when maxRetries is not set. Defaults to 0, so deterministic repair is opt-in." + "description": "Retry budget for repair agents before terminal failure. 
Defaults to the retry budget when repair-capable agents are available; set 0 to disable repair agents." } } } diff --git a/packages/sdk/src/workflows/types.ts b/packages/sdk/src/workflows/types.ts index 023d42de5..099e1fd27 100644 --- a/packages/sdk/src/workflows/types.ts +++ b/packages/sdk/src/workflows/types.ts @@ -489,7 +489,7 @@ export interface ErrorHandlingConfig { notifyChannel?: string; /** Agent to use when a deterministic gate fails and needs code/workflow repair. */ repairAgent?: string; - /** Retry budget for deterministic gate repair when maxRetries is not set. Default: 0. */ + /** Retry budget for repair agents before terminal failure. Set 0 to disable repair agents. */ repairRetries?: number; } From 5f75811079b10bfbe344fa42a53f4542ee020a51 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Fri, 8 May 2026 20:34:11 +0200 Subject: [PATCH 2/2] fix(workflows): address reliability review feedback --- .github/workflows/workflow-reliability.yml | 2 - .../completed/2026-05/traj_34b1u84b19gz.json | 25 +++++++++ .../completed/2026-05/traj_34b1u84b19gz.md | 14 +++++ .../completed/2026-05/traj_bdrlknyl8twj.json | 4 +- .../completed/2026-05/traj_bdrlknyl8twj.md | 2 +- .trajectories/index.json | 9 ++- .../workflow-reliability-contract.test.ts | 55 ++++++++++++++++++- .../workflow-reliability-e2e.test.ts | 2 +- packages/sdk/src/workflows/builder.ts | 2 +- packages/sdk/src/workflows/runner.ts | 51 +++++++++++------ 10 files changed, 139 insertions(+), 27 deletions(-) create mode 100644 .trajectories/completed/2026-05/traj_34b1u84b19gz.json create mode 100644 .trajectories/completed/2026-05/traj_34b1u84b19gz.md diff --git a/.github/workflows/workflow-reliability.yml b/.github/workflows/workflow-reliability.yml index 7818db432..5e1f72b76 100644 --- a/.github/workflows/workflow-reliability.yml +++ b/.github/workflows/workflow-reliability.yml @@ -6,7 +6,6 @@ on: paths: - '.github/workflows/workflow-reliability.yml' - 'packages/sdk/src/workflows/**' - - 
'packages/sdk/src/__tests__/**' - 'packages/sdk/package.json' - 'packages/workflow-types/**' - 'package-lock.json' @@ -16,7 +15,6 @@ on: paths: - '.github/workflows/workflow-reliability.yml' - 'packages/sdk/src/workflows/**' - - 'packages/sdk/src/__tests__/**' - 'packages/sdk/package.json' - 'packages/workflow-types/**' - 'package-lock.json' diff --git a/.trajectories/completed/2026-05/traj_34b1u84b19gz.json b/.trajectories/completed/2026-05/traj_34b1u84b19gz.json new file mode 100644 index 000000000..42aaaf74a --- /dev/null +++ b/.trajectories/completed/2026-05/traj_34b1u84b19gz.json @@ -0,0 +1,25 @@ +{ + "id": "traj_34b1u84b19gz", + "version": 1, + "task": { + "title": "Address PR 827 review feedback" + }, + "status": "completed", + "startedAt": "2026-05-08T18:29:34.717Z", + "completedAt": "2026-05-08T18:33:55.607Z", + "agents": [], + "chapters": [], + "retrospective": { + "summary": "Addressed PR #827 review feedback: cleaned reliability options type, tightened worktree branch validation, fixed supervised API-owner execution without interactive spawn, removed overlapping CI path filter, fixed E2E helper return shape, and cleaned duplicate trajectory text. 
Added a targeted supervised API-owner regression test and re-ran SDK typecheck plus reliability suites.", + "approach": "Standard approach", + "confidence": 0.9 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-workflow-reliability-defaults", + "tags": [], + "_trace": { + "startRef": "6d4b6969cd96596fea43808e6cddbdd70c029b8d", + "endRef": "6d4b6969cd96596fea43808e6cddbdd70c029b8d" + } +} \ No newline at end of file diff --git a/.trajectories/completed/2026-05/traj_34b1u84b19gz.md b/.trajectories/completed/2026-05/traj_34b1u84b19gz.md new file mode 100644 index 000000000..9f0927cd1 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_34b1u84b19gz.md @@ -0,0 +1,14 @@ +# Trajectory: Address PR 827 review feedback + +> **Status:** ✅ Completed +> **Confidence:** 90% +> **Started:** May 8, 2026 at 08:29 PM +> **Completed:** May 8, 2026 at 08:33 PM + +--- + +## Summary + +Addressed PR #827 review feedback: cleaned reliability options type, tightened worktree branch validation, fixed supervised API-owner execution without interactive spawn, removed overlapping CI path filter, fixed E2E helper return shape, and cleaned duplicate trajectory text. Added a targeted supervised API-owner regression test and re-ran SDK typecheck plus reliability suites. 
+ +**Approach:** Standard approach diff --git a/.trajectories/completed/2026-05/traj_bdrlknyl8twj.json b/.trajectories/completed/2026-05/traj_bdrlknyl8twj.json index 83278b2e1..bc5b8e2fc 100644 --- a/.trajectories/completed/2026-05/traj_bdrlknyl8twj.json +++ b/.trajectories/completed/2026-05/traj_bdrlknyl8twj.json @@ -25,7 +25,7 @@ { "ts": 1778263322077, "type": "decision", - "content": "Made retry-mode workflows repair-aware by default: Made retry-mode workflows repair-aware by default", + "content": "Made retry-mode workflows repair-aware by default", "raw": { "question": "Made retry-mode workflows repair-aware by default", "chosen": "Made retry-mode workflows repair-aware by default", @@ -50,4 +50,4 @@ "startRef": "0e536f46028fb008342efc0908342408984b37d0", "endRef": "0e536f46028fb008342efc0908342408984b37d0" } -} \ No newline at end of file +} diff --git a/.trajectories/completed/2026-05/traj_bdrlknyl8twj.md b/.trajectories/completed/2026-05/traj_bdrlknyl8twj.md index 4d1c6b046..9a3d50b05 100644 --- a/.trajectories/completed/2026-05/traj_bdrlknyl8twj.md +++ b/.trajectories/completed/2026-05/traj_bdrlknyl8twj.md @@ -28,4 +28,4 @@ Added Relay workflow reliability defaults, repairable builder presets, agent-ste ### 1. 
Work *Agent: default* -- Made retry-mode workflows repair-aware by default: Made retry-mode workflows repair-aware by default +- Made retry-mode workflows repair-aware by default diff --git a/.trajectories/index.json b/.trajectories/index.json index 16fca31e4..ea541b519 100644 --- a/.trajectories/index.json +++ b/.trajectories/index.json @@ -1,6 +1,6 @@ { "version": 1, - "lastUpdated": "2026-05-08T18:05:37.419Z", + "lastUpdated": "2026-05-08T18:33:55.701Z", "trajectories": { "traj_1775914133873_35667beb": { "title": "fix-sdk-build-resolution-workflow", @@ -289,6 +289,13 @@ "startedAt": "2026-05-08T17:54:45.069Z", "completedAt": "2026-05-08T18:05:37.305Z", "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-workflow-reliability-defaults/.trajectories/completed/2026-05/traj_bdrlknyl8twj.json" + }, + "traj_34b1u84b19gz": { + "title": "Address PR 827 review feedback", + "status": "completed", + "startedAt": "2026-05-08T18:29:34.717Z", + "completedAt": "2026-05-08T18:33:55.607Z", + "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-workflow-reliability-defaults/.trajectories/completed/2026-05/traj_34b1u84b19gz.json" } } } \ No newline at end of file diff --git a/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts b/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts index b77cae1c1..7526b882d 100644 --- a/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts +++ b/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, vi } from 'vitest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; import os from 'node:os'; import path from 'node:path'; @@ -7,6 +7,10 @@ import { workflow } from '../builder.js'; import { WorkflowRunner, type WorkflowDb } from '../runner.js'; import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from 
'../types.js'; +afterEach(() => { + vi.unstubAllGlobals(); +}); + function makeDb(): WorkflowDb { const runs = new Map(); const steps = new Map(); @@ -563,6 +567,55 @@ describe('workflow reliability contract', () => { expect(executeDeterministicStep).toHaveBeenCalledTimes(3); }); + it('runs supervised api owners without spawning an interactive owner process', async () => { + const fetch = vi.fn(async () => { + return new Response( + JSON.stringify({ + content: [{ type: 'text', text: 'OWNER_DECISION: COMPLETE\nReason: worker output verified' }], + model: 'claude-sonnet-4-20250514', + }), + { status: 200, headers: { 'content-type': 'application/json' } } + ); + }); + vi.stubGlobal('fetch', fetch); + + const runner = new WorkflowRunner({ + db: makeDb(), + workspaceId: 'ws-test', + cwd: process.cwd(), + envSecrets: { ANTHROPIC_API_KEY: 'test-api-key' }, + }); + const spawnAndWait = vi.fn(async (agent: any, _step: any, _timeoutMs: any, options: any) => { + options?.onSpawned?.({ actualName: agent.name, agent: { release: async () => undefined } }); + if (agent.name === 'worker') { + return { output: 'DONE', exitCode: 0, promptTaskText: 'worker task' }; + } + throw new Error('api owner should not use spawnAndWait'); + }); + (runner as any).spawnAndWait = spawnAndWait; + + const result = await (runner as any).executeSupervisedAgentStep( + { + name: 'supervised-api-owner', + agent: 'worker', + task: 'produce done', + verification: { type: 'output_contains', value: 'DONE' }, + }, + { + specialist: { name: 'worker', cli: 'claude', role: 'worker' }, + owner: { name: 'owner', cli: 'api', role: 'owner' }, + }, + 'produce done' + ); + + expect(result).toMatchObject({ + specialistOutput: 'DONE', + completionReason: 'completed_by_owner_decision', + }); + expect(fetch).toHaveBeenCalledTimes(1); + expect(spawnAndWait).toHaveBeenCalledTimes(1); + }); + it('does not run repair agents for fail-fast workflows even when agents are present', async () => { const executeDeterministicStep = 
vi.fn(async () => ({ output: 'hard failure', exitCode: 1 })); const executeAgentStep = vi.fn(async () => 'unexpected repair'); diff --git a/packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts b/packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts index 6a222fdac..253734ccb 100644 --- a/packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts +++ b/packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts @@ -72,7 +72,7 @@ async function runReliabilityWorkflow(config: RelayYamlConfig, cwd = makeWorkspa try { const run = await runner.execute(config, 'default'); - return { run, callsByStep, cwd }; + return { run, callsByStep }; } finally { rmSync(cwd, { recursive: true, force: true }); } diff --git a/packages/sdk/src/workflows/builder.ts b/packages/sdk/src/workflows/builder.ts index 6756b4141..3e6a58f2e 100644 --- a/packages/sdk/src/workflows/builder.ts +++ b/packages/sdk/src/workflows/builder.ts @@ -98,7 +98,7 @@ export interface ErrorOptions { repairRetries?: number; } -export interface ReliabilityOptions extends ErrorOptions {} +export type ReliabilityOptions = ErrorOptions; export interface WorkflowRunOptions { /** Run a specific workflow by name (default: first). 
*/ diff --git a/packages/sdk/src/workflows/runner.ts b/packages/sdk/src/workflows/runner.ts index 93a125730..84f2fe7f0 100644 --- a/packages/sdk/src/workflows/runner.ts +++ b/packages/sdk/src/workflows/runner.ts @@ -2558,7 +2558,7 @@ export class WorkflowRunner { throw new Error(`${source}: deterministic step "${s.name}" must have a "command" field`); } } else if (s.type === 'worktree') { - if (typeof s.branch !== 'string') { + if (typeof s.branch !== 'string' || s.branch.trim().length === 0) { throw new Error(`${source}: worktree step "${s.name}" must have a "branch" string field`); } } else if (s.type === 'integration') { @@ -5415,23 +5415,38 @@ export class WorkflowRunner { const ownerStartTime = Date.now(); try { - const ownerResultObj = await this.spawnAndWait(supervised.owner, ownerStep, timeoutMs, { - agentNameSuffix: 'owner', - retryAttempt, - evidenceStepName: step.name, - evidenceRole: 'owner', - logicalName: supervised.owner.name, - onSpawned: ({ actualName }) => { - this.supervisedRuntimeAgents.set(actualName, { - stepName: step.name, - role: 'owner', - logicalName: supervised.owner.name, - }); - }, - onChunk: ({ chunk }) => { - void this.recordOwnerMonitoringChunk(step, supervised.owner, chunk); - }, - }); + const ownerResultObj = + supervised.owner.cli === 'api' + ? { + output: await executeApiStep( + supervised.owner.constraints?.model ?? 
'claude-sonnet-4-20250514', + supervisorTask, + { + envSecrets: this.envSecrets, + skills: supervised.owner.skills, + defaultMaxTokens: supervised.owner.constraints?.maxTokens, + } + ), + exitCode: 0, + promptTaskText: supervisorTask, + } + : await this.spawnAndWait(supervised.owner, ownerStep, timeoutMs, { + agentNameSuffix: 'owner', + retryAttempt, + evidenceStepName: step.name, + evidenceRole: 'owner', + logicalName: supervised.owner.name, + onSpawned: ({ actualName }) => { + this.supervisedRuntimeAgents.set(actualName, { + stepName: step.name, + role: 'owner', + logicalName: supervised.owner.name, + }); + }, + onChunk: ({ chunk }) => { + void this.recordOwnerMonitoringChunk(step, supervised.owner, chunk); + }, + }); const ownerElapsed = Date.now() - ownerStartTime; const ownerOutput = ownerResultObj.output; this.log(`[${step.name}] Owner "${supervised.owner.name}" exited`);