diff --git a/evals/suites/runtime-recovery/cases.jsonl b/evals/suites/runtime-recovery/cases.jsonl index 5aea64f..426c6b5 100644 --- a/evals/suites/runtime-recovery/cases.jsonl +++ b/evals/suites/runtime-recovery/cases.jsonl @@ -3,6 +3,7 @@ {"id":"runtime-recovery.classify-before-retry","suite":"runtime-recovery","executor":"manual","kind":"regression","input":{"message":"A workflow failed after one step timed out and another worker stayed alive without producing artifacts. Explain what Ricky should do next."},"expected":{"maxToolCalls":0,"must":["Classify the failure before deciding whether to retry.","Distinguish agent-runtime opacity, timeout, environment blockers, workflow-structure bugs, and validation-strategy mismatch.","Preserve raw evidence and uncertainty when the class is not known."],"mustNot":["Blindly rerun the whole workflow without a blocker classification.","Treat every failure as a broken workflow definition.","Claim Ricky fixed the workflow before rerun evidence proves it."],"humanReviewRequired":true},"tags":["runtime","recovery","taxonomy"]} {"id":"runtime-recovery.stale-relay-state","suite":"runtime-recovery","executor":"manual","kind":"regression","input":{"message":"Ricky detects stale `.agent-relay/`, `.relay/`, and `.trajectories/` state before launching a local workflow."},"expected":{"maxToolCalls":0,"must":["Classify stale local runtime state as an environment contamination issue.","Recommend quarantine or isolated-run guidance before launch.","Record the observed paths and the action taken or recommended."],"mustNot":["Treat stale runtime state as a workflow logic failure.","Delete or overwrite state without an explicit safe path or user intent.","Continue into execution as if the workspace were clean."],"humanReviewRequired":true},"tags":["runtime","environment"]} {"id":"runtime-recovery.already-running-conflict","suite":"runtime-recovery","executor":"manual","kind":"regression","input":{"message":"A run marker says another Ricky or Relay run is already active in this workspace."},"expected":{"maxToolCalls":0,"must":["Report the active marker, run id, or status path when available.","Ask the user to inspect, wait for, or explicitly clear the active run.","Avoid launching a competing run that could corrupt evidence."],"mustNot":["Silently start another run.","Hide the existing run marker from the user.","Treat the conflict as a generic failure with no recovery path."],"humanReviewRequired":true},"tags":["runtime","safety"]} +{"id":"runtime-recovery.env-loader-injection-runtime-loadable","suite":"runtime-recovery","executor":"manual","kind":"regression","input":{"message":"A workflow artifact references a `MISSING_ENV_VAR` value. Ricky's deterministic auto-fix injects the `.env.local` / `.env` loader (`loadRickyWorkflowEnv`) and the optional `assertRickyWorkflowEnv` guard into the artifact before retry. The artifact may be a master-rendered workflow whose `.step({ command: ... })` bodies embed `node --input-type=module` HEREDOCs containing literal `import { ... } from 'node:fs'` / `from 'node:path'` strings."},"expected":{"maxToolCalls":0,"must":["Produce a repaired artifact that successfully loads under Node, not just one that contains the marker comment. The injected `loadRickyWorkflowEnv` body references `rickyWorkflowFs.*` and `rickyWorkflowPath.*`, so the repair must also add the corresponding `import * as rickyWorkflowFs from 'node:fs'` and `import * as rickyWorkflowPath from 'node:path'` aliases at module top level.","Detect existing alias imports by matching real top-level `import * as from ''` statements, not by substring-matching the module specifier anywhere in the file (substrings inside HEREDOCs in `.step({ command: ... })` bodies do not count as imports).","Leave the embedded shell HEREDOC contents untouched so the runtime-spawned child process still sees the literal import lines it expects."],"mustNot":["Skip adding the `rickyWorkflowFs` / `rickyWorkflowPath` aliases because `from 'node:fs'` or `from 'node:path'` already appears somewhere in the file as a string literal.","Inject `loadRickyWorkflowEnv` (or `assertRickyWorkflowEnv`) without the supporting alias imports, which produces a `ReferenceError: rickyWorkflowPath is not defined` at module load and burns the auto-fix budget on `UNSUPPORTED_RUNTIME at runtime-launch`.","Rewrite or escape the embedded HEREDOC text in step commands."],"humanReviewRequired":true},"tags":["runtime","auto-fix","env-loader"]} {"id":"runtime-recovery.auto-fix-bounded-loop","suite":"runtime-recovery","executor":"manual","kind":"capability","input":{"message":"Run a local workflow with auto-fix enabled. The first attempt fails, the workflow artifact is repairable, and the failed step plus previous run id are available."},"expected":{"maxToolCalls":0,"must":["Use a bounded retry budget and summarize every attempt.","Ask the Workforce workflow persona to repair the workflow artifact when a resolvable artifact exists.","Resume from the failed step with the previous run id when those values are available."],"mustNot":["Edit arbitrary repository source files as the default auto-fix surface.","Keep retrying after the configured max attempts.","Lose the single Ricky tracking run id across repair/resume attempts."],"humanReviewRequired":true},"tags":["runtime","auto-fix","local"]} {"id":"runtime-recovery.no-auto-fix-preserves-single-attempt","suite":"runtime-recovery","executor":"manual","kind":"regression","input":{"message":"A user runs `ricky run workflows/foo.ts --no-auto-fix` and the workflow fails."},"expected":{"maxToolCalls":0,"must":["Preserve one-attempt behavior when auto-fix is disabled.","Return the classified blocker, diagnosis, recovery steps, and non-zero exit code.","Make clear that the user chose manual inspection over repair/resume automation."],"mustNot":["Start a repair loop despite `--no-auto-fix`.","Suppress the diagnosis because no repair was attempted.","Present the failure as a completed repair attempt."],"humanReviewRequired":true},"tags":["runtime","auto-fix","cli"]} {"id":"runtime-recovery.in-process-local-runner","suite":"runtime-recovery","executor":"manual","kind":"capability","input":{"message":"Explain how Ricky should execute a local TypeScript workflow artifact in the primary local path."},"expected":{"maxToolCalls":0,"must":["Prefer the Node strip-types route or equivalent SDK/programmatic route over requiring the `agent-relay` binary on PATH.","Precheck that Node and `@agent-relay/sdk` are resolvable for the workflow.","Record the actual spawn command in execution evidence."],"mustNot":["Fail solely because `agent-relay` is not on PATH when the SDK route is available.","Hide the actual runtime command from evidence.","Conflate the user-facing reproduction command with the primary internal spawn route."],"humanReviewRequired":true},"tags":["runtime","local","runner"]} diff --git a/evals/suites/runtime-recovery/cases.md b/evals/suites/runtime-recovery/cases.md index 6537b66..b3e2e72 100644 --- a/evals/suites/runtime-recovery/cases.md +++ b/evals/suites/runtime-recovery/cases.md @@ -69,6 +69,28 @@ maxToolCalls: 0 - Hide the existing run marker from the user. - Treat the conflict as a generic failure with no recovery path. +## runtime-recovery.env-loader-injection-runtime-loadable +Executor: manual +Kind: regression +Tags: runtime, auto-fix, env-loader +Human Review: true + +### Message +A workflow artifact references a `MISSING_ENV_VAR` value. Ricky's deterministic auto-fix injects the `.env.local` / `.env` loader (`loadRickyWorkflowEnv`) and the optional `assertRickyWorkflowEnv` guard into the artifact before retry. The artifact may be a master-rendered workflow whose `.step({ command: ... })` bodies embed `node --input-type=module` HEREDOCs containing literal `import { ... } from 'node:fs'` / `from 'node:path'` strings. + +### Deterministic Checks +maxToolCalls: 0 + +### Must +- Produce a repaired artifact that successfully loads under Node, not just one that contains the marker comment. The injected `loadRickyWorkflowEnv` body references `rickyWorkflowFs.*` and `rickyWorkflowPath.*`, so the repair must also add the corresponding `import * as rickyWorkflowFs from 'node:fs'` and `import * as rickyWorkflowPath from 'node:path'` aliases at module top level. +- Detect existing alias imports by matching real top-level `import * as from ''` statements, not by substring-matching the module specifier anywhere in the file (substrings inside HEREDOCs in `.step({ command: ... })` bodies do not count as imports). +- Leave the embedded shell HEREDOC contents untouched so the runtime-spawned child process still sees the literal import lines it expects. + +### Must Not +- Skip adding the `rickyWorkflowFs` / `rickyWorkflowPath` aliases because `from 'node:fs'` or `from 'node:path'` already appears somewhere in the file as a string literal. +- Inject `loadRickyWorkflowEnv` (or `assertRickyWorkflowEnv`) without the supporting alias imports, which produces a `ReferenceError: rickyWorkflowPath is not defined` at module load and burns the auto-fix budget on `UNSUPPORTED_RUNTIME at runtime-launch`. +- Rewrite or escape the embedded HEREDOC text in step commands. + ## runtime-recovery.auto-fix-bounded-loop Executor: manual Kind: capability diff --git a/package-lock.json b/package-lock.json index 4764615..1d02241 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,7 +16,8 @@ "@agentworkforce/workload-router": "^0.19.0", "@inquirer/prompts": "^8.4.2", "ora": "^8.2.0", - "ssh2": "^1.17.0" + "ssh2": "^1.17.0", + "typescript": "^5.9.3" }, "bin": { "ricky": "dist/ricky.js" @@ -26,7 +27,6 @@ "@types/node": "^24.5.2", "esbuild": "^0.28.0", "tsx": "^4.21.0", - "typescript": "^5.9.3", "vitest": "^3.2.4" }, "engines": { @@ -6262,7 +6262,6 @@ "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", - "dev": true, "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", diff --git a/package.json b/package.json index 37f5392..bd5b44c 100644 --- a/package.json +++ b/package.json @@ -63,14 +63,14 @@ "@agentworkforce/workload-router": "^0.19.0", "@inquirer/prompts": "^8.4.2", "ora": "^8.2.0", - "ssh2": "^1.17.0" + "ssh2": "^1.17.0", + "typescript": "^5.9.3" }, "devDependencies": { "@agent-assistant/telemetry": "^0.4.31", "@types/node": "^24.5.2", "esbuild": "^0.28.0", "tsx": "^4.21.0", - "typescript": "^5.9.3", "vitest": "^3.2.4" } } diff --git a/src/local/auto-fix-loop.test.ts b/src/local/auto-fix-loop.test.ts index deddf2a..50b7374 100644 --- a/src/local/auto-fix-loop.test.ts +++ b/src/local/auto-fix-loop.test.ts @@ -609,6 +609,91 @@ describe('runWithAutoFix', () => { expect(repair?.content).toContain('MISSING_ENV_VAR:'); }); + // Regression: when a master-rendered workflow embeds a `node --input-type=module` + // HEREDOC inside a .step({ command: ... }) string, the embedded shell text + // contains the literal substring `from 'node:fs'`. The previous import-detection + // used `content.includes("from 'node:fs'")`, which the embedded string fooled + // — Ricky then injected `loadRickyWorkflowEnv` (which references + // `rickyWorkflowFs` and `rickyWorkflowPath`) without adding the + // `import * as rickyWorkflowFs from 'node:fs'` alias at module top level. The + // resulting workflow ReferenceError'd at module load and Auto-fix burned + // 7/7 attempts on UNSUPPORTED_RUNTIME at runtime-launch. Detection must + // match an actual top-level `import * as from ''` line. + it('adds the rickyWorkflow* alias imports even when the workflow embeds `from \'node:fs\'` inside a .step command HEREDOC', () => { + const masterRenderedContentWithEmbeddedImports = [ + "import { workflow } from '@agent-relay/sdk/workflows';", + '', + '// RICKY_MASTER_EXECUTOR_WORKFLOW', + 'async function main() {', + ' await workflow("ricky-master")', + ' .step("materialize-children", {', + ' type: "deterministic",', + // Mirrors master-workflow-renderer.ts:138-149 — the master renderer emits + // a node --input-type=module HEREDOC as a string inside a step command. + // That string literally contains `from \'node:fs\'` and `from \'node:path\'`. + ' command: "node --input-type=module <<\'NODE\'\\nimport { mkdirSync, writeFileSync } from \'node:fs\';\\nimport { dirname } from \'node:path\';\\nNODE",', + ' captureOutput: true,', + ' failOnError: true,', + ' })', + ' .run({ cwd: process.cwd() });', + '}', + ].join('\n'); + + const repair = repairWorkflowDeterministically({ + artifactPath: 'workflows/generated/master.ts', + artifactContent: masterRenderedContentWithEmbeddedImports, + evidence: missingEnvEvidence(), + response: blockerResponse('MISSING_ENV_VAR', 'run-1', 'runtime-launch'), + }); + + expect(repair?.applied).toBe(true); + // Aliases must be added at module top level despite the embedded + // HEREDOC string containing `from 'node:fs'` / `from 'node:path'`. + expect(repair?.content).toMatch(/^import \* as rickyWorkflowFs from 'node:fs';/m); + expect(repair?.content).toMatch(/^import \* as rickyWorkflowPath from 'node:path';/m); + expect(repair?.content).toContain('RICKY_WORKFLOW_ENV_LOADER'); + // The HEREDOC is preserved unchanged. + expect(repair?.content).toContain("import { mkdirSync, writeFileSync } from 'node:fs';"); + }); + + it('recognizes already-present rickyWorkflow* alias imports declared via multi-line statement and skips re-injection', () => { + // Multi-line import shapes are not handled by line-anchored regex/preamble + // checks but are trivially correct under an AST walk. If the AST detection + // misses the existing import, the injection logic would add a duplicate + // alias, which TypeScript's strip-types loader rejects with + // SyntaxError: Identifier 'rickyWorkflowFs' has already been declared. + const contentWithMultiLineExistingAlias = [ + "import { workflow } from '@agent-relay/sdk/workflows';", + "import * as", + ' rickyWorkflowFs', + " from 'node:fs';", + "import * as rickyWorkflowPath from 'node:path';", + '', + '// RICKY_WORKFLOW_ENV_LOADER', + 'function loadRickyWorkflowEnv() { /* already injected */ }', + '', + 'async function main() {', + ' loadRickyWorkflowEnv();', + ' await workflow("foo").run({ cwd: process.cwd() });', + '}', + ].join('\n'); + + const repair = repairWorkflowDeterministically({ + artifactPath: 'workflows/generated/already-injected.ts', + artifactContent: contentWithMultiLineExistingAlias, + evidence: missingEnvEvidence(), + response: blockerResponse('MISSING_ENV_VAR', 'run-1', 'runtime-launch'), + }); + + // No second `import * as rickyWorkflowFs` statement should appear. + const fsAliasMatches = (repair?.content ?? contentWithMultiLineExistingAlias) + .match(/import\s+\*\s+as\s+rickyWorkflowFs\b/g); + expect(fsAliasMatches).toHaveLength(1); + const pathAliasMatches = (repair?.content ?? contentWithMultiLineExistingAlias) + .match(/import\s+\*\s+as\s+rickyWorkflowPath\b/g); + expect(pathAliasMatches).toHaveLength(1); + }); + it('routes semantic workflow failures to persona repair instead of deterministic repair', async () => { const artifactPath = 'workflows/demo-persona-repair/semantic-contract.ts'; const artifactContent = await readFile(new URL('../../workflows/demo-persona-repair/semantic-contract.ts', import.meta.url), 'utf8'); diff --git a/src/local/auto-fix-loop.ts b/src/local/auto-fix-loop.ts index 6c124f4..c0cf611 100644 --- a/src/local/auto-fix-loop.ts +++ b/src/local/auto-fix-loop.ts @@ -3,6 +3,7 @@ import { constants } from 'node:fs'; import { spawn } from 'node:child_process'; import { randomUUID } from 'node:crypto'; import { basename, delimiter, dirname, isAbsolute, join, resolve } from 'node:path'; +import ts from 'typescript'; import type { LocalInvocationRequest } from './request-normalizer.js'; import type { LocalClassifiedBlocker, LocalResponse } from './entrypoint.js'; @@ -700,11 +701,23 @@ function injectWorkflowEnvLoader(content: string, requiredEnvVars: string[]): st let next = content; let changed = false; - if (!next.includes("from 'node:fs'") && !next.includes('from "node:fs"')) { + // We must check that the *aliases* loadRickyWorkflowEnv references are + // already imported, not just that the module name appears anywhere in the + // file. The master workflow renderer emits real `import { mkdirSync, + // writeFileSync } from 'node:fs'` strings inside shell HEREDOCs in + // .step({ command: ... }) calls — that's a string literal, not a module + // import, but a substring check for `from 'node:fs'` matches it and + // silently skips adding `import * as rickyWorkflowFs from 'node:fs'`. + // The injected loadRickyWorkflowEnv body then ReferenceErrors at module + // load time. hasRickyWorkflowAliasImport uses the TypeScript AST to walk + // module-scope ImportDeclaration nodes so string-literal contents are + // structurally inert and the alias detection is independent of source + // formatting. + if (!hasRickyWorkflowAliasImport(next, 'rickyWorkflowFs', 'node:fs')) { next = insertAfterWorkflowImport(next, "import * as rickyWorkflowFs from 'node:fs';"); changed = true; } - if (!next.includes("from 'node:path'") && !next.includes('from "node:path"')) { + if (!hasRickyWorkflowAliasImport(next, 'rickyWorkflowPath', 'node:path')) { next = insertAfterWorkflowImport(next, "import * as rickyWorkflowPath from 'node:path';"); changed = true; } @@ -742,6 +755,45 @@ function insertAfterWorkflowImport(content: string, importLine: string): string return `${importLine}\n${content}`; } +function hasRickyWorkflowAliasImport(content: string, alias: string, moduleName: string): boolean { + // Walk module-scope ImportDeclaration nodes via the TypeScript AST and + // look for `import * as from ''`. Using the parser + // (rather than substring or regex matching on the raw source) makes + // detection structural: contents inside StringLiteral / + // NoSubstitutionTemplateLiteral / TemplateExpression nodes are inert, so + // shell HEREDOCs in .step({ command: ... }) bodies that embed + // `import { ... } from 'node:fs'` as part of a `node --input-type=module` + // script no longer fool us into skipping the real top-level alias import. + // Comments are also inert. Multi-line imports, alternate quoting, and + // imports placed lower in the file all just work because the parser owns + // the lexical structure instead of us simulating it with regex. + let sourceFile: ts.SourceFile; + try { + sourceFile = ts.createSourceFile( + 'ricky-workflow-artifact.ts', + content, + ts.ScriptTarget.Latest, + /* setParentNodes */ false, + ts.ScriptKind.TS, + ); + } catch { + // Unparseable artifacts fall through and get the alias re-injected so + // the helpers always have their imports. The real syntax error will + // surface at runtime via the strip-types loader. + return false; + } + + for (const statement of sourceFile.statements) { + if (!ts.isImportDeclaration(statement)) continue; + if (!ts.isStringLiteral(statement.moduleSpecifier)) continue; + if (statement.moduleSpecifier.text !== moduleName) continue; + const namedBindings = statement.importClause?.namedBindings; + if (!namedBindings || !ts.isNamespaceImport(namedBindings)) continue; + if (namedBindings.name.text === alias) return true; + } + return false; +} + function insertBeforeMain(content: string, helper: string): string { if (content.includes('async function main()')) { return content.replace(/\nasync function main\(\)/, `\n${helper}\n\nasync function main()`);