Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ cd ../web; corepack.cmd pnpm typecheck; corepack.cmd pnpm exec vite build

UI 工作流变更必须有行为断言,不只截图:共享 unit/contract + Desktop/Web Playwright + Visual QA,证据等级按 `real-e2e-acceptance` 标注。Vite renderer 不等于 packaged Desktop;stub/fixture/readiness-only 必须写 `real_tested=false`。

跨平台前端 acceptance、Playwright/Visual QA 聚合和 manifest 生成优先用 Node/TypeScript runner。PowerShell 只用于 Windows、Tauri、installer、sidecar、release、签名等 OS-specific gate;不要为普通前端验收继续新增大型 `.ps1` 核心脚本。

禁止无保护力测试:

| 反模式 | 禁止原因 |
Expand Down
1 change: 1 addition & 0 deletions app/desktop/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"test:e2e:chat-flow": "playwright test --config playwright.config.ts --project=chromium chat-flow-ui.spec.ts",
"test:e2e:data-boundary": "playwright test --config playwright.config.ts --project=chromium desktop-data-boundary.spec.ts",
"test:visual:chat-flow": "node scripts/manual-chat-flow-check.mjs",
"test:acceptance:chat-flow": "pnpm test:e2e:chat-flow && pnpm test:visual:chat-flow",
"test:e2e:smoke": "playwright test --config playwright.config.ts --project=chromium smoke.spec.ts",
"test:e2e:ui": "playwright test --ui",
"analyze": "pnpm build && npx vite-bundle-visualizer",
Expand Down
1 change: 1 addition & 0 deletions app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"test:e2e:web": "pnpm --filter agenthub-web test:e2e",
"test:e2e:desktop": "pnpm --filter agenthub-desktop test:e2e",
"test:smoke:matrix": "powershell -NoProfile -ExecutionPolicy Bypass -File ../scripts/smoke/verify-e2e-smoke-matrix.ps1 -RepoRoot ..",
"test:acceptance:chat-flow": "node ../scripts/verify/chat-acceptance.mjs --repo-root ..",
"lint:css": "stylelint \"**/*.css\" --ignore-path .stylelintignore"
},
"pnpm": {
Expand Down
1 change: 1 addition & 0 deletions app/web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"test:e2e:stubbed-hub": "playwright test --config playwright.config.ts --project=chromium chat-flow-contract.spec.ts web-stubbed-hub-replay-smoke.spec.ts task-contract.spec.ts",
"test:e2e:approved-real-stub": "pnpm test:e2e:stubbed-hub",
"test:visual:chat-flow": "node scripts/manual-chat-flow-check.mjs",
"test:acceptance:chat-flow": "pnpm test:e2e:chat-flow && pnpm test:visual:chat-flow",
"lint": "eslint src",
"typecheck": "tsc -p tsconfig.json --noEmit"
},
Expand Down
1 change: 1 addition & 0 deletions docs/progress/MASTER.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,4 @@ Per-task telemetry is stored in GitHub issue comments before task closure. Adapt
| 2026-06-29 | Phase 3 sync | Updated Phase 3 live state after #408: milestone #19 is 2/3 complete, adaptive drift_score is 1, #388 has a drift warning, and T3.3 (#388) is the active next task. |
| 2026-06-29 | T3.3 implementation | Hardened observed/approved-real manifest boundaries, kept packaged-release claims separate, aligned smoke-matrix contract checks with current stubbed-Hub replay names, and verified shared/contract gates with `real_tested=false`; PR pending. |
| 2026-06-29 | Phase 3 complete | #388 merged via #410 and closed manually because non-default base did not auto-close it; milestone #19 is closed at 3/3 with adaptive drift_score 2, so Phase 4 requires a lightweight checkpoint before T4.1. |
| 2026-06-29 | T4.1 implementation | Added a Node-based focused chat acceptance bundle for shared unit, Desktop/Web Playwright, and Desktop/Web Visual QA; package entry passed with `real_tested=false`; PR pending. |
316 changes: 316 additions & 0 deletions scripts/verify/chat-acceptance.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
#!/usr/bin/env node
/*
* Focused chat workflow acceptance bundle.
*
* Runs the merge-useful Desktop/Web chat gates and writes a machine-readable
* manifest. This is Vite/browser/fixture evidence only unless a future task
* adds a separate approved-real or packaged-release row.
*/

import { spawn } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import process from 'node:process';

// Parse CLI flags once; every path below is derived from them.
const args = parseArgs(process.argv.slice(2));
// Repository root; falls back to the current working directory when no --repo-root is given.
const repoRoot = path.resolve(args.repoRoot ?? '.');
// Artifact directory; the default is <repoRoot>/.tmp/chat-acceptance/run-<pid>, keyed by this process id.
// A relative --artifact-root is resolved against repoRoot.
const artifactRoot = path.resolve(repoRoot, args.artifactRoot ?? path.join('.tmp', 'chat-acceptance', `run-${process.pid}`));
// Manifest path; the default is chat-acceptance-manifest.json inside artifactRoot.
// A relative --output-path is resolved against repoRoot.
const outputPath = path.resolve(repoRoot, args.outputPath ?? path.join(path.relative(repoRoot, artifactRoot), 'chat-acceptance-manifest.json'));
// Per-command timeout in milliseconds (default 300 seconds).
// NOTE(review): a non-numeric --command-timeout-sec becomes NaN here — confirm it is validated before use.
const timeoutMs = Number(args.commandTimeoutSec ?? 300) * 1000;

// Create the artifact directory up front, before any gate runs.
fs.mkdirSync(artifactRoot, { recursive: true });

// Wall-clock start of the whole bundle; used for started_at and duration_ms in the manifest.
const startedAt = new Date();
// Executable name used to launch pnpm through corepack; 'corepack.cmd' is used on win32.
const corepack = process.platform === 'win32' ? 'corepack.cmd' : 'corepack';

// One result row per gate, appended by runGate in execution order and written to the manifest.
const rows = [];

// Test files appended to the `vitest run` arguments of the shared-chat-unit gate.
// That gate runs with app/shared as its working directory, so these paths are relative to it.
const sharedUnitTests = [
'src/chatview/adapter.test.ts',
'src/chatview/components/AgentGroup.rendering.test.tsx',
'src/chatview/components/UserMessage.rendering.test.tsx',
'src/chatview/components/Transcript.autoscroll.test.tsx',
'src/chatview/components/Transcript.css.test.ts',
'src/transcript/runtimeDiagnostics.test.ts',
'src/workbench/AgentHubWorkbench.test.tsx',
];

// The gates below are awaited one at a time, so they run sequentially and `rows`
// keeps this order. Each gate can be turned off with its own --skip-* flag, in
// which case runGate records a 'skipped' row instead of running the command.

// Gate 1: shared vitest unit tests (files listed in sharedUnitTests).
await runGate({
name: 'shared-chat-unit',
surface: 'shared',
evidenceLevel: 'fixture-unit',
claim: 'Shared transcript ordering, optimistic send, markdown/table, diagnostic filtering, and card grouping unit coverage',
cwd: path.join(repoRoot, 'app', 'shared'),
command: corepack,
args: ['pnpm', '--dir', path.join(repoRoot, 'app', 'shared'), 'exec', 'vitest', 'run', ...sharedUnitTests],
skip: args.skipSharedUnit,
skipReason: 'skipped by --skip-shared-unit',
});

// Gate 2: Desktop package script test:e2e:chat-flow.
await runGate({
name: 'desktop-chat-playwright',
surface: 'desktop',
evidenceLevel: 'playwright-ui',
claim: 'Desktop Vite renderer chat flow Playwright coverage; not packaged Desktop',
cwd: path.join(repoRoot, 'app', 'desktop'),
command: corepack,
args: ['pnpm', '--dir', path.join(repoRoot, 'app', 'desktop'), 'run', 'test:e2e:chat-flow'],
skip: args.skipDesktopPlaywright,
skipReason: 'skipped by --skip-desktop-playwright',
});

// Gate 3: Web package script test:e2e:chat-flow.
await runGate({
name: 'web-chat-playwright',
surface: 'web',
evidenceLevel: 'playwright-ui',
claim: 'Web Vite renderer chat flow Playwright coverage; Hub-shaped stubs are not real login or model execution',
cwd: path.join(repoRoot, 'app', 'web'),
command: corepack,
args: ['pnpm', '--dir', path.join(repoRoot, 'app', 'web'), 'run', 'test:e2e:chat-flow'],
skip: args.skipWebPlaywright,
skipReason: 'skipped by --skip-web-playwright',
});

// Gate 4: Desktop package script test:visual:chat-flow.
// The artifact path is copied into the manifest row as declared; this script does not check that the file exists.
await runGate({
name: 'desktop-chat-visual-qa',
surface: 'desktop',
evidenceLevel: 'visual-qa',
claim: 'Desktop chat Visual QA screenshot and geometry checks at 1440x810',
cwd: path.join(repoRoot, 'app', 'desktop'),
command: corepack,
args: ['pnpm', '--dir', path.join(repoRoot, 'app', 'desktop'), 'run', 'test:visual:chat-flow'],
artifacts: ['app/desktop/.tmp/manual-chat-flow-uiux/desktop-1440x810-chat-flow.png'],
skip: args.skipDesktopVisualQa,
skipReason: 'skipped by --skip-desktop-visual-qa',
});

// Gate 5: Web package script test:visual:chat-flow.
// As with the Desktop visual gate, the artifact path is recorded without an existence check here.
await runGate({
name: 'web-chat-visual-qa',
surface: 'web',
evidenceLevel: 'visual-qa',
claim: 'Web chat Visual QA screenshot and geometry checks at 1440x810',
cwd: path.join(repoRoot, 'app', 'web'),
command: corepack,
args: ['pnpm', '--dir', path.join(repoRoot, 'app', 'web'), 'run', 'test:visual:chat-flow'],
artifacts: ['app/web/.tmp/manual-chat-flow-uiux/web-1440x810-chat-flow.png'],
skip: args.skipWebVisualQa,
skipReason: 'skipped by --skip-web-visual-qa',
});

// All gates have finished (or been skipped); summarize the rows into a manifest.
const endedAt = new Date();
// Tally of row outcomes; `total` counts every gate, including skipped ones.
const counts = {
passed: rows.filter((row) => row.status === 'passed').length,
failed: rows.filter((row) => row.status === 'failed').length,
skipped: rows.filter((row) => row.status === 'skipped').length,
total: rows.length,
};
// Overall status, in precedence order: any failure -> 'failed'; nothing passed -> 'skipped';
// at least one skip alongside passes -> 'passed_with_skips'; otherwise 'passed'.
const status = counts.failed > 0 ? 'failed' : counts.passed === 0 ? 'skipped' : counts.skipped > 0 ? 'passed_with_skips' : 'passed';
// Rows whose command actually ran (passed or failed).
const executedRows = rows.filter((row) => row.status !== 'skipped');
const manifest = {
schema: 'agenthub.chat_acceptance_bundle.v1',
status,
// Hard-coded: this bundle never reports real_tested=true.
real_tested: false,
// evidence_levels covers only gates that ran; planned_evidence_levels covers every gate, skipped or not.
evidence_levels: unique(executedRows.map((row) => row.evidence_level)),
planned_evidence_levels: unique(rows.map((row) => row.evidence_level)),
// generated_at and ended_at carry the same timestamp.
generated_at: endedAt.toISOString(),
started_at: startedAt.toISOString(),
ended_at: endedAt.toISOString(),
duration_ms: endedAt.getTime() - startedAt.getTime(),
artifact_root: artifactRoot,
counts,
// Fixed flags stating what this bundle does not exercise; all are always false.
boundaries: {
real_tokendance_id_login: false,
real_cli_or_model_api: false,
packaged_desktop: false,
signing: false,
release_upload: false,
production_deploy: false,
},
// Human-readable list of what is out of scope for this evidence.
exclusions: [
'real TokenDance ID login',
'real CLI/model/API execution',
'packaged Tauri/Desktop installer',
'sidecar packaging proof',
'signing',
'release upload',
'production deployment',
],
rows,
};

// outputPath may come from --output-path, so its directory is created here rather than assumed to exist.
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
// Pretty-printed JSON with a trailing newline.
fs.writeFileSync(outputPath, `${JSON.stringify(manifest, null, 2)}\n`);

console.log(`\nChat acceptance status: ${status}`);
console.log(`Manifest: ${outputPath}`);
console.log('Evidence boundary: real_tested=false; no real login, model/API execution, packaged Desktop, signing, release upload, or production deploy.');

// A failure exit code is set only when at least one gate failed; skipped gates do not set it.
// process.exitCode is assigned instead of calling process.exit().
if (counts.failed > 0) {
process.exitCode = 1;
}

/**
 * Run a single acceptance gate, or record it as skipped, and append one result
 * row to the module-level `rows` array.
 *
 * @param {object} gate - Gate descriptor with `name`, `surface`, `evidenceLevel`,
 *   `claim`, `cwd`, `command`, `args`, and optional `artifacts`, `skip`, `skipReason`.
 * @returns {Promise<void>} Resolves once the row has been pushed and the outcome logged.
 */
async function runGate(gate) {
  const renderedCommand = [gate.command, ...gate.args].map(quoteArg).join(' ');
  const declaredArtifacts = (gate.artifacts ?? []).filter(Boolean);

  // One builder for both outcomes so the manifest row keeps a single field order.
  const toRow = (rowStatus, exitCode, durationMs, evidence) => ({
    name: gate.name,
    surface: gate.surface,
    evidence_level: gate.evidenceLevel,
    real_tested: false,
    status: rowStatus,
    exit_code: exitCode,
    duration_ms: durationMs,
    command: renderedCommand,
    working_directory: gate.cwd,
    claim: gate.claim,
    evidence,
    artifacts: declaredArtifacts,
  });

  if (gate.skip) {
    // Skipped gates still get a row; the skip reason is stored as the evidence text.
    rows.push(toRow('skipped', null, 0, gate.skipReason));
    console.log(`SKIP ${gate.name} - ${gate.skipReason}`);
    return;
  }

  console.log(`RUN ${gate.name}`);
  const startMs = Date.now();
  const { exitCode, stdout, stderr } = await runCommand(gate.command, gate.args, gate.cwd);
  const passed = exitCode === 0;

  // Evidence is the combined stdout/stderr after redaction and truncation by shortenText.
  rows.push(toRow(passed ? 'passed' : 'failed', exitCode, Date.now() - startMs, shortenText(`${stdout}\n${stderr}`)));
  console.log(passed ? `PASS ${gate.name}` : `FAIL ${gate.name} - exit ${exitCode}`);
}

function runCommand(command, commandArgs, cwd) {
return new Promise((resolve) => {
const spawnCommand = process.platform === 'win32' ? process.env.ComSpec ?? 'cmd.exe' : command;
const spawnArgs = process.platform === 'win32'
? ['/d', '/s', '/c', [command, ...commandArgs].map(quoteArg).join(' ')]
Comment on lines +198 to +200

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major | ⚡ Quick win

Escape the Windows cmd.exe /c string (scripts/verify/chat-acceptance.mjs:198-200, 262-264). quoteArg() only covers spaces and " here, so a path containing &, ^, %, (, or ) can be re-parsed by cmd.exe and break the runner on Windows. Use cmd-safe escaping or avoid building a single /c string.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@scripts/verify/chat-acceptance.mjs` around lines 198 - 200, The Windows
cmd.exe invocation built in the chat-acceptance runner is not safely escaped, so
special characters can be re-parsed by cmd.exe. Update the command construction
in the logic around spawnCommand/spawnArgs and the helper that feeds quoteArg to
either use proper cmd-safe escaping for all shell metacharacters or avoid
assembling a single /c string altogether. Make sure the fix covers every place
that builds the Windows command line so paths containing &, ^, %, (, or ) are
handled correctly.

: commandArgs;
let child;
try {
child = spawn(spawnCommand, spawnArgs, {
cwd,
shell: false,
windowsHide: true,
env: {
...process.env,
AGENTHUB_EDGE_AUTH_TOKEN: '',
AGENTHUB_CHAT_ACCEPTANCE_ARTIFACT_ROOT: artifactRoot,
},
});
} catch (error) {
resolve({ exitCode: 127, stdout: '', stderr: error instanceof Error ? error.message : String(error) });
return;
}
let stdout = '';
let stderr = '';
let timedOut = false;
const timer = setTimeout(() => {
timedOut = true;
child.kill('SIGTERM');
setTimeout(() => child.kill('SIGKILL'), 1000).unref();
}, timeoutMs);

child.stdout.on('data', (chunk) => { stdout += chunk.toString(); });
child.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
child.on('error', (error) => {
clearTimeout(timer);
resolve({ exitCode: 127, stdout, stderr: `${stderr}\n${error.message}` });
});
child.on('close', (code, signal) => {
clearTimeout(timer);
if (timedOut) {
resolve({ exitCode: 124, stdout, stderr: `${stderr}\nTimed out after ${timeoutMs / 1000} seconds; signal=${signal ?? 'none'}` });
return;
}
resolve({ exitCode: code ?? 1, stdout, stderr });
});
});
}

/**
 * Mask credential-looking substrings in command output.
 *
 * Replacements are applied in this order:
 *   1. `Authorization: Bearer <token>` headers.
 *   2. Any other `bearer <token>` with a token of 12 or more characters.
 *   3. Vendor-prefixed tokens. The separator after the prefix may be `-` or `_`,
 *      because GitHub tokens (`ghp_`, `gho_`, `ghu_`, `ghs_`, `ghr_`, `github_pat_`)
 *      and `sk_` style keys use an underscore while `sk-`, `glpat-` and `xoxb-`
 *      style tokens use a hyphen.
 *   4. `key=value` / `key: value` pairs for well-known secret field names.
 *   5. The same field names with a double-quoted value, as in JSON.
 *
 * All patterns are case-insensitive.
 *
 * @param {string} value - Text to scan.
 * @returns {string} The text with matches replaced by `<redacted-token>` or `<redacted-secret>`.
 */
function redactSecretLike(value) {
  return value
    .replace(/(Authorization:\s*Bearer\s+)[^"'\s,}]+/gi, '$1<redacted-token>')
    .replace(/\b(bearer\s+)[a-z0-9._-]{12,}/gi, '$1<redacted-token>')
    .replace(/\b(?:github_pat|sk|ghp|gho|ghu|ghs|ghr|glpat|xox[baprs])[-_][-_A-Za-z0-9]{8,}/gi, '<redacted-token>')
    .replace(/((?:access[_-]?token|refresh[_-]?token|id[_-]?token|client_secret|password)\s*[=:]\s*)[^"'\s,}]+/gi, '$1<redacted-secret>')
    .replace(/("?(?:access[_-]?token|refresh[_-]?token|id[_-]?token|client_secret|password)"?\s*:\s*")[^"]+/gi, '$1<redacted-secret>');
}

/**
 * Redact secret-looking substrings, then cap the text length.
 *
 * Redaction runs before truncation, so the cut is made on the redacted text.
 *
 * @param {string} value - Raw text, for example combined stdout and stderr.
 * @param {number} [max=4000] - Maximum number of characters kept before the truncation marker.
 * @returns {string} The redacted text, cut to `max` characters plus a marker line when longer.
 */
function shortenText(value, max = 4000) {
  const redacted = redactSecretLike(value);
  if (redacted.length > max) {
    return `${redacted.slice(0, max)}\n...<truncated>...`;
  }
  return redacted;
}

/**
 * Return the distinct entries of `values` in default sort order.
 *
 * @param {Iterable<string>} values - Values to de-duplicate.
 * @returns {string[]} A new array of distinct values, sorted with the default comparator.
 */
function unique(values) {
  const distinct = new Set(values);
  return Array.from(distinct).sort();
}

/**
 * Render one command-line argument as text.
 *
 * Empty or missing values become `""`. Values containing whitespace or a double
 * quote are wrapped in double quotes with inner quotes backslash-escaped. Every
 * other value is returned unchanged.
 *
 * NOTE(review): the result is also joined into the cmd.exe `/c` string on Windows.
 * cmd metacharacters such as & ^ % ( ) are not quoted or escaped here — confirm
 * whether that path needs cmd-specific escaping.
 *
 * @param {string} value - Argument to render.
 * @returns {string} The argument, quoted when necessary.
 */
function quoteArg(value) {
  if (!value) {
    return '""';
  }
  const needsQuoting = /[\s"]/.test(value);
  if (!needsQuoting) {
    return value;
  }
  const escaped = value.replaceAll('"', '\\"');
  return `"${escaped}"`;
}

/**
 * Parse the runner's command-line arguments.
 *
 * Each option has a GNU-style spelling (`--repo-root`) and a PowerShell-style
 * spelling (`-RepoRoot`). A bare `--` is ignored wherever it appears.
 *
 * Value options (`repoRoot`, `artifactRoot`, `outputPath`, `commandTimeoutSec`)
 * take the next argument as their value and are returned as strings. Switch
 * options (`skipSharedUnit`, `skipDesktopPlaywright`, `skipWebPlaywright`,
 * `skipDesktopVisualQa`, `skipWebVisualQa`) are set to `true` when present.
 *
 * @param {string[]} rawArgs - Arguments after the script path.
 * @returns {object} Parsed options; keys are present only for options that were supplied.
 * @throws {Error} On an unknown argument, on a value option with no following
 *   value, or on a command timeout that is not usable as a timer delay.
 */
function parseArgs(rawArgs) {
  const valueOptions = new Map([
    ['--repo-root', 'repoRoot'],
    ['-RepoRoot', 'repoRoot'],
    ['--artifact-root', 'artifactRoot'],
    ['-ArtifactRoot', 'artifactRoot'],
    ['--output-path', 'outputPath'],
    ['-OutputPath', 'outputPath'],
    ['--command-timeout-sec', 'commandTimeoutSec'],
    ['-CommandTimeoutSec', 'commandTimeoutSec'],
  ]);
  const switchOptions = new Map([
    ['--skip-shared-unit', 'skipSharedUnit'],
    ['-SkipSharedUnit', 'skipSharedUnit'],
    ['--skip-desktop-playwright', 'skipDesktopPlaywright'],
    ['-SkipDesktopPlaywright', 'skipDesktopPlaywright'],
    ['--skip-web-playwright', 'skipWebPlaywright'],
    ['-SkipWebPlaywright', 'skipWebPlaywright'],
    ['--skip-desktop-visual-qa', 'skipDesktopVisualQa'],
    ['-SkipDesktopVisualQa', 'skipDesktopVisualQa'],
    ['--skip-web-visual-qa', 'skipWebVisualQa'],
    ['-SkipWebVisualQa', 'skipWebVisualQa'],
  ]);
  // Largest delay setTimeout honours; larger, non-positive or NaN delays are coerced to 1 ms.
  const maxTimerDelayMs = 2147483647;

  const parsed = {};
  for (let i = 0; i < rawArgs.length; i += 1) {
    const arg = rawArgs[i];
    if (arg === '--') {
      continue;
    }

    const switchKey = switchOptions.get(arg);
    if (switchKey !== undefined) {
      parsed[switchKey] = true;
      continue;
    }

    const valueKey = valueOptions.get(arg);
    if (valueKey === undefined) {
      throw new Error(`Unknown argument: ${arg}`);
    }

    // Without this check a trailing value option silently fell back to its default.
    const value = rawArgs[i + 1];
    if (value === undefined) {
      throw new Error(`Missing value for ${arg}`);
    }
    i += 1;

    if (valueKey === 'commandTimeoutSec') {
      // The caller multiplies this by 1000 and passes it to setTimeout. An unusable
      // delay would fire almost immediately and report every gate as timed out.
      const delayMs = Number(value) * 1000;
      if (!(delayMs >= 1 && delayMs <= maxTimerDelayMs)) {
        throw new Error(`Invalid value for ${arg}: ${value} (expected a positive number of seconds, at most ${Math.floor(maxTimerDelayMs / 1000)})`);
      }
    }

    parsed[valueKey] = value;
  }
  return parsed;
}
Loading
Loading