Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
ff06799
feat(wizard-ci): full e2e via control plane (--e2e) + replay (--replay)
gewenyu99 Jun 21, 2026
a84c8d5
feat(wizard-ci): TUI visual-regression snapshots for CI-e2e test defi…
gewenyu99 Jun 22, 2026
6707c7d
docs(wizard-ci): document snapshots env prerequisites
gewenyu99 Jun 22, 2026
d0996bc
fix(wizard-ci): never fail on snapshot drift, just surface the diffs
gewenyu99 Jun 22, 2026
b1873ca
feat(wizard-ci): offer to replay the run's snapshots at the end
gewenyu99 Jun 22, 2026
3ae6048
docs: add agentic-exploration section with an example prompt
gewenyu99 Jun 22, 2026
77f1b74
docs: remove agent-exploration section (moved to wizard README); trim…
gewenyu99 Jun 22, 2026
3fdec88
feat(wizard-ci): render snapshot report.html to PNG screenshots (Play…
gewenyu99 Jun 22, 2026
fe46b52
feat(wizard-ci): snapshot-review dispatch — open a PR with TUI screen…
gewenyu99 Jun 22, 2026
084b1b0
refactor(wizard-ci): drive the real TUI for e2e + snapshots
gewenyu99 Jun 23, 2026
ff5dc36
fix(wizard-ci): print an openable command for the snapshot report
gewenyu99 Jun 23, 2026
c1f6475
refactor(wizard-ci): the e2e run always runs the agent (drop RUN_AGENT)
gewenyu99 Jun 23, 2026
d75f766
feat(wizard-ci): /wizard-ci PR-comment trigger; post the report back …
gewenyu99 Jun 23, 2026
e20233c
ci(wizard-ci): add an opt-in snapshots job (dispatch with snapshots=t…
gewenyu99 Jun 23, 2026
a88eec8
ci(wizard-ci): one snapshots switch on the existing job (headless or …
gewenyu99 Jun 23, 2026
c8af2c2
ci(wizard-ci): install Chromium for the snapshots path (screenshot ra…
gewenyu99 Jun 23, 2026
0464aff
fix(wizard-ci): git() takes a string, not an array (snapshot-review P…
gewenyu99 Jun 23, 2026
ee6c142
feat(wizard-ci): commit raw .txt snapshots into the review PR (debugg…
gewenyu99 Jun 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 22 additions & 8 deletions .github/workflows/wizard-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ on:
description: 'Post notifications to Slack'
type: boolean
default: false
snapshots:
description: 'Run real-TUI snapshots instead of the evaluator (opens a review PR)'
type: boolean
default: false

# Webhook trigger from external repos (wizard, context-mill, posthog)
# POST https://api.github.com/repos/{owner}/{repo}/dispatches
Expand Down Expand Up @@ -619,19 +623,30 @@ jobs:
# PostHog tracking for eval analytics
POSTHOG_PROJECT_TOKEN: ${{ secrets.POSTHOG_PROJECT_TOKEN }}

- name: Install Chromium for Playwright (snapshots)
if: ${{ inputs.snapshots }}
run: pnpm exec playwright install --with-deps chromium

- name: Execute wizard
id: execute-wizard
continue-on-error: true
run: |
CMD_ARGS=(pnpm wizard-ci --app "$MATRIX_APP" --base "$INPUT_BASE_BRANCH")
if [ -n "$TRIGGER_ID" ]; then
CMD_ARGS+=(--trigger-id "$TRIGGER_ID")
fi
if [ "$INPUT_EVALUATE" = "true" ]; then
CMD_ARGS+=(--evaluate)
if [ "$SNAPSHOTS" = "true" ]; then
# Snapshot mode: real-TUI visual regression + review PR (no evaluator).
pnpm wizard-ci-snapshot-review "$MATRIX_APP" 2>&1 | tee wizard-output.log
else
CMD_ARGS=(pnpm wizard-ci --app "$MATRIX_APP" --base "$INPUT_BASE_BRANCH")
if [ -n "$TRIGGER_ID" ]; then
CMD_ARGS+=(--trigger-id "$TRIGGER_ID")
fi
if [ "$INPUT_EVALUATE" = "true" ]; then
CMD_ARGS+=(--evaluate)
fi
"${CMD_ARGS[@]}" 2>&1 | tee wizard-output.log
fi
"${CMD_ARGS[@]}" 2>&1 | tee wizard-output.log
env:
SNAPSHOTS: ${{ inputs.snapshots }}
POSTHOG_WIZARD_PROJECT_ID: '2'
MATRIX_APP: ${{ matrix.app }}
INPUT_BASE_BRANCH: ${{ needs.discover.outputs.input_base_branch }}
TRIGGER_ID: ${{ needs.discover.outputs.trigger_id }}
Expand Down Expand Up @@ -1022,4 +1037,3 @@ jobs:
}
]
}"

145 changes: 145 additions & 0 deletions .github/workflows/wizard-snapshots.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
name: Wizard Snapshots

# Visual-review dispatch: run the wizard e2e on an app, render the TUI to
# side-by-side screenshots (baseline | current), and open a PR for a human to
# eyeball — instead of running the agent evaluator. Merging the PR accepts the
# new baseline.

permissions:
contents: read

on:
workflow_dispatch:
inputs:
app:
description: 'App path (e.g. "basic-integration/javascript-node/express-todo")'
required: false
type: string
default: 'basic-integration/javascript-node/express-todo'
wizard_ref:
description: 'Wizard repo branch/tag/sha'
type: string
default: 'main'
context_mill_ref:
description: 'Context Mill repo branch/tag/sha'
type: string
default: 'main'
posthog_ref:
description: 'PostHog repo branch/tag/sha (for MCP)'
type: string
default: 'master'
project_id:
description: 'PostHog project id the key is scoped to (defaults to the WIZARD_SNAPSHOTS_PROJECT_ID repo variable)'
required: false
type: string
posthog_region:
description: 'PostHog region'
type: choice
options: [us, eu]
default: 'us'
repository_dispatch:
types: [wizard-snapshots-trigger]
# Comment `/wizard-ci [app] [wizard_ref]` on a PR to run the e2e and have the
# results posted back as a comment on that PR.
issue_comment:
types: [created]

jobs:
snapshots:
# Dispatches always run; comment triggers only on a PR, only for the
# `/wizard-ci` command, and only from a repo member/owner/collaborator.
if: >-
github.event_name == 'workflow_dispatch' ||
github.event_name == 'repository_dispatch' ||
(github.event_name == 'issue_comment' &&
github.event.issue.pull_request != null &&
startsWith(github.event.comment.body, '/wizard-ci') &&
contains(fromJSON('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association))
runs-on: ubuntu-latest
timeout-minutes: 60
permissions:
contents: write # commit the screenshots to the review branch
pull-requests: write
issues: write # comment + react on the triggering PR
id-token: write
steps:
- name: Generate GitHub App token
id: app-token
uses: actions/create-github-app-token@d72941d797fd3113feb6b93fd0dec494b13a2547 # v1.12.0
with:
app-id: ${{ secrets.GH_APP_POSTHOG_WIZARD_CI_BOT_APP_ID }}
private-key: ${{ secrets.GH_APP_POSTHOG_WIZARD_CI_BOT_PRIVATE_KEY }}
owner: PostHog
repositories: wizard-workbench

# Normalise whichever trigger fired (PR comment, workflow_dispatch or
# repository_dispatch) into four step outputs consumed by later steps:
#   app, wizard_ref, comment_pr, checkout_ref
#
# Every ${{ }} value reaches the script through `env` instead of being spliced
# into the script text. Expressions are expanded before bash parses the
# script, so an `app` / `wizard_ref` / `comment_pr` containing `"`, `$(...)`
# or a backtick would otherwise run as shell code on this runner.
- name: Resolve request
  id: req
  env:
    EVENT_NAME: ${{ github.event_name }}
    COMMENT_BODY: ${{ github.event.comment.body }}
    ISSUE_NUMBER: ${{ github.event.issue.number }}
    REQ_APP: ${{ inputs.app || github.event.client_payload.app || 'basic-integration/javascript-node/express-todo' }}
    REQ_WIZARD_REF: ${{ inputs.wizard_ref || github.event.client_payload.wizard_ref || 'main' }}
    REQ_COMMENT_PR: ${{ github.event.client_payload.comment_pr }}
  run: |
    if [ "$EVENT_NAME" = "issue_comment" ]; then
      # Everything after the command word: `/wizard-ci [app] [wizard_ref]`.
      REST="${COMMENT_BODY#/wizard-ci}"
      # Comment bodies may use CRLF line endings. Without this, a bare
      # `/wizard-ci` followed by another line leaves APP=$'\r', which is
      # non-empty and therefore skips the default below.
      REST="${REST//$'\r'/}"
      # `read` consumes only the first line; surplus words land in `_`.
      read -r APP REF _ <<< "$REST"
      {
        echo "app=${APP:-basic-integration/javascript-node/express-todo}"
        echo "wizard_ref=${REF:-main}"
        echo "comment_pr=${ISSUE_NUMBER}"
        echo "checkout_ref=refs/pull/${ISSUE_NUMBER}/merge"
      } >> "$GITHUB_OUTPUT"
    else
      # Dispatch triggers: inputs win over client_payload, then the defaults
      # (already resolved by the expressions in `env`). An empty checkout_ref
      # is emitted so the output always exists.
      {
        echo "app=${REQ_APP}"
        echo "wizard_ref=${REQ_WIZARD_REF}"
        echo "comment_pr=${REQ_COMMENT_PR}"
        echo "checkout_ref="
      } >> "$GITHUB_OUTPUT"
    fi

# Adds an "eyes" reaction to the comment that triggered the run, so the
# commenter can see the command was picked up. Runs only for issue_comment
# events and authenticates with the GitHub App token minted earlier.
- name: Acknowledge the command
if: github.event_name == 'issue_comment'
env:
GH_TOKEN: ${{ steps.app-token.outputs.token }}
# Best-effort: the API response is discarded and `|| true` keeps a failed
# reaction from failing the job.
run: |
gh api "repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions" -f content=eyes >/dev/null || true

# Checks out this repository with the GitHub App token and full history
# (fetch-depth: 0).
#
# `ref` comes from the "Resolve request" step: `refs/pull/<PR>/merge` for a
# `/wizard-ci` comment, empty for dispatch triggers (presumably falling back
# to actions/checkout's default ref; confirm).
#
# NOTE(review): code scanning reports "Untrusted Checkout TOCTOU" on the steps
# that follow. The merge ref is mutable, so the PR contents that later steps
# execute (dependency install, the local setup action, the snapshot script)
# can differ from what the commenter looked at, while the job holds write
# permissions and secrets. Consider checking out a pinned commit SHA and/or
# gating the comment trigger behind an approval step; verify the chosen
# mitigation against the code-scanning guidance before merging.
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
with:
token: ${{ steps.app-token.outputs.token }}
ref: ${{ steps.req.outputs.checkout_ref }}
fetch-depth: 0

- name: Setup Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: '24'

- name: Setup pnpm
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061

- name: Install dependencies
run: pnpm install --frozen-lockfile

- name: Install Chromium for Playwright

Check failure

Code scanning / CodeQL

Untrusted Checkout TOCTOU Critical

Insufficient protection against execution of untrusted code on a privileged workflow (
issue_comment
).
Comment on lines +116 to +119
run: pnpm exec playwright install --with-deps chromium

- name: Setup wizard dependencies

Check failure

Code scanning / CodeQL

Untrusted Checkout TOCTOU Critical

Insufficient protection against execution of untrusted code on a privileged workflow (
issue_comment
).
Comment on lines +119 to +122
# Exports WIZARD_PATH / CONTEXT_MILL_PATH / MCP_PATH.
uses: ./.github/actions/setup-wizard-deps
with:
wizard_ref: ${{ steps.req.outputs.wizard_ref }}
context_mill_ref: ${{ inputs.context_mill_ref || 'main' }}
posthog_ref: ${{ inputs.posthog_ref || 'master' }}
app_token: ${{ steps.app-token.outputs.token }}
save_cache: 'false'

- name: Render snapshots + report (review PR, and a comment when triggered by /wizard-ci)

Check failure

Code scanning / CodeQL

Untrusted Checkout TOCTOU Critical

Insufficient protection against execution of untrusted code on a privileged workflow (
issue_comment
).
Comment on lines +122 to +132
env:
GH_TOKEN: ${{ steps.app-token.outputs.token }}
POSTHOG_PERSONAL_API_KEY: ${{ secrets.GH_APP_POSTHOG_WIZARD_CI_BOT_POSTHOG_PERSONAL_KEY }}
POSTHOG_WIZARD_PROJECT_ID: ${{ inputs.project_id || github.event.client_payload.project_id || vars.WIZARD_SNAPSHOTS_PROJECT_ID }}
POSTHOG_REGION: ${{ inputs.posthog_region || 'us' }}
APP: ${{ steps.req.outputs.app }}
COMMENT_PR: ${{ steps.req.outputs.comment_pr }}
run: |
if [ -n "$COMMENT_PR" ]; then
pnpm wizard-ci-snapshot-review "$APP" --comment-pr "$COMMENT_PR"
else
pnpm wizard-ci-snapshot-review "$APP"
fi

Check failure

Code scanning / CodeQL

Untrusted Checkout TOCTOU Critical

Insufficient protection against execution of untrusted code on a privileged workflow (
issue_comment
).
Comment on lines +132 to +145
8 changes: 8 additions & 0 deletions mprocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,14 @@ procs:
autostart: false
env_file: .env

wizard-ci-snapshots:
# Run the CI-e2e test definitions, render TUI snapshots of each real run,
# and diff against the committed baseline. Prints a per-frame summary and
# writes a side-by-side visual report (report.html). --update to accept.
shell: "pnpm wizard-ci-snapshots"
autostart: false
env_file: .env

# ═══════════════════════════════════════════════════════════════════════════
# PR/BRANCH EVALUATION
# ═══════════════════════════════════════════════════════════════════════════
Expand Down
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"scripts": {
"evaluate": "tsx services/pr-evaluator/index.ts",
"wizard-ci": "tsx services/wizard-ci/index.ts",
"wizard-ci-snapshots": "tsx services/wizard-ci/snapshots.ts",
"wizard-ci-snapshot-review": "tsx services/wizard-ci/snapshot-review.ts",
"benchmark": "tsx services/wizard-benchmark/index.ts",
"framework-detect": "tsx services/framework-detect/index.ts",
"yara-scan": "tsx services/yara-scan/index.ts",
Expand All @@ -23,6 +25,7 @@
"devDependencies": {
"@types/node": "^22.0.0",
"@types/sanitize-html": "^2.16.1",
"playwright": "^1.61.0",
"tsx": "^4.19.0",
"typescript": "^5.8.0"
},
Expand Down
29 changes: 29 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading