Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions evidence/phase0-latency-windows-notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# MCP Latency Benchmark — Windows Investigation Notes

## Environment

| Key | Value |
|-----|-------|
| OS | Windows 11 Home Single Language |
| OS Release | 10.0.26200 |
| Node | v24.14.0 |
| Chrome | 149.0.7827.103 |
| Package | @nhonh/react-debugger@2.1.2 |

## Investigation Summary

The benchmark could not be completed. In v2.1.2, the `react-debugger mcp`
subcommand is an **interactive installer** that creates a local `mcp/`
directory; it is not a stdio JSON-RPC server.

When invoked, it prompts:
> "Directory .../mcp is not empty. Overwrite?"

This means Phase A (the MCP stdio server) has not been published to
npm yet. There is no stdio transport to benchmark in the current
published package.

## Windows-Specific Issues Found

Two Windows-specific bugs were discovered while attempting the benchmark:

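**Bug 1 — `spawn EINVAL` when spawning `npx.cmd` without a shell**
On Windows, Node.js releases that include the CVE-2024-27980 fix
(18.20.2, 20.12.2, 21.7.3 and later) refuse to spawn `.cmd`/`.bat` files
and throw `EINVAL` unless `shell: true` is set. This was hit here while
spawning with `stdio: ["pipe","pipe","pipe"]`. Linux/macOS are not
affected.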

**Bug 2 — `ENOENT` when spawning npm global binary without `.cmd`**
`where react-debugger` returns both a plain path and a `.cmd` path.
Spawning the plain path without `shell: true` throws `ENOENT` because
Windows npm globals are `.cmd` wrappers. The `.cmd` variant must be
used explicitly, or `shell: true` must be set.

## Recommendation

Once Phase A ships a stdio-capable `react-debugger mcp --stdio` command,
re-run `node scripts/bench-mcp-latency.mjs --runs 50` on Windows.
The benchmark script is ready and documents both Windows-specific
spawn quirks above with fixes applied.

## Files Contributed

- `scripts/bench-mcp-latency.mjs` — reusable cross-platform benchmark runner
- `evidence/phase0-latency-windows.json` — run metadata (all failed,
root cause documented above)
- `evidence/phase0-latency-windows-notes.md` — this file
24 changes: 24 additions & 0 deletions evidence/phase0-latency-windows.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"meta": {
"generated_at": "2026-06-12T08:52:28.344Z",
"node_version": "v24.14.0",
"chrome_version": "149.0.7827.103",
"os": "win32",
"os_release": "10.0.26200",
"os_version": "Windows 11 Home Single Language",
"runs_attempted": 50,
"runs_cold_succeeded": 0,
"runs_warm_succeeded": 0,
"network_conditions": "loopback/localhost",
"binary_used": "C:\\Users\\jaisw\\AppData\\Roaming\\npm\\react-debugger.cmd mcp"
},
"cold": {
"error": "all runs failed",
"passed": false
},
"warm": {
"error": "all runs failed",
"passed": false
},
"issues": []
}
243 changes: 243 additions & 0 deletions scripts/bench-mcp-latency.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
// scripts/bench-mcp-latency.mjs
// Cross-platform MCP tool latency benchmark
// Usage: node scripts/bench-mcp-latency.mjs [--runs 50]
import { spawn, execSync } from "node:child_process";
import { writeFileSync, mkdirSync } from "node:fs";
import { platform, release, version } from "node:os";

// Number of measurements per phase, read from `--runs <n>`.
// Falls back to 50 when the flag is absent or its value is not a positive
// integer.
const RUNS = (() => {
  const DEFAULT_RUNS = 50;
  const flagIndex = process.argv.indexOf("--runs");
  // Guard the "flag missing" case explicitly: indexOf() returns -1, and
  // argv[-1 + 1] is argv[0] (the node executable path), which parses to NaN
  // and would make every measurement loop run zero times.
  if (flagIndex === -1) return DEFAULT_RUNS;
  const parsed = Number.parseInt(process.argv[flagIndex + 1], 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : DEFAULT_RUNS;
})();

/**
 * Report the version string of the Node.js runtime executing this script.
 *
 * @returns {string} The value of `process.version`.
 */
function getNodeVersion() {
  const { version: runtimeVersion } = process;
  return runtimeVersion;
}

/**
 * Best-effort lookup of the locally installed Chrome/Chromium version.
 *
 * Every platform yields the same shape: a bare dotted version number, or the
 * literal "unknown" when the browser cannot be found or queried. This lookup
 * never throws.
 *
 * @returns {string} Dotted version number, or "unknown".
 */
function getChromeVersion() {
  // Discard the child's stderr so a failed lookup does not print into the
  // benchmark's progress output.
  const execOptions = { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] };

  // `--version` prints a banner such as "Google Chrome 1.2.3.4"; keep only
  // the number so the value matches what the Windows registry query returns.
  const firstVersionIn = (text) => text.match(/\d+(?:\.\d+)+/)?.[0] ?? "unknown";

  try {
    switch (platform()) {
      case "win32": {
        const out = execSync(
          `reg query "HKEY_CURRENT_USER\\Software\\Google\\Chrome\\BLBeacon" /v version`,
          execOptions
        );
        return out.match(/version\s+REG_SZ\s+([\d.]+)/)?.[1] ?? "unknown";
      }
      case "linux":
        return firstVersionIn(
          execSync(
            "google-chrome --version 2>/dev/null || chromium-browser --version 2>/dev/null",
            execOptions
          )
        );
      case "darwin":
        return firstVersionIn(
          execSync(
            `"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --version`,
            execOptions
          )
        );
      default:
        return "unknown";
    }
  } catch {
    // Browser not installed or query failed; this field is informational only.
    return "unknown";
  }
}

/**
 * Pick the p-th percentile from an array that is already sorted ascending.
 *
 * The element index is `ceil(p / 100 * length) - 1`, clamped so it never
 * drops below the first element.
 *
 * @param {number[]} sorted - Samples in ascending order.
 * @param {number} p - Percentile to read, e.g. 50, 95, 99.
 * @returns {number|null} The selected sample, or null for an empty array.
 */
function percentile(sorted, p) {
  const count = sorted.length;
  if (!count) {
    return null;
  }
  const rank = Math.ceil((p / 100) * count) - 1;
  return rank < 0 ? sorted[0] : sorted[rank];
}

/**
 * Resolve how to launch the `react-debugger` CLI. Doing this once avoids
 * paying npx resolution overhead on every measurement.
 *
 * On Windows the `.cmd`/`.bat` wrapper is preferred, and anything that is not
 * a native `.exe`/`.com` is launched through a shell: spawning a wrapper
 * without one fails with EINVAL, and the extension-less npm shim fails with
 * ENOENT. A path containing whitespace is quoted when it goes through a
 * shell, because the shell would otherwise split it into several tokens.
 *
 * @param {string} [os=platform()] - Platform identifier as returned by
 *   `os.platform()`; overridable so the selection logic can be exercised
 *   off-platform.
 * @param {(tool: string) => string} [locate] - Returns the raw output of the
 *   PATH lookup tool ("where" or "which") for `react-debugger`; expected to
 *   throw when nothing is found.
 * @returns {{cmd: string, args: string[], useShell: boolean}} Launch
 *   descriptor; falls back to `npx @nhonh/react-debugger mcp` when the CLI is
 *   not globally installed.
 */
function resolveBin(
  os = platform(),
  locate = (tool) =>
    execSync(`${tool} react-debugger`, {
      encoding: "utf8",
      // Hide the lookup tool's "not found" chatter on stderr.
      stdio: ["ignore", "pipe", "ignore"],
    })
) {
  const onWindows = os === "win32";

  try {
    const candidates = locate(onWindows ? "where" : "which")
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter(Boolean);

    const wrapper = onWindows
      ? candidates.find((line) => /\.(cmd|bat)$/i.test(line))
      : undefined;
    const bin = wrapper ?? candidates[0];

    if (bin) {
      const useShell = onWindows && !/\.(exe|com)$/i.test(bin);
      const cmd = useShell && /\s/.test(bin) ? `"${bin}"` : bin;
      return { cmd, args: ["mcp"], useShell };
    }
  } catch {
    /* not globally installed — fall through to npx */
  }

  return { cmd: "npx", args: ["@nhonh/react-debugger", "mcp"], useShell: true };
}

// Resolve the launch command a single time at module load, then echo it so
// the run log records which binary the measurements spawn.
const BIN = resolveBin();
{
  const { cmd, args } = BIN;
  console.log(`\n🔧 Using binary: ${cmd} ${args.join(" ")}\n`);
}

/**
 * Spawn the MCP CLI, send a JSON-RPC `initialize` request on stdin, and time
 * how long the matching response takes to arrive on stdout.
 *
 * Only a newline-terminated JSON message whose `id` matches the request counts
 * as a response. Other stdout output (log lines, interactive prompts) is
 * ignored, so a process that merely prints text is not recorded as a
 * successful handshake.
 *
 * @param {{cmd: string, args: string[], useShell: boolean}} [bin=BIN] - Launch
 *   descriptor for the process to measure.
 * @param {number} [timeoutMs=15000] - Give up after this many milliseconds;
 *   the default is generous for cold npx starts.
 * @returns {Promise<number>} Milliseconds from just before spawn until the
 *   `initialize` result was parsed.
 * @throws {Error} (as a rejection) on timeout, spawn failure, a JSON-RPC error
 *   response, or process exit before any matching response.
 */
async function measureRound(bin = BIN, timeoutMs = 15000) {
  return new Promise((resolve, reject) => {
    const REQUEST_ID = 1;
    const STDERR_TAIL_CHARS = 500;
    const start = performance.now();

    const child = spawn(bin.cmd, bin.args, {
      stdio: ["pipe", "pipe", "pipe"],
      shell: bin.useShell,
    });

    let settled = false;
    let pending = ""; // stdout text not yet terminated by a newline
    let stderrTail = ""; // last stderr output, kept for failure diagnostics

    const timer = setTimeout(() => {
      finish(reject, new Error(`Timeout after ${timeoutMs}ms`));
    }, timeoutMs);

    // Single exit path: settle the promise at most once and always release
    // the timer and the child process.
    function finish(settle, value) {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      child.kill("SIGTERM");
      settle(value);
    }

    const parseJsonLine = (line) => {
      try {
        return JSON.parse(line);
      } catch {
        return undefined; // not JSON — log noise or a prompt
      }
    };

    child.stdout.on("data", (chunk) => {
      pending += chunk.toString();
      const lines = pending.split("\n");
      pending = lines.pop(); // keep the trailing partial line for the next chunk

      for (const line of lines) {
        const message = parseJsonLine(line);
        if (!message || message.id !== REQUEST_ID) continue;

        if (message.error != null) {
          const detail = message.error.message ?? JSON.stringify(message.error);
          finish(reject, new Error(`initialize failed: ${detail}`));
          return;
        }
        if ("result" in message) {
          finish(resolve, performance.now() - start);
          return;
        }
      }
    });

    child.stderr.on("data", (chunk) => {
      stderrTail = (stderrTail + chunk.toString()).slice(-STDERR_TAIL_CHARS);
    });

    // If the child exits before reading its stdin, the write below surfaces
    // as an 'error' event on the stream; with no listener that would be an
    // uncaught exception. The 'close' handler reports the actual failure.
    child.stdin.on("error", () => {});

    child.on("error", (err) => finish(reject, err));

    child.on("close", (code) => {
      const detail = stderrTail.trim();
      finish(
        reject,
        new Error(
          `Process exited with code ${code} without responding${detail ? `: ${detail}` : ""}`
        )
      );
    });

    // Send initialize handshake — standard MCP protocol first message
    const initMsg = `${JSON.stringify({
      jsonrpc: "2.0",
      id: REQUEST_ID,
      method: "initialize",
      params: {
        protocolVersion: "2024-11-05",
        capabilities: {},
        clientInfo: { name: "bench", version: "0.0.1" },
      },
    })}\n`;

    child.stdin.write(initMsg);
  });
}

// Run RUNS sequential measurements for one phase, printing progress.
// Returns the successful samples (ms, rounded to 2 decimals) and the message
// of the most recent failure, if any.
async function collectSamples(label) {
  const samples = [];
  let lastError = null;

  for (let i = 0; i < RUNS; i++) {
    process.stdout.write(`\r ${label} [${i + 1}/${RUNS}]`);
    try {
      const ms = await measureRound();
      samples.push(parseFloat(ms.toFixed(2)));
    } catch (e) {
      lastError = e.message;
      console.warn(`\n ⚠ ${label} run ${i + 1} failed: ${e.message}`);
    }
  }

  console.log("\n");
  return { samples, lastError };
}

// Reduce one phase's samples to the statistics block stored in the evidence
// file, or to the "all runs failed" marker when there are no samples.
function summarizePhase(samples, targetMs) {
  if (samples.length === 0) {
    return { error: "all runs failed", passed: false };
  }

  const sorted = [...samples].sort((a, b) => a - b);
  const p95 = percentile(sorted, 95);
  const total = samples.reduce((a, b) => a + b, 0);

  return {
    samples_ms: samples,
    p50_ms: percentile(sorted, 50),
    p95_ms: p95,
    p99_ms: percentile(sorted, 99),
    min_ms: sorted[0],
    max_ms: sorted[sorted.length - 1],
    mean_ms: parseFloat((total / samples.length).toFixed(2)),
    target_ms: targetMs,
    passed: p95 < targetMs,
  };
}

/**
 * Run the cold and warm measurement phases, write the results to
 * `evidence/phase0-latency-<os>.json`, and print a short summary.
 *
 * The `issues` array in the output records p95 target misses and failed runs,
 * including the last error message when a whole phase failed, so the evidence
 * file explains itself without the console log.
 *
 * NOTE(review): the warm phase calls measureRound() exactly like the cold
 * phase, so each "warm" run also spawns a fresh process. Confirm this is the
 * intended definition of "warm".
 *
 * @returns {Promise<void>} Resolves once the file is written and the summary
 *   is printed.
 */
async function runBenchmark() {
  const COLD_TARGET_MS = 1000;
  const WARM_TARGET_MS = 500;

  console.log(`Running ${RUNS} cold measurements on ${platform()}...\n`);
  const coldRun = await collectSamples("cold");

  console.log(`Running ${RUNS} warm measurements...\n`);
  const warmRun = await collectSamples("warm");

  const result = {
    meta: {
      generated_at: new Date().toISOString(),
      node_version: getNodeVersion(),
      chrome_version: getChromeVersion(),
      os: platform(),
      os_release: release(),
      os_version: version(),
      runs_attempted: RUNS,
      runs_cold_succeeded: coldRun.samples.length,
      runs_warm_succeeded: warmRun.samples.length,
      network_conditions: "loopback/localhost",
      binary_used: `${BIN.cmd} ${BIN.args.join(" ")}`,
    },
    cold: summarizePhase(coldRun.samples, COLD_TARGET_MS),
    warm: summarizePhase(warmRun.samples, WARM_TARGET_MS),
    issues: [],
  };

  const phases = [
    { label: "cold", run: coldRun, summary: result.cold, targetMs: COLD_TARGET_MS },
    { label: "warm", run: warmRun, summary: result.warm, targetMs: WARM_TARGET_MS },
  ];

  for (const { label, run, summary, targetMs } of phases) {
    if (summary.error) {
      // Without this entry a fully failed benchmark produced an empty
      // `issues` list, which reads as "nothing wrong".
      const reason = run.lastError ? ` (last error: ${run.lastError})` : "";
      result.issues.push(`${label}: all ${RUNS} runs failed${reason}`);
      continue;
    }

    const failedRuns = RUNS - run.samples.length;
    if (failedRuns > 0) {
      result.issues.push(`${label}: ${failedRuns} of ${RUNS} runs failed`);
    }
    if (!summary.passed) {
      result.issues.push(
        `${label} p95 (${summary.p95_ms.toFixed(1)}ms) exceeds ${targetMs}ms target`
      );
    }
  }

  mkdirSync("evidence", { recursive: true });

  const osTag = platform() === "win32" ? "windows" : platform();
  const outPath = `evidence/phase0-latency-${osTag}.json`;
  writeFileSync(outPath, JSON.stringify(result, null, 2));

  console.log(`✅ Results written to ${outPath}`);
  if (result.cold.p95_ms != null) {
    console.log(` cold p95: ${result.cold.p95_ms.toFixed(1)}ms (target <1000ms) ${result.cold.passed ? "✅" : "❌"}`);
  }
  if (result.warm.p95_ms != null) {
    console.log(` warm p95: ${result.warm.p95_ms.toFixed(1)}ms (target <500ms) ${result.warm.passed ? "✅" : "❌"}`);
  }

  if (result.issues.length) {
    console.log("\n⚠ Issues found:");
    result.issues.forEach((i) => console.log(" -", i));
  }
}

// Script entry point: start the benchmark. Any rejection is logged and turned
// into a non-zero exit status.
function onBenchmarkFailure(failure) {
  console.error("Benchmark failed:", failure);
  process.exit(1);
}

runBenchmark().catch(onBenchmarkFailure);
Loading