From 036e6a21f6db724fc6eca7852fd7c86e661c36b7 Mon Sep 17 00:00:00 2001
From: Leo Meyerovich <leo@graphistry.com>
Date: Wed, 6 May 2026 17:41:41 -0700
Subject: [PATCH 1/4] gfql: add benchmark residual triage tool for issue #880

---
 ai/docs/gfql/README.md                        |   1 +
 bin/gfql_issue_880_triage.py                  |  14 +
 .../compute/gfql/benchmark_residual_triage.py | 324 ++++++++++++++++++
 .../gfql/test_benchmark_residual_triage.py    | 183 ++++++++++
 4 files changed, 522 insertions(+)
 create mode 100755 bin/gfql_issue_880_triage.py
 create mode 100644 graphistry/compute/gfql/benchmark_residual_triage.py
 create mode 100644 graphistry/tests/compute/gfql/test_benchmark_residual_triage.py

diff --git a/ai/docs/gfql/README.md b/ai/docs/gfql/README.md
index ce98239f9f..333783b90d 100644
--- a/ai/docs/gfql/README.md
+++ b/ai/docs/gfql/README.md
@@ -9,6 +9,7 @@ Guide for AI assistants working with GFQL (Graph Frame Query Language) in PyGrap
 - [`predicates_checklist.md`](./predicates_checklist.md) — End-to-end checklist for predicate implementations.
 - [`conformance.md`](./conformance.md) — Cypher TCK conformance harness and CI wiring.
 - [`../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md`](../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md) — Guidance for keeping AI assistants aligned with GFQL changes.
+- `python bin/gfql_issue_880_triage.py --runs-dir ../pyg-bench/results/runs` — Builds benchmark-evidence markdown for splitting residual workaround-backed `#880` lanes into narrower child issues.
 
 ### Essential GFQL Operations
 ```python
diff --git a/bin/gfql_issue_880_triage.py b/bin/gfql_issue_880_triage.py
new file mode 100755
index 0000000000..b58cbbf772
--- /dev/null
+++ b/bin/gfql_issue_880_triage.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+from pathlib import Path
+import sys
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+from graphistry.compute.gfql.benchmark_residual_triage import run_cli
+
+
+if __name__ == "__main__":
+    raise SystemExit(run_cli())
diff --git a/graphistry/compute/gfql/benchmark_residual_triage.py b/graphistry/compute/gfql/benchmark_residual_triage.py
new file mode 100644
index 0000000000..47798335f7
--- /dev/null
+++ b/graphistry/compute/gfql/benchmark_residual_triage.py
@@ -0,0 +1,324 @@
+from __future__ import annotations
+
+import argparse
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple
+
+
+@dataclass(frozen=True)
+class ResidualLane:
+    run_dir: Path
+    run_dir_name: str
+    probe_path: Path
+    config_path: Optional[str]
+    remote_command: Optional[str]
+    backend: str
+    status: str
+    issue_refs: Tuple[str, ...]
+    suite: Optional[str]
+    upstream_query: Optional[str]
+    query_id: Optional[str]
+    semantic_scope: Optional[str]
+    notes: Tuple[str, ...]
+    latency_ms: Optional[float]
+    adapter_overhead_latency_ms: Optional[float]
+    rows_returned: Optional[int]
+
+    @property
+    def lane_key(self) -> str:
+        upstream = self.upstream_query or "unknown-upstream"
+        query_id = self.query_id or "unknown-query-id"
+        return f"{upstream}:{query_id}:{self.backend}"
+
+
+def _read_json(path: Path) -> Any:
+    with path.open("r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def _coerce_float(value: Any) -> Optional[float]:
+    if value is None:
+        return None
+    if isinstance(value, (int, float)):
+        return float(value)
+    return None
+
+
+def _coerce_int(value: Any) -> Optional[int]:
+    if value is None:
+        return None
+    if isinstance(value, bool):
+        return None
+    if isinstance(value, int):
+        return value
+    return None
+
+
+def _coerce_str_list(value: Any) -> Tuple[str, ...]:
+    if not isinstance(value, list):
+        return tuple()
+    out: List[str] = []
+    for item in value:
+        if isinstance(item, str):
+            out.append(item)
+    return tuple(out)
+
+
+def _load_manifest_config_path(run_dir: Path) -> Optional[str]:
+    manifest_path = run_dir / "run-manifest.json"
+    if not manifest_path.exists():
+        return None
+    payload = _read_json(manifest_path)
+    if not isinstance(payload, Mapping):
+        return None
+    config_path = payload.get("config_path")
+    return config_path if isinstance(config_path, str) else None
+
+
+def _load_remote_command(run_dir: Path) -> Optional[str]:
+    remote_path = run_dir / "remote-execution.json"
+    if not remote_path.exists():
+        return None
+    payload = _read_json(remote_path)
+    if not isinstance(payload, Mapping):
+        return None
+    command = payload.get("command")
+    return command if isinstance(command, str) else None
+
+
+def iter_probe_result_paths(runs_dir: Path) -> Iterable[Path]:
+    for probe_path in sorted(runs_dir.glob("*/probe-results.json")):
+        if probe_path.is_file():
+            yield probe_path
+
+
+def load_residual_lanes(runs_dir: Path) -> List[ResidualLane]:
+    lanes: List[ResidualLane] = []
+    for probe_path in iter_probe_result_paths(runs_dir):
+        run_dir = probe_path.parent
+        run_dir_name = run_dir.name
+        config_path = _load_manifest_config_path(run_dir)
+        remote_command = _load_remote_command(run_dir)
+        payload = _read_json(probe_path)
+        if not isinstance(payload, list):
+            continue
+        for entry in payload:
+            if not isinstance(entry, Mapping):
+                continue
+            backend = entry.get("backend")
+            status = entry.get("status")
+            if not isinstance(backend, str) or not isinstance(status, str):
+                continue
+            lanes.append(
+                ResidualLane(
+                    run_dir=run_dir,
+                    run_dir_name=run_dir_name,
+                    probe_path=probe_path,
+                    config_path=config_path,
+                    remote_command=remote_command,
+                    backend=backend,
+                    status=status,
+                    issue_refs=_coerce_str_list(entry.get("issue_refs")),
+                    suite=entry.get("suite") if isinstance(entry.get("suite"), str) else None,
+                    upstream_query=entry.get("upstream_query") if isinstance(entry.get("upstream_query"), str) else None,
+                    query_id=entry.get("query_id") if isinstance(entry.get("query_id"), str) else None,
+                    semantic_scope=entry.get("semantic_scope") if isinstance(entry.get("semantic_scope"), str) else None,
+                    notes=_coerce_str_list(entry.get("notes")),
+                    latency_ms=_coerce_float(entry.get("latency_ms")),
+                    adapter_overhead_latency_ms=_coerce_float(entry.get("adapter_overhead_latency_ms")),
+                    rows_returned=_coerce_int(entry.get("rows_returned")),
+                )
+            )
+    return lanes
+
+
+def filter_issue_residuals(
+    lanes: Sequence[ResidualLane],
+    issue_ref: str,
+    backend: str = "gfql",
+    status: str = "partial",
+) -> List[ResidualLane]:
+    return [
+        lane
+        for lane in lanes
+        if lane.backend == backend and lane.status == status and issue_ref in lane.issue_refs
+    ]
+
+
+def latest_per_lane_key(lanes: Sequence[ResidualLane]) -> List[ResidualLane]:
+    latest: Dict[str, ResidualLane] = {}
+    for lane in lanes:
+        prior = latest.get(lane.lane_key)
+        if prior is None or lane.probe_path.stat().st_mtime > prior.probe_path.stat().st_mtime:
+            latest[lane.lane_key] = lane
+    return sorted(latest.values(), key=lambda lane: lane.lane_key)
+
+
+def classify_residual_bucket(lane: ResidualLane) -> str:
+    haystack = " ".join([lane.semantic_scope or "", *lane.notes]).lower()
+    if "ancestor-post" in haystack or "ancestor post" in haystack:
+        return "recursive_ancestor_row_join"
+    if "shortest" in haystack:
+        return "path_distance_plus_multi_table_join"
+    if "co-occurrence" in haystack or "cooccurrence" in haystack:
+        return "tag_cooccurrence_join_aggregation"
+    if "reply" in haystack and "author" in haystack:
+        return "reply_author_row_shaping_join"
+    if "employment" in haystack or "company" in haystack:
+        return "employment_company_row_join"
+    if "join" in haystack and "aggregate" in haystack:
+        return "joined_row_aggregation"
+    if "join" in haystack:
+        return "joined_row_projection"
+    return "unclassified_residual"
+
+
+def bucket_residuals(lanes: Sequence[ResidualLane]) -> Dict[str, List[ResidualLane]]:
+    buckets: Dict[str, List[ResidualLane]] = {}
+    for lane in lanes:
+        bucket = classify_residual_bucket(lane)
+        buckets.setdefault(bucket, []).append(lane)
+    for values in buckets.values():
+        values.sort(key=lambda lane: lane.lane_key)
+    return dict(sorted(buckets.items(), key=lambda kv: kv[0]))
+
+
+def _format_issue_template_title(bucket: str) -> str:
+    return f"[FEA] GFQL residual row-bindings: {bucket.replace('_', ' ')}"
+
+
+def render_markdown_report(
+    lanes: Sequence[ResidualLane],
+    issue_ref: str,
+    include_child_issue_templates: bool = True,
+) -> str:
+    rows: List[str] = []
+    rows.append(f"# Residual Triage For `{issue_ref}`")
+    rows.append("")
+    rows.append(f"- Residual lane count: **{len(lanes)}**")
+    rows.append("- Filter: backend=`gfql`, status=`partial`, matching issue ref")
+    rows.append("")
+    rows.append("## Lane Matrix")
+    rows.append("")
+    rows.append("| upstream_query | query_id | semantic_scope | latency_ms | run_dir |")
+    rows.append("|---|---|---|---:|---|")
+    for lane in lanes:
+        rows.append(
+            "| {upstream} | {query_id} | {scope} | {latency} | `{run_dir}` |".format(
+                upstream=lane.upstream_query or "-",
+                query_id=lane.query_id or "-",
+                scope=lane.semantic_scope or "-",
+                latency=f"{lane.latency_ms:.3f}" if lane.latency_ms is not None else "-",
+                run_dir=lane.run_dir_name,
+            )
+        )
+
+    buckets = bucket_residuals(lanes)
+    rows.append("")
+    rows.append("## Proposed Child-Issue Split")
+    rows.append("")
+    rows.append("| bucket | lane_count | upstream_queries |")
+    rows.append("|---|---:|---|")
+    for bucket, bucket_lanes in buckets.items():
+        upstreams = sorted({lane.upstream_query or "-" for lane in bucket_lanes})
+        rows.append(f"| `{bucket}` | {len(bucket_lanes)} | {', '.join(upstreams)} |")
+
+    if include_child_issue_templates:
+        rows.append("")
+        rows.append("## Child Issue Drafts")
+        rows.append("")
+        for bucket, bucket_lanes in buckets.items():
+            rows.append(f"### {_format_issue_template_title(bucket)}")
+            rows.append("")
+            rows.append("Scope:")
+            rows.append("- Residual workaround-backed GFQL partial lanes grouped by shared benchmark semantics.")
+            rows.append("- Split from umbrella `{}` to keep root causes narrow and reproducible.".format(issue_ref))
+            rows.append("")
+            rows.append("Repro Evidence:")
+            for lane in bucket_lanes:
+                rows.append(
+                    "- `{upstream}` / `{query_id}` from `{run_dir}` (`probe-results.json`)".format(
+                        upstream=lane.upstream_query or "-",
+                        query_id=lane.query_id or "-",
+                        run_dir=lane.run_dir_name,
+                    )
+                )
+                if lane.config_path:
+                    rows.append(f"  - config: `{lane.config_path}`")
+                if lane.remote_command:
+                    rows.append(f"  - command: `{lane.remote_command}`")
+                if lane.notes:
+                    rows.append(f"  - workaround note: {lane.notes[-1]}")
+            rows.append("")
+            rows.append("Done when:")
+            rows.append("- Query lanes no longer require adapter workaround rows/joins under benchmark runs.")
+            rows.append(f"- `issue_refs` in benchmark probes no longer need `{issue_ref}` for these lanes.")
+            rows.append("")
+    return "\n".join(rows).rstrip() + "\n"
+
+
+def build_arg_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="Generate #880 residual triage report from pyg-bench run artifacts."
+    )
+    parser.add_argument(
+        "--runs-dir",
+        default=None,
+        help="Path to pyg-bench results/runs directory. If omitted, auto-detects common local locations.",
+    )
+    parser.add_argument(
+        "--issue-ref",
+        default="graphistry/pygraphistry#880",
+        help="Issue reference to filter on",
+    )
+    parser.add_argument(
+        "--output",
+        default=None,
+        help="Optional output markdown path",
+    )
+    parser.add_argument(
+        "--all-runs",
+        action="store_true",
+        help="Include all matching lanes instead of only latest per lane key",
+    )
+    return parser
+
+
+def _resolve_runs_dir(runs_dir_arg: Optional[str]) -> Path:
+    if runs_dir_arg:
+        return Path(runs_dir_arg).expanduser()
+
+    candidates = [
+        Path.cwd() / "results" / "runs",
+        Path.cwd().parent / "pyg-bench" / "results" / "runs",
+    ]
+    for candidate in candidates:
+        if candidate.exists():
+            return candidate
+
+    raise FileNotFoundError(
+        "Could not auto-detect pyg-bench runs directory. Pass --runs-dir explicitly."
+    )
+
+
+def run_cli(argv: Optional[Sequence[str]] = None) -> int:
+    parser = build_arg_parser()
+    args = parser.parse_args(argv)
+    runs_dir = _resolve_runs_dir(args.runs_dir)
+    lanes = load_residual_lanes(runs_dir)
+    lanes = filter_issue_residuals(lanes, issue_ref=args.issue_ref)
+    if not args.all_runs:
+        lanes = latest_per_lane_key(lanes)
+    report = render_markdown_report(lanes, issue_ref=args.issue_ref)
+    if args.output:
+        out_path = Path(args.output).expanduser()
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        out_path.write_text(report, encoding="utf-8")
+    print(report)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(run_cli())
diff --git a/graphistry/tests/compute/gfql/test_benchmark_residual_triage.py b/graphistry/tests/compute/gfql/test_benchmark_residual_triage.py
new file mode 100644
index 0000000000..1391de9b3c
--- /dev/null
+++ b/graphistry/tests/compute/gfql/test_benchmark_residual_triage.py
@@ -0,0 +1,183 @@
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+
+from graphistry.compute.gfql.benchmark_residual_triage import (
+    ResidualLane,
+    _resolve_runs_dir,
+    bucket_residuals,
+    filter_issue_residuals,
+    latest_per_lane_key,
+    load_residual_lanes,
+    render_markdown_report,
+)
+
+
+def _write_json(path: Path, payload: object) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(payload), encoding="utf-8")
+
+
+def test_load_filter_and_latest_selection(tmp_path: Path) -> None:
+    runs_dir = tmp_path / "runs"
+    issue = "graphistry/pygraphistry#880"
+    query_id = "new-topics"
+    upstream_query = "interactive-complex-4"
+
+    run_a = runs_dir / "run-a"
+    run_b = runs_dir / "run-b"
+
+    _write_json(
+        run_a / "probe-results.json",
+        [
+            {
+                "backend": "gfql",
+                "status": "partial",
+                "issue_refs": [issue],
+                "suite": "snb-interactive",
+                "upstream_query": upstream_query,
+                "query_id": query_id,
+                "semantic_scope": "adapter_join_aggregation_workaround",
+                "notes": ["Adapter workaround joins and aggregates locally."],
+                "latency_ms": 12.0,
+            },
+            {
+                "backend": "cypher",
+                "status": "ok",
+                "issue_refs": [],
+                "suite": "snb-interactive",
+                "upstream_query": upstream_query,
+                "query_id": query_id,
+            },
+        ],
+    )
+    _write_json(run_a / "run-manifest.json", {"config_path": "configs/suites/ic4.yaml"})
+    _write_json(run_a / "remote-execution.json", {"command": "uv run python scripts/run_suite.py ..."})
+
+    _write_json(
+        run_b / "probe-results.json",
+        [
+            {
+                "backend": "gfql",
+                "status": "partial",
+                "issue_refs": [issue],
+                "suite": "snb-interactive",
+                "upstream_query": upstream_query,
+                "query_id": query_id,
+                "semantic_scope": "adapter_join_aggregation_workaround",
+                "notes": ["Adapter workaround joins and aggregates locally."],
+                "latency_ms": 10.0,
+            }
+        ],
+    )
+    _write_json(run_b / "run-manifest.json", {"config_path": "configs/suites/ic4.yaml"})
+
+    # Ensure run-b is newer for latest-per-key selection.
+    older = run_a / "probe-results.json"
+    newer = run_b / "probe-results.json"
+    os.utime(older, (1_700_000_000, 1_700_000_000))
+    os.utime(newer, (1_700_000_100, 1_700_000_100))
+
+    lanes = load_residual_lanes(runs_dir)
+    assert len(lanes) == 3
+
+    residuals = filter_issue_residuals(lanes, issue_ref=issue)
+    assert len(residuals) == 2
+    assert all(lane.backend == "gfql" for lane in residuals)
+    assert all(lane.status == "partial" for lane in residuals)
+    assert all(issue in lane.issue_refs for lane in residuals)
+
+    latest = latest_per_lane_key(residuals)
+    assert len(latest) == 1
+    assert latest[0].run_dir_name == "run-b"
+
+
+def test_bucket_classification() -> None:
+    lane_join = ResidualLane(
+        run_dir=Path("/tmp/run1"),
+        run_dir_name="run1",
+        probe_path=Path("/tmp/run1/probe-results.json"),
+        config_path=None,
+        remote_command=None,
+        backend="gfql",
+        status="partial",
+        issue_refs=("graphistry/pygraphistry#880",),
+        suite="snb-interactive",
+        upstream_query="interactive-complex-4",
+        query_id="new-topics",
+        semantic_scope="adapter_join_aggregation_workaround",
+        notes=("Adapter workaround joins rows and aggregates locally.",),
+        latency_ms=1.0,
+        adapter_overhead_latency_ms=1.0,
+        rows_returned=10,
+    )
+    lane_ancestor = ResidualLane(
+        run_dir=Path("/tmp/run2"),
+        run_dir_name="run2",
+        probe_path=Path("/tmp/run2/probe-results.json"),
+        config_path=None,
+        remote_command=None,
+        backend="gfql",
+        status="partial",
+        issue_refs=("graphistry/pygraphistry#880",),
+        suite="snb-interactive",
+        upstream_query="interactive-short-6",
+        query_id="message-forum",
+        semantic_scope="adapter_join_workaround",
+        notes=("Adapter workaround resolves ancestor-post plus forum/moderator rows locally.",),
+        latency_ms=2.0,
+        adapter_overhead_latency_ms=2.0,
+        rows_returned=20,
+    )
+
+    buckets = bucket_residuals([lane_join, lane_ancestor])
+    assert "joined_row_aggregation" in buckets
+    assert "recursive_ancestor_row_join" in buckets
+    assert buckets["joined_row_aggregation"][0].query_id == "new-topics"
+    assert buckets["recursive_ancestor_row_join"][0].query_id == "message-forum"
+
+
+def test_render_markdown_report_contains_evidence_blocks() -> None:
+    lane = ResidualLane(
+        run_dir=Path("/tmp/runx"),
+        run_dir_name="runx",
+        probe_path=Path("/tmp/runx/probe-results.json"),
+        config_path="configs/suites/snb-interactive-ic4-conformance-sf1.yaml",
+        remote_command="uv run python scripts/run_suite.py --suite snb-interactive ...",
+        backend="gfql",
+        status="partial",
+        issue_refs=("graphistry/pygraphistry#880",),
+        suite="snb-interactive",
+        upstream_query="interactive-complex-4",
+        query_id="new-topics",
+        semantic_scope="adapter_join_aggregation_workaround",
+        notes=("Adapter workaround joins rows and aggregates locally.",),
+        latency_ms=123.456,
+        adapter_overhead_latency_ms=120.0,
+        rows_returned=10,
+    )
+
+    report = render_markdown_report([lane], issue_ref="graphistry/pygraphistry#880")
+    assert "Residual Triage For `graphistry/pygraphistry#880`" in report
+    assert "interactive-complex-4" in report
+    assert "Child Issue Drafts" in report
+    assert "configs/suites/snb-interactive-ic4-conformance-sf1.yaml" in report
+    assert "uv run python scripts/run_suite.py" in report
+
+
+def test_resolve_runs_dir_autodetect_and_explicit(tmp_path: Path, monkeypatch) -> None:
+    wd = tmp_path / "repo"
+    wd.mkdir(parents=True, exist_ok=True)
+    sibling_runs = tmp_path / "pyg-bench" / "results" / "runs"
+    sibling_runs.mkdir(parents=True, exist_ok=True)
+
+    monkeypatch.chdir(wd)
+    resolved = _resolve_runs_dir(None)
+    assert resolved == sibling_runs
+
+    explicit = tmp_path / "custom" / "runs"
+    explicit.mkdir(parents=True, exist_ok=True)
+    resolved_explicit = _resolve_runs_dir(str(explicit))
+    assert resolved_explicit == explicit

From 9dffbca055139c0800438188f2f626f5002f86c2 Mon Sep 17 00:00:00 2001
From: Leo Meyerovich <leo@graphistry.com>
Date: Wed, 6 May 2026 18:08:24 -0700
Subject: [PATCH 2/4] chore(changelog): add #880 triage tooling entry

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e060a73887..9ede3be0f7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 ### Documentation
 - **GFQL component-labeling examples + README clarity (#1324)**: Added concise WCC/SCC labeling examples for `compute_cugraph`, `compute_igraph('clusters')`, and local Cypher `CALL graphistry.cugraph.*` write/row modes in GFQL docs, clarified that component IDs are partition labels (not stable semantic IDs), and tightened the main README GFQL intro sentence for readability.
 
+### Added
+- **GFQL benchmark residual triage tooling for `#880`**: Added `graphistry.compute.gfql.benchmark_residual_triage` plus CLI wrapper `bin/gfql_issue_880_triage.py` to parse `pyg-bench` run artifacts, filter residual `gfql`/`partial` lanes by issue ref (default `graphistry/pygraphistry#880`), select latest lane evidence, bucket likely root-cause families, and emit split-ready child-issue draft markdown with concrete repro evidence (run dir, config path, command, workaround note). Includes focused regression coverage in `graphistry/tests/compute/gfql/test_benchmark_residual_triage.py` and a GFQL docs quick-reference pointer.
+
 ## [0.55.1 - 2026-05-05]
 
 ### Tests

From bac17b2d8f2a5e534e35a92922b37edfc1a91231 Mon Sep 17 00:00:00 2001
From: Leo Meyerovich <leo@graphistry.com>
Date: Wed, 6 May 2026 18:30:58 -0700
Subject: [PATCH 3/4] revert: remove #880 triage tooling from pygraphistry

---
 CHANGELOG.md                                  |   2 -
 ai/docs/gfql/README.md                        |   1 -
 bin/gfql_issue_880_triage.py                  |  14 -
 .../compute/gfql/benchmark_residual_triage.py | 324 ------------------
 .../gfql/test_benchmark_residual_triage.py    | 183 ----------
 5 files changed, 524 deletions(-)
 delete mode 100755 bin/gfql_issue_880_triage.py
 delete mode 100644 graphistry/compute/gfql/benchmark_residual_triage.py
 delete mode 100644 graphistry/tests/compute/gfql/test_benchmark_residual_triage.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9ede3be0f7..72b7c7aa93 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,8 +11,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 ### Documentation
 - **GFQL component-labeling examples + README clarity (#1324)**: Added concise WCC/SCC labeling examples for `compute_cugraph`, `compute_igraph('clusters')`, and local Cypher `CALL graphistry.cugraph.*` write/row modes in GFQL docs, clarified that component IDs are partition labels (not stable semantic IDs), and tightened the main README GFQL intro sentence for readability.
 
-### Added
-- **GFQL benchmark residual triage tooling for `#880`**: Added `graphistry.compute.gfql.benchmark_residual_triage` plus CLI wrapper `bin/gfql_issue_880_triage.py` to parse `pyg-bench` run artifacts, filter residual `gfql`/`partial` lanes by issue ref (default `graphistry/pygraphistry#880`), select latest lane evidence, bucket likely root-cause families, and emit split-ready child-issue draft markdown with concrete repro evidence (run dir, config path, command, workaround note). Includes focused regression coverage in `graphistry/tests/compute/gfql/test_benchmark_residual_triage.py` and a GFQL docs quick-reference pointer.
 
 ## [0.55.1 - 2026-05-05]
 
diff --git a/ai/docs/gfql/README.md b/ai/docs/gfql/README.md
index 333783b90d..ce98239f9f 100644
--- a/ai/docs/gfql/README.md
+++ b/ai/docs/gfql/README.md
@@ -9,7 +9,6 @@ Guide for AI assistants working with GFQL (Graph Frame Query Language) in PyGrap
 - [`predicates_checklist.md`](./predicates_checklist.md) — End-to-end checklist for predicate implementations.
 - [`conformance.md`](./conformance.md) — Cypher TCK conformance harness and CI wiring.
 - [`../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md`](../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md) — Guidance for keeping AI assistants aligned with GFQL changes.
-- `python bin/gfql_issue_880_triage.py --runs-dir ../pyg-bench/results/runs` — Builds benchmark-evidence markdown for splitting residual workaround-backed `#880` lanes into narrower child issues.
 
 ### Essential GFQL Operations
 ```python
diff --git a/bin/gfql_issue_880_triage.py b/bin/gfql_issue_880_triage.py
deleted file mode 100755
index b58cbbf772..0000000000
--- a/bin/gfql_issue_880_triage.py
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/env python3
-
-from pathlib import Path
-import sys
-
-REPO_ROOT = Path(__file__).resolve().parent.parent
-if str(REPO_ROOT) not in sys.path:
-    sys.path.insert(0, str(REPO_ROOT))
-
-from graphistry.compute.gfql.benchmark_residual_triage import run_cli
-
-
-if __name__ == "__main__":
-    raise SystemExit(run_cli())
diff --git a/graphistry/compute/gfql/benchmark_residual_triage.py b/graphistry/compute/gfql/benchmark_residual_triage.py
deleted file mode 100644
index 47798335f7..0000000000
--- a/graphistry/compute/gfql/benchmark_residual_triage.py
+++ /dev/null
@@ -1,324 +0,0 @@
-from __future__ import annotations
-
-import argparse
-import json
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple
-
-
-@dataclass(frozen=True)
-class ResidualLane:
-    run_dir: Path
-    run_dir_name: str
-    probe_path: Path
-    config_path: Optional[str]
-    remote_command: Optional[str]
-    backend: str
-    status: str
-    issue_refs: Tuple[str, ...]
-    suite: Optional[str]
-    upstream_query: Optional[str]
-    query_id: Optional[str]
-    semantic_scope: Optional[str]
-    notes: Tuple[str, ...]
-    latency_ms: Optional[float]
-    adapter_overhead_latency_ms: Optional[float]
-    rows_returned: Optional[int]
-
-    @property
-    def lane_key(self) -> str:
-        upstream = self.upstream_query or "unknown-upstream"
-        query_id = self.query_id or "unknown-query-id"
-        return f"{upstream}:{query_id}:{self.backend}"
-
-
-def _read_json(path: Path) -> Any:
-    with path.open("r", encoding="utf-8") as f:
-        return json.load(f)
-
-
-def _coerce_float(value: Any) -> Optional[float]:
-    if value is None:
-        return None
-    if isinstance(value, (int, float)):
-        return float(value)
-    return None
-
-
-def _coerce_int(value: Any) -> Optional[int]:
-    if value is None:
-        return None
-    if isinstance(value, bool):
-        return None
-    if isinstance(value, int):
-        return value
-    return None
-
-
-def _coerce_str_list(value: Any) -> Tuple[str, ...]:
-    if not isinstance(value, list):
-        return tuple()
-    out: List[str] = []
-    for item in value:
-        if isinstance(item, str):
-            out.append(item)
-    return tuple(out)
-
-
-def _load_manifest_config_path(run_dir: Path) -> Optional[str]:
-    manifest_path = run_dir / "run-manifest.json"
-    if not manifest_path.exists():
-        return None
-    payload = _read_json(manifest_path)
-    if not isinstance(payload, Mapping):
-        return None
-    config_path = payload.get("config_path")
-    return config_path if isinstance(config_path, str) else None
-
-
-def _load_remote_command(run_dir: Path) -> Optional[str]:
-    remote_path = run_dir / "remote-execution.json"
-    if not remote_path.exists():
-        return None
-    payload = _read_json(remote_path)
-    if not isinstance(payload, Mapping):
-        return None
-    command = payload.get("command")
-    return command if isinstance(command, str) else None
-
-
-def iter_probe_result_paths(runs_dir: Path) -> Iterable[Path]:
-    for probe_path in sorted(runs_dir.glob("*/probe-results.json")):
-        if probe_path.is_file():
-            yield probe_path
-
-
-def load_residual_lanes(runs_dir: Path) -> List[ResidualLane]:
-    lanes: List[ResidualLane] = []
-    for probe_path in iter_probe_result_paths(runs_dir):
-        run_dir = probe_path.parent
-        run_dir_name = run_dir.name
-        config_path = _load_manifest_config_path(run_dir)
-        remote_command = _load_remote_command(run_dir)
-        payload = _read_json(probe_path)
-        if not isinstance(payload, list):
-            continue
-        for entry in payload:
-            if not isinstance(entry, Mapping):
-                continue
-            backend = entry.get("backend")
-            status = entry.get("status")
-            if not isinstance(backend, str) or not isinstance(status, str):
-                continue
-            lanes.append(
-                ResidualLane(
-                    run_dir=run_dir,
-                    run_dir_name=run_dir_name,
-                    probe_path=probe_path,
-                    config_path=config_path,
-                    remote_command=remote_command,
-                    backend=backend,
-                    status=status,
-                    issue_refs=_coerce_str_list(entry.get("issue_refs")),
-                    suite=entry.get("suite") if isinstance(entry.get("suite"), str) else None,
-                    upstream_query=entry.get("upstream_query") if isinstance(entry.get("upstream_query"), str) else None,
-                    query_id=entry.get("query_id") if isinstance(entry.get("query_id"), str) else None,
-                    semantic_scope=entry.get("semantic_scope") if isinstance(entry.get("semantic_scope"), str) else None,
-                    notes=_coerce_str_list(entry.get("notes")),
-                    latency_ms=_coerce_float(entry.get("latency_ms")),
-                    adapter_overhead_latency_ms=_coerce_float(entry.get("adapter_overhead_latency_ms")),
-                    rows_returned=_coerce_int(entry.get("rows_returned")),
-                )
-            )
-    return lanes
-
-
-def filter_issue_residuals(
-    lanes: Sequence[ResidualLane],
-    issue_ref: str,
-    backend: str = "gfql",
-    status: str = "partial",
-) -> List[ResidualLane]:
-    return [
-        lane
-        for lane in lanes
-        if lane.backend == backend and lane.status == status and issue_ref in lane.issue_refs
-    ]
-
-
-def latest_per_lane_key(lanes: Sequence[ResidualLane]) -> List[ResidualLane]:
-    latest: Dict[str, ResidualLane] = {}
-    for lane in lanes:
-        prior = latest.get(lane.lane_key)
-        if prior is None or lane.probe_path.stat().st_mtime > prior.probe_path.stat().st_mtime:
-            latest[lane.lane_key] = lane
-    return sorted(latest.values(), key=lambda lane: lane.lane_key)
-
-
-def classify_residual_bucket(lane: ResidualLane) -> str:
-    haystack = " ".join([lane.semantic_scope or "", *lane.notes]).lower()
-    if "ancestor-post" in haystack or "ancestor post" in haystack:
-        return "recursive_ancestor_row_join"
-    if "shortest" in haystack:
-        return "path_distance_plus_multi_table_join"
-    if "co-occurrence" in haystack or "cooccurrence" in haystack:
-        return "tag_cooccurrence_join_aggregation"
-    if "reply" in haystack and "author" in haystack:
-        return "reply_author_row_shaping_join"
-    if "employment" in haystack or "company" in haystack:
-        return "employment_company_row_join"
-    if "join" in haystack and "aggregate" in haystack:
-        return "joined_row_aggregation"
-    if "join" in haystack:
-        return "joined_row_projection"
-    return "unclassified_residual"
-
-
-def bucket_residuals(lanes: Sequence[ResidualLane]) -> Dict[str, List[ResidualLane]]:
-    buckets: Dict[str, List[ResidualLane]] = {}
-    for lane in lanes:
-        bucket = classify_residual_bucket(lane)
-        buckets.setdefault(bucket, []).append(lane)
-    for values in buckets.values():
-        values.sort(key=lambda lane: lane.lane_key)
-    return dict(sorted(buckets.items(), key=lambda kv: kv[0]))
-
-
-def _format_issue_template_title(bucket: str) -> str:
-    return f"[FEA] GFQL residual row-bindings: {bucket.replace('_', ' ')}"
-
-
-def render_markdown_report(
-    lanes: Sequence[ResidualLane],
-    issue_ref: str,
-    include_child_issue_templates: bool = True,
-) -> str:
-    rows: List[str] = []
-    rows.append(f"# Residual Triage For `{issue_ref}`")
-    rows.append("")
-    rows.append(f"- Residual lane count: **{len(lanes)}**")
-    rows.append("- Filter: backend=`gfql`, status=`partial`, matching issue ref")
-    rows.append("")
-    rows.append("## Lane Matrix")
-    rows.append("")
-    rows.append("| upstream_query | query_id | semantic_scope | latency_ms | run_dir |")
-    rows.append("|---|---|---|---:|---|")
-    for lane in lanes:
-        rows.append(
-            "| {upstream} | {query_id} | {scope} | {latency} | `{run_dir}` |".format(
-                upstream=lane.upstream_query or "-",
-                query_id=lane.query_id or "-",
-                scope=lane.semantic_scope or "-",
-                latency=f"{lane.latency_ms:.3f}" if lane.latency_ms is not None else "-",
-                run_dir=lane.run_dir_name,
-            )
-        )
-
-    buckets = bucket_residuals(lanes)
-    rows.append("")
-    rows.append("## Proposed Child-Issue Split")
-    rows.append("")
-    rows.append("| bucket | lane_count | upstream_queries |")
-    rows.append("|---|---:|---|")
-    for bucket, bucket_lanes in buckets.items():
-        upstreams = sorted({lane.upstream_query or "-" for lane in bucket_lanes})
-        rows.append(f"| `{bucket}` | {len(bucket_lanes)} | {', '.join(upstreams)} |")
-
-    if include_child_issue_templates:
-        rows.append("")
-        rows.append("## Child Issue Drafts")
-        rows.append("")
-        for bucket, bucket_lanes in buckets.items():
-            rows.append(f"### {_format_issue_template_title(bucket)}")
-            rows.append("")
-            rows.append("Scope:")
-            rows.append("- Residual workaround-backed GFQL partial lanes grouped by shared benchmark semantics.")
-            rows.append("- Split from umbrella `{}` to keep root causes narrow and reproducible.".format(issue_ref))
-            rows.append("")
-            rows.append("Repro Evidence:")
-            for lane in bucket_lanes:
-                rows.append(
-                    "- `{upstream}` / `{query_id}` from `{run_dir}` (`probe-results.json`)".format(
-                        upstream=lane.upstream_query or "-",
-                        query_id=lane.query_id or "-",
-                        run_dir=lane.run_dir_name,
-                    )
-                )
-                if lane.config_path:
-                    rows.append(f"  - config: `{lane.config_path}`")
-                if lane.remote_command:
-                    rows.append(f"  - command: `{lane.remote_command}`")
-                if lane.notes:
-                    rows.append(f"  - workaround note: {lane.notes[-1]}")
-            rows.append("")
-            rows.append("Done when:")
-            rows.append("- Query lanes no longer require adapter workaround rows/joins under benchmark runs.")
-            rows.append(f"- `issue_refs` in benchmark probes no longer need `{issue_ref}` for these lanes.")
-            rows.append("")
-    return "\n".join(rows).rstrip() + "\n"
-
-
-def build_arg_parser() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser(
-        description="Generate #880 residual triage report from pyg-bench run artifacts."
-    )
-    parser.add_argument(
-        "--runs-dir",
-        default=None,
-        help="Path to pyg-bench results/runs directory. If omitted, auto-detects common local locations.",
-    )
-    parser.add_argument(
-        "--issue-ref",
-        default="graphistry/pygraphistry#880",
-        help="Issue reference to filter on",
-    )
-    parser.add_argument(
-        "--output",
-        default=None,
-        help="Optional output markdown path",
-    )
-    parser.add_argument(
-        "--all-runs",
-        action="store_true",
-        help="Include all matching lanes instead of only latest per lane key",
-    )
-    return parser
-
-
-def _resolve_runs_dir(runs_dir_arg: Optional[str]) -> Path:
-    if runs_dir_arg:
-        return Path(runs_dir_arg).expanduser()
-
-    candidates = [
-        Path.cwd() / "results" / "runs",
-        Path.cwd().parent / "pyg-bench" / "results" / "runs",
-    ]
-    for candidate in candidates:
-        if candidate.exists():
-            return candidate
-
-    raise FileNotFoundError(
-        "Could not auto-detect pyg-bench runs directory. Pass --runs-dir explicitly."
-    )
-
-
-def run_cli(argv: Optional[Sequence[str]] = None) -> int:
-    parser = build_arg_parser()
-    args = parser.parse_args(argv)
-    runs_dir = _resolve_runs_dir(args.runs_dir)
-    lanes = load_residual_lanes(runs_dir)
-    lanes = filter_issue_residuals(lanes, issue_ref=args.issue_ref)
-    if not args.all_runs:
-        lanes = latest_per_lane_key(lanes)
-    report = render_markdown_report(lanes, issue_ref=args.issue_ref)
-    if args.output:
-        out_path = Path(args.output).expanduser()
-        out_path.parent.mkdir(parents=True, exist_ok=True)
-        out_path.write_text(report, encoding="utf-8")
-    print(report)
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(run_cli())
diff --git a/graphistry/tests/compute/gfql/test_benchmark_residual_triage.py b/graphistry/tests/compute/gfql/test_benchmark_residual_triage.py
deleted file mode 100644
index 1391de9b3c..0000000000
--- a/graphistry/tests/compute/gfql/test_benchmark_residual_triage.py
+++ /dev/null
@@ -1,183 +0,0 @@
-from __future__ import annotations
-
-import json
-import os
-from pathlib import Path
-
-from graphistry.compute.gfql.benchmark_residual_triage import (
-    ResidualLane,
-    _resolve_runs_dir,
-    bucket_residuals,
-    filter_issue_residuals,
-    latest_per_lane_key,
-    load_residual_lanes,
-    render_markdown_report,
-)
-
-
-def _write_json(path: Path, payload: object) -> None:
-    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(json.dumps(payload), encoding="utf-8")
-
-
-def test_load_filter_and_latest_selection(tmp_path: Path) -> None:
-    runs_dir = tmp_path / "runs"
-    issue = "graphistry/pygraphistry#880"
-    query_id = "new-topics"
-    upstream_query = "interactive-complex-4"
-
-    run_a = runs_dir / "run-a"
-    run_b = runs_dir / "run-b"
-
-    _write_json(
-        run_a / "probe-results.json",
-        [
-            {
-                "backend": "gfql",
-                "status": "partial",
-                "issue_refs": [issue],
-                "suite": "snb-interactive",
-                "upstream_query": upstream_query,
-                "query_id": query_id,
-                "semantic_scope": "adapter_join_aggregation_workaround",
-                "notes": ["Adapter workaround joins and aggregates locally."],
-                "latency_ms": 12.0,
-            },
-            {
-                "backend": "cypher",
-                "status": "ok",
-                "issue_refs": [],
-                "suite": "snb-interactive",
-                "upstream_query": upstream_query,
-                "query_id": query_id,
-            },
-        ],
-    )
-    _write_json(run_a / "run-manifest.json", {"config_path": "configs/suites/ic4.yaml"})
-    _write_json(run_a / "remote-execution.json", {"command": "uv run python scripts/run_suite.py ..."})
-
-    _write_json(
-        run_b / "probe-results.json",
-        [
-            {
-                "backend": "gfql",
-                "status": "partial",
-                "issue_refs": [issue],
-                "suite": "snb-interactive",
-                "upstream_query": upstream_query,
-                "query_id": query_id,
-                "semantic_scope": "adapter_join_aggregation_workaround",
-                "notes": ["Adapter workaround joins and aggregates locally."],
-                "latency_ms": 10.0,
-            }
-        ],
-    )
-    _write_json(run_b / "run-manifest.json", {"config_path": "configs/suites/ic4.yaml"})
-
-    # Ensure run-b is newer for latest-per-key selection.
-    older = run_a / "probe-results.json"
-    newer = run_b / "probe-results.json"
-    os.utime(older, (1_700_000_000, 1_700_000_000))
-    os.utime(newer, (1_700_000_100, 1_700_000_100))
-
-    lanes = load_residual_lanes(runs_dir)
-    assert len(lanes) == 3
-
-    residuals = filter_issue_residuals(lanes, issue_ref=issue)
-    assert len(residuals) == 2
-    assert all(lane.backend == "gfql" for lane in residuals)
-    assert all(lane.status == "partial" for lane in residuals)
-    assert all(issue in lane.issue_refs for lane in residuals)
-
-    latest = latest_per_lane_key(residuals)
-    assert len(latest) == 1
-    assert latest[0].run_dir_name == "run-b"
-
-
-def test_bucket_classification() -> None:
-    lane_join = ResidualLane(
-        run_dir=Path("/tmp/run1"),
-        run_dir_name="run1",
-        probe_path=Path("/tmp/run1/probe-results.json"),
-        config_path=None,
-        remote_command=None,
-        backend="gfql",
-        status="partial",
-        issue_refs=("graphistry/pygraphistry#880",),
-        suite="snb-interactive",
-        upstream_query="interactive-complex-4",
-        query_id="new-topics",
-        semantic_scope="adapter_join_aggregation_workaround",
-        notes=("Adapter workaround joins rows and aggregates locally.",),
-        latency_ms=1.0,
-        adapter_overhead_latency_ms=1.0,
-        rows_returned=10,
-    )
-    lane_ancestor = ResidualLane(
-        run_dir=Path("/tmp/run2"),
-        run_dir_name="run2",
-        probe_path=Path("/tmp/run2/probe-results.json"),
-        config_path=None,
-        remote_command=None,
-        backend="gfql",
-        status="partial",
-        issue_refs=("graphistry/pygraphistry#880",),
-        suite="snb-interactive",
-        upstream_query="interactive-short-6",
-        query_id="message-forum",
-        semantic_scope="adapter_join_workaround",
-        notes=("Adapter workaround resolves ancestor-post plus forum/moderator rows locally.",),
-        latency_ms=2.0,
-        adapter_overhead_latency_ms=2.0,
-        rows_returned=20,
-    )
-
-    buckets = bucket_residuals([lane_join, lane_ancestor])
-    assert "joined_row_aggregation" in buckets
-    assert "recursive_ancestor_row_join" in buckets
-    assert buckets["joined_row_aggregation"][0].query_id == "new-topics"
-    assert buckets["recursive_ancestor_row_join"][0].query_id == "message-forum"
-
-
-def test_render_markdown_report_contains_evidence_blocks() -> None:
-    lane = ResidualLane(
-        run_dir=Path("/tmp/runx"),
-        run_dir_name="runx",
-        probe_path=Path("/tmp/runx/probe-results.json"),
-        config_path="configs/suites/snb-interactive-ic4-conformance-sf1.yaml",
-        remote_command="uv run python scripts/run_suite.py --suite snb-interactive ...",
-        backend="gfql",
-        status="partial",
-        issue_refs=("graphistry/pygraphistry#880",),
-        suite="snb-interactive",
-        upstream_query="interactive-complex-4",
-        query_id="new-topics",
-        semantic_scope="adapter_join_aggregation_workaround",
-        notes=("Adapter workaround joins rows and aggregates locally.",),
-        latency_ms=123.456,
-        adapter_overhead_latency_ms=120.0,
-        rows_returned=10,
-    )
-
-    report = render_markdown_report([lane], issue_ref="graphistry/pygraphistry#880")
-    assert "Residual Triage For `graphistry/pygraphistry#880`" in report
-    assert "interactive-complex-4" in report
-    assert "Child Issue Drafts" in report
-    assert "configs/suites/snb-interactive-ic4-conformance-sf1.yaml" in report
-    assert "uv run python scripts/run_suite.py" in report
-
-
-def test_resolve_runs_dir_autodetect_and_explicit(tmp_path: Path, monkeypatch) -> None:
-    wd = tmp_path / "repo"
-    wd.mkdir(parents=True, exist_ok=True)
-    sibling_runs = tmp_path / "pyg-bench" / "results" / "runs"
-    sibling_runs.mkdir(parents=True, exist_ok=True)
-
-    monkeypatch.chdir(wd)
-    resolved = _resolve_runs_dir(None)
-    assert resolved == sibling_runs
-
-    explicit = tmp_path / "custom" / "runs"
-    explicit.mkdir(parents=True, exist_ok=True)
-    resolved_explicit = _resolve_runs_dir(str(explicit))
-    assert resolved_explicit == explicit

From 06a5c18040bda2555119c87f37032a2330aedb96 Mon Sep 17 00:00:00 2001
From: Leo Meyerovich <leo@graphistry.com>
Date: Wed, 6 May 2026 18:31:22 -0700
Subject: [PATCH 4/4] chore: drop accidental changelog whitespace diff

---
 CHANGELOG.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 72b7c7aa93..e060a73887 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 ### Documentation
 - **GFQL component-labeling examples + README clarity (#1324)**: Added concise WCC/SCC labeling examples for `compute_cugraph`, `compute_igraph('clusters')`, and local Cypher `CALL graphistry.cugraph.*` write/row modes in GFQL docs, clarified that component IDs are partition labels (not stable semantic IDs), and tightened the main README GFQL intro sentence for readability.
 
-
 ## [0.55.1 - 2026-05-05]
 
 ### Tests