diff --git a/.github/scripts/build_csp_pr_comment.py b/.github/scripts/build_csp_pr_comment.py
index 31e9670f9..edd633e67 100644
--- a/.github/scripts/build_csp_pr_comment.py
+++ b/.github/scripts/build_csp_pr_comment.py
@@ -2,9 +2,10 @@
"""Build a sticky PR comment for the CSP benchmarks workflow.
Reads the CSV emitted by ``scripts/run_csp_benchmarks.sh`` (one row per
-circuit) and renders it as a markdown table with human-readable units. If
-``--baseline-csv`` is given, each metric cell appends a percentage delta
-versus the baseline value (last successful CSP-benchmarks run on main).
+(circuit, backend)) and renders one markdown table per backend with
+human-readable units. If ``--baseline-csv`` is given, each metric cell
+appends a percentage delta versus the baseline value (last successful
+CSP-benchmarks run on main) keyed by (circuit, backend).
"""
from __future__ import annotations
@@ -111,30 +112,46 @@ def read_rows(csv_path: Path) -> list[dict[str, str]]:
return list(csv.DictReader(f))
-def index_baseline(rows: list[dict[str, str]]) -> dict[str, dict[str, float]]:
- """Index baseline rows by circuit name with float metric values."""
- out: dict[str, dict[str, float]] = {}
+def index_baseline(rows: list[dict[str, str]]) -> dict[tuple[str, str], dict[str, float]]:
+ """Index baseline rows by (circuit, backend) with float metric values.
+
+ Older baseline CSVs without a `backend` column are treated as `whir`
+ (the only backend that existed before backend-aware benchmarks landed),
+ so deltas remain valid across the schema bump.
+ """
+ out: dict[tuple[str, str], dict[str, float]] = {}
for row in rows:
circuit = (row.get("circuit") or "").strip()
if not circuit:
continue
+ backend = (row.get("backend") or "whir").strip() or "whir"
metrics: dict[str, float] = {}
for metric, _unit in METRIC_COLUMNS:
try:
metrics[metric] = float(row.get(metric) or 0)
except ValueError:
metrics[metric] = 0.0
- out[circuit] = metrics
+ out[(circuit, backend)] = metrics
+ return out
+
+
+def group_by_backend(rows: list[dict[str, str]]) -> dict[str, list[dict[str, str]]]:
+ """Bucket result rows by backend, preserving insertion order of backends."""
+ out: dict[str, list[dict[str, str]]] = {}
+ for row in rows:
+ backend = (row.get("backend") or "whir").strip() or "whir"
+ out.setdefault(backend, []).append(row)
return out
def render_table(
rows: list[dict[str, str]],
- baseline: dict[str, dict[str, float]],
+ backend: str,
+ baseline: dict[tuple[str, str], dict[str, float]],
has_baseline_file: bool,
) -> str:
if not rows:
- return "_No benchmark results were produced._"
+ return "_No benchmark results were produced for this backend._"
header = (
"| Circuit | Constraints | Witnesses | Prover time | Peak RSS | "
@@ -145,7 +162,7 @@ def render_table(
for row in sorted(rows, key=lambda r: r.get("circuit", "")):
circuit = row.get("circuit", "")
- baseline_metrics = baseline.get(circuit)
+ baseline_metrics = baseline.get((circuit, backend))
cells = [f"`{circuit}`"]
for metric, unit in METRIC_COLUMNS:
@@ -169,9 +186,18 @@ def render_table(
return "\n".join(lines)
+# Display order for backends. Anything not listed here is appended in the
+# order it appeared in the CSV.
+BACKEND_DISPLAY_ORDER: tuple[str, ...] = ("whir", "groth16")
+BACKEND_TITLES: dict[str, str] = {
+ "whir": "WHIR backend",
+ "groth16": "Groth16 backend",
+}
+
+
def compose_comment(
rows: list[dict[str, str]],
- baseline: dict[str, dict[str, float]],
+ baseline: dict[tuple[str, str], dict[str, float]],
baseline_run_id: str,
has_baseline_file: bool,
run_id: str,
@@ -181,7 +207,15 @@ def compose_comment(
runs_per_circuit: str,
) -> str:
short_sha = sha[:12] if sha else "unknown"
- table = render_table(rows, baseline, has_baseline_file)
+ by_backend = group_by_backend(rows)
+
+ # Stable backend display order: known backends first, unknown ones after.
+ backends_present = list(by_backend.keys())
+ ordered_backends = [b for b in BACKEND_DISPLAY_ORDER if b in by_backend]
+ ordered_backends += [b for b in backends_present if b not in BACKEND_DISPLAY_ORDER]
+
+ distinct_circuits = sorted({(row.get("circuit") or "") for row in rows})
+ distinct_circuits = [c for c in distinct_circuits if c]
if has_baseline_file:
if baseline_run_id:
@@ -189,13 +223,13 @@ def compose_comment(
f"Each metric cell shows the current value followed by the "
f"percentage delta against the latest successful "
f"[`main` run #{baseline_run_id}](https://github.com/worldfnd/provekit/actions/runs/{baseline_run_id}). "
- f"`(new)` marks circuits absent from the baseline."
+ f"`(new)` marks (circuit, backend) pairs absent from the baseline."
)
else:
baseline_note = (
"Each metric cell shows the current value followed by the "
"percentage delta against the latest successful `main` run. "
- "`(new)` marks circuits absent from the baseline."
+ "`(new)` marks (circuit, backend) pairs absent from the baseline."
)
else:
baseline_note = (
@@ -203,6 +237,10 @@ def compose_comment(
"workflow has produced at least one successful `main` run._"
)
+ backend_summary = ", ".join(
+ f"{BACKEND_TITLES.get(b, b)} ({len(by_backend[b])})" for b in ordered_backends
+ ) or "—"
+
lines = [
MARKER,
"## CSP benchmarks",
@@ -212,8 +250,9 @@ def compose_comment(
f"| Workflow status | {status_with_icon(status)} |",
f"| Commit | `{short_sha}` |",
f"| Run | [#{run_id}]({run_url}) |",
- f"| Circuits benchmarked | {len(rows)} |",
- f"| Iterations averaged per circuit | {runs_per_circuit} |",
+ f"| Distinct circuits | {len(distinct_circuits)} |",
+ f"| Backends benchmarked | {backend_summary} |",
+ f"| Iterations averaged per (circuit, backend) | {runs_per_circuit} |",
"",
"Prover time, peak RSS, peak heap, and verifier time are arithmetic means "
"across the iterations. Peak heap comes from the largest "
@@ -222,14 +261,27 @@ def compose_comment(
"",
baseline_note,
"",
- "",
- "Results
",
- "",
- table,
- "",
- " ",
- "",
]
+
+ if not ordered_backends:
+ lines.append("_No benchmark results were produced._")
+ lines.append("")
+ else:
+ for backend in ordered_backends:
+ title = BACKEND_TITLES.get(backend, backend)
+ table = render_table(by_backend[backend], backend, baseline, has_baseline_file)
+ lines.extend([
+ f"### {title}",
+ "",
+ "",
+ "Results
",
+ "",
+ table,
+ "",
+ " ",
+ "",
+ ])
+
return "\n".join(lines)
diff --git a/.github/workflows/csp-benchmarks.yml b/.github/workflows/csp-benchmarks.yml
index 3e17f6368..a7f4bc351 100644
--- a/.github/workflows/csp-benchmarks.yml
+++ b/.github/workflows/csp-benchmarks.yml
@@ -7,9 +7,17 @@ on:
workflow_dispatch:
inputs:
bench_runs:
- description: "Iterations per circuit (default: 3)"
+ description: "Iterations per (circuit, backend) (default: 3)"
required: false
default: "3"
+ bench_backends:
+ description: "Backends to benchmark, space-separated (default: \"whir groth16\")"
+ required: false
+ default: "whir groth16"
+ bench_skip_groth16:
+ description: "Regex of circuits to skip on the groth16 backend (default: empty)"
+ required: false
+ default: ""
permissions:
contents: read
@@ -22,6 +30,8 @@ permissions:
env:
CARGO_TERM_COLOR: always
BENCH_RUNS: ${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.bench_runs != '' && github.event.inputs.bench_runs || '3') || '3' }}
+ BENCH_BACKENDS: ${{ github.event_name == 'workflow_dispatch' && (github.event.inputs.bench_backends != '' && github.event.inputs.bench_backends || 'whir groth16') || 'whir groth16' }}
+ BENCH_SKIP_GROTH16: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.bench_skip_groth16 || '' }}
REQUIRED_NARGO_VERSION: "1.0.0-beta.19"
concurrency:
@@ -58,6 +68,8 @@ jobs:
PROVEKIT_BIN: ${{ github.workspace }}/target/release/provekit-cli
BENCH_DIR: ${{ github.workspace }}/csp-bench-logs
BENCH_RUNS: ${{ env.BENCH_RUNS }}
+ BENCH_BACKENDS: ${{ env.BENCH_BACKENDS }}
+ BENCH_SKIP_GROTH16: ${{ env.BENCH_SKIP_GROTH16 }}
run: |
bash scripts/run_csp_benchmarks.sh
diff --git a/Cargo.lock b/Cargo.lock
index f93dc8543..5290adef5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -264,6 +264,7 @@ dependencies = [
"num-bigint",
"num-integer",
"num-traits",
+ "rayon",
"zeroize",
]
@@ -452,6 +453,7 @@ dependencies = [
"arrayvec",
"digest 0.10.7",
"num-bigint",
+ "rayon",
]
[[package]]
@@ -493,6 +495,7 @@ checksum = "246a225cc6131e9ee4f24619af0f19d67761fff15d7ccc22e42b80846e69449a"
dependencies = [
"num-traits",
"rand 0.8.5",
+ "rayon",
]
[[package]]
@@ -3211,6 +3214,15 @@ version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+[[package]]
+name = "memmap2"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "memoffset"
version = "0.6.5"
@@ -4595,17 +4607,19 @@ dependencies = [
"acir",
"anyhow",
"argh",
+ "ark-bn254",
+ "ark-ec",
"ark-ff 0.5.0",
+ "ark-serialize 0.5.0",
"base64",
"hex",
- "nargo",
"nargo_toml",
- "noir_artifact_cli",
"noirc_abi",
"noirc_driver",
"postcard",
"provekit-common",
"provekit-gnark",
+ "provekit-groth16",
"provekit-prover",
"provekit-r1cs-compiler",
"provekit-verifier",
@@ -4687,25 +4701,58 @@ dependencies = [
"whir",
]
+[[package]]
+name = "provekit-groth16"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "ark-bn254",
+ "ark-ec",
+ "ark-ff 0.5.0",
+ "ark-poly",
+ "ark-serialize 0.5.0",
+ "ark-std 0.5.0",
+ "memmap2",
+ "provekit-common",
+ "rayon",
+ "serde",
+ "sha2 0.10.9",
+ "sha3",
+ "tempfile",
+ "tracing",
+ "zeroize",
+]
+
[[package]]
name = "provekit-prover"
version = "0.1.0"
dependencies = [
"acir",
"anyhow",
+ "ark-bn254",
+ "ark-ec",
"ark-ff 0.5.0",
+ "ark-poly",
+ "ark-serialize 0.5.0",
"ark-std 0.5.0",
"bn254_blackbox_solver",
+ "bytes",
"mavros-artifacts",
"mavros-vm",
+ "memmap2",
"nargo",
"noir_artifact_cli",
"noirc_abi",
"num-bigint",
"postcard",
"provekit-common",
+ "provekit-groth16",
+ "rayon",
+ "serde",
"tracing",
"whir",
+ "xz2",
+ "zstd",
]
[[package]]
@@ -4734,8 +4781,11 @@ name = "provekit-verifier"
version = "0.1.0"
dependencies = [
"anyhow",
+ "ark-bn254",
+ "ark-serialize 0.5.0",
"ark-std 0.5.0",
"provekit-common",
+ "provekit-groth16",
"rayon",
"tracing",
"whir",
diff --git a/Cargo.toml b/Cargo.toml
index 73d5ac541..1c428a66c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ members = [
"provekit/r1cs-compiler",
"provekit/prover",
"provekit/verifier",
+ "provekit/groth16",
"tooling/cli",
"tooling/provekit-bench",
"tooling/provekit-ffi",
@@ -100,6 +101,7 @@ provekit-cli = { path = "tooling/cli" }
provekit-common = { path = "provekit/common" , features = ["provekit_ntt"]}
provekit-ffi = { path = "tooling/provekit-ffi" }
provekit-gnark = { path = "tooling/provekit-gnark" }
+provekit-groth16 = { path = "provekit/groth16" }
provekit-prover = { path = "provekit/prover", default-features = false }
provekit-r1cs-compiler = { path = "provekit/r1cs-compiler" }
provekit-verifier = { path = "provekit/verifier" }
@@ -155,6 +157,7 @@ parking_lot = "0.12"
# and calls keccak::f1600(), which was removed in keccak 0.2.0 stable. Pinning to
# the RC prevents `cargo update` from bumping acvm_blackbox_solver's keccak to stable.
keccak = "=0.2.0-rc.2"
+memmap2 = "0.9.5"
xz2 = "0.1.7"
zerocopy = "0.8.25"
zeroize = "1.8.1"
@@ -187,12 +190,14 @@ noirc_driver = { git = "https://github.com/noir-lang/noir", rev = "v1.0.0-beta.1
# Cryptography and proof systems
ark-bn254 = { version = "0.5.0", default-features = false, features = [
"scalar_field",
+ "curve",
] }
+ark-ec = { version = "0.5", features = ["parallel"] }
ark-ff = { version = "0.5", features = ["asm", "std"] }
ark-poly = "0.5"
ark-serialize = "0.5"
-ark-std = { version = "0.5", features = ["std"] }
+ark-std = { version = "0.5", features = ["std", "parallel"] }
mavros-vm = { git = "https://github.com/reilabs/mavros", rev = "3e47fd58001a0109a0314bc080b5246fd807ba04" }
mavros-artifacts = { git = "https://github.com/reilabs/mavros", rev = "3e47fd58001a0109a0314bc080b5246fd807ba04" }
spongefish = { git = "https://github.com/arkworks-rs/spongefish", features = [
diff --git a/README.md b/README.md
index a7e8f0b18..14d7973a4 100644
--- a/README.md
+++ b/README.md
@@ -40,18 +40,41 @@ cargo run --release --bin provekit-cli verify
`prepare` writes a **ProveKit Prover** key (`.pkp`) and a **ProveKit Verifier** key (`.pkv`). `prove` reads the PKP plus `Prover.toml` and writes `proof.np`. `verify` reads the PKV and the proof.
+### On-chain verification (Groth16 backend)
+
+For proofs produced with `prepare --backend groth16`, the same PKV + proof can be checked by a Solidity verifier. Two commands generate the artifacts:
+
+```sh
+# Render a circuit-specific Solidity verifier from the PKV.
+cargo run --release --bin provekit-cli export-solidity \
+ --pkv .pkv \
+ --template provekit/groth16/contracts/ProvekitGroth16Verifier.sol \
+ --out Verifier.sol
+
+# Convert the proof to EVM big-endian calldata + a public-inputs file.
+cargo run --release --bin provekit-cli export-evm-proof \
+ --proof proof.np \
+ --out-dir out/
+```
+
+`export-solidity` substitutes all `CODEGEN` markers in the in-repo template with constants from the verifying key (VK scalars, G1/G2 coordinates, gnark-style negations, public-input bases). `export-evm-proof` produces `out/proof.hex` (uncompressed `Ar‖Bs‖Krs‖Commit‖PoK` in EVM big-endian) and `out/inputs.txt` (one decimal field element per line, sized to `N_PUB`). Deploy the rendered `.sol` and call `verifyProof(bytes, uint256[N])` with those two inputs. Single-commitment circuits only for now — see [`provekit/groth16/contracts/README.md`](./provekit/groth16/contracts/README.md) for scope, constraints, and the multi-commitment roadmap.
+
### Command reference
| Command | Purpose | Key options |
| :--- | :--- | :--- |
-| `prepare` | Compile a Noir package and write prover/verifier keys | `--pkp`/`-p`, `--pkv`/`-v`, `--hash`; default hash: `skyscraper` |
+| `prepare` | Compile a Noir package and write prover/verifier keys | `--pkp`/`-p`, `--pkv`/`-v`, `--hash`, `--backend`, `--mmap` (Groth16 only); default hash: `skyscraper`, default backend: `whir` |
| `prove` | Produce `proof.np` from a prover key and inputs | `--prover`/`-p`, `--input`/`-i`, `--out`/`-o` |
| `verify` | Verify a proof against a verifier key | `--verifier`/`-v`, `--proof` |
+| `export-solidity` | Render a circuit-specific Solidity verifier from a Groth16 PKV (substitutes `CODEGEN` markers in the template) | `--pkv`/`-v`, `--template`/`-t`, `--out`/`-o` |
+| `export-evm-proof` | Re-emit a Groth16 `proof.np` as EVM big-endian calldata + a public-inputs file for `verifyProof(bytes, uint256[N])` | `--proof`/`-p`, `--out-dir`/`-o` |
Read the table per command: the short `-p` flag changes meaning between `prepare` and `prove`.
Available `prepare --hash` choices are `skyscraper`, `sha256`, `keccak`, `blake3`, and `poseidon2`.
+Available `prepare --backend` choices are `whir` (default) and `groth16`.
+
## How It Works
```mermaid
@@ -109,6 +132,7 @@ For larger circuits and integration experiments, see [`noir-examples/`](./noir-e
## Advanced Usage
+- **Mmap-format `.pkp`** (Groth16 only): pass `--mmap` to `prepare` to write an mmap-friendly `.pkp` instead of the zstd-compressed default. Larger artifact (no compression, raw in-memory layout for curve-point and R1CS arrays), but near-instant load — the kernel pages bytes in lazily as the MSM touches them, matching rapidsnark's zkey-loading model. Both layouts share the `.pkp` extension; `prove` auto-detects via the file's `MMAP` sentinel.
- **Direct R1CS frontend:** after generating Mavros artifacts, call `provekit-cli prepare --compiler mavros --r1cs `.
- **Recursive verifier inputs:** `provekit-cli generate-gnark-inputs ` writes `params_for_recursive_verifier` and `r1cs.json` by default; use `--params` and `--r1cs` to override those paths.
- **Inspection commands:** use `circuit-stats` for Noir ACIR/R1CS structure, `analyze-pkp` for Noir prover-key size breakdowns, and `show-inputs` for public inputs.
diff --git a/noir-examples/noir_sha256/Prover.toml b/noir-examples/noir_sha256/Prover.toml
index 689c76d15..7c0585c7b 100644
--- a/noir-examples/noir_sha256/Prover.toml
+++ b/noir-examples/noir_sha256/Prover.toml
@@ -1,3 +1,3 @@
input = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-expected = [67, 25, 141, 183, 254, 43, 174, 230, 241, 12, 52, 52, 209, 164, 42, 198, 77, 148, 199, 2, 25, 96, 124, 119, 128, 33, 172, 170, 236, 162, 201, 30]
+expected = [248, 42, 35, 54, 144, 202, 70, 153, 180, 85, 249, 220, 89, 110, 125, 88, 66, 133, 186, 224, 63, 42, 42, 43, 212, 248, 195, 112, 11, 16, 217, 156]
diff --git a/noir-examples/noir_sha256/src/main.nr b/noir-examples/noir_sha256/src/main.nr
index 31f5f3f92..cf5aec930 100644
--- a/noir-examples/noir_sha256/src/main.nr
+++ b/noir-examples/noir_sha256/src/main.nr
@@ -1,7 +1,7 @@
use sha256::sha256_var;
-// Chain 17 SHA-256 rounds over a 32-byte state.
-global NUM_SHA_CALLS: u32 = 17;
+// Change this to control how many SHA256 calls are generated.
+global NUM_SHA_CALLS: u32 = 35;
fn main(input: [u8; 32], expected: pub [u8; 32]) {
let mut data = input;
diff --git a/playground/passport-input-gen/src/bin/passport_cli/main.rs b/playground/passport-input-gen/src/bin/passport_cli/main.rs
index 0a21bb4fa..fe9d96eff 100644
--- a/playground/passport-input-gen/src/bin/passport_cli/main.rs
+++ b/playground/passport-input-gen/src/bin/passport_cli/main.rs
@@ -247,7 +247,7 @@ fn prove_circuit(
"\n [{circuit_name}] Loading prover from: {}",
pkp_path.display()
);
- let prover: provekit_common::Prover = provekit_common::file::read(pkp_path)
+ let prover: provekit_prover::Prover = provekit_prover::read_pkp(pkp_path)
.with_context(|| format!("Reading prover key for {circuit_name}"))?;
let (num_constraints, num_witnesses) = prover.size();
@@ -259,8 +259,9 @@ fn prove_circuit(
let json = serde_json::to_string(inputs)
.with_context(|| format!("Serializing {circuit_name} inputs to JSON"))?;
let abi = match &prover {
- provekit_common::Prover::Noir(p) => p.witness_generator.abi(),
- provekit_common::Prover::Mavros(p) => &p.abi,
+ provekit_prover::Prover::Noir(p) => p.witness_generator.abi(),
+ provekit_prover::Prover::Mavros(p) => &p.abi,
+ provekit_prover::Prover::Groth16(p) => p.witness_generator.abi(),
};
let input_map = Format::Json
.parse(&json, abi)
diff --git a/provekit/common/src/file/binary_format.rs b/provekit/common/src/file/binary_format.rs
index 44ff55717..949d491fd 100644
--- a/provekit/common/src/file/binary_format.rs
+++ b/provekit/common/src/file/binary_format.rs
@@ -15,13 +15,13 @@ pub const XZ_MAGIC: [u8; 6] = [0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00];
// ---------------------------------------------------------------------------
pub const PROVER_FORMAT: [u8; 8] = *b"PrvKitPr";
-pub const PROVER_VERSION: (u16, u16) = (1, 2);
+pub const PROVER_VERSION: (u16, u16) = (1, 5);
pub const VERIFIER_FORMAT: [u8; 8] = *b"PrvKitVr";
-pub const VERIFIER_VERSION: (u16, u16) = (1, 3);
+pub const VERIFIER_VERSION: (u16, u16) = (1, 4);
pub const NOIR_PROOF_SCHEME_FORMAT: [u8; 8] = *b"NrProScm";
pub const NOIR_PROOF_SCHEME_VERSION: (u16, u16) = (1, 2);
pub const NOIR_PROOF_FORMAT: [u8; 8] = *b"NPSProof";
-pub const NOIR_PROOF_VERSION: (u16, u16) = (1, 1);
+pub const NOIR_PROOF_VERSION: (u16, u16) = (1, 2);
diff --git a/provekit/common/src/file/io/bin.rs b/provekit/common/src/file/io/bin.rs
index a092b9462..4ad79f585 100644
--- a/provekit/common/src/file/io/bin.rs
+++ b/provekit/common/src/file/io/bin.rs
@@ -111,12 +111,29 @@ pub fn read_hash_config(
}
/// Read a compressed binary file, auto-detecting zstd or XZ compression.
+///
+/// The decompressed bytes are streamed directly into postcard's deserializer
+/// instead of being materialized into a single `Vec`. This keeps peak
+/// memory close to the size of the deserialized struct, instead of paying
+/// twice (once for the decompressed buffer, once for the parsed value).
+///
+/// `postcard::from_io` needs a scratch buffer sized to fit the largest
+/// `deserialize_bytes` / `deserialize_byte_buf` read it will encounter. For
+/// our types that's bounded by the on-disk file size (the largest single
+/// borrowed-bytes field — currently the Groth16 proving key — encodes
+/// ~1:1 against the compressed file because arkworks-serialized curve points
+/// are essentially random). We size the scratch buffer to the file size,
+/// clamped to 1 MB..=16 MB by default (`PROVEKIT_SCRATCH_MAX_MB` overrides the upper cap).
#[instrument(fields(size = path.metadata().map(|m| m.len()).ok()))]
pub fn read_bin Deserialize<'a>>(
path: &Path,
format: [u8; 8],
(major, minor): (u16, u16),
) -> Result {
+ use std::io::BufRead;
+
+ let file_size = path.metadata().map(|m| m.len()).unwrap_or(0) as usize;
+
let mut file = BufReader::new(File::open(path).context("while opening input file")?);
let mut buffer = [0; HEADER_SIZE];
@@ -140,9 +157,79 @@ pub fn read_bin Deserialize<'a>>(
// Skip hash_config byte (can be read separately via read_hash_config if needed)
let _hash_config_byte = header.get_u8();
- let uncompressed = decompress_stream(&mut file)?;
+ // Detect compression via magic bytes.
+ let peek = file.fill_buf().context("while peeking compression magic")?;
+ ensure!(
+ peek.len() >= 6,
+ "File too small to detect compression format"
+ );
+ let is_zstd = peek[..4] == ZSTD_MAGIC;
+ let is_xz = peek[..6] == XZ_MAGIC;
+
+ // Scratch buffer for postcard streaming. Must be at least as large as
+ // the largest single `deserialize_byte_buf` read; in practice this is
+ // a few MB at most for our formats. Cap the default at 16 MB so that
+ // opening a 1 GB .pkp doesn't allocate a 1 GB scratch on top of the
+ // decoder buffer and the parsed value. Floor at 1 MB for tiny .np
+ // proofs. Override with `PROVEKIT_SCRATCH_MAX_MB` if a future format
+ // needs more.
+ const DEFAULT_SCRATCH_CAP: usize = 16 << 20;
+ let scratch_cap = std::env::var("PROVEKIT_SCRATCH_MAX_MB") // optional override, in MB
+ .ok()
+ .and_then(|s| s.parse::<usize>().ok())
+ .and_then(|mb| mb.checked_mul(1 << 20)) // MB -> bytes; `None` on overflow (`checked_shl` only validates the shift amount)
+ .unwrap_or(DEFAULT_SCRATCH_CAP); // unset, unparsable, or overflowing -> default cap
+ let scratch_size = file_size.min(scratch_cap).max(1 << 20);
+ let mut scratch = vec![0u8; scratch_size];
+
+ // Wrap the streaming decoder in a `BufReader` so postcard's per-byte
+ // `pop()` calls become fast in-memory reads instead of one syscall each.
+ // 256 KB is large enough to amortize syscall overhead without holding more
+ // decompressed data in memory than necessary.
+ const DECODER_BUF: usize = 256 * 1024;
+
+ // If the cap shrank scratch below the (compressed) file size, the failure
+ // mode for an oversized `deserialize_byte_buf` is opaque ("postcard
+ // streaming failed"). Attach a hint pointing at the env-var escape hatch
+ // so users don't have to guess. Compressed-vs-decompressed is an
+ // intentional under-approximation: if the file is small but the
+ // decompressed payload contains a huge byte_buf, the hint does not fire.
+ let scratch_capped = scratch_size < file_size;
+ let postcard_err = |stage: &'static str, e: postcard::Error| -> anyhow::Error {
+ let err = anyhow::Error::from(e).context(stage);
+ if scratch_capped {
+ err.context(format!(
+ "postcard scratch capped at {} MB (file is {} MB); if a single \
+ `deserialize_byte_buf` read exceeded the cap, raise it with \
+ `PROVEKIT_SCRATCH_MAX_MB=`",
+ scratch_size >> 20,
+ file_size >> 20,
+ ))
+ } else {
+ err
+ }
+ };
+
+ let value = if is_zstd {
+ let decoder = zstd::Decoder::new(file).context("while initializing zstd decoder")?;
+ let buffered = BufReader::with_capacity(DECODER_BUF, decoder);
+ let (value, _) = postcard::from_io::((buffered, &mut scratch))
+ .map_err(|e| postcard_err("while streaming postcard from zstd", e))?;
+ value
+ } else if is_xz {
+ let decoder = xz2::read::XzDecoder::new(file);
+ let buffered = BufReader::with_capacity(DECODER_BUF, decoder);
+ let (value, _) = postcard::from_io::((buffered, &mut scratch))
+ .map_err(|e| postcard_err("while streaming postcard from xz", e))?;
+ value
+ } else {
+ anyhow::bail!(
+ "Unknown compression format (first bytes: {:02X?})",
+ &peek[..peek.len().min(6)]
+ );
+ };
- postcard::from_bytes(&uncompressed).context("while decoding from postcard")
+ Ok(value)
}
/// Serialize a value to bytes in the same format as `write_bin` (header +
@@ -234,40 +321,3 @@ fn decompress_bytes(data: &[u8]) -> Result> {
);
}
}
-
-/// Peek at the first bytes to detect compression format, then
-/// stream-decompress.
-fn decompress_stream(reader: &mut BufReader) -> Result> {
- use std::io::BufRead;
-
- let buf = reader
- .fill_buf()
- .context("while peeking compression magic")?;
- ensure!(
- buf.len() >= 6,
- "File too small to detect compression format"
- );
-
- let is_zstd = buf[..4] == ZSTD_MAGIC;
- let is_xz = buf[..6] == XZ_MAGIC;
-
- let mut out = Vec::new();
- if is_zstd {
- let mut decoder = zstd::Decoder::new(reader).context("while initializing zstd decoder")?;
- decoder
- .read_to_end(&mut out)
- .context("while decompressing zstd data")?;
- } else if is_xz {
- let mut decoder = xz2::read::XzDecoder::new(reader);
- decoder
- .read_to_end(&mut out)
- .context("while decompressing XZ data")?;
- } else {
- anyhow::bail!(
- "Unknown compression format (first bytes: {:02X?})",
- &buf[..buf.len().min(6)]
- );
- }
-
- Ok(out)
-}
diff --git a/provekit/common/src/file/io/mod.rs b/provekit/common/src/file/io/mod.rs
index 049c984a7..c2e6ae9aa 100644
--- a/provekit/common/src/file/io/mod.rs
+++ b/provekit/common/src/file/io/mod.rs
@@ -3,17 +3,18 @@ mod buf_ext;
mod counting_writer;
mod json;
+pub use self::bin::Compression;
use {
self::{
bin::{
deserialize_from_bytes, read_bin, read_hash_config as read_hash_config_bin,
- serialize_to_bytes, write_bin, Compression,
+ serialize_to_bytes, write_bin,
},
buf_ext::BufExt,
counting_writer::CountingWriter,
json::{read_json, write_json},
},
- crate::{HashConfig, NoirProof, NoirProofScheme, Prover, Verifier},
+ crate::{HashConfig, NoirProof, NoirProofScheme, Verifier},
anyhow::Result,
serde::{Deserialize, Serialize},
std::{ffi::OsStr, path::Path},
@@ -29,20 +30,13 @@ pub trait FileFormat: Serialize + for<'a> Deserialize<'a> {
}
/// Helper trait to optionally extract hash config.
-pub(crate) trait MaybeHashAware {
+///
+/// `pub` so downstream crates (e.g. `provekit_prover`) can implement it for
+/// types they own. Internal helpers in this module are the only consumers.
+pub trait MaybeHashAware {
fn maybe_hash_config(&self) -> Option;
}
-/// Impl for Prover (has hash config).
-impl MaybeHashAware for Prover {
- fn maybe_hash_config(&self) -> Option {
- match self {
- Prover::Noir(p) => Some(p.hash_config),
- Prover::Mavros(p) => Some(p.hash_config),
- }
- }
-}
-
/// Impl for Verifier (has hash config).
impl MaybeHashAware for Verifier {
fn maybe_hash_config(&self) -> Option {
@@ -74,13 +68,6 @@ impl FileFormat for NoirProofScheme {
const COMPRESSION: Compression = Compression::Zstd;
}
-impl FileFormat for Prover {
- const FORMAT: [u8; 8] = crate::binary_format::PROVER_FORMAT;
- const EXTENSION: &'static str = "pkp";
- const VERSION: (u16, u16) = crate::binary_format::PROVER_VERSION;
- const COMPRESSION: Compression = Compression::Xz;
-}
-
impl FileFormat for Verifier {
const FORMAT: [u8; 8] = crate::binary_format::VERIFIER_FORMAT;
const EXTENSION: &'static str = "pkv";
diff --git a/provekit/common/src/interner.rs b/provekit/common/src/interner.rs
index 822a6a7dd..413885f95 100644
--- a/provekit/common/src/interner.rs
+++ b/provekit/common/src/interner.rs
@@ -39,4 +39,30 @@ impl Interner {
pub fn get(&self, el: InternedFieldElement) -> Option {
self.values.get(el.0).copied()
}
+
+ /// Borrow the deduplicated values array. Used by mmap-format writers
+ /// that need the raw bytes.
+ pub fn values_raw(&self) -> &[FieldElement] {
+ &self.values
+ }
+
+ /// Construct an Interner from a pre-built values vector. Bypasses the
+ /// dedup work in `intern()` — used by mmap-format readers that have
+ /// already loaded a deduplicated set of values from disk.
+ pub fn from_values(values: Vec) -> Self {
+ Self { values }
+ }
+}
+
+impl InternedFieldElement {
+ /// Construct an InternedFieldElement from a raw index. Used by
+ /// mmap-format readers that load the index Vec from raw bytes.
+ pub const fn new(idx: usize) -> Self {
+ Self(idx)
+ }
+
+ /// Inner index value.
+ pub const fn index(&self) -> usize {
+ self.0
+ }
}
diff --git a/provekit/common/src/lib.rs b/provekit/common/src/lib.rs
index 3953207d8..cba3fda9f 100644
--- a/provekit/common/src/lib.rs
+++ b/provekit/common/src/lib.rs
@@ -19,19 +19,17 @@ mod verifier;
mod whir_r1cs;
pub mod witness;
-use crate::{
- interner::{InternedFieldElement, Interner},
- sparse_matrix::{HydratedSparseMatrix, SparseMatrix},
-};
pub use {
acir::FieldElement as NoirElement,
ark_bn254::Fr as FieldElement,
hash_config::HashConfig,
+ interner::{InternedFieldElement, Interner},
mavros::{MavrosProver, MavrosSchemeData},
noir_proof_scheme::{NoirProof, NoirProofScheme, NoirSchemeData},
prefix_covector::{OffsetCovector, PrefixCovector, SparseCovector},
- prover::{NoirProver, Prover},
+ prover::NoirProver,
r1cs::R1CS,
+ sparse_matrix::{HydratedSparseMatrix, SparseMatrix},
transcript_sponge::TranscriptSponge,
verifier::Verifier,
whir_r1cs::{R1csHash, WhirConfig, WhirR1CSProof, WhirR1CSScheme, WhirZkConfig},
diff --git a/provekit/common/src/noir_proof_scheme.rs b/provekit/common/src/noir_proof_scheme.rs
index 7731d3c47..d084190ed 100644
--- a/provekit/common/src/noir_proof_scheme.rs
+++ b/provekit/common/src/noir_proof_scheme.rs
@@ -27,10 +27,48 @@ pub enum NoirProofScheme {
Mavros(MavrosSchemeData),
}
+// INVARIANT: Variant order is wire-format-critical (postcard uses positional
+// discriminants). Do not reorder, cfg-gate, or insert variants without
+// verifying cross-target deserialization (native <-> WASM).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct NoirProof {
- pub public_inputs: PublicInputs,
- pub whir_r1cs_proof: WhirR1CSProof,
+pub enum NoirProof {
+ Whir {
+ public_inputs: PublicInputs,
+ whir_r1cs_proof: WhirR1CSProof,
+ },
+ Groth16 {
+ public_inputs: PublicInputs,
+ /// CanonicalSerialize'd `provekit_groth16::Proof`.
+ groth16_proof: Vec,
+ },
+}
+
+impl NoirProof {
+ /// Access public inputs regardless of proof variant.
+ pub fn public_inputs(&self) -> &PublicInputs {
+ match self {
+ NoirProof::Whir { public_inputs, .. } => public_inputs,
+ NoirProof::Groth16 { public_inputs, .. } => public_inputs,
+ }
+ }
+
+ /// Mutably access public inputs regardless of proof variant.
+ pub fn public_inputs_mut(&mut self) -> &mut PublicInputs {
+ match self {
+ NoirProof::Whir { public_inputs, .. } => public_inputs,
+ NoirProof::Groth16 { public_inputs, .. } => public_inputs,
+ }
+ }
+
+ /// Access the WHIR proof. Panics if this is a Groth16 proof.
+ pub fn whir_r1cs_proof(&self) -> &WhirR1CSProof {
+ match self {
+ NoirProof::Whir {
+ whir_r1cs_proof, ..
+ } => whir_r1cs_proof,
+ NoirProof::Groth16 { .. } => panic!("called whir_r1cs_proof() on a Groth16 proof"),
+ }
+ }
}
impl NoirProofScheme {
diff --git a/provekit/common/src/prover.rs b/provekit/common/src/prover.rs
index 88e2da07b..88b89e6b8 100644
--- a/provekit/common/src/prover.rs
+++ b/provekit/common/src/prover.rs
@@ -1,12 +1,19 @@
+//! Backend-specific prover types that don't introduce a `provekit_groth16`
+//! dependency.
+//!
+//! `NoirProver` lives here because it's referenced by the WHIR pipeline that
+//! is shared by everything in the workspace. The Groth16 prover and the
+//! `Prover` enum live in `provekit_prover::prover_types` so they can hold a
+//! typed `provekit_groth16::ProvingKey` without creating a dependency cycle
+//! (`provekit_groth16` depends on this crate for `R1CS`).
+
use {
crate::{
- noir_proof_scheme::NoirProofScheme,
whir_r1cs::WhirR1CSScheme,
witness::{NoirWitnessGenerator, SplitWitnessBuilders},
- HashConfig, MavrosProver, NoirElement, R1CS,
+ HashConfig, NoirElement, R1CS,
},
acir::circuit::Program,
- noirc_abi::Abi,
serde::{Deserialize, Serialize},
};
@@ -19,68 +26,3 @@ pub struct NoirProver {
pub witness_generator: NoirWitnessGenerator,
pub whir_for_witness: WhirR1CSScheme,
}
-
-/// On-disk **ProveKit Prover** (PKP) — the prover-side scheme that gets
-/// serialized to a `.pkp` file by `prepare` and loaded by `prove`.
-///
-/// Holds the R1CS, witness builders, WHIR config, and frontend-specific
-/// program data needed to produce a proof.
-///
-/// INVARIANT: Variant order is wire-format-critical (postcard uses positional
-/// discriminants). Do not reorder, cfg-gate, or insert variants without
-/// verifying cross-target deserialization (native <-> WASM).
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum Prover {
- Noir(NoirProver),
- Mavros(MavrosProver),
-}
-
-impl Prover {
- /// Convert a compilation output into the on-disk prover format.
- pub fn from_noir_proof_scheme(scheme: NoirProofScheme) -> Self {
- match scheme {
- NoirProofScheme::Noir(d) => Prover::Noir(NoirProver {
- hash_config: d.hash_config,
- program: d.program,
- r1cs: d.r1cs,
- split_witness_builders: d.split_witness_builders,
- witness_generator: d.witness_generator,
- whir_for_witness: d.whir_for_witness,
- }),
- NoirProofScheme::Mavros(d) => Prover::Mavros(MavrosProver {
- abi: d.abi,
- num_public_inputs: d.num_public_inputs,
- whir_for_witness: d.whir_for_witness,
- witgen_binary: d.witgen_binary,
- ad_binary: d.ad_binary,
- constraints_layout: d.constraints_layout,
- witness_layout: d.witness_layout,
- hash_config: d.hash_config,
- }),
- }
- }
-
- pub fn abi(&self) -> &Abi {
- match self {
- Prover::Noir(p) => p.witness_generator.abi(),
- Prover::Mavros(p) => &p.abi,
- }
- }
-
- pub fn size(&self) -> (usize, usize) {
- match self {
- Prover::Noir(p) => (p.r1cs.num_constraints(), p.r1cs.num_witnesses()),
- Prover::Mavros(p) => (
- p.constraints_layout.algebraic_size,
- p.witness_layout.algebraic_size,
- ),
- }
- }
-
- pub fn whir_for_witness(&self) -> &WhirR1CSScheme {
- match self {
- Prover::Noir(p) => &p.whir_for_witness,
- Prover::Mavros(p) => &p.whir_for_witness,
- }
- }
-}
diff --git a/provekit/common/src/sparse_matrix.rs b/provekit/common/src/sparse_matrix.rs
index 012a3b447..887fd727a 100644
--- a/provekit/common/src/sparse_matrix.rs
+++ b/provekit/common/src/sparse_matrix.rs
@@ -312,6 +312,45 @@ impl SparseMatrix {
}
}
+ /// Expose the stored `new_row_indices` as a slice, for mmap-format
+ /// writers that serialize the raw bytes.
+ pub fn new_row_indices_raw(&self) -> &[u32] {
+ self.new_row_indices.as_slice()
+ }
+
+ /// Expose the stored `col_indices` as a slice. The indices are
+ /// absolute, not delta-encoded. Intended for mmap-format writers that
+ /// serialize the raw bytes.
+ pub fn col_indices_raw(&self) -> &[u32] {
+ self.col_indices.as_slice()
+ }
+
+ /// Expose the stored `values` (interner indices) as a slice, for
+ /// mmap-format writers that serialize the raw bytes.
+ pub fn values_raw(&self) -> &[InternedFieldElement] {
+ self.values.as_slice()
+ }
+
+ /// Assemble a `SparseMatrix` from its three internal arrays as-is; the
+ /// inputs are stored without validation. Used by mmap-format readers that
+ /// have just memcpy'd the bytes from disk into owned `Vec`s, skipping the
+ /// per-entry insertion path through `set` / `push_row` / delta decoding.
+ pub fn from_raw_parts(
+ num_rows: usize,
+ num_cols: usize,
+ new_row_indices: Vec<u32>,
+ col_indices: Vec<u32>,
+ values: Vec<InternedFieldElement>,
+ ) -> Self {
+ Self {
+ num_rows,
+ num_cols,
+ new_row_indices,
+ col_indices,
+ values,
+ }
+ }
+
pub const fn hydrate<'a>(&'a self, interner: &'a Interner) -> HydratedSparseMatrix<'a> {
HydratedSparseMatrix {
matrix: self,
diff --git a/provekit/common/src/verifier.rs b/provekit/common/src/verifier.rs
index 2663cff61..a82dcf569 100644
--- a/provekit/common/src/verifier.rs
+++ b/provekit/common/src/verifier.rs
@@ -20,6 +20,10 @@ pub struct Verifier {
pub whir_for_witness: Option<WhirR1CSScheme>,
#[serde(with = "serde_jsonify")]
pub abi: Abi,
+ /// CanonicalSerialize'd `provekit_groth16::VerifyingKey` bytes (`None`
+ /// for WHIR proofs).
+ #[serde(default)]
+ pub groth16_vk: Option<Vec<u8>>,
}
impl Verifier {
@@ -30,12 +34,14 @@ impl Verifier {
whir_for_witness: Some(d.whir_for_witness),
abi: d.witness_generator.abi.clone(),
hash_config: d.hash_config,
+ groth16_vk: None,
},
NoirProofScheme::Mavros(d) => Self {
r1cs: d.r1cs,
whir_for_witness: Some(d.whir_for_witness),
abi: d.abi.clone(),
hash_config: d.hash_config,
+ groth16_vk: None,
},
}
}
diff --git a/provekit/groth16/Cargo.toml b/provekit/groth16/Cargo.toml
new file mode 100644
index 000000000..6cb53c45a
--- /dev/null
+++ b/provekit/groth16/Cargo.toml
@@ -0,0 +1,32 @@
+[package]
+name = "provekit-groth16"
+version = "0.1.0"
+edition.workspace = true
+rust-version.workspace = true
+authors.workspace = true
+license.workspace = true
+
+[dependencies]
+ark-bn254 = { workspace = true }
+ark-ff = { workspace = true }
+ark-ec = { workspace = true }
+ark-poly = { workspace = true }
+ark-serialize = { workspace = true }
+ark-std = { workspace = true }
+rayon = { workspace = true }
+anyhow = { workspace = true }
+sha3 = { workspace = true }
+serde = { workspace = true }
+tracing = { workspace = true }
+zeroize = { workspace = true, features = ["derive"] }
+provekit-common = { workspace = true }
+
+# Target-specific: mmap-backed proving key loader is non-WASM only.
+[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
+memmap2 = { workspace = true }
+
+[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
+tempfile = { workspace = true }
+
+[lints]
+workspace = true
diff --git a/provekit/groth16/src/lib.rs b/provekit/groth16/src/lib.rs
new file mode 100644
index 000000000..966a477bf
--- /dev/null
+++ b/provekit/groth16/src/lib.rs
@@ -0,0 +1,93 @@
+//! Groth16 proof system with BSB22 commitment extension for BN254.
+//!
+//! Built on arkworks primitives for elliptic curve operations, pairings,
+//! FFT, and MSM.
+//!
+//! Reference: DIZK paper (Figure 4)
+//! BSB22 extension:
+pub mod pedersen;
+pub mod prover;
+pub mod setup;
+pub mod types;
+pub mod verifier;
+
+#[cfg(not(target_arch = "wasm32"))]
+pub mod mmap_pk;
+
+#[cfg(not(target_arch = "wasm32"))]
+pub use mmap_pk::{MmapProvingKey, MMAP_SENTINEL};
+pub use types::{Proof, ProvingKey, VerifyingKey};
+
+/// Extension trait for [`provekit_common::Verifier`] that decodes the
+/// `groth16_vk: Option<Vec<u8>>` field into a typed [`VerifyingKey`] in one
+/// place — so consumers don't each repeat the
+/// `CanonicalDeserialize::deserialize_uncompressed(&bytes[..])` dance.
+pub trait VerifierGroth16Ext {
+ /// Decode the embedded Groth16 verifying key, if present.
+ ///
+ /// `Ok(None)` means this PKV is for the WHIR backend; `Err` means a VK
+ /// was present but failed to deserialize.
+ fn groth16_vk_typed(&self) -> anyhow::Result