From c72e9dd9f5eea512ffd8d42f88c69d8bd79bcb5b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 12 Jun 2026 16:37:33 +0000
Subject: [PATCH] feat(parakeet-cpp): enable GGML_CUDA_GRAPHS in the cublas build

ggml leaves GGML_CUDA_GRAPHS off by default. Passing
-DGGML_CUDA_GRAPHS=ON for cublas builds lets the CUDA backend capture
and replay the compute graph for a small free speedup (about 1% measured
on a GB10, never negative). It is not gated by parakeet.cpp's CMake
options, so it passes straight through to ggml.

Assisted-by: Claude Opus 4.8
---
 backend/go/parakeet-cpp/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/backend/go/parakeet-cpp/Makefile b/backend/go/parakeet-cpp/Makefile
index 32f43e080807..eea251cb169f 100644
--- a/backend/go/parakeet-cpp/Makefile
+++ b/backend/go/parakeet-cpp/Makefile
@@ -39,7 +39,10 @@ endif
 # is overwritten back to OFF and the build silently falls back to CPU. Forward the
 # PARAKEET_GGML_* options instead. (openblas is not gated, so -DGGML_BLAS passes through.)
 ifeq ($(BUILD_TYPE),cublas)
-	CMAKE_ARGS+=-DPARAKEET_GGML_CUDA=ON
+	# GGML_CUDA_GRAPHS is OFF by ggml default; enabling it gives a small free
+	# speedup (~1% measured on GB10, never negative) by capturing/replaying the
+	# CUDA graph. Not gated by parakeet.cpp, so it passes straight through to ggml.
+	CMAKE_ARGS+=-DPARAKEET_GGML_CUDA=ON -DGGML_CUDA_GRAPHS=ON
 else ifeq ($(BUILD_TYPE),openblas)
 	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
 else ifeq ($(BUILD_TYPE),hipblas)