-
Notifications
You must be signed in to change notification settings - Fork 388
Add PiPNN: high-performance alternative index builder (7.6-10x faster graph build) #856
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
de1ca34
4d4ca8b
4012217
271f233
68460c8
186737d
ae970df
bc5986e
7dc1370
7f6a3a4
ff59bc9
ef76b68
a46708c
8ee64a2
fe0b4e3
c4319b9
142994d
cbab9fc
6dd0097
1f76ca0
4cb3308
0de6bb3
9c7d711
342781d
ebfe634
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -53,6 +53,7 @@ use crate::{ | |||||||||||
| }, | ||||||||||||
| continuation::{process_while_resource_is_available_async, ChunkingConfig}, | ||||||||||||
| }, | ||||||||||||
| configuration::build_algorithm::BuildAlgorithm, | ||||||||||||
| }, | ||||||||||||
| storage::{ | ||||||||||||
| quant::{GeneratorContext, PQGeneration, PQGenerationContext, QuantDataGenerator}, | ||||||||||||
|
|
@@ -235,15 +236,35 @@ where | |||||||||||
| self.index_configuration.num_threads | ||||||||||||
| ); | ||||||||||||
|
|
||||||||||||
| let t_pq = std::time::Instant::now(); | ||||||||||||
| self.generate_compressed_data(&pool).await?; | ||||||||||||
| logger.log_checkpoint(DiskIndexBuildCheckpoint::PqConstruction); | ||||||||||||
| let pq_secs = t_pq.elapsed().as_secs_f64(); | ||||||||||||
|
|
||||||||||||
| let t_index = std::time::Instant::now(); | ||||||||||||
| self.build_inmem_index(&pool).await?; | ||||||||||||
| logger.log_checkpoint(DiskIndexBuildCheckpoint::InmemIndexBuild); | ||||||||||||
| let index_secs = t_index.elapsed().as_secs_f64(); | ||||||||||||
|
|
||||||||||||
| // Return freed memory (f32 data, graph, PiPNN internals) to the OS | ||||||||||||
| // before disk layout starts. Without this, ~1.7 GB of freed-but-retained | ||||||||||||
| // memory inflates peak RSS during the disk layout phase. | ||||||||||||
| #[cfg(target_os = "linux")] | ||||||||||||
| unsafe { | ||||||||||||
| extern "C" { fn malloc_trim(pad: usize) -> i32; } | ||||||||||||
| malloc_trim(0); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| // Use physical file to pass the memory index to the disk writer | ||||||||||||
| let t_layout = std::time::Instant::now(); | ||||||||||||
| self.create_disk_layout()?; | ||||||||||||
| logger.log_checkpoint(DiskIndexBuildCheckpoint::DiskLayout); | ||||||||||||
| let layout_secs = t_layout.elapsed().as_secs_f64(); | ||||||||||||
|
|
||||||||||||
| println!("Disk Index Build Phases"); | ||||||||||||
| println!(" PQ compression: {:.3}s", pq_secs); | ||||||||||||
| println!(" Graph build: {:.3}s", index_secs); | ||||||||||||
| println!(" Disk layout: {:.3}s", layout_secs); | ||||||||||||
|
|
||||||||||||
| Ok(()) | ||||||||||||
| } | ||||||||||||
|
|
@@ -313,6 +334,22 @@ where | |||||||||||
| } | ||||||||||||
|
|
||||||||||||
| async fn build_inmem_index(&mut self, pool: &RayonThreadPool) -> ANNResult<()> { | ||||||||||||
| // Check for PiPNN algorithm | ||||||||||||
| #[cfg(feature = "pipnn")] | ||||||||||||
| if let BuildAlgorithm::PiPNN { .. } = self.disk_build_param.build_algorithm() { | ||||||||||||
|
||||||||||||
| if let BuildAlgorithm::PiPNN { .. } = self.disk_build_param.build_algorithm() { | |
| if matches!( | |
| self.disk_build_param.build_algorithm(), | |
| &BuildAlgorithm::PiPNN { .. } | |
| ) { |
Copilot
AI
Mar 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Similarly, this `matches!(self.disk_build_param.build_algorithm(), BuildAlgorithm::Vamana)` is matching `&BuildAlgorithm` against a by-value pattern and will not compile under `#[cfg(not(feature = "pipnn"))]`. The pattern needs to match a reference (e.g., `&BuildAlgorithm::Vamana`).
| BuildAlgorithm::Vamana | |
| &BuildAlgorithm::Vamana |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`diskann-pipnn` is added as an unconditional dependency, but it is not referenced from this crate (and `diskann-disk` is already optional behind the `disk-index` feature). This forces PiPNN to compile even when `disk-index` is disabled, increasing build times/binary size. Consider removing it or making it conditional (e.g., only pulled in via `diskann-disk`'s `pipnn` feature under `disk-index`).