Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

75 changes: 35 additions & 40 deletions diskann-benchmark-core/src/build/graph/multi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use diskann::{
ANNError, ANNErrorKind, ANNResult,
graph::{self, glue},
provider,
utils::async_tools::VectorIdBoxSlice,
};
use diskann_utils::{future::AsyncFriendly, views::Matrix};

Expand Down Expand Up @@ -64,12 +63,9 @@ where

impl<DP, T, S> Build for MultiInsert<DP, T, S>
where
DP: provider::DataProvider<Context: Default> + provider::SetElement<[T]>,
S: glue::InsertStrategy<DP, [T], PruneStrategy: Clone> + Clone + AsyncFriendly,
DP: provider::DataProvider<Context: Default> + for<'a> provider::SetElement<&'a [T]>,
S: glue::MultiInsertStrategy<DP, Matrix<T>> + Clone + 'static,
T: AsyncFriendly + Clone,
// TODO (Mark): This is a very very unfortunate bound and should be cleaned up with
// an overhaul to the working set.
for<'a> glue::aliases::InsertPruneAccessor<'a, S, DP, [T]>: glue::AsElement<&'a [T]>,
{
type Output = ();

Expand All @@ -78,43 +74,42 @@ where
}

async fn build(&self, range: Range<usize>) -> ANNResult<Self::Output> {
let vectors: ANNResult<Box<[_]>> = range
.into_iter()
.map(|i| {
let id = self.to_id.to_id(i)?;
let vector = self.data.get_row(i).ok_or_else(|| {
#[derive(Debug)]
struct OutOfBounds {
max: usize,
accessed: usize,
let vectors = self
.data
.subview(range.clone())
.ok_or_else(|| {
#[derive(Debug)]
struct OutOfBounds {
max: usize,
start: usize,
end: usize,
}

impl std::fmt::Display for OutOfBounds {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"tried to access data with {} rows at range [{}, {})",
self.max, self.start, self.end
)
}

impl std::fmt::Display for OutOfBounds {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"tried to access data with {} rows at index {}",
self.max, self.accessed
)
}
}

ANNError::message(
ANNErrorKind::Opaque,
OutOfBounds {
max: self.data.nrows(),
accessed: i,
},
)
})?;

Ok(VectorIdBoxSlice::new(id, vector.into()))
})
.collect();

}

ANNError::message(
ANNErrorKind::Opaque,
OutOfBounds {
max: self.data.nrows(),
start: range.start,
end: range.end,
},
)
})?
.to_owned();

let ids: ANNResult<Arc<[_]>> = range.into_iter().map(|i| self.to_id.to_id(i)).collect();
let context = DP::Context::default();
self.index
.multi_insert(self.strategy.clone(), &context, vectors?)
.multi_insert::<S, _>(self.strategy.clone(), &context, Arc::new(vectors), ids?)
.await?;

Ok(())
Expand Down
4 changes: 2 additions & 2 deletions diskann-benchmark-core/src/build/graph/single.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ where

impl<DP, T, S> Build for SingleInsert<DP, T, S>
where
DP: provider::DataProvider<Context: Default> + provider::SetElement<[T]>,
S: glue::InsertStrategy<DP, [T]> + Clone + AsyncFriendly,
DP: provider::DataProvider<Context: Default> + for<'a> provider::SetElement<&'a [T]>,
S: for<'a> glue::InsertStrategy<DP, &'a [T]> + Clone + AsyncFriendly,
T: AsyncFriendly + Clone,
{
type Output = ();
Expand Down
2 changes: 1 addition & 1 deletion diskann-benchmark-core/src/search/graph/knn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ pub struct Metrics {
impl<DP, T, S> Search for KNN<DP, T, S>
where
DP: provider::DataProvider<Context: Default, ExternalId: search::Id>,
S: glue::DefaultSearchStrategy<DP, [T], DP::ExternalId> + Clone + AsyncFriendly,
S: for<'a> glue::DefaultSearchStrategy<DP, &'a [T], DP::ExternalId> + Clone + AsyncFriendly,
T: AsyncFriendly + Clone,
{
type Id = DP::ExternalId;
Expand Down
2 changes: 1 addition & 1 deletion diskann-benchmark-core/src/search/graph/multihop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ where
impl<DP, T, S> Search for MultiHop<DP, T, S>
where
DP: provider::DataProvider<Context: Default, ExternalId: search::Id>,
S: glue::DefaultSearchStrategy<DP, [T], DP::ExternalId> + Clone + AsyncFriendly,
S: for<'a> glue::DefaultSearchStrategy<DP, &'a [T], DP::ExternalId> + Clone + AsyncFriendly,
T: AsyncFriendly + Clone,
{
type Id = DP::ExternalId;
Expand Down
2 changes: 1 addition & 1 deletion diskann-benchmark-core/src/search/graph/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ pub struct Metrics {}
impl<DP, T, S> Search for Range<DP, T, S>
where
DP: provider::DataProvider<Context: Default, ExternalId: search::Id>,
S: glue::DefaultSearchStrategy<DP, [T], DP::ExternalId> + Clone + AsyncFriendly,
S: for<'a> glue::DefaultSearchStrategy<DP, &'a [T], DP::ExternalId> + Clone + AsyncFriendly,
T: AsyncFriendly + Clone,
{
type Id = DP::ExternalId;
Expand Down
8 changes: 4 additions & 4 deletions diskann-benchmark/src/backend/index/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,10 +315,10 @@ pub(super) fn run_build<T, BF, CF, B, DP>(
) -> anyhow::Result<(Index<DP>, BuildStats)>
where
DP: DataProvider<Context = DefaultContext, InternalId = u32, ExternalId = u32>
+ provider::SetElement<[T]>,
+ for<'a> provider::SetElement<&'a [T]>,
CF: FnOnce(MatrixView<T>) -> anyhow::Result<Arc<DiskANNIndex<DP>>>,
T: diskann::graph::SampleableForStart + std::fmt::Debug + Copy + AsyncFriendly + bytemuck::Pod,
B: glue::SearchStrategy<DP, [T]> + Clone + Send + Sync,
B: for<'a> glue::SearchStrategy<DP, &'a [T]> + Clone + Send + Sync,
BF: FnOnce(
Index<DP>,
B,
Expand Down Expand Up @@ -347,9 +347,9 @@ pub(super) fn run_search_outer<T, S, DP>(
) -> anyhow::Result<BuildResult>
where
DP: DataProvider<Context = DefaultContext, InternalId = u32, ExternalId = u32>
+ provider::SetElement<[T]>,
+ for<'a> provider::SetElement<&'a [T]>,
T: SampleableForStart + std::fmt::Debug + Copy + AsyncFriendly + bytemuck::Pod,
S: glue::DefaultSearchStrategy<DP, [T]> + Clone + AsyncFriendly,
S: for<'a> glue::DefaultSearchStrategy<DP, &'a [T]> + Clone + AsyncFriendly,
{
match &input {
SearchPhase::Topk(search_phase) => {
Expand Down
3 changes: 2 additions & 1 deletion diskann-benchmark/src/backend/index/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,8 @@ pub(super) async fn save_index<DP, T>(
save_path: &str,
) -> anyhow::Result<()>
where
DP: DataProvider<Context = DefaultContext, ExternalId = u32> + provider::SetElement<[T]>,
DP: DataProvider<Context = DefaultContext, ExternalId = u32>
+ for<'a> provider::SetElement<&'a [T]>,
DiskANNIndex<DP>: SaveWith<AsyncIndexMetadata, Error = ANNError>,
{
index
Expand Down
2 changes: 1 addition & 1 deletion diskann-disk/src/build/builder/inmem_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ impl<T, Q> InmemIndexBuilder<T> for QuantInMemBuilder<T, Q>
where
T: VectorRepr,
Q: AsyncFriendly + VectorStore + SetElementHelper<T>,
Quantized: InsertStrategy<DefaultProvider<NoStore, Q>, [T]>
Quantized: for<'a> InsertStrategy<DefaultProvider<NoStore, Q>, &'a [T]>
+ PruneStrategy<DefaultProvider<NoStore, Q>>,
DefaultProvider<NoStore, Q>: SaveWith<(u32, AsyncIndexMetadata), Error = ANNError>,
{
Expand Down
28 changes: 12 additions & 16 deletions diskann-disk/src/search/provider/disk_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use diskann::{
neighbor::Neighbor,
provider::{
Accessor, BuildQueryComputer, DataProvider, DefaultContext, DelegateNeighbor, HasId,
NeighborAccessor,
NeighborAccessor, NoopGuard,
},
utils::{
object_pool::{ObjectPool, PoolOption, TryAsPooled},
Expand Down Expand Up @@ -103,6 +103,8 @@ where

type ExternalId = u32;

type Guard = NoopGuard<u32>;

type Error = ANNError;

/// Translate an external id to its corresponding internal id.
Expand Down Expand Up @@ -282,7 +284,7 @@ impl<'a> RerankAndFilter<'a> {
impl<Data, VP>
SearchPostProcess<
DiskAccessor<'_, Data, VP>,
[Data::VectorDataType],
&[Data::VectorDataType],
(
<DiskProvider<Data> as DataProvider>::InternalId,
Data::AssociatedDataType,
Expand Down Expand Up @@ -340,7 +342,7 @@ where
}
}

impl<'this, Data, ProviderFactory> SearchStrategy<DiskProvider<Data>, [Data::VectorDataType]>
impl<'this, Data, ProviderFactory> SearchStrategy<DiskProvider<Data>, &[Data::VectorDataType]>
for DiskSearchStrategy<'this, Data, ProviderFactory>
where
Data: GraphDataType<VectorIdType = u32>,
Expand Down Expand Up @@ -368,7 +370,7 @@ where
impl<'this, Data, ProviderFactory>
DefaultPostProcessor<
DiskProvider<Data>,
[Data::VectorDataType],
&[Data::VectorDataType],
(
<DiskProvider<Data> as DataProvider>::InternalId,
Data::AssociatedDataType,
Expand Down Expand Up @@ -406,7 +408,7 @@ impl PreprocessedDistanceFunction<&[u8], f32> for DiskQueryComputer {
}
}

impl<Data, VP> BuildQueryComputer<[Data::VectorDataType]> for DiskAccessor<'_, Data, VP>
impl<Data, VP> BuildQueryComputer<&[Data::VectorDataType]> for DiskAccessor<'_, Data, VP>
where
Data: GraphDataType<VectorIdType = u32>,
VP: VertexProvider<Data>,
Expand Down Expand Up @@ -443,7 +445,7 @@ where
}
}

impl<Data, VP> ExpandBeam<[Data::VectorDataType]> for DiskAccessor<'_, Data, VP>
impl<Data, VP> ExpandBeam<&[Data::VectorDataType]> for DiskAccessor<'_, Data, VP>
where
Data: GraphDataType<VectorIdType = u32>,
VP: VertexProvider<Data>,
Expand Down Expand Up @@ -690,26 +692,20 @@ where
type Id = u32;
}

impl<'a, Data, VP> Accessor for DiskAccessor<'a, Data, VP>
impl<Data, VP> Accessor for DiskAccessor<'_, Data, VP>
where
Data: GraphDataType<VectorIdType = u32>,
VP: VertexProvider<Data>,
{
/// This references the PQ vector in the underlying `pq_data` store.
type Extended = &'a [u8];

/// This accessor returns raw slices. There *is* a chance of racing when the fast
/// providers are used. We just have to live with it.
///
/// Since the underlying PQ store is shared, we ignore the `'b` lifetime here and
/// instead use `'a`.
type Element<'b>
type Element<'a>
= &'a [u8]
where
Self: 'b;
Self: 'a;

/// `ElementRef` can have arbitrary lifetimes.
type ElementRef<'b> = &'b [u8];
type ElementRef<'a> = &'a [u8];

/// Choose to panic on an out-of-bounds access rather than propagate an error.
type GetError = ANNError;
Expand Down
1 change: 1 addition & 0 deletions diskann-garnet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ diskann-vector.workspace = true
foldhash = "0.2.0"
thiserror.workspace = true
tokio.workspace = true
diskann-utils.workspace = true
4 changes: 2 additions & 2 deletions diskann-garnet/src/dyn_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ impl<T: VectorRepr> DynIndex for DiskANNIndex<GarnetProvider<T>> {
let query = bytemuck::cast_slice::<u8, T>(data);
if let Some((labels, beta)) = filter {
let beta_filter = BetaFilter::new(FullPrecision, Arc::new(labels.clone()), beta);
self.search(&beta_filter, context, query, params, output)
self.search(*params, &beta_filter, context, query, output)
} else {
self.search(&FullPrecision, context, query, params, output)
self.search(*params, &FullPrecision, context, query, output)
}
}

Expand Down
Loading
Loading