diff --git a/Cargo.toml b/Cargo.toml index 0726da9..798108a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ libc = { version = "0.2.150", optional = true } bincode = { version = "2.0.1", default-features = false, features = ["alloc", "derive", "std"], optional = true } borsh = { version = "1.5", default-features = false, features = ["derive"] } hex = { version = "0.4.3", optional = true } +rusqlite = { version = "0.31", features = ["bundled"], optional = true } # optional wasm feature wasm-bindgen = { version ="0.2.100", optional = true } @@ -33,3 +34,4 @@ default = ["std", "extras"] std = ["libc", "hex", "bincode"] wasm = ["wasm-bindgen", "js-sys"] extras = [] +hash-idx = ["std", "rusqlite"] diff --git a/src/db.rs b/src/db.rs index 2a637c2..cbcea94 100644 --- a/src/db.rs +++ b/src/db.rs @@ -29,6 +29,7 @@ pub struct Database { pub(crate) header: Arc>, pub(crate) file: Arc>, pub config: Configuration, + pub(crate) path: Option, } #[derive(Clone, Encode, Decode, Debug, Eq, PartialEq, Hash)] @@ -101,6 +102,10 @@ impl DatabaseHeader { impl Database { pub fn open(path: &str) -> Result { + Self::open_with_config(path, Configuration::standard()) + } + + pub fn open_with_config(path: &str, config: Configuration) -> Result { let mut opts = OpenOptions::new(); opts.read(true).write(true).create(true); @@ -112,9 +117,10 @@ impl Database { } let file = opts.open(path).map_err(crate::Error::IO)?; - let config = Configuration::standard(); let backend = FileBackend::new(file)?; - Self::new(Box::new(backend), config) + let mut db = Self::new(Box::new(backend), config)?; + db.path = Some(path.to_string()); + Ok(db) } pub fn open_read_only(path: &str) -> Result { @@ -123,7 +129,9 @@ impl Database { .open(path) .map_err(crate::Error::IO)?; let config = Configuration::standard(); - Self::new(Box::new(FileBackend::read_only(file)), config) + let mut db = Self::new(Box::new(FileBackend::read_only(file)), config)?; + db.path = Some(path.to_string()); + Ok(db) } pub fn memory() 
-> Result<Self> { @@ -150,6 +158,7 @@ impl Database { header: Arc::new(Mutex::new(header)), file: Arc::new(file), config, + path: None, }; if !has_header { @@ -212,9 +221,86 @@ impl Database { *header = DatabaseHeader::new(); self.write_header(&header)?; self.file.set_len(header.len())?; + Self::cleanup_hash_indexes(&self.path, 0); Ok(()) } + /// Deletes hash index sidecar files whose root offset >= min_offset. + /// Pass min_offset=0 to delete all index files. + pub fn cleanup_hash_indexes(db_path: &Option<String>, min_offset: u64) { + let db_path = match db_path { + Some(p) => p, + None => return, + }; + let path = std::path::Path::new(db_path); + let stem = match path.file_stem().and_then(|s| s.to_str()) { + Some(s) => s.to_string(), + None => return, + }; + let parent = path.parent().unwrap_or(std::path::Path::new(".")); + let prefix = format!("{}.", stem); + let suffix = ".hidx.sqlite"; + + if let Ok(entries) = std::fs::read_dir(parent) { + for entry in entries.flatten() { + let name = entry.file_name(); + let name_str = match name.to_str() { + Some(s) => s, + None => continue, + }; + if let Some(rest) = name_str.strip_prefix(&prefix) { + if let Some(offset_str) = rest.strip_suffix(suffix) { + if let Ok(offset) = offset_str.parse::<u64>() { + if offset >= min_offset { + let _ = std::fs::remove_file(entry.path()); + } + } + } + } + } + } + } + + /// Deletes all hash index sidecar files except those belonging to the given snapshots.
+ #[cfg(feature = "hash-idx")] + pub fn retain_hash_indexes(&self, keep: &[&ReadTransaction<H>]) { + let db_path = match &self.path { + Some(p) => p, + None => return, + }; + let path = std::path::Path::new(db_path); + let stem = match path.file_stem().and_then(|s| s.to_str()) { + Some(s) => s.to_string(), + None => return, + }; + let parent = path.parent().unwrap_or(std::path::Path::new(".")); + let prefix = format!("{}.", stem); + let suffix = ".hidx.sqlite"; + + let keep_offsets: std::collections::HashSet<u64> = keep.iter() + .map(|tx| tx.root_offset()) + .collect(); + + if let Ok(entries) = std::fs::read_dir(parent) { + for entry in entries.flatten() { + let name = entry.file_name(); + let name_str = match name.to_str() { + Some(s) => s, + None => continue, + }; + if let Some(rest) = name_str.strip_prefix(&prefix) { + if let Some(offset_str) = rest.strip_suffix(suffix) { + if let Ok(offset) = offset_str.parse::<u64>() { + if !keep_offsets.contains(&offset) { + let _ = std::fs::remove_file(entry.path()); + } + } + } + } + } + } + } + pub fn begin_write(&self) -> Result<WriteTransaction<H>> { Ok(WriteTransaction::new(self)) } diff --git a/src/lib.rs b/src/lib.rs index 33c43c8..dfd649c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,6 +37,7 @@ pub type Result = core::result::Result; #[derive(Clone, Debug)] pub struct Configuration { pub cache_size: usize, + pub auto_hash_index: bool, _marker: PhantomData, } @@ -100,6 +101,7 @@ impl Configuration { pub fn new() -> Self { Self { cache_size: DEFAULT_CACHE_SIZE, + auto_hash_index: false, _marker: PhantomData, } } @@ -108,6 +110,11 @@ impl Configuration { self.cache_size = size; self } + + pub fn with_auto_hash_index(mut self, enabled: bool) -> Self { + self.auto_hash_index = enabled; + self + } } pub trait NodeHasher: Clone { diff --git a/src/tx.rs b/src/tx.rs index b99a3d0..ef9fa17 100644 --- a/src/tx.rs +++ b/src/tx.rs @@ -31,11 +31,31 @@ pub struct WriteTransaction<'db, H: NodeHasher> { metadata: Option>, } +#[cfg(feature = "hash-idx")] +pub
struct HashIndex { + conn: Arc<rusqlite::Connection>, +} + +#[cfg(feature = "hash-idx")] +use std::sync::Arc; + +#[cfg(feature = "hash-idx")] +impl Clone for HashIndex { + fn clone(&self) -> Self { + Self { conn: self.conn.clone() } + } +} + +#[cfg(not(feature = "hash-idx"))] +#[derive(Clone)] +pub struct HashIndex; + #[derive(Clone)] pub struct ReadTransaction<H: NodeHasher> { db: Database<H>, savepoint: SavePoint, cache: Cache, + hash_index: Option<HashIndex>, } #[derive(Clone)] @@ -60,11 +80,18 @@ impl ReadTransaction { let cache_size = db.config.cache_size; let root = savepoint.root; - Self { + #[allow(unused_mut)] + let mut tx = Self { db, savepoint, cache: Cache::new(root, cache_size), - } + hash_index: None, + }; + + #[cfg(feature = "hash-idx")] + { let _ = tx.load_hash_index(); } + + tx } pub fn iter(&self) -> KeyIterator { @@ -76,6 +103,7 @@ header.savepoint = self.savepoint.clone(); self.db.write_header(&header)?; self.db.file.set_len(header.len())?; + Database::<H>::cleanup_hash_indexes(&self.db.path, header.len()); Ok(()) } @@ -86,6 +114,183 @@ } } + /// Returns the sidecar hash index path for the current snapshot. + #[cfg(feature = "hash-idx")] + fn hash_index_path(&self) -> Option<String> { + let db_path = self.db.path.as_ref()?; + let path = std::path::Path::new(db_path); + let stem = path.file_stem()?.to_str()?; + let parent = path.parent().unwrap_or(std::path::Path::new(".")); + let idx_path = parent.join(format!("{}.{}.hidx.sqlite", stem, self.savepoint.root.offset)); + idx_path.to_str().map(|s| s.to_string()) + } + + /// Builds a hash index sidecar file for the current snapshot. + /// This precomputes all node hashes and stores them in a sqlite database, + /// so that future `prove()` and `compute_root()` calls can look up hashes + /// instead of recursively walking the tree.
+ #[cfg(feature = "hash-idx")] + pub fn build_hash_index(&mut self) -> Result<()> { + if self.is_empty() { + return Ok(()); + } + + let idx_path = self.hash_index_path() + .ok_or_else(|| io::Error::new(io::ErrorKind::Unsupported, "Cannot build hash index for in-memory database"))?; + + // Compute fingerprint: hash of root node's raw bytes + let root_raw = self.db.file.read(self.savepoint.root.offset, self.savepoint.root.size as usize)?; + let fingerprint = H::hash(&root_raw); + + // Skip if a valid index already exists + if std::path::Path::new(&idx_path).exists() { + if let Ok(existing) = rusqlite::Connection::open_with_flags( + &idx_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX, + ) { + let stored: core::result::Result<Vec<u8>, _> = existing.query_row( + "SELECT fingerprint FROM meta LIMIT 1", [], |row| row.get(0), + ); + if let Ok(stored) = stored { + if stored.len() == 32 && stored == fingerprint { + return Ok(()); + } + } + } + } + + // Create the sqlite sidecar + let _ = std::fs::remove_file(&idx_path); + let conn = rusqlite::Connection::open(&idx_path) + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + + conn.execute_batch( + "PRAGMA journal_mode = OFF; PRAGMA synchronous = OFF; PRAGMA cache_size = -65536; PRAGMA page_size = 4096; CREATE TABLE hashes (offset INTEGER PRIMARY KEY, value BLOB NOT NULL) WITHOUT ROWID; CREATE TABLE meta (fingerprint BLOB NOT NULL);" ).map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + + // Walk tree bottom-up, flushing in batches + let batch_size = 500_000; + let mut buffer: Vec<(u64, Hash)> = Vec::with_capacity(batch_size); + + self.build_index_node(self.savepoint.root, &mut buffer, &conn, batch_size)?; + + // Flush remaining + if !buffer.is_empty() { + Self::flush_index_batch(&buffer, &conn)?; + } + + // Store fingerprint + conn.execute( + "INSERT INTO meta (fingerprint) VALUES (?1)", + [&fingerprint[..]], + ).map_err(|e| io::Error::new(io::ErrorKind::Other,
e))?; + + // Auto-load the index we just built + let _ = self.load_hash_index(); + + Ok(()) + } + + #[cfg(feature = "hash-idx")] + fn flush_index_batch( + buffer: &[(u64, Hash)], + conn: &rusqlite::Connection, + ) -> Result<()> { + conn.execute_batch("BEGIN") + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + { + let mut stmt = conn.prepare_cached("INSERT OR REPLACE INTO hashes (offset, value) VALUES (?1, ?2)") + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + for (offset, v) in buffer { + stmt.execute(rusqlite::params![offset, &v[..]]) + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + } + } + conn.execute_batch("COMMIT") + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + Ok(()) + } + + #[cfg(feature = "hash-idx")] + fn build_index_node( + &self, + node_id: Record, + buffer: &mut Vec<(u64, Hash)>, + conn: &rusqlite::Connection, + batch_size: usize, + ) -> Result<Hash> { + let inner = self.db.load_node(node_id)?; + match inner { + NodeInner::Leaf { key, value } => { + let value_hash = H::hash(&value); + let hash = H::hash_leaf(&key.0, &value_hash); + buffer.push((node_id.offset, hash)); + if buffer.len() >= batch_size { + Self::flush_index_batch(buffer, conn)?; + buffer.clear(); + } + Ok(hash) + } + NodeInner::Internal { prefix, left, right } => { + let left_hash = self.build_index_node(left.id, buffer, conn, batch_size)?; + let right_hash = self.build_index_node(right.id, buffer, conn, batch_size)?; + let hash = H::hash_internal(prefix.as_bytes(), &left_hash, &right_hash); + buffer.push((node_id.offset, hash)); + if buffer.len() >= batch_size { + Self::flush_index_batch(buffer, conn)?; + buffer.clear(); + } + Ok(hash) + } + } + } + + /// Loads a previously built hash index sidecar for the current snapshot. + /// Returns `true` if a valid index was loaded, `false` if no index exists + /// or the fingerprint doesn't match (stale index).
+ #[cfg(feature = "hash-idx")] + pub fn load_hash_index(&mut self) -> Result<bool> { + if self.is_empty() { + return Ok(false); + } + + let idx_path = match self.hash_index_path() { + Some(p) => p, + None => return Ok(false), + }; + + if !std::path::Path::new(&idx_path).exists() { + return Ok(false); + } + + let conn = rusqlite::Connection::open_with_flags( + &idx_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX, + ).map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + + // Validate fingerprint + let root_raw = self.db.file.read(self.savepoint.root.offset, self.savepoint.root.size as usize)?; + let expected_fingerprint = H::hash(&root_raw); + + let stored: Vec<u8> = conn.query_row( + "SELECT fingerprint FROM meta LIMIT 1", + [], + |row| row.get(0), + ).map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + + if stored.len() != 32 || stored != expected_fingerprint { + return Ok(false); + } + + self.hash_index = Some(HashIndex { conn: Arc::new(conn) }); + Ok(true) + } + /// Exports the current snapshot to a new database file.
pub fn export(&self, path: &str) -> Result<()> { use std::fs::OpenOptions; @@ -198,7 +403,7 @@ impl ReadTransaction { let mut n = self.cache.node.take().unwrap(); let h = { - let entry = Self::hash_node(&self.db, &mut self.cache, &mut n)?; + let entry = Self::hash_node(&self.db, &mut self.cache, &mut n, &self.hash_index)?; entry.node.hash_cache.clone().unwrap() }; self.cache.node = Some(n); @@ -214,7 +419,7 @@ impl ReadTransaction { let mut key_paths = keys.iter().map(|k| Path(k)).collect::<Vec<_>>(); key_paths.sort(); - match Self::prove_nodes(&self.db, &mut self.cache, &mut node, key_paths.as_slice(), 0, proof_type) { + match Self::prove_nodes(&self.db, &mut self.cache, &mut node, key_paths.as_slice(), 0, proof_type, &self.hash_index) { Ok(info) => { self.cache.node = Some(node); Ok(SubTree::<H> { @@ -233,6 +438,11 @@ impl ReadTransaction { self.savepoint.root == EMPTY_RECORD } + /// Returns the root node's file offset for this snapshot. + pub fn root_offset(&self) -> u64 { + self.savepoint.root.offset + } + fn prove_nodes( db: &Database<H>, cache: &mut Cache, node: &mut Node, keys: &[Path<&Hash>], depth: usize, proof_type: ProofType, + hash_index: &Option<HashIndex>, ) -> Result<SubTreeNodeInfo> { let entry = cache.load_node(db, node)?; match entry.node.inner.as_mut().unwrap() { @@ -277,31 +488,31 @@ impl ReadTransaction { let (left_keys, right_keys) = keys.split_at(split); let mut left_subtree = if left_keys.is_empty() { None } else { - Some(Self::prove_nodes(db, cache, left, left_keys, depth + 1, proof_type)?) + Some(Self::prove_nodes(db, cache, left, left_keys, depth + 1, proof_type, hash_index)?) }; let mut right_subtree = if right_keys.is_empty() { None } else { - Some(Self::prove_nodes(db, cache, right, right_keys, depth + 1, proof_type)?) + Some(Self::prove_nodes(db, cache, right, right_keys, depth + 1, proof_type, hash_index)?)
}; // Include extended hash of the sibling if its subtree isn't already part of the proof if proof_type == ProofType::Extended && left_subtree.is_none() && right_subtree.is_some() && right_subtree.as_ref().unwrap().value_node { left_subtree = Some(SubTreeNodeInfo { - node: Self::hash_node_extended(db, cache, left)?, + node: Self::hash_node_extended(db, cache, left, hash_index)?, value_node: false, }) } if proof_type == ProofType::Extended && right_subtree.is_none() && left_subtree.is_some() && left_subtree.as_ref().unwrap().value_node { right_subtree = Some(SubTreeNodeInfo { - node: Self::hash_node_extended(db, cache, right)?, + node: Self::hash_node_extended(db, cache, right, hash_index)?, value_node: false, }) } // If extended hashes aren't needed, include basic ones if left_subtree.is_none() { - let left_entry = Self::hash_node(db, cache, left)?; + let left_entry = Self::hash_node(db, cache, left, hash_index)?; let left_hash = left_entry.node.hash_cache.clone().unwrap(); left_subtree = Some(SubTreeNodeInfo { node: SubTreeNode::Hash(left_hash), @@ -309,7 +520,7 @@ impl ReadTransaction { }); } if right_subtree.is_none() { - let right_entry = Self::hash_node(db, cache, right)?; + let right_entry = Self::hash_node(db, cache, right, hash_index)?; let right_hash = right_entry.node.hash_cache.clone().unwrap(); right_subtree = Some(SubTreeNodeInfo { node: SubTreeNode::Hash(right_hash), @@ -338,11 +549,32 @@ impl ReadTransaction { db: &Database<H>, cache: &mut Cache, node: &'c mut Node, + hash_index: &Option<HashIndex>, ) -> Result<CacheEntry<'c>> { if node.hash_cache.is_some() { return Ok(CacheEntry::new(node, false)); } + // Check hash index sidecar before loading/recursing + #[cfg(feature = "hash-idx")] + if node.id != EMPTY_RECORD { + if let Some(ref idx) = hash_index { + let result: core::result::Result<Vec<u8>, _> = idx.conn.query_row( + "SELECT value FROM hashes WHERE offset = ?1", + [node.id.offset as i64], + |row| row.get(0), + ); + if let Ok(cached) = result { + if cached.len() == 32 { + let mut
hash = [0u8; 32]; + hash.copy_from_slice(&cached); + node.hash_cache = Some(hash); + return Ok(CacheEntry::new(node, false)); + } + } + } + } + let entry = cache.load_node(db, node)?; match entry.node.inner.as_mut().unwrap() { NodeInner::Leaf { key, value } => { @@ -354,9 +586,9 @@ impl ReadTransaction { left, right, } => { - let left_entry = Self::hash_node(db, cache, left)?; + let left_entry = Self::hash_node(db, cache, left, hash_index)?; let left_hash = left_entry.node.hash_cache.as_ref().unwrap(); - let right_entry = Self::hash_node(db, cache, right)?; + let right_entry = Self::hash_node(db, cache, right, hash_index)?; let right_hash = right_entry.node.hash_cache.as_ref().unwrap(); entry.node.hash_cache = Some(H::hash_internal(prefix.as_bytes(), left_hash, right_hash)); @@ -376,6 +608,7 @@ impl ReadTransaction { db: &Database<H>, cache: &mut Cache, node: &mut Node, + hash_index: &Option<HashIndex>, ) -> Result<SubTreeNode> { let entry = cache.load_node(db, node)?; match entry.node.inner.as_mut().unwrap() { @@ -391,8 +624,8 @@ impl ReadTransaction { left, right, } => { - let left_hash = Self::hash_node(db, cache, left)?.node.hash_cache.as_ref().unwrap().clone(); - let right_hash = Self::hash_node(db, cache, right)?.node.hash_cache.as_ref().unwrap().clone(); + let left_hash = Self::hash_node(db, cache, left, hash_index)?.node.hash_cache.as_ref().unwrap().clone(); + let right_hash = Self::hash_node(db, cache, right, hash_index)?.node.hash_cache.as_ref().unwrap().clone(); Ok(SubTreeNode::Internal { prefix: prefix.clone(), left: Box::new(SubTreeNode::Hash(left_hash)), @@ -765,6 +998,13 @@ impl<'db, H: NodeHasher> WriteTransaction<'db, H> { }; self.db.write_header(&self.header)?; + + #[cfg(feature = "hash-idx")] + if self.db.config.auto_hash_index { + let mut snapshot = ReadTransaction::new(self.db.clone(), self.header.savepoint.clone()); + let _ = snapshot.build_hash_index(); + } + Ok(()) } } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 1287fc5..1b76c2f 100644 ---
a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1399,3 +1399,302 @@ fn reset_to_empty() { assert_eq!(snapshot.get(&[0x42u8; 32]).unwrap(), Some(vec![1, 2, 3])); assert_ne!(snapshot.compute_root().unwrap(), empty_root); } + +#[cfg(feature = "hash-idx")] +#[test] +fn hash_index_prove_matches_without_index() { + let dir = std::env::temp_dir().join("spacedb_hidx_test_prove"); + let _ = std::fs::remove_file(&dir); + let db_path = dir.to_str().unwrap(); + + let db = Database::open(db_path).unwrap(); + let mut tx = db.begin_write().unwrap(); + for i in 0u32..200 { + let key = Sha256Hasher::hash(&i.to_le_bytes()); + tx = tx.insert(key, vec![i as u8; 32]).unwrap(); + } + tx.commit().unwrap(); + + // Compute root and proofs without index + let mut snapshot = db.begin_read().unwrap(); + let root_without = snapshot.compute_root().unwrap(); + + let prove_keys: Vec = (0u32..5) + .map(|i| Sha256Hasher::hash(&i.to_le_bytes())) + .collect(); + let proof_without = snapshot.prove(&prove_keys, ProofType::Standard).unwrap(); + let proof_root_without = proof_without.compute_root().unwrap(); + + // Build the hash index + let mut snapshot = db.begin_read().unwrap(); + snapshot.build_hash_index().unwrap(); + + // Now load it and prove again + let mut snapshot = db.begin_read().unwrap(); + assert!(snapshot.load_hash_index().unwrap(), "should load index"); + + let root_with = snapshot.compute_root().unwrap(); + assert_eq!(root_with, root_without, "root must match with and without index"); + + let proof_with = snapshot.prove(&prove_keys, ProofType::Standard).unwrap(); + let proof_root_with = proof_with.compute_root().unwrap(); + assert_eq!(proof_root_with, proof_root_without, "proof root must match"); + assert_eq!(proof_root_with, root_with, "proof root must equal tree root"); + + // Cleanup + let _ = std::fs::remove_file(&dir); + // Cleanup index files + Database::::cleanup_hash_indexes(&Some(db_path.to_string()), 0); +} + +#[cfg(feature = "hash-idx")] +#[test] +fn 
hash_index_extended_proof_matches() { + let dir = std::env::temp_dir().join("spacedb_hidx_test_extended"); + let _ = std::fs::remove_file(&dir); + let db_path = dir.to_str().unwrap(); + + let db = Database::open(db_path).unwrap(); + let mut tx = db.begin_write().unwrap(); + for i in 0u32..50 { + let key = Sha256Hasher::hash(&i.to_le_bytes()); + tx = tx.insert(key, vec![i as u8]).unwrap(); + } + tx.commit().unwrap(); + + let prove_keys: Vec = (0u32..3) + .map(|i| Sha256Hasher::hash(&i.to_le_bytes())) + .collect(); + + // Proof without index + let mut snapshot = db.begin_read().unwrap(); + let proof_without = snapshot.prove(&prove_keys, ProofType::Extended).unwrap(); + let root_without = proof_without.compute_root().unwrap(); + + // Build and load index + let mut snapshot = db.begin_read().unwrap(); + snapshot.build_hash_index().unwrap(); + + let mut snapshot = db.begin_read().unwrap(); + snapshot.load_hash_index().unwrap(); + + let proof_with = snapshot.prove(&prove_keys, ProofType::Extended).unwrap(); + let root_with = proof_with.compute_root().unwrap(); + assert_eq!(root_with, root_without, "extended proof root must match"); + + let _ = std::fs::remove_file(&dir); + Database::::cleanup_hash_indexes(&Some(db_path.to_string()), 0); +} + +#[cfg(feature = "hash-idx")] +#[test] +fn hash_index_rollback_deletes_stale_index() { + let dir = std::env::temp_dir().join("spacedb_hidx_test_rollback"); + let _ = std::fs::remove_file(&dir); + let db_path = dir.to_str().unwrap(); + + let db = Database::open(db_path).unwrap(); + + // Snapshot 1 + db.begin_write().unwrap() + .insert(Sha256Hasher::hash(b"a"), vec![1]).unwrap() + .commit().unwrap(); + let snapshot1 = db.begin_read().unwrap(); + + // Snapshot 2 + db.begin_write().unwrap() + .insert(Sha256Hasher::hash(b"b"), vec![2]).unwrap() + .commit().unwrap(); + let mut snapshot2 = db.begin_read().unwrap(); + + // Build index for snapshot 2 + snapshot2.build_hash_index().unwrap(); + + // Verify index file exists + let stem = 
std::path::Path::new(db_path).file_stem().unwrap().to_str().unwrap(); + let parent = std::path::Path::new(db_path).parent().unwrap(); + + let index_exists_before = std::fs::read_dir(parent).unwrap() + .filter_map(|e| e.ok()) + .any(|e| { + let name = e.file_name().to_str().unwrap_or("").to_string(); + name.starts_with(&format!("{}.", stem)) && name.ends_with(".hidx.sqlite") + }); + assert!(index_exists_before, "index file should exist before rollback"); + + // Rollback to snapshot 1 + snapshot1.rollback().unwrap(); + + // Index for snapshot 2 should be deleted + let index_exists_after = std::fs::read_dir(parent).unwrap() + .filter_map(|e| e.ok()) + .any(|e| { + let name = e.file_name().to_str().unwrap_or("").to_string(); + name.starts_with(&format!("{}.", stem)) && name.ends_with(".hidx.sqlite") + }); + assert!(!index_exists_after, "index file should be deleted after rollback"); + + let _ = std::fs::remove_file(&dir); +} + +#[cfg(feature = "hash-idx")] +#[test] +fn hash_index_reset_deletes_all_indexes() { + let dir = std::env::temp_dir().join("spacedb_hidx_test_reset"); + let _ = std::fs::remove_file(&dir); + let db_path = dir.to_str().unwrap(); + + let db = Database::open(db_path).unwrap(); + + // Create two snapshots with indexes + db.begin_write().unwrap() + .insert(Sha256Hasher::hash(b"a"), vec![1]).unwrap() + .commit().unwrap(); + db.begin_read().unwrap().build_hash_index().unwrap(); + + db.begin_write().unwrap() + .insert(Sha256Hasher::hash(b"b"), vec![2]).unwrap() + .commit().unwrap(); + db.begin_read().unwrap().build_hash_index().unwrap(); + + // Reset + db.reset().unwrap(); + + // All index files should be gone + let stem = std::path::Path::new(db_path).file_stem().unwrap().to_str().unwrap(); + let parent = std::path::Path::new(db_path).parent().unwrap(); + let any_index = std::fs::read_dir(parent).unwrap() + .filter_map(|e| e.ok()) + .any(|e| { + let name = e.file_name().to_str().unwrap_or("").to_string(); + name.starts_with(&format!("{}.", stem)) && 
name.ends_with(".hidx.sqlite") + }); + assert!(!any_index, "all index files should be deleted after reset"); + + let _ = std::fs::remove_file(&dir); +} + +#[cfg(feature = "hash-idx")] +#[test] +fn hash_index_fingerprint_mismatch_after_rollback_and_new_writes() { + let dir = std::env::temp_dir().join("spacedb_hidx_test_fp"); + let _ = std::fs::remove_file(&dir); + let db_path = dir.to_str().unwrap(); + + let db = Database::open(db_path).unwrap(); + + // Snapshot 1 + db.begin_write().unwrap() + .insert(Sha256Hasher::hash(b"x"), vec![1]).unwrap() + .commit().unwrap(); + let snap1 = db.begin_read().unwrap(); + + // Snapshot 2 + db.begin_write().unwrap() + .insert(Sha256Hasher::hash(b"y"), vec![2]).unwrap() + .commit().unwrap(); + + // Build index for snapshot 2 + db.begin_read().unwrap().build_hash_index().unwrap(); + + // Rollback to snapshot 1 (this deletes the index via cleanup) + snap1.rollback().unwrap(); + + // New writes — new snapshot may reuse the old root offset + db.begin_write().unwrap() + .insert(Sha256Hasher::hash(b"z"), vec![3]).unwrap() + .commit().unwrap(); + + // The old index was already cleaned up by rollback, so load should return false + let mut snapshot = db.begin_read().unwrap(); + assert!(!snapshot.load_hash_index().unwrap(), "should not load stale index"); + + let _ = std::fs::remove_file(&dir); + Database::::cleanup_hash_indexes(&Some(db_path.to_string()), 0); +} + +#[cfg(feature = "hash-idx")] +#[test] +fn hash_index_memory_db_returns_error() { + let db = Database::memory().unwrap(); + db.begin_write().unwrap() + .insert([1u8; 32], vec![1]).unwrap() + .commit().unwrap(); + + let mut snapshot = db.begin_read().unwrap(); + assert!(snapshot.build_hash_index().is_err(), "should error for in-memory db"); +} + +#[cfg(feature = "hash-idx")] +#[test] +fn hash_index_no_index_fallback() { + let dir = std::env::temp_dir().join("spacedb_hidx_test_fallback"); + let _ = std::fs::remove_file(&dir); + let db_path = dir.to_str().unwrap(); + + let db = 
Database::open(db_path).unwrap(); + let mut tx = db.begin_write().unwrap(); + for i in 0u32..100 { + let key = Sha256Hasher::hash(&i.to_le_bytes()); + tx = tx.insert(key, vec![i as u8]).unwrap(); + } + tx.commit().unwrap(); + + // Don't build index — load should return false + let mut snapshot = db.begin_read().unwrap(); + assert!(!snapshot.load_hash_index().unwrap(), "no index to load"); + + // prove still works + let key = Sha256Hasher::hash(&0u32.to_le_bytes()); + let proof = snapshot.prove(&[key], ProofType::Standard).unwrap(); + let root = snapshot.compute_root().unwrap(); + assert_eq!(proof.compute_root().unwrap(), root); + + let _ = std::fs::remove_file(&dir); +} + +#[cfg(feature = "hash-idx")] +#[test] +fn hash_index_auto_build_on_commit() { + use spacedb::Configuration; + + let dir = std::env::temp_dir().join("spacedb_hidx_test_auto"); + let _ = std::fs::remove_file(&dir); + let db_path = dir.to_str().unwrap(); + + // Open with auto_hash_index enabled + let config = Configuration::standard().with_auto_hash_index(true); + let db = Database::open_with_config(db_path, config).unwrap(); + + // Insert and commit + let mut tx = db.begin_write().unwrap(); + for i in 0u32..50 { + let key = Sha256Hasher::hash(&i.to_le_bytes()); + tx = tx.insert(key, vec![i as u8]).unwrap(); + } + tx.commit().unwrap(); + + // begin_read should auto-load the index that was auto-built on commit + let mut snapshot = db.begin_read().unwrap(); + + // Verify the index file exists + let stem = std::path::Path::new(db_path).file_stem().unwrap().to_str().unwrap(); + let parent = std::path::Path::new(db_path).parent().unwrap(); + let has_index = std::fs::read_dir(parent).unwrap() + .filter_map(|e| e.ok()) + .any(|e| { + let name = e.file_name().to_str().unwrap_or("").to_string(); + name.starts_with(&format!("{}.", stem)) && name.ends_with(".hidx.sqlite") + }); + assert!(has_index, "auto-built index file should exist after commit"); + + // Verify prove works and roots match + let keys: Vec<_> 
= (0u32..3).map(|i| Sha256Hasher::hash(&i.to_le_bytes())).collect(); + let root = snapshot.compute_root().unwrap(); + let proof = snapshot.prove(&keys, ProofType::Standard).unwrap(); + assert_eq!(proof.compute_root().unwrap(), root); + + // Cleanup + let _ = std::fs::remove_file(&dir); + Database::::cleanup_hash_indexes(&Some(db_path.to_string()), 0); +}