From af630267f73548af06020b61779acb73ee050610 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Fri, 3 Jan 2025 21:45:14 +0300 Subject: [PATCH 01/24] WIP --- Cargo.toml | 4 +- src/page/index/data_page.rs | 74 +++++++++++++++++++ src/page/{index.rs => index/mappers.rs} | 93 ++---------------------- src/page/index/mod.rs | 26 +++++++ src/page/index/table_of_contents_page.rs | 0 src/page/mod.rs | 2 +- 6 files changed, 107 insertions(+), 92 deletions(-) create mode 100644 src/page/index/data_page.rs rename src/page/{index.rs => index/mappers.rs} (73%) create mode 100644 src/page/index/mod.rs create mode 100644 src/page/index/table_of_contents_page.rs diff --git a/Cargo.toml b/Cargo.toml index 173e3d4..55f24c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,4 @@ derive_more = { version = "1.0.0", features = ["from", "error", "display", "into rkyv = { version = "0.8.9", features = ["uuid-1"]} scc = "2.1.16" lockfree = "0.5.1" -serde = { version = "1.0.215", features = ["derive"] } -uuid = { version = "1.11.0", features = ["v4"] } -bitcode = "0.6.3" \ No newline at end of file +uuid = { version = "1.11.0", features = ["v4"] } \ No newline at end of file diff --git a/src/page/index/data_page.rs b/src/page/index/data_page.rs new file mode 100644 index 0000000..bf028a3 --- /dev/null +++ b/src/page/index/data_page.rs @@ -0,0 +1,74 @@ +//! [`IndexPage`] definition. + +use std::fmt::Debug; +use std::sync::Arc; + +use rkyv::rancor::Strategy; +use rkyv::ser::allocator::ArenaHandle; +use rkyv::ser::sharing::Share; +use rkyv::ser::Serializer; +use rkyv::util::AlignedVec; +use rkyv::{Archive, Deserialize, Serialize}; +use scc::ebr::Guard; +use scc::TreeIndex; + +use crate::link::Link; +use crate::page::IndexValue; +use crate::util::Persistable; + +/// Represents a page, which is filled with [`IndexValue`]'s of some index. +#[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] +pub struct IndexPage { + //pub node_id: T, + pub index_values: Vec>, +} + +// Manual `Default` implementation to avoid `T: Default` +impl<'a, T> Default for IndexPage { + fn default() -> Self { + Self { + index_values: vec![], + } + } +} + +impl IndexPage +where + T: Clone + Ord + Debug + 'static, +{ + pub fn append_to_unique_tree_index(self, index: &TreeIndex) { + for val in self.index_values { + // Errors only if key is already exists. + index.insert(val.key, val.link).expect("index is unique"); + } + } + + pub fn append_to_tree_index(self, index: &TreeIndex>>) { + for val in self.index_values { + let guard = Guard::new(); + if let Some(set) = index.peek(&val.key, &guard) { + set.insert(val.link).expect("Link should be unique"); + } else { + let set = lockfree::set::Set::new(); + set + .insert(val.link) + .expect("Link should be unique as first inserted value"); + index + .insert(val.key, Arc::new(set)) + .expect("index is unique"); + } + } + } +} + +impl Persistable for IndexPage +where + T: Archive + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, +{ + fn as_bytes(&self) -> impl AsRef<[u8]> { + rkyv::to_bytes::(self).unwrap() + } +} diff --git a/src/page/index.rs b/src/page/index/mappers.rs similarity index 73% rename from src/page/index.rs rename to src/page/index/mappers.rs index 97a9461..069506f 100644 --- a/src/page/index.rs +++ b/src/page/index/mappers.rs @@ -1,78 +1,7 @@ -//! [`IndexPage`] definition. - -use std::fmt::Debug; use std::sync::Arc; -use rkyv::rancor::Strategy; -use rkyv::ser::allocator::ArenaHandle; -use rkyv::ser::sharing::Share; -use rkyv::ser::Serializer; -use rkyv::util::AlignedVec; -use rkyv::{Archive, Deserialize, Serialize}; -use scc::ebr::Guard; -use scc::TreeIndex; - -use crate::link::Link; -use crate::util::{Persistable, SizeMeasurable}; - -/// Represents `key/value` pair of B-Tree index, where value is always -/// [`data::Link`], as it is represented in primary and secondary indexes. -#[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] -pub struct IndexValue { - pub key: T, - pub link: Link, -} - -impl SizeMeasurable for IndexValue -where - T: SizeMeasurable, -{ - fn aligned_size(&self) -> usize { - self.key.aligned_size() + self.link.aligned_size() - } -} - -/// Represents a page, which is filled with [`IndexValue`]'s of some index. -#[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] -pub struct IndexPage { - pub index_values: Vec>, -} - -// Manual `Default` implementation to avoid `T: Default` -impl<'a, T> Default for IndexPage { - fn default() -> Self { - Self { - index_values: vec![], - } - } -} - -impl IndexPage -where - T: Clone + Ord + Debug + 'static, -{ - pub fn append_to_unique_tree_index(self, index: &TreeIndex) { - for val in self.index_values { - // Errors only if key is already exists. - index.insert(val.key, val.link).expect("index is unique"); - } - } - - pub fn append_to_tree_index(self, index: &TreeIndex>>) { - for val in self.index_values { - let guard = Guard::new(); - if let Some(set) = index.peek(&val.key, &guard) { - set.insert(val.link).expect("is ok"); - } else { - let set = lockfree::set::Set::new(); - set.insert(val.link).expect("is ok"); - index - .insert(val.key, Arc::new(set)) - .expect("index is unique"); - } - } - } -} +use crate::{Link, SizeMeasurable}; +use crate::page::{IndexPage, IndexValue}; pub fn map_unique_tree_index<'a, T, const PAGE_SIZE: usize>( index: impl Iterator, @@ -132,28 +61,16 @@ where pages } -impl Persistable for IndexPage -where - T: Archive - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - >, -{ - fn as_bytes(&self) -> impl AsRef<[u8]> { - rkyv::to_bytes::(self).unwrap() - } -} - #[cfg(test)] mod test { use scc::ebr::Guard; use scc::TreeIndex; use std::sync::Arc; - use crate::page::index::map_unique_tree_index; use crate::page::{INNER_PAGE_SIZE, PAGE_SIZE}; use crate::util::{Persistable, SizeMeasurable}; - use crate::{map_tree_index, Link}; + use crate::Link; + use crate::page::index::mappers::{map_tree_index, map_unique_tree_index}; #[test] fn map_single_value() { @@ -333,4 +250,4 @@ mod test { let bytes = page.as_bytes(); assert!(bytes.as_ref().len() <= INNER_PAGE_SIZE) } -} +} \ No newline at end of file diff --git a/src/page/index/mod.rs b/src/page/index/mod.rs new file mode 100644 index 0000000..4a218a1 --- /dev/null +++ b/src/page/index/mod.rs @@ -0,0 +1,26 @@ +use rkyv::{Archive, Deserialize, Serialize}; + +use crate::{Link, SizeMeasurable}; + +mod data_page; +mod mappers; + +pub use data_page::IndexPage; +pub use mappers::{map_tree_index, map_unique_tree_index}; + +/// Represents `key/value` pair of B-Tree index, where value is always +/// [`data::Link`], as it is represented in primary and secondary indexes. +#[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] +pub struct IndexValue { + pub key: T, + pub link: Link, +} + +impl SizeMeasurable for IndexValue +where + T: SizeMeasurable, +{ + fn aligned_size(&self) -> usize { + self.key.aligned_size() + self.link.aligned_size() + } +} \ No newline at end of file diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/page/mod.rs b/src/page/mod.rs index 67bd103..a444bf4 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -12,7 +12,7 @@ use rkyv::{Archive, Deserialize, Serialize}; pub use data::Data; pub use data_type::DataType; pub use header::{GeneralHeader, DATA_VERSION}; -pub use index::{map_tree_index, map_unique_tree_index, IndexPage}; +pub use index::{map_tree_index, map_unique_tree_index, IndexPage, IndexValue}; pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; pub use util::{ From 30fc79d98641e294408f8c577f9c96219bb12c9a Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Thu, 9 Jan 2025 22:12:36 +0300 Subject: [PATCH 02/24] WIP --- src/lib.rs | 1 + src/page/index/mod.rs | 6 +- src/page/index/{data_page.rs => page.rs} | 0 src/page/index/table_of_contents_page.rs | 90 ++++++++++++++++++++++++ src/page/mod.rs | 2 +- 5 files changed, 96 insertions(+), 3 deletions(-) rename src/page/index/{data_page.rs => page.rs} (100%) diff --git a/src/lib.rs b/src/lib.rs index 1a4af0e..0eff6a5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ pub use page::{ seek_to_page_start, update_at, Data as DataPage, DataType, General as GeneralPage, GeneralHeader, IndexPage as IndexData, Interval, PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, + TableOfContentsPage }; pub use persistence::{PersistableIndex, PersistableTable}; pub use util::{align, Persistable, SizeMeasurable}; diff --git a/src/page/index/mod.rs b/src/page/index/mod.rs index 4a218a1..5333b3c 100644 --- a/src/page/index/mod.rs +++ b/src/page/index/mod.rs @@ -2,10 +2,12 @@ use rkyv::{Archive, Deserialize, Serialize}; use crate::{Link, SizeMeasurable}; -mod data_page; +mod page; mod mappers; +mod table_of_contents_page; -pub use data_page::IndexPage; +pub use page::IndexPage; +pub use table_of_contents_page::TableOfContentsPage; pub use mappers::{map_tree_index, map_unique_tree_index}; /// Represents `key/value` pair of B-Tree index, where value is always diff --git a/src/page/index/data_page.rs b/src/page/index/page.rs similarity index 100% rename from src/page/index/data_page.rs rename to src/page/index/page.rs diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index e69de29..fdc3568 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -0,0 +1,90 @@ +use std::collections::HashMap; +use std::hash::Hash; + +use rkyv::{Archive, Deserialize, Serialize}; +use rkyv::rancor::Strategy; +use rkyv::ser::allocator::ArenaHandle; +use rkyv::ser::Serializer; +use rkyv::ser::sharing::Share; +use rkyv::util::AlignedVec; +use rkyv::with::Skip; + +use crate::page::PageId; +use crate::{align, Persistable, SizeMeasurable}; + +#[derive(Archive, Clone, Deserialize, Debug, Serialize)] +pub struct TableOfContentsPage { + records: HashMap, + #[rkyv(with = Skip)] + estimated_size: usize, + is_last: bool, +} + +impl Default for TableOfContentsPage { + fn default() -> Self { + Self { + records: HashMap::new(), + estimated_size: 0, + is_last: false, + } + } +} + +impl TableOfContentsPage +{ + + pub fn is_last(&self) -> bool { + self.is_last + } + + pub fn mark_not_last(&mut self) { + self.is_last = false; + } + + pub fn estimated_size(&self) -> usize { + self.estimated_size + } + + pub fn insert(&mut self, val: T, page_id: PageId) + where T: Hash + Eq + SizeMeasurable + { + self.estimated_size += align(val.aligned_size() + page_id.0.aligned_size()); + let _ = self.records.insert(val, page_id); + } + + pub fn remove(&mut self, val: &T) + where T: Hash + Eq + SizeMeasurable + { + self.estimated_size -= align(val.aligned_size() + PageId::default().0.aligned_size()); + let _ = self.records.remove(val); + } + + pub fn contains(&self, val: &T) -> bool + where T: Hash + Eq + { + self.records.contains_key(val) + } +} + +impl IntoIterator for TableOfContentsPage { + type Item = (T, PageId); + type IntoIter = as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.records.into_iter() + } +} + +impl Persistable for TableOfContentsPage +where + T: Archive + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + > + Hash + Eq, + ::Archived: Hash + Eq +{ + fn as_bytes(&self) -> impl AsRef<[u8]> { + rkyv::to_bytes::(self).unwrap() + } +} + diff --git a/src/page/mod.rs b/src/page/mod.rs index a444bf4..3bc4547 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -12,7 +12,7 @@ use rkyv::{Archive, Deserialize, Serialize}; pub use data::Data; pub use data_type::DataType; pub use header::{GeneralHeader, DATA_VERSION}; -pub use index::{map_tree_index, map_unique_tree_index, IndexPage, IndexValue}; +pub use index::{map_tree_index, map_unique_tree_index, IndexPage, IndexValue, TableOfContentsPage}; pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; pub use util::{ From 9f5af7f5ce56581b23dbd218006849bbfe54acf7 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 12 Jan 2025 21:54:24 +0300 Subject: [PATCH 03/24] still WIP --- src/lib.rs | 5 +- src/page/header.rs | 29 +++++-- src/page/index/mod.rs | 4 +- src/page/index/new_page.rs | 106 +++++++++++++++++++++++ src/page/index/table_of_contents_page.rs | 16 ++-- src/page/mod.rs | 2 +- src/page/ty.rs | 2 + 7 files changed, 150 insertions(+), 14 deletions(-) create mode 100644 src/page/index/new_page.rs diff --git a/src/lib.rs b/src/lib.rs index 0eff6a5..b86735a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +extern crate core; + pub mod link; pub mod page; pub mod persistence; @@ -13,7 +15,8 @@ pub use page::{ seek_to_page_start, update_at, Data as DataPage, DataType, General as GeneralPage, GeneralHeader, IndexPage as IndexData, Interval, PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, - TableOfContentsPage + TableOfContentsPage, NewIndexPage }; pub use persistence::{PersistableIndex, PersistableTable}; pub use util::{align, Persistable, SizeMeasurable}; +pub use space::{Id as SpaceId}; diff --git a/src/page/header.rs b/src/page/header.rs index 49da323..2e73f00 100644 --- a/src/page/header.rs +++ b/src/page/header.rs @@ -6,6 +6,7 @@ use crate::page::ty::PageType; use crate::space; use crate::util::Persistable; use crate::{page, PAGE_SIZE}; +use crate::page::PageId; pub const DATA_VERSION: u32 = 1u32; @@ -16,15 +17,15 @@ pub const DATA_VERSION: u32 = 1u32; pub struct GeneralHeader { pub data_version: u32, pub space_id: space::Id, - pub page_id: page::PageId, - pub previous_id: page::PageId, - pub next_id: page::PageId, + pub page_id: PageId, + pub previous_id: PageId, + pub next_id: PageId, pub page_type: PageType, pub data_length: u32, } impl GeneralHeader { - pub fn new(page_id: page::PageId, type_: PageType, space_id: space::Id) -> Self { + pub fn new(page_id: PageId, type_: PageType, space_id: space::Id) -> Self { Self { data_version: DATA_VERSION, page_id, @@ -38,7 +39,7 @@ impl GeneralHeader { /// Creates a new [`GeneralHeader`] for a page that follows page with given /// header. It means that [`PageType`] and [`space::Id`] are same and - /// old [`page::PageId`] will be `previous_id`. + /// old [`PageId`] will be `previous_id`. pub fn follow(&mut self) -> Self { self.next_id = self.page_id.next(); Self { @@ -54,7 +55,7 @@ impl GeneralHeader { /// Creates a new [`GeneralHeader`] for a page that follows page with given /// header but with different [`PageType`]. [`space::Id`] is same and old - /// [`page::PageId`] will be `previous_id`. + /// [`PageId`] will be `previous_id`. pub fn follow_with(&mut self, page_type: PageType) -> Self { self.next_id = self.page_id.next(); Self { @@ -67,6 +68,22 @@ impl GeneralHeader { data_length: PAGE_SIZE as u32, } } + + /// Creates a new [`GeneralHeader`] for a page that follows page with given + /// header with provided [`PageId`], but with different [`PageType`]. + /// [`space::Id`] is same and old [`PageId`] will be `previous_id`. + pub fn follow_with_page_id(&mut self, page_id: PageId) -> Self { + self.next_id = page_id; + Self { + data_version: DATA_VERSION, + page_id, + previous_id: self.page_id, + next_id: 0.into(), + page_type: self.page_type, + space_id: self.space_id, + data_length: PAGE_SIZE as u32, + } + } } impl Persistable for GeneralHeader { diff --git a/src/page/index/mod.rs b/src/page/index/mod.rs index 5333b3c..6695a58 100644 --- a/src/page/index/mod.rs +++ b/src/page/index/mod.rs @@ -5,14 +5,16 @@ use crate::{Link, SizeMeasurable}; mod page; mod mappers; mod table_of_contents_page; +mod new_page; pub use page::IndexPage; +pub use new_page::NewIndexPage; pub use table_of_contents_page::TableOfContentsPage; pub use mappers::{map_tree_index, map_unique_tree_index}; /// Represents `key/value` pair of B-Tree index, where value is always /// [`data::Link`], as it is represented in primary and secondary indexes. -#[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] +#[derive(Archive, Clone, Deserialize, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] pub struct IndexValue { pub key: T, pub link: Link, diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs new file mode 100644 index 0000000..b62701b --- /dev/null +++ b/src/page/index/new_page.rs @@ -0,0 +1,106 @@ +//! [`crate::page::IndexPage`] definition. + +use std::fmt::Debug; +use std::fs::File; +use std::io::{Read, Seek, SeekFrom, Write}; + +use rkyv::{Archive, Deserialize, Serialize}; +use rkyv::rancor::Strategy; +use rkyv::ser::allocator::ArenaHandle; +use rkyv::ser::Serializer; +use rkyv::ser::sharing::Share; +use rkyv::util::AlignedVec; + +use crate::page::{IndexValue, PageId}; +use crate::{align, seek_to_page_start, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; + +/// Represents a page, which is filled with [`IndexValue`]'s of some index. +#[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] +pub struct NewIndexPage { + pub node_id: T, + pub values_count: u16, + pub slots: Vec, + pub index_values: Vec>, +} + +impl NewIndexPage { + pub fn new(node_id: T, size: usize) -> Self + where T: Default + Clone, + { + let slots = vec![0u16; size]; + let index_values = vec![IndexValue::default(); size]; + Self { + node_id, + values_count: 0, + slots, + index_values, + } + } + + pub fn parse_slots_and_values_count(file: &mut File, page_id: PageId, size: usize) -> eyre::Result<(Vec, u16)> + where T: Default + SizeMeasurable + { + seek_to_page_start(file, page_id.0)?; + file.seek(SeekFrom::Current(GENERAL_HEADER_SIZE as i64 + align(T::default().aligned_size()) as i64))?; + let mut values_count_bytes = vec![0u8; align(u16::default().aligned_size())]; + file.read_exact(values_count_bytes.as_mut_slice())?; + let archived = unsafe { rkyv::access_unchecked::<::Archived>(values_count_bytes.as_slice()) }; + let values_count = rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); + + let mut slots_bytes = vec![0u8; align(size * u16::default().aligned_size() + 8)]; + file.read_exact(slots_bytes.as_mut_slice())?; + let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(slots_bytes.as_slice()) }; + let slots = rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); + + Ok((slots, values_count)) + } + + pub fn persist_slots(file: &mut File, page_id: PageId, slots: Vec, values_count: u16) -> eyre::Result<()> + where T: Default + SizeMeasurable + { + seek_to_page_start(file, page_id.0)?; + file.seek(SeekFrom::Current(GENERAL_HEADER_SIZE as i64 + align(T::default().aligned_size()) as i64))?; + + let bytes = rkyv::to_bytes::(&values_count)?; + file.write(bytes.as_slice())?; + + let bytes = rkyv::to_bytes::(&slots)?; + file.write(bytes.as_slice())?; + Ok(()) + } + + pub fn persist_value(file: &mut File, page_id: PageId, size: usize, value: IndexValue, value_index: u16) -> eyre::Result<()> + where + T: Archive + + Default + + SizeMeasurable + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, + { + seek_to_page_start(file, page_id.0)?; + + let mut offset = GENERAL_HEADER_SIZE; + offset += align(T::default().aligned_size()); + offset += align(u16::default().aligned_size() * size + 8); + offset += value_index as usize * align(T::default().aligned_size() + PageId::default().0.aligned_size()); + + file.seek(SeekFrom::Current(offset as i64))?; + let bytes = rkyv::to_bytes::(&value)?; + file.write(bytes.as_slice())?; + Ok(()) + } +} + +impl Persistable for NewIndexPage +where + T: Archive + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, +{ + fn as_bytes(&self) -> impl AsRef<[u8]> { + rkyv::to_bytes::(self).unwrap() + } +} + diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index fdc3568..ab2c925 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -17,7 +17,7 @@ pub struct TableOfContentsPage { records: HashMap, #[rkyv(with = Skip)] estimated_size: usize, - is_last: bool, + next_page: Option, } impl Default for TableOfContentsPage { @@ -25,7 +25,7 @@ impl Default for TableOfContentsPage { Self { records: HashMap::new(), estimated_size: 0, - is_last: false, + next_page: None, } } } @@ -34,11 +34,11 @@ impl TableOfContentsPage { pub fn is_last(&self) -> bool { - self.is_last + self.next_page.is_some() } - pub fn mark_not_last(&mut self) { - self.is_last = false; + pub fn mark_not_last(&mut self, page_id: PageId) { + self.next_page = Some(page_id) } pub fn estimated_size(&self) -> usize { @@ -52,6 +52,12 @@ impl TableOfContentsPage let _ = self.records.insert(val, page_id); } + pub fn get(&self, val: &T) -> Option + where T: Hash + Eq + { + self.records.get(val).copied() + } + pub fn remove(&mut self, val: &T) where T: Hash + Eq + SizeMeasurable { diff --git a/src/page/mod.rs b/src/page/mod.rs index 3bc4547..bb98cce 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -12,7 +12,7 @@ use rkyv::{Archive, Deserialize, Serialize}; pub use data::Data; pub use data_type::DataType; pub use header::{GeneralHeader, DATA_VERSION}; -pub use index::{map_tree_index, map_unique_tree_index, IndexPage, IndexValue, TableOfContentsPage}; +pub use index::{map_tree_index, map_unique_tree_index, IndexPage, IndexValue, TableOfContentsPage, NewIndexPage}; pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; pub use util::{ diff --git a/src/page/ty.rs b/src/page/ty.rs index 4e47fdb..27795d5 100644 --- a/src/page/ty.rs +++ b/src/page/ty.rs @@ -25,4 +25,6 @@ pub enum PageType { Data = 2, /// Index `Page` type. Index = 3, + /// Index's table of contests `Page` type. Is used to determine node's `PageId`. + IndexTableOfContents = 31, } From f4b77d29b712b4d09e27f565a697f90387fc08c7 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 15 Jan 2025 22:49:14 +0300 Subject: [PATCH 04/24] still WIP --- src/lib.rs | 2 +- src/page/data.rs | 11 +++- src/page/header.rs | 5 ++ src/page/index/new_page.rs | 82 ++++++++++++++++++++++-- src/page/index/page.rs | 8 +++ src/page/index/table_of_contents_page.rs | 11 +++- src/page/space_info.rs | 9 ++- src/page/util.rs | 2 +- src/util/mod.rs | 2 +- src/util/persistable.rs | 1 + src/util/sized.rs | 38 +++++++++++ 11 files changed, 159 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b86735a..c46ee2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,5 +18,5 @@ pub use page::{ TableOfContentsPage, NewIndexPage }; pub use persistence::{PersistableIndex, PersistableTable}; -pub use util::{align, Persistable, SizeMeasurable}; +pub use util::{align, Persistable, SizeMeasurable, align8, align_vec}; pub use space::{Id as SpaceId}; diff --git a/src/page/data.rs b/src/page/data.rs index d7966a0..c6f6602 100644 --- a/src/page/data.rs +++ b/src/page/data.rs @@ -1,5 +1,5 @@ use eyre::{eyre, Result}; - +use rkyv::Archive; use crate::Link; use crate::Persistable; @@ -56,6 +56,15 @@ impl Persistable for Data { fn as_bytes(&self) -> impl AsRef<[u8]> { &self.data[..self.length as usize] } + + fn from_bytes(bytes: &[u8]) -> Self { + let mut data = [0; DATA_LENGTH]; + data.copy_from_slice(bytes); + Self { + length: bytes.len() as u32, + data + } + } } #[cfg(test)] diff --git a/src/page/header.rs b/src/page/header.rs index 2e73f00..969978d 100644 --- a/src/page/header.rs +++ b/src/page/header.rs @@ -90,6 +90,11 @@ impl Persistable for GeneralHeader { fn as_bytes(&self) -> impl AsRef<[u8]> { rkyv::to_bytes::(self).unwrap() } + + fn from_bytes(bytes: &[u8]) -> Self { + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; + rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid") + } } #[cfg(test)] diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs index b62701b..ab8ae41 100644 --- a/src/page/index/new_page.rs +++ b/src/page/index/new_page.rs @@ -1,10 +1,13 @@ //! [`crate::page::IndexPage`] definition. +use std::array; use std::fmt::Debug; use std::fs::File; +use std::hash::Hash; use std::io::{Read, Seek, SeekFrom, Write}; use rkyv::{Archive, Deserialize, Serialize}; +use rkyv::de::Pool; use rkyv::rancor::Strategy; use rkyv::ser::allocator::ArenaHandle; use rkyv::ser::Serializer; @@ -12,11 +15,12 @@ use rkyv::ser::sharing::Share; use rkyv::util::AlignedVec; use crate::page::{IndexValue, PageId}; -use crate::{align, seek_to_page_start, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; +use crate::{align, seek_to_page_start, Link, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; /// Represents a page, which is filled with [`IndexValue`]'s of some index. #[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] pub struct NewIndexPage { + pub size: u16, pub node_id: T, pub values_count: u16, pub slots: Vec, @@ -28,8 +32,13 @@ impl NewIndexPage { where T: Default + Clone, { let slots = vec![0u16; size]; - let index_values = vec![IndexValue::default(); size]; + let mut v = IndexValue::default(); + v.link.page_id = 1.into(); + v.link.length = 4; + v.link.offset = 3; + let index_values = vec![v; size]; Self { + size: size as u16, node_id, values_count: 0, slots, @@ -41,7 +50,8 @@ impl NewIndexPage { where T: Default + SizeMeasurable { seek_to_page_start(file, page_id.0)?; - file.seek(SeekFrom::Current(GENERAL_HEADER_SIZE as i64 + align(T::default().aligned_size()) as i64))?; + let offset = GENERAL_HEADER_SIZE as i64 + align(T::default().aligned_size()) as i64; + file.seek(SeekFrom::Current(offset))?; let mut values_count_bytes = vec![0u8; align(u16::default().aligned_size())]; file.read_exact(values_count_bytes.as_mut_slice())?; let archived = unsafe { rkyv::access_unchecked::<::Archived>(values_count_bytes.as_slice()) }; @@ -97,10 +107,72 @@ where T: Archive + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, - >, + > + Default + SizeMeasurable + Debug + Clone, + ::Archived: Deserialize>, { fn as_bytes(&self) -> impl AsRef<[u8]> { - rkyv::to_bytes::(self).unwrap() + let mut bytes = Vec::with_capacity(self.size as usize); + let size_bytes = rkyv::to_bytes::(&self.size).unwrap(); + bytes.extend_from_slice(size_bytes.as_ref()); + let node_id_bytes = rkyv::to_bytes::(&self.node_id).unwrap(); + bytes.extend_from_slice(node_id_bytes.as_ref()); + let values_count_bytes = rkyv::to_bytes::(&self.values_count).unwrap(); + bytes.extend_from_slice(values_count_bytes.as_ref()); + let slots_bytes = rkyv::to_bytes::(&self.slots).unwrap(); + bytes.extend_from_slice(slots_bytes.as_ref()); + let values_bytes = rkyv::to_bytes::(&self.index_values).unwrap(); + bytes.extend_from_slice(values_bytes.as_ref()); + + bytes + } + + fn from_bytes(bytes: &[u8]) -> Self { + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[0..2]) }; + let size = rkyv::deserialize::(archived).expect("data should be valid"); + println!("{}", size); + let t_size = T::default().aligned_size(); + let mut offset = 2; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + t_size]); + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; + let node_id = rkyv::deserialize(archived).expect("data should be valid"); + println!("{:?}", node_id); + offset = 2 + t_size; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + 2]); + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; + let values_count = rkyv::deserialize::(archived).expect("data should be valid"); + println!("{:?}", values_count); + + Self::new(node_id, size as usize) } } +#[cfg(test)] +mod tests { + use crate::{align8, Link, NewIndexPage, Persistable, SizeMeasurable, INNER_PAGE_SIZE}; + + pub fn get_size_from_data_length(length: usize) -> usize + where + T: Default + SizeMeasurable, + { + let node_id_size = T::default().aligned_size(); + let slot_size = u16::default().aligned_size(); + let index_value_size = align8(T::default().aligned_size() + Link::default().aligned_size()); + let vec_util_size = 8; + let size = (length - node_id_size - slot_size * 2 - vec_util_size * 2) / (slot_size + index_value_size); + size + } + + #[test] + fn test_bytes() { + let size: usize = get_size_from_data_length::(INNER_PAGE_SIZE); + let page = NewIndexPage::::new(1, size); + let bytes = page.as_bytes(); + println!("{:?}", bytes.as_ref()); + println!("{}", size); + println!("{}", bytes.as_ref().len()); + let page = NewIndexPage::::from_bytes(bytes.as_ref()); + + } +} diff --git a/src/page/index/page.rs b/src/page/index/page.rs index bf028a3..8ec93dc 100644 --- a/src/page/index/page.rs +++ b/src/page/index/page.rs @@ -9,6 +9,7 @@ use rkyv::ser::sharing::Share; use rkyv::ser::Serializer; use rkyv::util::AlignedVec; use rkyv::{Archive, Deserialize, Serialize}; +use rkyv::api::high::HighDeserializer; use scc::ebr::Guard; use scc::TreeIndex; @@ -67,8 +68,15 @@ where + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, >, + ::Archived: + rkyv::Deserialize>, { fn as_bytes(&self) -> impl AsRef<[u8]> { rkyv::to_bytes::(self).unwrap() } + + fn from_bytes(bytes: &[u8]) -> Self { + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; + rkyv::deserialize(archived).expect("data should be valid") + } } diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index ab2c925..6b18e5f 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::hash::Hash; use rkyv::{Archive, Deserialize, Serialize}; +use rkyv::api::high::HighDeserializer; use rkyv::rancor::Strategy; use rkyv::ser::allocator::ArenaHandle; use rkyv::ser::Serializer; @@ -34,7 +35,7 @@ impl TableOfContentsPage { pub fn is_last(&self) -> bool { - self.next_page.is_some() + self.next_page.is_none() } pub fn mark_not_last(&mut self, page_id: PageId) { @@ -87,10 +88,16 @@ where + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, > + Hash + Eq, - ::Archived: Hash + Eq + ::Archived: + rkyv::Deserialize> + Hash + Eq, { fn as_bytes(&self) -> impl AsRef<[u8]> { rkyv::to_bytes::(self).unwrap() } + + fn from_bytes(bytes: &[u8]) -> Self { + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; + rkyv::deserialize(archived).expect("data should be valid") + } } diff --git a/src/page/space_info.rs b/src/page/space_info.rs index 37f7f1e..c24751d 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -7,7 +7,7 @@ use rkyv::ser::sharing::Share; use rkyv::ser::Serializer; use rkyv::util::AlignedVec; use rkyv::{Archive, Deserialize, Serialize}; - +use rkyv::api::high::HighDeserializer; use crate::util::Persistable; use crate::DataType; use crate::{space, Link}; @@ -47,10 +47,17 @@ where + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, >, + ::Archived: + rkyv::Deserialize>, { fn as_bytes(&self) -> impl AsRef<[u8]> { rkyv::to_bytes::(self).unwrap() } + + fn from_bytes(bytes: &[u8]) -> Self { + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; + rkyv::deserialize(archived).expect("data should be valid") + } } #[cfg(test)] diff --git a/src/page/util.rs b/src/page/util.rs index 937890c..1f388e1 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -150,7 +150,7 @@ pub fn parse_page( index: u32, ) -> eyre::Result> where - Page: rkyv::Archive, + Page: rkyv::Archive + Persistable, ::Archived: rkyv::Deserialize>, { diff --git a/src/util/mod.rs b/src/util/mod.rs index d343007..1721f80 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -2,4 +2,4 @@ mod persistable; mod sized; pub use persistable::Persistable; -pub use sized::{align, SizeMeasurable}; +pub use sized::{align, align8, SizeMeasurable, align_vec}; diff --git a/src/util/persistable.rs b/src/util/persistable.rs index 5b05d72..98fb0f8 100644 --- a/src/util/persistable.rs +++ b/src/util/persistable.rs @@ -1,3 +1,4 @@ pub trait Persistable { fn as_bytes(&self) -> impl AsRef<[u8]>; + fn from_bytes(bytes: &[u8]) -> Self; } diff --git a/src/util/sized.rs b/src/util/sized.rs index c16a02c..74e0590 100644 --- a/src/util/sized.rs +++ b/src/util/sized.rs @@ -1,5 +1,6 @@ use crate::link::{Link, LINK_LENGTH}; use std::{mem, sync::Arc}; +use rkyv::util::AlignedVec; use uuid::Uuid; pub const fn align(len: usize) -> usize { @@ -10,6 +11,25 @@ pub const fn align(len: usize) -> usize { } } +pub const fn align8(len: usize) -> usize { + if len % 8 == 0 { + len + } else { + (len / 8 + 1) * 8 + } +} + +pub fn align_vec(mut v: AlignedVec) -> AlignedVec { + if v.len() != align(v.len()) { + let count = align(v.len()) - v.len(); + for _ in 0..count { + v.push(0) + } + } + + v +} + /// Marks an objects that can return theirs approximate size after archiving via /// [`rkyv`]. pub trait SizeMeasurable { @@ -76,6 +96,24 @@ impl SizeMeasurable for String { } } +impl SizeMeasurable for Vec +where + T: SizeMeasurable + Default, +{ + fn aligned_size(&self) -> usize { + let val_size = T::default().aligned_size(); + let vec_content_size = if val_size == 2 { + 2 + } else if val_size == 4 { + 4 + } else { + align8(val_size) + }; + + align(self.len() * vec_content_size) + 8 + } +} + impl SizeMeasurable for Arc { fn aligned_size(&self) -> usize { self.as_ref().aligned_size() From c4ec258eaec1f285a65fbb508e90d631a7b775a3 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Fri, 17 Jan 2025 00:46:36 +0300 Subject: [PATCH 05/24] page corrections --- src/page/index/new_page.rs | 74 ++++++++++++++++++++++++++------------ src/page/space_info.rs | 2 +- src/page/util.rs | 3 +- 3 files changed, 53 insertions(+), 26 deletions(-) diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs index ab8ae41..a94228d 100644 --- a/src/page/index/new_page.rs +++ b/src/page/index/new_page.rs @@ -15,7 +15,7 @@ use rkyv::ser::sharing::Share; use rkyv::util::AlignedVec; use crate::page::{IndexValue, PageId}; -use crate::{align, seek_to_page_start, Link, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; +use crate::{align, align8, seek_to_page_start, Link, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; /// Represents a page, which is filled with [`IndexValue`]'s of some index. #[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] @@ -32,11 +32,7 @@ impl NewIndexPage { where T: Default + Clone, { let slots = vec![0u16; size]; - let mut v = IndexValue::default(); - v.link.page_id = 1.into(); - v.link.length = 4; - v.link.offset = 3; - let index_values = vec![v; size]; + let index_values = vec![IndexValue::default(); size]; Self { size: size as u16, node_id, @@ -46,18 +42,28 @@ impl NewIndexPage { } } + fn values_count_offset() -> usize + where T: Default + SizeMeasurable + { + GENERAL_HEADER_SIZE + T::default().aligned_size() + u16::default().aligned_size() + } + + fn slots_length(size: usize) -> usize { + align(size * u16::default().aligned_size()) + 8 + } + pub fn parse_slots_and_values_count(file: &mut File, page_id: PageId, size: usize) -> eyre::Result<(Vec, u16)> where T: Default + SizeMeasurable { seek_to_page_start(file, page_id.0)?; - let offset = GENERAL_HEADER_SIZE as i64 + align(T::default().aligned_size()) as i64; + let offset = Self::values_count_offset() as i64; file.seek(SeekFrom::Current(offset))?; - let mut values_count_bytes = vec![0u8; align(u16::default().aligned_size())]; + let mut values_count_bytes = vec![0u8; u16::default().aligned_size()]; file.read_exact(values_count_bytes.as_mut_slice())?; let archived = unsafe { rkyv::access_unchecked::<::Archived>(values_count_bytes.as_slice()) }; let values_count = rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); - let mut slots_bytes = vec![0u8; align(size * u16::default().aligned_size() + 8)]; + let mut slots_bytes = vec![0u8; Self::slots_length(size)]; file.read_exact(slots_bytes.as_mut_slice())?; let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(slots_bytes.as_slice()) }; let slots = rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); @@ -69,7 +75,7 @@ impl NewIndexPage { where T: Default + SizeMeasurable { seek_to_page_start(file, page_id.0)?; - file.seek(SeekFrom::Current(GENERAL_HEADER_SIZE as i64 + align(T::default().aligned_size()) as i64))?; + file.seek(SeekFrom::Current(Self::values_count_offset() as i64))?; let bytes = rkyv::to_bytes::(&values_count)?; file.write(bytes.as_slice())?; @@ -90,10 +96,10 @@ impl NewIndexPage { { seek_to_page_start(file, page_id.0)?; - let mut offset = GENERAL_HEADER_SIZE; - offset += align(T::default().aligned_size()); - offset += align(u16::default().aligned_size() * size + 8); - offset += value_index as usize * align(T::default().aligned_size() + PageId::default().0.aligned_size()); + let mut offset = Self::values_count_offset(); + offset += u16::default().aligned_size(); + offset += Self::slots_length(size); + offset += value_index as usize * align8(IndexValue::::default().aligned_size()); file.seek(SeekFrom::Current(offset as i64))?; let bytes = rkyv::to_bytes::(&value)?; @@ -107,7 +113,7 @@ where T: Archive + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, - > + Default + SizeMeasurable + Debug + Clone, + > + Default + SizeMeasurable + Clone, ::Archived: Deserialize>, { fn as_bytes(&self) -> impl AsRef<[u8]> { @@ -129,22 +135,42 @@ where fn from_bytes(bytes: &[u8]) -> Self { let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[0..2]) }; let size = rkyv::deserialize::(archived).expect("data should be valid"); - println!("{}", size); + let t_size = T::default().aligned_size(); let mut offset = 2; let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..offset + t_size]); let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; let node_id = rkyv::deserialize(archived).expect("data should be valid"); - println!("{:?}", node_id); + offset = 2 + t_size; let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..offset + 2]); let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; let values_count = rkyv::deserialize::(archived).expect("data should be valid"); - println!("{:?}", values_count); - Self::new(node_id, size as usize) + offset = offset + 2; + let slots_len = align(size as usize * u16::default().aligned_size()) + 8; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + slots_len]); + let s = format!("{:?}", v.as_ref()); + let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; + let slots = rkyv::deserialize::, rkyv::rancor::Error>(archived).expect("data should be valid"); + + offset = offset + slots_len; + let values_len = size as usize * align8(IndexValue::::default().aligned_size()) + 8; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + values_len]); + let archived = unsafe { rkyv::access_unchecked::<> as Archive>::Archived>(&v[..]) }; + let index_values = rkyv::deserialize::>, rkyv::rancor::Error>(archived).expect("data should be valid"); + + Self { + slots, + size, + values_count, + node_id, + index_values + } } } @@ -169,10 +195,12 @@ mod tests { let size: usize = get_size_from_data_length::(INNER_PAGE_SIZE); let page = NewIndexPage::::new(1, size); let bytes = page.as_bytes(); - println!("{:?}", bytes.as_ref()); - println!("{}", size); - println!("{}", bytes.as_ref().len()); - let page = NewIndexPage::::from_bytes(bytes.as_ref()); + let new_page = NewIndexPage::::from_bytes(bytes.as_ref()); + assert_eq!(new_page.node_id, page.node_id); + assert_eq!(new_page.values_count, page.values_count); + assert_eq!(new_page.size, page.size); + assert_eq!(new_page.slots, page.slots); + assert_eq!(new_page.index_values, page.index_values); } } diff --git a/src/page/space_info.rs b/src/page/space_info.rs index c24751d..2580b06 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -76,7 +76,7 @@ mod test { primary_key_intervals: vec![], secondary_index_intervals: HashMap::new(), data_intervals: vec![], - pk_gen_state: (), + pk_gen_state: 0u128, empty_links_list: vec![], secondary_index_map: HashMap::new(), }; diff --git a/src/page/util.rs b/src/page/util.rs index 1f388e1..4bda064 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -159,8 +159,7 @@ where let mut buffer: Vec = vec![0u8; header.data_length as usize]; file.read_exact(&mut buffer)?; - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&buffer[..]) }; - let info = rkyv::deserialize(archived).expect("data should be valid"); + let info = Page::from_bytes(buffer.as_ref()); Ok(GeneralPage { header, From c6c7b63b295e43244040f3ecd55a3154df693cd6 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sat, 18 Jan 2025 23:19:52 +0300 Subject: [PATCH 06/24] corrections --- src/page/index/new_page.rs | 201 +++++++++++++++++++++++++++---------- 1 file changed, 150 insertions(+), 51 deletions(-) diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs index a94228d..d69257a 100644 --- a/src/page/index/new_page.rs +++ b/src/page/index/new_page.rs @@ -1,6 +1,5 @@ //! [`crate::page::IndexPage`] definition. -use std::array; use std::fmt::Debug; use std::fs::File; use std::hash::Hash; @@ -15,7 +14,7 @@ use rkyv::ser::sharing::Share; use rkyv::util::AlignedVec; use crate::page::{IndexValue, PageId}; -use crate::{align, align8, seek_to_page_start, Link, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; +use crate::{align, align8, seek_to_page_start, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; /// Represents a page, which is filled with [`IndexValue`]'s of some index. #[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] @@ -27,6 +26,13 @@ pub struct NewIndexPage { pub index_values: Vec>, } +#[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] +pub struct IndexPageUtility { + pub node_id: T, + pub values_count: u16, + pub slots: Vec, +} + impl NewIndexPage { pub fn new(node_id: T, size: usize) -> Self where T: Default + Clone, @@ -42,68 +48,177 @@ impl NewIndexPage { } } - fn values_count_offset() -> usize + fn index_page_utility_length(size: usize) -> usize where T: Default + SizeMeasurable { - GENERAL_HEADER_SIZE + T::default().aligned_size() + u16::default().aligned_size() + T::default().aligned_size() + u16::default().aligned_size() + align(size * u16::default().aligned_size()) + 8 } - fn slots_length(size: usize) -> usize { - align(size * u16::default().aligned_size()) + 8 + fn get_index_page_utility_from_bytes(bytes: &[u8]) -> IndexPageUtility + where + T: Archive + + Default + SizeMeasurable, + ::Archived: Deserialize>, + { + let t_size = T::default().aligned_size(); + let mut offset = 0; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + t_size]); + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; + let node_id = rkyv::deserialize(archived).expect("data should be valid"); + + offset = t_size; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + 2]); + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; + let values_count = rkyv::deserialize::(archived).expect("data should be valid"); + + offset = offset + 2; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..]); + let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; + let slots = rkyv::deserialize::, rkyv::rancor::Error>(archived).expect("data should be valid"); + + IndexPageUtility { + node_id, + values_count, + slots + } } - pub fn parse_slots_and_values_count(file: &mut File, page_id: PageId, size: usize) -> eyre::Result<(Vec, u16)> - where T: Default + SizeMeasurable + pub fn parse_index_page_utility(file: &mut File, page_id: PageId) -> eyre::Result> + where + T: Archive + + Default + SizeMeasurable, + ::Archived: Deserialize>, { seek_to_page_start(file, page_id.0)?; - let offset = Self::values_count_offset() as i64; + let offset = GENERAL_HEADER_SIZE as i64; file.seek(SeekFrom::Current(offset))?; - let mut values_count_bytes = vec![0u8; u16::default().aligned_size()]; - file.read_exact(values_count_bytes.as_mut_slice())?; - let archived = unsafe { rkyv::access_unchecked::<::Archived>(values_count_bytes.as_slice()) }; - let values_count = rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); - let mut slots_bytes = vec![0u8; Self::slots_length(size)]; - file.read_exact(slots_bytes.as_mut_slice())?; - let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(slots_bytes.as_slice()) }; - let slots = rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); + let mut size_bytes = vec![0u8; 2]; + file.read_exact(size_bytes.as_mut_slice())?; + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&size_bytes[0..2]) }; + let size = rkyv::deserialize::(archived).expect("data should be valid"); - Ok((slots, values_count)) + let index_utility_len = Self::index_page_utility_length(size as usize); + let mut index_utility_bytes = vec![0u8; index_utility_len]; + file.read_exact(index_utility_bytes.as_mut_slice())?; + let utility = Self::get_index_page_utility_from_bytes(index_utility_bytes.as_ref()); + + Ok(utility) } - pub fn persist_slots(file: &mut File, page_id: PageId, slots: Vec, values_count: u16) -> eyre::Result<()> - where T: Default + SizeMeasurable + pub fn persist_index_page_utility(file: &mut File, page_id: PageId, utility: IndexPageUtility) -> eyre::Result<()> + where + T: Archive + + Default + + SizeMeasurable + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, { seek_to_page_start(file, page_id.0)?; - file.seek(SeekFrom::Current(Self::values_count_offset() as i64))?; + file.seek(SeekFrom::Current(GENERAL_HEADER_SIZE as i64 + u16::default().aligned_size() as i64))?; - let bytes = rkyv::to_bytes::(&values_count)?; + let bytes = rkyv::to_bytes::(&utility.node_id)?; file.write(bytes.as_slice())?; - let bytes = rkyv::to_bytes::(&slots)?; + let bytes = rkyv::to_bytes::(&utility.values_count)?; + file.write(bytes.as_slice())?; + + let bytes = rkyv::to_bytes::(&utility.slots)?; file.write(bytes.as_slice())?; Ok(()) } - pub fn persist_value(file: &mut File, page_id: PageId, size: usize, value: IndexValue, value_index: u16) -> eyre::Result<()> + fn read_value(file: &mut File) -> eyre::Result> + where + T: Archive + + Default + SizeMeasurable, + ::Archived: Deserialize>, + { + let mut bytes = vec![0u8; align8(IndexValue::::default().aligned_size())]; + file.read_exact(bytes.as_mut_slice())?; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(bytes.as_slice()); + let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; + Ok(rkyv::deserialize(archived).expect("data should be valid")) + } + + pub fn read_value_with_index(file: &mut File, page_id: PageId, size: usize, index: usize) -> eyre::Result> + where + T: Archive + + Default + SizeMeasurable, + ::Archived: Deserialize>, + { + seek_to_page_start(file, page_id.0)?; + + let offset = Self::get_value_offset(size, index as usize); + file.seek(SeekFrom::Current(offset as i64))?; + let mut bytes = vec![0u8; align8(IndexValue::::default().aligned_size())]; + Self::read_value(file) + } + + fn get_value_offset(size: usize, value_index: usize) -> usize + where T: Default + SizeMeasurable + { + let mut offset = GENERAL_HEADER_SIZE; + offset += u16::default().aligned_size(); + offset += T::default().aligned_size(); + offset += u16::default().aligned_size(); + offset += align(size * u16::default().aligned_size()) + 8; + offset += value_index * align8(IndexValue::::default().aligned_size()); + + offset + } + + pub fn persist_value(file: &mut File, page_id: PageId, size: usize, value: IndexValue, mut value_index: u16) -> eyre::Result where T: Archive + Default + SizeMeasurable + + Eq + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, >, + ::Archived: Deserialize>, { seek_to_page_start(file, page_id.0)?; - let mut offset = Self::values_count_offset(); - offset += u16::default().aligned_size(); - offset += Self::slots_length(size); - offset += value_index as usize * align8(IndexValue::::default().aligned_size()); + let offset = Self::get_value_offset(size, value_index as usize); + file.seek(SeekFrom::Current(offset as i64))?; + let bytes = rkyv::to_bytes::(&value)?; + file.write(bytes.as_slice())?; + + let mut value = Self::read_value(file)?; + while value != IndexValue::default() { + value_index += 1; + value = Self::read_value(file)?; + } + Ok(value_index + 1) + } + + pub fn remove_value(file: &mut File, page_id: PageId, size: usize, mut value_index: u16) -> eyre::Result<()> + where + T: Archive + + Default + + SizeMeasurable + + Eq + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, + ::Archived: Deserialize>, + { + seek_to_page_start(file, page_id.0)?; + + let offset = Self::get_value_offset(size, value_index as usize); file.seek(SeekFrom::Current(offset as i64))?; + let value = IndexValue::::default(); let bytes = rkyv::to_bytes::(&value)?; file.write(bytes.as_slice())?; + Ok(()) } } @@ -136,28 +251,12 @@ where let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[0..2]) }; let size = rkyv::deserialize::(archived).expect("data should be valid"); - let t_size = T::default().aligned_size(); let mut offset = 2; - let mut v = AlignedVec::<4>::new(); - v.extend_from_slice(&bytes[offset..offset + t_size]); - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; - let node_id = rkyv::deserialize(archived).expect("data should be valid"); + let utility_length = Self::index_page_utility_length(size as usize); + let index_utility_bytes = &bytes[offset..offset + utility_length]; + let utility = Self::get_index_page_utility_from_bytes(index_utility_bytes); + offset += utility_length; - offset = 2 + t_size; - let mut v = AlignedVec::<4>::new(); - v.extend_from_slice(&bytes[offset..offset + 2]); - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; - let values_count = rkyv::deserialize::(archived).expect("data should be valid"); - - offset = offset + 2; - let slots_len = align(size as usize * u16::default().aligned_size()) + 8; - let mut v = AlignedVec::<4>::new(); - v.extend_from_slice(&bytes[offset..offset + slots_len]); - let s = format!("{:?}", v.as_ref()); - let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; - let slots = rkyv::deserialize::, rkyv::rancor::Error>(archived).expect("data should be valid"); - - offset = offset + slots_len; let values_len = size as usize * align8(IndexValue::::default().aligned_size()) + 8; let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..offset + values_len]); @@ -165,10 +264,10 @@ where let index_values = rkyv::deserialize::>, rkyv::rancor::Error>(archived).expect("data should be valid"); Self { - slots, + slots: utility.slots, size, - values_count, - node_id, + values_count: utility.values_count, + node_id: utility.node_id, index_values } } From 7ba90f28a5c2659e53735df379f07b08109c0b36 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 19 Jan 2025 18:47:41 +0300 Subject: [PATCH 07/24] corrections --- codegen/src/size_measure/generator.rs | 5 +++-- src/page/data.rs | 1 - src/page/header.rs | 2 +- src/page/index/new_page.rs | 3 +-- src/page/index/table_of_contents_page.rs | 9 ++++----- src/page/mod.rs | 4 ++++ src/util/sized.rs | 8 ++++++++ 7 files changed, 21 insertions(+), 11 deletions(-) diff --git a/codegen/src/size_measure/generator.rs b/codegen/src/size_measure/generator.rs index 3fe6a6a..2e0a424 100644 --- a/codegen/src/size_measure/generator.rs +++ b/codegen/src/size_measure/generator.rs @@ -10,7 +10,7 @@ impl Generator { pub fn gen_impl(&self) -> TokenStream { let struct_ident = &self.struct_def.ident; - let mut num = -1; + let mut num = 0; let sum = self .struct_def .fields @@ -21,9 +21,10 @@ impl Generator { self.#i.aligned_size() } } else { + let i = syn::Index::from(num); num += 1; quote! { - self.#num.aligned_size() + self.#i.aligned_size() } } }) diff --git a/src/page/data.rs b/src/page/data.rs index c6f6602..7bdb449 100644 --- a/src/page/data.rs +++ b/src/page/data.rs @@ -1,5 +1,4 @@ use eyre::{eyre, Result}; -use rkyv::Archive; use crate::Link; use crate::Persistable; diff --git a/src/page/header.rs b/src/page/header.rs index 969978d..92e54b7 100644 --- a/src/page/header.rs +++ b/src/page/header.rs @@ -5,7 +5,7 @@ use rkyv::{Archive, Deserialize, Serialize}; use crate::page::ty::PageType; use crate::space; use crate::util::Persistable; -use crate::{page, PAGE_SIZE}; +use crate::PAGE_SIZE; use crate::page::PageId; pub const DATA_VERSION: u32 = 1u32; diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs index d69257a..630194f 100644 --- a/src/page/index/new_page.rs +++ b/src/page/index/new_page.rs @@ -156,7 +156,6 @@ impl NewIndexPage { let offset = Self::get_value_offset(size, index as usize); file.seek(SeekFrom::Current(offset as i64))?; - let mut bytes = vec![0u8; align8(IndexValue::::default().aligned_size())]; Self::read_value(file) } @@ -200,7 +199,7 @@ impl NewIndexPage { Ok(value_index + 1) } - pub fn remove_value(file: &mut File, page_id: PageId, size: usize, mut value_index: u16) -> eyre::Result<()> + pub fn remove_value(file: &mut File, page_id: PageId, size: usize, value_index: u16) -> eyre::Result<()> where T: Archive + Default diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index 6b18e5f..4a66166 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -1,6 +1,5 @@ use std::collections::HashMap; use std::hash::Hash; - use rkyv::{Archive, Deserialize, Serialize}; use rkyv::api::high::HighDeserializer; use rkyv::rancor::Strategy; @@ -8,7 +7,6 @@ use rkyv::ser::allocator::ArenaHandle; use rkyv::ser::Serializer; use rkyv::ser::sharing::Share; use rkyv::util::AlignedVec; -use rkyv::with::Skip; use crate::page::PageId; use crate::{align, Persistable, SizeMeasurable}; @@ -16,16 +14,17 @@ use crate::{align, Persistable, SizeMeasurable}; #[derive(Archive, Clone, Deserialize, Debug, Serialize)] pub struct TableOfContentsPage { records: HashMap, - #[rkyv(with = Skip)] estimated_size: usize, next_page: Option, } -impl Default for TableOfContentsPage { +impl Default for TableOfContentsPage +where T: SizeMeasurable +{ fn default() -> Self { Self { records: HashMap::new(), - estimated_size: 0, + estimated_size: usize::default().aligned_size() + Option::::default().aligned_size(), next_page: None, } } diff --git a/src/page/mod.rs b/src/page/mod.rs index bb98cce..dfcf896 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -8,6 +8,9 @@ mod util; use derive_more::{Display, From}; use rkyv::{Archive, Deserialize, Serialize}; +use data_bucket_codegen::SizeMeasure; + +use crate::{SizeMeasurable, align}; pub use data::Data; pub use data_type::DataType; @@ -62,6 +65,7 @@ pub const INNER_PAGE_SIZE: usize = PAGE_SIZE - GENERAL_HEADER_SIZE; PartialEq, PartialOrd, Serialize, + SizeMeasure )] pub struct PageId(u32); diff --git a/src/util/sized.rs b/src/util/sized.rs index 74e0590..14ffc0b 100644 --- a/src/util/sized.rs +++ b/src/util/sized.rs @@ -119,12 +119,20 @@ impl SizeMeasurable for Arc { self.as_ref().aligned_size() } } + impl SizeMeasurable for lockfree::set::Set { fn aligned_size(&self) -> usize { self.iter().map(|elem| elem.aligned_size()).sum() } } +impl SizeMeasurable for Option +where T: SizeMeasurable{ + fn aligned_size(&self) -> usize { + size_of::>() + } +} + #[cfg(test)] mod test { use crate::util::sized::SizeMeasurable; From 0e87d94f415ee656738c902c27cf0716561b5f21 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Fri, 24 Jan 2025 05:30:11 +0300 Subject: [PATCH 08/24] add empty pages vec to TableOfContents --- src/page/index/new_page.rs | 20 ++++++++++---------- src/page/index/table_of_contents_page.rs | 21 ++++++++++++++++++++- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs index 630194f..0755009 100644 --- a/src/page/index/new_page.rs +++ b/src/page/index/new_page.rs @@ -21,7 +21,7 @@ use crate::{align, align8, seek_to_page_start, Persistable, SizeMeasurable, GENE pub struct NewIndexPage { pub size: u16, pub node_id: T, - pub values_count: u16, + pub current_index: u16, pub slots: Vec, pub index_values: Vec>, } @@ -29,7 +29,7 @@ pub struct NewIndexPage { #[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] pub struct IndexPageUtility { pub node_id: T, - pub values_count: u16, + pub current_index: u16, pub slots: Vec, } @@ -42,7 +42,7 @@ impl NewIndexPage { Self { size: size as u16, node_id, - values_count: 0, + current_index: 0, slots, index_values, } @@ -71,7 +71,7 @@ impl NewIndexPage { let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..offset + 2]); let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; - let values_count = rkyv::deserialize::(archived).expect("data should be valid"); + let current_index = rkyv::deserialize::(archived).expect("data should be valid"); offset = offset + 2; let mut v = AlignedVec::<4>::new(); @@ -81,7 +81,7 @@ impl NewIndexPage { IndexPageUtility { node_id, - values_count, + current_index, slots } } @@ -124,7 +124,7 @@ impl NewIndexPage { let bytes = rkyv::to_bytes::(&utility.node_id)?; file.write(bytes.as_slice())?; - let bytes = rkyv::to_bytes::(&utility.values_count)?; + let bytes = rkyv::to_bytes::(&utility.current_index)?; file.write(bytes.as_slice())?; let bytes = rkyv::to_bytes::(&utility.slots)?; @@ -236,8 +236,8 @@ where bytes.extend_from_slice(size_bytes.as_ref()); let node_id_bytes = rkyv::to_bytes::(&self.node_id).unwrap(); bytes.extend_from_slice(node_id_bytes.as_ref()); - let values_count_bytes = rkyv::to_bytes::(&self.values_count).unwrap(); - bytes.extend_from_slice(values_count_bytes.as_ref()); + let current_index_bytes = rkyv::to_bytes::(&self.current_index).unwrap(); + bytes.extend_from_slice(current_index_bytes.as_ref()); let slots_bytes = rkyv::to_bytes::(&self.slots).unwrap(); bytes.extend_from_slice(slots_bytes.as_ref()); let values_bytes = rkyv::to_bytes::(&self.index_values).unwrap(); @@ -265,7 +265,7 @@ where Self { slots: utility.slots, size, - values_count: utility.values_count, + current_index: utility.current_index, node_id: utility.node_id, index_values } @@ -296,7 +296,7 @@ mod tests { let new_page = NewIndexPage::::from_bytes(bytes.as_ref()); assert_eq!(new_page.node_id, page.node_id); - assert_eq!(new_page.values_count, page.values_count); + assert_eq!(new_page.current_index, page.current_index); assert_eq!(new_page.size, page.size); assert_eq!(new_page.slots, page.slots); assert_eq!(new_page.index_values, page.index_values); diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index 4a66166..0616531 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; use std::hash::Hash; + use rkyv::{Archive, Deserialize, Serialize}; use rkyv::api::high::HighDeserializer; use rkyv::rancor::Strategy; @@ -14,6 +15,8 @@ use crate::{align, Persistable, SizeMeasurable}; #[derive(Archive, Clone, Deserialize, Debug, Serialize)] pub struct TableOfContentsPage { records: HashMap, + + empty_pages: Vec, estimated_size: usize, next_page: Option, } @@ -24,6 +27,7 @@ where T: SizeMeasurable fn default() -> Self { Self { records: HashMap::new(), + empty_pages: vec![], estimated_size: usize::default().aligned_size() + Option::::default().aligned_size(), next_page: None, } @@ -52,6 +56,18 @@ impl TableOfContentsPage let _ = self.records.insert(val, page_id); } + pub fn pop_empty_page(&mut self) -> Option + where T: SizeMeasurable + { + if self.empty_pages.is_empty() { + return None + } + + let val = self.empty_pages.pop().expect("should not be empty as checked before"); + self.estimated_size -= val.aligned_size(); + Some(val) + } + pub fn get(&self, val: &T) -> Option where T: Hash + Eq { @@ -62,7 +78,10 @@ impl TableOfContentsPage where T: Hash + Eq + SizeMeasurable { self.estimated_size -= align(val.aligned_size() + PageId::default().0.aligned_size()); - let _ = self.records.remove(val); + self.estimated_size += PageId::default().0.aligned_size(); + + let id = self.records.remove(val).expect("value should be available if remove is called"); + self.empty_pages.push(id); } pub fn contains(&self, val: &T) -> bool From e33be00e9ea42ac73d511acbe4b4f3f3d4dcf47e Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 26 Jan 2025 03:12:49 +0300 Subject: [PATCH 09/24] remove next id from TOC page because GeneralHeader allows to do this --- src/page/index/table_of_contents_page.rs | 10 ---------- src/page/mod.rs | 4 ++++ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index 0616531..ecffeea 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -18,7 +18,6 @@ pub struct TableOfContentsPage { empty_pages: Vec, estimated_size: usize, - next_page: Option, } impl Default for TableOfContentsPage @@ -29,7 +28,6 @@ where T: SizeMeasurable records: HashMap::new(), empty_pages: vec![], estimated_size: usize::default().aligned_size() + Option::::default().aligned_size(), - next_page: None, } } } @@ -37,14 +35,6 @@ where T: SizeMeasurable impl TableOfContentsPage { - pub fn is_last(&self) -> bool { - self.next_page.is_none() - } - - pub fn mark_not_last(&mut self, page_id: PageId) { - self.next_page = Some(page_id) - } - pub fn estimated_size(&self) -> usize { self.estimated_size } diff --git a/src/page/mod.rs b/src/page/mod.rs index dfcf896..a89c2f8 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -73,6 +73,10 @@ impl PageId { pub fn next(self) -> Self { PageId(self.0 + 1) } + + pub fn is_empty(&self) -> bool { + self.0 == 0 + } } impl From for usize { From eab0208804aa37d3b8e22793c17fd1a68339ac22 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 26 Jan 2025 03:42:23 +0300 Subject: [PATCH 10/24] add update_key fn --- src/page/index/table_of_contents_page.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index ecffeea..4b446b2 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -64,7 +64,7 @@ impl TableOfContentsPage self.records.get(val).copied() } - pub fn remove(&mut self, val: &T) + pub fn remove(&mut self, val: &T) -> PageId where T: Hash + Eq + SizeMeasurable { self.estimated_size -= align(val.aligned_size() + PageId::default().0.aligned_size()); @@ -72,6 +72,14 @@ impl TableOfContentsPage let id = self.records.remove(val).expect("value should be available if remove is called"); self.empty_pages.push(id); + id + } + + pub fn update_key(&mut self, old_key: &T, new_key: T) + where T: Hash + Eq + { + let id = self.records.remove(old_key).expect("value should be available if update is called"); + self.records.insert(new_key, id); } pub fn contains(&self, val: &T) -> bool From ac28a95c2a01dfe82c2ce0faaf05987fab4b1e58 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 26 Jan 2025 04:15:43 +0300 Subject: [PATCH 11/24] add remove_without_record call for proper insert undo without insertion in empty_pages --- src/page/index/table_of_contents_page.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index 4b446b2..cb3b7d6 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -66,12 +66,19 @@ impl TableOfContentsPage pub fn remove(&mut self, val: &T) -> PageId where T: Hash + Eq + SizeMeasurable + { + let id = self.remove_without_record(val); + self.empty_pages.push(id); + id + } + + pub fn remove_without_record(&mut self, val: &T) -> PageId + where T: Hash + Eq + SizeMeasurable { self.estimated_size -= align(val.aligned_size() + PageId::default().0.aligned_size()); self.estimated_size += PageId::default().0.aligned_size(); - + let id = self.records.remove(val).expect("value should be available if remove is called"); - self.empty_pages.push(id); id } From 4858a0f73d9ca3ceb0c6489ce2c322d6c7a3f027 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Fri, 31 Jan 2025 12:06:58 +0300 Subject: [PATCH 12/24] add split logic for page --- src/page/index/new_page.rs | 62 +++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs index 0755009..b87efab 100644 --- a/src/page/index/new_page.rs +++ b/src/page/index/new_page.rs @@ -4,7 +4,7 @@ use std::fmt::Debug; use std::fs::File; use std::hash::Hash; use std::io::{Read, Seek, SeekFrom, Write}; - +use std::mem; use rkyv::{Archive, Deserialize, Serialize}; use rkyv::de::Pool; use rkyv::rancor::Strategy; @@ -48,6 +48,28 @@ impl NewIndexPage { } } + pub fn split(&mut self, index: usize) -> NewIndexPage + where T: Clone + Default + { + let mut new_page = NewIndexPage::new(self.node_id.clone(), self.size as usize); + let mut first_empty_value = u16::MAX; + for (index, slot) in self.slots[index..].iter_mut().enumerate() { + if first_empty_value > *slot { + first_empty_value = *slot; + } + let mut index_value = IndexValue::default(); + mem::swap(&mut self.index_values[*slot as usize], &mut index_value); + new_page.index_values[index] = index_value; + new_page.slots[index] = index as u16; + new_page.current_index = (index + 1) as u16; + *slot = 0 + } + self.current_index = first_empty_value; + + + new_page + } + fn index_page_utility_length(size: usize) -> usize where T: Default + SizeMeasurable { @@ -275,6 +297,7 @@ where #[cfg(test)] mod tests { use crate::{align8, Link, NewIndexPage, Persistable, SizeMeasurable, INNER_PAGE_SIZE}; + use crate::page::IndexValue; pub fn get_size_from_data_length(length: usize) -> usize where @@ -301,4 +324,41 @@ mod tests { assert_eq!(new_page.slots, page.slots); assert_eq!(new_page.index_values, page.index_values); } + + #[test] + fn test_split() { + let mut page = NewIndexPage::::new(7, 8); + page.slots = vec![0, 1, 2, 3, 4, 5, 6, 7]; + page.current_index = 8; + page.index_values = { + let mut v = vec![]; + for i in &page.slots { + v.push(IndexValue { + key: *i as u64, + link: Default::default(), + }) + } + v + }; + + let split = page.split(4); + assert_eq!(page.current_index, 4); + assert_eq!(page.slots[page.current_index as usize], 0); + + assert_eq!(page.index_values[0].key, 0); + assert_eq!(page.index_values[1].key, 1); + assert_eq!(page.index_values[2].key, 2); + assert_eq!(page.index_values[3].key, 3); + + assert_eq!(split.current_index, 4); + assert_eq!(split.slots[0], 0); + assert_eq!(split.slots[1], 1); + assert_eq!(split.slots[2], 2); + assert_eq!(split.slots[3], 3); + + assert_eq!(split.index_values[0].key, 4); + assert_eq!(split.index_values[1].key, 5); + assert_eq!(split.index_values[2].key, 6); + assert_eq!(split.index_values[3].key, 7); + } } From edc7bf5de2ab4527a9a115ce32c9e1e32fc13c8d Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Fri, 31 Jan 2025 20:20:08 +0300 Subject: [PATCH 13/24] some corrections --- src/page/index/new_page.rs | 14 ++++++------ src/page/index/table_of_contents_page.rs | 27 ++++++++++++------------ src/page/mod.rs | 3 ++- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs index b87efab..df53ef5 100644 --- a/src/page/index/new_page.rs +++ b/src/page/index/new_page.rs @@ -65,7 +65,7 @@ impl NewIndexPage { *slot = 0 } self.current_index = first_empty_value; - + self.node_id = self.index_values[self.slots[index - 1] as usize].key.clone(); new_page } @@ -176,7 +176,7 @@ impl NewIndexPage { { seek_to_page_start(file, page_id.0)?; - let offset = Self::get_value_offset(size, index as usize); + let offset = Self::get_value_offset(size, index); file.seek(SeekFrom::Current(offset as i64))?; Self::read_value(file) } @@ -212,10 +212,12 @@ impl NewIndexPage { let bytes = rkyv::to_bytes::(&value)?; file.write(bytes.as_slice())?; - let mut value = Self::read_value(file)?; - while value != IndexValue::default() { - value_index += 1; - value = Self::read_value(file)?; + if value_index != size as u16 - 1 { + let mut value = Self::read_value(file)?; + while value != IndexValue::default() { + value_index += 1; + value = Self::read_value(file)?; + } } Ok(value_index + 1) diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index cb3b7d6..9ceb5b0 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -1,5 +1,4 @@ -use std::collections::HashMap; -use std::hash::Hash; +use std::collections::BTreeMap; use rkyv::{Archive, Deserialize, Serialize}; use rkyv::api::high::HighDeserializer; @@ -14,7 +13,7 @@ use crate::{align, Persistable, SizeMeasurable}; #[derive(Archive, Clone, Deserialize, Debug, Serialize)] pub struct TableOfContentsPage { - records: HashMap, + records: BTreeMap, empty_pages: Vec, estimated_size: usize, @@ -25,7 +24,7 @@ where T: SizeMeasurable { fn default() -> Self { Self { - records: HashMap::new(), + records: BTreeMap::new(), empty_pages: vec![], estimated_size: usize::default().aligned_size() + Option::::default().aligned_size(), } @@ -40,7 +39,7 @@ impl TableOfContentsPage } pub fn insert(&mut self, val: T, page_id: PageId) - where T: Hash + Eq + SizeMeasurable + where T: Ord + Eq + SizeMeasurable { self.estimated_size += align(val.aligned_size() + page_id.0.aligned_size()); let _ = self.records.insert(val, page_id); @@ -59,13 +58,13 @@ impl TableOfContentsPage } pub fn get(&self, val: &T) -> Option - where T: Hash + Eq + where T: Ord + Eq { self.records.get(val).copied() } pub fn remove(&mut self, val: &T) -> PageId - where T: Hash + Eq + SizeMeasurable + where T: Ord + Eq + SizeMeasurable { let id = self.remove_without_record(val); self.empty_pages.push(id); @@ -73,7 +72,7 @@ impl TableOfContentsPage } pub fn remove_without_record(&mut self, val: &T) -> PageId - where T: Hash + Eq + SizeMeasurable + where T: Ord + Eq + SizeMeasurable { self.estimated_size -= align(val.aligned_size() + PageId::default().0.aligned_size()); self.estimated_size += PageId::default().0.aligned_size(); @@ -81,16 +80,16 @@ impl TableOfContentsPage let id = self.records.remove(val).expect("value should be available if remove is called"); id } - + pub fn update_key(&mut self, old_key: &T, new_key: T) - where T: Hash + Eq + where T: Ord + Eq { let id = self.records.remove(old_key).expect("value should be available if update is called"); self.records.insert(new_key, id); } pub fn contains(&self, val: &T) -> bool - where T: Hash + Eq + where T: Ord + Eq { self.records.contains_key(val) } @@ -98,7 +97,7 @@ impl TableOfContentsPage impl IntoIterator for TableOfContentsPage { type Item = (T, PageId); - type IntoIter = as IntoIterator>::IntoIter; + type IntoIter = as IntoIterator>::IntoIter; fn into_iter(self) -> Self::IntoIter { self.records.into_iter() @@ -110,9 +109,9 @@ where T: Archive + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, - > + Hash + Eq, + > + Ord + Eq, ::Archived: - rkyv::Deserialize> + Hash + Eq, + rkyv::Deserialize> + Ord + Eq, { fn as_bytes(&self) -> impl AsRef<[u8]> { rkyv::to_bytes::(self).unwrap() diff --git a/src/page/mod.rs b/src/page/mod.rs index a89c2f8..f779bf4 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -6,7 +6,7 @@ mod space_info; mod ty; mod util; -use derive_more::{Display, From}; +use derive_more::{Display, From, Into}; use rkyv::{Archive, Deserialize, Serialize}; use data_bucket_codegen::SizeMeasure; @@ -61,6 +61,7 @@ pub const INNER_PAGE_SIZE: usize = PAGE_SIZE - GENERAL_HEADER_SIZE; Eq, From, Hash, + Into, Ord, PartialEq, PartialOrd, From e3cfc96f26da1f9e6a81fd8082dba4cfa5ca2e2d Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 2 Feb 2025 00:34:11 +0300 Subject: [PATCH 14/24] add length to the index page and add fn to map page to indexset node like --- Cargo.toml | 3 +- src/page/index/mod.rs | 12 +++++++ src/page/index/new_page.rs | 46 ++++++++++++++++++------ src/page/index/table_of_contents_page.rs | 4 +++ 4 files changed, 54 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dbfab7b..ca2063e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,4 +18,5 @@ derive_more = { version = "1.0.0", features = ["from", "error", "display", "into rkyv = { version = "0.8.9", features = ["uuid-1"]} scc = "2.1.16" lockfree = "0.5.1" -uuid = { version = "1.11.0", features = ["v4"] } \ No newline at end of file +uuid = { version = "1.11.0", features = ["v4"] } +indexset = { path = "../indexset", version = "0.10.3", features = ["concurrent", "cdc"] } \ No newline at end of file diff --git a/src/page/index/mod.rs b/src/page/index/mod.rs index 6695a58..b026ab7 100644 --- a/src/page/index/mod.rs +++ b/src/page/index/mod.rs @@ -1,3 +1,4 @@ +use indexset::core::pair::Pair; use rkyv::{Archive, Deserialize, Serialize}; use crate::{Link, SizeMeasurable}; @@ -27,4 +28,15 @@ where fn aligned_size(&self) -> usize { self.key.aligned_size() + self.link.aligned_size() } +} + +impl From> for Pair +where T: Ord +{ + fn from(value: IndexValue) -> Self { + Pair { + key: value.key, + value: value.link, + } + } } \ No newline at end of file diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs index df53ef5..42ca733 100644 --- a/src/page/index/new_page.rs +++ b/src/page/index/new_page.rs @@ -5,6 +5,8 @@ use std::fs::File; use std::hash::Hash; use std::io::{Read, Seek, SeekFrom, Write}; use std::mem; + +use indexset::core::pair::Pair; use rkyv::{Archive, Deserialize, Serialize}; use rkyv::de::Pool; use rkyv::rancor::Strategy; @@ -14,7 +16,7 @@ use rkyv::ser::sharing::Share; use rkyv::util::AlignedVec; use crate::page::{IndexValue, PageId}; -use crate::{align, align8, seek_to_page_start, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; +use crate::{align, align8, seek_to_page_start, Link, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; /// Represents a page, which is filled with [`IndexValue`]'s of some index. #[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] @@ -22,6 +24,7 @@ pub struct NewIndexPage { pub size: u16, pub node_id: T, pub current_index: u16, + pub current_length: u16, pub slots: Vec, pub index_values: Vec>, } @@ -30,6 +33,7 @@ pub struct NewIndexPage { pub struct IndexPageUtility { pub node_id: T, pub current_index: u16, + pub current_length: u16, pub slots: Vec, } @@ -43,6 +47,7 @@ impl NewIndexPage { size: size as u16, node_id, current_index: 0, + current_length: 0, slots, index_values, } @@ -73,7 +78,7 @@ impl NewIndexPage { fn index_page_utility_length(size: usize) -> usize where T: Default + SizeMeasurable { - T::default().aligned_size() + u16::default().aligned_size() + align(size * u16::default().aligned_size()) + 8 + T::default().aligned_size() + u16::default().aligned_size() + u16::default().aligned_size() + align(size * u16::default().aligned_size()) + 8 } fn get_index_page_utility_from_bytes(bytes: &[u8]) -> IndexPageUtility @@ -95,6 +100,12 @@ impl NewIndexPage { let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; let current_index = rkyv::deserialize::(archived).expect("data should be valid"); + offset = offset + 2; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + 2]); + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; + let current_length = rkyv::deserialize::(archived).expect("data should be valid"); + offset = offset + 2; let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..]); @@ -104,6 +115,7 @@ impl NewIndexPage { IndexPageUtility { node_id, current_index, + current_length, slots } } @@ -143,14 +155,14 @@ impl NewIndexPage { seek_to_page_start(file, page_id.0)?; file.seek(SeekFrom::Current(GENERAL_HEADER_SIZE as i64 + u16::default().aligned_size() as i64))?; - let bytes = rkyv::to_bytes::(&utility.node_id)?; - file.write(bytes.as_slice())?; - - let bytes = rkyv::to_bytes::(&utility.current_index)?; - file.write(bytes.as_slice())?; - - let bytes = rkyv::to_bytes::(&utility.slots)?; - file.write(bytes.as_slice())?; + let node_id_bytes = rkyv::to_bytes::(&utility.node_id)?; + file.write(node_id_bytes.as_slice())?; + let current_index_bytes = rkyv::to_bytes::(&utility.current_index)?; + file.write(current_index_bytes.as_slice())?; + let current_length_bytes = rkyv::to_bytes::(&utility.current_length)?; + file.write(current_length_bytes.as_slice())?; + let slots_bytes = rkyv::to_bytes::(&utility.slots)?; + file.write(slots_bytes.as_slice())?; Ok(()) } @@ -188,6 +200,7 @@ impl NewIndexPage { offset += u16::default().aligned_size(); offset += T::default().aligned_size(); offset += u16::default().aligned_size(); + offset += u16::default().aligned_size(); offset += align(size * u16::default().aligned_size()) + 8; offset += value_index * align8(IndexValue::::default().aligned_size()); @@ -244,6 +257,16 @@ impl NewIndexPage { Ok(()) } + + pub fn get_node(&self) -> Vec> + where T: Clone + Ord + { + let mut node = Vec::with_capacity(self.size as usize); + for slot in &self.slots[..self.current_index as usize] { + node.push(self.index_values[*slot as usize].clone().into()) + } + node + } } impl Persistable for NewIndexPage @@ -262,6 +285,8 @@ where bytes.extend_from_slice(node_id_bytes.as_ref()); let current_index_bytes = rkyv::to_bytes::(&self.current_index).unwrap(); bytes.extend_from_slice(current_index_bytes.as_ref()); + let current_length_bytes = rkyv::to_bytes::(&self.current_length).unwrap(); + bytes.extend_from_slice(current_length_bytes.as_ref()); let slots_bytes = rkyv::to_bytes::(&self.slots).unwrap(); bytes.extend_from_slice(slots_bytes.as_ref()); let values_bytes = rkyv::to_bytes::(&self.index_values).unwrap(); @@ -290,6 +315,7 @@ where slots: utility.slots, size, current_index: utility.current_index, + current_length: utility.current_length, node_id: utility.node_id, index_values } diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index 9ceb5b0..643d594 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -93,6 +93,10 @@ impl TableOfContentsPage { self.records.contains_key(val) } + + pub fn iter(&self) -> impl Iterator { + self.records.iter() + } } impl IntoIterator for TableOfContentsPage { From 2bc06c4ee766adf2fd0b7ea5d26a2cf7ae37393a Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 2 Feb 2025 00:42:14 +0300 Subject: [PATCH 15/24] fix split logic --- src/page/index/new_page.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs index 42ca733..69e5308 100644 --- a/src/page/index/new_page.rs +++ b/src/page/index/new_page.rs @@ -67,10 +67,13 @@ impl NewIndexPage { new_page.index_values[index] = index_value; new_page.slots[index] = index as u16; new_page.current_index = (index + 1) as u16; - *slot = 0 + *slot = 0; } + new_page.current_length = self.current_length - index as u16; + self.current_index = first_empty_value; self.node_id = self.index_values[self.slots[index - 1] as usize].key.clone(); + self.current_length = index as u16; new_page } @@ -358,6 +361,7 @@ mod tests { let mut page = NewIndexPage::::new(7, 8); page.slots = vec![0, 1, 2, 3, 4, 5, 6, 7]; page.current_index = 8; + page.current_length = 8; page.index_values = { let mut v = vec![]; for i in &page.slots { @@ -371,6 +375,7 @@ mod tests { let split = page.split(4); assert_eq!(page.current_index, 4); + assert_eq!(page.current_length, 4); assert_eq!(page.slots[page.current_index as usize], 0); assert_eq!(page.index_values[0].key, 0); @@ -379,6 +384,7 @@ mod tests { assert_eq!(page.index_values[3].key, 3); assert_eq!(split.current_index, 4); + assert_eq!(split.current_length, 4); assert_eq!(split.slots[0], 0); assert_eq!(split.slots[1], 1); assert_eq!(split.slots[2], 2); From 67daa3168d8cedb813d13b5ff172edd0103e224a Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 5 Feb 2025 22:56:44 +0300 Subject: [PATCH 16/24] WIP --- src/lib.rs | 6 +- src/page/index/mod.rs | 25 +- src/page/index/new_page.rs | 28 ++ src/page/iterators.rs | 73 +--- src/page/mod.rs | 10 +- src/page/space_info.rs | 12 +- src/page/util.rs | 820 ++++++++++++++++--------------------- src/persistence/index.rs | 2 - 8 files changed, 433 insertions(+), 543 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 770ac1a..6d1f348 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,11 +11,11 @@ pub use link::Link; pub use data_bucket_codegen::SizeMeasure; pub use page::{ map_data_pages_to_general, map_index_pages_to_general, map_tree_index, - parse_data_page, parse_index_page, parse_page, persist_page, read_data_pages, read_index_pages, - read_rows_schema, seek_by_link, seek_to_page_start, update_at, Data as DataPage, + parse_data_page, parse_index_page, parse_page, persist_page, + seek_by_link, seek_to_page_start, update_at, Data as DataPage, General as GeneralPage, GeneralHeader, IndexPage as IndexData, IndexValue, Interval, PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, - TableOfContentsPage, NewIndexPage + TableOfContentsPage, NewIndexPage, get_index_page_size_from_data_length }; pub use persistence::{PersistableIndex, PersistableTable}; pub use util::{align, Persistable, SizeMeasurable, align8, align_vec}; diff --git a/src/page/index/mod.rs b/src/page/index/mod.rs index 3cd01ac..33621a0 100644 --- a/src/page/index/mod.rs +++ b/src/page/index/mod.rs @@ -1,3 +1,4 @@ +use indexset::core::multipair::MultiPair; use indexset::core::pair::Pair; use rkyv::{Archive, Deserialize, Serialize}; @@ -9,7 +10,7 @@ mod table_of_contents_page; mod new_page; pub use page::IndexPage; -pub use new_page::NewIndexPage; +pub use new_page::{NewIndexPage, get_index_page_size_from_data_length}; pub use table_of_contents_page::TableOfContentsPage; pub use mappers::map_tree_index; @@ -39,4 +40,26 @@ where T: Ord value: value.link, } } +} + +impl From> for IndexValue +where T: Ord +{ + fn from(pair: Pair) -> Self { + IndexValue { + key: pair.key, + link: pair.value + } + } +} + +impl From> for IndexValue +where T: Ord +{ + fn from(pair: MultiPair) -> Self { + IndexValue { + key: pair.key, + link: pair.value + } + } } \ No newline at end of file diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs index 69e5308..7133ea7 100644 --- a/src/page/index/new_page.rs +++ b/src/page/index/new_page.rs @@ -18,6 +18,19 @@ use rkyv::util::AlignedVec; use crate::page::{IndexValue, PageId}; use crate::{align, align8, seek_to_page_start, Link, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; +pub fn get_index_page_size_from_data_length(length: usize) -> usize +where + T: Default + SizeMeasurable, +{ + let node_id_size = T::default().aligned_size(); + let slot_size = u16::default().aligned_size(); + let index_value_size = align8(T::default().aligned_size() + Link::default().aligned_size()); + let vec_util_size = 8; + let size = (length - node_id_size - slot_size * 3 - vec_util_size * 2) + / (slot_size + index_value_size); + size +} + /// Represents a page, which is filled with [`IndexValue`]'s of some index. #[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] pub struct NewIndexPage { @@ -270,6 +283,21 @@ impl NewIndexPage { } node } + + pub fn from_node(node: &Vec> + Clone>, size: usize) -> Self + where T: Clone + Ord + Default + { + let mut page = NewIndexPage::new(Into::>::into(node.last().expect("should contain at least one key").clone()).key, size); + + for (i, pair) in node.iter().enumerate() { + page.index_values[i] = Into::>::into(pair.clone()); + page.slots[i] = i as u16; + } + page.current_index = node.len() as u16; + page.current_length = node.len() as u16; + + page + } } impl Persistable for NewIndexPage diff --git a/src/page/iterators.rs b/src/page/iterators.rs index f0c0634..aa89a98 100644 --- a/src/page/iterators.rs +++ b/src/page/iterators.rs @@ -10,56 +10,6 @@ use crate::{ use super::{index::{ArchivedIndexValue, IndexValue}, seek_by_link, seek_to_page_start, Interval, SpaceInfo}; -pub struct PageIterator { - intervals: Vec, - current_intervals_index: usize, - current_position_in_interval: usize, -} - -impl PageIterator { - pub fn new(intervals: Vec) -> PageIterator { - PageIterator { - current_intervals_index: 0, - current_position_in_interval: if intervals.len() > 0 { - intervals[0].0 - } else { - 0 - }, - intervals, - } - } -} - -impl Iterator for PageIterator { - type Item = u32; - - fn next(&mut self) -> Option { - let mut result: Option = None; - - if self.current_intervals_index >= self.intervals.len() { - result = None - } else if self.current_position_in_interval - >= self.intervals[self.current_intervals_index].0 - && self.current_position_in_interval <= self.intervals[self.current_intervals_index].1 - { - result = Some(self.current_position_in_interval as u32); - self.current_position_in_interval += 1; - } else if self.current_position_in_interval > self.intervals[self.current_intervals_index].1 - { - self.current_intervals_index += 1; - if self.current_intervals_index >= self.intervals.len() { - result = None; - } else { - self.current_position_in_interval = self.intervals[self.current_intervals_index].0; - result = Some(self.current_position_in_interval as u32); - self.current_position_in_interval += 1; - } - } - - result - } -} - pub struct LinksIterator<'a> { file: &'a mut std::fs::File, page_id: u32, @@ -213,29 +163,14 @@ mod test { persistence::data::DataTypeValue, Interval, Link, PAGE_SIZE, }; + use crate::page::util::test::create_test_database_file; + use super::LinksIterator; - use super::{LinksIterator, PageIterator}; - - #[test] - fn test_page_iterator() { - let interval1 = Interval(1, 2); - let interval2 = Interval(5, 7); - let page_iterator = PageIterator::new(vec![interval1, interval2]); - let collected = page_iterator.collect::>(); - assert_eq!(collected, vec![1, 2, 5, 6, 7]); - } - - #[test] - fn test_page_iterator_empty() { - let page_iterator = PageIterator::new(vec![]); - let collected = page_iterator.collect::>(); - assert_eq!(collected, Vec::::new()); - } #[test] fn test_links_iterator() { let filename = "tests/data/table_links_test.wt"; - super::super::util::test::create_test_database_file(filename); + create_test_database_file(filename); let mut file = std::fs::File::open(filename).unwrap(); let space_info = parse_space_info::(&mut file).unwrap(); @@ -260,7 +195,7 @@ mod test { #[test] fn test_pages_and_links_iterators() { let filename = "tests/data/table_pages_and_links_test.wt"; - super::super::util::test::create_test_database_file(filename); + create_test_database_file(filename); let mut file = std::fs::File::open(filename).unwrap(); let space_info = parse_space_info::(&mut file).unwrap(); diff --git a/src/page/mod.rs b/src/page/mod.rs index e745b11..064f08f 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -1,7 +1,7 @@ mod data; mod header; mod index; -mod iterators; +//mod iterators; mod space_info; mod ty; mod util; @@ -14,14 +14,14 @@ use crate::{SizeMeasurable, align}; pub use data::Data; pub use header::{GeneralHeader, DATA_VERSION}; -pub use index::{map_tree_index, IndexPage, IndexValue, TableOfContentsPage, NewIndexPage}; -pub use iterators::{DataIterator, LinksIterator, PageIterator}; +pub use index::{map_tree_index, IndexPage, IndexValue, TableOfContentsPage, NewIndexPage, get_index_page_size_from_data_length}; +//pub use iterators::{DataIterator, LinksIterator}; pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; pub use util::{ map_data_pages_to_general, map_index_pages_to_general, parse_data_page, parse_index_page, - parse_page, parse_space_info, persist_page, read_data_pages, read_index_pages, - read_rows_schema, seek_by_link, seek_to_page_start, update_at, + parse_page, parse_space_info, persist_page, + seek_by_link, seek_to_page_start, update_at, }; // TODO: Move to config diff --git a/src/page/space_info.rs b/src/page/space_info.rs index 3f84832..076971d 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -24,10 +24,10 @@ pub struct SpaceInfo { pub name: SpaceName, pub row_schema: Vec<(String, String)>, pub primary_key_fields: Vec, - pub primary_key_intervals: Vec, + pub primary_key_length: u32, pub secondary_index_types: Vec<(String, String)>, - pub secondary_index_intervals: HashMap>, - pub data_intervals: Vec, + pub secondary_index_lengths: HashMap, + pub data_length: u32, pub pk_gen_state: Pk, pub empty_links_list: Vec, } @@ -76,9 +76,9 @@ mod test { name: "Test".to_string(), row_schema: vec![], primary_key_fields: vec![], - primary_key_intervals: vec![], - secondary_index_intervals: HashMap::new(), - data_intervals: vec![], + primary_key_length: 0, + secondary_index_lengths: HashMap::new(), + data_length: 0, pk_gen_state: 0u128, empty_links_list: vec![], secondary_index_types: vec![], diff --git a/src/page/util.rs b/src/page/util.rs index 83d2ec8..23574c9 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -6,15 +6,13 @@ use rkyv::api::high::HighDeserializer; use rkyv::Archive; use super::index::IndexValue; -use super::{Interval, SpaceInfo}; +use super::SpaceInfo; use crate::page::header::GeneralHeader; use crate::page::ty::PageType; use crate::page::General; -use crate::persistence::data::rkyv_data::parse_archived_row; -use crate::persistence::data::DataTypeValue; -use crate::{DataPage, GeneralPage, IndexData, Link, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; +use crate::{DataPage, GeneralPage, IndexData, Link, NewIndexPage, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; -pub fn map_index_pages_to_general(pages: Vec>) -> Vec>> { +pub fn map_index_pages_to_general(pages: Vec>) -> Vec>> { // We are starting ID's from `1` because `0`'s page in file is info page. let header = &mut GeneralHeader::new(1.into(), PageType::Index, 0.into()); let mut general_pages = vec![]; @@ -193,29 +191,29 @@ pub fn parse_data_page( }) } -pub fn parse_data_record( - file: &mut std::fs::File, - index: u32, - offset: u32, - length: u32, - schema: &Vec<(String, String)>, -) -> eyre::Result> { - seek_to_page_start(file, index)?; - let header = parse_general_header(file)?; - if header.page_type != PageType::Data { - return Err(eyre::Report::msg(format!( - "The type of the page with index {} is not `Data`", - index - ))); - } - file.seek(io::SeekFrom::Current(offset as i64))?; - let mut buffer = vec![0u8; length as usize]; - file.read_exact(&mut buffer)?; - - let parsed_record = parse_archived_row(&buffer, &schema); - - Ok(parsed_record) -} +// pub fn parse_data_record( +// file: &mut std::fs::File, +// index: u32, +// offset: u32, +// length: u32, +// schema: &Vec<(String, String)>, +// ) -> eyre::Result> { +// seek_to_page_start(file, index)?; +// let header = parse_general_header(file)?; +// if header.page_type != PageType::Data { +// return Err(eyre::Report::msg(format!( +// "The type of the page with index {} is not `Data`", +// index +// ))); +// } +// file.seek(io::SeekFrom::Current(offset as i64))?; +// let mut buffer = vec![0u8; length as usize]; +// file.read_exact(&mut buffer)?; +// +// let parsed_record = parse_archived_row(&buffer, &schema); +// +// Ok(parsed_record) +// } pub fn parse_index_page( file: &mut std::fs::File, @@ -255,432 +253,340 @@ pub fn parse_space_info( Ok(space_info) } -pub fn read_secondary_index_pages( - file: &mut std::fs::File, - index_name: &str, - intervals: Vec, -) -> eyre::Result>> -where - T: Archive, - ::Archived: rkyv::Deserialize>, -{ - let space_info = parse_space_info::(file)?; - - let space_info_intervals = space_info - .secondary_index_intervals - .get(index_name) - .ok_or_else(|| eyre!("No index with name \"{}\" found", index_name))?; - - // check that all of the provided intervals are valid - for interval in intervals.iter() { - let mut contained = false; - for space_info_interval in space_info_intervals.iter() { - if space_info_interval.contains(interval) { - contained = true; - break; - } - } - if !contained { - return Err(eyre!("The index interval {:?} is not valid", interval)); - } - } - - let mut result: Vec> = vec![]; - for interval in intervals.iter() { - for index in interval.0..=interval.1 { - let mut index_records = parse_index_page::(file, index as u32)?; - result.append(&mut index_records); - } - } - - Ok(result) -} - -pub fn read_index_pages( - file: &mut std::fs::File, - intervals: &Vec, -) -> eyre::Result>> -where - T: Archive, - ::Archived: rkyv::Deserialize>, -{ - let mut result: Vec> = vec![]; - for interval in intervals.iter() { - for index in interval.0..=interval.1 { - let mut index_records = parse_index_page::(file, index as u32)?; - result.append(&mut index_records); - } - } - Ok(result) -} - -fn read_links( - mut file: &mut std::fs::File, - space_info: &SpaceInfo, -) -> eyre::Result> { - Ok( - read_index_pages::(&mut file, &space_info.primary_key_intervals)? - .iter() - .map(|index_value| index_value.link) - .collect::>(), - ) -} - -pub fn read_rows_schema( - file: &mut std::fs::File, -) -> eyre::Result> { - let space_info = parse_space_info::(file)?; - Ok(space_info.row_schema) -} - -pub fn read_data_pages( - mut file: &mut std::fs::File, -) -> eyre::Result>> { - let space_info = parse_space_info::(file)?; - let primary_key_fields = &space_info.primary_key_fields; - if primary_key_fields.len() != 1 { - panic!("Currently only single primary key is supported"); - } - - let primary_key_type = space_info - .row_schema - .iter() - .filter(|(field_name, _field_type)| field_name == &primary_key_fields[0]) - .map(|(_field_name, field_type)| field_type) - .take(1) - .collect::>()[0] - .as_str(); - let links = match primary_key_type { - "String" => read_links::(&mut file, &space_info)?, - "i128" => read_links::(&mut file, &space_info)?, - "i64" => read_links::(&mut file, &space_info)?, - "i32" => read_links::(&mut file, &space_info)?, - "i16" => read_links::(&mut file, &space_info)?, - "i8" => read_links::(&mut file, &space_info)?, - "u128" => read_links::(&mut file, &space_info)?, - "u64" => read_links::(&mut file, &space_info)?, - "u32" => read_links::(&mut file, &space_info)?, - "u16" => read_links::(&mut file, &space_info)?, - "u8" => read_links::(&mut file, &space_info)?, - "f64" => read_links::(&mut file, &space_info)?, - "f32" => read_links::(&mut file, &space_info)?, - _ => panic!("Unsupported primary key data type `{}`", primary_key_type), - }; - - let mut result: Vec> = vec![]; - for link in links { - let row = parse_data_record::( - &mut file, - link.page_id.0, - link.offset, - link.length, - &space_info.row_schema, - )?; - result.push(row); - } - - Ok(result) -} - -#[cfg(test)] -pub mod test { - use std::collections::HashMap; - use std::fs::remove_file; - use std::path::Path; - - use rkyv::{Archive, Deserialize, Serialize}; - use indexset::concurrent::map::BTreeMap; - - use crate::page::index::IndexValue; - use crate::page::util::read_secondary_index_pages; - use crate::page::INNER_PAGE_SIZE; - use crate::persistence::data::DataTypeValue; - use crate::{map_index_pages_to_general, map_tree_index, read_data_pages, GeneralHeader, GeneralPage, IndexData, Interval, Link, PageType, SpaceInfoData, DATA_VERSION, PAGE_SIZE}; - - use super::persist_page; - - #[test] - fn test_map() { - let index = BTreeMap::new(); - for i in 0..3060 { - let l = Link { - page_id: 1.into(), - offset: 0, - length: 32, - }; - index.insert(i, l); - } - - let res = map_tree_index::<_, { INNER_PAGE_SIZE }>(index.iter()); - let header = GeneralHeader { - data_version: DATA_VERSION, - space_id: 0.into(), - page_id: 0.into(), - previous_id: 0.into(), - next_id: 0.into(), - page_type: PageType::SpaceInfo, - data_length: 0u32, - }; - let generalised = map_index_pages_to_general(res); - assert_eq!(generalised.len(), 3); - let first = generalised.get(0).unwrap().header; - let second = generalised.get(1).unwrap().header; - let third = generalised.get(2).unwrap().header; - - assert_eq!(first.page_id, 1.into()); - assert_eq!(first.space_id, header.space_id); - assert_eq!(first.previous_id, header.previous_id); - assert_eq!(first.next_id, header.next_id); - assert_eq!(first.page_type, PageType::Index); - - assert_eq!(first.next_id, header.page_id); - assert_eq!(second.space_id, header.space_id); - assert_eq!(second.previous_id, first.page_id); - assert_eq!(second.next_id, third.page_id); - assert_eq!(second.page_type, PageType::Index); - - assert_eq!(third.next_id, 0.into()); - assert_eq!(third.space_id, header.space_id); - assert_eq!(third.previous_id, second.page_id); - assert_eq!(third.page_type, PageType::Index); - } - - fn create_space_with_intervals(intervals: &Vec) -> GeneralPage { - let space_info_header = GeneralHeader { - data_version: DATA_VERSION, - space_id: 1.into(), - page_id: 0.into(), - previous_id: 0.into(), - next_id: 1.into(), - page_type: PageType::SpaceInfo, - data_length: 0u32, - }; - let space_info = SpaceInfoData { - id: 0.into(), - page_count: 0, - name: "Test".to_string(), - row_schema: vec![], - primary_key_fields: vec![], - primary_key_intervals: vec![], - secondary_index_intervals: HashMap::from([( - "string_index".to_owned(), - intervals.clone(), - )]), - data_intervals: vec![], - pk_gen_state: (), - empty_links_list: vec![], - secondary_index_types: vec![("string_index".to_string(), "String".to_string())], - }; - let space_info_page = GeneralPage { - header: space_info_header, - inner: space_info, - }; - - space_info_page - } - - fn create_index_pages(intervals: &Vec) -> Vec>> { - let mut index_pages = Vec::>>::new(); - - for interval in intervals { - for index in interval.0..=interval.1 { - let index_header = GeneralHeader { - data_version: DATA_VERSION, - space_id: 1.into(), - page_id: (index as u32).into(), - previous_id: (if index > 0 { index as u32 - 1 } else { 0 }).into(), - next_id: (index as u32 + 1).into(), - page_type: PageType::SpaceInfo, - data_length: 0u32, - }; - let index_data = IndexData { - index_values: vec![IndexValue { - key: "first_value".to_string(), - link: Link { - page_id: 2.into(), - length: 0, - offset: 0, - }, - }], - }; - let index_page = GeneralPage { - header: index_header, - inner: index_data, - }; - index_pages.push(index_page); - } - } - - index_pages - } - - #[test] - fn test_read_index_pages() { - let filename = "tests/data/table.wt"; - if Path::new(filename).exists() { - remove_file(filename).unwrap(); - } - let mut file: std::fs::File = std::fs::File::create(filename).unwrap(); - - let intervals = vec![Interval(1, 2), Interval(5, 7)]; - - // create the space page - let mut space_info_page = create_space_with_intervals(&intervals); - persist_page(&mut space_info_page, &mut file).unwrap(); - - // create the index pages - for mut index_page in create_index_pages(&intervals) { - persist_page(&mut index_page, &mut file).unwrap(); - } - - // read the data - let mut file = std::fs::File::open(filename).unwrap(); - let index_pages = read_secondary_index_pages::( - &mut file, - "string_index", - vec![Interval(1, 2), Interval(5, 6)], - ) - .unwrap(); - assert_eq!(index_pages.len(), 4); - assert_eq!(index_pages[0].key, "first_value"); - assert_eq!(index_pages[0].link.page_id, 2.into()); - assert_eq!(index_pages[0].link.offset, 0); - assert_eq!(index_pages[0].link.length, 0); - } - - #[derive(Archive, Debug, Deserialize, Serialize)] - struct TableStruct { - int1: i32, - string1: String, - } - - pub fn create_test_database_file(filename: &str) { - if Path::new(filename).exists() { - remove_file(filename).unwrap(); - } - let mut file: std::fs::File = std::fs::File::create(filename).unwrap(); - - let space_info_header = GeneralHeader { - data_version: DATA_VERSION, - space_id: 1.into(), - page_id: 0.into(), - previous_id: 0.into(), - next_id: 1.into(), - page_type: PageType::SpaceInfo, - data_length: 0u32, - }; - let space_info = SpaceInfoData { - id: 1.into(), - page_count: 4, - name: "test space".to_owned(), - row_schema: vec![ - ("int1".to_string(), "i32".to_string()), - ("string1".to_string(), "String".to_string()), - ], - primary_key_fields: vec!["int1".to_string()], - primary_key_intervals: vec![Interval(1, 1)], - secondary_index_types: vec![], - secondary_index_intervals: Default::default(), - data_intervals: vec![], - pk_gen_state: (), - empty_links_list: vec![], - }; - let mut space_info_page = GeneralPage { - header: space_info_header, - inner: space_info, - }; - persist_page(&mut space_info_page, &mut file).unwrap(); - - let index_header = GeneralHeader { - data_version: DATA_VERSION, - space_id: 1.into(), - page_id: 1.into(), - previous_id: 0.into(), - next_id: 2.into(), - page_type: PageType::Index, - data_length: 0, - }; - - let data_header = GeneralHeader { - data_version: DATA_VERSION, - space_id: 1.into(), - page_id: 2.into(), - previous_id: 2.into(), - next_id: 4.into(), - page_type: PageType::Data, - data_length: 0, - }; - - let data_row1 = TableStruct { - int1: 1, - string1: "first string".to_string(), - }; - - let data_row2 = TableStruct { - int1: 2, - string1: "second string".to_string(), - }; - - let data_row1_inner = rkyv::to_bytes::(&data_row1).unwrap(); - let data_row1_offset = 0; - let data_row1_length = data_row1_inner.len(); - - let data_row2_inner = rkyv::to_bytes::(&data_row2).unwrap(); - let data_row2_offset = data_row1_offset + data_row1_length; - let data_row2_length = data_row2_inner.len(); - - let data_rows12_buffer = [data_row1_inner, data_row2_inner].concat(); - - let mut data_page = GeneralPage::> { - header: data_header, - inner: data_rows12_buffer, - }; - - let index_data: IndexData = IndexData:: { - index_values: vec![ - IndexValue:: { - key: 1, - link: Link { - page_id: data_header.page_id, - offset: data_row1_offset as u32, - length: data_row1_length as u32, - }, - }, - IndexValue:: { - key: 2, - link: Link { - page_id: data_header.page_id, - offset: data_row2_offset as u32, - length: data_row2_length as u32, - }, - }, - ], - }; - let mut index_page = GeneralPage { - header: index_header, - inner: index_data, - }; - - persist_page(&mut index_page, &mut file).unwrap(); - persist_page(&mut data_page, &mut file).unwrap(); - } - - #[test] - fn test_read_table_data() { - let filename = "tests/data/table_with_rows.wt"; - create_test_database_file(filename); - - let mut file: std::fs::File = std::fs::File::open(filename).unwrap(); - let data_pages: Vec> = read_data_pages::(&mut file).unwrap(); - assert_eq!(data_pages[0][0], DataTypeValue::I32(1)); - assert_eq!( - data_pages[0][1], - DataTypeValue::String("first string".to_string()) - ); - assert_eq!(data_pages[1][0], DataTypeValue::I32(2)); - assert_eq!( - data_pages[1][1], - DataTypeValue::String("second string".to_string()) - ); - } -} +// pub fn read_index_pages( +// file: &mut std::fs::File, +// length: u32, +// ) -> eyre::Result>> +// where +// T: Archive, +// ::Archived: rkyv::Deserialize>, +// { +// let mut result: Vec> = vec![]; +// for index in 0..length { +// let mut index_records = parse_index_page::(file, index)?; +// result.append(&mut index_records); +// } +// Ok(result) +// } +// +// fn read_links( +// mut file: &mut std::fs::File, +// space_info: &SpaceInfo, +// ) -> eyre::Result> { +// Ok( +// read_index_pages::(&mut file, space_info.primary_key_length)? +// .iter() +// .map(|index_value| index_value.link) +// .collect::>(), +// ) +// } +// +// pub fn read_rows_schema( +// file: &mut std::fs::File, +// ) -> eyre::Result> { +// let space_info = parse_space_info::(file)?; +// Ok(space_info.row_schema) +// } +// +// pub fn read_data_pages( +// mut file: &mut std::fs::File, +// ) -> eyre::Result>> { +// let space_info = parse_space_info::(file)?; +// let primary_key_fields = &space_info.primary_key_fields; +// if primary_key_fields.len() != 1 { +// panic!("Currently only single primary key is supported"); +// } +// +// let primary_key_type = space_info +// .row_schema +// .iter() +// .filter(|(field_name, _)| field_name == &primary_key_fields[0]) +// .map(|(_, field_type)| field_type) +// .take(1) +// .collect::>()[0] +// .as_str(); +// let links = match primary_key_type { +// "String" => read_links::(&mut file, &space_info)?, +// "i128" => read_links::(&mut file, &space_info)?, +// "i64" => read_links::(&mut file, &space_info)?, +// "i32" => read_links::(&mut file, &space_info)?, +// "i16" => read_links::(&mut file, &space_info)?, +// "i8" => read_links::(&mut file, &space_info)?, +// "u128" => read_links::(&mut file, &space_info)?, +// "u64" => read_links::(&mut file, &space_info)?, +// "u32" => read_links::(&mut file, &space_info)?, +// "u16" => read_links::(&mut file, &space_info)?, +// "u8" => read_links::(&mut file, &space_info)?, +// "f64" => read_links::(&mut file, &space_info)?, +// "f32" => read_links::(&mut file, &space_info)?, +// _ => panic!("Unsupported primary key data type `{}`", primary_key_type), +// }; +// +// let mut result: Vec> = vec![]; +// for link in links { +// let row = parse_data_record::( +// &mut file, +// link.page_id.0, +// link.offset, +// link.length, +// &space_info.row_schema, +// )?; +// result.push(row); +// } +// +// Ok(result) +// } + +// #[cfg(test)] +// pub mod test { +// use std::collections::HashMap; +// use std::fs::remove_file; +// use std::path::Path; +// +// use rkyv::{Archive, Deserialize, Serialize}; +// +// use crate::page::index::IndexValue; +// use crate::persistence::data::DataTypeValue; +// use crate::{read_data_pages, GeneralHeader, GeneralPage, IndexData, Interval, Link, PageType, SpaceInfoData, DATA_VERSION, PAGE_SIZE}; +// +// use super::persist_page; +// +// fn create_space_with_intervals(intervals: &Vec) -> GeneralPage { +// let space_info_header = GeneralHeader { +// data_version: DATA_VERSION, +// space_id: 1.into(), +// page_id: 0.into(), +// previous_id: 0.into(), +// next_id: 1.into(), +// page_type: PageType::SpaceInfo, +// data_length: 0u32, +// }; +// let space_info = SpaceInfoData { +// id: 0.into(), +// page_count: 0, +// name: "Test".to_string(), +// row_schema: vec![], +// primary_key_fields: vec![], +// primary_key_length: 1, +// secondary_index_lengths: HashMap::from([( +// "string_index".to_owned(), +// 1, +// )]), +// data_length: 1, +// pk_gen_state: (), +// empty_links_list: vec![], +// secondary_index_types: vec![("string_index".to_string(), "String".to_string())], +// }; +// let space_info_page = GeneralPage { +// header: space_info_header, +// inner: space_info, +// }; +// +// space_info_page +// } +// +// fn create_index_pages(intervals: &Vec) -> Vec>> { +// let mut index_pages = Vec::>>::new(); +// +// for interval in intervals { +// for index in interval.0..=interval.1 { +// let index_header = GeneralHeader { +// data_version: DATA_VERSION, +// space_id: 1.into(), +// page_id: (index as u32).into(), +// previous_id: (if index > 0 { index as u32 - 1 } else { 0 }).into(), +// next_id: (index as u32 + 1).into(), +// page_type: PageType::SpaceInfo, +// data_length: 0u32, +// }; +// let index_data = IndexData { +// index_values: vec![IndexValue { +// key: "first_value".to_string(), +// link: Link { +// page_id: 2.into(), +// length: 0, +// offset: 0, +// }, +// }], +// }; +// let index_page = GeneralPage { +// header: index_header, +// inner: index_data, +// }; +// index_pages.push(index_page); +// } +// } +// +// index_pages +// } +// +// #[test] +// fn test_read_index_pages() { +// let filename = "tests/data/table.wt"; +// if Path::new(filename).exists() { +// remove_file(filename).unwrap(); +// } +// let mut file: std::fs::File = std::fs::File::create(filename).unwrap(); +// +// let intervals = vec![Interval(1, 2), Interval(5, 7)]; +// +// // create the space page +// let mut space_info_page = create_space_with_intervals(&intervals); +// persist_page(&mut space_info_page, &mut file).unwrap(); +// +// // create the index pages +// for mut index_page in create_index_pages(&intervals) { +// persist_page(&mut index_page, &mut file).unwrap(); +// } +// +// // read the data +// let mut file = std::fs::File::open(filename).unwrap(); +// let index_pages = read_secondary_index_pages::( +// &mut file, +// "string_index", +// vec![Interval(1, 2), Interval(5, 6)], +// ) +// .unwrap(); +// assert_eq!(index_pages.len(), 4); +// assert_eq!(index_pages[0].key, "first_value"); +// assert_eq!(index_pages[0].link.page_id, 2.into()); +// assert_eq!(index_pages[0].link.offset, 0); +// assert_eq!(index_pages[0].link.length, 0); +// } +// +// #[derive(Archive, Debug, Deserialize, Serialize)] +// struct TableStruct { +// int1: i32, +// string1: String, +// } +// +// pub fn create_test_database_file(filename: &str) { +// if Path::new(filename).exists() { +// remove_file(filename).unwrap(); +// } +// let mut file: std::fs::File = std::fs::File::create(filename).unwrap(); +// +// let space_info_header = GeneralHeader { +// data_version: DATA_VERSION, +// space_id: 1.into(), +// page_id: 0.into(), +// previous_id: 0.into(), +// next_id: 1.into(), +// page_type: PageType::SpaceInfo, +// data_length: 0u32, +// }; +// let space_info = SpaceInfoData { +// id: 1.into(), +// page_count: 4, +// name: "test space".to_owned(), +// row_schema: vec![ +// ("int1".to_string(), "i32".to_string()), +// ("string1".to_string(), "String".to_string()), +// ], +// primary_key_fields: vec!["int1".to_string()], +// primary_key_intervals: vec![Interval(1, 1)], +// secondary_index_types: vec![], +// secondary_index_intervals: Default::default(), +// data_intervals: vec![], +// pk_gen_state: (), +// empty_links_list: vec![], +// }; +// let mut space_info_page = GeneralPage { +// header: space_info_header, +// inner: space_info, +// }; +// persist_page(&mut space_info_page, &mut file).unwrap(); +// +// let index_header = GeneralHeader { +// data_version: DATA_VERSION, +// space_id: 1.into(), +// page_id: 1.into(), +// previous_id: 0.into(), +// next_id: 2.into(), +// page_type: PageType::Index, +// data_length: 0, +// }; +// +// let data_header = GeneralHeader { +// data_version: DATA_VERSION, +// space_id: 1.into(), +// page_id: 2.into(), +// previous_id: 2.into(), +// next_id: 4.into(), +// page_type: PageType::Data, +// data_length: 0, +// }; +// +// let data_row1 = TableStruct { +// int1: 1, +// string1: "first string".to_string(), +// }; +// +// let data_row2 = TableStruct { +// int1: 2, +// string1: "second string".to_string(), +// }; +// +// let data_row1_inner = rkyv::to_bytes::(&data_row1).unwrap(); +// let data_row1_offset = 0; +// let data_row1_length = data_row1_inner.len(); +// +// let data_row2_inner = rkyv::to_bytes::(&data_row2).unwrap(); +// let data_row2_offset = data_row1_offset + data_row1_length; +// let data_row2_length = data_row2_inner.len(); +// +// let data_rows12_buffer = [data_row1_inner, data_row2_inner].concat(); +// +// let mut data_page = GeneralPage::> { +// header: data_header, +// inner: data_rows12_buffer, +// }; +// +// let index_data: IndexData = IndexData:: { +// index_values: vec![ +// IndexValue:: { +// key: 1, +// link: Link { +// page_id: data_header.page_id, +// offset: data_row1_offset as u32, +// length: data_row1_length as u32, +// }, +// }, +// IndexValue:: { +// key: 2, +// link: Link { +// page_id: data_header.page_id, +// offset: data_row2_offset as u32, +// length: data_row2_length as u32, +// }, +// }, +// ], +// }; +// let mut index_page = GeneralPage { +// header: index_header, +// inner: index_data, +// }; +// +// persist_page(&mut index_page, &mut file).unwrap(); +// persist_page(&mut data_page, &mut file).unwrap(); +// } +// +// #[test] +// fn test_read_table_data() { +// let filename = "tests/data/table_with_rows.wt"; +// create_test_database_file(filename); +// +// let mut file: std::fs::File = std::fs::File::open(filename).unwrap(); +// let data_pages: Vec> = read_data_pages::(&mut file).unwrap(); +// assert_eq!(data_pages[0][0], DataTypeValue::I32(1)); +// assert_eq!( +// data_pages[0][1], +// DataTypeValue::String("first string".to_string()) +// ); +// assert_eq!(data_pages[1][0], DataTypeValue::I32(2)); +// assert_eq!( +// data_pages[1][1], +// DataTypeValue::String("second string".to_string()) +// ); +// } +// } diff --git a/src/persistence/index.rs b/src/persistence/index.rs index b8cef28..58ed417 100644 --- a/src/persistence/index.rs +++ b/src/persistence/index.rs @@ -1,8 +1,6 @@ pub trait PersistableIndex { type PersistedIndex; - fn get_index_names(&self) -> Vec<&str>; - fn get_persisted_index(&self) -> Self::PersistedIndex; fn from_persisted(persisted: Self::PersistedIndex) -> Self; From 38154ca55b3368d28ecad049712922bde962f5c0 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sat, 8 Feb 2025 04:13:29 +0300 Subject: [PATCH 17/24] space info refactor --- src/page/space_info.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/page/space_info.rs b/src/page/space_info.rs index 076971d..d860a7b 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -1,5 +1,4 @@ //! [`SpaceInfo`] declaration. -use std::collections::HashMap; use rkyv::rancor::Strategy; use rkyv::ser::allocator::ArenaHandle; @@ -8,6 +7,7 @@ use rkyv::ser::Serializer; use rkyv::util::AlignedVec; use rkyv::{Archive, Deserialize, Serialize}; use rkyv::api::high::HighDeserializer; + use crate::util::Persistable; use crate::{space, Link}; @@ -24,10 +24,7 @@ pub struct SpaceInfo { pub name: SpaceName, pub row_schema: Vec<(String, String)>, pub primary_key_fields: Vec, - pub primary_key_length: u32, pub secondary_index_types: Vec<(String, String)>, - pub secondary_index_lengths: HashMap, - pub data_length: u32, pub pk_gen_state: Pk, pub empty_links_list: Vec, } @@ -63,8 +60,6 @@ where #[cfg(test)] mod test { - use std::collections::HashMap; - use crate::page::{SpaceInfo, INNER_PAGE_SIZE}; use crate::util::Persistable; @@ -76,9 +71,6 @@ mod test { name: "Test".to_string(), row_schema: vec![], primary_key_fields: vec![], - primary_key_length: 0, - secondary_index_lengths: HashMap::new(), - data_length: 0, pk_gen_state: 0u128, empty_links_list: vec![], secondary_index_types: vec![], From 212715d23009097852012eed5889cb6184aedfd2 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 9 Feb 2025 13:59:43 +0300 Subject: [PATCH 18/24] remove old index page model --- src/lib.rs | 10 +- src/page/data.rs | 14 +- src/page/index/mappers.rs | 210 ------------------ src/page/index/mod.rs | 8 +- src/page/index/new_page.rs | 426 ------------------------------------- src/page/index/page.rs | 423 +++++++++++++++++++++++++++++++++--- src/page/mod.rs | 10 +- src/page/util.rs | 19 +- 8 files changed, 418 insertions(+), 702 deletions(-) delete mode 100644 src/page/index/mappers.rs delete mode 100644 src/page/index/new_page.rs diff --git a/src/lib.rs b/src/lib.rs index 6d1f348..b4e94a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,12 +10,12 @@ pub use link::Link; pub use data_bucket_codegen::SizeMeasure; pub use page::{ - map_data_pages_to_general, map_index_pages_to_general, map_tree_index, + map_data_pages_to_general, map_index_pages_to_general, parse_data_page, parse_index_page, parse_page, persist_page, - seek_by_link, seek_to_page_start, update_at, Data as DataPage, - General as GeneralPage, GeneralHeader, IndexPage as IndexData, IndexValue, Interval, PageType, - SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, - TableOfContentsPage, NewIndexPage, get_index_page_size_from_data_length + seek_by_link, seek_to_page_start, update_at, DataPage, + GeneralPage, GeneralHeader, IndexValue, Interval, PageType, + SpaceInfo, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, + TableOfContentsPage, IndexPage, get_index_page_size_from_data_length }; pub use persistence::{PersistableIndex, PersistableTable}; pub use util::{align, Persistable, SizeMeasurable, align8, align_vec}; diff --git a/src/page/data.rs b/src/page/data.rs index 7bdb449..c7ea769 100644 --- a/src/page/data.rs +++ b/src/page/data.rs @@ -3,12 +3,12 @@ use crate::Link; use crate::Persistable; #[derive(Debug)] -pub struct Data { +pub struct DataPage { pub length: u32, pub data: [u8; DATA_LENGTH], } -impl Data { +impl DataPage { pub fn update_at(&mut self, link: Link, new_data: &[u8]) -> Result<()> { if new_data.len() as u32 != link.length { return Err(eyre!( @@ -51,7 +51,7 @@ impl Data { } } -impl Persistable for Data { +impl Persistable for DataPage { fn as_bytes(&self) -> impl AsRef<[u8]> { &self.data[..self.length as usize] } @@ -72,7 +72,7 @@ mod tests { #[test] fn test_update_at_success() { - let mut data = Data { + let mut data = DataPage { length: 0, data: [0; 100], }; @@ -90,7 +90,7 @@ mod tests { #[test] fn test_update_at_wrong_length() { - let mut data = Data { + let mut data = DataPage { length: 0, data: [0; 100], }; @@ -109,7 +109,7 @@ mod tests { #[test] fn test_update_at_out_of_bounds() { - let mut data = Data { + let mut data = DataPage { length: 0, data: [0; 100], }; @@ -128,7 +128,7 @@ mod tests { #[test] fn test_get_at_out_of_bounds() { - let data = Data { + let data = DataPage { length: 0, data: [0; 100], }; diff --git a/src/page/index/mappers.rs b/src/page/index/mappers.rs deleted file mode 100644 index 7c0b2b6..0000000 --- a/src/page/index/mappers.rs +++ /dev/null @@ -1,210 +0,0 @@ -use crate::{Link, SizeMeasurable}; -use crate::page::{IndexPage, IndexValue}; - -pub fn map_tree_index<'a, T, const PAGE_SIZE: usize>( - index: impl Iterator, -) -> Vec> -where - T: Clone + Ord + SizeMeasurable + 'static, -{ - let mut pages = vec![]; - let mut current_page = IndexPage::default(); - let mut current_size = 8; - - for (key, link) in index { - let index_value = IndexValue { - key: key.clone(), - link: *link, - }; - current_size += index_value.aligned_size(); - if current_size > PAGE_SIZE { - pages.push(current_page.clone()); - current_page.index_values.clear(); - current_size = 8 + index_value.aligned_size() - } - current_page.index_values.push(index_value) - } - pages.push(current_page); - - pages -} - -#[cfg(test)] -mod test { - use indexset::concurrent::map::BTreeMap; - use indexset::concurrent::multimap::BTreeMultiMap; - - use crate::page::{INNER_PAGE_SIZE, PAGE_SIZE}; - use crate::util::{Persistable, SizeMeasurable}; - use crate::Link; - use crate::page::index::mappers::map_tree_index; - - #[test] - fn map_single_value() { - let index = BTreeMap::new(); - let l = Link { - page_id: 1.into(), - offset: 0, - length: 32, - }; - index.insert(1u32, l); - - let res = map_tree_index::<_, { PAGE_SIZE }>(index.iter()); - assert_eq!(res.len(), 1); - assert_eq!(res[0].index_values.len(), 1); - let v = &res[0].index_values[0]; - assert_eq!(v.key, 1); - assert_eq!(v.link, l); - assert_eq!( - rkyv::to_bytes::(&res[0]) - .unwrap() - .len(), - 1u32.aligned_size() + l.aligned_size() + 8 - ) - } - - #[test] - fn map_page_border() { - let index = BTreeMap::new(); - for i in 0..1023 { - let l = Link { - page_id: 1.into(), - offset: 0, - length: 32, - }; - index.insert(i, l); - } - - let res = map_tree_index::<_, { PAGE_SIZE }>(index.iter()); - assert_eq!(res.len(), 1); - assert_eq!(res[0].index_values.len(), 1023); - // As 1023 * 16 + 8 - assert_eq!( - rkyv::to_bytes::(&res[0]) - .unwrap() - .len(), - 16_376 - ); - - let l = Link { - page_id: 1.into(), - offset: 0, - length: 32, - }; - index.insert(1024, l); - let res = map_tree_index::<_, { PAGE_SIZE }>(index.iter()); - assert_eq!(res.len(), 2); - assert_eq!(res[0].index_values.len(), 1023); - assert_eq!(res[1].index_values.len(), 1); - // As 16 + 8 - assert_eq!( - rkyv::to_bytes::(&res[0]) - .unwrap() - .len(), - 16_376 - ); - assert_eq!( - rkyv::to_bytes::(&res[1]) - .unwrap() - .len(), - 24 - ); - } - - #[test] - fn map_unique_and_back() { - let index = BTreeMap::new(); - for i in 0..1023 { - let l = Link { - page_id: 1.into(), - offset: 0, - length: 32, - }; - index.insert(i, l); - } - - let pages = map_tree_index::<_, { PAGE_SIZE }>(index.iter()); - let res_index = BTreeMap::new(); - - for page in pages { - page.append_to_unique_tree_index(&res_index) - } - - assert_eq!(index.iter().collect::>(), res_index.iter().collect::>()) - } - - #[test] - fn map_and_back() { - let index = BTreeMultiMap::new(); - for i in 0..256 { - for j in 0..4 { - let l = Link { - page_id: j.into(), - offset: 0, - length: 32, - }; - index.insert(i, l); - } - } - - let pages = map_tree_index::<_, { PAGE_SIZE }>(index.iter()); - let res_index = BTreeMultiMap::new(); - - for page in pages { - page.append_to_tree_index(&res_index) - } - - let mut vals = index.iter().collect::>(); - - for v in res_index.iter() { - assert!(vals.contains(&v)); - let i = vals.iter().position(|n| n == &v).unwrap(); - vals.remove(i); - } - - assert!(vals.is_empty()) - } - - #[test] - fn map_single_string() { - let index = BTreeMap::new(); - let l = Link { - page_id: 1.into(), - offset: 0, - length: 32, - }; - let s = "some string example".to_string(); - index.insert(s.clone(), l); - - let res = map_tree_index::<_, { PAGE_SIZE }>(index.iter()); - assert_eq!(res.len(), 1); - assert_eq!(res[0].index_values.len(), 1); - let v = &res[0].index_values[0]; - assert_eq!(v.key, s); - assert_eq!(v.link, l); - assert_eq!( - rkyv::to_bytes::(&res[0]) - .unwrap() - .len(), - s.aligned_size() + l.aligned_size() + 8 - ) - } - - #[test] - fn test_as_bytes() { - let index = BTreeMap::new(); - for i in 0..1022 { - let l = Link { - page_id: 1.into(), - offset: 0, - length: 32, - }; - index.insert(i, l); - } - let pages = map_tree_index::<_, { INNER_PAGE_SIZE }>(index.iter()); - let page = pages.get(0).unwrap(); - - let bytes = page.as_bytes(); - assert!(bytes.as_ref().len() <= INNER_PAGE_SIZE) - } -} \ No newline at end of file diff --git a/src/page/index/mod.rs b/src/page/index/mod.rs index 33621a0..c20a8c8 100644 --- a/src/page/index/mod.rs +++ b/src/page/index/mod.rs @@ -4,15 +4,11 @@ use rkyv::{Archive, Deserialize, Serialize}; use crate::{Link, SizeMeasurable}; -mod page; -mod mappers; mod table_of_contents_page; -mod new_page; +mod page; -pub use page::IndexPage; -pub use new_page::{NewIndexPage, get_index_page_size_from_data_length}; +pub use page::{IndexPage, get_index_page_size_from_data_length}; pub use table_of_contents_page::TableOfContentsPage; -pub use mappers::map_tree_index; /// Represents `key/value` pair of B-Tree index, where value is always /// [`data::Link`], as it is represented in primary and secondary indexes. diff --git a/src/page/index/new_page.rs b/src/page/index/new_page.rs deleted file mode 100644 index 7133ea7..0000000 --- a/src/page/index/new_page.rs +++ /dev/null @@ -1,426 +0,0 @@ -//! [`crate::page::IndexPage`] definition. - -use std::fmt::Debug; -use std::fs::File; -use std::hash::Hash; -use std::io::{Read, Seek, SeekFrom, Write}; -use std::mem; - -use indexset::core::pair::Pair; -use rkyv::{Archive, Deserialize, Serialize}; -use rkyv::de::Pool; -use rkyv::rancor::Strategy; -use rkyv::ser::allocator::ArenaHandle; -use rkyv::ser::Serializer; -use rkyv::ser::sharing::Share; -use rkyv::util::AlignedVec; - -use crate::page::{IndexValue, PageId}; -use crate::{align, align8, seek_to_page_start, Link, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; - -pub fn get_index_page_size_from_data_length(length: usize) -> usize -where - T: Default + SizeMeasurable, -{ - let node_id_size = T::default().aligned_size(); - let slot_size = u16::default().aligned_size(); - let index_value_size = align8(T::default().aligned_size() + Link::default().aligned_size()); - let vec_util_size = 8; - let size = (length - node_id_size - slot_size * 3 - vec_util_size * 2) - / (slot_size + index_value_size); - size -} - -/// Represents a page, which is filled with [`IndexValue`]'s of some index. -#[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] -pub struct NewIndexPage { - pub size: u16, - pub node_id: T, - pub current_index: u16, - pub current_length: u16, - pub slots: Vec, - pub index_values: Vec>, -} - -#[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] -pub struct IndexPageUtility { - pub node_id: T, - pub current_index: u16, - pub current_length: u16, - pub slots: Vec, -} - -impl NewIndexPage { - pub fn new(node_id: T, size: usize) -> Self - where T: Default + Clone, - { - let slots = vec![0u16; size]; - let index_values = vec![IndexValue::default(); size]; - Self { - size: size as u16, - node_id, - current_index: 0, - current_length: 0, - slots, - index_values, - } - } - - pub fn split(&mut self, index: usize) -> NewIndexPage - where T: Clone + Default - { - let mut new_page = NewIndexPage::new(self.node_id.clone(), self.size as usize); - let mut first_empty_value = u16::MAX; - for (index, slot) in self.slots[index..].iter_mut().enumerate() { - if first_empty_value > *slot { - first_empty_value = *slot; - } - let mut index_value = IndexValue::default(); - mem::swap(&mut self.index_values[*slot as usize], &mut index_value); - new_page.index_values[index] = index_value; - new_page.slots[index] = index as u16; - new_page.current_index = (index + 1) as u16; - *slot = 0; - } - new_page.current_length = self.current_length - index as u16; - - self.current_index = first_empty_value; - self.node_id = self.index_values[self.slots[index - 1] as usize].key.clone(); - self.current_length = index as u16; - - new_page - } - - fn index_page_utility_length(size: usize) -> usize - where T: Default + SizeMeasurable - { - T::default().aligned_size() + u16::default().aligned_size() + u16::default().aligned_size() + align(size * u16::default().aligned_size()) + 8 - } - - fn get_index_page_utility_from_bytes(bytes: &[u8]) -> IndexPageUtility - where - T: Archive - + Default + SizeMeasurable, - ::Archived: Deserialize>, - { - let t_size = T::default().aligned_size(); - let mut offset = 0; - let mut v = AlignedVec::<4>::new(); - v.extend_from_slice(&bytes[offset..offset + t_size]); - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; - let node_id = rkyv::deserialize(archived).expect("data should be valid"); - - offset = t_size; - let mut v = AlignedVec::<4>::new(); - v.extend_from_slice(&bytes[offset..offset + 2]); - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; - let current_index = rkyv::deserialize::(archived).expect("data should be valid"); - - offset = offset + 2; - let mut v = AlignedVec::<4>::new(); - v.extend_from_slice(&bytes[offset..offset + 2]); - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; - let current_length = rkyv::deserialize::(archived).expect("data should be valid"); - - offset = offset + 2; - let mut v = AlignedVec::<4>::new(); - v.extend_from_slice(&bytes[offset..]); - let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; - let slots = rkyv::deserialize::, rkyv::rancor::Error>(archived).expect("data should be valid"); - - IndexPageUtility { - node_id, - current_index, - current_length, - slots - } - } - - pub fn parse_index_page_utility(file: &mut File, page_id: PageId) -> eyre::Result> - where - T: Archive - + Default + SizeMeasurable, - ::Archived: Deserialize>, - { - seek_to_page_start(file, page_id.0)?; - let offset = GENERAL_HEADER_SIZE as i64; - file.seek(SeekFrom::Current(offset))?; - - let mut size_bytes = vec![0u8; 2]; - file.read_exact(size_bytes.as_mut_slice())?; - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&size_bytes[0..2]) }; - let size = rkyv::deserialize::(archived).expect("data should be valid"); - - let index_utility_len = Self::index_page_utility_length(size as usize); - let mut index_utility_bytes = vec![0u8; index_utility_len]; - file.read_exact(index_utility_bytes.as_mut_slice())?; - let utility = Self::get_index_page_utility_from_bytes(index_utility_bytes.as_ref()); - - Ok(utility) - } - - pub fn persist_index_page_utility(file: &mut File, page_id: PageId, utility: IndexPageUtility) -> eyre::Result<()> - where - T: Archive - + Default - + SizeMeasurable - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - >, - { - seek_to_page_start(file, page_id.0)?; - file.seek(SeekFrom::Current(GENERAL_HEADER_SIZE as i64 + u16::default().aligned_size() as i64))?; - - let node_id_bytes = rkyv::to_bytes::(&utility.node_id)?; - file.write(node_id_bytes.as_slice())?; - let current_index_bytes = rkyv::to_bytes::(&utility.current_index)?; - file.write(current_index_bytes.as_slice())?; - let current_length_bytes = rkyv::to_bytes::(&utility.current_length)?; - file.write(current_length_bytes.as_slice())?; - let slots_bytes = rkyv::to_bytes::(&utility.slots)?; - file.write(slots_bytes.as_slice())?; - Ok(()) - } - - fn read_value(file: &mut File) -> eyre::Result> - where - T: Archive - + Default + SizeMeasurable, - ::Archived: Deserialize>, - { - let mut bytes = vec![0u8; align8(IndexValue::::default().aligned_size())]; - file.read_exact(bytes.as_mut_slice())?; - let mut v = AlignedVec::<4>::new(); - v.extend_from_slice(bytes.as_slice()); - let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; - Ok(rkyv::deserialize(archived).expect("data should be valid")) - } - - pub fn read_value_with_index(file: &mut File, page_id: PageId, size: usize, index: usize) -> eyre::Result> - where - T: Archive - + Default + SizeMeasurable, - ::Archived: Deserialize>, - { - seek_to_page_start(file, page_id.0)?; - - let offset = Self::get_value_offset(size, index); - file.seek(SeekFrom::Current(offset as i64))?; - Self::read_value(file) - } - - fn get_value_offset(size: usize, value_index: usize) -> usize - where T: Default + SizeMeasurable - { - let mut offset = GENERAL_HEADER_SIZE; - offset += u16::default().aligned_size(); - offset += T::default().aligned_size(); - offset += u16::default().aligned_size(); - offset += u16::default().aligned_size(); - offset += align(size * u16::default().aligned_size()) + 8; - offset += value_index * align8(IndexValue::::default().aligned_size()); - - offset - } - - pub fn persist_value(file: &mut File, page_id: PageId, size: usize, value: IndexValue, mut value_index: u16) -> eyre::Result - where - T: Archive - + Default - + SizeMeasurable - + Eq - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - >, - ::Archived: Deserialize>, - { - seek_to_page_start(file, page_id.0)?; - - let offset = Self::get_value_offset(size, value_index as usize); - file.seek(SeekFrom::Current(offset as i64))?; - let bytes = rkyv::to_bytes::(&value)?; - file.write(bytes.as_slice())?; - - if value_index != size as u16 - 1 { - let mut value = Self::read_value(file)?; - while value != IndexValue::default() { - value_index += 1; - value = Self::read_value(file)?; - } - } - - Ok(value_index + 1) - } - - pub fn remove_value(file: &mut File, page_id: PageId, size: usize, value_index: u16) -> eyre::Result<()> - where - T: Archive - + Default - + SizeMeasurable - + Eq - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - >, - ::Archived: Deserialize>, - { - seek_to_page_start(file, page_id.0)?; - - let offset = Self::get_value_offset(size, value_index as usize); - file.seek(SeekFrom::Current(offset as i64))?; - let value = IndexValue::::default(); - let bytes = rkyv::to_bytes::(&value)?; - file.write(bytes.as_slice())?; - - Ok(()) - } - - pub fn get_node(&self) -> Vec> - where T: Clone + Ord - { - let mut node = Vec::with_capacity(self.size as usize); - for slot in &self.slots[..self.current_index as usize] { - node.push(self.index_values[*slot as usize].clone().into()) - } - node - } - - pub fn from_node(node: &Vec> + Clone>, size: usize) -> Self - where T: Clone + Ord + Default - { - let mut page = NewIndexPage::new(Into::>::into(node.last().expect("should contain at least one key").clone()).key, size); - - for (i, pair) in node.iter().enumerate() { - page.index_values[i] = Into::>::into(pair.clone()); - page.slots[i] = i as u16; - } - page.current_index = node.len() as u16; - page.current_length = node.len() as u16; - - page - } -} - -impl Persistable for NewIndexPage -where - T: Archive - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - > + Default + SizeMeasurable + Clone, - ::Archived: Deserialize>, -{ - fn as_bytes(&self) -> impl AsRef<[u8]> { - let mut bytes = Vec::with_capacity(self.size as usize); - let size_bytes = rkyv::to_bytes::(&self.size).unwrap(); - bytes.extend_from_slice(size_bytes.as_ref()); - let node_id_bytes = rkyv::to_bytes::(&self.node_id).unwrap(); - bytes.extend_from_slice(node_id_bytes.as_ref()); - let current_index_bytes = rkyv::to_bytes::(&self.current_index).unwrap(); - bytes.extend_from_slice(current_index_bytes.as_ref()); - let current_length_bytes = rkyv::to_bytes::(&self.current_length).unwrap(); - bytes.extend_from_slice(current_length_bytes.as_ref()); - let slots_bytes = rkyv::to_bytes::(&self.slots).unwrap(); - bytes.extend_from_slice(slots_bytes.as_ref()); - let values_bytes = rkyv::to_bytes::(&self.index_values).unwrap(); - bytes.extend_from_slice(values_bytes.as_ref()); - - bytes - } - - fn from_bytes(bytes: &[u8]) -> Self { - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[0..2]) }; - let size = rkyv::deserialize::(archived).expect("data should be valid"); - - let mut offset = 2; - let utility_length = Self::index_page_utility_length(size as usize); - let index_utility_bytes = &bytes[offset..offset + utility_length]; - let utility = Self::get_index_page_utility_from_bytes(index_utility_bytes); - offset += utility_length; - - let values_len = size as usize * align8(IndexValue::::default().aligned_size()) + 8; - let mut v = AlignedVec::<4>::new(); - v.extend_from_slice(&bytes[offset..offset + values_len]); - let archived = unsafe { rkyv::access_unchecked::<> as Archive>::Archived>(&v[..]) }; - let index_values = rkyv::deserialize::>, rkyv::rancor::Error>(archived).expect("data should be valid"); - - Self { - slots: utility.slots, - size, - current_index: utility.current_index, - current_length: utility.current_length, - node_id: utility.node_id, - index_values - } - } -} - -#[cfg(test)] -mod tests { - use crate::{align8, Link, NewIndexPage, Persistable, SizeMeasurable, INNER_PAGE_SIZE}; - use crate::page::IndexValue; - - pub fn get_size_from_data_length(length: usize) -> usize - where - T: Default + SizeMeasurable, - { - let node_id_size = T::default().aligned_size(); - let slot_size = u16::default().aligned_size(); - let index_value_size = align8(T::default().aligned_size() + Link::default().aligned_size()); - let vec_util_size = 8; - let size = (length - node_id_size - slot_size * 2 - vec_util_size * 2) / (slot_size + index_value_size); - size - } - - #[test] - fn test_bytes() { - let size: usize = get_size_from_data_length::(INNER_PAGE_SIZE); - let page = NewIndexPage::::new(1, size); - let bytes = page.as_bytes(); - let new_page = NewIndexPage::::from_bytes(bytes.as_ref()); - - assert_eq!(new_page.node_id, page.node_id); - assert_eq!(new_page.current_index, page.current_index); - assert_eq!(new_page.size, page.size); - assert_eq!(new_page.slots, page.slots); - assert_eq!(new_page.index_values, page.index_values); - } - - #[test] - fn test_split() { - let mut page = NewIndexPage::::new(7, 8); - page.slots = vec![0, 1, 2, 3, 4, 5, 6, 7]; - page.current_index = 8; - page.current_length = 8; - page.index_values = { - let mut v = vec![]; - for i in &page.slots { - v.push(IndexValue { - key: *i as u64, - link: Default::default(), - }) - } - v - }; - - let split = page.split(4); - assert_eq!(page.current_index, 4); - assert_eq!(page.current_length, 4); - assert_eq!(page.slots[page.current_index as usize], 0); - - assert_eq!(page.index_values[0].key, 0); - assert_eq!(page.index_values[1].key, 1); - assert_eq!(page.index_values[2].key, 2); - assert_eq!(page.index_values[3].key, 3); - - assert_eq!(split.current_index, 4); - assert_eq!(split.current_length, 4); - assert_eq!(split.slots[0], 0); - assert_eq!(split.slots[1], 1); - assert_eq!(split.slots[2], 2); - assert_eq!(split.slots[3], 3); - - assert_eq!(split.index_values[0].key, 4); - assert_eq!(split.index_values[1].key, 5); - assert_eq!(split.index_values[2].key, 6); - assert_eq!(split.index_values[3].key, 7); - } -} diff --git a/src/page/index/page.rs b/src/page/index/page.rs index fd59559..aa001ae 100644 --- a/src/page/index/page.rs +++ b/src/page/index/page.rs @@ -1,69 +1,426 @@ -//! [`IndexPage`] definition. +//! [`crate::page::IndexPage`] definition. use std::fmt::Debug; +use std::fs::File; +use std::hash::Hash; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::mem; -use indexset::concurrent::map::BTreeMap; -use indexset::concurrent::multimap::BTreeMultiMap; +use indexset::core::pair::Pair; +use rkyv::{Archive, Deserialize, Serialize}; +use rkyv::de::Pool; use rkyv::rancor::Strategy; use rkyv::ser::allocator::ArenaHandle; -use rkyv::ser::sharing::Share; use rkyv::ser::Serializer; +use rkyv::ser::sharing::Share; use rkyv::util::AlignedVec; -use rkyv::{Archive, Deserialize, Serialize}; -use rkyv::api::high::HighDeserializer; -use crate::link::Link; -use crate::page::IndexValue; -use crate::util::Persistable; +use crate::page::{IndexValue, PageId}; +use crate::{align, align8, seek_to_page_start, Link, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; + +pub fn get_index_page_size_from_data_length(length: usize) -> usize +where + T: Default + SizeMeasurable, +{ + let node_id_size = T::default().aligned_size(); + let slot_size = u16::default().aligned_size(); + let index_value_size = align8(T::default().aligned_size() + Link::default().aligned_size()); + let vec_util_size = 8; + let size = (length - node_id_size - slot_size * 3 - vec_util_size * 2) + / (slot_size + index_value_size); + size +} /// Represents a page, which is filled with [`IndexValue`]'s of some index. #[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] pub struct IndexPage { - //pub node_id: T, + pub size: u16, + pub node_id: T, + pub current_index: u16, + pub current_length: u16, + pub slots: Vec, pub index_values: Vec>, } -// Manual `Default` implementation to avoid `T: Default` -impl<'a, T> Default for IndexPage { - fn default() -> Self { +#[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] +pub struct IndexPageUtility { + pub node_id: T, + pub current_index: u16, + pub current_length: u16, + pub slots: Vec, +} + +impl IndexPage { + pub fn new(node_id: T, size: usize) -> Self + where T: Default + Clone, + { + let slots = vec![0u16; size]; + let index_values = vec![IndexValue::default(); size]; Self { - index_values: vec![], + size: size as u16, + node_id, + current_index: 0, + current_length: 0, + slots, + index_values, } } -} -impl IndexPage -where - T: Clone + Ord + Debug + Send + 'static, -{ - pub fn append_to_unique_tree_index(self, index: &BTreeMap) { - for val in self.index_values { - index.insert(val.key, val.link); + pub fn split(&mut self, index: usize) -> IndexPage + where T: Clone + Default + { + let mut new_page = IndexPage::new(self.node_id.clone(), self.size as usize); + let mut first_empty_value = u16::MAX; + for (index, slot) in self.slots[index..].iter_mut().enumerate() { + if first_empty_value > *slot { + first_empty_value = *slot; + } + let mut index_value = IndexValue::default(); + mem::swap(&mut self.index_values[*slot as usize], &mut index_value); + new_page.index_values[index] = index_value; + new_page.slots[index] = index as u16; + new_page.current_index = (index + 1) as u16; + *slot = 0; + } + new_page.current_length = self.current_length - index as u16; + + self.current_index = first_empty_value; + self.node_id = self.index_values[self.slots[index - 1] as usize].key.clone(); + self.current_length = index as u16; + + new_page + } + + fn index_page_utility_length(size: usize) -> usize + where T: Default + SizeMeasurable + { + T::default().aligned_size() + u16::default().aligned_size() + u16::default().aligned_size() + align(size * u16::default().aligned_size()) + 8 + } + + fn get_index_page_utility_from_bytes(bytes: &[u8]) -> IndexPageUtility + where + T: Archive + + Default + SizeMeasurable, + ::Archived: Deserialize>, + { + let t_size = T::default().aligned_size(); + let mut offset = 0; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + t_size]); + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; + let node_id = rkyv::deserialize(archived).expect("data should be valid"); + + offset = t_size; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + 2]); + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; + let current_index = rkyv::deserialize::(archived).expect("data should be valid"); + + offset = offset + 2; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + 2]); + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; + let current_length = rkyv::deserialize::(archived).expect("data should be valid"); + + offset = offset + 2; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..]); + let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; + let slots = rkyv::deserialize::, rkyv::rancor::Error>(archived).expect("data should be valid"); + + IndexPageUtility { + node_id, + current_index, + current_length, + slots + } + } + + pub fn parse_index_page_utility(file: &mut File, page_id: PageId) -> eyre::Result> + where + T: Archive + + Default + SizeMeasurable, + ::Archived: Deserialize>, + { + seek_to_page_start(file, page_id.0)?; + let offset = GENERAL_HEADER_SIZE as i64; + file.seek(SeekFrom::Current(offset))?; + + let mut size_bytes = vec![0u8; 2]; + file.read_exact(size_bytes.as_mut_slice())?; + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&size_bytes[0..2]) }; + let size = rkyv::deserialize::(archived).expect("data should be valid"); + + let index_utility_len = Self::index_page_utility_length(size as usize); + let mut index_utility_bytes = vec![0u8; index_utility_len]; + file.read_exact(index_utility_bytes.as_mut_slice())?; + let utility = Self::get_index_page_utility_from_bytes(index_utility_bytes.as_ref()); + + Ok(utility) + } + + pub fn persist_index_page_utility(file: &mut File, page_id: PageId, utility: IndexPageUtility) -> eyre::Result<()> + where + T: Archive + + Default + + SizeMeasurable + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, + { + seek_to_page_start(file, page_id.0)?; + file.seek(SeekFrom::Current(GENERAL_HEADER_SIZE as i64 + u16::default().aligned_size() as i64))?; + + let node_id_bytes = rkyv::to_bytes::(&utility.node_id)?; + file.write(node_id_bytes.as_slice())?; + let current_index_bytes = rkyv::to_bytes::(&utility.current_index)?; + file.write(current_index_bytes.as_slice())?; + let current_length_bytes = rkyv::to_bytes::(&utility.current_length)?; + file.write(current_length_bytes.as_slice())?; + let slots_bytes = rkyv::to_bytes::(&utility.slots)?; + file.write(slots_bytes.as_slice())?; + Ok(()) + } + + fn read_value(file: &mut File) -> eyre::Result> + where + T: Archive + + Default + SizeMeasurable, + ::Archived: Deserialize>, + { + let mut bytes = vec![0u8; align8(IndexValue::::default().aligned_size())]; + file.read_exact(bytes.as_mut_slice())?; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(bytes.as_slice()); + let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; + Ok(rkyv::deserialize(archived).expect("data should be valid")) + } + + pub fn read_value_with_index(file: &mut File, page_id: PageId, size: usize, index: usize) -> eyre::Result> + where + T: Archive + + Default + SizeMeasurable, + ::Archived: Deserialize>, + { + seek_to_page_start(file, page_id.0)?; + + let offset = Self::get_value_offset(size, index); + file.seek(SeekFrom::Current(offset as i64))?; + Self::read_value(file) + } + + fn get_value_offset(size: usize, value_index: usize) -> usize + where T: Default + SizeMeasurable + { + let mut offset = GENERAL_HEADER_SIZE; + offset += u16::default().aligned_size(); + offset += T::default().aligned_size(); + offset += u16::default().aligned_size(); + offset += u16::default().aligned_size(); + offset += align(size * u16::default().aligned_size()) + 8; + offset += value_index * align8(IndexValue::::default().aligned_size()); + + offset + } + + pub fn persist_value(file: &mut File, page_id: PageId, size: usize, value: IndexValue, mut value_index: u16) -> eyre::Result + where + T: Archive + + Default + + SizeMeasurable + + Eq + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, + ::Archived: Deserialize>, + { + seek_to_page_start(file, page_id.0)?; + + let offset = Self::get_value_offset(size, value_index as usize); + file.seek(SeekFrom::Current(offset as i64))?; + let bytes = rkyv::to_bytes::(&value)?; + file.write(bytes.as_slice())?; + + if value_index != size as u16 - 1 { + let mut value = Self::read_value(file)?; + while value != IndexValue::default() { + value_index += 1; + value = Self::read_value(file)?; + } + } + + Ok(value_index + 1) + } + + pub fn remove_value(file: &mut File, page_id: PageId, size: usize, value_index: u16) -> eyre::Result<()> + where + T: Archive + + Default + + SizeMeasurable + + Eq + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, + ::Archived: Deserialize>, + { + seek_to_page_start(file, page_id.0)?; + + let offset = Self::get_value_offset(size, value_index as usize); + file.seek(SeekFrom::Current(offset as i64))?; + let value = IndexValue::::default(); + let bytes = rkyv::to_bytes::(&value)?; + file.write(bytes.as_slice())?; + + Ok(()) + } + + pub fn get_node(&self) -> Vec> + where T: Clone + Ord + { + let mut node = Vec::with_capacity(self.size as usize); + for slot in &self.slots[..self.current_index as usize] { + node.push(self.index_values[*slot as usize].clone().into()) } + node } - pub fn append_to_tree_index(self, index: &BTreeMultiMap) { - for val in self.index_values { - index.insert(val.key, val.link); + pub fn from_node(node: &Vec> + Clone>, size: usize) -> Self + where T: Clone + Ord + Default + { + let mut page = IndexPage::new(Into::>::into(node.last().expect("should contain at least one key").clone()).key, size); + + for (i, pair) in node.iter().enumerate() { + page.index_values[i] = Into::>::into(pair.clone()); + page.slots[i] = i as u16; } + page.current_index = node.len() as u16; + page.current_length = node.len() as u16; + + page } } impl Persistable for IndexPage where T: Archive - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - >, - ::Archived: - rkyv::Deserialize>, + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + > + Default + SizeMeasurable + Clone, + ::Archived: Deserialize>, { fn as_bytes(&self) -> impl AsRef<[u8]> { - rkyv::to_bytes::(self).unwrap() + let mut bytes = Vec::with_capacity(self.size as usize); + let size_bytes = rkyv::to_bytes::(&self.size).unwrap(); + bytes.extend_from_slice(size_bytes.as_ref()); + let node_id_bytes = rkyv::to_bytes::(&self.node_id).unwrap(); + bytes.extend_from_slice(node_id_bytes.as_ref()); + let current_index_bytes = rkyv::to_bytes::(&self.current_index).unwrap(); + bytes.extend_from_slice(current_index_bytes.as_ref()); + let current_length_bytes = rkyv::to_bytes::(&self.current_length).unwrap(); + bytes.extend_from_slice(current_length_bytes.as_ref()); + let slots_bytes = rkyv::to_bytes::(&self.slots).unwrap(); + bytes.extend_from_slice(slots_bytes.as_ref()); + let values_bytes = rkyv::to_bytes::(&self.index_values).unwrap(); + bytes.extend_from_slice(values_bytes.as_ref()); + + bytes } fn from_bytes(bytes: &[u8]) -> Self { - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; - rkyv::deserialize(archived).expect("data should be valid") + let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[0..2]) }; + let size = rkyv::deserialize::(archived).expect("data should be valid"); + + let mut offset = 2; + let utility_length = Self::index_page_utility_length(size as usize); + let index_utility_bytes = &bytes[offset..offset + utility_length]; + let utility = Self::get_index_page_utility_from_bytes(index_utility_bytes); + offset += utility_length; + + let values_len = size as usize * align8(IndexValue::::default().aligned_size()) + 8; + let mut v = AlignedVec::<4>::new(); + v.extend_from_slice(&bytes[offset..offset + values_len]); + let archived = unsafe { rkyv::access_unchecked::<> as Archive>::Archived>(&v[..]) }; + let index_values = rkyv::deserialize::>, rkyv::rancor::Error>(archived).expect("data should be valid"); + + Self { + slots: utility.slots, + size, + current_index: utility.current_index, + current_length: utility.current_length, + node_id: utility.node_id, + index_values + } + } +} + +#[cfg(test)] +mod tests { + use crate::{align8, Link, IndexPage, Persistable, SizeMeasurable, INNER_PAGE_SIZE}; + use crate::page::IndexValue; + + pub fn get_size_from_data_length(length: usize) -> usize + where + T: Default + SizeMeasurable, + { + let node_id_size = T::default().aligned_size(); + let slot_size = u16::default().aligned_size(); + let index_value_size = align8(T::default().aligned_size() + Link::default().aligned_size()); + let vec_util_size = 8; + let size = (length - node_id_size - slot_size * 2 - vec_util_size * 2) / (slot_size + index_value_size); + size + } + + #[test] + fn test_bytes() { + let size: usize = get_size_from_data_length::(INNER_PAGE_SIZE); + let page = IndexPage::::new(1, size); + let bytes = page.as_bytes(); + let new_page = IndexPage::::from_bytes(bytes.as_ref()); + + assert_eq!(new_page.node_id, page.node_id); + assert_eq!(new_page.current_index, page.current_index); + assert_eq!(new_page.size, page.size); + assert_eq!(new_page.slots, page.slots); + assert_eq!(new_page.index_values, page.index_values); + } + + #[test] + fn test_split() { + let mut page = IndexPage::::new(7, 8); + page.slots = vec![0, 1, 2, 3, 4, 5, 6, 7]; + page.current_index = 8; + page.current_length = 8; + page.index_values = { + let mut v = vec![]; + for i in &page.slots { + v.push(IndexValue { + key: *i as u64, + link: Default::default(), + }) + } + v + }; + + let split = page.split(4); + assert_eq!(page.current_index, 4); + assert_eq!(page.current_length, 4); + assert_eq!(page.slots[page.current_index as usize], 0); + + assert_eq!(page.index_values[0].key, 0); + assert_eq!(page.index_values[1].key, 1); + assert_eq!(page.index_values[2].key, 2); + assert_eq!(page.index_values[3].key, 3); + + assert_eq!(split.current_index, 4); + assert_eq!(split.current_length, 4); + assert_eq!(split.slots[0], 0); + assert_eq!(split.slots[1], 1); + assert_eq!(split.slots[2], 2); + assert_eq!(split.slots[3], 3); + + assert_eq!(split.index_values[0].key, 4); + assert_eq!(split.index_values[1].key, 5); + assert_eq!(split.index_values[2].key, 6); + assert_eq!(split.index_values[3].key, 7); } } diff --git a/src/page/mod.rs b/src/page/mod.rs index 064f08f..7f738c5 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -12,9 +12,9 @@ use data_bucket_codegen::SizeMeasure; use crate::{SizeMeasurable, align}; -pub use data::Data; +pub use data::DataPage; pub use header::{GeneralHeader, DATA_VERSION}; -pub use index::{map_tree_index, IndexPage, IndexValue, TableOfContentsPage, NewIndexPage, get_index_page_size_from_data_length}; +pub use index::{IndexValue, TableOfContentsPage, IndexPage, get_index_page_size_from_data_length}; //pub use iterators::{DataIterator, LinksIterator}; pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; @@ -46,8 +46,8 @@ pub const PAGE_SIZE: usize = 4096 * 4; /// **2 bytes are added by rkyv implicitly.** pub const GENERAL_HEADER_SIZE: usize = 28; -/// Length of the inner part of [`General`] page. It's counted as [`PAGE_SIZE`] -/// without [`General`] page [`GENERAL_HEADER_SIZE`]. +/// Length of the inner part of [`GeneralPage`] page. It's counted as [`PAGE_SIZE`] +/// without [`GeneralPage`] page [`GENERAL_HEADER_SIZE`]. pub const INNER_PAGE_SIZE: usize = PAGE_SIZE - GENERAL_HEADER_SIZE; /// Represents page's identifier. Is unique within the table bounds @@ -91,7 +91,7 @@ impl From for usize { #[derive( Archive, Copy, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, )] -pub struct General { +pub struct GeneralPage { pub header: GeneralHeader, pub inner: Inner, } diff --git a/src/page/util.rs b/src/page/util.rs index 23574c9..74ecb48 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -9,17 +9,16 @@ use super::index::IndexValue; use super::SpaceInfo; use crate::page::header::GeneralHeader; use crate::page::ty::PageType; -use crate::page::General; -use crate::{DataPage, GeneralPage, IndexData, Link, NewIndexPage, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; +use crate::{DataPage, GeneralPage, Link, IndexPage, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; -pub fn map_index_pages_to_general(pages: Vec>) -> Vec>> { +pub fn map_index_pages_to_general(pages: Vec>) -> Vec>> { // We are starting ID's from `1` because `0`'s page in file is info page. let header = &mut GeneralHeader::new(1.into(), PageType::Index, 0.into()); let mut general_pages = vec![]; let mut pages = pages.into_iter(); if let Some(p) = pages.next() { - let general = General { + let general = GeneralPage { header: header.clone(), inner: p, }; @@ -28,7 +27,7 @@ pub fn map_index_pages_to_general(pages: Vec>) -> Vec(pages: Vec>) -> Vec( pages: Vec>, -) -> Vec>> { +) -> Vec>> { // We are starting ID's from `1` because `0`'s page in file is info page. let header = &mut GeneralHeader::new(1.into(), PageType::Data, 0.into()); let mut general_pages = vec![]; let mut pages = pages.into_iter(); if let Some(p) = pages.next() { - let general = General { + let general = GeneralPage { header: header.clone(), inner: p, }; @@ -58,7 +57,7 @@ pub fn map_data_pages_to_general( let mut previous_header = header; for p in pages { - let general = General { + let general = GeneralPage { header: previous_header.follow_with(PageType::Data), inner: p, }; @@ -229,8 +228,8 @@ where let mut buffer: Vec = vec![0u8; header.data_length as usize]; file.read_exact(&mut buffer)?; let archived = - unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) }; - let index_records: Vec> = rkyv::deserialize::, _>(archived) + unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) }; + let index_records: Vec> = rkyv::deserialize::, _>(archived) .expect("data should be valid") .index_values; From a9455aa08bfcac72b3f7e85ce2929e8c31000fc6 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 9 Feb 2025 14:01:14 +0300 Subject: [PATCH 19/24] rename space_info --- src/lib.rs | 2 +- src/page/mod.rs | 2 +- src/page/space_info.rs | 10 +++++----- src/page/util.rs | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b4e94a8..c3ff7c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,7 @@ pub use page::{ parse_data_page, parse_index_page, parse_page, persist_page, seek_by_link, seek_to_page_start, update_at, DataPage, GeneralPage, GeneralHeader, IndexValue, Interval, PageType, - SpaceInfo, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, + SpaceInfoPage, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, TableOfContentsPage, IndexPage, get_index_page_size_from_data_length }; pub use persistence::{PersistableIndex, PersistableTable}; diff --git a/src/page/mod.rs b/src/page/mod.rs index 7f738c5..af84b9d 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -16,7 +16,7 @@ pub use data::DataPage; pub use header::{GeneralHeader, DATA_VERSION}; pub use index::{IndexValue, TableOfContentsPage, IndexPage, get_index_page_size_from_data_length}; //pub use iterators::{DataIterator, LinksIterator}; -pub use space_info::{Interval, SpaceInfo}; +pub use space_info::{Interval, SpaceInfoPage}; pub use ty::PageType; pub use util::{ map_data_pages_to_general, map_index_pages_to_general, parse_data_page, parse_index_page, diff --git a/src/page/space_info.rs b/src/page/space_info.rs index d860a7b..e6a703a 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -1,4 +1,4 @@ -//! [`SpaceInfo`] declaration. +//! [`SpaceInfoPage`] declaration. use rkyv::rancor::Strategy; use rkyv::ser::allocator::ArenaHandle; @@ -18,7 +18,7 @@ pub type SpaceName = String; /// Internal information about a `Space`. Always appears first before all other /// pages in a `Space`. #[derive(Archive, Clone, Deserialize, Debug, PartialEq, Serialize)] -pub struct SpaceInfo { +pub struct SpaceInfoPage { pub id: space::Id, pub page_count: u32, pub name: SpaceName, @@ -39,7 +39,7 @@ impl Interval { } } -impl Persistable for SpaceInfo +impl Persistable for SpaceInfoPage where Pk: Archive + for<'a> Serialize< @@ -60,12 +60,12 @@ where #[cfg(test)] mod test { - use crate::page::{SpaceInfo, INNER_PAGE_SIZE}; + use crate::page::{SpaceInfoPage, INNER_PAGE_SIZE}; use crate::util::Persistable; #[test] fn test_as_bytes() { - let info = SpaceInfo { + let info = SpaceInfoPage { id: 0.into(), page_count: 0, name: "Test".to_string(), diff --git a/src/page/util.rs b/src/page/util.rs index 74ecb48..d064686 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -6,7 +6,7 @@ use rkyv::api::high::HighDeserializer; use rkyv::Archive; use super::index::IndexValue; -use super::SpaceInfo; +use super::SpaceInfoPage; use crate::page::header::GeneralHeader; use crate::page::ty::PageType; use crate::{DataPage, GeneralPage, Link, IndexPage, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; @@ -238,15 +238,15 @@ where pub fn parse_space_info( file: &mut std::fs::File, -) -> eyre::Result { +) -> eyre::Result { file.seek(io::SeekFrom::Start(0))?; let header = parse_general_header(file)?; let mut buffer = vec![0u8; header.data_length as usize]; file.read_exact(&mut buffer)?; let archived = - unsafe { rkyv::access_unchecked::<::Archived>(&buffer[..]) }; - let space_info: SpaceInfo = + unsafe { rkyv::access_unchecked::<::Archived>(&buffer[..]) }; + let space_info: SpaceInfoPage = rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); Ok(space_info) From b9456f8029d84d3ee7a7fe5ab689f0641403cbea Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 9 Feb 2025 14:01:38 +0300 Subject: [PATCH 20/24] fmt --- src/lib.rs | 15 +- src/page/data.rs | 4 +- src/page/header.rs | 2 +- src/page/index/mod.rs | 23 +-- src/page/index/page.rs | 194 +++++++++++++++-------- src/page/index/table_of_contents_page.rs | 62 +++++--- src/page/mod.rs | 11 +- src/page/space_info.rs | 5 +- src/page/util.rs | 2 +- src/util/mod.rs | 2 +- src/util/persistable.rs | 12 +- src/util/sized.rs | 6 +- 12 files changed, 209 insertions(+), 129 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c3ff7c6..6c0a103 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,13 +10,12 @@ pub use link::Link; pub use data_bucket_codegen::SizeMeasure; pub use page::{ - map_data_pages_to_general, map_index_pages_to_general, - parse_data_page, parse_index_page, parse_page, persist_page, - seek_by_link, seek_to_page_start, update_at, DataPage, - GeneralPage, GeneralHeader, IndexValue, Interval, PageType, - SpaceInfoPage, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, - TableOfContentsPage, IndexPage, get_index_page_size_from_data_length + get_index_page_size_from_data_length, map_data_pages_to_general, map_index_pages_to_general, + parse_data_page, parse_index_page, parse_page, persist_page, seek_by_link, seek_to_page_start, + update_at, DataPage, GeneralHeader, GeneralPage, IndexPage, IndexValue, Interval, PageType, + SpaceInfoPage, TableOfContentsPage, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, + PAGE_SIZE, }; pub use persistence::{PersistableIndex, PersistableTable}; -pub use util::{align, Persistable, SizeMeasurable, align8, align_vec}; -pub use space::{Id as SpaceId}; +pub use space::Id as SpaceId; +pub use util::{align, align8, align_vec, Persistable, SizeMeasurable}; diff --git a/src/page/data.rs b/src/page/data.rs index c7ea769..3d948c4 100644 --- a/src/page/data.rs +++ b/src/page/data.rs @@ -1,6 +1,6 @@ -use eyre::{eyre, Result}; use crate::Link; use crate::Persistable; +use eyre::{eyre, Result}; #[derive(Debug)] pub struct DataPage { @@ -61,7 +61,7 @@ impl Persistable for DataPage { data.copy_from_slice(bytes); Self { length: bytes.len() as u32, - data + data, } } } diff --git a/src/page/header.rs b/src/page/header.rs index 92e54b7..4a09447 100644 --- a/src/page/header.rs +++ b/src/page/header.rs @@ -3,10 +3,10 @@ use rkyv::{Archive, Deserialize, Serialize}; use crate::page::ty::PageType; +use crate::page::PageId; use crate::space; use crate::util::Persistable; use crate::PAGE_SIZE; -use crate::page::PageId; pub const DATA_VERSION: u32 = 1u32; diff --git a/src/page/index/mod.rs b/src/page/index/mod.rs index c20a8c8..767c056 100644 --- a/src/page/index/mod.rs +++ b/src/page/index/mod.rs @@ -4,15 +4,17 @@ use rkyv::{Archive, Deserialize, Serialize}; use crate::{Link, SizeMeasurable}; -mod table_of_contents_page; mod page; +mod table_of_contents_page; -pub use page::{IndexPage, get_index_page_size_from_data_length}; +pub use page::{get_index_page_size_from_data_length, IndexPage}; pub use table_of_contents_page::TableOfContentsPage; /// Represents `key/value` pair of B-Tree index, where value is always /// [`data::Link`], as it is represented in primary and secondary indexes. -#[derive(Archive, Clone, Deserialize, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] +#[derive( + Archive, Clone, Deserialize, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, +)] pub struct IndexValue { pub key: T, pub link: Link, @@ -28,7 +30,8 @@ where } impl From> for Pair -where T: Ord +where + T: Ord, { fn from(value: IndexValue) -> Self { Pair { @@ -39,23 +42,25 @@ where T: Ord } impl From> for IndexValue -where T: Ord +where + T: Ord, { fn from(pair: Pair) -> Self { IndexValue { key: pair.key, - link: pair.value + link: pair.value, } } } impl From> for IndexValue -where T: Ord +where + T: Ord, { fn from(pair: MultiPair) -> Self { IndexValue { key: pair.key, - link: pair.value + link: pair.value, } } -} \ No newline at end of file +} diff --git a/src/page/index/page.rs b/src/page/index/page.rs index aa001ae..c303c19 100644 --- a/src/page/index/page.rs +++ b/src/page/index/page.rs @@ -7,16 +7,18 @@ use std::io::{Read, Seek, SeekFrom, Write}; use std::mem; use indexset::core::pair::Pair; -use rkyv::{Archive, Deserialize, Serialize}; use rkyv::de::Pool; use rkyv::rancor::Strategy; use rkyv::ser::allocator::ArenaHandle; -use rkyv::ser::Serializer; use rkyv::ser::sharing::Share; +use rkyv::ser::Serializer; use rkyv::util::AlignedVec; +use rkyv::{Archive, Deserialize, Serialize}; use crate::page::{IndexValue, PageId}; -use crate::{align, align8, seek_to_page_start, Link, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE}; +use crate::{ + align, align8, seek_to_page_start, Link, Persistable, SizeMeasurable, GENERAL_HEADER_SIZE, +}; pub fn get_index_page_size_from_data_length(length: usize) -> usize where @@ -52,9 +54,10 @@ pub struct IndexPageUtility { impl IndexPage { pub fn new(node_id: T, size: usize) -> Self - where T: Default + Clone, + where + T: Default + Clone, { - let slots = vec![0u16; size]; + let slots = vec![0u16; size]; let index_values = vec![IndexValue::default(); size]; Self { size: size as u16, @@ -67,7 +70,8 @@ impl IndexPage { } pub fn split(&mut self, index: usize) -> IndexPage - where T: Clone + Default + where + T: Clone + Default, { let mut new_page = IndexPage::new(self.node_id.clone(), self.size as usize); let mut first_empty_value = u16::MAX; @@ -85,23 +89,29 @@ impl IndexPage { new_page.current_length = self.current_length - index as u16; self.current_index = first_empty_value; - self.node_id = self.index_values[self.slots[index - 1] as usize].key.clone(); + self.node_id = self.index_values[self.slots[index - 1] as usize] + .key + .clone(); self.current_length = index as u16; new_page } fn index_page_utility_length(size: usize) -> usize - where T: Default + SizeMeasurable + where + T: Default + SizeMeasurable, { - T::default().aligned_size() + u16::default().aligned_size() + u16::default().aligned_size() + align(size * u16::default().aligned_size()) + 8 + T::default().aligned_size() + + u16::default().aligned_size() + + u16::default().aligned_size() + + align(size * u16::default().aligned_size()) + + 8 } fn get_index_page_utility_from_bytes(bytes: &[u8]) -> IndexPageUtility where - T: Archive - + Default + SizeMeasurable, - ::Archived: Deserialize>, + T: Archive + Default + SizeMeasurable, + ::Archived: Deserialize>, { let t_size = T::default().aligned_size(); let mut offset = 0; @@ -114,32 +124,37 @@ impl IndexPage { let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..offset + 2]); let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; - let current_index = rkyv::deserialize::(archived).expect("data should be valid"); + let current_index = + rkyv::deserialize::(archived).expect("data should be valid"); offset = offset + 2; let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..offset + 2]); let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; - let current_length = rkyv::deserialize::(archived).expect("data should be valid"); + let current_length = + rkyv::deserialize::(archived).expect("data should be valid"); offset = offset + 2; let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..]); let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; - let slots = rkyv::deserialize::, rkyv::rancor::Error>(archived).expect("data should be valid"); + let slots = rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid"); IndexPageUtility { node_id, current_index, current_length, - slots + slots, } } - pub fn parse_index_page_utility(file: &mut File, page_id: PageId) -> eyre::Result> + pub fn parse_index_page_utility( + file: &mut File, + page_id: PageId, + ) -> eyre::Result> where - T: Archive - + Default + SizeMeasurable, + T: Archive + Default + SizeMeasurable, ::Archived: Deserialize>, { seek_to_page_start(file, page_id.0)?; @@ -148,8 +163,10 @@ impl IndexPage { let mut size_bytes = vec![0u8; 2]; file.read_exact(size_bytes.as_mut_slice())?; - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&size_bytes[0..2]) }; - let size = rkyv::deserialize::(archived).expect("data should be valid"); + let archived = + unsafe { rkyv::access_unchecked::<::Archived>(&size_bytes[0..2]) }; + let size = + rkyv::deserialize::(archived).expect("data should be valid"); let index_utility_len = Self::index_page_utility_length(size as usize); let mut index_utility_bytes = vec![0u8; index_utility_len]; @@ -159,17 +176,23 @@ impl IndexPage { Ok(utility) } - pub fn persist_index_page_utility(file: &mut File, page_id: PageId, utility: IndexPageUtility) -> eyre::Result<()> + pub fn persist_index_page_utility( + file: &mut File, + page_id: PageId, + utility: IndexPageUtility, + ) -> eyre::Result<()> where T: Archive - + Default - + SizeMeasurable - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - >, + + Default + + SizeMeasurable + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, { seek_to_page_start(file, page_id.0)?; - file.seek(SeekFrom::Current(GENERAL_HEADER_SIZE as i64 + u16::default().aligned_size() as i64))?; + file.seek(SeekFrom::Current( + GENERAL_HEADER_SIZE as i64 + u16::default().aligned_size() as i64, + ))?; let node_id_bytes = rkyv::to_bytes::(&utility.node_id)?; file.write(node_id_bytes.as_slice())?; @@ -184,22 +207,26 @@ impl IndexPage { fn read_value(file: &mut File) -> eyre::Result> where - T: Archive - + Default + SizeMeasurable, + T: Archive + Default + SizeMeasurable, ::Archived: Deserialize>, { let mut bytes = vec![0u8; align8(IndexValue::::default().aligned_size())]; file.read_exact(bytes.as_mut_slice())?; let mut v = AlignedVec::<4>::new(); v.extend_from_slice(bytes.as_slice()); - let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; + let archived = + unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; Ok(rkyv::deserialize(archived).expect("data should be valid")) } - pub fn read_value_with_index(file: &mut File, page_id: PageId, size: usize, index: usize) -> eyre::Result> + pub fn read_value_with_index( + file: &mut File, + page_id: PageId, + size: usize, + index: usize, + ) -> eyre::Result> where - T: Archive - + Default + SizeMeasurable, + T: Archive + Default + SizeMeasurable, ::Archived: Deserialize>, { seek_to_page_start(file, page_id.0)?; @@ -210,7 +237,8 @@ impl IndexPage { } fn get_value_offset(size: usize, value_index: usize) -> usize - where T: Default + SizeMeasurable + where + T: Default + SizeMeasurable, { let mut offset = GENERAL_HEADER_SIZE; offset += u16::default().aligned_size(); @@ -223,15 +251,21 @@ impl IndexPage { offset } - pub fn persist_value(file: &mut File, page_id: PageId, size: usize, value: IndexValue, mut value_index: u16) -> eyre::Result + pub fn persist_value( + file: &mut File, + page_id: PageId, + size: usize, + value: IndexValue, + mut value_index: u16, + ) -> eyre::Result where T: Archive - + Default - + SizeMeasurable - + Eq - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - >, + + Default + + SizeMeasurable + + Eq + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, ::Archived: Deserialize>, { seek_to_page_start(file, page_id.0)?; @@ -252,15 +286,20 @@ impl IndexPage { Ok(value_index + 1) } - pub fn remove_value(file: &mut File, page_id: PageId, size: usize, value_index: u16) -> eyre::Result<()> + pub fn remove_value( + file: &mut File, + page_id: PageId, + size: usize, + value_index: u16, + ) -> eyre::Result<()> where T: Archive - + Default - + SizeMeasurable - + Eq - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - >, + + Default + + SizeMeasurable + + Eq + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, ::Archived: Deserialize>, { seek_to_page_start(file, page_id.0)?; @@ -275,7 +314,8 @@ impl IndexPage { } pub fn get_node(&self) -> Vec> - where T: Clone + Ord + where + T: Clone + Ord, { let mut node = Vec::with_capacity(self.size as usize); for slot in &self.slots[..self.current_index as usize] { @@ -285,9 +325,18 @@ impl IndexPage { } pub fn from_node(node: &Vec> + Clone>, size: usize) -> Self - where T: Clone + Ord + Default + where + T: Clone + Ord + Default, { - let mut page = IndexPage::new(Into::>::into(node.last().expect("should contain at least one key").clone()).key, size); + let mut page = IndexPage::new( + Into::>::into( + node.last() + .expect("should contain at least one key") + .clone(), + ) + .key, + size, + ); for (i, pair) in node.iter().enumerate() { page.index_values[i] = Into::>::into(pair.clone()); @@ -303,32 +352,38 @@ impl IndexPage { impl Persistable for IndexPage where T: Archive - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - > + Default + SizeMeasurable + Clone, + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + > + Default + + SizeMeasurable + + Clone, ::Archived: Deserialize>, { fn as_bytes(&self) -> impl AsRef<[u8]> { let mut bytes = Vec::with_capacity(self.size as usize); - let size_bytes = rkyv::to_bytes::(&self.size).unwrap(); + let size_bytes = rkyv::to_bytes::(&self.size).unwrap(); bytes.extend_from_slice(size_bytes.as_ref()); - let node_id_bytes = rkyv::to_bytes::(&self.node_id).unwrap(); + let node_id_bytes = rkyv::to_bytes::(&self.node_id).unwrap(); bytes.extend_from_slice(node_id_bytes.as_ref()); - let current_index_bytes = rkyv::to_bytes::(&self.current_index).unwrap(); + let current_index_bytes = + rkyv::to_bytes::(&self.current_index).unwrap(); bytes.extend_from_slice(current_index_bytes.as_ref()); - let current_length_bytes = rkyv::to_bytes::(&self.current_length).unwrap(); + let current_length_bytes = + rkyv::to_bytes::(&self.current_length).unwrap(); bytes.extend_from_slice(current_length_bytes.as_ref()); - let slots_bytes = rkyv::to_bytes::(&self.slots).unwrap(); + let slots_bytes = rkyv::to_bytes::(&self.slots).unwrap(); bytes.extend_from_slice(slots_bytes.as_ref()); - let values_bytes = rkyv::to_bytes::(&self.index_values).unwrap(); + let values_bytes = rkyv::to_bytes::(&self.index_values).unwrap(); bytes.extend_from_slice(values_bytes.as_ref()); bytes } fn from_bytes(bytes: &[u8]) -> Self { - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[0..2]) }; - let size = rkyv::deserialize::(archived).expect("data should be valid"); + let archived = + unsafe { rkyv::access_unchecked::<::Archived>(&bytes[0..2]) }; + let size = + rkyv::deserialize::(archived).expect("data should be valid"); let mut offset = 2; let utility_length = Self::index_page_utility_length(size as usize); @@ -339,8 +394,10 @@ where let values_len = size as usize * align8(IndexValue::::default().aligned_size()) + 8; let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..offset + values_len]); - let archived = unsafe { rkyv::access_unchecked::<> as Archive>::Archived>(&v[..]) }; - let index_values = rkyv::deserialize::>, rkyv::rancor::Error>(archived).expect("data should be valid"); + let archived = + unsafe { rkyv::access_unchecked::<> as Archive>::Archived>(&v[..]) }; + let index_values = rkyv::deserialize::>, rkyv::rancor::Error>(archived) + .expect("data should be valid"); Self { slots: utility.slots, @@ -348,15 +405,15 @@ where current_index: utility.current_index, current_length: utility.current_length, node_id: utility.node_id, - index_values + index_values, } } } #[cfg(test)] mod tests { - use crate::{align8, Link, IndexPage, Persistable, SizeMeasurable, INNER_PAGE_SIZE}; use crate::page::IndexValue; + use crate::{align8, IndexPage, Link, Persistable, SizeMeasurable, INNER_PAGE_SIZE}; pub fn get_size_from_data_length(length: usize) -> usize where @@ -366,7 +423,8 @@ mod tests { let slot_size = u16::default().aligned_size(); let index_value_size = align8(T::default().aligned_size() + Link::default().aligned_size()); let vec_util_size = 8; - let size = (length - node_id_size - slot_size * 2 - vec_util_size * 2) / (slot_size + index_value_size); + let size = (length - node_id_size - slot_size * 2 - vec_util_size * 2) + / (slot_size + index_value_size); size } diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index 643d594..676f222 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -1,12 +1,12 @@ use std::collections::BTreeMap; -use rkyv::{Archive, Deserialize, Serialize}; use rkyv::api::high::HighDeserializer; use rkyv::rancor::Strategy; use rkyv::ser::allocator::ArenaHandle; -use rkyv::ser::Serializer; use rkyv::ser::sharing::Share; +use rkyv::ser::Serializer; use rkyv::util::AlignedVec; +use rkyv::{Archive, Deserialize, Serialize}; use crate::page::PageId; use crate::{align, Persistable, SizeMeasurable}; @@ -20,51 +20,58 @@ pub struct TableOfContentsPage { } impl Default for TableOfContentsPage -where T: SizeMeasurable +where + T: SizeMeasurable, { fn default() -> Self { Self { records: BTreeMap::new(), empty_pages: vec![], - estimated_size: usize::default().aligned_size() + Option::::default().aligned_size(), + estimated_size: usize::default().aligned_size() + + Option::::default().aligned_size(), } } } -impl TableOfContentsPage -{ - +impl TableOfContentsPage { pub fn estimated_size(&self) -> usize { self.estimated_size } pub fn insert(&mut self, val: T, page_id: PageId) - where T: Ord + Eq + SizeMeasurable + where + T: Ord + Eq + SizeMeasurable, { self.estimated_size += align(val.aligned_size() + page_id.0.aligned_size()); let _ = self.records.insert(val, page_id); } pub fn pop_empty_page(&mut self) -> Option - where T: SizeMeasurable + where + T: SizeMeasurable, { if self.empty_pages.is_empty() { - return None + return None; } - let val = self.empty_pages.pop().expect("should not be empty as checked before"); + let val = self + .empty_pages + .pop() + .expect("should not be empty as checked before"); self.estimated_size -= val.aligned_size(); Some(val) } pub fn get(&self, val: &T) -> Option - where T: Ord + Eq + where + T: Ord + Eq, { self.records.get(val).copied() } pub fn remove(&mut self, val: &T) -> PageId - where T: Ord + Eq + SizeMeasurable + where + T: Ord + Eq + SizeMeasurable, { let id = self.remove_without_record(val); self.empty_pages.push(id); @@ -72,24 +79,33 @@ impl TableOfContentsPage } pub fn remove_without_record(&mut self, val: &T) -> PageId - where T: Ord + Eq + SizeMeasurable + where + T: Ord + Eq + SizeMeasurable, { self.estimated_size -= align(val.aligned_size() + PageId::default().0.aligned_size()); self.estimated_size += PageId::default().0.aligned_size(); - let id = self.records.remove(val).expect("value should be available if remove is called"); + let id = self + .records + .remove(val) + .expect("value should be available if remove is called"); id } pub fn update_key(&mut self, old_key: &T, new_key: T) - where T: Ord + Eq + where + T: Ord + Eq, { - let id = self.records.remove(old_key).expect("value should be available if update is called"); + let id = self + .records + .remove(old_key) + .expect("value should be available if update is called"); self.records.insert(new_key, id); } pub fn contains(&self, val: &T) -> bool - where T: Ord + Eq + where + T: Ord + Eq, { self.records.contains_key(val) } @@ -111,11 +127,12 @@ impl IntoIterator for TableOfContentsPage { impl Persistable for TableOfContentsPage where T: Archive - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - > + Ord + Eq, + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + > + Ord + + Eq, ::Archived: - rkyv::Deserialize> + Ord + Eq, + rkyv::Deserialize> + Ord + Eq, { fn as_bytes(&self) -> impl AsRef<[u8]> { rkyv::to_bytes::(self).unwrap() @@ -126,4 +143,3 @@ where rkyv::deserialize(archived).expect("data should be valid") } } - diff --git a/src/page/mod.rs b/src/page/mod.rs index af84b9d..3bd285e 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -6,22 +6,21 @@ mod space_info; mod ty; mod util; +use data_bucket_codegen::SizeMeasure; use derive_more::{Display, From, Into}; use rkyv::{Archive, Deserialize, Serialize}; -use data_bucket_codegen::SizeMeasure; -use crate::{SizeMeasurable, align}; +use crate::{align, SizeMeasurable}; pub use data::DataPage; pub use header::{GeneralHeader, DATA_VERSION}; -pub use index::{IndexValue, TableOfContentsPage, IndexPage, get_index_page_size_from_data_length}; +pub use index::{get_index_page_size_from_data_length, IndexPage, IndexValue, TableOfContentsPage}; //pub use iterators::{DataIterator, LinksIterator}; pub use space_info::{Interval, SpaceInfoPage}; pub use ty::PageType; pub use util::{ map_data_pages_to_general, map_index_pages_to_general, parse_data_page, parse_index_page, - parse_page, parse_space_info, persist_page, - seek_by_link, seek_to_page_start, update_at, + parse_page, parse_space_info, persist_page, seek_by_link, seek_to_page_start, update_at, }; // TODO: Move to config @@ -67,7 +66,7 @@ pub const INNER_PAGE_SIZE: usize = PAGE_SIZE - GENERAL_HEADER_SIZE; PartialEq, PartialOrd, Serialize, - SizeMeasure + SizeMeasure, )] pub struct PageId(u32); diff --git a/src/page/space_info.rs b/src/page/space_info.rs index e6a703a..fd77997 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -1,12 +1,12 @@ //! [`SpaceInfoPage`] declaration. +use rkyv::api::high::HighDeserializer; use rkyv::rancor::Strategy; use rkyv::ser::allocator::ArenaHandle; use rkyv::ser::sharing::Share; use rkyv::ser::Serializer; use rkyv::util::AlignedVec; use rkyv::{Archive, Deserialize, Serialize}; -use rkyv::api::high::HighDeserializer; use crate::util::Persistable; use crate::{space, Link}; @@ -45,8 +45,7 @@ where + for<'a> Serialize< Strategy, Share>, rkyv::rancor::Error>, >, - ::Archived: - rkyv::Deserialize>, + ::Archived: rkyv::Deserialize>, { fn as_bytes(&self) -> impl AsRef<[u8]> { rkyv::to_bytes::(self).unwrap() diff --git a/src/page/util.rs b/src/page/util.rs index d064686..794cbd8 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -9,7 +9,7 @@ use super::index::IndexValue; use super::SpaceInfoPage; use crate::page::header::GeneralHeader; use crate::page::ty::PageType; -use crate::{DataPage, GeneralPage, Link, IndexPage, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; +use crate::{DataPage, GeneralPage, IndexPage, Link, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; pub fn map_index_pages_to_general(pages: Vec>) -> Vec>> { // We are starting ID's from `1` because `0`'s page in file is info page. diff --git a/src/util/mod.rs b/src/util/mod.rs index 1721f80..237a16d 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -2,4 +2,4 @@ mod persistable; mod sized; pub use persistable::Persistable; -pub use sized::{align, align8, SizeMeasurable, align_vec}; +pub use sized::{align, align8, align_vec, SizeMeasurable}; diff --git a/src/util/persistable.rs b/src/util/persistable.rs index d898478..e8c776d 100644 --- a/src/util/persistable.rs +++ b/src/util/persistable.rs @@ -1,11 +1,11 @@ +use crate::SizeMeasurable; +use rkyv::de::Pool; use rkyv::rancor::Strategy; use rkyv::ser::allocator::ArenaHandle; use rkyv::ser::sharing::Share; use rkyv::ser::Serializer; use rkyv::util::AlignedVec; use rkyv::{Archive, Deserialize, Serialize}; -use rkyv::de::Pool; -use crate::SizeMeasurable; pub trait Persistable { fn as_bytes(&self) -> impl AsRef<[u8]>; @@ -15,9 +15,11 @@ pub trait Persistable { impl Persistable for Vec where T: Archive - + for<'a> Serialize< - Strategy, Share>, rkyv::rancor::Error>, - > + Default + SizeMeasurable + Clone, + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + > + Default + + SizeMeasurable + + Clone, ::Archived: Deserialize>, { fn as_bytes(&self) -> impl AsRef<[u8]> { diff --git a/src/util/sized.rs b/src/util/sized.rs index 14ffc0b..3e79f2a 100644 --- a/src/util/sized.rs +++ b/src/util/sized.rs @@ -1,6 +1,6 @@ use crate::link::{Link, LINK_LENGTH}; -use std::{mem, sync::Arc}; use rkyv::util::AlignedVec; +use std::{mem, sync::Arc}; use uuid::Uuid; pub const fn align(len: usize) -> usize { @@ -127,7 +127,9 @@ impl SizeMeasurable for lockfree::set::Set { } impl SizeMeasurable for Option -where T: SizeMeasurable{ +where + T: SizeMeasurable, +{ fn aligned_size(&self) -> usize { size_of::>() } From b274eabf8f4018a35a41084de47e04815484a507 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Thu, 6 Mar 2025 20:32:22 +0300 Subject: [PATCH 21/24] corrections --- src/page/space_info.rs | 2 +- src/page/util.rs | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/page/space_info.rs b/src/page/space_info.rs index fd77997..314a275 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -21,11 +21,11 @@ pub type SpaceName = String; pub struct SpaceInfoPage { pub id: space::Id, pub page_count: u32, + pub pk_gen_state: Pk, pub name: SpaceName, pub row_schema: Vec<(String, String)>, pub primary_key_fields: Vec, pub secondary_index_types: Vec<(String, String)>, - pub pk_gen_state: Pk, pub empty_links_list: Vec, } diff --git a/src/page/util.rs b/src/page/util.rs index 794cbd8..958213b 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -105,7 +105,7 @@ pub fn seek_by_link(file: &mut std::fs::File, link: Link) -> eyre::Result<()> { Ok(()) } -pub fn update_at( +pub fn update_at( file: &mut std::fs::File, link: Link, new_data: &[u8], @@ -118,7 +118,7 @@ pub fn update_at( )); } - if (link.offset + link.length) as usize > DATA_LENGTH { + if (link.offset + link.length) > DATA_LENGTH { return Err(eyre!( "Link range (offset: {}, length: {}) exceeds data bounds ({})", link.offset, @@ -154,8 +154,13 @@ where { seek_to_page_start(file, index)?; let header = parse_general_header(file)?; + let length = if header.data_length == 0 { + PAGE_SIZE + } else { + header.data_length + }; - let mut buffer: Vec = vec![0u8; header.data_length as usize]; + let mut buffer: Vec = vec![0u8; length as usize]; file.read_exact(&mut buffer)?; let info = Page::from_bytes(buffer.as_ref()); From 1f88f2e4bb456d17e1e47cd47d475e0f17e44c5e Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Fri, 7 Mar 2025 00:04:19 +0300 Subject: [PATCH 22/24] corrections --- Cargo.toml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b2269c8..9622527 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,17 +11,11 @@ repository = "https://github.com/pathscale/DataBucket" description = "DataBucket is container for WorkTable's data" [dependencies] -data_bucket_derive = {path = "codegen" , version = "0.1.0"} +data_bucket_derive = { path = "codegen", version = "0.1.0" } eyre = "0.6.12" derive_more = { version = "1.0.0", features = ["from", "error", "display", "into"] } -rkyv = { version = "0.8.9", features = ["uuid-1"]} -scc = "2.1.16" +rkyv = { version = "0.8.9", features = ["uuid-1"] } lockfree = "0.5.1" -serde = { version = "1.0.215", features = ["derive"] } uuid = { version = "1.11.0", features = ["v4"] } -bitcode = "0.6.3" -indexset = { path = "../indexset", version = "0.10.3", features = ["concurrent", "cdc", "multimap"] } - -[dev-dependencies] -mktemp = "0.4.0" \ No newline at end of file +indexset = { version = "0.11.1", features = ["concurrent", "cdc", "multimap"] } From 784a3f91cfa57a3a284442073a22feb805e598a7 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Fri, 7 Mar 2025 00:12:04 +0300 Subject: [PATCH 23/24] clippy fix --- src/page/header.rs | 2 +- src/page/index/page.rs | 22 ++++++++++------------ src/page/index/table_of_contents_page.rs | 8 +++----- src/page/space_info.rs | 2 +- src/page/util.rs | 7 ++++--- src/persistence/data/mod.rs | 1 + src/persistence/data/rkyv_data.rs | 2 +- src/persistence/data/types.rs | 2 +- src/util/persistable.rs | 6 +++--- 9 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/page/header.rs b/src/page/header.rs index 4a09447..d5e5644 100644 --- a/src/page/header.rs +++ b/src/page/header.rs @@ -92,7 +92,7 @@ impl Persistable for GeneralHeader { } fn from_bytes(bytes: &[u8]) -> Self { - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; + let archived = unsafe { rkyv::access_unchecked::<::Archived>(bytes) }; rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid") } } diff --git a/src/page/index/page.rs b/src/page/index/page.rs index c303c19..5a963c8 100644 --- a/src/page/index/page.rs +++ b/src/page/index/page.rs @@ -28,9 +28,7 @@ where let slot_size = u16::default().aligned_size(); let index_value_size = align8(T::default().aligned_size() + Link::default().aligned_size()); let vec_util_size = 8; - let size = (length - node_id_size - slot_size * 3 - vec_util_size * 2) - / (slot_size + index_value_size); - size + (length - node_id_size - slot_size * 3 - vec_util_size * 2) / (slot_size + index_value_size) } /// Represents a page, which is filled with [`IndexValue`]'s of some index. @@ -127,14 +125,14 @@ impl IndexPage { let current_index = rkyv::deserialize::(archived).expect("data should be valid"); - offset = offset + 2; + offset += 2; let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..offset + 2]); let archived = unsafe { rkyv::access_unchecked::<::Archived>(&v[..]) }; let current_length = rkyv::deserialize::(archived).expect("data should be valid"); - offset = offset + 2; + offset += 2; let mut v = AlignedVec::<4>::new(); v.extend_from_slice(&bytes[offset..]); let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&v[..]) }; @@ -195,13 +193,13 @@ impl IndexPage { ))?; let node_id_bytes = rkyv::to_bytes::(&utility.node_id)?; - file.write(node_id_bytes.as_slice())?; + file.write_all(node_id_bytes.as_slice())?; let current_index_bytes = rkyv::to_bytes::(&utility.current_index)?; - file.write(current_index_bytes.as_slice())?; + file.write_all(current_index_bytes.as_slice())?; let current_length_bytes = rkyv::to_bytes::(&utility.current_length)?; - file.write(current_length_bytes.as_slice())?; + file.write_all(current_length_bytes.as_slice())?; let slots_bytes = rkyv::to_bytes::(&utility.slots)?; - file.write(slots_bytes.as_slice())?; + file.write_all(slots_bytes.as_slice())?; Ok(()) } @@ -273,7 +271,7 @@ impl IndexPage { let offset = Self::get_value_offset(size, value_index as usize); file.seek(SeekFrom::Current(offset as i64))?; let bytes = rkyv::to_bytes::(&value)?; - file.write(bytes.as_slice())?; + file.write_all(bytes.as_slice())?; if value_index != size as u16 - 1 { let mut value = Self::read_value(file)?; @@ -308,7 +306,7 @@ impl IndexPage { file.seek(SeekFrom::Current(offset as i64))?; let value = IndexValue::::default(); let bytes = rkyv::to_bytes::(&value)?; - file.write(bytes.as_slice())?; + file.write_all(bytes.as_slice())?; Ok(()) } @@ -324,7 +322,7 @@ impl IndexPage { node } - pub fn from_node(node: &Vec> + Clone>, size: usize) -> Self + pub fn from_node(node: &[impl Into> + Clone], size: usize) -> Self where T: Clone + Ord + Default, { diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index 676f222..0ba55ae 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -85,11 +85,9 @@ impl TableOfContentsPage { self.estimated_size -= align(val.aligned_size() + PageId::default().0.aligned_size()); self.estimated_size += PageId::default().0.aligned_size(); - let id = self - .records + self.records .remove(val) - .expect("value should be available if remove is called"); - id + .expect("value should be available if remove is called") } pub fn update_key(&mut self, old_key: &T, new_key: T) @@ -139,7 +137,7 @@ where } fn from_bytes(bytes: &[u8]) -> Self { - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; + let archived = unsafe { rkyv::access_unchecked::<::Archived>(bytes) }; rkyv::deserialize(archived).expect("data should be valid") } } diff --git a/src/page/space_info.rs b/src/page/space_info.rs index 314a275..2f97a91 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -52,7 +52,7 @@ where } fn from_bytes(bytes: &[u8]) -> Self { - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; + let archived = unsafe { rkyv::access_unchecked::<::Archived>(bytes) }; rkyv::deserialize(archived).expect("data should be valid") } } diff --git a/src/page/util.rs b/src/page/util.rs index 958213b..c82f7fe 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -19,7 +19,7 @@ pub fn map_index_pages_to_general(pages: Vec>) -> Vec( let mut pages = pages.into_iter(); if let Some(p) = pages.next() { let general = GeneralPage { - header: header.clone(), + header: *header, inner: p, }; general_pages.push(general); @@ -128,7 +128,7 @@ pub fn update_at( } seek_by_link(file, link)?; - file.write(new_data)?; + file.write_all(new_data)?; Ok(()) } @@ -179,6 +179,7 @@ pub fn parse_data_page( let mut buffer = [0u8; INNER_PAGE_SIZE]; if header.next_id == 0.into() { + #[allow(clippy::unused_io_amount)] file.read(&mut buffer)?; } else { file.read_exact(&mut buffer)?; diff --git a/src/persistence/data/mod.rs b/src/persistence/data/mod.rs index 290d568..a374587 100644 --- a/src/persistence/data/mod.rs +++ b/src/persistence/data/mod.rs @@ -6,6 +6,7 @@ pub use types::DataTypeValue; pub trait DataType { fn advance_accum(&self, accum: &mut usize); + #[allow(clippy::wrong_self_convention)] fn from_pointer(&self, pointer: *const u8, start_pointer: *const u8) -> DataTypeValue; fn advance_pointer_for_padding(&self, pointer: &mut *const u8, start_pointer: *const u8); fn advance_pointer(&self, pointer: &mut *const u8); diff --git a/src/persistence/data/rkyv_data.rs b/src/persistence/data/rkyv_data.rs index 97f4811..b65e5ec 100644 --- a/src/persistence/data/rkyv_data.rs +++ b/src/persistence/data/rkyv_data.rs @@ -3,7 +3,7 @@ use std::str::FromStr; pub fn parse_archived_row, S2: AsRef>( buf: &[u8], - columns: &Vec<(S1, S2)>, + columns: &[(S1, S2)], ) -> Vec { let mut data_length: usize = { let mut accum: usize = 0; diff --git a/src/persistence/data/types.rs b/src/persistence/data/types.rs index c3d0d31..b30ed33 100644 --- a/src/persistence/data/types.rs +++ b/src/persistence/data/types.rs @@ -52,7 +52,7 @@ impl FromStr for DataTypeValue { type Err = (); fn from_str(s: &str) -> Result { - Ok(match s.as_ref() { + Ok(match s { "String" => String::default().into(), "i128" => i128::default().into(), "i64" => i64::default().into(), diff --git a/src/util/persistable.rs b/src/util/persistable.rs index e8c776d..eb342a3 100644 --- a/src/util/persistable.rs +++ b/src/util/persistable.rs @@ -27,7 +27,7 @@ where } fn from_bytes(bytes: &[u8]) -> Self { - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; + let archived = unsafe { rkyv::access_unchecked::<::Archived>(bytes) }; rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid") } } @@ -38,7 +38,7 @@ impl Persistable for u8 { } fn from_bytes(bytes: &[u8]) -> Self { - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; + let archived = unsafe { rkyv::access_unchecked::<::Archived>(bytes) }; rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid") } } @@ -49,7 +49,7 @@ impl Persistable for String { } fn from_bytes(bytes: &[u8]) -> Self { - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&bytes[..]) }; + let archived = unsafe { rkyv::access_unchecked::<::Archived>(bytes) }; rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid") } } From 00c6740c7f0ad9db1842d9737762c9807aa6a9b1 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Fri, 7 Mar 2025 00:39:54 +0300 Subject: [PATCH 24/24] clippy fix --- src/page/index/page.rs | 4 +--- src/persistence/data/rkyv_data.rs | 18 ++++++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/page/index/page.rs b/src/page/index/page.rs index 5a963c8..680371d 100644 --- a/src/page/index/page.rs +++ b/src/page/index/page.rs @@ -421,9 +421,7 @@ mod tests { let slot_size = u16::default().aligned_size(); let index_value_size = align8(T::default().aligned_size() + Link::default().aligned_size()); let vec_util_size = 8; - let size = (length - node_id_size - slot_size * 2 - vec_util_size * 2) - / (slot_size + index_value_size); - size + (length - node_id_size - slot_size * 2 - vec_util_size * 2) / (slot_size + index_value_size) } #[test] diff --git a/src/persistence/data/rkyv_data.rs b/src/persistence/data/rkyv_data.rs index b65e5ec..591afb9 100644 --- a/src/persistence/data/rkyv_data.rs +++ b/src/persistence/data/rkyv_data.rs @@ -39,6 +39,7 @@ mod test { use super::parse_archived_row; use crate::persistence::data::types::DataTypeValue; use rkyv::{Archive, Deserialize, Serialize}; + use std::f64::consts::PI; #[derive(Archive, Serialize, Deserialize, Debug)] struct Struct1 { @@ -51,7 +52,7 @@ mod test { string1: "000000000000000".to_string(), }) .unwrap(); - let parsed = parse_archived_row(&buffer, &vec![("string1", "String")]); + let parsed = parse_archived_row(&buffer, &[("string1", "String")]); assert_eq!( parsed, [DataTypeValue::String("000000000000000".to_string())] @@ -66,7 +67,7 @@ mod test { #[test] fn test_parse_archived_row_int() { let buffer = rkyv::to_bytes::(&Struct2 { int1: 3 }).unwrap(); - let parsed = parse_archived_row(&buffer, &vec![("int1", "i32")]); + let parsed = parse_archived_row(&buffer, &[("int1", "i32")]); assert_eq!(parsed, [DataTypeValue::I32(3)]) } @@ -77,12 +78,9 @@ mod test { #[test] fn test_parse_archived_row_float() { - let buffer = rkyv::to_bytes::(&Struct3 { - float1: 3.14159265358, - }) - .unwrap(); - let parsed = parse_archived_row(&buffer, &vec![("float1", "f64")]); - assert_eq!(parsed, [DataTypeValue::F64(3.14159265358)]) + let buffer = rkyv::to_bytes::(&Struct3 { float1: PI }).unwrap(); + let parsed = parse_archived_row(&buffer, &[("float1", "f64")]); + assert_eq!(parsed, [DataTypeValue::F64(PI)]) } #[derive(Archive, Serialize, Deserialize, Debug)] @@ -113,7 +111,7 @@ mod test { int6: 7, string3: "x".to_string(), int7: 8, - float1: 3.14159265358, + float1: PI, }) .unwrap(); let parsed = parse_archived_row( @@ -145,7 +143,7 @@ mod test { DataTypeValue::U8(7), DataTypeValue::String("x".to_string()), DataTypeValue::I8(8), - DataTypeValue::F64(3.14159265358f64), + DataTypeValue::F64(PI), ] ) }