diff --git a/Cargo.lock b/Cargo.lock index 2749af17385..2cb59d1e9d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10415,6 +10415,7 @@ dependencies = [ "flatbuffers", "futures", "getrandom 0.3.4", + "glob", "itertools 0.14.0", "kanal", "object_store", diff --git a/benchmarks/duckdb-bench/src/lib.rs b/benchmarks/duckdb-bench/src/lib.rs index 48b3cc09abb..f6b081c33ec 100644 --- a/benchmarks/duckdb-bench/src/lib.rs +++ b/benchmarks/duckdb-bench/src/lib.rs @@ -14,14 +14,14 @@ use vortex_bench::Benchmark; use vortex_bench::Format; use vortex_bench::IdempotentPath; use vortex_bench::generate_duckdb_registration_sql; -use vortex_duckdb::duckdb::Config; -use vortex_duckdb::duckdb::Connection; -use vortex_duckdb::duckdb::Database; +use vortex_duckdb::duckdb::OwnedConfig; +use vortex_duckdb::duckdb::OwnedConnection; +use vortex_duckdb::duckdb::OwnedDatabase; /// DuckDB context for benchmarks. pub struct DuckClient { - pub db: Database, - pub connection: Connection, + pub db: OwnedDatabase, + pub connection: OwnedConnection, pub db_path: PathBuf, pub threads: Option, } @@ -65,8 +65,8 @@ impl DuckClient { pub fn open_and_setup_database( path: Option, threads: Option, - ) -> Result<(Database, Connection)> { - let mut config = Config::new().vortex_expect("failed to create duckdb config"); + ) -> Result<(OwnedDatabase, OwnedConnection)> { + let mut config = OwnedConfig::new().vortex_expect("failed to create duckdb config"); // Set DuckDB thread count if specified if let Some(thread_count) = threads { @@ -74,8 +74,8 @@ impl DuckClient { } let db = match path { - Some(path) => Database::open_with_config(path, config), - None => Database::open_in_memory_with_config(config), + Some(path) => OwnedDatabase::open_with_config(path, config), + None => OwnedDatabase::open_in_memory_with_config(config), }?; let connection = db.connect()?; @@ -100,17 +100,6 @@ impl DuckClient { } pub fn reopen(&mut self) -> Result<()> { - // take ownership of the connection & database - let mut connection = 
unsafe { Connection::borrow(self.connection.as_ptr()) }; - std::mem::swap(&mut self.connection, &mut connection); - let mut db = unsafe { Database::borrow(self.db.as_ptr()) }; - std::mem::swap(&mut self.db, &mut db); - - // drop the connection, then the database (order might be important?) - // NB: self.db and self.connection will be dangling pointers, which we'll fix below - drop(connection); - drop(db); - let (mut db, mut connection) = Self::open_and_setup_database(Some(self.db_path.clone()), self.threads)?; diff --git a/vortex-duckdb/cpp/copy_function.cpp b/vortex-duckdb/cpp/copy_function.cpp index 9bb6a0e974f..6b36d0f6a09 100644 --- a/vortex-duckdb/cpp/copy_function.cpp +++ b/vortex-duckdb/cpp/copy_function.cpp @@ -45,16 +45,20 @@ unique_ptr c_bind_one(ClientContext &context, const vector &column_types) { auto c_column_names = vector(); + c_column_names.reserve(column_names.size()); for (const auto &col_id : column_names) { c_column_names.push_back(const_cast(col_id.c_str())); } auto c_column_types = vector(); + c_column_types.reserve(c_column_types.size()); for (auto &col_type : column_types) { c_column_types.push_back(reinterpret_cast(const_cast(&col_type))); } duckdb_vx_error error_out = nullptr; + // TODO(myrrc): do we pass ownership of c_column_names in bind? 
+ // If yes, it's a UB as we'd double delete on function return auto ffi_bind_data = copy_vtab_one.bind(reinterpret_cast(&info), c_column_names.data(), c_column_names.size(), diff --git a/vortex-duckdb/cpp/file_system.cpp b/vortex-duckdb/cpp/file_system.cpp index a4973a62727..35125ba521a 100644 --- a/vortex-duckdb/cpp/file_system.cpp +++ b/vortex-duckdb/cpp/file_system.cpp @@ -33,7 +33,7 @@ duckdb_vx_fs_open(duckdb_client_context ctx, const char *path, duckdb_vx_error * } try { - auto *client_context = reinterpret_cast(ctx); + auto client_context = reinterpret_cast(ctx); auto &fs = FileSystem::GetFileSystem(*client_context); auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ | FileFlags::FILE_FLAGS_PARALLEL_ACCESS); return reinterpret_cast(new FileHandleWrapper(std::move(handle))); @@ -51,7 +51,7 @@ duckdb_vx_fs_create(duckdb_client_context ctx, const char *path, duckdb_vx_error } try { - auto *client_context = reinterpret_cast(ctx); + auto client_context = reinterpret_cast(ctx); auto &fs = FileSystem::GetFileSystem(*client_context); auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE | @@ -142,7 +142,7 @@ extern "C" duckdb_state duckdb_vx_fs_list_files(duckdb_client_context ctx, } try { - auto *client_context = reinterpret_cast(ctx); + auto client_context = reinterpret_cast(ctx); auto &fs = FileSystem::GetFileSystem(*client_context); fs.ListFiles(directory, diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index d6481cafc70..0b9c28c0eb7 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -60,17 +60,18 @@ use vortex::error::vortex_err; use crate::cpp::DUCKDB_TYPE; use crate::duckdb::LogicalType; +use crate::duckdb::OwnedLogicalType; pub trait FromLogicalType { fn from_logical_type( - logical_type: LogicalType, + logical_type: &LogicalType, nullability: Nullability, ) -> VortexResult; } impl FromLogicalType for DType { fn from_logical_type( 
- logical_type: LogicalType, + logical_type: &LogicalType, nullability: Nullability, ) -> VortexResult { Ok(match logical_type.as_type_id() { @@ -125,21 +126,27 @@ impl FromLogicalType for DType { DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_NS => { DType::Extension(Timestamp::new(TimeUnit::Nanoseconds, nullability).erased()) } - DUCKDB_TYPE::DUCKDB_TYPE_ARRAY => DType::FixedSizeList( - Arc::new(DType::from_logical_type( - logical_type.array_child_type(), - Nullability::Nullable, - )?), - logical_type.array_type_array_size(), - nullability, - ), - DUCKDB_TYPE::DUCKDB_TYPE_LIST => DType::List( - Arc::new(DType::from_logical_type( - logical_type.list_child_type(), - Nullability::Nullable, - )?), - nullability, - ), + DUCKDB_TYPE::DUCKDB_TYPE_ARRAY => { + let child_type = logical_type.array_child_type(); + DType::FixedSizeList( + Arc::new(DType::from_logical_type( + &child_type, + Nullability::Nullable, + )?), + logical_type.array_type_array_size(), + nullability, + ) + } + DUCKDB_TYPE::DUCKDB_TYPE_LIST => { + let child_type = logical_type.list_child_type(); + DType::List( + Arc::new(DType::from_logical_type( + &child_type, + Nullability::Nullable, + )?), + nullability, + ) + } DUCKDB_TYPE::DUCKDB_TYPE_STRUCT => DType::Struct( (0..logical_type.struct_type_child_count()) .map(|i| { @@ -147,7 +154,7 @@ impl FromLogicalType for DType { let child_type = logical_type.struct_child_type(i); Ok(( child_name, - DType::from_logical_type(child_type, Nullability::Nullable)?, + DType::from_logical_type(&child_type, Nullability::Nullable)?, )) }) .collect::>()?, @@ -168,9 +175,9 @@ impl FromLogicalType for DType { } } -pub fn from_duckdb_table(iter: I) -> VortexResult +pub fn from_duckdb_table<'a, I, S>(iter: I) -> VortexResult where - I: Iterator, + I: Iterator, S: AsRef, { iter.map(|(name, type_, nullability)| { @@ -182,15 +189,15 @@ where .collect::>() } -impl TryFrom for LogicalType { +impl TryFrom for OwnedLogicalType { type Error = VortexError; fn try_from(value: DType) -> Result { - 
LogicalType::try_from(&value) + OwnedLogicalType::try_from(&value) } } -impl TryFrom<&DType> for LogicalType { +impl TryFrom<&DType> for OwnedLogicalType { type Error = VortexError; /// Converts a Vortex data type to a DuckDB logical type. @@ -209,25 +216,25 @@ impl TryFrom<&DType> for LogicalType { let duckdb_type = match dtype { DType::Null => DUCKDB_TYPE::DUCKDB_TYPE_SQLNULL, DType::Bool(_) => DUCKDB_TYPE::DUCKDB_TYPE_BOOLEAN, - DType::Primitive(ptype, _) => return LogicalType::try_from(*ptype), + DType::Primitive(ptype, _) => return OwnedLogicalType::try_from(*ptype), DType::Utf8(_) => DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR, DType::Binary(_) => DUCKDB_TYPE::DUCKDB_TYPE_BLOB, DType::Struct(struct_type, _) => { - return LogicalType::try_from(struct_type); + return OwnedLogicalType::try_from(struct_type); } DType::Decimal(decimal_dtype, _) => { - return LogicalType::decimal_type( + return OwnedLogicalType::decimal_type( decimal_dtype.precision(), decimal_dtype.scale().try_into()?, ); } DType::List(element_dtype, _) => { - let element_logical_type = LogicalType::try_from(element_dtype.as_ref())?; - return LogicalType::list_type(element_logical_type); + let element_logical_type = OwnedLogicalType::try_from(element_dtype.as_ref())?; + return OwnedLogicalType::list_type(element_logical_type); } DType::FixedSizeList(element_dtype, list_size, _) => { - let element_logical_type = LogicalType::try_from(element_dtype.as_ref())?; - return LogicalType::array_type(element_logical_type, *list_size); + let element_logical_type = OwnedLogicalType::try_from(element_dtype.as_ref())?; + return OwnedLogicalType::array_type(element_logical_type, *list_size); } DType::Extension(ext_dtype) => { let Some(temporal) = ext_dtype.metadata_opt::() else { @@ -267,25 +274,25 @@ impl TryFrom<&DType> for LogicalType { } }; - Ok(LogicalType::new(duckdb_type)) + Ok(OwnedLogicalType::new(duckdb_type)) } } -impl TryFrom for LogicalType { +impl TryFrom for OwnedLogicalType { type Error = VortexError; fn 
try_from(struct_type: StructFields) -> Result { - LogicalType::try_from(&struct_type) + OwnedLogicalType::try_from(&struct_type) } } -impl TryFrom<&StructFields> for LogicalType { +impl TryFrom<&StructFields> for OwnedLogicalType { type Error = VortexError; fn try_from(struct_type: &StructFields) -> Result { - let child_types: Vec = struct_type + let child_types: Vec = struct_type .fields() - .map(|field_dtype| LogicalType::try_from(&field_dtype)) + .map(|field_dtype| OwnedLogicalType::try_from(&field_dtype)) .collect::>()?; let child_names: Vec = struct_type @@ -297,25 +304,25 @@ impl TryFrom<&StructFields> for LogicalType { }) .collect::>()?; - LogicalType::struct_type(child_types, child_names) + OwnedLogicalType::struct_type(child_types, child_names) } } -impl TryFrom for LogicalType { +impl TryFrom for OwnedLogicalType { type Error = VortexError; fn try_from(value: PType) -> Result { Ok(match value { - I8 => LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TINYINT), - I16 => LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_SMALLINT), - I32 => LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER), - I64 => LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT), - U8 => LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_UTINYINT), - U16 => LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_USMALLINT), - U32 => LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_UINTEGER), - U64 => LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_UBIGINT), - F32 => LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_FLOAT), - F64 => LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_DOUBLE), + I8 => OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TINYINT), + I16 => OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_SMALLINT), + I32 => OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER), + I64 => OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT), + U8 => OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_UTINYINT), + U16 => OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_USMALLINT), + U32 => OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_UINTEGER), + U64 => 
OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_UBIGINT), + F32 => OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_FLOAT), + F64 => OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_DOUBLE), PType::F16 => return Err(vortex_err!("F16 type not supported in DuckDB")), }) } @@ -340,12 +347,12 @@ mod tests { use vortex::error::VortexResult; use crate::cpp; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedLogicalType; #[test] fn test_null_type() { let dtype = DType::Null; - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!( logical_type.as_type_id(), cpp::DUCKDB_TYPE::DUCKDB_TYPE_SQLNULL @@ -355,7 +362,7 @@ mod tests { #[test] fn test_bool_type() { let dtype = DType::Bool(Nullability::NonNullable); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!( logical_type.as_type_id(), cpp::DUCKDB_TYPE::DUCKDB_TYPE_BOOLEAN @@ -375,21 +382,21 @@ mod tests { #[case(PType::F64, cpp::DUCKDB_TYPE::DUCKDB_TYPE_DOUBLE)] fn test_primitive_types(#[case] ptype: PType, #[case] expected_duckdb_type: cpp::DUCKDB_TYPE) { let dtype = DType::Primitive(ptype, Nullability::NonNullable); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!(logical_type.as_type_id(), expected_duckdb_type); } #[test] fn test_f16_unsupported() { let dtype = DType::Primitive(PType::F16, Nullability::NonNullable); - let result = LogicalType::try_from(&dtype); + let result = OwnedLogicalType::try_from(&dtype); assert!(result.is_err()); } #[test] fn test_string_type() { let dtype = DType::Utf8(Nullability::NonNullable); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!( logical_type.as_type_id(), cpp::DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR @@ -399,7 +406,7 @@ mod tests { #[test] fn 
test_binary_type() { let dtype = DType::Binary(Nullability::NonNullable); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!( logical_type.as_type_id(), cpp::DUCKDB_TYPE::DUCKDB_TYPE_BLOB @@ -415,7 +422,7 @@ mod tests { ]; let struct_fields = StructFields::new(field_names, field_types); let dtype = DType::Struct(struct_fields, Nullability::NonNullable); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!( logical_type.as_type_id(), @@ -430,7 +437,7 @@ mod tests { let struct_fields = StructFields::new(field_names, field_types); let dtype = DType::Struct(struct_fields, Nullability::NonNullable); - let result = LogicalType::try_from(&dtype); + let result = OwnedLogicalType::try_from(&dtype); assert!(result.is_err()); } @@ -439,7 +446,7 @@ mod tests { let struct_fields = StructFields::new(FieldNames::default(), [].into()); let dtype = DType::Struct(struct_fields, Nullability::NonNullable); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!( logical_type.as_type_id(), cpp::DUCKDB_TYPE::DUCKDB_TYPE_STRUCT @@ -451,7 +458,7 @@ mod tests { use vortex::dtype::DecimalDType; let decimal_dtype = DecimalDType::new(18, 4); let dtype = DType::Decimal(decimal_dtype, Nullability::NonNullable); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!( logical_type.as_type_id(), @@ -463,7 +470,7 @@ mod tests { fn test_list_type() { let element_dtype = DType::Primitive(PType::I32, Nullability::NonNullable); let dtype = DType::List(Arc::new(element_dtype), Nullability::NonNullable); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!( logical_type.as_type_id(), @@ 
-476,7 +483,7 @@ mod tests { use vortex::dtype::datetime::TimeUnit; let dtype = DType::Extension(Date::new(TimeUnit::Days, Nullability::NonNullable).erased()); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!( logical_type.as_type_id(), @@ -490,7 +497,7 @@ mod tests { let dtype = DType::Extension(Time::new(TimeUnit::Microseconds, Nullability::NonNullable).erased()); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!( logical_type.as_type_id(), @@ -521,7 +528,7 @@ mod tests { for (time_unit, expected_type) in test_cases { let dtype = DType::Extension(Timestamp::new(time_unit, Nullability::NonNullable).erased()); - let logical_type = LogicalType::try_from(&dtype).unwrap(); + let logical_type = OwnedLogicalType::try_from(&dtype).unwrap(); assert_eq!(logical_type.as_type_id(), expected_type); } @@ -541,7 +548,7 @@ mod tests { ); assert_eq!( - LogicalType::try_from(&dtype).unwrap().as_type_id(), + OwnedLogicalType::try_from(&dtype).unwrap().as_type_id(), cpp::DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_TZ ); } @@ -553,12 +560,12 @@ mod tests { // Invalid DATE time unit let dtype = DType::Extension(Date::new(TimeUnit::Milliseconds, Nullability::NonNullable).erased()); - assert!(LogicalType::try_from(&dtype).is_err()); + assert!(OwnedLogicalType::try_from(&dtype).is_err()); // Invalid TIME time unit let dtype = DType::Extension(Time::new(TimeUnit::Milliseconds, Nullability::NonNullable).erased()); - assert!(LogicalType::try_from(&dtype).is_err()); + assert!(OwnedLogicalType::try_from(&dtype).is_err()); } #[test] @@ -593,6 +600,6 @@ mod tests { .erased(); let dtype = DType::Extension(ext_dtype); - assert!(LogicalType::try_from(&dtype).is_err()); + assert!(OwnedLogicalType::try_from(&dtype).is_err()); } } diff --git a/vortex-duckdb/src/convert/expr.rs b/vortex-duckdb/src/convert/expr.rs index 
5b95b631b72..c615d7a317a 100644 --- a/vortex-duckdb/src/convert/expr.rs +++ b/vortex-duckdb/src/convert/expr.rs @@ -56,23 +56,23 @@ pub fn try_from_bound_expression(value: &duckdb::Expression) -> VortexResult { let operator: Operator = compare.op.try_into()?; - let Some(left) = try_from_bound_expression(&compare.left)? else { + let Some(left) = try_from_bound_expression(compare.left)? else { return Ok(None); }; - let Some(right) = try_from_bound_expression(&compare.right)? else { + let Some(right) = try_from_bound_expression(compare.right)? else { return Ok(None); }; Binary.new_expr(operator, [left, right]) } duckdb::ExpressionClass::BoundBetween(between) => { - let Some(array) = try_from_bound_expression(&between.input)? else { + let Some(array) = try_from_bound_expression(between.input)? else { return Ok(None); }; - let Some(lower) = try_from_bound_expression(&between.lower)? else { + let Some(lower) = try_from_bound_expression(between.lower)? else { return Ok(None); }; - let Some(upper) = try_from_bound_expression(&between.upper)? else { + let Some(upper) = try_from_bound_expression(between.upper)? else { return Ok(None); }; Between.new_expr( @@ -97,7 +97,7 @@ pub fn try_from_bound_expression(value: &duckdb::Expression) -> VortexResult { let children = operator.children().collect_vec(); assert_eq!(children.len(), 1); - let Some(child) = try_from_bound_expression(&children[0])? else { + let Some(child) = try_from_bound_expression(children[0])? else { return Ok(None); }; match operator.op { @@ -113,7 +113,7 @@ pub fn try_from_bound_expression(value: &duckdb::Expression) -> VortexResult= 2); - let Some(element) = try_from_bound_expression(&children[0])? else { + let Some(element) = try_from_bound_expression(children[0])? 
else { return Ok(None); }; @@ -153,10 +153,10 @@ pub fn try_from_bound_expression(value: &duckdb::Expression) -> VortexResult { let children = func.children().collect_vec(); assert_eq!(children.len(), 2); - let Some(value) = try_from_bound_expression(&children[0])? else { + let Some(value) = try_from_bound_expression(children[0])? else { return Ok(None); }; - let Some(pattern_lit) = like_pattern_str(&children[1])? else { + let Some(pattern_lit) = like_pattern_str(children[1])? else { vortex_bail!("expected pattern to be bound string") }; let pattern = lit(pattern_lit); @@ -170,7 +170,7 @@ pub fn try_from_bound_expression(value: &duckdb::Expression) -> VortexResult { let Some(children) = conj .children() - .map(|c| try_from_bound_expression(&c)) + .map(try_from_bound_expression) .collect::>>>()? else { return Ok(None); diff --git a/vortex-duckdb/src/convert/scalar.rs b/vortex-duckdb/src/convert/scalar.rs index 4ea51b6bd8f..7a4bf52aec5 100644 --- a/vortex-duckdb/src/convert/scalar.rs +++ b/vortex-duckdb/src/convert/scalar.rs @@ -49,13 +49,13 @@ use vortex::scalar::ScalarValue; use vortex::scalar::Utf8Scalar; use crate::convert::dtype::FromLogicalType; -use crate::duckdb::LogicalType; +use crate::duckdb::OwnedLogicalType; +use crate::duckdb::OwnedValue; use crate::duckdb::Value; -use crate::duckdb::ValueRef; /// Trait for converting Vortex scalars to DuckDB values. pub trait ToDuckDBScalar { - fn try_to_duckdb_scalar(&self) -> VortexResult; + fn try_to_duckdb_scalar(&self) -> VortexResult; } impl ToDuckDBScalar for Scalar { @@ -64,13 +64,14 @@ impl ToDuckDBScalar for Scalar { /// # Note /// /// Struct and List scalars are not yet implemented and cause a panic. 
- fn try_to_duckdb_scalar(&self) -> VortexResult { + fn try_to_duckdb_scalar(&self) -> VortexResult { if self.is_null() { - return Ok(Value::null(&LogicalType::try_from(self.dtype())?)); + let lt = OwnedLogicalType::try_from(self.dtype())?; + return Ok(OwnedValue::null(<)); } match self.dtype() { - DType::Null => Ok(Value::sql_null()), + DType::Null => Ok(OwnedValue::sql_null()), DType::Bool(_) => self.as_bool().try_to_duckdb_scalar(), DType::Primitive(..) => self.as_primitive().try_to_duckdb_scalar(), DType::Decimal(..) => self.as_decimal().try_to_duckdb_scalar(), @@ -88,11 +89,13 @@ impl ToDuckDBScalar for PrimitiveScalar<'_> { /// # Note /// /// - `F16` values are converted to `F32` before creating the DuckDB value - fn try_to_duckdb_scalar(&self) -> VortexResult { + fn try_to_duckdb_scalar(&self) -> VortexResult { if self.ptype() == PType::F16 { - return Value::try_from(self.as_::().map(|f| f.to_f32())); + return OwnedValue::try_from(self.as_::().map(|f| f.to_f32())); } - match_each_native_simd_ptype!(self.ptype(), |P| { Ok(Value::try_from(self.as_::

())?) }) + match_each_native_simd_ptype!(self.ptype(), |P| { + Ok(OwnedValue::try_from(self.as_::

())?) + }) } } @@ -109,14 +112,15 @@ impl ToDuckDBScalar for DecimalScalar<'_> { /// /// This scalar conversion always uses `i128` for all decimal values regardless of precision, /// which differs from the array conversion logic that uses precision-based storage optimization. - fn try_to_duckdb_scalar(&self) -> VortexResult { + fn try_to_duckdb_scalar(&self) -> VortexResult { let decimal_type = self .dtype() .as_decimal_opt() .ok_or_else(|| vortex_err!("decimal scalar without decimal dtype"))?; let Some(decimal_value) = self.decimal_value() else { - return Ok(Value::null(&LogicalType::try_from(self.dtype())?)); + let lt = OwnedLogicalType::try_from(self.dtype())?; + return Ok(OwnedValue::null(<)); }; let huge_value = match decimal_value { @@ -128,7 +132,7 @@ impl ToDuckDBScalar for DecimalScalar<'_> { DecimalValue::I256(_) => vortex_bail!("cannot handle a i256 decimal in duckdb"), }; - Ok(Value::new_decimal( + Ok(OwnedValue::new_decimal( decimal_type.precision(), decimal_type.scale(), huge_value, @@ -138,35 +142,35 @@ impl ToDuckDBScalar for DecimalScalar<'_> { impl ToDuckDBScalar for BoolScalar<'_> { /// Converts a boolean scalar to a DuckDB boolean value. - fn try_to_duckdb_scalar(&self) -> VortexResult { - Value::try_from(self.value()) + fn try_to_duckdb_scalar(&self) -> VortexResult { + OwnedValue::try_from(self.value()) } } impl ToDuckDBScalar for Utf8Scalar<'_> { /// Converts a UTF-8 string scalar to a DuckDB VARCHAR value. - fn try_to_duckdb_scalar(&self) -> VortexResult { + fn try_to_duckdb_scalar(&self) -> VortexResult { Ok(match self.value() { - Some(value) => Value::from(value.as_str()), - None => Value::null(&LogicalType::varchar()), + Some(value) => OwnedValue::from(value.as_str()), + None => OwnedValue::null(&OwnedLogicalType::varchar()), }) } } impl ToDuckDBScalar for BinaryScalar<'_> { /// Converts a binary scalar to a DuckDB BLOB value. 
- fn try_to_duckdb_scalar(&self) -> VortexResult { + fn try_to_duckdb_scalar(&self) -> VortexResult { Ok(match self.value() { - Some(value) => Value::from(value.as_slice()), - None => Value::null(&LogicalType::blob()), + Some(value) => OwnedValue::from(value.as_slice()), + None => OwnedValue::null(&OwnedLogicalType::blob()), }) } } impl ToDuckDBScalar for ExtScalar<'_> { /// Converts an extension scalar (primarily temporal types) to a DuckDB value. - fn try_to_duckdb_scalar(&self) -> VortexResult { - let logical_type = LogicalType::try_from(&DType::Extension(self.ext_dtype().clone()))?; + fn try_to_duckdb_scalar(&self) -> VortexResult { + let logical_type = OwnedLogicalType::try_from(&DType::Extension(self.ext_dtype().clone()))?; let Some(temporal) = self.ext_dtype().metadata_opt::() else { vortex_bail!("Cannot convert non-temporal extension scalar to duckdb value"); }; @@ -191,13 +195,13 @@ impl ToDuckDBScalar for ExtScalar<'_> { "Currently only UTC timezone is supported for duckdb timestamp(tz) conversion" ); } - return Ok(Value::new_timestamp_tz(value()?)); + return Ok(OwnedValue::new_timestamp_tz(value()?)); } match unit { - TimeUnit::Nanoseconds => Value::new_timestamp_ns(value()?), - TimeUnit::Microseconds => Value::new_timestamp_us(value()?), - TimeUnit::Milliseconds => Value::new_timestamp_ms(value()?), - TimeUnit::Seconds => Value::new_timestamp_s(value()?), + TimeUnit::Nanoseconds => OwnedValue::new_timestamp_ns(value()?), + TimeUnit::Microseconds => OwnedValue::new_timestamp_us(value()?), + TimeUnit::Milliseconds => OwnedValue::new_timestamp_ms(value()?), + TimeUnit::Seconds => OwnedValue::new_timestamp_s(value()?), TimeUnit::Days => { vortex_bail!("timestamp(d) is cannot be converted to duckdb scalar") } @@ -211,14 +215,14 @@ impl ToDuckDBScalar for ExtScalar<'_> { vortex_err!("temporal types must be backed by primitive scalars") })? 
.as_::() - .map(Value::new_date) - .unwrap_or_else(|| Value::null(&logical_type)), + .map(OwnedValue::new_date) + .unwrap_or_else(|| OwnedValue::null(&logical_type)), _ => vortex_bail!("cannot have TimeUnit {unit}, so represent a day"), }, TemporalMetadata::Time(unit) => match unit { - TimeUnit::Microseconds => Value::new_time(value()?), - TimeUnit::Milliseconds => Value::new_time(value()? * 1000), - TimeUnit::Seconds => Value::new_time(value()? * 1000 * 1000), + TimeUnit::Microseconds => OwnedValue::new_time(value()?), + TimeUnit::Milliseconds => OwnedValue::new_time(value()? * 1000), + TimeUnit::Seconds => OwnedValue::new_time(value()? * 1000 * 1000), TimeUnit::Nanoseconds | TimeUnit::Days => { vortex_bail!("cannot convert timeunit {unit} to a duckdb MS time") } @@ -227,18 +231,18 @@ impl ToDuckDBScalar for ExtScalar<'_> { } } -impl TryFrom for Scalar { +impl TryFrom for Scalar { type Error = VortexError; - fn try_from(value: Value) -> Result { - Scalar::try_from(value.as_ref()) + fn try_from(value: OwnedValue) -> Result { + Scalar::try_from(&*value) } } -impl<'a> TryFrom> for Scalar { +impl<'a> TryFrom<&'a Value> for Scalar { type Error = VortexError; - fn try_from(value: ValueRef<'a>) -> Result { + fn try_from(value: &'a Value) -> Result { use crate::duckdb::ExtractedValue; let dtype = DType::from_logical_type(value.logical_type(), Nullable)?; match value.extract() { diff --git a/vortex-duckdb/src/convert/table_filter.rs b/vortex-duckdb/src/convert/table_filter.rs index a3a42a4e3b6..1f530f2769d 100644 --- a/vortex-duckdb/src/convert/table_filter.rs +++ b/vortex-duckdb/src/convert/table_filter.rs @@ -40,7 +40,7 @@ pub fn try_from_table_filter( TableFilterClass::ConjunctionAnd(conj_and) => { let Some(children) = conj_and .children() - .map(|child| try_from_table_filter(&child, col, scope_dtype)) + .map(|child| try_from_table_filter(child, col, scope_dtype)) .try_collect::<_, Option>, _>()? 
else { return Ok(None); @@ -52,7 +52,7 @@ pub fn try_from_table_filter( TableFilterClass::ConjunctionOr(disjuction_or) => { let Some(children) = disjuction_or .children() - .map(|child| try_from_table_filter(&child, col, scope_dtype)) + .map(|child| try_from_table_filter(child, col, scope_dtype)) .try_collect::<_, Option>, _>()? else { return Ok(None); @@ -63,11 +63,11 @@ pub fn try_from_table_filter( TableFilterClass::IsNull => is_null(col.clone()), TableFilterClass::IsNotNull => not(is_null(col.clone())), TableFilterClass::StructExtract(name, child_filter) => { - return try_from_table_filter(&child_filter, &get_item(name, col.clone()), scope_dtype); + return try_from_table_filter(child_filter, &get_item(name, col.clone()), scope_dtype); } TableFilterClass::Optional(child) => { // Optional expressions are optional not yet supported. - return try_from_table_filter(&child, col, scope_dtype).or_else(|_err| { + return try_from_table_filter(child, col, scope_dtype).or_else(|_err| { // Failed to convert the optional expression, but it's optional, so who cares? 
Ok(None) }); @@ -105,7 +105,7 @@ pub fn try_from_table_filter( op, move || { let value = data.latest()?; - let scalar = Scalar::try_from(value.as_ref()) + let scalar = Scalar::try_from(&*value) .vortex_expect("failed to convert dynamic filter value to scalar"); scalar.into_value() }, diff --git a/vortex-duckdb/src/convert/vector.rs b/vortex-duckdb/src/convert/vector.rs index 681337f8974..efeb2f3b4db 100644 --- a/vortex-duckdb/src/convert/vector.rs +++ b/vortex-duckdb/src/convert/vector.rs @@ -67,7 +67,7 @@ impl<'a> DuckString<'a> { } } -fn vector_as_slice(vector: &mut Vector, len: usize) -> ArrayRef { +fn vector_as_slice(vector: &Vector, len: usize) -> ArrayRef { let data = vector.as_slice_with_len::(len); PrimitiveArray::new( @@ -78,7 +78,7 @@ fn vector_as_slice(vector: &mut Vector, len: usize) -> ArrayRef } fn vector_mapped P>( - vector: &mut Vector, + vector: &Vector, len: usize, from_duckdb_type: F, ) -> ArrayRef { @@ -91,7 +91,7 @@ fn vector_mapped P>( .into_array() } -fn vector_as_string_blob(vector: &mut Vector, len: usize, dtype: DType) -> ArrayRef { +fn vector_as_string_blob(vector: &Vector, len: usize, dtype: DType) -> ArrayRef { let data = vector.as_slice_with_len::(len); let validity = vector.validity_ref(len); @@ -210,7 +210,7 @@ fn process_duckdb_lists( } /// Converts flat vector to a vortex array -pub fn flat_vector_to_vortex(vector: &mut Vector, len: usize) -> VortexResult { +pub fn flat_vector_to_vortex(vector: &Vector, len: usize) -> VortexResult { let type_id = vector.logical_type().as_type_id(); match type_id { DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP => { @@ -308,7 +308,7 @@ pub fn flat_vector_to_vortex(vector: &mut Vector, len: usize) -> VortexResult { let array_elem_size = vector.logical_type().array_type_array_size(); let child_data = flat_vector_to_vortex( - &mut vector.array_vector_get_child(), + vector.array_vector_get_child(), len * array_elem_size as usize, )?; @@ -326,7 +326,7 @@ pub fn flat_vector_to_vortex(vector: &mut Vector, len: usize) 
-> VortexResult VortexResult { let logical_type = vector.logical_type(); let children = (0..logical_type.struct_type_child_count()) - .map(|idx| flat_vector_to_vortex(&mut vector.struct_vector_get_child(idx), len)) + .map(|idx| flat_vector_to_vortex(vector.struct_vector_get_child(idx), len)) .collect::, _>>()?; let names = (0..logical_type.struct_type_child_count()) .map(|idx| logical_type.struct_child_name(idx)) @@ -357,9 +357,9 @@ pub fn data_chunk_to_vortex(field_names: &FieldNames, chunk: &DataChunk) -> Vort let columns = (0..chunk.column_count()) .map(|i| { - let mut vector = chunk.get_vector(i); + let vector = chunk.get_vector(i); vector.flatten(len); - flat_vector_to_vortex(&mut vector, len.as_()) + flat_vector_to_vortex(vector, len.as_()) }) .collect::>>()?; StructArray::try_new( @@ -383,16 +383,16 @@ mod tests { use super::*; use crate::cpp::DUCKDB_TYPE; - use crate::duckdb::LogicalType; - use crate::duckdb::Vector; + use crate::duckdb::OwnedLogicalType; + use crate::duckdb::OwnedVector; #[test] fn test_integer_vector_conversion() { let values = vec![1i32, 2, 3, 4, 5]; let len = values.len(); - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { @@ -401,7 +401,7 @@ mod tests { } // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let expected = PrimitiveArray::from_option_iter([Some(1i32), Some(2), Some(3), Some(4), Some(5)]); assert_arrays_eq!(result, expected); @@ -412,8 +412,8 @@ mod tests { let values = vec![1_703_980_800_000_000_i64, 0i64, -86_400_000_000_i64]; // microseconds let len = values.len(); - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP); - let mut vector = 
Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { @@ -422,7 +422,7 @@ mod tests { } // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = TemporalArray::try_from(result).unwrap(); let vortex_values = vortex_array.temporal_values().to_primitive(); let values_slice = vortex_values.as_slice::(); @@ -435,8 +435,8 @@ mod tests { let values = vec![1_703_980_800_i64, 0i64, -86_400_i64]; // seconds let len = values.len(); - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_S); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_S); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { @@ -445,7 +445,7 @@ mod tests { } // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = TemporalArray::try_from(result).unwrap(); let vortex_values = vortex_array.temporal_values().to_primitive(); let values_slice = vortex_values.as_slice::(); @@ -458,8 +458,8 @@ mod tests { let values = vec![1_703_980_800_000_i64, 0i64, -86_400_000_i64]; // milliseconds let len = values.len(); - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_MS); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_MS); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { @@ -468,7 +468,7 @@ mod tests { } // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, 
len).unwrap(); let vortex_array = TemporalArray::try_from(result).unwrap(); let vortex_values = vortex_array.temporal_values().to_primitive(); let values_slice = vortex_values.as_slice::(); @@ -481,8 +481,8 @@ mod tests { let values = vec![1_703_980_800_000_000_i64, 0i64, -86_400_000_000_i64]; let len = values.len(); - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { @@ -496,7 +496,7 @@ mod tests { validity_slice.set(1, false); // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = TemporalArray::try_from(result).unwrap(); let vortex_values = vortex_array.temporal_values().to_primitive(); let values_slice = vortex_values.as_slice::(); @@ -520,8 +520,8 @@ mod tests { ]; let len = values.len(); - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { @@ -530,7 +530,7 @@ mod tests { } // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = TemporalArray::try_from(result).unwrap(); let vortex_values = vortex_array.temporal_values().to_primitive(); let values_slice = vortex_values.as_slice::(); @@ -543,8 +543,8 @@ mod tests { let values = vec![1_703_980_800_000_000_i64]; // Single microsecond timestamp let len = values.len(); - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP); - let mut vector = 
Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { @@ -553,7 +553,7 @@ mod tests { } // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = TemporalArray::try_from(result).unwrap(); let vortex_values = vortex_array.temporal_values().to_primitive(); let values_slice = vortex_values.as_slice::(); @@ -566,8 +566,8 @@ mod tests { let values = vec![true, false, true, false]; let len = values.len(); - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BOOLEAN); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BOOLEAN); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { @@ -576,7 +576,7 @@ mod tests { } // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = result.to_bool(); let expected = BoolArray::new(BitBuffer::from(values), Validity::AllValid); assert_arrays_eq!(vortex_array, expected); @@ -587,8 +587,8 @@ mod tests { let values = vec![1i32, 2, 3]; let len = values.len(); - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { @@ -602,7 +602,7 @@ mod tests { validity_slice.set(1, false); // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = result.to_primitive(); let vortex_slice = 
vortex_array.as_slice::(); @@ -619,9 +619,9 @@ mod tests { let len = 1; let logical_type = - LogicalType::list_type(LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER)) + OwnedLogicalType::list_type(OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER)) .vortex_expect("LogicalType creation should succeed for test data"); - let mut vector = Vector::with_capacity(logical_type, len); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { @@ -630,13 +630,13 @@ mod tests { offset: 0, length: values.len() as u64, }; - let mut child = vector.list_vector_get_child(); + let child = vector.list_vector_get_child_mut(); let slice = child.as_slice_mut::(values.len()); slice.copy_from_slice(&values); } // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = result.to_listview(); assert_eq!(vortex_array.len(), len); @@ -651,20 +651,22 @@ mod tests { let values = vec![1i32, 2, 3, 4]; let len = 1; - let logical_type = - LogicalType::array_type(LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER), 4) - .vortex_expect("LogicalType creation should succeed for test data"); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::array_type( + OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER), + 4, + ) + .vortex_expect("LogicalType creation should succeed for test data"); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data unsafe { - let mut child = vector.array_vector_get_child(); + let child = vector.array_vector_get_child_mut(); let slice = child.as_slice_mut::(values.len()); slice.copy_from_slice(&values); } // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = result.to_fixed_size_list(); assert_eq!(vortex_array.len(), len); @@ -677,12 
+679,12 @@ mod tests { #[test] fn test_empty_struct() { let len = 4; - let logical_type = LogicalType::struct_type([], []) + let logical_type = OwnedLogicalType::struct_type([], []) .vortex_expect("LogicalType creation should succeed for test data"); - let mut vector = Vector::with_capacity(logical_type, len); + let vector = OwnedVector::with_capacity(&logical_type, len); // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = result.to_struct(); assert_eq!(vortex_array.len(), len); @@ -695,29 +697,29 @@ mod tests { let values2 = vec![5i32, 6, 7, 8]; let len = values1.len(); - let logical_type = LogicalType::struct_type( + let logical_type = OwnedLogicalType::struct_type( [ - LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER), - LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER), + OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER), + OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER), ], [CString::new("a").unwrap(), CString::new("b").unwrap()], ) .vortex_expect("LogicalType creation should succeed for test data"); - let mut vector = Vector::with_capacity(logical_type, len); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with data for (i, values) in (0..vector.logical_type().struct_type_child_count()).zip([values1, values2]) { unsafe { - let mut child = vector.struct_vector_get_child(i); + let child = vector.struct_vector_get_child_mut(i); let slice = child.as_slice_mut::(len); slice.copy_from_slice(&values); } } // Test conversion - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = result.to_struct(); assert_eq!(vortex_array.len(), len); @@ -740,8 +742,9 @@ mod tests { let len = 2; let logical_type = - LogicalType::list_type(LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER)).unwrap(); - let mut vector = 
Vector::with_capacity(logical_type, len); + OwnedLogicalType::list_type(OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER)) + .unwrap(); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Entry 0: offset=0, length=4 -> all elements (end=4) // Entry 1: null, offset=0, length=0 (end=0) @@ -755,7 +758,7 @@ mod tests { offset: 0, length: 0, }; - let mut child = vector.list_vector_get_child(); + let child = vector.list_vector_get_child_mut(); let slice = child.as_slice_mut::(child_values.len()); slice.copy_from_slice(&child_values); } @@ -766,7 +769,7 @@ mod tests { // Test conversion - the old bug would compute child length as 0+0=0 instead of // max(4,0)=4. - let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = result.to_listview(); assert_eq!(vortex_array.len(), len); @@ -788,8 +791,9 @@ mod tests { let len = 2; let logical_type = - LogicalType::list_type(LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER)).unwrap(); - let mut vector = Vector::with_capacity(logical_type, len); + OwnedLogicalType::list_type(OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER)) + .unwrap(); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Populate with out-of-order list entries: // - Entry 0: offset=2, length=2 -> elements [3, 4] (end=4) @@ -804,14 +808,14 @@ mod tests { offset: 0, length: 2, }; - let mut child = vector.list_vector_get_child(); + let child = vector.list_vector_get_child_mut(); let slice = child.as_slice_mut::(child_values.len()); slice.copy_from_slice(&child_values); } // Test conversion - the old bug would compute child length as 0+2=2 instead of // max(4,2)=4. 
- let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = result.to_listview(); assert_eq!(vortex_array.len(), len); @@ -834,8 +838,9 @@ mod tests { let len = 3; let logical_type = - LogicalType::list_type(LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER)).unwrap(); - let mut vector = Vector::with_capacity(logical_type, len); + OwnedLogicalType::list_type(OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER)) + .unwrap(); + let mut vector = OwnedVector::with_capacity(&logical_type, len); // Entry 0: valid, offset=0, length=2 -> elements [1, 2] // Entry 1: null with garbage values (offset=9999, length=9999) @@ -855,7 +860,7 @@ mod tests { offset: 2, length: 2, }; - let mut child = vector.list_vector_get_child(); + let child = vector.list_vector_get_child_mut(); let slice = child.as_slice_mut::(child_values.len()); slice.copy_from_slice(&child_values); } @@ -866,7 +871,7 @@ mod tests { // Test conversion. The old code would compute child_min_length as 9999+9999=19998, which // would panic when trying to read that much data from the child vector. 
- let result = flat_vector_to_vortex(&mut vector, len).unwrap(); + let result = flat_vector_to_vortex(&vector, len).unwrap(); let vortex_array = result.to_listview(); assert_eq!(vortex_array.len(), len); diff --git a/vortex-duckdb/src/copy.rs b/vortex-duckdb/src/copy.rs index 9365f054750..84feb1f1279 100644 --- a/vortex-duckdb/src/copy.rs +++ b/vortex-duckdb/src/copy.rs @@ -32,8 +32,8 @@ use crate::convert::from_duckdb_table; use crate::duckdb::ClientContext; use crate::duckdb::CopyFunction; use crate::duckdb::DataChunk; +use crate::duckdb::DuckDbFsWriter; use crate::duckdb::LogicalType; -use crate::duckdb::duckdb_fs_create_writer; #[derive(Debug)] pub struct VortexCopyFunction; @@ -65,7 +65,7 @@ impl CopyFunction for VortexCopyFunction { fn bind( column_names: Vec, - column_types: Vec, + column_types: Vec<&LogicalType>, ) -> VortexResult { let fields = from_duckdb_table( column_names @@ -120,7 +120,7 @@ impl CopyFunction for VortexCopyFunction { } fn init_global( - client_context: ClientContext, + client_context: &ClientContext, bind_data: &Self::BindData, file_path: String, ) -> VortexResult { @@ -129,12 +129,14 @@ impl CopyFunction for VortexCopyFunction { let array_stream = ArrayStreamAdapter::new(bind_data.dtype.clone(), rx.into_stream()); let handle = SESSION.handle(); + // SAFETY: The ClientContext is owned by the Connection and lives for the duration of + // query execution. DuckDB keeps the connection alive while this copy function runs. + let ctx = unsafe { client_context.erase_lifetime() }; let write_task = handle.spawn(async move { // Use DuckDB FS exclusively to match the DuckDB client context configuration. 
- let writer = unsafe { duckdb_fs_create_writer(client_context.as_ptr(), &file_path) } - .map_err(|e| { - vortex_err!("Failed to create DuckDB FS writer for {file_path}: {e}") - })?; + let writer = DuckDbFsWriter::new(ctx, &file_path).map_err(|e| { + vortex_err!("Failed to create DuckDB FS writer for {file_path}: {e}") + })?; SESSION.write_options().write(writer, array_stream).await }); diff --git a/vortex-duckdb/src/duckdb/client_context.rs b/vortex-duckdb/src/duckdb/client_context.rs index 2f7e7a025a8..6def948d1f9 100644 --- a/vortex-duckdb/src/duckdb/client_context.rs +++ b/vortex-duckdb/src/duckdb/client_context.rs @@ -6,55 +6,56 @@ use std::ffi::CStr; use vortex::error::vortex_panic; use crate::cpp; -use crate::duckdb::ObjectCacheRef; -use crate::duckdb::Value; -use crate::wrapper; +use crate::duckdb::ObjectCache; +use crate::duckdb::OwnedValue; +use crate::lifetime_wrapper; -wrapper!( +lifetime_wrapper!( /// A DuckDB client context wrapper. ClientContext, cpp::duckdb_client_context, - |_| { - // No cleanup is necessary since the client context is owned by the connection and will - // be valid for the connection's lifetime. - } + cpp::duckdb_destroy_client_context ); -// SAFETY: the underlying DuckDB context is valid for the connection lifetime and DuckDB -// synchronizes internal state. +// SAFETY: ClientContext carries an opaque pointer. It is safe to send/share across threads +// under the same guarantees: the underlying DuckDB context is valid for the connection +// lifetime and DuckDB synchronizes internal state. unsafe impl Send for ClientContext {} unsafe impl Sync for ClientContext {} -impl Clone for ClientContext { - fn clone(&self) -> Self { - // ClientContext is a lightweight wrapper around an opaque pointer owned by the connection. - // Cloning just creates another wrapper around the same pointer. - unsafe { Self::borrow(self.as_ptr()) } +impl ClientContext { + /// Erases the lifetime of this reference, returning a `&'static ClientContext`. 
+ /// + /// # Safety + /// + /// The caller must ensure that the underlying `ClientContext` outlives all uses of the + /// returned reference. In practice, the `ClientContext` is owned by the `Connection` + /// and lives as long as the connection, so this is safe as long as the connection is kept alive. + pub unsafe fn erase_lifetime(&self) -> &'static Self { + unsafe { &*(self as *const Self) } } -} -impl ClientContext { /// Get the object cache for this client context. - pub fn object_cache(&self) -> ObjectCacheRef<'static> { + pub fn object_cache(&self) -> &ObjectCache { unsafe { let cache = cpp::duckdb_client_context_get_object_cache(self.as_ptr()); if cache.is_null() { vortex_panic!("Failed to get object cache from client context"); } - ObjectCacheRef::borrow(cache) + ObjectCache::borrow(cache) } } /// Try to get the current value of a configuration setting. /// Returns None if the setting doesn't exist. - pub fn try_get_current_setting(&self, key: &CStr) -> Option { + pub fn try_get_current_setting(&self, key: &CStr) -> Option { unsafe { let value_ptr = cpp::duckdb_client_context_try_get_current_setting(self.as_ptr(), key.as_ptr()); if value_ptr.is_null() { None } else { - Some(Value::own(value_ptr)) + Some(OwnedValue::own(value_ptr)) } } } diff --git a/vortex-duckdb/src/duckdb/config.rs b/vortex-duckdb/src/duckdb/config.rs index 0aba793ddf3..72933650b74 100644 --- a/vortex-duckdb/src/duckdb/config.rs +++ b/vortex-duckdb/src/duckdb/config.rs @@ -11,19 +11,18 @@ use vortex::error::vortex_err; use crate::cpp; use crate::duckdb::Database; -use crate::duckdb::LogicalType; -use crate::duckdb::Value; +use crate::duckdb::OwnedValue; use crate::duckdb_try; -use crate::wrapper; +use crate::lifetime_wrapper; -wrapper!( +lifetime_wrapper!( /// A DuckDB configuration instance. Config, cpp::duckdb_config, cpp::duckdb_destroy_config ); -impl Config { +impl OwnedConfig { /// Creates a new DuckDB configuration. 
pub fn new() -> VortexResult { let mut ptr: cpp::duckdb_config = ptr::null_mut(); @@ -34,7 +33,9 @@ impl Config { Ok(unsafe { Self::own(ptr) }) } +} +impl Config { /// Sets a key-value configuration parameter. pub fn set(&mut self, key: &str, value: &str) -> VortexResult<()> { let key_cstr = @@ -56,7 +57,7 @@ impl Config { /// Gets the value of a configuration parameter that was previously set. /// Returns None if the parameter was never set on this Config instance. - pub fn get(&self, key: &str) -> Option { + pub fn get(&self, key: &str) -> Option { let key_cstr = CString::new(key).ok()?; let mut value: cpp::duckdb_value = ptr::null_mut(); @@ -65,7 +66,7 @@ impl Config { }; (result == cpp::duckdb_state::DuckDBSuccess && !value.is_null()) - .then(|| unsafe { Value::own(value) }) + .then(|| unsafe { OwnedValue::own(value) }) } pub fn get_str(&self, key: &str) -> Option { @@ -137,8 +138,8 @@ impl Config { &self, name: &str, description: &str, - logical_type: LogicalType, - default_value: Value, + logical_type: OwnedLogicalType, + default_value: OwnedValue, ) -> VortexResult<()> { let name_cstr = CString::new(name).map_err(|_| vortex_err!("Invalid name: contains null bytes"))?; @@ -158,8 +159,10 @@ impl Config { } } +use crate::duckdb::OwnedLogicalType; + impl Database { - pub fn config(&self) -> Config { + pub fn config(&self) -> &Config { unsafe { Config::borrow(cpp::duckdb_vx_database_get_config(self.as_ptr())) } } } @@ -167,16 +170,17 @@ impl Database { #[cfg(test)] mod tests { use super::*; + use crate::duckdb::OwnedDatabase; #[test] fn test_config_creation() { - let config = Config::new(); + let config = OwnedConfig::new(); assert!(config.is_ok()); } #[test] fn test_config_set_and_get() { - let mut config = Config::new().unwrap(); + let mut config = OwnedConfig::new().unwrap(); // Set some values assert!(config.set("memory_limit", "1GB").is_ok()); @@ -192,7 +196,7 @@ mod tests { #[test] fn test_config_get_all() { - let mut config = Config::new().unwrap(); + let 
mut config = OwnedConfig::new().unwrap(); assert!(config.set("memory_limit", "512MB").is_ok()); assert!(config.set("threads", "4").is_ok()); @@ -205,7 +209,7 @@ mod tests { #[test] fn test_config_update_value() { - let mut config = Config::new().unwrap(); + let mut config = OwnedConfig::new().unwrap(); // Set initial value assert!(config.set("threads", "2").is_ok()); @@ -218,10 +222,8 @@ mod tests { #[test] fn test_config_persistence_through_database() { - use crate::duckdb::Database; - // Create config with specific settings - let mut config = Config::new().unwrap(); + let mut config = OwnedConfig::new().unwrap(); config.set("memory_limit", "256MB").unwrap(); config.set("threads", "1").unwrap(); @@ -230,7 +232,7 @@ mod tests { assert_eq!(config.get_str("threads"), Some("1".to_string())); // Use config to create database (this consumes the config) - let db = Database::open_in_memory_with_config(config); + let db = OwnedDatabase::open_in_memory_with_config(config); assert!(db.is_ok()); // Verify database was created successfully @@ -240,14 +242,14 @@ mod tests { #[test] fn test_config_invalid_key() { - let mut config = Config::new().unwrap(); + let mut config = OwnedConfig::new().unwrap(); let result = config.set("key\0with\0nulls", "value"); assert!(result.is_err()); } #[test] fn test_config_invalid_value() { - let mut config = Config::new().unwrap(); + let mut config = OwnedConfig::new().unwrap(); let result = config.set("key", "value\0with\0nulls"); assert!(result.is_err()); } diff --git a/vortex-duckdb/src/duckdb/connection.rs b/vortex-duckdb/src/duckdb/connection.rs index 4670f79724d..59231d3f2ed 100644 --- a/vortex-duckdb/src/duckdb/connection.rs +++ b/vortex-duckdb/src/duckdb/connection.rs @@ -11,18 +11,18 @@ use vortex::error::vortex_err; use crate::cpp; use crate::duckdb::ClientContext; use crate::duckdb::Database; -use crate::duckdb::QueryResult; +use crate::duckdb::OwnedQueryResult; use crate::duckdb_try; -use crate::wrapper; +use crate::lifetime_wrapper; 
-wrapper!( +lifetime_wrapper!( /// A DuckDB connection. Connection, cpp::duckdb_connection, cpp::duckdb_disconnect ); -impl Connection { +impl OwnedConnection { pub fn connect(db: &Database) -> VortexResult { let mut ptr: cpp::duckdb_connection = ptr::null_mut(); duckdb_try!( @@ -31,9 +31,11 @@ impl Connection { ); Ok(unsafe { Self::own(ptr) }) } +} +impl Connection { /// Execute SQL query and return the result. - pub fn query(&self, query: &str) -> VortexResult { + pub fn query(&self, query: &str) -> VortexResult { let mut result: cpp::duckdb_result = unsafe { std::mem::zeroed() }; let query_cstr = std::ffi::CString::new(query).map_err(|_| vortex_err!("Invalid query string"))?; @@ -55,11 +57,11 @@ impl Connection { return Err(vortex_err!("Failed to execute query: {}", error_msg)); } - Ok(unsafe { QueryResult::new(result) }) + Ok(unsafe { OwnedQueryResult::new(result) }) } - /// Get the object cache for this connection. - pub fn client_context(&self) -> VortexResult { + /// Get the client context for this connection. 
+ pub fn client_context(&self) -> VortexResult<&ClientContext> { unsafe { let client_context = cpp::duckdb_vx_connection_get_client_context(self.as_ptr()); if client_context.is_null() { @@ -79,9 +81,10 @@ mod tests { use super::*; use crate::cpp::duckdb_string_t; + use crate::duckdb::OwnedDatabase; - fn test_connection() -> VortexResult { - let db = Database::open_in_memory()?; + fn test_connection() -> VortexResult { + let db = OwnedDatabase::open_in_memory()?; db.connect() } @@ -176,15 +179,15 @@ mod tests { assert_eq!(result.column_name(0).unwrap(), "num"); assert_eq!(result.column_name(1).unwrap(), "text"); - let chunk = result.into_iter().next().unwrap(); - let vec = chunk.get_vector(0); - let slice = vec.as_slice_with_len::(chunk.len().as_()); + let mut chunk = result.into_iter().next().unwrap(); + let len = chunk.len().as_(); + assert_eq!(len, 1); - let mut vec_str = chunk.get_vector(1); - let slice_str = unsafe { vec_str.as_slice_mut::(chunk.len().as_()) }; + let int_val = chunk.get_vector(0).as_slice_with_len::(len)[0]; + assert_eq!(int_val, 1); - assert_eq!(chunk.len(), 1); - assert_eq!(slice[0], 1); + let vec_str = chunk.get_vector_mut(1); + let slice_str = unsafe { vec_str.as_slice_mut::(len) }; assert_eq!( unsafe { CStr::from_ptr(cpp::duckdb_string_t_data(&raw mut slice_str[0])).to_string_lossy() diff --git a/vortex-duckdb/src/duckdb/copy_function/callback.rs b/vortex-duckdb/src/duckdb/copy_function/callback.rs index 1e26c7e34c7..ec2f3fa9ab2 100644 --- a/vortex-duckdb/src/duckdb/copy_function/callback.rs +++ b/vortex-duckdb/src/duckdb/copy_function/callback.rs @@ -17,9 +17,9 @@ use crate::cpp::duckdb_vx_copy_func_bind_input; use crate::cpp::duckdb_vx_error; use crate::duckdb::ClientContext; use crate::duckdb::CopyFunction; -use crate::duckdb::Data; use crate::duckdb::DataChunk; use crate::duckdb::LogicalType; +use crate::duckdb::OwnedData; use crate::duckdb::try_or; use crate::duckdb::try_or_null; @@ -43,12 +43,12 @@ pub(crate) unsafe extern "C-unwind" 
fn bind_callback( let column_types = unsafe { std::slice::from_raw_parts(column_types, column_type_count.as_()) } .iter() - .map(|c| unsafe { LogicalType::borrow(c.cast()) }) + .map(|c| unsafe { LogicalType::borrow(*c) }) .collect_vec(); try_or_null(error_out, || { let bind_data = T::bind(column_names, column_types)?; - Ok(Data::from(Box::new(bind_data)).as_ptr()) + Ok(OwnedData::from(Box::new(bind_data)).as_ptr()) }) } @@ -66,7 +66,7 @@ pub(crate) unsafe extern "C-unwind" fn global_callback( try_or_null(error_out, || { let ctx = unsafe { ClientContext::borrow(client_context) }; let bind_data = T::init_global(ctx, bind_data, file_path)?; - Ok(Data::from(Box::new(bind_data)).as_ptr()) + Ok(OwnedData::from(Box::new(bind_data)).as_ptr()) }) } @@ -78,7 +78,7 @@ pub(crate) unsafe extern "C-unwind" fn local_callback( unsafe { bind_data.cast::().as_ref() }.vortex_expect("bind_data null pointer"); try_or_null(error_out, || { let bind_data = T::init_local(bind_data)?; - Ok(Data::from(Box::new(bind_data)).as_ptr()) + Ok(OwnedData::from(Box::new(bind_data)).as_ptr()) }) } @@ -97,8 +97,8 @@ pub(crate) unsafe extern "C-unwind" fn copy_to_sink_callback( .vortex_expect("bind_data null pointer"); try_or(error_out, || { - T::copy_to_sink(bind_data, global_data, local_data, &mut unsafe { - DataChunk::borrow(data_chunk) + T::copy_to_sink(bind_data, global_data, local_data, unsafe { + DataChunk::borrow_mut(data_chunk) })?; Ok(()) }) diff --git a/vortex-duckdb/src/duckdb/copy_function/mod.rs b/vortex-duckdb/src/duckdb/copy_function/mod.rs index c347d670125..d0c4a95ccbd 100644 --- a/vortex-duckdb/src/duckdb/copy_function/mod.rs +++ b/vortex-duckdb/src/duckdb/copy_function/mod.rs @@ -29,7 +29,7 @@ pub trait CopyFunction: Sized + Debug { /// This function is used for determining the schema of a file produced by the function. 
fn bind( column_names: Vec, - column_types: Vec, + column_types: Vec<&LogicalType>, ) -> VortexResult; /// The function is called during query execution and is responsible for consuming the output @@ -50,7 +50,7 @@ pub trait CopyFunction: Sized + Debug { /// The global operator state is used to keep track of the progress in the copy function and /// is shared between all threads working on the copy function. fn init_global( - client_context: ClientContext, + client_context: &ClientContext, bind_data: &Self::BindData, file_path: String, ) -> VortexResult; diff --git a/vortex-duckdb/src/duckdb/data.rs b/vortex-duckdb/src/duckdb/data.rs index 7033312ea63..29642c2273e 100644 --- a/vortex-duckdb/src/duckdb/data.rs +++ b/vortex-duckdb/src/duckdb/data.rs @@ -3,14 +3,14 @@ use crate::cpp; use crate::duckdb::drop_boxed; -use crate::wrapper; +use crate::lifetime_wrapper; // This data wrapper is used to create an external data object that can be passed to and // freed by DuckDB. -wrapper!(Data, cpp::duckdb_vx_data, |_| {}); +lifetime_wrapper!(Data, cpp::duckdb_vx_data, |_| {}); -impl From> for Data { +impl From> for OwnedData { fn from(value: Box) -> Self { unsafe { Self::own(cpp::duckdb_vx_data_create( diff --git a/vortex-duckdb/src/duckdb/data_chunk.rs b/vortex-duckdb/src/duckdb/data_chunk.rs index bda0024eedc..d19a1e511ba 100644 --- a/vortex-duckdb/src/duckdb/data_chunk.rs +++ b/vortex-duckdb/src/duckdb/data_chunk.rs @@ -11,19 +11,18 @@ use vortex::error::vortex_bail; use crate::cpp; use crate::cpp::duckdb_logical_type; use crate::cpp::duckdb_vx_error; -use crate::duckdb::LogicalType; use crate::duckdb::Vector; -use crate::wrapper; +use crate::lifetime_wrapper; -wrapper!( +lifetime_wrapper!( DataChunk, cpp::duckdb_data_chunk, cpp::duckdb_destroy_data_chunk ); -impl DataChunk { +impl OwnedDataChunk { /// Create a new data chunk using a list of logical dtypes - pub fn new(column_types: impl AsRef<[LogicalType]>) -> DataChunk { + pub fn new(column_types: impl 
AsRef<[OwnedLogicalType]>) -> OwnedDataChunk { let mut ptrs = column_types .as_ref() .iter() @@ -31,9 +30,13 @@ impl DataChunk { .collect::>(); let ptr = unsafe { cpp::duckdb_create_data_chunk(ptrs.as_mut_ptr(), ptrs.len() as _) }; - unsafe { DataChunk::own(ptr) } + unsafe { OwnedDataChunk::own(ptr) } } +} + +use crate::duckdb::OwnedLogicalType; +impl DataChunk { /// Returns the column count of the data chunk. pub fn column_count(&self) -> usize { usize::try_from(unsafe { cpp::duckdb_data_chunk_get_column_count(self.as_ptr()) }) @@ -46,12 +49,17 @@ impl DataChunk { } /// Returns the vector at the specified column index. - pub fn get_vector(&self, idx: usize) -> Vector { + pub fn get_vector(&self, idx: usize) -> &Vector { unsafe { Vector::borrow(cpp::duckdb_data_chunk_get_vector(self.as_ptr(), idx as _)) } } + /// Returns a mutable reference to the vector at the specified column index. + pub fn get_vector_mut(&mut self, idx: usize) -> &mut Vector { + unsafe { Vector::borrow_mut(cpp::duckdb_data_chunk_get_vector(self.as_ptr(), idx as _)) } + } + pub fn len(&self) -> u64 { - unsafe { cpp::duckdb_data_chunk_get_size(self.ptr) } + unsafe { cpp::duckdb_data_chunk_get_size(self.as_ptr()) } } pub fn is_empty(&self) -> bool { diff --git a/vortex-duckdb/src/duckdb/database.rs b/vortex-duckdb/src/duckdb/database.rs index 2456e9936dc..9878071984d 100644 --- a/vortex-duckdb/src/duckdb/database.rs +++ b/vortex-duckdb/src/duckdb/database.rs @@ -12,19 +12,19 @@ use vortex::error::vortex_bail; use vortex::error::vortex_err; use crate::cpp; -use crate::duckdb::Config; -use crate::duckdb::connection::Connection; +use crate::duckdb::OwnedConfig; +use crate::duckdb::connection::OwnedConnection; use crate::duckdb_try; -use crate::wrapper; +use crate::lifetime_wrapper; -wrapper!( +lifetime_wrapper!( /// A DuckDB database instance. Database, duckdb_database, cpp::duckdb_close ); -impl Database { +impl OwnedDatabase { /// Creates a new DuckDB database instance in memory. 
pub fn open_in_memory() -> VortexResult { let mut ptr: duckdb_database = ptr::null_mut(); @@ -58,7 +58,7 @@ impl Database { /// Opens a DuckDB database from a file path with custom configuration. /// /// Creates a new file in case the path does not exist. - pub fn open_with_config>(path: P, config: Config) -> VortexResult { + pub fn open_with_config>(path: P, config: OwnedConfig) -> VortexResult { let path_str = path .as_ref() .to_str() @@ -69,11 +69,13 @@ impl Database { let mut ptr: duckdb_database = ptr::null_mut(); let mut error: *mut c_char = ptr::null_mut(); + // duckdb_open_ext borrows the config (copies it internally), so we pass as_ptr() + // and let the OwnedConfig drop naturally at the end of this function. let result = unsafe { cpp::duckdb_open_ext( path_cstr.as_ptr(), &raw mut ptr, - config.into_ptr(), + config.as_ptr(), &raw mut error, ) }; @@ -100,12 +102,14 @@ impl Database { } /// Opens an in-memory DuckDB database with custom configuration. - pub fn open_in_memory_with_config(config: Config) -> VortexResult { + pub fn open_in_memory_with_config(config: OwnedConfig) -> VortexResult { let mut ptr: duckdb_database = ptr::null_mut(); let mut error: *mut c_char = ptr::null_mut(); + // duckdb_open_ext borrows the config (copies it internally), so we pass as_ptr() + // and let the OwnedConfig drop naturally at the end of this function. let result = unsafe { - cpp::duckdb_open_ext(ptr::null(), &raw mut ptr, config.into_ptr(), &raw mut error) + cpp::duckdb_open_ext(ptr::null(), &raw mut ptr, config.as_ptr(), &raw mut error) }; if result != cpp::duckdb_state::DuckDBSuccess { @@ -124,15 +128,17 @@ impl Database { Ok(unsafe { Self::own(ptr) }) } +} +impl Database { /// Connects to the DuckDB database. 
- pub fn connect(&self) -> VortexResult { - Connection::connect(self) + pub fn connect(&self) -> VortexResult { + OwnedConnection::connect(self) } pub fn register_vortex_scan_replacement(&self) -> VortexResult<()> { duckdb_try!( - unsafe { cpp::duckdb_vx_register_scan_replacement(self.ptr) }, + unsafe { cpp::duckdb_vx_register_scan_replacement(self.as_ptr()) }, "Failed to register vortex scan replacement" ); Ok(()) @@ -145,11 +151,11 @@ mod tests { #[test] fn test_database_with_config() { - let mut config = Config::new().unwrap(); + let mut config = OwnedConfig::new().unwrap(); config.set("memory_limit", "512MB").unwrap(); config.set("threads", "1").unwrap(); - let db = Database::open_in_memory_with_config(config); + let db = OwnedDatabase::open_in_memory_with_config(config); assert!(db.is_ok()); let conn = db.unwrap().connect(); @@ -158,10 +164,10 @@ mod tests { #[test] fn test_file_database_with_config() { - let mut config = Config::new().unwrap(); + let mut config = OwnedConfig::new().unwrap(); config.set("memory_limit", "256MB").unwrap(); - let db = Database::open_with_config("test_config_unit.db", config); + let db = OwnedDatabase::open_with_config("test_config_unit.db", config); assert!(db.is_ok()); let conn = db.unwrap().connect(); diff --git a/vortex-duckdb/src/duckdb/ddb_string.rs b/vortex-duckdb/src/duckdb/ddb_string.rs index df5b513b8a6..20099505ddc 100644 --- a/vortex-duckdb/src/duckdb/ddb_string.rs +++ b/vortex-duckdb/src/duckdb/ddb_string.rs @@ -10,20 +10,35 @@ use vortex::error::VortexExpect; use vortex::error::vortex_err; use crate::cpp::duckdb_free; -use crate::wrapper; +use crate::lifetime_wrapper; -wrapper!( +lifetime_wrapper!( #[derive(Debug)] DDBString, *mut std::ffi::c_char, - |ptr: *mut std::ffi::c_char| { + |ptr: &mut *mut std::ffi::c_char| unsafe { duckdb_free((*ptr).cast()) } +); + +impl OwnedDDBString { + /// Creates an owned DDBString from a C string pointer, validating it is UTF-8. 
+ /// + /// # Safety + /// + /// The pointer must be a valid, non-null, null-terminated C string allocated by DuckDB. + pub unsafe fn from_c_str(ptr: *mut std::ffi::c_char) -> Self { unsafe { CStr::from_ptr(ptr) } .to_str() .map_err(|e| vortex_err!("Failed to convert C string to str: {e}")) - .vortex_expect("DuckDB string should be valid UTF-8") - }, - |ptr: &mut *mut std::ffi::c_char| unsafe { duckdb_free((*ptr).cast()) } -); + .vortex_expect("DuckDB string should be valid UTF-8"); + unsafe { Self::own(ptr) } + } +} + +impl Display for OwnedDDBString { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_ref()) + } +} impl Display for DDBString { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { @@ -33,8 +48,14 @@ impl Display for DDBString { impl AsRef for DDBString { fn as_ref(&self) -> &str { - // SAFETY: The string have been validated on construction. - unsafe { str::from_utf8_unchecked(CStr::from_ptr(self.ptr).to_bytes()) } + // SAFETY: The string has been validated on construction. 
+ unsafe { str::from_utf8_unchecked(CStr::from_ptr(self.as_ptr()).to_bytes()) } + } +} + +impl AsRef for OwnedDDBString { + fn as_ref(&self) -> &str { + (**self).as_ref() } } @@ -44,14 +65,26 @@ impl PartialEq for DDBString { } } +impl PartialEq for OwnedDDBString { + fn eq(&self, other: &Self) -> bool { + self.as_ref() == other.as_ref() + } +} + impl PartialEq for DDBString { fn eq(&self, other: &str) -> bool { self.as_ref() == other } } -impl From for FieldName { - fn from(value: DDBString) -> Self { +impl PartialEq for OwnedDDBString { + fn eq(&self, other: &str) -> bool { + self.as_ref() == other + } +} + +impl From for FieldName { + fn from(value: OwnedDDBString) -> Self { FieldName::from(value.as_ref()) } } diff --git a/vortex-duckdb/src/duckdb/expr.rs b/vortex-duckdb/src/duckdb/expr.rs index f3cfbe7e6a4..b1cf81d57ad 100644 --- a/vortex-duckdb/src/duckdb/expr.rs +++ b/vortex-duckdb/src/duckdb/expr.rs @@ -9,13 +9,12 @@ use std::ptr; use crate::cpp; use crate::cpp::duckdb_vx_expr_class; +use crate::duckdb::OwnedDDBString; use crate::duckdb::ScalarFunction; -use crate::duckdb::ValueRef; -use crate::duckdb::ddb_string::DDBString; -use crate::wrapper; +use crate::duckdb::Value; +use crate::lifetime_wrapper; -// TODO(joe): replace with lifetime_wrapper! 
-wrapper!(Expression, cpp::duckdb_vx_expr, cpp::duckdb_vx_destroy_expr); +lifetime_wrapper!(Expression, cpp::duckdb_vx_expr, cpp::duckdb_vx_destroy_expr); impl Display for Expression { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { @@ -39,16 +38,14 @@ impl Expression { cpp::DUCKDB_VX_EXPR_CLASS::DUCKDB_VX_EXPR_CLASS_BOUND_COLUMN_REF => { let name = unsafe { let ptr = cpp::duckdb_vx_expr_get_bound_column_ref_get_name(self.as_ptr()); - DDBString::own(ptr.cast_mut()) + OwnedDDBString::own(ptr.cast_mut()) }; ExpressionClass::BoundColumnRef(BoundColumnRef { name }) } cpp::DUCKDB_VX_EXPR_CLASS::DUCKDB_VX_EXPR_CLASS_BOUND_CONSTANT => { let value = unsafe { - ValueRef::borrow(cpp::duckdb_vx_expr_bound_constant_get_value( - self.as_ptr(), - )) + Value::borrow(cpp::duckdb_vx_expr_bound_constant_get_value(self.as_ptr())) }; ExpressionClass::BoundConstant(BoundConstant { value }) } @@ -151,31 +148,31 @@ impl Expression { pub enum ExpressionClass<'a> { BoundColumnRef(BoundColumnRef), BoundConstant(BoundConstant<'a>), - BoundComparison(BoundComparison), + BoundComparison(BoundComparison<'a>), BoundConjunction(BoundConjunction<'a>), - BoundBetween(BoundBetween), + BoundBetween(BoundBetween<'a>), BoundOperator(BoundOperator<'a>), BoundFunction(BoundFunction<'a>), } pub struct BoundColumnRef { - pub name: DDBString, + pub name: OwnedDDBString, } pub struct BoundConstant<'a> { - pub value: ValueRef<'a>, + pub value: &'a Value, } -pub struct BoundComparison { - pub left: Expression, - pub right: Expression, +pub struct BoundComparison<'a> { + pub left: &'a Expression, + pub right: &'a Expression, pub op: cpp::DUCKDB_VX_EXPR_TYPE, } -pub struct BoundBetween { - pub input: Expression, - pub lower: Expression, - pub upper: Expression, +pub struct BoundBetween<'a> { + pub input: &'a Expression, + pub lower: &'a Expression, + pub upper: &'a Expression, pub lower_inclusive: bool, pub upper_inclusive: bool, } @@ -185,9 +182,9 @@ pub struct BoundConjunction<'a> { pub op: 
cpp::DUCKDB_VX_EXPR_TYPE, } -impl BoundConjunction<'_> { - /// Returns the children expressions of the bound operator. - pub fn children(&self) -> impl Iterator { +impl<'a> BoundConjunction<'a> { + /// Returns the children expressions of the bound conjunction. + pub fn children(&self) -> impl Iterator + 'a { self.children .iter() .map(|&child| unsafe { Expression::borrow(child) }) @@ -199,9 +196,9 @@ pub struct BoundOperator<'a> { pub op: cpp::DUCKDB_VX_EXPR_TYPE, } -impl BoundOperator<'_> { +impl<'a> BoundOperator<'a> { /// Returns the children expressions of the bound operator. - pub fn children(&self) -> impl Iterator { + pub fn children(&self) -> impl Iterator + 'a { self.children .iter() .map(|&child| unsafe { Expression::borrow(child) }) @@ -210,13 +207,13 @@ impl BoundOperator<'_> { pub struct BoundFunction<'a> { children: &'a [cpp::duckdb_vx_expr], - pub scalar_function: ScalarFunction, + pub scalar_function: &'a ScalarFunction, pub bind_info: *const c_void, } -impl BoundFunction<'_> { +impl<'a> BoundFunction<'a> { /// Returns the children expressions of the bound function. 
- pub fn children(&self) -> impl Iterator { + pub fn children(&self) -> impl Iterator + 'a { self.children .iter() .map(|&child| unsafe { Expression::borrow(child) }) diff --git a/vortex-duckdb/src/duckdb/file_system.rs b/vortex-duckdb/src/duckdb/file_system.rs index c7eb6272d59..b43f4fdaabd 100644 --- a/vortex-duckdb/src/duckdb/file_system.rs +++ b/vortex-duckdb/src/duckdb/file_system.rs @@ -21,11 +21,10 @@ use crate::lifetime_wrapper; lifetime_wrapper!( FsFileHandle, cpp::duckdb_vx_file_handle, - cpp::duckdb_vx_fs_close, - [owned] + cpp::duckdb_vx_fs_close ); -unsafe impl Send for FsFileHandle {} -unsafe impl Sync for FsFileHandle {} +unsafe impl Send for OwnedFsFileHandle {} +unsafe impl Sync for OwnedFsFileHandle {} pub(crate) fn fs_error(err: cpp::duckdb_vx_error) -> VortexError { if err.is_null() { @@ -88,29 +87,23 @@ unsafe extern "C-unwind" fn list_files_callback( entries.push(DirEntry { name, is_dir }); } -pub(crate) unsafe fn duckdb_fs_create_writer( - ctx: cpp::duckdb_client_context, - path: &str, -) -> VortexResult { - unsafe { DuckDbFsWriter::create(ctx, path) } -} - pub(crate) struct DuckDbFsWriter { - handle: Arc, + handle: Arc, pos: u64, } impl DuckDbFsWriter { - pub(crate) unsafe fn create(ctx: cpp::duckdb_client_context, path: &str) -> VortexResult { + pub(crate) fn new(ctx: &ClientContext, path: &str) -> VortexResult { let c_path = CString::new(path).map_err(|e| vortex_err!("Invalid path: {e}"))?; let mut err: cpp::duckdb_vx_error = ptr::null_mut(); - let file_handle = unsafe { cpp::duckdb_vx_fs_create(ctx, c_path.as_ptr(), &raw mut err) }; + let file_handle = + unsafe { cpp::duckdb_vx_fs_create(ctx.as_ptr(), c_path.as_ptr(), &raw mut err) }; if file_handle.is_null() { return Err(fs_error(err)); } Ok(Self { - handle: Arc::new(unsafe { FsFileHandle::own(file_handle) }), + handle: Arc::new(unsafe { OwnedFsFileHandle::own(file_handle) }), pos: 0, }) } @@ -177,11 +170,11 @@ mod tests { use std::path::PathBuf; use super::*; - use 
crate::duckdb::Database; + use crate::duckdb::OwnedDatabase; #[test] fn test_writer_roundtrip_local() { - let db = Database::open_in_memory().unwrap(); + let db = OwnedDatabase::open_in_memory().unwrap(); let conn = db.connect().unwrap(); let ctx = conn.client_context().unwrap(); @@ -189,7 +182,7 @@ mod tests { let path: PathBuf = dir.path().join("writer_local.vortex"); let path_str = path.to_string_lossy(); - let mut writer = unsafe { duckdb_fs_create_writer(ctx.as_ptr(), &path_str) }.unwrap(); + let mut writer = DuckDbFsWriter::new(ctx, &path_str).unwrap(); futures::executor::block_on(async { VortexWrite::write_all(&mut writer, vec![1_u8, 2, 3]) diff --git a/vortex-duckdb/src/duckdb/footer_cache.rs b/vortex-duckdb/src/duckdb/footer_cache.rs index cdc7869e044..bd867fd88ff 100644 --- a/vortex-duckdb/src/duckdb/footer_cache.rs +++ b/vortex-duckdb/src/duckdb/footer_cache.rs @@ -4,14 +4,14 @@ use vortex::file::Footer; use vortex::file::VortexOpenOptions; -use crate::duckdb::ObjectCacheRef; +use crate::duckdb::ObjectCache; pub struct FooterCache<'a> { - object_cache: ObjectCacheRef<'a>, + object_cache: &'a ObjectCache, } pub struct Entry<'a> { - object_cache: ObjectCacheRef<'a>, + object_cache: &'a ObjectCache, key: String, value: Option<&'a Footer>, } @@ -34,7 +34,7 @@ impl Entry<'_> { } impl<'a> FooterCache<'a> { - pub fn new(object_cache: ObjectCacheRef<'a>) -> Self { + pub fn new(object_cache: &'a ObjectCache) -> Self { Self { object_cache } } diff --git a/vortex-duckdb/src/duckdb/logical_type.rs b/vortex-duckdb/src/duckdb/logical_type.rs index d21c4126345..a9541848f10 100644 --- a/vortex-duckdb/src/duckdb/logical_type.rs +++ b/vortex-duckdb/src/duckdb/logical_type.rs @@ -10,37 +10,37 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use crate::cpp::*; -use crate::duckdb::ddb_string::DDBString; -use crate::wrapper; +use crate::duckdb::ddb_string::OwnedDDBString; +use crate::lifetime_wrapper; -wrapper!( +lifetime_wrapper!( LogicalType, 
duckdb_logical_type, duckdb_destroy_logical_type ); -/// `LogicalType` is Send, as the wrapped pointer and bool are Send. -unsafe impl Send for LogicalType {} -unsafe impl Sync for LogicalType {} +/// `OwnedLogicalType` is Send+Sync, as the wrapped pointer is Send+Sync. +unsafe impl Send for OwnedLogicalType {} +unsafe impl Sync for OwnedLogicalType {} -impl Clone for LogicalType { +impl Clone for OwnedLogicalType { fn clone(&self) -> Self { unsafe { Self::own(duckdb_vx_logical_type_copy(self.as_ptr())) } } } -impl LogicalType { +impl OwnedLogicalType { pub fn new(dtype: DUCKDB_TYPE) -> Self { unsafe { Self::own(duckdb_create_logical_type(dtype)) } } /// Creates a DuckDB struct logical type from child types and field names. - pub fn struct_type(child_types: T, child_names: N) -> VortexResult + pub fn struct_type(child_types: T, child_names: N) -> VortexResult where - T: IntoIterator, + T: IntoIterator, N: IntoIterator, { - let child_types: Vec = child_types.into_iter().collect(); + let child_types: Vec = child_types.into_iter().collect(); let child_names: Vec = child_names.into_iter().collect(); let mut child_type_ptrs: Vec = @@ -79,7 +79,7 @@ impl LogicalType { } /// Creates a DuckDB list logical type with the specified element type. - pub fn list_type(element_type: LogicalType) -> VortexResult { + pub fn list_type(element_type: OwnedLogicalType) -> VortexResult { let ptr = unsafe { duckdb_create_list_type(element_type.as_ptr()) }; if ptr.is_null() { @@ -91,7 +91,7 @@ impl LogicalType { /// Creates a DuckDB fixed-size list logical type with the specified element type and list size. /// /// Note that DuckDB calls what we call a fixed-size list the ARRAY type. - pub fn array_type(element_type: LogicalType, list_size: u32) -> VortexResult { + pub fn array_type(element_type: OwnedLogicalType, list_size: u32) -> VortexResult { // SAFETY: We trust that DuckDB correctly gives us a valid pointer or `NULL`. 
let ptr = unsafe { duckdb_create_array_type(element_type.as_ptr(), list_size as idx_t) }; @@ -115,10 +115,6 @@ impl LogicalType { } } - pub fn as_type_id(&self) -> DUCKDB_TYPE { - unsafe { duckdb_get_type_id(self.as_ptr()) } - } - pub fn null() -> Self { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_SQLNULL) } @@ -170,6 +166,12 @@ impl LogicalType { pub fn timestamp_tz() -> Self { Self::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_TZ) } +} + +impl LogicalType { + pub fn as_type_id(&self) -> DUCKDB_TYPE { + unsafe { duckdb_get_type_id(self.as_ptr()) } + } pub fn as_decimal(&self) -> (u8, u8) { unsafe { @@ -180,8 +182,8 @@ impl LogicalType { } } - pub fn array_child_type(&self) -> Self { - unsafe { LogicalType::own(duckdb_array_type_child_type(self.as_ptr())) } + pub fn array_child_type(&self) -> OwnedLogicalType { + unsafe { OwnedLogicalType::own(duckdb_array_type_child_type(self.as_ptr())) } } pub fn array_type_array_size(&self) -> u32 { @@ -189,24 +191,24 @@ impl LogicalType { .vortex_expect("Array size must fit in u32") } - pub fn list_child_type(&self) -> Self { - unsafe { LogicalType::own(duckdb_list_type_child_type(self.as_ptr())) } + pub fn list_child_type(&self) -> OwnedLogicalType { + unsafe { OwnedLogicalType::own(duckdb_list_type_child_type(self.as_ptr())) } } - pub fn map_key_type(&self) -> Self { - unsafe { LogicalType::own(duckdb_map_type_key_type(self.as_ptr())) } + pub fn map_key_type(&self) -> OwnedLogicalType { + unsafe { OwnedLogicalType::own(duckdb_map_type_key_type(self.as_ptr())) } } - pub fn map_value_type(&self) -> Self { - unsafe { LogicalType::own(duckdb_map_type_value_type(self.as_ptr())) } + pub fn map_value_type(&self) -> OwnedLogicalType { + unsafe { OwnedLogicalType::own(duckdb_map_type_value_type(self.as_ptr())) } } - pub fn struct_child_type(&self, idx: usize) -> Self { - unsafe { LogicalType::own(duckdb_struct_type_child_type(self.as_ptr(), idx as idx_t)) } + pub fn struct_child_type(&self, idx: usize) -> OwnedLogicalType { + unsafe { 
OwnedLogicalType::own(duckdb_struct_type_child_type(self.as_ptr(), idx as idx_t)) } } - pub fn struct_child_name(&self, idx: usize) -> DDBString { - unsafe { DDBString::own(duckdb_struct_type_child_name(self.as_ptr(), idx as idx_t)) } + pub fn struct_child_name(&self, idx: usize) -> OwnedDDBString { + unsafe { OwnedDDBString::own(duckdb_struct_type_child_name(self.as_ptr(), idx as idx_t)) } } pub fn struct_type_child_count(&self) -> usize { @@ -214,12 +216,12 @@ impl LogicalType { .vortex_expect("Struct type child count must fit in usize") } - pub fn union_member_type(&self, idx: usize) -> Self { - unsafe { LogicalType::own(duckdb_union_type_member_type(self.as_ptr(), idx as idx_t)) } + pub fn union_member_type(&self, idx: usize) -> OwnedLogicalType { + unsafe { OwnedLogicalType::own(duckdb_union_type_member_type(self.as_ptr(), idx as idx_t)) } } - pub fn union_member_name(&self, idx: usize) -> DDBString { - unsafe { DDBString::own(duckdb_union_type_member_name(self.as_ptr(), idx as idx_t)) } + pub fn union_member_name(&self, idx: usize) -> OwnedDDBString { + unsafe { OwnedDDBString::own(duckdb_union_type_member_name(self.as_ptr(), idx as idx_t)) } } pub fn union_member_count(&self) -> usize { @@ -230,11 +232,17 @@ impl LogicalType { impl Debug for LogicalType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let debug = unsafe { DDBString::own(duckdb_vx_logical_type_stringify(self.as_ptr())) }; + let debug = unsafe { OwnedDDBString::own(duckdb_vx_logical_type_stringify(self.as_ptr())) }; write!(f, "{}", debug) } } +impl Debug for OwnedLogicalType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + Debug::fmt(&**self, f) + } +} + /// A trait representing the DuckDB logical types. 
pub trait DuckDBType {} @@ -358,7 +366,6 @@ floating_type!(Double, f64); #[allow(clippy::cast_possible_truncation)] mod tests { use super::*; - use crate::duckdb::LogicalType; #[test] fn test_clone_logical_type() { @@ -366,45 +373,32 @@ mod tests { ..=DUCKDB_TYPE::DUCKDB_TYPE_INTEGER_LITERAL as u32) .map(|variant| unsafe { std::mem::transmute::(variant) }) .filter(|&variant| { - // `LogicalType::new` calls the DuckDB C API function - // `duckdb_create_logical_type` with just the type enum. - // - // Though complex types require additional parameters: let excluded_types = [ - // Needs width and scale parameters DUCKDB_TYPE::DUCKDB_TYPE_DECIMAL, - // Needs the enum dictionary/values DUCKDB_TYPE::DUCKDB_TYPE_ENUM, - // Needs the child element type DUCKDB_TYPE::DUCKDB_TYPE_LIST, - // Needs field names and their type DUCKDB_TYPE::DUCKDB_TYPE_STRUCT, - // Needs key and value types DUCKDB_TYPE::DUCKDB_TYPE_MAP, - // Needs member types DUCKDB_TYPE::DUCKDB_TYPE_UNION, - // Needs element type and array size DUCKDB_TYPE::DUCKDB_TYPE_ARRAY, ]; !excluded_types.contains(&variant) }) { - assert_eq!(LogicalType::new(variant).clone().as_type_id(), variant); + assert_eq!(OwnedLogicalType::new(variant).clone().as_type_id(), variant); } } #[test] fn test_clone_decimal_logical_type() { let decimal_type = - LogicalType::decimal_type(10, 2).vortex_expect("Failed to create decimal type"); + OwnedLogicalType::decimal_type(10, 2).vortex_expect("Failed to create decimal type"); #[allow(clippy::redundant_clone)] let cloned = decimal_type.clone(); assert_eq!(decimal_type.as_type_id(), cloned.as_type_id()); - // Further verify the parameters are preserved. 
let (original_width, original_scale) = decimal_type.as_decimal(); - let (cloned_width, cloned_scale) = cloned.as_decimal(); assert_eq!(original_width, cloned_width); @@ -413,10 +407,9 @@ mod tests { #[test] fn test_clone_list_logical_type() { - // Create a list of integers - let int_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let int_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); let list_type = - LogicalType::list_type(int_type).vortex_expect("Failed to create list type"); + OwnedLogicalType::list_type(int_type).vortex_expect("Failed to create list type"); #[allow(clippy::redundant_clone)] let cloned = list_type.clone(); @@ -424,7 +417,6 @@ mod tests { assert_eq!(list_type.as_type_id(), cloned.as_type_id()); assert_eq!(list_type.as_type_id(), DUCKDB_TYPE::DUCKDB_TYPE_LIST); - // Verify the child type is preserved let original_child = list_type.list_child_type(); let cloned_child = cloned.list_child_type(); @@ -437,17 +429,17 @@ mod tests { #[test] fn test_clone_array_logical_type() { - // Create an array of strings with size 5 - let array_type = - LogicalType::array_type(LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR), 5) - .vortex_expect("Failed to create array type"); + let array_type = OwnedLogicalType::array_type( + OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR), + 5, + ) + .vortex_expect("Failed to create array type"); #[allow(clippy::redundant_clone)] let cloned = array_type.clone(); assert_eq!(array_type.as_type_id(), cloned.as_type_id()); assert_eq!(array_type.as_type_id(), DUCKDB_TYPE::DUCKDB_TYPE_ARRAY); - // Verify the child type is preserved let original_child = array_type.array_child_type(); let cloned_child = cloned.array_child_type(); @@ -457,7 +449,6 @@ mod tests { assert_eq!(original_child_type_id, cloned_child_type_id); assert_eq!(original_child_type_id, DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR); - // Verify the array size is preserved let original_size = array_type.array_type_array_size(); let cloned_size = 
cloned.array_type_array_size(); @@ -467,11 +458,10 @@ mod tests { #[test] fn test_clone_map_logical_type() { - // Create a map of string -> integer - let key_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR); - let value_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let key_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR); + let value_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); let map_type = unsafe { - LogicalType::own(duckdb_create_map_type( + OwnedLogicalType::own(duckdb_create_map_type( key_type.as_ptr(), value_type.as_ptr(), )) @@ -483,7 +473,6 @@ mod tests { assert_eq!(map_type.as_type_id(), cloned.as_type_id()); assert_eq!(map_type.as_type_id(), DUCKDB_TYPE::DUCKDB_TYPE_MAP); - // Verify the key and value types are preserved let original_key = map_type.map_key_type(); let original_value = map_type.map_value_type(); let cloned_key = cloned.map_key_type(); @@ -500,14 +489,13 @@ mod tests { #[test] fn test_clone_struct_logical_type() { - // Create a struct with two fields: {name: VARCHAR, age: INTEGER} - let name_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR); - let age_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let name_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR); + let age_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); let member_types = vec![name_type, age_type]; let member_names = vec![CString::new("name").unwrap(), CString::new("age").unwrap()]; - let struct_type = LogicalType::struct_type(member_types, member_names) + let struct_type = OwnedLogicalType::struct_type(member_types, member_names) .vortex_expect("Failed to create struct type"); #[allow(clippy::redundant_clone)] @@ -516,13 +504,11 @@ mod tests { assert_eq!(struct_type.as_type_id(), cloned.as_type_id()); assert_eq!(struct_type.as_type_id(), DUCKDB_TYPE::DUCKDB_TYPE_STRUCT); - // Verify the child count is preserved let original_count = struct_type.struct_type_child_count(); 
let cloned_count = cloned.struct_type_child_count(); assert_eq!(original_count, cloned_count); assert_eq!(original_count, 2); - // Verify each field for idx in 0..original_count { let original_child_type = struct_type.struct_child_type(idx); let cloned_child_type = cloned.struct_child_type(idx); @@ -540,9 +526,8 @@ mod tests { #[test] fn test_clone_union_logical_type() { - // Create a union with two members: {str: VARCHAR, num: INTEGER} - let str_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR); - let num_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let str_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR); + let num_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); let mut member_types = vec![str_type.as_ptr(), num_type.as_ptr()]; let str_cstr = CString::new("str").unwrap(); @@ -550,7 +535,7 @@ mod tests { let mut member_names = vec![str_cstr.as_ptr(), num_cstr.as_ptr()]; let union_type = unsafe { - LogicalType::own(duckdb_create_union_type( + OwnedLogicalType::own(duckdb_create_union_type( member_types.as_mut_ptr(), member_names.as_mut_ptr(), 2, @@ -563,13 +548,11 @@ mod tests { assert_eq!(union_type.as_type_id(), cloned.as_type_id()); assert_eq!(union_type.as_type_id(), DUCKDB_TYPE::DUCKDB_TYPE_UNION); - // Verify the member count is preserved let original_count = union_type.union_member_count(); let cloned_count = cloned.union_member_count(); assert_eq!(original_count, cloned_count); assert_eq!(original_count, 2); - // Verify each member for idx in 0..original_count { let original_member_type = union_type.union_member_type(idx); let cloned_member_type = cloned.union_member_type(idx); diff --git a/vortex-duckdb/src/duckdb/macro_.rs b/vortex-duckdb/src/duckdb/macro_.rs index 8e813227836..7b42b56a050 100644 --- a/vortex-duckdb/src/duckdb/macro_.rs +++ b/vortex-duckdb/src/duckdb/macro_.rs @@ -25,429 +25,118 @@ macro_rules! 
duckdb_try { }; } +/// Generates two FFI pointer wrapper types using the opaque-pointee pattern: +/// +/// - `$Name` — an opaque ZST pointee type, never constructed directly. +/// Used only behind pointers/references (`&$Name`, `&mut $Name`). +/// +/// - `Owned$Name` — an owned handle that calls the destructor on drop. +/// Derefs to `&$Name` / `&mut $Name`. #[macro_export] macro_rules! lifetime_wrapper { - // Full variant with all types (Owned + Ref + MutRef) + // Main form: generates opaque $Name + Owned$Name. ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, $destructor:expr) => { - $crate::lifetime_wrapper!($(#[$meta])* $Name, $ffi_type, $destructor, [owned, ref, mut_ref]); - }; - - // Selective variant - specify which types to generate - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, $destructor:expr, [$($variant:ident),*]) => { - $crate::lifetime_wrapper_impl!($(#[$meta])* $Name, $ffi_type, $destructor, [$($variant),*]); - }; -} - -#[macro_export] -macro_rules! lifetime_wrapper_impl { - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, $destructor:expr, [$($variant:ident),*]) => { - // Step 1: Generate all type definitions first - $crate::lifetime_wrapper_generate_owned_type!($(#[$meta])* $Name, $ffi_type, [$($variant),*]); - $crate::lifetime_wrapper_generate_ref_type!($(#[$meta])* $Name, $ffi_type, [$($variant),*]); - $crate::lifetime_wrapper_generate_mut_ref_type!($(#[$meta])* $Name, $ffi_type, [$($variant),*]); - - // Step 2: Generate all implementations that may reference other types - $crate::lifetime_wrapper_generate_owned_impl!($(#[$meta])* $Name, $ffi_type, $destructor, [$($variant),*]); - $crate::lifetime_wrapper_generate_ref_impl!($(#[$meta])* $Name, $ffi_type, [$($variant),*]); - $crate::lifetime_wrapper_generate_mut_ref_impl!($(#[$meta])* $Name, $ffi_type, [$($variant),*]); - - // Step 3: Generate cross-type relationships (Deref, etc.) 
- $crate::lifetime_wrapper_generate_relationships!($(#[$meta])* $Name, $ffi_type, [$($variant),*]); - }; -} - -// Generate owned type definition only -#[macro_export] -macro_rules! lifetime_wrapper_generate_owned_type { - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [owned $(, $rest:ident)*]) => { - // Owned version that manages the FFI pointer's lifetime - $(#[$meta])* - pub struct $Name { - ptr: $ffi_type, - owned: bool, - } - }; - // Skip if 'owned' is not in the list - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [$($other:ident),*]) => {}; -} - -// Generate owned implementation -#[macro_export] -macro_rules! lifetime_wrapper_generate_owned_impl { - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, $destructor:expr,[owned $(, $rest:ident)*]) => { - #[allow(dead_code)] - impl $Name { - /// Takes ownership of the memory. The Rust wrapper becomes - /// responsible for calling the destructor when dropped. - /// This is the only constructor for the owned variant. - pub unsafe fn own(ptr: $ffi_type) -> Self { - if ptr.is_null() { - vortex::error::vortex_panic!( - "Attempted to create a wrapper from a null pointer" - ); - } - Self { ptr, owned: true } - } - - /// Returns the raw pointer. - pub fn as_ptr(&self) -> $ffi_type { - self.ptr - } - - /// Release ownership and return the raw pointer. - pub fn into_ptr(mut self) -> $ffi_type { - assert!(self.owned, "Cannot take ownership of unowned ptr"); - self.owned = false; // Prevent destructor from being called - self.ptr - } - } - - impl Drop for $Name { - fn drop(&mut self) { - if self.owned { - let destructor = $destructor; - #[allow(unused_unsafe)] - unsafe { - destructor(&mut self.ptr) - } - } - } - } - }; - // Skip if 'owned' is not in the list - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, $destructor:expr,[$($other:ident),*]) => {}; -} - -// Generate ref type definition only -#[macro_export] -macro_rules! 
lifetime_wrapper_generate_ref_type { - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [ref $(, $rest:ident)*]) => { paste::paste! { - // Borrowed version with explicit lifetime parameter - $(#[$meta])* - pub struct [<$Name Ref>]<'a> { - ptr: $ffi_type, - _lifetime: std::marker::PhantomData<&'a ()>, - } - } - }; - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [owned, ref $(, $rest:ident)*]) => { - paste::paste! { - // Borrowed version with explicit lifetime parameter + // --- Opaque pointee (never constructed, used as &$Name / &mut $Name) --- + $(#[$meta])* - pub struct [<$Name Ref>]<'a> { - ptr: $ffi_type, - _lifetime: std::marker::PhantomData<&'a ()>, - } - } - }; - // Skip if 'ref' is not in the list - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [$($other:ident),*]) => {}; -} + #[allow(dead_code)] + pub struct $Name(()); -// Generate ref implementation -#[macro_export] -macro_rules! lifetime_wrapper_generate_ref_impl { - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [ref $(, $rest:ident)*]) => { - paste::paste! { #[allow(dead_code)] - impl<'a> [<$Name Ref>]<'a> { - /// Borrows the pointer without taking ownership. - /// This is the only constructor for the ref variant. - pub unsafe fn borrow(ptr: $ffi_type) -> Self { + impl $Name { + /// Borrows the pointer as an immutable reference with explicit lifetime. + /// + /// # Safety + /// + /// The pointer must be valid for the lifetime `'a` and must not be null. + pub unsafe fn borrow<'a>(ptr: $ffi_type) -> &'a Self { if ptr.is_null() { - vortex::error::vortex_panic!("Attempted to create a wrapper ref from a null pointer"); - } - Self { - ptr, - _lifetime: std::marker::PhantomData, + vortex::error::vortex_panic!( + "Attempted to borrow from a null pointer" + ); } + unsafe { &*(ptr as *const Self) } } - /// Returns the raw pointer. 
- pub fn as_ptr(&self) -> $ffi_type { - self.ptr - } - } - - // Ref versions can be Copy since they're just borrowed pointers - impl<'a> Copy for [<$Name Ref>]<'a> {} - impl<'a> Clone for [<$Name Ref>]<'a> { - fn clone(&self) -> Self { - *self - } - } - } - }; - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [owned, ref $(, $rest:ident)*]) => { - paste::paste! { - #[allow(dead_code)] - impl<'a> [<$Name Ref>]<'a> { - /// Borrows the pointer without taking ownership. - /// This is the only constructor for the ref variant. - pub unsafe fn borrow(ptr: $ffi_type) -> Self { + /// Borrows the pointer as a mutable reference with explicit lifetime. + /// + /// # Safety + /// + /// The pointer must be valid for the lifetime `'a`, must not be null, + /// and no other references to the same pointer must exist. + pub unsafe fn borrow_mut<'a>(ptr: $ffi_type) -> &'a mut Self { if ptr.is_null() { - vortex::error::vortex_panic!("Attempted to create a wrapper ref from a null pointer"); - } - Self { - ptr, - _lifetime: std::marker::PhantomData, + vortex::error::vortex_panic!( + "Attempted to borrow_mut from a null pointer" + ); } + unsafe { &mut *(ptr as *mut Self) } } - /// Returns the raw pointer. + /// Returns the raw FFI pointer. pub fn as_ptr(&self) -> $ffi_type { - self.ptr + (self as *const Self).cast_mut().cast() } } - // Ref versions can be Copy since they're just borrowed pointers - impl<'a> Copy for [<$Name Ref>]<'a> {} - impl<'a> Clone for [<$Name Ref>]<'a> { - fn clone(&self) -> Self { - *self - } - } - } - }; - // Skip if 'ref' is not in the list - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [$($other:ident),*]) => {}; -} + // --- Owned handle (calls destructor on drop) --- -// Generate mut_ref type definition only -#[macro_export] -macro_rules! lifetime_wrapper_generate_mut_ref_type { - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [mut_ref $(, $rest:ident)*]) => { - paste::paste! 
{ - // Mutable borrowed version with explicit lifetime parameter $(#[$meta])* - pub struct [<$Name MutRef>]<'a> { - ptr: $ffi_type, - _lifetime: std::marker::PhantomData<&'a mut ()>, - } - } - }; - // Skip if 'mut_ref' is not in the list - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [$($other:ident),*]) => {}; -} + #[allow(dead_code)] + pub struct [<Owned $Name>]($ffi_type); -// Generate mut_ref implementation -#[macro_export] -macro_rules! lifetime_wrapper_generate_mut_ref_impl { - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [mut_ref $(, $rest:ident)*]) => { - paste::paste! { #[allow(dead_code)] - impl<'a> [<$Name MutRef>]<'a> { - /// Borrows the pointer mutably without taking ownership. - /// This is the only constructor for the mut_ref variant. - pub unsafe fn borrow_mut(ptr: $ffi_type) -> Self { + impl [<Owned $Name>] { + /// Takes ownership of the pointer. The owned handle becomes + /// responsible for calling the destructor when dropped. + /// + /// # Safety + /// + /// The pointer must be valid and the caller must transfer ownership. + pub unsafe fn own(ptr: $ffi_type) -> Self { if ptr.is_null() { - vortex::error::vortex_panic!("Attempted to create a wrapper mut ref from a null pointer"); - } - Self { - ptr, - _lifetime: std::marker::PhantomData, + vortex::error::vortex_panic!( + "Attempted to create an owned wrapper from a null pointer" + ); } + Self(ptr) } - /// Returns the raw pointer. - pub fn as_ptr(&self) -> $ffi_type { - self.ptr + /// Releases ownership and returns the raw pointer without + /// calling the destructor. + pub fn into_ptr(self) -> $ffi_type { + let this = std::mem::ManuallyDrop::new(self); + this.0 } } - } - }; - // Skip if 'mut_ref' is not in the list - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [$($other:ident),*]) => {}; -} - -// Generate cross-type relationships (Deref, conversion methods, etc.) -#[macro_export] -macro_rules! 
lifetime_wrapper_generate_relationships { - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, [$($variant:ident),*]) => { - // Generate Deref from MutRef to Ref if both exist - $crate::lifetime_wrapper_mut_ref_deref_if_both_exist!($Name, [$($variant),*]); - // Generate owned methods that reference other types - $crate::lifetime_wrapper_owned_cross_methods!($Name, [$($variant),*]); - }; -} - -// Generate methods for owned type that reference other types -#[macro_export] -macro_rules! lifetime_wrapper_owned_cross_methods { - ($Name:ident, [$($variant:ident),*]) => { - $crate::lifetime_wrapper_generate_as_ref_if_needed!($Name, [$($variant),*]); - $crate::lifetime_wrapper_generate_as_mut_ref_if_needed!($Name, [$($variant),*]); - }; -} - -// Generate as_ref method if both owned and ref exist -#[macro_export] -macro_rules! lifetime_wrapper_generate_as_ref_if_needed { - ($Name:ident, [owned, ref $(, $rest:ident)*]) => { - paste::paste! { - impl $Name { - /// Convert to a borrowed reference with explicit lifetime - pub fn as_ref(&self) -> [<$Name Ref>]<'_> { - [<$Name Ref>] { - ptr: self.ptr, - _lifetime: std::marker::PhantomData, - } - } - } - } - }; - ($Name:ident, [ref, owned $(, $rest:ident)*]) => { - paste::paste! { - impl $Name { - /// Convert to a borrowed reference with explicit lifetime - pub fn as_ref(&self) -> [<$Name Ref>]<'_> { - [<$Name Ref>] { - ptr: self.ptr, - _lifetime: std::marker::PhantomData, - } - } - } - } - }; - ($Name:ident, [$first:ident $(, $rest:ident)*]) => { - $crate::lifetime_wrapper_generate_as_ref_if_needed!($Name, [$($rest),*]); - }; - ($Name:ident, []) => {}; -} - -// Generate as_mut_ref method if both owned and mut_ref exist -#[macro_export] -macro_rules! lifetime_wrapper_generate_as_mut_ref_if_needed { - ($Name:ident, [owned, mut_ref $(, $rest:ident)*]) => { - paste::paste! 
{ - impl $Name { - /// Convert to a mutable borrowed reference with explicit lifetime - pub fn as_mut_ref(&mut self) -> [<$Name MutRef>]<'_> { - [<$Name MutRef>] { - ptr: self.ptr, - _lifetime: std::marker::PhantomData, - } - } - } - } - }; - ($Name:ident, [mut_ref, owned $(, $rest:ident)*]) => { - paste::paste! { - impl $Name { - /// Convert to a mutable borrowed reference with explicit lifetime - pub fn as_mut_ref(&mut self) -> [<$Name MutRef>]<'_> { - [<$Name MutRef>] { - ptr: self.ptr, - _lifetime: std::marker::PhantomData, - } - } - } - } - }; - ($Name:ident, [$first:ident $(, $rest:ident)*]) => { - $crate::lifetime_wrapper_generate_as_mut_ref_if_needed!($Name, [$($rest),*]); - }; - ($Name:ident, []) => {}; -} - -// Generate Deref implementation for MutRef -> Ref if both exist -#[macro_export] -macro_rules! lifetime_wrapper_mut_ref_deref_if_both_exist { - ($Name:ident, [mut_ref, ref $(, $rest:ident)*]) => { - $crate::lifetime_wrapper_generate_deref!($Name); - }; - ($Name:ident, [ref, mut_ref $(, $rest:ident)*]) => { - $crate::lifetime_wrapper_generate_deref!($Name); - }; - ($Name:ident, [mut_ref $(, $rest:ident)*]) => { - $crate::lifetime_wrapper_mut_ref_deref_if_both_exist!($Name, [$($rest),*]); - }; - ($Name:ident, [ref $(, $rest:ident)*]) => { - $crate::lifetime_wrapper_mut_ref_deref_if_both_exist!($Name, [$($rest),*]); - }; - ($Name:ident, [$first:ident $(, $rest:ident)*]) => { - $crate::lifetime_wrapper_mut_ref_deref_if_both_exist!($Name, [$($rest),*]); - }; - ($Name:ident, []) => {}; -} - -// Actually generate the Deref implementation -#[macro_export] -macro_rules! lifetime_wrapper_generate_deref { - ($Name:ident) => { - paste::paste! { - // MutRef can deref to Ref for convenient immutable access - impl<'a> std::ops::Deref for [<$Name MutRef>]<'a> { - type Target = [<$Name Ref>]<'a>; + impl std::ops::Deref for [<Owned $Name>] { + type Target = $Name; fn deref(&self) -> &Self::Target { - // SAFETY: We can transmute MutRef to Ref because: - // 1. 
They have the same layout (same ptr field) - // 2. MutRef's lifetime is more restrictive (&'a mut -> &'a) - // 3. This is the same as &mut T -> &T coercion - unsafe { std::mem::transmute(self) } + // SAFETY: The opaque $Name is a ZST and the pointer is valid + // for the lifetime of the owned handle. + unsafe { &*(self.0 as *const $Name) } } } - } - }; -} - -// TODO(joe): replace with lifetime_wrapper! -#[macro_export] -macro_rules! wrapper { - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, $destructor:expr) => { - wrapper!($(#[$meta])* $Name, $ffi_type, |_| {}, $destructor); - }; - ($(#[$meta:meta])* $Name:ident, $ffi_type:ty, $validator:expr, $destructor:expr) => { - $(#[$meta])* - pub struct $Name { - ptr: $ffi_type, - owned: bool, - } - #[allow(dead_code)] - impl $Name { - /// Takes ownership of the memory. The Rust wrapper becomes - /// responsible for calling the destructor when dropped. - pub unsafe fn own(ptr: $ffi_type) -> Self { - if ptr.is_null() { - vortex::error::vortex_panic!("Attempted to create a wrapper from a null pointer"); + impl std::ops::DerefMut for [<Owned $Name>] { + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: The opaque $Name is a ZST and the pointer is valid + // for the lifetime of the owned handle. We have &mut self so + // exclusive access is guaranteed. + unsafe { &mut *(self.0 as *mut $Name) } } - $validator(ptr); - Self { ptr, owned: true } } - /// Borrows the pointer without taking ownership. - /// For compatibility with existing code. - pub unsafe fn borrow(ptr: $ffi_type) -> Self { - if ptr.is_null() { - vortex::error::vortex_panic!("Attempted to create a wrapper from a null pointer"); - } - $validator(ptr); - Self { ptr, owned: false } - } - - /// Returns the raw pointer. - pub fn as_ptr(&self) -> $ffi_type { - self.ptr - } - - /// Release ownership and return the raw pointer. 
- pub fn into_ptr(mut self) -> $ffi_type { - assert!(self.owned, "Cannot take ownership of unowned ptr"); - self.owned = false; // Prevent destructor from being called - self.ptr - } - } - - impl Drop for $Name { - fn drop(&mut self) { - if self.owned { + impl Drop for [<Owned $Name>] { + fn drop(&mut self) { let destructor = $destructor; #[allow(unused_unsafe)] - unsafe { destructor(&mut self.ptr) } + unsafe { + destructor(&mut self.0) + } } } } diff --git a/vortex-duckdb/src/duckdb/mod.rs b/vortex-duckdb/src/duckdb/mod.rs index 2e8c446ae77..774dc4af3a9 100644 --- a/vortex-duckdb/src/duckdb/mod.rs +++ b/vortex-duckdb/src/duckdb/mod.rs @@ -34,6 +34,7 @@ pub use copy_function::*; pub use data::*; pub use data_chunk::*; pub use database::*; +pub use ddb_string::*; pub use expr::*; pub use file_system::*; pub use logical_type::*; diff --git a/vortex-duckdb/src/duckdb/object_cache.rs b/vortex-duckdb/src/duckdb/object_cache.rs index b6e80ceee4c..74f49858ead 100644 --- a/vortex-duckdb/src/duckdb/object_cache.rs +++ b/vortex-duckdb/src/duckdb/object_cache.rs @@ -6,6 +6,7 @@ use std::os::raw::c_void; use vortex::error::VortexExpect; use vortex::error::vortex_err; +use vortex::error::vortex_panic; use crate::cpp; use crate::lifetime_wrapper; @@ -19,12 +20,22 @@ unsafe extern "C-unwind" fn rust_box_deleter(ptr: *mut c_void) { } } -// ObjectCache is a wrapper around a DuckDB object cache. -// We only implement ObjectCacheRef since duckdb only has a single object cache per client, -// context which is never owned. -lifetime_wrapper!(ObjectCache, cpp::duckdb_vx_object_cache, |_| {}, [ref]); +lifetime_wrapper!(ObjectCache, cpp::duckdb_vx_object_cache, |_| { + vortex_panic!("ObjectCache is owned by the DatabaseInstance, not by Rust") +}); + +impl ObjectCache { + /// Erases the lifetime of this reference, returning a `&'static ObjectCache`. + /// + /// # Safety + /// + /// The caller must ensure that the underlying `ObjectCache` outlives all uses of the + /// returned reference. 
In practice, the `ObjectCache` is owned by the `DatabaseInstance` + /// and lives as long as the database, so this is safe as long as the database is kept alive. + pub unsafe fn erase_lifetime(&self) -> &'static Self { + unsafe { &*(self as *const Self) } + } -impl ObjectCacheRef<'_> { /// Store an entry in the object cache with the given key. /// The entry will be converted to an opaque pointer and stored. /// Uses a proper deleter to ensure memory is freed when the cache entry is removed. @@ -60,6 +71,7 @@ impl ObjectCacheRef<'_> { } } } -// This is Send + Sync since the cache has a mutex wrapper. -unsafe impl Send for ObjectCacheRef<'_> {} -unsafe impl Sync for ObjectCacheRef<'_> {} + +// SAFETY: Send + Sync since the cache has a mutex wrapper on the C++ side. +unsafe impl Send for ObjectCache {} +unsafe impl Sync for ObjectCache {} diff --git a/vortex-duckdb/src/duckdb/query_result.rs b/vortex-duckdb/src/duckdb/query_result.rs index bad1941e10e..6b3fc2742ff 100644 --- a/vortex-duckdb/src/duckdb/query_result.rs +++ b/vortex-duckdb/src/duckdb/query_result.rs @@ -7,12 +7,11 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::error::vortex_err; -use crate::LogicalType; use crate::cpp; -use crate::duckdb::DataChunk; -use crate::wrapper; +use crate::duckdb::OwnedDataChunk; +use crate::lifetime_wrapper; -wrapper! { +lifetime_wrapper! { /// A wrapper around a DuckDB query result. #[derive(Debug)] QueryResult, @@ -27,15 +26,17 @@ wrapper! { } } -impl QueryResult { - /// Create a new `QueryResult` from a `duckdb_result`. +impl OwnedQueryResult { + /// Create a new `OwnedQueryResult` from a `duckdb_result`. /// /// Takes ownership of the result and will destroy it on drop. pub unsafe fn new(result: cpp::duckdb_result) -> Self { let boxed = Box::new(result); unsafe { Self::own(Box::into_raw(boxed)) } } +} +impl QueryResult { /// Get the number of columns in the result. 
pub fn column_count(&self) -> u64 { unsafe { cpp::duckdb_column_count(self.as_ptr()) } @@ -68,14 +69,16 @@ impl QueryResult { } /// Get the type of a column by index. - pub fn column_type(&self, col_idx: usize) -> LogicalType { + pub fn column_type(&self, col_idx: usize) -> OwnedLogicalType { let dtype = unsafe { cpp::duckdb_column_type(self.as_ptr(), col_idx as u64) }; - LogicalType::new(dtype) + OwnedLogicalType::new(dtype) } } -impl IntoIterator for QueryResult { - type Item = DataChunk; +use crate::duckdb::OwnedLogicalType; + +impl IntoIterator for OwnedQueryResult { + type Item = OwnedDataChunk; type IntoIter = QueryResultIter; fn into_iter(self) -> Self::IntoIter { @@ -84,23 +87,23 @@ impl IntoIterator for QueryResult { } pub struct QueryResultIter { - result: QueryResult, + result: OwnedQueryResult, } impl QueryResultIter { - pub fn new(result: QueryResult) -> Self { + pub fn new(result: OwnedQueryResult) -> Self { Self { result } } } impl Iterator for QueryResultIter { - type Item = DataChunk; + type Item = OwnedDataChunk; fn next(&mut self) -> Option { let chunk = unsafe { cpp::duckdb_fetch_chunk(*self.result.as_ptr()) }; if chunk.is_null() { return None; } - Some(unsafe { DataChunk::own(chunk) }) + Some(unsafe { OwnedDataChunk::own(chunk) }) } } diff --git a/vortex-duckdb/src/duckdb/scalar_function.rs b/vortex-duckdb/src/duckdb/scalar_function.rs index c27af3c43b6..a8376696258 100644 --- a/vortex-duckdb/src/duckdb/scalar_function.rs +++ b/vortex-duckdb/src/duckdb/scalar_function.rs @@ -6,9 +6,9 @@ use vortex::error::vortex_err; use crate::cpp; use crate::duckdb::LogicalType; -use crate::wrapper; +use crate::lifetime_wrapper; -wrapper!(ScalarFunction, cpp::duckdb_vx_sfunc, |_| {}); +lifetime_wrapper!(ScalarFunction, cpp::duckdb_vx_sfunc, |_| {}); impl ScalarFunction { pub fn name(&self) -> &str { @@ -21,7 +21,7 @@ impl ScalarFunction { } } - pub fn return_type(&self) -> LogicalType { + pub fn return_type(&self) -> &LogicalType { unsafe { 
LogicalType::borrow(cpp::duckdb_vx_sfunc_return_type(self.as_ptr())) } } } diff --git a/vortex-duckdb/src/duckdb/selection_vector.rs b/vortex-duckdb/src/duckdb/selection_vector.rs index 55ed2cd26b2..252b265219d 100644 --- a/vortex-duckdb/src/duckdb/selection_vector.rs +++ b/vortex-duckdb/src/duckdb/selection_vector.rs @@ -2,9 +2,9 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use crate::cpp; -use crate::wrapper; +use crate::lifetime_wrapper; -wrapper!( +lifetime_wrapper!( SelectionVector, cpp::duckdb_selection_vector, |ptr: &mut cpp::duckdb_selection_vector| { @@ -12,11 +12,13 @@ wrapper!( } ); -impl SelectionVector { +impl OwnedSelectionVector { pub fn with_capacity(len: usize) -> Self { unsafe { Self::own(cpp::duckdb_create_selection_vector(len as _)) } } +} +impl SelectionVector { // NOTE(ngates): selection vector doesn't hold its own length. Which makes writing a safe // Rust API annoying... pub unsafe fn as_slice_mut(&mut self, length: usize) -> &mut [u32] { diff --git a/vortex-duckdb/src/duckdb/table_filter.rs b/vortex-duckdb/src/duckdb/table_filter.rs index 9b8ec5f1f1e..f351a2c8eb6 100644 --- a/vortex-duckdb/src/duckdb/table_filter.rs +++ b/vortex-duckdb/src/duckdb/table_filter.rs @@ -14,14 +14,14 @@ use vortex::error::vortex_panic; use crate::cpp; use crate::cpp::idx_t; use crate::duckdb::Expression; +use crate::duckdb::OwnedValue; use crate::duckdb::Value; -use crate::duckdb::ValueRef; -use crate::wrapper; +use crate::lifetime_wrapper; -wrapper!(TableFilterSet, cpp::duckdb_vx_table_filter_set, |_| {}); +lifetime_wrapper!(TableFilterSet, cpp::duckdb_vx_table_filter_set, |_| {}); impl TableFilterSet { - pub fn get(&self, index: u64) -> Option<(idx_t, TableFilter)> { + pub fn get(&self, index: u64) -> Option<(idx_t, &TableFilter)> { let mut filter_set: duckdb_vx_table_filter = ptr::null_mut(); let column_index = unsafe { @@ -45,7 +45,7 @@ impl TableFilterSet { } impl<'a> IntoIterator for &'a TableFilterSet { - type Item = (idx_t, 
TableFilter); + type Item = (idx_t, &'a TableFilter); type IntoIter = Box + 'a>; fn into_iter(self) -> Self::IntoIter { @@ -62,7 +62,7 @@ impl Debug for TableFilterSet { } } -wrapper!(TableFilter, duckdb_vx_table_filter, |_| {}); +lifetime_wrapper!(TableFilter, duckdb_vx_table_filter, |_| {}); impl TableFilter { pub fn as_class(&self) -> TableFilterClass<'_> { @@ -75,7 +75,7 @@ impl TableFilter { unsafe { cpp::duckdb_vx_table_filter_get_constant(self.as_ptr(), &raw mut out) }; TableFilterClass::ConstantComparison(ConstantComparison { - value: unsafe { ValueRef::borrow(out.value) }, + value: unsafe { Value::borrow(out.value) }, operator: out.comparison_type, }) } @@ -162,7 +162,7 @@ impl TableFilter { unsafe { cpp::duckdb_vx_table_filter_get_dynamic(self.as_ptr(), &raw mut out) }; TableFilterClass::Dynamic(DynamicFilter { - data: unsafe { DynamicFilterData::own(out.data) }, + data: unsafe { OwnedDynamicFilterData::own(out.data) }, operator: out.comparison_type, }) } @@ -192,15 +192,15 @@ pub enum TableFilterClass<'a> { IsNotNull, ConjunctionOr(Conjunction<'a>), ConjunctionAnd(Conjunction<'a>), - StructExtract(&'a str, TableFilter), - Optional(TableFilter), + StructExtract(&'a str, &'a TableFilter), + Optional(&'a TableFilter), InFilter(Values<'a>), Dynamic(DynamicFilter), - Expression(Expression), + Expression(&'a Expression), } pub struct ConstantComparison<'a> { - pub value: ValueRef<'a>, + pub value: &'a Value, pub operator: cpp::DUCKDB_VX_EXPR_TYPE, } @@ -208,8 +208,8 @@ pub struct Conjunction<'a> { children: &'a [duckdb_vx_table_filter], } -impl Conjunction<'_> { - pub fn children(&self) -> impl Iterator { +impl<'a> Conjunction<'a> { + pub fn children(&self) -> impl Iterator + 'a { self.children .iter() .map(|&child| unsafe { TableFilter::borrow(child) }) @@ -231,12 +231,12 @@ struct ValuesIterator<'a> { } impl<'a> Iterator for ValuesIterator<'a> { - type Item = ValueRef<'a>; + type Item = &'a Value; fn next(&mut self) -> Option { (self.index < 
self.values_count).then(|| { let value = unsafe { - ValueRef::borrow(cpp::duckdb_vx_values_vec_get(self.values, self.index as _)) + Value::borrow(cpp::duckdb_vx_values_vec_get(self.values, self.index as _)) }; self.index += 1; value @@ -245,7 +245,7 @@ impl<'a> Iterator for ValuesIterator<'a> { } impl<'a> Values<'a> { - pub fn iter(&self) -> impl Iterator> { + pub fn iter(&self) -> impl Iterator { ValuesIterator { values: self.values, values_count: self.values_count, @@ -256,11 +256,11 @@ impl<'a> Values<'a> { } pub struct DynamicFilter { - pub data: DynamicFilterData, + pub data: OwnedDynamicFilterData, pub operator: cpp::DUCKDB_VX_EXPR_TYPE, } -wrapper!( +lifetime_wrapper!( /// A handle to mutable dynamic filter data. DynamicFilterData, cpp::duckdb_vx_dynamic_filter_data, @@ -268,16 +268,16 @@ wrapper!( ); /// This handle wraps a single shared_ptr on C++ side, so we can assert is Send + Sync -unsafe impl Send for DynamicFilterData {} -unsafe impl Sync for DynamicFilterData {} +unsafe impl Send for OwnedDynamicFilterData {} +unsafe impl Sync for OwnedDynamicFilterData {} impl DynamicFilterData { /// Fetches the latest value from the dynamic filter data, if it has been initialized. 
- pub fn latest(&self) -> Option { + pub fn latest(&self) -> Option { let ptr = unsafe { cpp::duckdb_vx_dynamic_filter_data_get_value(self.as_ptr()) }; if ptr.is_null() { return None; } - Some(unsafe { Value::own(ptr) }) + Some(unsafe { OwnedValue::own(ptr) }) } } diff --git a/vortex-duckdb/src/duckdb/table_function/bind.rs b/vortex-duckdb/src/duckdb/table_function/bind.rs index 53c80c77670..4f92c9c4fe3 100644 --- a/vortex-duckdb/src/duckdb/table_function/bind.rs +++ b/vortex-duckdb/src/duckdb/table_function/bind.rs @@ -8,11 +8,11 @@ use vortex::error::vortex_err; use crate::cpp; use crate::duckdb::ClientContext; use crate::duckdb::LogicalType; +use crate::duckdb::OwnedData; +use crate::duckdb::OwnedValue; use crate::duckdb::TableFunction; -use crate::duckdb::Value; -use crate::duckdb::data::Data; use crate::duckdb::try_or_null; -use crate::wrapper; +use crate::lifetime_wrapper; /// The native bind callback for a table function. pub(crate) unsafe extern "C-unwind" fn bind_callback( @@ -22,12 +22,12 @@ pub(crate) unsafe extern "C-unwind" fn bind_callback( error_out: *mut cpp::duckdb_vx_error, ) -> cpp::duckdb_vx_data { let client_context = unsafe { ClientContext::borrow(ctx) }; - let bind_input = unsafe { BindInput::own(bind_input) }; - let mut bind_result = unsafe { BindResult::own(bind_result) }; + let bind_input = unsafe { OwnedBindInput::own(bind_input) }; + let mut bind_result = unsafe { OwnedBindResult::own(bind_result) }; try_or_null(error_out, || { - let bind_data = T::bind(&client_context, &bind_input, &mut bind_result)?; - Ok(Data::from(Box::new(bind_data)).as_ptr()) + let bind_data = T::bind(client_context, &bind_input, &mut bind_result)?; + Ok(OwnedData::from(Box::new(bind_data)).as_ptr()) }) } @@ -43,33 +43,33 @@ pub(crate) unsafe extern "C-unwind" fn bind_data_clone_callback Option { + pub fn get_parameter(&self, index: usize) -> Option { let value_ptr = unsafe { cpp::duckdb_vx_tfunc_bind_input_get_parameter(self.as_ptr(), index as _) }; if 
value_ptr.is_null() { None } else { - Some(unsafe { Value::own(value_ptr) }) + Some(unsafe { OwnedValue::own(value_ptr) }) } } /// Returns the named parameter with the given name, if it exists. - pub fn get_named_parameter(&self, name: &CStr) -> Option { + pub fn get_named_parameter(&self, name: &CStr) -> Option { let value_ptr = unsafe { cpp::duckdb_vx_tfunc_bind_input_get_named_parameter(self.as_ptr(), name.as_ptr()) }; if value_ptr.is_null() { None } else { - Some(unsafe { Value::own(value_ptr) }) + Some(unsafe { OwnedValue::own(value_ptr) }) } } @@ -79,7 +79,7 @@ impl BindInput { } } -wrapper!(BindResult, cpp::duckdb_vx_tfunc_bind_result, |_| {}); +lifetime_wrapper!(BindResult, cpp::duckdb_vx_tfunc_bind_result, |_| {}); impl BindResult { pub fn add_result_column(&self, name: &str, logical_type: &LogicalType) { diff --git a/vortex-duckdb/src/duckdb/table_function/init.rs b/vortex-duckdb/src/duckdb/table_function/init.rs index c3cd5d0d013..7e0090ddb89 100644 --- a/vortex-duckdb/src/duckdb/table_function/init.rs +++ b/vortex-duckdb/src/duckdb/table_function/init.rs @@ -12,9 +12,9 @@ use vortex::error::vortex_bail; use crate::cpp; use crate::duckdb::ClientContext; +use crate::duckdb::OwnedData; use crate::duckdb::TableFilterSet; use crate::duckdb::TableFunction; -use crate::duckdb::data::Data; /// Native callback for the global initialization of a table function. pub(crate) unsafe extern "C-unwind" fn init_global_callback( @@ -26,7 +26,7 @@ pub(crate) unsafe extern "C-unwind" fn init_global_callback( ); match T::init_global(&init_input) { - Ok(init_data) => Data::from(Box::new(init_data)).as_ptr(), + Ok(init_data) => OwnedData::from(Box::new(init_data)).as_ptr(), Err(e) => { // Set the error in the error output. 
let msg = e.to_string(); @@ -51,7 +51,7 @@ pub(crate) unsafe extern "C-unwind" fn init_local_callback( .vortex_expect("global_init_data null pointer"); match T::init_local(&init_input, global_init_data) { - Ok(init_data) => Data::from(Box::new(init_data)).as_ptr(), + Ok(init_data) => OwnedData::from(Box::new(init_data)).as_ptr(), Err(e) => { // Set the error in the error output. let msg = e.to_string(); @@ -107,7 +107,7 @@ impl<'a, T: TableFunction> TableInitInput<'a, T> { } /// Returns the table filter set for the table function. - pub fn table_filter_set(&self) -> Option { + pub fn table_filter_set(&self) -> Option<&TableFilterSet> { let ptr = self.input.filters; if ptr.is_null() { None @@ -117,7 +117,7 @@ impl<'a, T: TableFunction> TableInitInput<'a, T> { } /// Returns the object cache from the client context for the table function. - pub fn client_context(&self) -> VortexResult { + pub fn client_context(&self) -> VortexResult<&ClientContext> { unsafe { if self.input.client_context.is_null() { vortex_bail!("Client context is null"); diff --git a/vortex-duckdb/src/duckdb/table_function/mod.rs b/vortex-duckdb/src/duckdb/table_function/mod.rs index a068765fcbe..c3295065450 100644 --- a/vortex-duckdb/src/duckdb/table_function/mod.rs +++ b/vortex-duckdb/src/duckdb/table_function/mod.rs @@ -23,7 +23,7 @@ pub use virtual_columns::VirtualColumnsResult; use crate::cpp; use crate::cpp::duckdb_client_context; use crate::duckdb::Database; -use crate::duckdb::LogicalType; +use crate::duckdb::OwnedLogicalType; use crate::duckdb::client_context::ClientContext; use crate::duckdb::data_chunk::DataChunk; use crate::duckdb::expr::Expression; @@ -60,13 +60,13 @@ pub trait TableFunction: Sized + Debug { const MAX_THREADS: u64 = u64::MAX; /// Returns the parameters of the table function. - fn parameters() -> Vec { + fn parameters() -> Vec { // By default, we don't have any parameters. vec![] } /// Returns the named parameters of the table function, if any. 
- fn named_parameters() -> Vec<(CString, LogicalType)> { + fn named_parameters() -> Vec<(CString, OwnedLogicalType)> { // By default, we don't have any named parameters. vec![] } @@ -247,14 +247,14 @@ unsafe extern "C-unwind" fn function( .vortex_expect("global_init_data null pointer"); let local_init_data = unsafe { local_init_data.cast::().as_mut() } .vortex_expect("local_init_data null pointer"); - let mut data_chunk = unsafe { DataChunk::borrow(output) }; + let data_chunk = unsafe { DataChunk::borrow_mut(output) }; match T::scan( - &client_context, + client_context, bind_data, local_init_data, global_init_data, - &mut data_chunk, + data_chunk, ) { Ok(()) => { // The data chunk is already filled by the function. diff --git a/vortex-duckdb/src/duckdb/table_function/pushdown_complex_filter.rs b/vortex-duckdb/src/duckdb/table_function/pushdown_complex_filter.rs index 52be2b1f78e..f267d484745 100644 --- a/vortex-duckdb/src/duckdb/table_function/pushdown_complex_filter.rs +++ b/vortex-duckdb/src/duckdb/table_function/pushdown_complex_filter.rs @@ -19,5 +19,5 @@ pub(crate) unsafe extern "C-unwind" fn pushdown_complex_filter_callback().as_mut() }.vortex_expect("bind_data null pointer"); let expr = unsafe { Expression::borrow(expr) }; - try_or(error_out, || T::pushdown_complex_filter(bind_data, &expr)) + try_or(error_out, || T::pushdown_complex_filter(bind_data, expr)) } diff --git a/vortex-duckdb/src/duckdb/table_function/virtual_columns.rs b/vortex-duckdb/src/duckdb/table_function/virtual_columns.rs index f6fb2214b80..aeee1020395 100644 --- a/vortex-duckdb/src/duckdb/table_function/virtual_columns.rs +++ b/vortex-duckdb/src/duckdb/table_function/virtual_columns.rs @@ -8,7 +8,7 @@ use vortex::error::VortexExpect; use crate::cpp; use crate::duckdb::LogicalType; use crate::duckdb::TableFunction; -use crate::wrapper; +use crate::lifetime_wrapper; /// Native callback for the get_virtual_columns function. 
pub(crate) unsafe extern "C-unwind" fn get_virtual_columns_callback( @@ -17,12 +17,12 @@ pub(crate) unsafe extern "C-unwind" fn get_virtual_columns_callback().as_ref() }.vortex_expect("bind_data null pointer"); - let mut result = unsafe { VirtualColumnsResult::borrow(result) }; + let result = unsafe { VirtualColumnsResult::borrow_mut(result) }; - T::virtual_columns(bind_data, &mut result); + T::virtual_columns(bind_data, result); } -wrapper!( +lifetime_wrapper!( VirtualColumnsResult, cpp::duckdb_vx_tfunc_virtual_cols_result, |_| {} diff --git a/vortex-duckdb/src/duckdb/value.rs b/vortex-duckdb/src/duckdb/value.rs index 54c684ef97a..68f121b6feb 100644 --- a/vortex-duckdb/src/duckdb/value.rs +++ b/vortex-duckdb/src/duckdb/value.rs @@ -19,13 +19,13 @@ use crate::cpp; use crate::cpp::DUCKDB_TYPE; use crate::cpp::idx_t; use crate::duckdb::LogicalType; +use crate::duckdb::OwnedLogicalType; use crate::lifetime_wrapper; -lifetime_wrapper!(Value, cpp::duckdb_value, cpp::duckdb_destroy_value, [owned, ref]); +lifetime_wrapper!(Value, cpp::duckdb_value, cpp::duckdb_destroy_value); -impl<'a> ValueRef<'a> { - /// Note the lifetime of logical type is tied to &self - pub fn logical_type(&self) -> LogicalType { +impl Value { + pub fn logical_type(&self) -> &LogicalType { unsafe { LogicalType::borrow(cpp::duckdb_get_value_type(self.as_ptr())) } } @@ -89,9 +89,6 @@ impl<'a> ValueRef<'a> { ExtractedValue::Varchar(string) } DUCKDB_TYPE::DUCKDB_TYPE_BLOB => { - // TODO(ngates): for blobs and strings, we could write our own C functions to - // get the values by reference, avoiding a double copy since these C functions - // also copy on the CPP side. 
let blob = unsafe { cpp::duckdb_get_blob(self.as_ptr()) }; let slice = unsafe { std::slice::from_raw_parts(blob.data.cast::(), blob.size.as_()) }; @@ -139,7 +136,7 @@ impl<'a> ValueRef<'a> { ExtractedValue::List( (0..elem_count) .map(|i| unsafe { - Value::own(cpp::duckdb_get_list_child(self.as_ptr(), i as idx_t)) + OwnedValue::own(cpp::duckdb_get_list_child(self.as_ptr(), i as idx_t)) }) .collect::>(), ) @@ -147,7 +144,7 @@ impl<'a> ValueRef<'a> { DUCKDB_TYPE::DUCKDB_TYPE_STRUCT => ExtractedValue::List( (0..self.logical_type().struct_type_child_count()) .map(|i| unsafe { - Value::own(cpp::duckdb_get_struct_child(self.as_ptr(), i as idx_t)) + OwnedValue::own(cpp::duckdb_get_struct_child(self.as_ptr(), i as idx_t)) }) .collect::>(), ), @@ -157,7 +154,7 @@ impl<'a> ValueRef<'a> { } } -impl<'a> Debug for ValueRef<'a> { +impl Debug for Value { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let ptr = unsafe { cpp::duckdb_value_to_string(self.as_ptr()) }; write!(f, "{}", unsafe { CStr::from_ptr(ptr).to_string_lossy() })?; @@ -166,7 +163,13 @@ impl<'a> Debug for ValueRef<'a> { } } -impl Value { +impl Debug for OwnedValue { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + Debug::fmt(&**self, f) + } +} + +impl OwnedValue { pub fn sql_null() -> Self { unsafe { Self::own(cpp::duckdb_create_null_value()) } } @@ -175,11 +178,6 @@ impl Value { unsafe { Self::own(cpp::duckdb_vx_value_create_null(logical_type.as_ptr())) } } - /// Note the lifetime of logical type if tied to &self - pub fn logical_type(&self) -> LogicalType { - unsafe { LogicalType::borrow(cpp::duckdb_get_value_type(self.as_ptr())) } - } - pub fn new_decimal(precision: u8, scale: i8, value: i128) -> Self { unsafe { Self::own(cpp::duckdb_create_decimal(cpp::duckdb_decimal { @@ -251,103 +249,105 @@ impl Display for Value { let ptr = unsafe { cpp::duckdb_vx_value_to_string(self.as_ptr()) }; write!(f, "{}", unsafe { CStr::from_ptr(ptr) }.to_string_lossy())?; unsafe { 
cpp::duckdb_free(ptr.cast()) }; - Ok(()) } } +impl Display for OwnedValue { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + Display::fmt(&**self, f) + } +} + #[inline] pub fn i128_from_parts(high: i64, low: u64) -> i128 { ((high as i128) << 64) | (low as i128) } -impl Debug for Value { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{self}",) - } -} - -impl TryFrom> for Value +impl TryFrom> for OwnedValue where - T: Into + NativeDType, + T: Into + NativeDType, { type Error = VortexError; fn try_from(value: Option) -> Result { match value { Some(v) => Ok(v.into()), - None => Ok(Value::null(&LogicalType::try_from(&T::dtype())?)), + None => { + let lt = OwnedLogicalType::try_from(&T::dtype())?; + Ok(OwnedValue::null(<)) + } } } } -impl From for Value { +impl From for OwnedValue { fn from(value: i8) -> Self { unsafe { Self::own(cpp::duckdb_create_int8(value)) } } } -impl From for Value { +impl From for OwnedValue { fn from(value: i16) -> Self { unsafe { Self::own(cpp::duckdb_create_int16(value)) } } } -impl From for Value { +impl From for OwnedValue { fn from(value: i32) -> Self { unsafe { Self::own(cpp::duckdb_create_int32(value)) } } } -impl From for Value { +impl From for OwnedValue { fn from(value: i64) -> Self { unsafe { Self::own(cpp::duckdb_create_int64(value)) } } } -impl From for Value { +impl From for OwnedValue { fn from(value: u8) -> Self { unsafe { Self::own(cpp::duckdb_create_uint8(value)) } } } -impl From for Value { +impl From for OwnedValue { fn from(value: u16) -> Self { unsafe { Self::own(cpp::duckdb_create_uint16(value)) } } } -impl From for Value { +impl From for OwnedValue { fn from(value: u32) -> Self { unsafe { Self::own(cpp::duckdb_create_uint32(value)) } } } -impl From for Value { +impl From for OwnedValue { fn from(value: u64) -> Self { unsafe { Self::own(cpp::duckdb_create_uint64(value)) } } } -impl From for Value { +impl From for OwnedValue { fn from(value: f32) -> Self { unsafe { 
Self::own(cpp::duckdb_create_float(value)) } } } -impl From for Value { +impl From for OwnedValue { fn from(value: f64) -> Self { unsafe { Self::own(cpp::duckdb_create_double(value)) } } } -impl From for Value { +impl From for OwnedValue { fn from(value: bool) -> Self { unsafe { Self::own(cpp::duckdb_create_bool(value)) } } } -impl From<&str> for Value { +impl From<&str> for OwnedValue { fn from(value: &str) -> Self { unsafe { Self::own(cpp::duckdb_create_varchar_length( @@ -358,7 +358,7 @@ impl From<&str> for Value { } } -impl From<&[u8]> for Value { +impl From<&[u8]> for OwnedValue { fn from(value: &[u8]) -> Self { unsafe { Self::own(cpp::duckdb_create_blob(value.as_ptr(), value.len() as _)) } } @@ -388,7 +388,7 @@ pub enum ExtractedValue { TimestampMs(i64), TimestampS(i64), Decimal(u8, i8, i128), - List(Vec), + List(Vec), } #[cfg(test)] diff --git a/vortex-duckdb/src/duckdb/vector.rs b/vortex-duckdb/src/duckdb/vector.rs index 6a1b88bb5e8..dddd7dcf5e1 100644 --- a/vortex-duckdb/src/duckdb/vector.rs +++ b/vortex-duckdb/src/duckdb/vector.rs @@ -21,39 +21,33 @@ use crate::cpp; use crate::cpp::duckdb_vx_error; use crate::cpp::idx_t; use crate::duckdb::LogicalType; +use crate::duckdb::OwnedLogicalType; +use crate::duckdb::OwnedValue; use crate::duckdb::SelectionVector; use crate::duckdb::Value; -use crate::duckdb::vector_buffer::VectorBuffer; -use crate::wrapper; +use crate::duckdb::VectorBuffer; +use crate::lifetime_wrapper; pub const DUCKDB_STANDARD_VECTOR_SIZE: usize = 2048; -wrapper!(Vector, cpp::duckdb_vector, cpp::duckdb_destroy_vector); - -/// Safety: It is safe to mark `Vector` as `Send` as the memory it points is `Send`. -/// -/// Exceptions from a raw pointer not being `Send` would be pointing to -/// thread-local storage or other types that are not `Send`, e.g. `RefCell`. 
-/// -/// ```no_test -/// pub struct Vector { -/// ptr: *mut _duckdb_vector, -/// owned: bool, -/// } -/// ``` -unsafe impl Send for Vector {} +lifetime_wrapper!(Vector, cpp::duckdb_vector, cpp::duckdb_destroy_vector); -impl Vector { +/// Safety: It is safe to mark `OwnedVector` as `Send` as the memory it points is `Send`. +unsafe impl Send for OwnedVector {} + +impl OwnedVector { /// Create a new vector with the given type. - pub fn new(logical_type: LogicalType) -> Self { + pub fn new(logical_type: &LogicalType) -> Self { unsafe { Self::own(cpp::duckdb_create_vector(logical_type.as_ptr(), 0)) } } /// Create a new vector with the given type and capacity. - pub fn with_capacity(logical_type: LogicalType, len: usize) -> Self { + pub fn with_capacity(logical_type: &LogicalType, len: usize) -> Self { unsafe { Self::own(cpp::duckdb_create_vector(logical_type.as_ptr(), len as _)) } } +} +impl Vector { /// Converts the vector is a constant vector with every element `value`. pub fn reference_value(&mut self, value: &Value) { unsafe { @@ -73,7 +67,6 @@ impl Vector { /// A dictionary holds a strong reference to all memory it uses. /// /// `dictionary` differs from `slice_to_dictionary` in that it initializes hash caching. - /// See: pub fn dictionary( &self, dict: &Vector, @@ -105,7 +98,7 @@ impl Vector { .vortex_expect("dictionary ID should be valid C string"); unsafe { cpp::duckdb_vx_set_dictionary_vector_id( - self.ptr, + self.as_ptr(), dict_id.as_ptr(), dict_id.as_bytes().len().as_u32(), ) @@ -113,7 +106,7 @@ impl Vector { } pub fn to_sequence(&mut self, start: i64, stop: i64, capacity: u64) { - unsafe { cpp::duckdb_vx_sequence_vector(self.ptr, start, stop, capacity) } + unsafe { cpp::duckdb_vx_sequence_vector(self.as_ptr(), start, stop, capacity) } } /// Converts a vector into a flat uncompressed vector vortex call this `canonicalize`. 
@@ -134,7 +127,7 @@ impl Vector { } pub fn row_is_null(&self, row: u64) -> bool { - let validity = unsafe { cpp::duckdb_vector_get_validity(self.ptr) }; + let validity = unsafe { cpp::duckdb_vector_get_validity(self.as_ptr()) }; // validity can return a NULL pointer if the entire vector is valid if validity.is_null() { @@ -172,8 +165,9 @@ impl Vector { unsafe { cpp::duckdb_vector_assign_string_element(self.as_ptr(), idx as _, value.as_ptr()) } } - pub fn logical_type(&self) -> LogicalType { - unsafe { LogicalType::own(cpp::duckdb_vector_get_column_type(self.as_ptr())) } + pub fn logical_type(&self) -> OwnedLogicalType { + // NOTE(ngates): weirdly this function does indeed return an owned logical type. + unsafe { OwnedLogicalType::own(cpp::duckdb_vector_get_column_type(self.as_ptr())) } } /// Ensure the validity slice is writable. @@ -265,17 +259,22 @@ impl Vector { } } - pub fn list_vector_get_child(&self) -> Self { + pub fn list_vector_get_child(&self) -> &Self { unsafe { Self::borrow(cpp::duckdb_list_vector_get_child(self.as_ptr())) } } - pub fn array_vector_get_child(&self) -> Self { - // SAFETY: duckdb_array_vector_get_child dereferences the vector pointer which must be - // valid and point to an ARRAY type vector. The returned child vector is borrowed and - // remains valid as long as the parent vector is valid. 
+ pub fn list_vector_get_child_mut(&mut self) -> &mut Self { + unsafe { Self::borrow_mut(cpp::duckdb_list_vector_get_child(self.as_ptr())) } + } + + pub fn array_vector_get_child(&self) -> &Self { unsafe { Self::borrow(cpp::duckdb_array_vector_get_child(self.as_ptr())) } } + pub fn array_vector_get_child_mut(&mut self) -> &mut Self { + unsafe { Self::borrow_mut(cpp::duckdb_array_vector_get_child(self.as_ptr())) } + } + pub fn list_vector_set_size(&self, size: u64) -> VortexResult<()> { let state = unsafe { cpp::duckdb_list_vector_set_size(self.as_ptr(), size) }; match state { @@ -284,10 +283,7 @@ impl Vector { } } - pub fn struct_vector_get_child(&self, idx: usize) -> Self { - // SAFETY: duckdb_struct_vector_get_child dereferences the vector pointer which must be - // valid and point to an STRUCT type vector. The returned child vector is borrowed and - // remains valid as long as the parent vector is valid. + pub fn struct_vector_get_child(&self, idx: usize) -> &Self { unsafe { Self::borrow(cpp::duckdb_struct_vector_get_child( self.as_ptr(), @@ -296,14 +292,23 @@ impl Vector { } } - pub fn get_value(&self, idx: u64, len: u64) -> Option { + pub fn struct_vector_get_child_mut(&mut self, idx: usize) -> &mut Self { + unsafe { + Self::borrow_mut(cpp::duckdb_struct_vector_get_child( + self.as_ptr(), + idx as idx_t, + )) + } + } + + pub fn get_value(&self, idx: u64, len: u64) -> Option { if idx >= len { return None; } unsafe { let value_ptr = cpp::duckdb_vx_vector_get_value(self.as_ptr(), idx as idx_t); - Some(Value::own(value_ptr)) + Some(OwnedValue::own(value_ptr)) } } } @@ -346,13 +351,13 @@ mod tests { use super::*; use crate::cpp::DUCKDB_TYPE; + use crate::duckdb::OwnedSelectionVector; #[test] fn test_create_validity_all_valid() { - // Test case where all values are valid - should return None let len = 10; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let vector = Vector::with_capacity(logical_type, len); + let logical_type = 
OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let vector = OwnedVector::with_capacity(&logical_type, len); let validity = vector.validity_ref(len); let validity = validity.to_validity(); @@ -365,23 +370,19 @@ mod tests { #[test] fn test_create_validity_with_nulls() { - // Test case with some null values let len = 10; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); - // Set some positions as null - // SAFETY: Vector was created with this length. let validity_slice = unsafe { vector.ensure_validity_bitslice(len) }; - validity_slice.set(1, false); // null at position 1 - validity_slice.set(3, false); // null at position 3 - validity_slice.set(7, false); // null at position 7 + validity_slice.set(1, false); + validity_slice.set(3, false); + validity_slice.set(7, false); let validity = vector.validity_ref(len); let validity = validity.to_validity(); assert_eq!(validity.maybe_len(), Some(len)); - // Check that the right positions are null assert_eq!( validity.to_mask(len), Mask::from_indices(len, vec![0, 2, 4, 5, 6, 8, 9]) @@ -390,14 +391,12 @@ mod tests { #[test] fn test_create_validity_single_element() { - // Test with a single element that is null let len = 1; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); - // SAFETY: Vector was created with this length. 
let validity_slice = unsafe { vector.ensure_validity_bitslice(len) }; - validity_slice.set(0, false); // null at position 0 + validity_slice.set(0, false); let validity = vector.validity_ref(len); let validity = validity.to_validity(); @@ -406,13 +405,10 @@ mod tests { #[test] fn test_create_validity_single_element_valid() { - // Test with a single valid element let len = 1; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); - // Ensure validity slice exists but don't set any nulls - // SAFETY: Vector was created with this length. let _validity_slice = unsafe { vector.ensure_validity_bitslice(len) }; let validity = vector.validity_ref(len); @@ -422,47 +418,38 @@ mod tests { #[test] fn test_create_validity_empty() { - // Test with zero length let len = 0; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let vector = OwnedVector::with_capacity(&logical_type, len); let validity = vector.validity_ref(len); let validity = validity.to_validity(); - // Even with zero length, if validity mask doesn't exist, should return None assert_eq!(validity, Validity::AllValid); } #[test] fn test_create_validity_all_nulls() { - // Test case where all values are null let len = 10; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); - // SAFETY: Vector was created with this length. 
let validity_slice = unsafe { vector.ensure_validity_bitslice(len) }; - // Set all positions as null for i in 0..len { validity_slice.set(i, false); } let validity = vector.validity_ref(len); let validity = validity.to_validity(); - // Check that all positions are null assert_eq!(validity, Validity::AllInvalid); } #[test] fn test_row_is_null_all_valid() { - // Test case where all values are valid (no validity mask) let len = 10; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let vector = OwnedVector::with_capacity(&logical_type, len); let validity = vector.validity_ref(len); - - // When there's no validity mask, all rows should be valid (not null) for i in 0..len { assert!(validity.is_valid(i), "Row {i} should not be null"); } @@ -470,21 +457,16 @@ mod tests { #[test] fn test_row_is_null_with_nulls() { - // Test case with some null values let len = 10; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); - // Set some positions as null - // SAFETY: Vector was created with this length. 
let validity_slice = unsafe { vector.ensure_validity_bitslice(len) }; - validity_slice.set(1, false); // null at position 1 - validity_slice.set(3, false); // null at position 3 - validity_slice.set(7, false); // null at position 7 + validity_slice.set(1, false); + validity_slice.set(3, false); + validity_slice.set(7, false); let validity = vector.validity_ref(len); - - // Check each position assert!(validity.is_valid(0), "Row 0 should not be null"); assert!(!validity.is_valid(1), "Row 1 should be null"); assert!(validity.is_valid(2), "Row 2 should not be null"); @@ -499,21 +481,16 @@ mod tests { #[test] fn test_row_is_null_all_nulls() { - // Test case where all values are null let len = 10; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); - // SAFETY: Vector was created with this length. let validity_slice = unsafe { vector.ensure_validity_bitslice(len) }; - // Set all positions as null for i in 0..len { validity_slice.set(i, false); } let validity = vector.validity_ref(len); - - // Check that all positions are null for i in 0..len { assert!(!validity.is_valid(i), "Row {i} should be null"); } @@ -521,63 +498,48 @@ mod tests { #[test] fn test_row_is_null_single_element() { - // Test with a single element that is null let len = 1; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); - // SAFETY: Vector was created with this length. 
let validity_slice = unsafe { vector.ensure_validity_bitslice(len) }; - validity_slice.set(0, false); // null at position 0 + validity_slice.set(0, false); let validity = vector.validity_ref(len); - assert!(!validity.is_valid(0), "Single element should be null"); } #[test] fn test_row_is_null_single_element_valid() { - // Test with a single valid element let len = 1; - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); - // Ensure validity slice exists but element is valid - // SAFETY: Vector was created with this length. let _validity_slice = unsafe { vector.ensure_validity_bitslice(len) }; let validity = vector.validity_ref(len); - assert!(validity.is_valid(0), "Single element should be valid"); } #[test] fn test_row_is_null_byte_boundaries() { - // Test bit manipulation across 64-bit boundaries - let len = 128; // More than 64 bits - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut vector = Vector::with_capacity(logical_type, len); + let len = 128; + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let mut vector = OwnedVector::with_capacity(&logical_type, len); - // SAFETY: Vector was created with this length. 
let validity_slice = unsafe { vector.ensure_validity_bitslice(len) }; - - // Set specific positions as null to test bit manipulation - validity_slice.set(0, false); // First bit of first u64 - validity_slice.set(63, false); // Last bit of first u64 - validity_slice.set(64, false); // First bit of second u64 - validity_slice.set(127, false); // Last bit of second u64 (if it exists) + validity_slice.set(0, false); + validity_slice.set(63, false); + validity_slice.set(64, false); + validity_slice.set(127, false); let validity = vector.validity_ref(len); - - // Test the null positions assert!(!validity.is_valid(0), "Row 0 should be null"); assert!(!validity.is_valid(63), "Row 63 should be null"); assert!(!validity.is_valid(64), "Row 64 should be null"); if len > 127 { assert!(!validity.is_valid(127), "Row 127 should be null"); } - - // Test some valid positions assert!(validity.is_valid(1), "Row 1 should be valid"); assert!(validity.is_valid(32), "Row 32 should be valid"); assert!(validity.is_valid(62), "Row 62 should be valid"); @@ -586,16 +548,16 @@ mod tests { #[test] fn test_dictionary() { - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER); - let mut dict = Vector::with_capacity(logical_type.clone(), 2); + let mut dict = OwnedVector::with_capacity(&logical_type, 2); let dict_slice = unsafe { dict.as_slice_mut::(2) }; dict_slice[0] = 100; dict_slice[1] = 200; - let vector = Vector::with_capacity(logical_type, 3); + let vector = OwnedVector::with_capacity(&logical_type, 3); - let mut sel_vec = SelectionVector::with_capacity(3); + let mut sel_vec = OwnedSelectionVector::with_capacity(3); let sel_slice = unsafe { sel_vec.as_slice_mut(3) }; sel_slice[0] = 0; sel_slice[1] = 1; diff --git a/vortex-duckdb/src/duckdb/vector_buffer.rs b/vortex-duckdb/src/duckdb/vector_buffer.rs index 3648f90e3bd..a0faf1d1ca9 100644 --- a/vortex-duckdb/src/duckdb/vector_buffer.rs +++ 
b/vortex-duckdb/src/duckdb/vector_buffer.rs @@ -2,20 +2,20 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use crate::cpp; -use crate::duckdb::Data; -use crate::wrapper; +use crate::duckdb::OwnedData; +use crate::lifetime_wrapper; // A wrapped buffer that give duckdb a strong reference to a vortex buffer. -wrapper!( +lifetime_wrapper!( VectorBuffer, cpp::duckdb_vx_vector_buffer, cpp::duckdb_vx_vector_buffer_destroy ); -impl VectorBuffer { +impl OwnedVectorBuffer { pub fn new(data: T) -> Self { - let data = Data::from(Box::new(data)); + let data = OwnedData::from(Box::new(data)); unsafe { Self::own(cpp::duckdb_vx_vector_buffer_create(data.as_ptr())) } } } diff --git a/vortex-duckdb/src/e2e_test/object_cache_test.rs b/vortex-duckdb/src/e2e_test/object_cache_test.rs index 0613b4069d1..6f11059bc19 100644 --- a/vortex-duckdb/src/e2e_test/object_cache_test.rs +++ b/vortex-duckdb/src/e2e_test/object_cache_test.rs @@ -13,7 +13,7 @@ use crate::duckdb::BindInput; use crate::duckdb::BindResult; use crate::duckdb::ClientContext; use crate::duckdb::DataChunk; -use crate::duckdb::LogicalType; +use crate::duckdb::OwnedLogicalType; use crate::duckdb::TableFunction; use crate::duckdb::TableInitInput; @@ -47,7 +47,7 @@ impl TableFunction for TestTableFunction { _input: &BindInput, result: &mut BindResult, ) -> VortexResult { - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); result.add_result_column("test_value", &logical_type); let cache = client_context.object_cache(); @@ -106,11 +106,11 @@ impl TableFunction for TestTableFunction { } } -use crate::duckdb::Database; +use crate::duckdb::OwnedDatabase; #[test] fn test_table_function_with_object_cache() -> VortexResult<()> { - let db = Database::open_in_memory()?; + let db = OwnedDatabase::open_in_memory()?; let conn = db.connect()?; // Register our test table function diff --git 
a/vortex-duckdb/src/e2e_test/vortex_scan_test.rs b/vortex-duckdb/src/e2e_test/vortex_scan_test.rs index b0ce40fb864..4d2d67536fe 100644 --- a/vortex-duckdb/src/e2e_test/vortex_scan_test.rs +++ b/vortex-duckdb/src/e2e_test/vortex_scan_test.rs @@ -4,6 +4,8 @@ //! This module contains tests for the `vortex_scan` table function. use std::ffi::CStr; +use std::io::Write; +use std::net::TcpListener; use std::path::Path; use std::slice; use std::str::FromStr; @@ -42,11 +44,11 @@ use crate::SESSION; use crate::cpp; use crate::cpp::duckdb_string_t; use crate::cpp::duckdb_timestamp; -use crate::duckdb::Connection; -use crate::duckdb::Database; +use crate::duckdb::OwnedConnection; +use crate::duckdb::OwnedDatabase; -fn database_connection() -> Connection { - let db = Database::open_in_memory().unwrap(); +fn database_connection() -> OwnedConnection { + let db = OwnedDatabase::open_in_memory().unwrap(); db.register_vortex_scan_replacement().unwrap(); crate::initialize(&db).unwrap(); db.connect().unwrap() @@ -120,9 +122,10 @@ fn scan_vortex_file_single_row>( let formatted_query = query.replace('?', &format!("'{file_path}'")); let result = conn.query(&formatted_query).unwrap(); - let chunk = result.into_iter().next().unwrap(); - let mut vec = chunk.get_vector(col_idx); - T::from_duckdb_value(&mut unsafe { vec.as_slice_mut::(chunk.len().as_()) }[0]) + let mut chunk = result.into_iter().next().unwrap(); + let len = chunk.len().as_(); + let vec = chunk.get_vector_mut(col_idx); + T::from_duckdb_value(&mut unsafe { vec.as_slice_mut::(len) }[0]) } fn scan_vortex_file>( @@ -137,10 +140,11 @@ fn scan_vortex_file>( let result = conn.query(&formatted_query)?; let mut values = Vec::new(); - for chunk in result { - let mut vec = chunk.get_vector(col_idx); + for mut chunk in result { + let len = chunk.len().as_(); + let vec = chunk.get_vector_mut(col_idx); values.extend( - unsafe { vec.as_slice_mut::(chunk.len().as_()) } + unsafe { vec.as_slice_mut::(len) } .iter_mut() 
.map(T::from_duckdb_value), ); @@ -346,6 +350,52 @@ fn test_vortex_scan_multiple_files() { assert_eq!(total_sum, 21); } +#[test] +fn test_vortex_scan_over_http() { + let file = RUNTIME.block_on(async { + let strings = VarBinArray::from(vec!["a", "b", "c"]); + write_single_column_vortex_file("strings", strings).await + }); + + let file_bytes = std::fs::read(file.path()).unwrap(); + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let addr = listener.local_addr().unwrap(); + + std::thread::spawn(move || { + for _ in 0..2 { + if let Ok((mut stream, _)) = listener.accept() { + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Length: {}\r\n\r\n", + file_bytes.len() + ); + stream.write_all(response.as_bytes()).unwrap(); + stream.write_all(&file_bytes).unwrap(); + } + } + }); + + let conn = database_connection(); + conn.query("SET vortex_filesystem = 'duckdb';").unwrap(); + conn.query("INSTALL httpfs;").unwrap(); + conn.query("LOAD httpfs;").unwrap(); + + let url = format!( + "http://{}/{}", + addr, + file.path().file_name().unwrap().to_string_lossy() + ); + + let result = conn + .query(&format!("SELECT COUNT(*) FROM read_vortex('{url}')")) + .unwrap(); + let chunk = result.into_iter().next().unwrap(); + let count = chunk + .get_vector(0) + .as_slice_with_len::(chunk.len().as_())[0]; + + assert_eq!(count, 3); +} + #[test] fn test_write_file() { let conn = database_connection(); @@ -807,25 +857,26 @@ fn test_vortex_encodings_roundtrip() { )) .unwrap(); - let chunk = result.into_iter().next().unwrap(); - assert_eq!(chunk.len(), 5); // 5 rows + let mut chunk = result.into_iter().next().unwrap(); + let len: usize = chunk.len().as_(); + assert_eq!(len, 5); // 5 rows assert_eq!(chunk.column_count(), 10); // 10 columns // Verify primitive i32 (column 0) let primitive_i32_vec = chunk.get_vector(0); - let primitive_i32_slice = primitive_i32_vec.as_slice_with_len::(chunk.len().as_()); + let primitive_i32_slice = primitive_i32_vec.as_slice_with_len::(len); 
assert_eq!(primitive_i32_slice, [1, 2, 3, 4, 5]); // Verify primitive f64 (column 1) let primitive_f64_vec = chunk.get_vector(1); - let primitive_f64_slice = primitive_f64_vec.as_slice_with_len::(chunk.len().as_()); + let primitive_f64_slice = primitive_f64_vec.as_slice_with_len::(len); assert!((primitive_f64_slice[0] - 1.1).abs() < f64::EPSILON); assert!((primitive_f64_slice[1] - 2.2).abs() < f64::EPSILON); assert!((primitive_f64_slice[2] - 3.3).abs() < f64::EPSILON); // Verify constant string (column 2) - let mut constant_vec = chunk.get_vector(2); - let constant_slice = unsafe { constant_vec.as_slice_mut::(chunk.len().as_()) }; + let constant_vec = chunk.get_vector_mut(2); + let constant_slice = unsafe { constant_vec.as_slice_mut::(len) }; for idx in 0..5 { let string_val = String::from_duckdb_value(&mut constant_slice[idx]); assert_eq!(string_val, "constant_value"); @@ -833,12 +884,12 @@ fn test_vortex_encodings_roundtrip() { // Verify boolean (column 3) let bool_vec = chunk.get_vector(3); - let bool_slice = bool_vec.as_slice_with_len::(chunk.len().as_()); + let bool_slice = bool_vec.as_slice_with_len::(len); assert_eq!(bool_slice, [true, false, true, false, true]); // Verify dictionary (column 4) - let mut dict_vec = chunk.get_vector(4); - let dict_slice = unsafe { dict_vec.as_slice_mut::(chunk.len().as_()) }; + let dict_vec = chunk.get_vector_mut(4); + let dict_slice = unsafe { dict_vec.as_slice_mut::(len) }; // Keys were [0, 1, 0, 2, 1] and values were ["apple", "banana", "cherry"] let expected_dict_values = ["apple", "banana", "apple", "cherry", "banana"]; for idx in 0..5 { @@ -848,17 +899,17 @@ fn test_vortex_encodings_roundtrip() { // Verify RLE (column 5) let rle_vec = chunk.get_vector(5); - let rle_slice = rle_vec.as_slice_with_len::(chunk.len().as_()); + let rle_slice = rle_vec.as_slice_with_len::(len); assert_eq!(rle_slice, [100, 100, 100, 200, 200]); // Verify sequence (column 6) let seq_vec = chunk.get_vector(6); - let seq_slice = 
seq_vec.as_slice_with_len::(chunk.len().as_()); + let seq_slice = seq_vec.as_slice_with_len::(len); assert_eq!(seq_slice, [0, 10, 20, 30, 40]); // Verify varbin (column 7) - let mut varbin_vec = chunk.get_vector(7); - let varbin_slice = unsafe { varbin_vec.as_slice_mut::(chunk.len().as_()) }; + let varbin_vec = chunk.get_vector_mut(7); + let varbin_slice = unsafe { varbin_vec.as_slice_mut::(len) }; let expected_strings = ["hello", "world", "vortex", "test", "data"]; for i in 0..5 { let string_val = String::from_duckdb_value(&mut varbin_slice[i]); @@ -868,7 +919,7 @@ fn test_vortex_encodings_roundtrip() { // Verify list (column 8) // Expected lists: [1,2], [3,4,5], [6], [7,8,9,10], [] let list_vec = chunk.get_vector(8); - let list_entries = list_vec.as_slice_with_len::(chunk.len().as_()); + let list_entries = list_vec.as_slice_with_len::(len); // Verify list lengths assert_eq!(list_entries[0].length, 2); // [1,2] diff --git a/vortex-duckdb/src/exporter/all_invalid.rs b/vortex-duckdb/src/exporter/all_invalid.rs index d402693df98..c7679931911 100644 --- a/vortex-duckdb/src/exporter/all_invalid.rs +++ b/vortex-duckdb/src/exporter/all_invalid.rs @@ -5,19 +5,19 @@ use vortex::error::VortexResult; use vortex::error::vortex_ensure; use crate::duckdb::LogicalType; -use crate::duckdb::Value; +use crate::duckdb::OwnedValue; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; struct AllInvalidExporter { len: usize, - null_value: Value, + null_value: OwnedValue, } pub(crate) fn new_exporter(len: usize, logical_type: &LogicalType) -> Box { Box::new(AllInvalidExporter { len, - null_value: Value::null(logical_type), + null_value: OwnedValue::null(logical_type), }) } @@ -38,23 +38,23 @@ mod tests { use vortex::array::arrays::PrimitiveArray; use super::*; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; #[test] fn all_null_array() { let arr = 
PrimitiveArray::from_option_iter::([None, None, None]); - let ltype = LogicalType::int32(); + let ltype = OwnedLogicalType::int32(); - let mut chunk = DataChunk::new([ltype.clone()]); + let mut chunk = OwnedDataChunk::new([ltype.clone()]); new_exporter(arr.len(), <ype) - .export(0, 3, &mut chunk.get_vector(0)) + .export(0, 3, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(3); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - CONSTANT INTEGER: 3 = [ NULL] "# diff --git a/vortex-duckdb/src/exporter/bool.rs b/vortex-duckdb/src/exporter/bool.rs index 709689a6591..ef2c251c947 100644 --- a/vortex-duckdb/src/exporter/bool.rs +++ b/vortex-duckdb/src/exporter/bool.rs @@ -8,7 +8,7 @@ use vortex::buffer::BitBuffer; use vortex::error::VortexResult; use vortex::mask::Mask; -use crate::LogicalType; +use crate::duckdb::OwnedLogicalType; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; use crate::exporter::all_invalid; @@ -27,7 +27,7 @@ pub(crate) fn new_exporter( let validity = array.validity()?.to_array(len).execute::(ctx)?; if validity.all_false() { - return Ok(all_invalid::new_exporter(len, &LogicalType::bool())); + return Ok(all_invalid::new_exporter(len, &OwnedLogicalType::bool())); } Ok(validity::new_exporter( @@ -61,22 +61,23 @@ mod tests { use super::*; use crate::SESSION; use crate::cpp; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; #[test] fn test_bool() { let arr = BoolArray::from_iter([true, false, true]); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); + let mut chunk = + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() - .export(1, 2, &mut chunk.get_vector(0)) + .export(1, 2, chunk.get_vector_mut(0)) .unwrap(); 
chunk.set_len(2); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT BOOLEAN: 2 = [ false, true] "# @@ -87,16 +88,17 @@ mod tests { fn test_bool_long() { let arr = BoolArray::from_iter([true; 128]); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); + let mut chunk = + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() - .export(1, 66, &mut chunk.get_vector(0)) + .export(1, 66, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(65); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), format!( r#"Chunk - [1 Columns] - FLAT BOOLEAN: 65 = [ {}] @@ -110,16 +112,17 @@ mod tests { fn test_bool_nullable() { let arr = BoolArray::from_iter([Some(true), None, Some(false)]); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); + let mut chunk = + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() - .export(1, 2, &mut chunk.get_vector(0)) + .export(1, 2, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(2); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT BOOLEAN: 2 = [ NULL, false] "# @@ -130,16 +133,17 @@ mod tests { fn test_bool_all_invalid() { let arr = BoolArray::from_iter([None; 3]); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); + let mut chunk = + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() - .export(1, 2, &mut chunk.get_vector(0)) + .export(1, 2, chunk.get_vector_mut(0)) .unwrap(); 
chunk.set_len(2); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - CONSTANT BOOLEAN: 2 = [ NULL] "# diff --git a/vortex-duckdb/src/exporter/cache.rs b/vortex-duckdb/src/exporter/cache.rs index 8d0d2afb7d3..97214755e89 100644 --- a/vortex-duckdb/src/exporter/cache.rs +++ b/vortex-duckdb/src/exporter/cache.rs @@ -8,7 +8,7 @@ use vortex::array::ArrayRef; use vortex::array::Canonical; use vortex_utils::aliases::dash_map::DashMap; -use crate::duckdb::Vector; +use crate::duckdb::OwnedVector; /// Cache for array conversions from Vortex to DuckDB. /// @@ -17,7 +17,7 @@ use crate::duckdb::Vector; /// We hold on to the `ArrayRef` to ensure that the key (ptr addr) doesn't get reused. #[derive(Default)] pub struct ConversionCache { - pub values_cache: DashMap>)>, + pub values_cache: DashMap>)>, pub canonical_cache: DashMap, // A value which must be unique for a given DuckDB operator. instance_id: u64, diff --git a/vortex-duckdb/src/exporter/constant.rs b/vortex-duckdb/src/exporter/constant.rs index 15d659ee4be..1483332d5f1 100644 --- a/vortex-duckdb/src/exporter/constant.rs +++ b/vortex-duckdb/src/exporter/constant.rs @@ -9,7 +9,7 @@ use vortex::error::VortexResult; use vortex::mask::Mask; use crate::convert::ToDuckDBScalar; -use crate::duckdb::Value; +use crate::duckdb::OwnedValue; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; use crate::exporter::ConversionCache; @@ -17,7 +17,7 @@ use crate::exporter::new_array_exporter; use crate::exporter::validity; struct ConstantExporter { - value: Option, + value: Option, } pub fn new_exporter_with_mask( diff --git a/vortex-duckdb/src/exporter/decimal.rs b/vortex-duckdb/src/exporter/decimal.rs index ff4904c0b5e..a332335aa79 100644 --- a/vortex-duckdb/src/exporter/decimal.rs +++ b/vortex-duckdb/src/exporter/decimal.rs @@ -19,9 +19,9 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::mask::Mask; -use 
crate::LogicalType; +use crate::duckdb::OwnedLogicalType; +use crate::duckdb::OwnedVectorBuffer; use crate::duckdb::Vector; -use crate::duckdb::VectorBuffer; use crate::exporter::ColumnExporter; use crate::exporter::all_invalid; use crate::exporter::validity; @@ -34,7 +34,7 @@ struct DecimalExporter { struct DecimalZeroCopyExporter { values: Buffer, - shared_buffer: VectorBuffer, + shared_buffer: OwnedVectorBuffer, } pub(crate) fn new_exporter( @@ -53,10 +53,8 @@ pub(crate) fn new_exporter( let validity = validity.to_array(len).execute::(ctx)?; if validity.all_false() { - return Ok(all_invalid::new_exporter( - len, - &LogicalType::try_from(DType::Decimal(decimal_dtype, nullability))?, - )); + let ltype = OwnedLogicalType::try_from(DType::Decimal(decimal_dtype, nullability))?; + return Ok(all_invalid::new_exporter(len, <ype)); } let exporter = if values_type == dest_values_type { @@ -64,7 +62,7 @@ pub(crate) fn new_exporter( let buffer = Buffer::::from_byte_buffer(values.into_host_sync()); Box::new(DecimalZeroCopyExporter { values: buffer.clone(), - shared_buffer: VectorBuffer::new(buffer), + shared_buffer: OwnedVectorBuffer::new(buffer), }) as Box }) } else { @@ -133,8 +131,8 @@ mod tests { use vortex::error::VortexExpect; use super::*; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; pub(crate) fn new_zero_copy_exporter( array: &DecimalArray, @@ -149,7 +147,7 @@ mod tests { validity, Box::new(DecimalZeroCopyExporter { values: buffer.clone(), - shared_buffer: VectorBuffer::new(buffer), + shared_buffer: OwnedVectorBuffer::new(buffer), }), )) }) @@ -165,18 +163,18 @@ mod tests { ); // Create a DuckDB integer chunk since decimal will be stored as i32 for this precision - let mut chunk = DataChunk::new([LogicalType::decimal_type(10, 2) + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::decimal_type(10, 2) .vortex_expect("LogicalType creation should succeed for test 
data")]); new_zero_copy_exporter(&arr) .unwrap() - .export(0, 3, &mut chunk.get_vector(0)) + .export(0, 3, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(3); // Verify the exported data matches expected format assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT DECIMAL(10,2): 3 = [ 123.45, 678.90, -123.00] "# @@ -192,19 +190,19 @@ mod tests { decimal_dtype, ); - let mut chunk = DataChunk::new([LogicalType::decimal_type(5, 1) + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::decimal_type(5, 1) .vortex_expect("LogicalType creation should succeed for test data")]); // Export first 3 elements new_zero_copy_exporter(&arr) .unwrap() - .export(0, 3, &mut chunk.get_vector(0)) + .export(0, 3, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(3); // Verify the exported data matches expected format assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT DECIMAL(5,1): 3 = [ 10.0, 11.0, 12.0] "# @@ -218,18 +216,18 @@ mod tests { let arr = DecimalArray::from_option_iter([Some(123456i32), None, Some(789012i32)], decimal_dtype); - let mut chunk = DataChunk::new([LogicalType::decimal_type(8, 3) + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::decimal_type(8, 3) .vortex_expect("LogicalType creation should succeed for test data")]); new_zero_copy_exporter(&arr) .unwrap() - .export(0, 3, &mut chunk.get_vector(0)) + .export(0, 3, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(3); // Verify the exported data matches expected format (NULL is represented as NULL) assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT DECIMAL(8,3): 3 = [ 123.456, NULL, 789.012] "# diff --git a/vortex-duckdb/src/exporter/dict.rs b/vortex-duckdb/src/exporter/dict.rs index 9dc9934ae94..a5ae9fc6784 100644 --- 
a/vortex-duckdb/src/exporter/dict.rs +++ b/vortex-duckdb/src/exporter/dict.rs @@ -21,8 +21,9 @@ use vortex::dtype::match_each_integer_ptype; use vortex::error::VortexResult; use vortex::mask::Mask; -use crate::duckdb::LogicalType; -use crate::duckdb::SelectionVector; +use crate::duckdb::OwnedLogicalType; +use crate::duckdb::OwnedSelectionVector; +use crate::duckdb::OwnedVector; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; use crate::exporter::all_invalid; @@ -32,7 +33,7 @@ use crate::exporter::new_array_exporter; struct DictExporter { // Store the dictionary values once and export the same dictionary with each codes chunk. - values_vector: Arc>, // NOTE(ngates): not actually flat... + values_vector: Arc>, // NOTE(ngates): not actually flat... values_len: u32, codes: PrimitiveArray, codes_type: PhantomData, @@ -49,7 +50,7 @@ pub(crate) fn new_exporter_with_flatten( ) -> VortexResult> { // Grab the cache dictionary values. let values = array.values(); - let values_type: LogicalType = values.dtype().try_into()?; + let values_type: OwnedLogicalType = values.dtype().try_into()?; if let Some(constant) = values.as_opt::() { return constant::new_exporter_with_mask( ConstantArray::new(constant.scalar().clone(), array.codes().len()), @@ -108,7 +109,8 @@ pub(crate) fn new_exporter_with_flatten( Some(vector) => vector, None => { // Create a new DuckDB vector for the values. - let mut vector = Vector::with_capacity(values.dtype().try_into()?, values.len()); + let values_type: OwnedLogicalType = values.dtype().try_into()?; + let mut vector = OwnedVector::with_capacity(&values_type, values.len()); new_array_exporter(values.clone(), cache, ctx)?.export( 0, values.len(), @@ -140,7 +142,7 @@ pub(crate) fn new_exporter_with_flatten( impl> ColumnExporter for DictExporter { fn export(&self, offset: usize, len: usize, vector: &mut Vector) -> VortexResult<()> { // Create a selection vector from the codes. 
- let mut sel_vec = SelectionVector::with_capacity(len); + let mut sel_vec = OwnedSelectionVector::with_capacity(len); let mut_sel_vec = unsafe { sel_vec.as_slice_mut(len) }; for (dst, src) in mut_sel_vec.iter_mut().zip( self.codes.as_slice::()[offset..offset + len] @@ -155,7 +157,7 @@ impl> ColumnExporter for DictExporter { // dictionary. let new_values_vector = { let values_vector = self.values_vector.lock(); - let mut new_values_vector = Vector::new(values_vector.logical_type()); + let mut new_values_vector = OwnedVector::new(&values_vector.logical_type()); // Shares the underlying data which determines the vectors length. new_values_vector.reference(&values_vector); new_values_vector @@ -186,8 +188,8 @@ mod tests { use crate::SESSION; use crate::cpp; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; use crate::exporter::ColumnExporter; use crate::exporter::ConversionCache; use crate::exporter::dict::new_exporter_with_flatten; @@ -207,16 +209,17 @@ mod tests { ConstantArray::new(10, 1).into_array(), ); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); + let mut chunk = + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); new_exporter(&arr, &ConversionCache::default()) .unwrap() - .export(0, 2, &mut chunk.get_vector(0)) + .export(0, 2, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(2); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT INTEGER: 2 = [ NULL, 10] "# @@ -230,17 +233,18 @@ mod tests { ConstantArray::new(10, 1).into_array(), ); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); + let mut chunk = + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); let mut ctx = ExecutionCtx::new(VortexSession::default()); 
new_exporter_with_flatten(&arr, &ConversionCache::default(), &mut ctx, false) .unwrap() - .export(0, 2, &mut chunk.get_vector(0)) + .export(0, 2, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(2); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - CONSTANT INTEGER: 2 = [ NULL] "# @@ -254,24 +258,25 @@ mod tests { PrimitiveArray::from_option_iter([Some(10), None]).into_array(), ); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); + let mut chunk = + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); new_exporter(&arr, &ConversionCache::default()) .unwrap() - .export(0, 3, &mut chunk.get_vector(0)) + .export(0, 3, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(3); // some-invalid codes cannot be exported as a dictionary. assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT INTEGER: 3 = [ NULL, 10, NULL] "# ); let mut flat_chunk = - DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); new_array_exporter( arr.into_array(), @@ -279,12 +284,12 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 3, &mut flat_chunk.get_vector(0)) + .export(0, 3, flat_chunk.get_vector_mut(0)) .unwrap(); flat_chunk.set_len(3); assert_eq!( - format!("{}", String::try_from(&flat_chunk).unwrap()), + format!("{}", String::try_from(&*flat_chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT INTEGER: 3 = [ NULL, 10, NULL] "# @@ -299,16 +304,17 @@ mod tests { Buffer::::empty().into_array(), ); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); + let mut chunk = + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); new_exporter(&arr, 
&ConversionCache::default()) .unwrap() - .export(0, 0, &mut chunk.get_vector(0)) + .export(0, 0, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(0); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT INTEGER: 0 = [ ] "# diff --git a/vortex-duckdb/src/exporter/fixed_size_list.rs b/vortex-duckdb/src/exporter/fixed_size_list.rs index 1caa2f982d0..61f93e8e4ec 100644 --- a/vortex-duckdb/src/exporter/fixed_size_list.rs +++ b/vortex-duckdb/src/exporter/fixed_size_list.rs @@ -17,7 +17,7 @@ use super::ConversionCache; use super::all_invalid; use super::new_array_exporter_with_flatten; use super::validity; -use crate::duckdb::LogicalType; +use crate::duckdb::OwnedLogicalType; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; @@ -43,10 +43,8 @@ pub(crate) fn new_exporter( let elements_exporter = new_array_exporter_with_flatten(elements, cache, ctx, true)?; if mask.all_false() { - return Ok(all_invalid::new_exporter( - len, - &LogicalType::try_from(dtype)?, - )); + let ltype = OwnedLogicalType::try_from(dtype)?; + return Ok(all_invalid::new_exporter(len, <ype)); } Ok(validity::new_exporter( @@ -75,9 +73,9 @@ impl ColumnExporter for FixedSizeListExporter { let list_size = self.list_size as usize; // Get the child vector for array elements and export the elements directly. - let mut elements_vector = vector.array_vector_get_child(); + let elements_vector = vector.array_vector_get_child_mut(); self.elements_exporter - .export(offset * list_size, len * list_size, &mut elements_vector)?; + .export(offset * list_size, len * list_size, elements_vector)?; // TODO(connor): We must flatten the child vector to ensure any child dictionary views // (namely UTF-8 string views in dictionaries) are materialized. 
@@ -100,8 +98,8 @@ mod tests { use super::*; use crate::SESSION; use crate::cpp; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; use crate::duckdb::Vector; /// Sets up a DataChunk, exports the array to it, and returns the chunk. @@ -110,11 +108,12 @@ mod tests { list_size: u32, offset: usize, len: usize, - ) -> DataChunk { - let array_type = LogicalType::new_array(cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER, list_size); + ) -> OwnedDataChunk { + let array_type = + OwnedLogicalType::new_array(cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER, list_size); // TODO(connor): This mutable API is brittle. Maybe bundle this logic? - let mut chunk = DataChunk::new([array_type]); + let mut chunk = OwnedDataChunk::new([array_type]); new_exporter( fsl, @@ -122,7 +121,7 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(offset, len, &mut chunk.get_vector(0)) + .export(offset, len, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(len); @@ -188,7 +187,7 @@ mod tests { // Verify the actual array values. let vector = chunk.get_vector(0); - verify_array_elements(&vector, &[1, 2, 3, 4, 5, 6], 2, 3); + verify_array_elements(vector, &[1, 2, 3, 4, 5, 6], 2, 3); } #[test] @@ -207,10 +206,10 @@ mod tests { // Verify nullability. let vector = chunk.get_vector(0); - assert_nulls(&vector, &[true, false, true, true]); + assert_nulls(vector, &[true, false, true, true]); // Verify the values (note: elements for null list still exist in storage). - verify_array_elements(&vector, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 3, 4); + verify_array_elements(vector, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 3, 4); } #[test] @@ -229,7 +228,7 @@ mod tests { // All lists should be null. 
let vector = chunk.get_vector(0); vector.flatten(chunk.len()); - assert_nulls(&vector, &[false, false, false]); + assert_nulls(vector, &[false, false, false]); } #[test] @@ -248,7 +247,7 @@ mod tests { // Verify alternating null pattern. let vector = chunk.get_vector(0); - assert_nulls(&vector, &[false, true, false, true, false]); + assert_nulls(vector, &[false, true, false, true, false]); } #[test] @@ -267,7 +266,7 @@ mod tests { // Verify the single-element arrays. let vector = chunk.get_vector(0); - verify_array_elements(&vector, &[10, 20, 30, 40], 1, 4); + verify_array_elements(vector, &[10, 20, 30, 40], 1, 4); } #[test] @@ -288,15 +287,15 @@ mod tests { // Should contain [4, 5, 6], [7, 8, 9]. let vector = chunk.get_vector(0); - verify_array_elements(&vector, &[4, 5, 6, 7, 8, 9], 3, 2); + verify_array_elements(vector, &[4, 5, 6, 7, 8, 9], 3, 2); } /// Helper to create nested array type for DuckDB. - fn create_nested_array_type(inner_list_size: u32, outer_list_size: u32) -> LogicalType { + fn create_nested_array_type(inner_list_size: u32, outer_list_size: u32) -> OwnedLogicalType { let inner_array_type = - LogicalType::new_array(cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER, inner_list_size); + OwnedLogicalType::new_array(cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER, inner_list_size); - LogicalType::array_type(inner_array_type, outer_list_size) + OwnedLogicalType::array_type(inner_array_type, outer_list_size) .vortex_expect("failed to create nested array type") } @@ -329,7 +328,7 @@ mod tests { // Create the nested array type and export. 
let outer_array_type = create_nested_array_type(2, 3); - let mut chunk = DataChunk::new([outer_array_type]); + let mut chunk = OwnedDataChunk::new([outer_array_type]); new_exporter( outer_fsl, @@ -337,7 +336,7 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 2, &mut chunk.get_vector(0)) + .export(0, 2, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(2); @@ -386,7 +385,7 @@ mod tests { // Create the nested array type and export. let outer_array_type = create_nested_array_type(2, 3); - let mut chunk = DataChunk::new([outer_array_type]); + let mut chunk = OwnedDataChunk::new([outer_array_type]); new_exporter( outer_fsl, @@ -394,7 +393,7 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 3, &mut chunk.get_vector(0)) + .export(0, 3, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(3); @@ -402,7 +401,7 @@ mod tests { // Verify outer level nullability. let outer_vector = chunk.get_vector(0); - assert_nulls(&outer_vector, &[true, false, true]); + assert_nulls(outer_vector, &[true, false, true]); // Verify inner level structure and nullability. let inner_vector = outer_vector.array_vector_get_child(); diff --git a/vortex-duckdb/src/exporter/list.rs b/vortex-duckdb/src/exporter/list.rs index 237b84813c5..935e83f68fe 100644 --- a/vortex-duckdb/src/exporter/list.rs +++ b/vortex-duckdb/src/exporter/list.rs @@ -19,8 +19,9 @@ use vortex::mask::Mask; use super::ConversionCache; use super::all_invalid; use super::new_array_exporter_with_flatten; -use crate::LogicalType; use crate::cpp; +use crate::duckdb::OwnedLogicalType; +use crate::duckdb::OwnedVector; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; @@ -32,7 +33,7 @@ struct ListExporter { /// Note that we are trading less compute for more memory here, as we will export the entire /// array in the constructor of the exporter (`new_exporter`) even if some of the elements are /// unreachable. 
- duckdb_elements: Arc>, + duckdb_elements: Arc>, offsets: PrimitiveArray, num_elements: usize, offset_type: PhantomData, @@ -55,10 +56,8 @@ pub(crate) fn new_exporter( let validity = validity.to_array(array_len).execute::(ctx)?; if validity.all_false() { - return Ok(all_invalid::new_exporter( - array_len, - &LogicalType::try_from(dtype)?, - )); + let ltype = OwnedLogicalType::try_from(dtype)?; + return Ok(all_invalid::new_exporter(array_len, <ype)); } let values_key = Arc::as_ptr(&elements).addr(); @@ -72,8 +71,8 @@ pub(crate) fn new_exporter( Some(elements) => elements, None => { // We have no cached the vector yet, so create a new DuckDB vector for the elements. - let mut duckdb_elements = - Vector::with_capacity(elements.dtype().try_into()?, num_elements); + let elements_type: OwnedLogicalType = elements.dtype().try_into()?; + let mut duckdb_elements = OwnedVector::with_capacity(&elements_type, num_elements); let elements_exporter = new_array_exporter_with_flatten(elements.clone(), cache, ctx, true)?; @@ -143,7 +142,7 @@ impl ColumnExporter for ListExporter { duckdb_list_views[i] = cpp::duckdb_list_entry { offset, length }; } - let mut child = vector.list_vector_get_child(); + let child = vector.list_vector_get_child_mut(); child.reference(&self.duckdb_elements.lock()); vector.list_vector_set_size(self.num_elements as u64)?; @@ -164,8 +163,8 @@ mod tests { use super::*; use crate::SESSION; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; use crate::exporter::new_array_exporter; #[test] @@ -180,9 +179,9 @@ mod tests { } .into_array(); - let list_type = LogicalType::list_type(LogicalType::int32()) + let list_type = OwnedLogicalType::list_type(OwnedLogicalType::int32()) .vortex_expect("LogicalType creation should succeed for test data"); - let mut chunk = DataChunk::new([list_type]); + let mut chunk = OwnedDataChunk::new([list_type]); new_array_exporter( list, @@ -190,12 
+189,12 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 0, &mut chunk.get_vector(0)) + .export(0, 0, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(0); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT INTEGER[]: 0 = [ ] "# @@ -219,9 +218,9 @@ mod tests { } .into_array(); - let list_type = LogicalType::list_type(LogicalType::varchar()) + let list_type = OwnedLogicalType::list_type(OwnedLogicalType::varchar()) .vortex_expect("LogicalType creation should succeed for test data"); - let mut chunk = DataChunk::new([list_type]); + let mut chunk = OwnedDataChunk::new([list_type]); new_array_exporter( list, @@ -229,12 +228,12 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 4, &mut chunk.get_vector(0)) + .export(0, 4, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(4); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT VARCHAR[]: 4 = [ [abc], [def], NULL, [ghi]] "# diff --git a/vortex-duckdb/src/exporter/list_view.rs b/vortex-duckdb/src/exporter/list_view.rs index 3a39705475d..a2f1b32230d 100644 --- a/vortex-duckdb/src/exporter/list_view.rs +++ b/vortex-duckdb/src/exporter/list_view.rs @@ -20,8 +20,9 @@ use vortex::mask::Mask; use super::ConversionCache; use super::all_invalid; use super::new_array_exporter_with_flatten; -use crate::LogicalType; use crate::cpp; +use crate::duckdb::OwnedLogicalType; +use crate::duckdb::OwnedVector; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; @@ -33,7 +34,7 @@ struct ListViewExporter { /// Note that we are trading less compute for more memory here, as we will export the entire /// array in the constructor of the exporter (`new_exporter`) even if some of the elements are /// unreachable. 
- duckdb_elements: Arc>, + duckdb_elements: Arc>, offsets: PrimitiveArray, sizes: PrimitiveArray, num_elements: usize, @@ -60,10 +61,8 @@ pub(crate) fn new_exporter( let validity = validity.to_array(len).execute::(ctx)?; if validity.all_false() { - return Ok(all_invalid::new_exporter( - len, - &LogicalType::try_from(DType::List(elements_dtype, nullability))?, - )); + let ltype = OwnedLogicalType::try_from(DType::List(elements_dtype, nullability))?; + return Ok(all_invalid::new_exporter(len, <ype)); } let values_key = Arc::as_ptr(&elements).addr(); @@ -77,8 +76,8 @@ pub(crate) fn new_exporter( Some(elements) => elements, None => { // We have no cached the vector yet, so create a new DuckDB vector for the elements. - let mut duckdb_elements = - Vector::with_capacity(elements_dtype.as_ref().try_into()?, elements.len()); + let elements_type: OwnedLogicalType = elements_dtype.as_ref().try_into()?; + let mut duckdb_elements = OwnedVector::with_capacity(&elements_type, elements.len()); let elements_exporter = new_array_exporter_with_flatten(elements.clone(), cache, ctx, true)?; @@ -155,7 +154,7 @@ impl ColumnExporter for ListViewExporter duckdb_list_views[i] = cpp::duckdb_list_entry { offset, length }; } - let mut child = vector.list_vector_get_child(); + let child = vector.list_vector_get_child_mut(); child.reference(&self.duckdb_elements.lock()); vector.list_vector_set_size(self.num_elements as u64)?; @@ -176,8 +175,8 @@ mod tests { use super::*; use crate::SESSION; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; use crate::exporter::new_array_exporter; #[test] @@ -194,9 +193,9 @@ mod tests { } .into_array(); - let list_type = LogicalType::list_type(LogicalType::varchar()) + let list_type = OwnedLogicalType::list_type(OwnedLogicalType::varchar()) .vortex_expect("LogicalType creation should succeed for test data"); - let mut chunk = DataChunk::new([list_type]); + let mut chunk = 
OwnedDataChunk::new([list_type]); new_array_exporter( list, @@ -204,12 +203,12 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 0, &mut chunk.get_vector(0)) + .export(0, 0, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(0); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT INTEGER[]: 0 = [ ] "# @@ -229,9 +228,9 @@ mod tests { } .into_array(); - let list_type = LogicalType::list_type(LogicalType::int32()) + let list_type = OwnedLogicalType::list_type(OwnedLogicalType::int32()) .vortex_expect("LogicalType creation should succeed for test data"); - let mut chunk = DataChunk::new([list_type]); + let mut chunk = OwnedDataChunk::new([list_type]); new_array_exporter( list, @@ -239,12 +238,12 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 3, &mut chunk.get_vector(0)) + .export(0, 3, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(3); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT INTEGER[]: 3 = [ [1], [2], [3]] "# @@ -270,9 +269,9 @@ mod tests { } .into_array(); - let list_type = LogicalType::list_type(LogicalType::varchar()) + let list_type = OwnedLogicalType::list_type(OwnedLogicalType::varchar()) .vortex_expect("LogicalType creation should succeed for test data"); - let mut chunk = DataChunk::new([list_type]); + let mut chunk = OwnedDataChunk::new([list_type]); new_array_exporter( list, @@ -280,12 +279,12 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 4, &mut chunk.get_vector(0)) + .export(0, 4, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(4); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT VARCHAR[]: 4 = [ [], [abc, def, NULL], NULL, []] "# diff --git 
a/vortex-duckdb/src/exporter/mod.rs b/vortex-duckdb/src/exporter/mod.rs index ad224d3b38d..be30600d8aa 100644 --- a/vortex-duckdb/src/exporter/mod.rs +++ b/vortex-duckdb/src/exporter/mod.rs @@ -39,7 +39,7 @@ use vortex::error::vortex_bail; use crate::duckdb::DUCKDB_STANDARD_VECTOR_SIZE; use crate::duckdb::DataChunk; -use crate::duckdb::LogicalType; +use crate::duckdb::OwnedLogicalType; use crate::duckdb::Vector; pub struct ArrayExporter { @@ -93,8 +93,7 @@ impl ArrayExporter { chunk.set_len(chunk_len); for (i, field) in self.fields.iter_mut().enumerate() { - let mut vector = chunk.get_vector(i); - field.export(position, chunk_len, &mut vector)?; + field.export(position, chunk_len, chunk.get_vector_mut(i))?; } Ok(true) @@ -151,7 +150,10 @@ fn new_array_exporter_with_flatten( // Otherwise, we fall back to canonical match array.execute::(ctx)? { - Canonical::Null(array) => Ok(all_invalid::new_exporter(array.len(), &LogicalType::null())), + Canonical::Null(array) => Ok(all_invalid::new_exporter( + array.len(), + &OwnedLogicalType::null(), + )), Canonical::Bool(array) => bool::new_exporter(array, ctx), Canonical::Primitive(array) => primitive::new_exporter(array, ctx), Canonical::Decimal(array) => decimal::new_exporter(array, ctx), @@ -187,14 +189,14 @@ mod tests { use vortex::mask::Mask; use crate::cpp::DUCKDB_TYPE; - use crate::duckdb::LogicalType; - use crate::duckdb::Vector; + use crate::duckdb::OwnedLogicalType; + use crate::duckdb::OwnedVector; use crate::exporter::copy_from_slice; #[test] fn test_set_validity_all_true() { - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); - let mut vector = Vector::with_capacity(logical_type, 100); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); + let mut vector = OwnedVector::with_capacity(&logical_type, 100); let mask = Mask::AllTrue(10); let all_null = unsafe { vector.set_validity(&mask, 0, 10) }; @@ -204,8 +206,8 @@ mod tests { #[test] fn test_set_validity_all_false() { - let 
logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); - let mut vector = Vector::with_capacity(logical_type, 100); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); + let mut vector = OwnedVector::with_capacity(&logical_type, 100); let len = 10; let mask = Mask::AllFalse(len); @@ -222,8 +224,8 @@ mod tests { #[test] fn test_set_validity_values_all_true() { - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); - let mut vector = Vector::with_capacity(logical_type, 100); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); + let mut vector = OwnedVector::with_capacity(&logical_type, 100); let mask = Mask::from(BitBuffer::from(vec![true; 10])); @@ -240,8 +242,8 @@ mod tests { #[test] fn test_set_validity_values_all_false() { - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); - let mut vector = Vector::with_capacity(logical_type, 100); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); + let mut vector = OwnedVector::with_capacity(&logical_type, 100); const LEN: usize = 10; let bits = vec![false; LEN]; @@ -259,8 +261,8 @@ mod tests { #[test] fn test_set_validity_values_mixed() { - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); - let mut vector = Vector::with_capacity(logical_type, 100); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); + let mut vector = OwnedVector::with_capacity(&logical_type, 100); let bits = vec![ true, false, true, true, false, false, true, true, false, true, @@ -279,8 +281,8 @@ mod tests { #[test] fn test_set_validity_values_with_offset() { - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); - let mut vector = Vector::with_capacity(logical_type, 100); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); + let mut vector = OwnedVector::with_capacity(&logical_type, 100); let bits = vec![ false, false, true, true, false, true, 
false, true, true, false, true, true, false, @@ -299,8 +301,8 @@ mod tests { #[test] fn test_set_validity_values_with_offset_and_smaller_len() { - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); - let mut vector = Vector::with_capacity(logical_type, 100); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); + let mut vector = OwnedVector::with_capacity(&logical_type, 100); let bits = vec![ true, false, true, true, false, false, true, true, false, true, true, true, false, @@ -320,8 +322,8 @@ mod tests { #[test] fn test_set_validity_values_64bit_alignment() { - let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); - let mut vector = Vector::with_capacity(logical_type, 100); + let logical_type = OwnedLogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); + let mut vector = OwnedVector::with_capacity(&logical_type, 100); let bits = (0..70).map(|i| i % 3 == 0).collect::>(); let mask = Mask::from(BitBuffer::from(bits.as_slice())); diff --git a/vortex-duckdb/src/exporter/primitive.rs b/vortex-duckdb/src/exporter/primitive.rs index 06bcca2af41..42b93c644d9 100644 --- a/vortex-duckdb/src/exporter/primitive.rs +++ b/vortex-duckdb/src/exporter/primitive.rs @@ -11,9 +11,9 @@ use vortex::dtype::match_each_native_ptype; use vortex::error::VortexResult; use vortex::mask::Mask; -use crate::LogicalType; +use crate::duckdb::OwnedLogicalType; +use crate::duckdb::OwnedVectorBuffer; use crate::duckdb::Vector; -use crate::duckdb::VectorBuffer; use crate::exporter::ColumnExporter; use crate::exporter::all_invalid; use crate::exporter::validity; @@ -21,7 +21,7 @@ use crate::exporter::validity; struct PrimitiveExporter { len: usize, start: *const T, - shared_buffer: VectorBuffer, + shared_buffer: OwnedVectorBuffer, _phantom_type: PhantomData, } @@ -35,10 +35,8 @@ pub fn new_exporter( .execute::(ctx)?; if validity.all_false() { - return Ok(all_invalid::new_exporter( - array.len(), - &LogicalType::try_from(array.ptype())?, - )); + let 
ltype = OwnedLogicalType::try_from(array.ptype())?; + return Ok(all_invalid::new_exporter(array.len(), <ype)); } match_each_native_ptype!(array.ptype(), |T| { @@ -46,7 +44,7 @@ pub fn new_exporter( let prim = Box::new(PrimitiveExporter { len: buffer.len(), start: buffer.as_ptr(), - shared_buffer: VectorBuffer::new(buffer), + shared_buffer: OwnedVectorBuffer::new(buffer), _phantom_type: Default::default(), }); Ok(validity::new_exporter(validity, prim)) @@ -76,23 +74,24 @@ mod tests { use crate::SESSION; use crate::cpp; use crate::duckdb::DUCKDB_STANDARD_VECTOR_SIZE; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; #[test] fn test_primitive_exporter() { let arr = PrimitiveArray::from_iter(0..10); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); + let mut chunk = + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() - .export(0, 3, &mut chunk.get_vector(0)) + .export(0, 3, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(3); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT INTEGER: 3 = [ 0, 1, 2] "# @@ -107,7 +106,11 @@ mod tests { { let mut chunk = (0..ARRAY_COUNT) - .map(|_| DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)])) + .map(|_| { + OwnedDataChunk::new([OwnedLogicalType::new( + cpp::duckdb_type::DUCKDB_TYPE_INTEGER, + )]) + }) .collect_vec(); for i in 0..ARRAY_COUNT { @@ -116,13 +119,13 @@ mod tests { .export( i * DUCKDB_STANDARD_VECTOR_SIZE, DUCKDB_STANDARD_VECTOR_SIZE, - &mut chunk[i].get_vector(0), + chunk[i].get_vector_mut(0), ) .unwrap(); chunk[i].set_len(DUCKDB_STANDARD_VECTOR_SIZE); assert_eq!( - format!("{}", String::try_from(&chunk[i]).unwrap()), + format!("{}", 
String::try_from(&*chunk[i]).unwrap()), format!( r#"Chunk - [1 Columns] - FLAT INTEGER: {DUCKDB_STANDARD_VECTOR_SIZE} = [ {}] diff --git a/vortex-duckdb/src/exporter/run_end.rs b/vortex-duckdb/src/exporter/run_end.rs index 32b102f949a..891f077bb28 100644 --- a/vortex-duckdb/src/exporter/run_end.rs +++ b/vortex-duckdb/src/exporter/run_end.rs @@ -16,7 +16,7 @@ use vortex::error::VortexExpect; use vortex::error::VortexResult; use crate::convert::ToDuckDBScalar; -use crate::duckdb::SelectionVector; +use crate::duckdb::OwnedSelectionVector; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; use crate::exporter::cache::ConversionCache; @@ -86,7 +86,7 @@ impl ColumnExporter for RunEndExporter { } // Build up a selection vector - let mut sel_vec = SelectionVector::with_capacity(len); + let mut sel_vec = OwnedSelectionVector::with_capacity(len); let mut sel_vec_slice = unsafe { sel_vec.as_slice_mut(len) }; for (run_idx, &next_end) in ends_slice[start_run_idx..=end_run_idx].iter().enumerate() { diff --git a/vortex-duckdb/src/exporter/sequence.rs b/vortex-duckdb/src/exporter/sequence.rs index 93b2c419b3f..f46dc1a9de7 100644 --- a/vortex-duckdb/src/exporter/sequence.rs +++ b/vortex-duckdb/src/exporter/sequence.rs @@ -40,22 +40,23 @@ mod tests { use super::*; use crate::cpp; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; #[test] fn test_sequence() { let arr = SequenceArray::typed_new(2, 5, Nullability::NonNullable, 100).unwrap(); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); + let mut chunk = + OwnedDataChunk::new([OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); new_exporter(&arr) .unwrap() - .export(0, 4, &mut chunk.get_vector(0)) + .export(0, 4, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(4); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", 
String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - SEQUENCE INTEGER: 4 = [ 2, 7, 12, 17] "# diff --git a/vortex-duckdb/src/exporter/struct_.rs b/vortex-duckdb/src/exporter/struct_.rs index eda7024106b..fa3aa931379 100644 --- a/vortex-duckdb/src/exporter/struct_.rs +++ b/vortex-duckdb/src/exporter/struct_.rs @@ -9,7 +9,7 @@ use vortex::array::arrays::StructArrayParts; use vortex::array::builtins::ArrayBuiltins; use vortex::error::VortexResult; -use crate::LogicalType; +use crate::duckdb::OwnedLogicalType; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; use crate::exporter::ConversionCache; @@ -36,10 +36,8 @@ pub(crate) fn new_exporter( let validity = validity.to_array(len).execute::(ctx)?; if validity.to_bit_buffer().true_count() == 0 { - return Ok(all_invalid::new_exporter( - len, - &LogicalType::try_from(struct_fields)?, - )); + let ltype = OwnedLogicalType::try_from(struct_fields)?; + return Ok(all_invalid::new_exporter(len, <ype)); } let children = fields @@ -62,7 +60,7 @@ pub(crate) fn new_exporter( impl ColumnExporter for StructExporter { fn export(&self, offset: usize, len: usize, vector: &mut Vector) -> VortexResult<()> { for (idx, child) in self.children.iter().enumerate() { - child.export(offset, len, &mut vector.struct_vector_get_child(idx))?; + child.export(offset, len, vector.struct_vector_get_child_mut(idx))?; } Ok(()) } @@ -86,8 +84,8 @@ mod tests { use super::*; use crate::SESSION; use crate::cpp; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; #[test] fn test_struct_exporter() { @@ -97,10 +95,10 @@ mod tests { .into_array(); let arr = StructArray::from_fields(&[("a", prim), ("b", strings)]).vortex_expect("struct array"); - let mut chunk = DataChunk::new([LogicalType::struct_type( + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::struct_type( vec![ - LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER), - 
LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_VARCHAR), + OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER), + OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_VARCHAR), ], vec![CString::new("col1").unwrap(), CString::new("col2").unwrap()], ) @@ -112,12 +110,12 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 10, &mut chunk.get_vector(0)) + .export(0, 10, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(10); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT STRUCT(col1 INTEGER, col2 VARCHAR): 10 = [ {'col1': 0, 'col2': a}, {'col1': 1, 'col2': b}, {'col1': 2, 'col2': c}, {'col1': 3, 'col2': d}, {'col1': 4, 'col2': e}, {'col1': 5, 'col2': f}, {'col1': 6, 'col2': g}, {'col1': 7, 'col2': h}, {'col1': 8, 'col2': i}, {'col1': 9, 'col2': j}] "# @@ -161,10 +159,10 @@ mod tests { ])), ) .vortex_expect("StructArray creation should succeed for test data"); - let mut chunk = DataChunk::new([LogicalType::struct_type( + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::struct_type( vec![ - LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER), - LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_VARCHAR), + OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER), + OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_VARCHAR), ], vec![CString::new("col1").unwrap(), CString::new("col2").unwrap()], ) @@ -176,12 +174,12 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 10, &mut chunk.get_vector(0)) + .export(0, 10, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(10); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT STRUCT(col1 INTEGER, col2 VARCHAR): 10 = [ {'col1': 1, 'col2': NULL}, {'col1': NULL, 'col2': b}, {'col1': 2, 'col2': c}, NULL, NULL, NULL, {'col1': 4, 'col2': g}, {'col1': NULL, 'col2': h}, {'col1': 
5, 'col2': NULL}, {'col1': NULL, 'col2': j}] "# @@ -206,10 +204,10 @@ mod tests { ])), ) .vortex_expect("StructArray creation should succeed for test data"); - let mut chunk = DataChunk::new([LogicalType::struct_type( + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::struct_type( vec![ - LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER), - LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_VARCHAR), + OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER), + OwnedLogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_VARCHAR), ], vec![CString::new("col1").unwrap(), CString::new("col2").unwrap()], ) @@ -221,12 +219,12 @@ mod tests { &mut SESSION.create_execution_ctx(), ) .unwrap() - .export(0, 10, &mut chunk.get_vector(0)) + .export(0, 10, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(10); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT STRUCT(col1 INTEGER, col2 VARCHAR): 10 = [ {'col1': 42, 'col2': b}, {'col1': 42, 'col2': c}, {'col1': 42, 'col2': c}, NULL, NULL, NULL, {'col1': 42, 'col2': d}, {'col1': 42, 'col2': g}, {'col1': 42, 'col2': g}, {'col1': 42, 'col2': h}] "# diff --git a/vortex-duckdb/src/exporter/temporal.rs b/vortex-duckdb/src/exporter/temporal.rs index 3beac12def3..b8fb38e126a 100644 --- a/vortex-duckdb/src/exporter/temporal.rs +++ b/vortex-duckdb/src/exporter/temporal.rs @@ -48,8 +48,8 @@ mod tests { use crate::SESSION; use crate::cpp; - use crate::duckdb::DataChunk; - use crate::duckdb::LogicalType; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; use crate::exporter::temporal::new_exporter; #[test] @@ -59,17 +59,18 @@ mod tests { TimeUnit::Seconds, None, ); - let mut chunk = - DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_TIMESTAMP_S)]); + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::new( + cpp::duckdb_type::DUCKDB_TYPE_TIMESTAMP_S, + )]); new_exporter(arr, &mut 
SESSION.create_execution_ctx()) .unwrap() - .export(1, 5, &mut chunk.get_vector(0)) + .export(1, 5, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(2); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT TIMESTAMP_S: 2 = [ 2025-06-18 16:43:45, 2025-06-18 16:43:46] "# @@ -84,16 +85,18 @@ mod tests { TimeUnit::Microseconds, None, ); - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_TIMESTAMP)]); + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::new( + cpp::duckdb_type::DUCKDB_TYPE_TIMESTAMP, + )]); new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() - .export(1, 5, &mut chunk.get_vector(0)) + .export(1, 5, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(4); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT TIMESTAMP: 4 = [ 2025-06-18 16:46:29.000001, 2025-06-18 16:46:30.000001, 2025-06-18 16:46:31.000001, 2025-06-18 16:46:32.000001] "# @@ -107,17 +110,17 @@ mod tests { TimeUnit::Microseconds, ); - let mut chunk = DataChunk::new([LogicalType::try_from(arr.dtype()).unwrap()]); + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::try_from(arr.dtype()).unwrap()]); new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() - .export(1, 5, &mut chunk.get_vector(0)) + .export(1, 5, chunk.get_vector_mut(0)) .unwrap(); chunk.set_len(4); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT TIME: 4 = [ 00:00:02, 00:00:03, 00:00:04, 00:00:05] "# diff --git a/vortex-duckdb/src/exporter/varbinview.rs b/vortex-duckdb/src/exporter/varbinview.rs index c440d72a363..09db059e1da 100644 --- a/vortex-duckdb/src/exporter/varbinview.rs +++ b/vortex-duckdb/src/exporter/varbinview.rs @@ -15,9 +15,9 @@ use vortex::buffer::ByteBuffer; use 
vortex::error::VortexResult; use vortex::mask::Mask; -use crate::LogicalType; +use crate::duckdb::OwnedLogicalType; +use crate::duckdb::OwnedVectorBuffer; use crate::duckdb::Vector; -use crate::duckdb::VectorBuffer; use crate::exporter::ColumnExporter; use crate::exporter::all_invalid; use crate::exporter::validity; @@ -25,7 +25,7 @@ use crate::exporter::validity; struct VarBinViewExporter { views: Buffer, buffers: Arc<[ByteBuffer]>, - vector_buffers: Vec, + vector_buffers: Vec, } pub(crate) fn new_exporter( @@ -41,10 +41,8 @@ pub(crate) fn new_exporter( } = array.into_parts(); let validity = validity.to_array(len).execute::(ctx)?; if validity.all_false() { - return Ok(all_invalid::new_exporter( - len, - &LogicalType::try_from(dtype)?, - )); + let ltype = OwnedLogicalType::try_from(dtype)?; + return Ok(all_invalid::new_exporter(len, <ype)); } let buffers = buffers @@ -59,7 +57,11 @@ pub(crate) fn new_exporter( validity, Box::new(VarBinViewExporter { views: Buffer::::from_byte_buffer(views.unwrap_host()), - vector_buffers: buffers.iter().cloned().map(VectorBuffer::new).collect_vec(), + vector_buffers: buffers + .iter() + .cloned() + .map(OwnedVectorBuffer::new) + .collect_vec(), buffers, }), )) @@ -147,26 +149,26 @@ mod tests { use vortex_array::VortexSessionExecute; use vortex_array::arrays::VarBinViewArray; - use crate::LogicalType; use crate::SESSION; - use crate::duckdb::DataChunk; + use crate::duckdb::OwnedDataChunk; + use crate::duckdb::OwnedLogicalType; use crate::exporter::varbinview::new_exporter; #[test] fn all_invalid_varbinview() -> VortexResult<()> { let arr = VarBinViewArray::from_iter([Option::<&str>::None; 4], DType::Utf8(Nullable)); - let mut chunk = DataChunk::new([LogicalType::varchar()]); + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::varchar()]); new_exporter(arr, &mut SESSION.create_execution_ctx())?.export( 0, 3, - &mut chunk.get_vector(0), + chunk.get_vector_mut(0), )?; chunk.set_len(3); assert_eq!( - format!("{}", 
String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - CONSTANT VARCHAR: 3 = [ NULL] "# @@ -179,17 +181,17 @@ mod tests { let arr = VarBinViewArray::from_iter([None, None, None, Some("Hey")], DType::Utf8(Nullable)); - let mut chunk = DataChunk::new([LogicalType::varchar()]); + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::varchar()]); new_exporter(arr, &mut SESSION.create_execution_ctx())?.export( 0, 3, - &mut chunk.get_vector(0), + chunk.get_vector_mut(0), )?; chunk.set_len(3); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - CONSTANT VARCHAR: 3 = [ NULL] "# @@ -204,17 +206,17 @@ mod tests { DType::Utf8(Nullable), ); - let mut chunk = DataChunk::new([LogicalType::varchar()]); + let mut chunk = OwnedDataChunk::new([OwnedLogicalType::varchar()]); new_exporter(arr, &mut SESSION.create_execution_ctx())?.export( 0, 3, - &mut chunk.get_vector(0), + chunk.get_vector_mut(0), )?; chunk.set_len(3); assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), + format!("{}", String::try_from(&*chunk).unwrap()), r#"Chunk - [1 Columns] - FLAT VARCHAR: 3 = [ NULL, NULL, Hi] "# diff --git a/vortex-duckdb/src/exporter/vector.rs b/vortex-duckdb/src/exporter/vector.rs index e8f66bb88f8..a1d52ccef1f 100644 --- a/vortex-duckdb/src/exporter/vector.rs +++ b/vortex-duckdb/src/exporter/vector.rs @@ -3,7 +3,7 @@ use vortex::mask::Mask; -use crate::Value; +use crate::duckdb::OwnedValue; use crate::duckdb::Vector; use crate::exporter::copy_from_slice; @@ -49,6 +49,6 @@ impl Vector { } pub(super) fn set_all_false_validity(&mut self) { - self.reference_value(&Value::null(&self.logical_type())); + self.reference_value(&OwnedValue::null(&self.logical_type())); } } diff --git a/vortex-duckdb/src/filesystem.rs b/vortex-duckdb/src/filesystem.rs index 79ed4aff5dc..6710aff7e6d 100644 --- a/vortex-duckdb/src/filesystem.rs +++ 
b/vortex-duckdb/src/filesystem.rs @@ -37,7 +37,7 @@ use vortex::io::runtime::BlockingRuntime; use crate::RUNTIME; use crate::cpp; use crate::duckdb::ClientContext; -use crate::duckdb::FsFileHandle; +use crate::duckdb::OwnedFsFileHandle; use crate::duckdb::duckdb_fs_list_dir; use crate::duckdb::fs_error; @@ -50,14 +50,18 @@ pub(super) fn resolve_filesystem( .ok_or_else(|| { vortex_err!("Failed to read 'vortex_filesystem' setting from DuckDB config") })?; - let fs_config = fs_config.as_ref().as_string(); + let fs_config = fs_config.as_string(); Ok(if fs_config.as_str() == "duckdb" { tracing::info!( "Using DuckDB's built-in filesystem for URL scheme '{}'", base_url.scheme() ); - Arc::new(DuckDbFileSystem::new(base_url.clone(), ctx.clone())) + // SAFETY: The ClientContext is owned by the Connection and lives for the duration of + // query execution. DuckDB keeps the connection alive while the filesystem is in use. + Arc::new(DuckDbFileSystem::new(base_url.clone(), unsafe { + ctx.erase_lifetime() + })) } else if fs_config.as_str() == "vortex" { tracing::info!( "Using Vortex's object store filesystem for URL scheme '{}'", @@ -98,7 +102,7 @@ fn object_store_fs(base_url: &Url) -> VortexResult { struct DuckDbFileSystem { base_url: Url, - ctx: ClientContext, + ctx: &'static ClientContext, } impl Debug for DuckDbFileSystem { @@ -110,7 +114,7 @@ impl Debug for DuckDbFileSystem { } impl DuckDbFileSystem { - pub fn new(base_url: Url, ctx: ClientContext) -> Self { + pub fn new(base_url: Url, ctx: &'static ClientContext) -> Self { Self { base_url, ctx } } } @@ -137,13 +141,13 @@ impl FileSystem for DuckDbFileSystem { directory ); - let ctx = self.ctx.clone(); + let ctx = self.ctx; let base_path = self.base_url.path().to_string(); stream::once(async move { RUNTIME .handle() - .spawn_blocking(move || list_recursive(&ctx, &directory, &base_path)) + .spawn_blocking(move || list_recursive(ctx, &directory, &base_path)) .await }) .flat_map(|result| match result { @@ -194,7 +198,7 @@ fn 
list_recursive( /// A VortexReadAt implementation backed by DuckDB's filesystem (e.g., httpfs/s3). pub(crate) struct DuckDbFsReader { - handle: Arc, + handle: Arc, uri: Arc, is_local: bool, size: Arc>, @@ -215,7 +219,7 @@ impl DuckDbFsReader { let is_local = url.scheme() == "file"; Ok(Self { - handle: Arc::new(unsafe { FsFileHandle::own(handle) }), + handle: Arc::new(unsafe { OwnedFsFileHandle::own(handle) }), uri: Arc::from(url.as_str()), is_local, size: Arc::new(OnceLock::new()), diff --git a/vortex-duckdb/src/lib.rs b/vortex-duckdb/src/lib.rs index 7391c899fbf..b820aad7e88 100644 --- a/vortex-duckdb/src/lib.rs +++ b/vortex-duckdb/src/lib.rs @@ -17,8 +17,8 @@ use vortex::session::VortexSession; use crate::copy::VortexCopyFunction; use crate::duckdb::Database; -use crate::duckdb::LogicalType; -use crate::duckdb::Value; +use crate::duckdb::OwnedLogicalType; +use crate::duckdb::OwnedValue; use crate::scan::VortexTableFunction; mod convert; @@ -49,8 +49,8 @@ pub fn initialize(db: &Database) -> VortexResult<()> { db.config().add_extension_options( "vortex_filesystem", "Whether to use Vortex's filesystem ('vortex') or DuckDB's filesystems ('duckdb').", - LogicalType::varchar(), - Value::from("vortex"), + OwnedLogicalType::varchar(), + OwnedValue::from("vortex"), )?; db.register_table_function::(c"vortex_scan")?; db.register_table_function::(c"read_vortex")?; @@ -73,7 +73,7 @@ pub unsafe extern "C" fn vortex_init_rust(db: cpp::duckdb_database) { database .register_vortex_scan_replacement() .vortex_expect("failed to register vortex scan replacement"); - initialize(&database).vortex_expect("Failed to initialize Vortex extension"); + initialize(database).vortex_expect("Failed to initialize Vortex extension"); } /// The DuckDB extension ABI version function. 
diff --git a/vortex-duckdb/src/scan.rs b/vortex-duckdb/src/scan.rs index 2772130eb8d..e5bec25beb9 100644 --- a/vortex-duckdb/src/scan.rs +++ b/vortex-duckdb/src/scan.rs @@ -64,7 +64,7 @@ use crate::duckdb::ClientContext; use crate::duckdb::DataChunk; use crate::duckdb::Expression; use crate::duckdb::ExtractedValue; -use crate::duckdb::LogicalType; +use crate::duckdb::OwnedLogicalType; use crate::duckdb::TableFunction; use crate::duckdb::TableInitInput; use crate::duckdb::VirtualColumnsResult; @@ -79,7 +79,7 @@ pub struct VortexBindData { filter_exprs: Vec, files: Vec, column_names: Vec, - column_types: Vec, + column_types: Vec, } impl Clone for VortexBindData { @@ -128,7 +128,7 @@ pub struct VortexTableFunction; /// Extracts the schema from a Vortex file. fn extract_schema_from_vortex_file( file: &VortexFile, -) -> VortexResult<(Vec, Vec)> { +) -> VortexResult<(Vec, Vec)> { let dtype = file.dtype(); // For now, we assume the top-level type to be a struct. @@ -140,7 +140,7 @@ fn extract_schema_from_vortex_file( let mut column_types = Vec::new(); for (field_name, field_dtype) in struct_dtype.names().iter().zip(struct_dtype.fields()) { - let logical_type = LogicalType::try_from(&field_dtype)?; + let logical_type = OwnedLogicalType::try_from(&field_dtype)?; column_names.push(field_name.to_string()); column_types.push(logical_type); } @@ -178,29 +178,26 @@ fn extract_table_filter_expr( init: &TableInitInput, column_ids: &[u64], ) -> VortexResult> { - let mut table_filter_exprs: HashSet = - if let Some(filter) = init.table_filter_set() { - filter - .into_iter() - .map(|(idx, ex)| { - let idx_u: usize = idx.as_(); - let col_idx: usize = column_ids[idx_u].as_(); - let name = init - .bind_data() - .column_names - .get(col_idx) - .vortex_expect("exists"); - try_from_table_filter( - &ex, - &col(name.as_str()), - init.bind_data().first_file.dtype(), - ) - }) - .collect::>>>()? 
- .unwrap_or_else(HashSet::new) - } else { - HashSet::new() - }; + let mut table_filter_exprs: HashSet = if let Some(filter) = + init.table_filter_set() + { + filter + .into_iter() + .map(|(idx, ex)| { + let idx_u: usize = idx.as_(); + let col_idx: usize = column_ids[idx_u].as_(); + let name = init + .bind_data() + .column_names + .get(col_idx) + .vortex_expect("exists"); + try_from_table_filter(ex, &col(name.as_str()), init.bind_data().first_file.dtype()) + }) + .collect::>>>()? + .unwrap_or_else(HashSet::new) + } else { + HashSet::new() + }; table_filter_exprs.extend(init.bind_data().filter_exprs.clone()); Ok(and_collect(table_filter_exprs.into_iter().collect_vec())) @@ -225,8 +222,8 @@ impl TableFunction for VortexTableFunction { /// Input parameter types of the `vortex_scan` table function. /// // `vortex_scan` takes a single file glob parameter. - fn parameters() -> Vec { - vec![LogicalType::varchar()] + fn parameters() -> Vec { + vec![OwnedLogicalType::varchar()] } fn bind( @@ -239,15 +236,13 @@ impl TableFunction for VortexTableFunction { .ok_or_else(|| vortex_err!("Missing file glob parameter"))?; // Parse the URL and separate the base URL (keep scheme, host, etc.) from the path. - let glob_url_str = glob_url_parameter.as_ref().as_string(); + let glob_url_str = glob_url_parameter.as_string(); let glob_url = match Url::parse(glob_url_str.as_str()) { Ok(url) => url, Err(_) => { // Otherwise, we assume it's a file path. let path = if !glob_url_str.as_str().starts_with("/") { - // We cannot use Path::canonicalize to resolve relative paths since it - // requires the file to exist, and the glob may contain wildcards. Instead, - // we resolve relative paths against the current working directory. + // We cannot use Path::canonicalize to resolve relative paths since it requires the file to exist, and the glob may contain wildcards. Instead, we resolve relative paths against the current working directory. 
let current_dir = std::env::current_dir().map_err(|e| { vortex_err!( "Cannot get current working directory to resolve relative path {}: {}", @@ -276,7 +271,7 @@ impl TableFunction for VortexTableFunction { .map_err(|e| vortex_err!("Invalid setting name: {}", e))?; let max_threads = ctx .try_get_current_setting(&max_threads_cstr) - .and_then(|v| match v.as_ref().extract() { + .and_then(|v| match v.extract() { ExtractedValue::UBigInt(val) => usize::try_from(val).ok(), ExtractedValue::BigInt(val) if val > 0 => usize::try_from(val as u64).ok(), _ => None, @@ -409,7 +404,9 @@ impl TableFunction for VortexTableFunction { ); let client_context = init_input.client_context()?; - let object_cache = client_context.object_cache(); + // SAFETY: The ObjectCache is owned by the DatabaseInstance and lives as long as the + // database. DuckDB keeps the database alive for the duration of any query execution. + let object_cache = unsafe { client_context.object_cache().erase_lifetime() }; let num_workers = std::thread::available_parallelism() .map(|n| n.get()) @@ -565,7 +562,11 @@ impl TableFunction for VortexTableFunction { } fn virtual_columns(_bind_data: &Self::BindData, result: &mut VirtualColumnsResult) { - result.register(EMPTY_COLUMN_IDX, EMPTY_COLUMN_NAME, &LogicalType::bool()); + result.register( + EMPTY_COLUMN_IDX, + EMPTY_COLUMN_NAME, + &OwnedLogicalType::bool(), + ); } } diff --git a/vortex-file/Cargo.toml b/vortex-file/Cargo.toml index ddd75f7b336..b6c507b0373 100644 --- a/vortex-file/Cargo.toml +++ b/vortex-file/Cargo.toml @@ -22,6 +22,7 @@ bytes = { workspace = true } flatbuffers = { workspace = true } futures = { workspace = true, features = ["std", "async-await"] } getrandom_v03 = { workspace = true } # Needed to pickup the "wasm_js" feature for wasm targets from the workspace configuration +glob = { workspace = true } itertools = { workspace = true } kanal = { workspace = true } object_store = { workspace = true, optional = true } diff --git 
a/vortex-sqllogictest/src/duckdb.rs b/vortex-sqllogictest/src/duckdb.rs index 2d461b50974..99266aab9bf 100644 --- a/vortex-sqllogictest/src/duckdb.rs +++ b/vortex-sqllogictest/src/duckdb.rs @@ -11,10 +11,11 @@ use indicatif::ProgressBar; use sqllogictest::DBOutput; use sqllogictest::runner::AsyncDB; use vortex::error::VortexError; -use vortex_duckdb::duckdb::Connection; -use vortex_duckdb::duckdb::Database; use vortex_duckdb::duckdb::LogicalType; -use vortex_duckdb::duckdb::Value; +use vortex_duckdb::duckdb::OwnedConnection; +use vortex_duckdb::duckdb::OwnedDatabase; +use vortex_duckdb::duckdb::OwnedLogicalType; +use vortex_duckdb::duckdb::OwnedValue; use vortex_duckdb::initialize; #[derive(Debug, thiserror::Error)] @@ -33,8 +34,8 @@ impl std::fmt::Display for DuckDBTestError { } struct Inner { - conn: Connection, - _db: Database, + conn: OwnedConnection, + _db: OwnedDatabase, } unsafe impl Send for Inner {} @@ -47,7 +48,7 @@ pub struct DuckDB { impl DuckDB { pub fn try_new(pb: ProgressBar) -> Result { - let db = Database::open_in_memory()?; + let db = OwnedDatabase::open_in_memory()?; db.register_vortex_scan_replacement()?; initialize(&db)?; @@ -62,26 +63,26 @@ impl DuckDB { /// Turn the DuckDB logical type into a `DFColumnType`, which /// tells the runner what types they are. We use the one from DataFusion /// as its richer than the default one. 
- fn normalize_column_type(logical_type: LogicalType) -> DFColumnType { + fn normalize_column_type(logical_type: &LogicalType) -> DFColumnType { let type_id = logical_type.as_type_id(); - if type_id == LogicalType::int32().as_type_id() - || type_id == LogicalType::int64().as_type_id() - || type_id == LogicalType::uint64().as_type_id() - || type_id == LogicalType::int128().as_type_id() - || type_id == LogicalType::uint128().as_type_id() + if type_id == OwnedLogicalType::int32().as_type_id() + || type_id == OwnedLogicalType::int64().as_type_id() + || type_id == OwnedLogicalType::uint64().as_type_id() + || type_id == OwnedLogicalType::int128().as_type_id() + || type_id == OwnedLogicalType::uint128().as_type_id() { DFColumnType::Integer - } else if type_id == LogicalType::varchar().as_type_id() { + } else if type_id == OwnedLogicalType::varchar().as_type_id() { DFColumnType::Text - } else if type_id == LogicalType::bool().as_type_id() { + } else if type_id == OwnedLogicalType::bool().as_type_id() { DFColumnType::Boolean - } else if type_id == LogicalType::float32().as_type_id() - || type_id == LogicalType::float64().as_type_id() + } else if type_id == OwnedLogicalType::float32().as_type_id() + || type_id == OwnedLogicalType::float64().as_type_id() { DFColumnType::Float - } else if type_id == LogicalType::timestamp().as_type_id() - || type_id == LogicalType::timestamp_tz().as_type_id() + } else if type_id == OwnedLogicalType::timestamp().as_type_id() + || type_id == OwnedLogicalType::timestamp_tz().as_type_id() { DFColumnType::Timestamp } else { @@ -108,7 +109,7 @@ impl AsyncDB for DuckDB { for col_idx in 0..r.column_count() { let col_idx = usize::try_from(col_idx).map_err(VortexError::from)?; let dtype = r.column_type(col_idx); - types.push(Self::normalize_column_type(dtype)); + types.push(Self::normalize_column_type(&dtype)); } for chunk in r.into_iter() { @@ -119,7 +120,7 @@ impl AsyncDB for DuckDB { match vector.get_value(row_idx, chunk.len()) { Some(value) => 
current_row.push(value.to_string()), None => current_row - .push(Value::null(&vector.logical_type()).to_string()), + .push(OwnedValue::null(&vector.logical_type()).to_string()), } }