diff --git a/Cargo.lock b/Cargo.lock index 11adb87b..71cfa818 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -762,9 +762,7 @@ dependencies = [ [[package]] name = "cipherstash-client" -version = "0.34.0-alpha.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200537bf2ab562b085e34df7e3391d0426ab04eea3ed588a7fc27f1bd218ee33" +version = "0.34.1-alpha.3" dependencies = [ "aes-gcm-siv", "anyhow", @@ -776,7 +774,7 @@ dependencies = [ "blake3", "cfg-if", "chrono", - "cipherstash-config 0.34.0-alpha.4", + "cipherstash-config 0.34.1-alpha.3", "cipherstash-core", "cllw-ore", "cts-common", @@ -794,7 +792,7 @@ dependencies = [ "ore-rs", "percent-encoding", "rand 0.8.5", - "recipher 0.2.0", + "recipher 0.2.1", "reqwest", "reqwest-middleware", "reqwest-retry", @@ -837,20 +835,17 @@ dependencies = [ [[package]] name = "cipherstash-config" -version = "0.34.0-alpha.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "333ba6c42338ce6bbbc515fb75e43b57311ece1a9ea41e7daabe50478c342841" +version = "0.34.1-alpha.3" dependencies = [ "bitflags", "serde", + "serde_json", "thiserror 1.0.69", ] [[package]] name = "cipherstash-core" -version = "0.34.0-alpha.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32921e505e39f8f7cae9f55e82462d8dd92764a9148f479b42abf52e60e90437" +version = "0.34.1-alpha.3" dependencies = [ "hmac", "lazy_static", @@ -982,8 +977,6 @@ checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cllw-ore" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c676b8e0a3130e6f8b4398d9aa5b287c3ce7074ac89f1ccf1570ebeb22281629" dependencies = [ "blake3", "hex", @@ -1192,9 +1185,7 @@ dependencies = [ [[package]] name = "cts-common" -version = "0.34.0-alpha.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7817fb03b19c6a588bc9120fd876a6d65f531a0b2aa0d39384bc78f3c4c4340" +version = "0.34.1-alpha.3" dependencies = [ "arrayvec", "axum", @@ -1214,7 +1205,9 @@ dependencies = [ "rand 0.8.5", "regex", "serde", + "serde_json", "thiserror 1.0.69", + "tracing", "url", "utoipa", "uuid", @@ -3501,9 +3494,7 @@ dependencies = [ [[package]] name = "recipher" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "061598013445a8bb847d0c95ee33b5e95c1d198d5242b6a8b9f3078aa7437e79" +version = "0.2.1" dependencies = [ "aes", "async-trait", @@ -4349,9 +4340,7 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stack-auth" -version = "0.34.0-alpha.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e8a681ffc8eb40575fb5f40b8316f1b9e03074eb1e4951e0690b00b0349fed" +version = "0.34.1-alpha.3" dependencies = [ "aquamarine", "cts-common", @@ -4370,13 +4359,12 @@ dependencies = [ "vitaminc", "vitaminc-protected", "zeroize", + "zerokms-protocol", ] [[package]] name = "stack-profile" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56fdb1e5ef2111e616fb46da39ad63485b3f3c82de3245fe3c14ce52e8775112" +version = "0.34.1-alpha.3" dependencies = [ "dirs", "gethostname", @@ -6286,12 +6274,10 @@ dependencies = [ [[package]] name = "zerokms-protocol" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52f1d857d2e6d4fe258c49906d53f8b2666c4841dc2e39e67cfea3717382294" +version = "0.12.8" dependencies = [ "base64", - "cipherstash-config 0.34.0-alpha.4", + "cipherstash-config 0.34.1-alpha.3", "const-hex", "cts-common", "fake 2.10.0", diff --git a/Cargo.toml b/Cargo.toml index d1a09aec..485c472f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,8 +43,11 @@ debug = true [workspace.dependencies] sqltk = { version = "0.10.0" } -cipherstash-client = { version = "0.34.0-alpha.4" } -cts-common = { version = "0.34.0-alpha.4" } +# TODO: revert to crates.io once the cipherstash-suite version with `IndexType::Ope` +# is published. See https://github.com/cipherstash/cipherstash-suite for the +# OPE-enabled change. +cipherstash-client = { path = "../cipherstash-suite/packages/cipherstash-client" } +cts-common = { path = "../cipherstash-suite/packages/cts-common" } thiserror = "2.0.9" tokio = { version = "1.44.2", features = ["full"] } diff --git a/packages/cipherstash-proxy/src/postgresql/context/column.rs b/packages/cipherstash-proxy/src/postgresql/context/column.rs index 0088f0b3..c4deb993 100644 --- a/packages/cipherstash-proxy/src/postgresql/context/column.rs +++ b/packages/cipherstash-proxy/src/postgresql/context/column.rs @@ -77,8 +77,8 @@ fn column_type_to_postgres_type( (ColumnType::Int, _) => postgres_types::Type::INT4, (ColumnType::SmallInt, _) => postgres_types::Type::INT2, (ColumnType::Timestamp, _) => postgres_types::Type::TIMESTAMPTZ, - (ColumnType::Utf8Str, _) => postgres_types::Type::TEXT, - (ColumnType::JsonB, EqlTermVariant::JsonAccessor) => postgres_types::Type::TEXT, - (ColumnType::JsonB, _) => postgres_types::Type::JSONB, + (ColumnType::Text, _) => postgres_types::Type::TEXT, + (ColumnType::Json, EqlTermVariant::JsonAccessor) => postgres_types::Type::TEXT, + (ColumnType::Json, _) => postgres_types::Type::JSONB, } } diff --git a/packages/cipherstash-proxy/src/postgresql/data/from_sql.rs b/packages/cipherstash-proxy/src/postgresql/data/from_sql.rs index 5736bdee..f70e4962 100644 --- a/packages/cipherstash-proxy/src/postgresql/data/from_sql.rs +++ b/packages/cipherstash-proxy/src/postgresql/data/from_sql.rs @@ -115,7 +115,7 @@ pub fn literal_from_sql( /// /// | Input Type | Target Column Type | Result | /// |------------|--------------------|--------| -/// | `Type::INT4` | `ColumnType::Utf8Str` | `Plaintext::Utf8Str` | +/// | `Type::INT4` | `ColumnType::Text` | `Plaintext::Text` | /// | `Type::INT2` | `ColumnType::Int` | `Plaintext::Int` | /// | `Type::INT8` | `ColumnType::Int` | `Error`` | fn text_from_sql( @@ -126,7 +126,7 @@ fn text_from_sql( debug!(target: ENCODING, ?val, ?eql_term, ?col_type); match (eql_term, col_type) { - (EqlTermVariant::Full | EqlTermVariant::Partial, ColumnType::Utf8Str) => { + (EqlTermVariant::Full | EqlTermVariant::Partial, ColumnType::Text) => { Ok(Plaintext::new(val)) } (EqlTermVariant::Full | EqlTermVariant::Partial, ColumnType::Float) => { @@ -168,7 +168,7 @@ fn text_from_sql( } // If JSONB, JSONPATH values are treated as strings - (EqlTermVariant::JsonPath | EqlTermVariant::JsonAccessor, ColumnType::JsonB) => { + (EqlTermVariant::JsonPath | EqlTermVariant::JsonAccessor, ColumnType::Json) => { let val = if val.starts_with("$.") { val.to_string() } else { @@ -176,12 +176,12 @@ fn text_from_sql( }; Ok(Plaintext::new(val)) } - (EqlTermVariant::Full | EqlTermVariant::Partial, ColumnType::JsonB) => { + (EqlTermVariant::Full | EqlTermVariant::Partial, ColumnType::Json) => { serde_json::from_str::(val) .map_err(|_| MappingError::CouldNotParseParameter) .map(Plaintext::new) } - (EqlTermVariant::Tokenized, ColumnType::Utf8Str) => Ok(Plaintext::new(val)), + (EqlTermVariant::Tokenized, ColumnType::Text) => Ok(Plaintext::new(val)), (eql_term, col_type) => Err(MappingError::UnsupportedParameterType { eql_term, @@ -202,7 +202,7 @@ fn binary_from_sql( debug!(target: ENCODING, ?pg_type, ?eql_term, ?col_type); match (eql_term, col_type, pg_type) { - (EqlTermVariant::Full | EqlTermVariant::Partial, ColumnType::Utf8Str, _) => { + (EqlTermVariant::Full | EqlTermVariant::Partial, ColumnType::Text, _) => { parse_bytes_from_sql::(bytes, pg_type).map(Plaintext::new) } (EqlTermVariant::Full | EqlTermVariant::Partial, ColumnType::Boolean, _) => { @@ -253,7 +253,7 @@ fn binary_from_sql( } // If JSONB, JSONPATH values are treated as strings - (EqlTermVariant::JsonPath, ColumnType::JsonB, &Type::JSONPATH) => { + (EqlTermVariant::JsonPath, ColumnType::Json, &Type::JSONPATH) => { parse_bytes_from_sql::(bytes, pg_type).map(|val| { let val = if val.starts_with("$.") { val @@ -263,7 +263,7 @@ fn binary_from_sql( Plaintext::new(val) }) } - (EqlTermVariant::JsonAccessor, ColumnType::JsonB, &Type::TEXT | &Type::VARCHAR) => { + (EqlTermVariant::JsonAccessor, ColumnType::Json, &Type::TEXT | &Type::VARCHAR) => { parse_bytes_from_sql::(bytes, pg_type).map(|val| { let val = if val.starts_with("$.") { val @@ -276,7 +276,7 @@ fn binary_from_sql( // Python psycopg sends JSON/B as BYTEA ( EqlTermVariant::Full | EqlTermVariant::Partial, - ColumnType::JsonB, + ColumnType::Json, &Type::JSON | &Type::JSONB | &Type::BYTEA, ) => parse_bytes_from_sql::(bytes, pg_type).map(Plaintext::new), @@ -356,9 +356,9 @@ fn decimal_from_sql( .ok_or(MappingError::CouldNotParseParameter) .map(Plaintext::new), - ColumnType::Utf8Str => Ok(Plaintext::new(decimal.to_string())), + ColumnType::Text => Ok(Plaintext::new(decimal.to_string())), - ColumnType::JsonB => { + ColumnType::Json => { let val: serde_json::Value = serde_json::from_str(&decimal.to_string()) .map_err(|_| MappingError::CouldNotParseParameter)?; Ok(Plaintext::new(val)) @@ -408,7 +408,7 @@ mod tests { config: ColumnConfig { name: "column".to_owned(), in_place: false, - cast_type: ColumnType::Utf8Str, + cast_type: ColumnType::Text, indexes: vec![], mode: ColumnMode::PlaintextDuplicate, }, diff --git a/packages/cipherstash-proxy/src/postgresql/data/to_sql.rs b/packages/cipherstash-proxy/src/postgresql/data/to_sql.rs index 6bc57d73..1007d266 100644 --- a/packages/cipherstash-proxy/src/postgresql/data/to_sql.rs +++ b/packages/cipherstash-proxy/src/postgresql/data/to_sql.rs @@ -16,7 +16,7 @@ pub fn to_sql(plaintext: &Plaintext, format_code: &FormatCode) -> Result Result { let s = match &plaintext { - Plaintext::Utf8Str(Some(x)) => x.to_string(), + Plaintext::Text(Some(x)) => x.to_string(), Plaintext::Int(Some(x)) => x.to_string(), Plaintext::BigInt(Some(x)) => x.to_string(), Plaintext::BigUInt(Some(x)) => x.to_string(), @@ -26,7 +26,7 @@ fn text_to_sql(plaintext: &Plaintext) -> Result { Plaintext::NaiveDate(Some(x)) => x.to_string(), Plaintext::SmallInt(Some(x)) => x.to_string(), Plaintext::Timestamp(Some(x)) => x.to_string(), - Plaintext::JsonB(Some(x)) => x.to_string(), + Plaintext::Json(Some(x)) => x.to_string(), _ => "".to_string(), }; @@ -44,8 +44,8 @@ fn binary_to_sql(plaintext: &Plaintext) -> Result { Plaintext::NaiveDate(x) => x.to_sql_checked(&Type::DATE, &mut bytes), Plaintext::SmallInt(x) => x.to_sql_checked(&Type::INT2, &mut bytes), Plaintext::Timestamp(x) => x.to_sql_checked(&Type::TIMESTAMPTZ, &mut bytes), - Plaintext::Utf8Str(x) => x.to_sql_checked(&Type::TEXT, &mut bytes), - Plaintext::JsonB(x) => x.to_sql_checked(&Type::JSONB, &mut bytes), + Plaintext::Text(x) => x.to_sql_checked(&Type::TEXT, &mut bytes), + Plaintext::Json(x) => x.to_sql_checked(&Type::JSONB, &mut bytes), Plaintext::Decimal(x) => x.to_sql_checked(&Type::NUMERIC, &mut bytes), // TODO: Implement these Plaintext::BigUInt(_x) => unimplemented!(), diff --git a/packages/cipherstash-proxy/src/proxy/encrypt_config/config.rs b/packages/cipherstash-proxy/src/proxy/encrypt_config/config.rs index 41ae3e13..af132024 100644 --- a/packages/cipherstash-proxy/src/proxy/encrypt_config/config.rs +++ b/packages/cipherstash-proxy/src/proxy/encrypt_config/config.rs @@ -133,8 +133,8 @@ impl From for ColumnType { CastAs::Boolean => ColumnType::Boolean, CastAs::Date => ColumnType::Date, CastAs::Real | CastAs::Double => ColumnType::Float, - CastAs::Text => ColumnType::Utf8Str, - CastAs::JsonB => ColumnType::JsonB, + CastAs::Text => ColumnType::Text, + CastAs::JsonB => ColumnType::Json, } } } @@ -237,7 +237,7 @@ mod tests { let column = encrypt_config.get(&ident).expect("column exists"); - assert_eq!(column.cast_type, ColumnType::Utf8Str); + assert_eq!(column.cast_type, ColumnType::Text); assert!(column.indexes.is_empty()); } diff --git a/packages/cipherstash-proxy/src/proxy/mod.rs b/packages/cipherstash-proxy/src/proxy/mod.rs index 9a46b990..5515b82d 100644 --- a/packages/cipherstash-proxy/src/proxy/mod.rs +++ b/packages/cipherstash-proxy/src/proxy/mod.rs @@ -63,7 +63,8 @@ impl Proxy { let encrypt_config_manager = EncryptConfigManager::init(&config.database).await?; - let schema_manager = SchemaManager::init(&config.database).await?; + let schema_manager = + SchemaManager::init(&config.database, encrypt_config_manager.clone()).await?; let eql_version = Proxy::eql_version(&config).await?; @@ -114,8 +115,11 @@ impl Proxy { debug!(msg = "ReloadCommand received", ?command); match command { ReloadCommand::DatabaseSchema(responder) => { - schema_manager.reload().await; + // Reload encrypt config first so the schema reload + // sees the latest per-column index configuration when + // deriving EqlTraits. encrypt_config_manager.reload().await; + schema_manager.reload().await; let _ = responder.send(()); } ReloadCommand::EncryptSchema(responder) => { diff --git a/packages/cipherstash-proxy/src/proxy/schema/manager.rs b/packages/cipherstash-proxy/src/proxy/schema/manager.rs index 5fa2710d..3233c099 100644 --- a/packages/cipherstash-proxy/src/proxy/schema/manager.rs +++ b/packages/cipherstash-proxy/src/proxy/schema/manager.rs @@ -1,9 +1,14 @@ use crate::config::DatabaseConfig; use crate::error::Error; +use crate::proxy::encrypt_config::{EncryptConfig, EncryptConfigManager}; use crate::proxy::{AGGREGATE_QUERY, SCHEMA_QUERY}; use crate::{connect, log::SCHEMA}; use arc_swap::ArcSwap; -use eql_mapper::{self, EqlTraits}; +use cipherstash_client::{ + eql::Identifier, + schema::column::{Index, IndexType}, +}; +use eql_mapper::{self, EqlTrait, EqlTraits}; use eql_mapper::{Column, Schema, Table}; use sqltk::parser::ast::Ident; use std::sync::Arc; @@ -14,14 +19,18 @@ use tracing::{debug, info, warn}; #[derive(Clone, Debug)] pub struct SchemaManager { config: DatabaseConfig, + encrypt_config_manager: EncryptConfigManager, schema: Arc>, _reload_handle: Arc>, } impl SchemaManager { - pub async fn init(config: &DatabaseConfig) -> Result { + pub async fn init( + config: &DatabaseConfig, + encrypt_config_manager: EncryptConfigManager, + ) -> Result { let config = config.clone(); - init_reloader(config).await + init_reloader(config, encrypt_config_manager).await } pub fn load(&self) -> Arc { @@ -29,7 +38,8 @@ impl SchemaManager { } pub async fn reload(&self) { - match load_schema_with_retry(&self.config).await { + let encrypt_config = self.encrypt_config_manager.load(); + match load_schema_with_retry(&self.config, &encrypt_config).await { Ok(reloaded) => { debug!(target: SCHEMA, msg = "Reloaded database schema"); self.schema.swap(Arc::new(reloaded)); @@ -44,15 +54,20 @@ impl SchemaManager { } } -async fn init_reloader(config: DatabaseConfig) -> Result { +async fn init_reloader( + config: DatabaseConfig, + encrypt_config_manager: EncryptConfigManager, +) -> Result { // Skip retries on startup as the likely failure mode is configuration - let schema = load_schema(&config).await?; + let initial_encrypt_config = encrypt_config_manager.load(); + let schema = load_schema(&config, &initial_encrypt_config).await?; info!(msg = "Loaded database schema"); let schema = Arc::new(ArcSwap::new(Arc::new(schema))); let config_ref = config.clone(); let schema_ref = schema.clone(); + let encrypt_config_ref = encrypt_config_manager.clone(); let reload_handle = tokio::spawn(async move { let reload_interval = tokio::time::Duration::from_secs(config_ref.config_reload_interval); @@ -65,7 +80,8 @@ async fn init_reloader(config: DatabaseConfig) -> Result { loop { interval.tick().await; - match load_schema_with_retry(&config_ref).await { + let encrypt_config = encrypt_config_ref.load(); + match load_schema_with_retry(&config_ref, &encrypt_config).await { Ok(reloaded) => { schema_ref.swap(Arc::new(reloaded)); } @@ -81,6 +97,7 @@ async fn init_reloader(config: DatabaseConfig) -> Result { Ok(SchemaManager { config, + encrypt_config_manager, schema, _reload_handle: Arc::new(reload_handle), }) @@ -91,13 +108,16 @@ async fn init_reloader(config: DatabaseConfig) -> Result { /// When databases and the proxy start up at the same time they might not be ready to accept connections before the /// proxy tries to query the schema. To give the proxy the best chance of initialising correctly this method will /// retry the query a few times before passing on the error. -async fn load_schema_with_retry(config: &DatabaseConfig) -> Result { +async fn load_schema_with_retry( + config: &DatabaseConfig, + encrypt_config: &EncryptConfig, +) -> Result { let mut retry_count = 0; let max_retry_count = 10; let max_backoff = Duration::from_secs(2); loop { - match load_schema(config).await { + match load_schema(config, encrypt_config).await { Ok(schema) => { return Ok(schema); } @@ -117,7 +137,10 @@ async fn load_schema_with_retry(config: &DatabaseConfig) -> Result Result { +pub async fn load_schema( + config: &DatabaseConfig, + encrypt_config: &EncryptConfig, +) -> Result { let client = connect::database(config).await?; let tables = client.query(SCHEMA_QUERY, &[]).await?; @@ -141,9 +164,18 @@ pub async fn load_schema(config: &DatabaseConfig) -> Result { let column = match column_type_name.as_deref() { Some("eql_v2_encrypted") => { - debug!(target: SCHEMA, msg = "eql_v2_encrypted column", table = table_name, column = col); - - let eql_traits = EqlTraits::all(); + let identifier = Identifier::new(&table_name, col); + let eql_traits = encrypt_config + .get_column_config(&identifier) + .map(|config| eql_traits_from_indexes(&config.indexes)) + .unwrap_or_default(); + debug!( + target: SCHEMA, + msg = "eql_v2_encrypted column", + table = table_name, + column = col, + traits = %eql_traits, + ); Column::eql(ident, eql_traits) } _ => Column::native(ident), @@ -166,3 +198,110 @@ pub async fn load_schema(config: &DatabaseConfig) -> Result { Ok(schema) } + +/// Translate the configured indexes for a column into the `EqlTraits` that +/// describe which SQL operations the eql-mapper should permit on it. +/// +/// Mapping: +/// - `unique` → `Eq` +/// - `ore` / `ope` → `Ord` (implies `Eq`) +/// - `match` → `TokenMatch` +/// - `ste_vec` → `JsonLike` (implies `Ord` + `Eq`) and `Contain` +fn eql_traits_from_indexes(indexes: &[Index]) -> EqlTraits { + indexes + .iter() + .flat_map(|index| match &index.index_type { + IndexType::Ore | IndexType::Ope => &[EqlTrait::Ord][..], + IndexType::Match { .. } => &[EqlTrait::TokenMatch][..], + IndexType::Unique { .. } => &[EqlTrait::Eq][..], + IndexType::SteVec { .. } => &[EqlTrait::JsonLike, EqlTrait::Contain][..], + }) + .copied() + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use cipherstash_client::schema::column::{Index, Tokenizer}; + + #[test] + fn no_indexes_yields_no_traits() { + let traits = eql_traits_from_indexes(&[]); + assert_eq!(traits, EqlTraits::none()); + } + + #[test] + fn unique_index_yields_eq() { + let traits = eql_traits_from_indexes(&[Index::new_unique()]); + assert_eq!(traits, EqlTraits::from(EqlTrait::Eq)); + } + + #[test] + fn ore_index_yields_ord_and_eq() { + let traits = eql_traits_from_indexes(&[Index::new_ore()]); + assert!(traits.ord); + assert!(traits.eq, "Ord implies Eq"); + assert!(!traits.token_match); + assert!(!traits.json_like); + assert!(!traits.contain); + } + + #[test] + fn ope_index_yields_ord_and_eq() { + let traits = eql_traits_from_indexes(&[Index::new_ope()]); + assert!(traits.ord); + assert!(traits.eq, "Ord implies Eq"); + assert!(!traits.token_match); + assert!(!traits.json_like); + assert!(!traits.contain); + } + + #[test] + fn match_index_yields_token_match_only() { + let traits = eql_traits_from_indexes(&[Index::new(IndexType::Match { + tokenizer: Tokenizer::Standard, + token_filters: vec![], + k: 6, + m: 2048, + include_original: false, + })]); + assert!(traits.token_match); + assert!(!traits.eq); + assert!(!traits.ord); + } + + #[test] + fn ste_vec_index_yields_json_like_and_contain() { + let traits = eql_traits_from_indexes(&[Index::new(IndexType::SteVec { + prefix: "doc".into(), + term_filters: vec![], + array_index_mode: Default::default(), + })]); + assert!(traits.json_like); + assert!(traits.contain); + assert!(traits.ord, "JsonLike implies Ord"); + assert!(traits.eq, "JsonLike implies Eq"); + assert!(!traits.token_match); + } + + #[test] + fn multiple_indexes_unioned() { + let traits = eql_traits_from_indexes(&[ + Index::new_ore(), + Index::new_unique(), + Index::new(IndexType::Match { + tokenizer: Tokenizer::Standard, + token_filters: vec![], + k: 6, + m: 2048, + include_original: false, + }), + ]); + assert!(traits.eq); + assert!(traits.ord); + assert!(traits.token_match); + assert!(!traits.json_like); + assert!(!traits.contain); + } +}