diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs index d7d1ea72d92..5fd320a2580 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs @@ -75,6 +75,7 @@ pub struct TermQueryParams { case_insensitive: bool, } +#[cfg(test)] pub fn term_query_from_field_value(field: impl ToString, value: impl ToString) -> TermQuery { TermQuery { field: field.to_string(), diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs index e31ee090693..5b3078e7073 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::{BTreeSet, HashMap}; + use serde::Deserialize; -use crate::elastic_query_dsl::bool_query::BoolQuery; use crate::elastic_query_dsl::one_field_map::OneFieldMap; -use crate::elastic_query_dsl::term_query::term_query_from_field_value; use crate::elastic_query_dsl::{ConvertibleToQueryAst, ElasticQueryDslInner}; use crate::not_nan_f32::NotNaNf32; -use crate::query_ast::QueryAst; +use crate::query_ast::{QueryAst, TermSetQuery}; #[derive(PartialEq, Eq, Debug, Deserialize, Clone)] #[serde(try_from = "TermsQueryForSerialization")] @@ -87,15 +87,14 @@ impl TryFrom for TermsQuery { impl ConvertibleToQueryAst for TermsQuery { fn convert_to_query_ast(self) -> anyhow::Result { - let term_queries: Vec = self - .values - .into_iter() - .map(|value| term_query_from_field_value(self.field.clone(), value)) - .map(ElasticQueryDslInner::from) - .collect(); - let mut union = BoolQuery::union(term_queries); - union.boost = self.boost; - union.convert_to_query_ast() + let mut terms_per_field = HashMap::new(); + let values_set: BTreeSet = self.values.into_iter().collect(); + terms_per_field.insert(self.field, values_set); + + let term_set_query = TermSetQuery { terms_per_field }; + let query_ast: QueryAst = term_set_query.into(); + + Ok(query_ast.boost(self.boost)) } } diff --git a/quickwit/quickwit-query/src/query_ast/term_set_query.rs b/quickwit/quickwit-query/src/query_ast/term_set_query.rs index d25d01a4703..0605d854863 100644 --- a/quickwit/quickwit-query/src/query_ast/term_set_query.rs +++ b/quickwit/quickwit-query/src/query_ast/term_set_query.rs @@ -19,7 +19,7 @@ use tantivy::Term; use crate::InvalidQuery; use crate::query_ast::{ - BuildTantivyAst, BuildTantivyAstContext, QueryAst, TantivyQueryAst, TermQuery, + BoolQuery, BuildTantivyAst, BuildTantivyAstContext, QueryAst, TantivyQueryAst, TermQuery, }; /// TermSetQuery matches the same document set as if it was a union of @@ -32,6 +32,53 @@ pub struct TermSetQuery { } impl TermSetQuery { + fn has_fast_only_field(&self, context: &BuildTantivyAstContext) -> bool { + for full_path in self.terms_per_field.keys() { + if let Some((_, field_entry, _)) = + super::utils::find_field_or_hit_dynamic(full_path, context.schema) + && field_entry.is_fast() + && !field_entry.is_indexed() + { + return true; + } + } + false + } + + fn build_bool_query( + &self, + context: &BuildTantivyAstContext, + ) -> Result { + let should_clauses = self + .terms_per_field + .iter() + .flat_map(|(full_path, values)| { + values.iter().map(|value| { + QueryAst::Term(TermQuery { + field: full_path.to_string(), + value: value.to_string(), + }) + }) + }) + .collect(); + + let bool_query = BoolQuery { + should: should_clauses, + ..Default::default() + }; + + bool_query.build_tantivy_ast_impl(context) + } + + fn build_term_set_query( + &self, + context: &BuildTantivyAstContext, + ) -> Result { + let terms_it = self.make_term_iterator(context)?; + let term_set_query = tantivy::query::TermSetQuery::new(terms_it); + Ok(term_set_query.into()) + } + fn make_term_iterator( &self, context: &BuildTantivyAstContext, @@ -67,9 +114,11 @@ impl BuildTantivyAst for TermSetQuery { &self, context: &BuildTantivyAstContext, ) -> Result { - let terms_it = self.make_term_iterator(context)?; - let term_set_query = tantivy::query::TermSetQuery::new(terms_it); - Ok(term_set_query.into()) + if self.has_fast_only_field(context) { + self.build_bool_query(context) + } else { + self.build_term_set_query(context) + } } } @@ -78,3 +127,67 @@ impl From for QueryAst { QueryAst::TermSet(term_set_query) } } + +#[cfg(test)] +mod tests { + use std::collections::{BTreeSet, HashMap}; + + use tantivy::schema::{FAST, INDEXED, Schema}; + + use super::TermSetQuery; + use crate::query_ast::{BuildTantivyAst, BuildTantivyAstContext}; + + #[test] + fn test_term_set_query_with_fast_only_field_returns_bool_query() { + let mut schema_builder = Schema::builder(); + schema_builder.add_u64_field("fast_field", FAST); + let schema = schema_builder.build(); + + let terms_per_field = HashMap::from([( + "fast_field".to_string(), + BTreeSet::from(["1".to_string(), "2".to_string()]), + )]); + let term_set_query = TermSetQuery { terms_per_field }; + + let tantivy_query_ast = term_set_query + .build_tantivy_ast_call(&BuildTantivyAstContext::for_test(&schema)) + .unwrap(); + + let bool_query = tantivy_query_ast + .as_bool_query() + .expect("Expected BoolQuery for fast-only field, but got a different query type"); + assert_eq!(bool_query.should.len(), 2); + assert_eq!(bool_query.must.len(), 0); + assert_eq!(bool_query.must_not.len(), 0); + assert_eq!(bool_query.filter.len(), 0); + } + + #[test] + fn test_term_set_query_with_indexed_field_uses_term_set() { + let mut schema_builder = Schema::builder(); + schema_builder.add_u64_field("indexed_field", FAST | INDEXED); + let schema = schema_builder.build(); + + let terms_per_field = HashMap::from([( + "indexed_field".to_string(), + BTreeSet::from(["1".to_string(), "2".to_string()]), + )]); + let term_set_query = TermSetQuery { terms_per_field }; + + let tantivy_query_ast = term_set_query + .build_tantivy_ast_call(&BuildTantivyAstContext::for_test(&schema)) + .unwrap(); + + // Should return a leaf query (TermSetQuery wrapped in TantivyQueryAst) + let leaf = tantivy_query_ast + .as_leaf() + .expect("Expected a leaf query (TermSetQuery), but got a complex query"); + + // Verify it's a TermSetQuery by checking the debug representation + let debug_str = format!("{leaf:?}"); + assert!( + debug_str.contains("TermSetQuery"), + "Expected TermSetQuery, got: {debug_str}" + ); + } +}