use quickwit_proto::SearchRequest;
use tantivy::query::{Query, QueryParser, QueryParserError as TantivyQueryParserError};
use tantivy::schema::{Field, Schema};
use tantivy_query_grammar::{UserInputAst, UserInputLeaf, UserInputLiteral};
use crate::sort_by::validate_sort_by_field_name;
use crate::{QueryParserError, DYNAMIC_FIELD_NAME, QUICKWIT_TOKENIZER_MANAGER};
/// Builds a tantivy query from a search request.
///
/// The request's query string is first parsed with the tantivy query grammar
/// so the resulting AST can be inspected before the real query is built:
///
/// - a grammar failure is reported as a `SyntaxError` carrying the query text;
/// - if the request names a sort field, it is validated against `schema`;
/// - any range clause is rejected;
/// - a clause without an explicit field is rejected when the request lists no
///   search fields and `default_field_names` is empty or contains only
///   `DYNAMIC_FIELD_NAME`.
///
/// The fields searched by default are `request.search_fields` when non-empty,
/// otherwise `default_field_names`; each name must exist in `schema`. The
/// query parser is configured so that clauses are combined by conjunction
/// by default.
///
/// # Errors
///
/// Returns a `QueryParserError` for any of the rejections above, for an
/// unknown search field, or when the tantivy query parser itself fails.
pub(crate) fn build_query(
    schema: Schema,
    request: &SearchRequest,
    default_field_names: &[String],
) -> Result<Box<dyn Query>, QueryParserError> {
    // Parse once with the bare grammar: this AST is only used for validation.
    let parsed_ast = match tantivy_query_grammar::parse_query(&request.query) {
        Ok(ast) => ast,
        Err(_) => {
            return Err(TantivyQueryParserError::SyntaxError(request.query.to_string()).into());
        }
    };

    if let Some(sort_field_name) = &request.sort_by_field {
        validate_sort_by_field_name(sort_field_name, &schema)?;
    }

    if has_range_clause(&parsed_ast) {
        return Err(anyhow::anyhow!("Range queries are not currently allowed.").into());
    }

    // The dynamic field alone does not count as a usable default search field.
    let no_usable_default_field =
        default_field_names.is_empty() || default_field_names == [DYNAMIC_FIELD_NAME];
    if request.search_fields.is_empty()
        && no_usable_default_field
        && needs_default_search_field(&parsed_ast)
    {
        return Err(
            anyhow::anyhow!("No default field declared and no field specified in query.").into(),
        );
    }

    // Fields named in the request take precedence over the configured defaults.
    let selected_field_names: &[String] = if request.search_fields.is_empty() {
        default_field_names
    } else {
        &request.search_fields
    };
    let default_fields = resolve_fields(&schema, selected_field_names)?;

    let mut parser = QueryParser::new(schema, default_fields, QUICKWIT_TOKENIZER_MANAGER.clone());
    parser.set_conjunction_by_default();
    Ok(parser.parse_query(&request.query)?)
}
/// Returns `true` if the AST contains a range leaf anywhere, looking through
/// nested clauses and boosted sub-queries.
fn has_range_clause(user_input_ast: &UserInputAst) -> bool {
    match user_input_ast {
        UserInputAst::Leaf(leaf) => matches!(**leaf, UserInputLeaf::Range { .. }),
        UserInputAst::Boost(boosted, _) => has_range_clause(boosted),
        UserInputAst::Clause(children) => children
            .iter()
            .any(|(_, child_ast)| has_range_clause(child_ast)),
    }
}
/// Returns `true` if the AST contains a literal or range leaf that does not
/// name a field, looking through nested clauses and boosted sub-queries.
///
/// Any other kind of leaf never requires a default search field.
fn needs_default_search_field(user_input_ast: &UserInputAst) -> bool {
    match user_input_ast {
        UserInputAst::Leaf(leaf) => match &**leaf {
            UserInputLeaf::Literal(UserInputLiteral { field_name, .. }) => field_name.is_none(),
            UserInputLeaf::Range { field, .. } => field.is_none(),
            _ => false,
        },
        UserInputAst::Boost(boosted, _) => needs_default_search_field(boosted),
        UserInputAst::Clause(children) => children
            .iter()
            .any(|(_, child_ast)| needs_default_search_field(child_ast)),
    }
}
fn resolve_fields(schema: &Schema, field_names: &[String]) -> anyhow::Result<Vec<Field>> {
let mut fields = vec![];
for field_name in field_names {
let field = schema
.get_field(field_name)
.ok_or_else(|| TantivyQueryParserError::FieldDoesNotExist(field_name.clone()))?;
fields.push(field);
}
Ok(fields)
}
#[cfg(test)]
mod test {
    use quickwit_proto::SearchRequest;
    use tantivy::schema::{Schema, TEXT};

    use super::build_query;
    use crate::{DYNAMIC_FIELD_NAME, SOURCE_FIELD_NAME};

    /// What a test case expects from `build_query`. In both variants the
    /// string is a fragment that must appear in the `Debug` output of the
    /// error (`Err`) or of the built query (`Ok`).
    enum TestExpectation {
        Err(&'static str),
        Ok(&'static str),
    }

    /// Builds the schema shared by all cases: five text fields plus the
    /// dynamic JSON field.
    fn make_schema() -> Schema {
        let mut builder = Schema::builder();
        for text_field in ["title", "desc", "server.name", "server.mem", SOURCE_FIELD_NAME] {
            builder.add_text_field(text_field, TEXT);
        }
        builder.add_json_field(DYNAMIC_FIELD_NAME, TEXT);
        builder.build()
    }

    /// Runs `build_query` for `query_str` and checks the outcome against
    /// `expected`.
    ///
    /// When `default_search_fields` is `None`, `title` and `desc` are used as
    /// default search fields. Panics (at the caller's location) when the
    /// expectation is not met; otherwise returns `Ok(())`.
    #[track_caller]
    fn check_build_query(
        query_str: &str,
        search_fields: Vec<String>,
        default_search_fields: Option<Vec<String>>,
        expected: TestExpectation,
    ) -> anyhow::Result<()> {
        let default_field_names = match default_search_fields {
            Some(field_names) => field_names,
            None => vec!["title".to_string(), "desc".to_string()],
        };
        let request = SearchRequest {
            index_id: "test_index".to_string(),
            query: query_str.to_string(),
            search_fields,
            aggregation_request: None,
            start_timestamp: None,
            end_timestamp: None,
            max_hits: 20,
            start_offset: 0,
            sort_order: None,
            sort_by_field: None,
        };
        let outcome = build_query(make_schema(), &request, &default_field_names);
        match (expected, outcome) {
            (TestExpectation::Err(sub_str), Ok(_)) => {
                panic!("Expected error {sub_str}, but got a success on query parsing {query_str}")
            }
            (TestExpectation::Err(sub_str), Err(query_err)) => {
                assert!(
                    format!("{query_err:?}").contains(sub_str),
                    "Query error received is {:?}. It should contain {}",
                    query_err,
                    sub_str
                );
            }
            (TestExpectation::Ok(sub_str), Err(_)) => {
                panic!("Expected a success when parsing {sub_str}, but got error")
            }
            (TestExpectation::Ok(sub_str), Ok(query)) => {
                assert!(
                    format!("{query:?}").contains(sub_str),
                    "Error query parsing {:?} should contain {}",
                    query,
                    sub_str
                );
            }
        }
        Ok(())
    }

    #[test]
    fn test_build_query() {
        const RANGE_NOT_ALLOWED: &str = "Range queries are not currently allowed.";
        const NO_DEFAULT_FIELD: &str =
            "No default field declared and no field specified in query.";

        // Unknown fields referenced in the query itself.
        check_build_query(
            "foo:bar",
            vec![],
            None,
            TestExpectation::Err("Field does not exists: 'foo'"),
        )
        .unwrap();
        check_build_query(
            "server.type:hpc server.mem:4GB",
            vec![],
            None,
            TestExpectation::Err("Field does not exists: 'server.type'"),
        )
        .unwrap();

        // Every flavour of range clause is rejected.
        check_build_query(
            "title:[a TO b]",
            vec![],
            None,
            TestExpectation::Err(RANGE_NOT_ALLOWED),
        )
        .unwrap();
        check_build_query(
            "title:{a TO b} desc:foo",
            vec![],
            None,
            TestExpectation::Err(RANGE_NOT_ALLOWED),
        )
        .unwrap();
        check_build_query(
            "title:>foo",
            vec![],
            None,
            TestExpectation::Err(RANGE_NOT_ALLOWED),
        )
        .unwrap();

        // Fully qualified queries and request-level search fields.
        check_build_query(
            "title:foo desc:bar _source:baz",
            vec![],
            None,
            TestExpectation::Ok("TermQuery"),
        )
        .unwrap();
        check_build_query(
            "title:foo desc:bar",
            vec!["url".to_string()],
            None,
            TestExpectation::Err("Field does not exists: 'url'"),
        )
        .unwrap();
        check_build_query(
            "server.name:\".bar:\" server.mem:4GB",
            vec!["server.name".to_string()],
            None,
            TestExpectation::Ok("TermQuery"),
        )
        .unwrap();
        check_build_query(
            "server.name:\"for.bar:b\" server.mem:4GB",
            vec![],
            None,
            TestExpectation::Ok("TermQuery"),
        )
        .unwrap();

        // Unqualified terms without any usable default search field.
        check_build_query(
            "foo",
            vec![],
            Some(vec![]),
            TestExpectation::Err(NO_DEFAULT_FIELD),
        )
        .unwrap();
        check_build_query(
            "bar",
            vec![],
            Some(vec![DYNAMIC_FIELD_NAME.to_string()]),
            TestExpectation::Err(NO_DEFAULT_FIELD),
        )
        .unwrap();
        check_build_query(
            "title:hello AND (Jane OR desc:world)",
            vec![],
            Some(vec![DYNAMIC_FIELD_NAME.to_string()]),
            TestExpectation::Err(NO_DEFAULT_FIELD),
        )
        .unwrap();
    }
}