use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Debug;
use dyn_clone::{clone_trait_object, DynClone};
use quickwit_proto::SearchRequest;
use serde_json::Value as JsonValue;
use tantivy::query::Query;
use tantivy::schema::{Field, Schema};
use tantivy::Document;
use crate::{DocParsingError, QueryParserError, SortBy};
/// Object-safe interface of a document mapper.
///
/// A doc mapper converts JSON documents to and from tantivy documents,
/// exposes a tantivy [`Schema`], and builds tantivy queries from search
/// requests. Implementations are (de)serialized through `typetag`, using the
/// `"type"` key of the serialized form as the tag.
#[typetag::serde(tag = "type")]
pub trait DocMapper: Send + Sync + Debug + DynClone + 'static {
    /// Builds a tantivy [`Document`] from the JSON text of a document.
    ///
    /// # Errors
    ///
    /// Returns a [`DocParsingError`] when the document cannot be converted.
    fn doc_from_json(&self, doc_json: String) -> Result<Document, DocParsingError>;

    /// Converts a named document (field name mapped to its JSON values) into
    /// a JSON object.
    fn doc_to_json(
        &self,
        named_doc: BTreeMap<String, Vec<JsonValue>>,
    ) -> anyhow::Result<serde_json::Map<String, JsonValue>>;

    /// Returns the tantivy schema of this doc mapper.
    fn schema(&self) -> Schema;

    /// Builds the tantivy query for `request`, given the schema of a split.
    ///
    /// # Errors
    ///
    /// Returns a [`QueryParserError`] when the query cannot be built.
    fn query(
        &self,
        split_schema: Schema,
        request: &SearchRequest,
    ) -> Result<Box<dyn Query>, QueryParserError>;

    /// Returns the sort order of this doc mapper.
    ///
    /// The default implementation returns [`SortBy::DocId`].
    fn sort_by(&self) -> SortBy {
        SortBy::DocId
    }

    /// Resolves the timestamp field in `split_schema`.
    ///
    /// Returns `None` when no timestamp field name is configured, or when
    /// `split_schema` has no field with that name.
    fn timestamp_field(&self, split_schema: &Schema) -> Option<Field> {
        let field_name = self.timestamp_field_name()?;
        split_schema.get_field(&field_name)
    }

    /// Returns the name of the timestamp field, if any.
    ///
    /// The default implementation returns `None`.
    fn timestamp_field_name(&self) -> Option<String> {
        None
    }

    /// Returns the names of the tag fields.
    ///
    /// The default implementation returns an empty set.
    fn tag_field_names(&self) -> BTreeSet<String> {
        BTreeSet::new()
    }

    /// Returns the name of the demux field, if any.
    ///
    /// The default implementation returns `None`.
    fn demux_field_name(&self) -> Option<String> {
        None
    }
}
// Implements `Clone` for `Box<dyn DocMapper>`, relying on the `DynClone`
// supertrait of `DocMapper`.
clone_trait_object!(DocMapper);
#[cfg(test)]
mod tests {
    use quickwit_proto::SearchRequest;
    use tantivy::schema::{Cardinality, FieldType};

    use crate::default_doc_mapper::{FieldMappingType, QuickwitJsonOptions, QuickwitTextOptions};
    use crate::{DefaultDocMapperBuilder, DocMapper, FieldMappingEntry, DYNAMIC_FIELD_NAME};

    /// JSON configuration of a `default` doc mapper with every list left empty.
    const JSON_DEFAULT_DOC_MAPPER: &str = r#"
{
"type": "default",
"default_search_fields": [],
"tag_fields": [],
"field_mappings": []
}"#;

    /// Deserializes a boxed doc mapper from its JSON configuration.
    fn parse_doc_mapper(json: &str) -> serde_json::Result<Box<dyn DocMapper>> {
        serde_json::from_str(json)
    }

    /// Field mapping entry for a single-valued JSON field named `json_field`
    /// with default options.
    fn json_field_entry() -> FieldMappingEntry {
        FieldMappingEntry {
            name: "json_field".to_string(),
            mapping_type: FieldMappingType::Json(
                QuickwitJsonOptions::default(),
                Cardinality::SingleValue,
            ),
        }
    }

    /// Builds the search request shared by the query tests; only the query
    /// string and the optional sort field differ between them.
    fn search_request_for(query: &str, sort_by_field: Option<&str>) -> SearchRequest {
        SearchRequest {
            index_id: "quickwit-index".to_string(),
            query: query.to_string(),
            search_fields: vec![],
            start_timestamp: None,
            end_timestamp: None,
            max_hits: 10,
            start_offset: 0,
            sort_order: None,
            sort_by_field: sort_by_field.map(String::from),
            aggregation_request: None,
        }
    }

    /// The explicit "all empty" JSON configuration yields the same doc mapper
    /// (compared through `Debug`) as the default builder.
    #[test]
    fn test_deserialize_doc_mapper() -> anyhow::Result<()> {
        let parsed = parse_doc_mapper(JSON_DEFAULT_DOC_MAPPER)?;
        let expected = DefaultDocMapperBuilder::default().try_build()?;
        assert_eq!(format!("{:?}", parsed), format!("{:?}", expected),);
        Ok(())
    }

    /// A configuration carrying only the type tag yields the same doc mapper
    /// (compared through `Debug`) as the default builder.
    #[test]
    fn test_deserialize_minimal_doc_mapper() -> anyhow::Result<()> {
        let parsed = parse_doc_mapper(r#"{"type": "default"}"#)?;
        let expected = DefaultDocMapperBuilder::default().try_build()?;
        assert_eq!(format!("{:?}", parsed), format!("{:?}", expected),);
        Ok(())
    }

    /// In dynamic mode, the dynamic field is a JSON field indexed with the
    /// `default` tokenizer.
    #[test]
    fn test_deserialize_doc_mapper_default_dynamic_tokenizer() {
        let doc_mapper = parse_doc_mapper(r#"{"type": "default", "mode": "dynamic"}"#).unwrap();
        let tantivy_schema = doc_mapper.schema();
        let dynamic_field = tantivy_schema.get_field(DYNAMIC_FIELD_NAME).unwrap();
        let dynamic_field_entry = tantivy_schema.get_field_entry(dynamic_field);
        match dynamic_field_entry.field_type() {
            FieldType::JsonObject(json_options) => {
                let text_indexing = json_options.get_text_indexing_options().unwrap();
                assert_eq!(text_indexing.tokenizer(), "default");
            }
            _ => panic!("dynamic field should be of JSON type"),
        }
    }

    /// Serializing a deserialized doc mapper and repeating the round trip
    /// produces the same JSON both times.
    #[test]
    fn test_serdeserialize_doc_mapper() -> anyhow::Result<()> {
        let parsed = parse_doc_mapper(JSON_DEFAULT_DOC_MAPPER)?;
        let expected = DefaultDocMapperBuilder::default().try_build()?;
        assert_eq!(format!("{:?}", parsed), format!("{:?}", expected),);

        let first_json = serde_json::to_string(&parsed)?;
        let reparsed = parse_doc_mapper(&first_json)?;
        let second_json = serde_json::to_string(&reparsed)?;
        assert_eq!(first_json, second_json);
        Ok(())
    }

    /// A query that names the JSON field explicitly resolves to a term query
    /// on the JSON path.
    #[test]
    fn test_doc_mapper_query_with_json_field() {
        let mut builder = DefaultDocMapperBuilder::default();
        builder.field_mappings.push(json_field_entry());
        let doc_mapper = builder.try_build().unwrap();
        let schema = doc_mapper.schema();
        let request = search_request_for("json_field.toto.titi:hello", None);
        let query = doc_mapper.query(schema, &request).unwrap();
        assert_eq!(
            format!("{:?}", query),
            r#"TermQuery(Term(type=Json, field=0, path=toto.titi, vtype=Str, "hello"))"#
        );
    }

    /// Requesting a sort on a fast text field is rejected with a query parser
    /// error.
    #[test]
    fn test_doc_mapper_query_with_invalid_sort_field() {
        let text_options = QuickwitTextOptions {
            fast: true,
            ..Default::default()
        };
        let mut builder = DefaultDocMapperBuilder::default();
        builder.field_mappings.push(FieldMappingEntry {
            name: "text_field".to_string(),
            mapping_type: FieldMappingType::Text(text_options, Cardinality::SingleValue),
        });
        builder.default_search_fields.push("text_field".to_string());
        let doc_mapper = builder.try_build().unwrap();
        let schema = doc_mapper.schema();
        let request = search_request_for("text_field:hello", Some("text_field"));
        let error = doc_mapper.query(schema, &request).unwrap_err();
        assert_eq!(
            format!("{:?}", error),
            "QueryParserError(Sort by field on type text is currently not supported `text_field`.)"
        );
    }

    /// With the JSON field as a default search field, a query that omits the
    /// field name still resolves to a term query on the JSON path.
    #[test]
    fn test_doc_mapper_query_with_json_field_default_search_fields() {
        let mut builder = DefaultDocMapperBuilder::default();
        builder.field_mappings.push(json_field_entry());
        builder.default_search_fields.push("json_field".to_string());
        let doc_mapper = builder.try_build().unwrap();
        let schema = doc_mapper.schema();
        let request = search_request_for("toto.titi:hello", None);
        let query = doc_mapper.query(schema, &request).unwrap();
        assert_eq!(
            format!("{:?}", query),
            r#"TermQuery(Term(type=Json, field=0, path=toto.titi, vtype=Str, "hello"))"#
        );
    }

    /// A value that can be read either as a number or as a string expands to
    /// a boolean query with one term per interpretation.
    #[test]
    fn test_doc_mapper_query_with_json_field_ambiguous_term() {
        let builder = DefaultDocMapperBuilder {
            field_mappings: vec![json_field_entry()],
            default_search_fields: vec!["json_field".to_string()],
            ..Default::default()
        };
        let doc_mapper = builder.try_build().unwrap();
        let schema = doc_mapper.schema();
        let request = search_request_for("toto:5", None);
        let query = doc_mapper.query(schema, &request).unwrap();
        assert_eq!(
            format!("{:?}", query),
            r#"BooleanQuery { subqueries: [(Should, TermQuery(Term(type=Json, field=0, path=toto, vtype=U64, 5))), (Should, TermQuery(Term(type=Json, field=0, path=toto, vtype=Str, "5")))] }"#
        );
    }
}