use std::collections::HashMap;
use exocore_protos::{reflect::FieldType, registry::Registry};
use tantivy::{schema::*, tokenizer::*, Document};
use super::MutationIndexConfig;
use crate::error::Error;
pub(crate) struct Fields {
pub config: MutationIndexConfig,
pub trait_type: Field,
pub entity_id: Field,
pub trait_id: Field,
pub entity_trait_id: Field,
pub creation_date: Field,
pub modification_date: Field,
pub block_offset: Field,
pub operation_id: Field,
pub document_type: Field,
pub all_text: Field,
pub all_refs: Field,
pub _dynamic_fields: DynamicFields,
pub dynamic_mappings: DynamicFieldsMapping,
pub references_tokenizer: TextAnalyzer,
}
impl Fields {
pub fn get_dynamic_trait_field(
&self,
trait_name: &str,
field_name: &str,
) -> Result<&MappedDynamicField, Error> {
let fields_mapping = self.dynamic_mappings.get(trait_name).ok_or_else(|| {
Error::QueryParsing(anyhow!(
"Trait '{}' doesn\'t have any dynamic fields",
trait_name
))
})?;
let field = fields_mapping.get(field_name).ok_or_else(|| {
Error::QueryParsing(anyhow!(
"Trait '{}' doesn\'t have any dynamic field with name '{}'",
trait_name,
field_name
))
})?;
Ok(field)
}
pub fn register_tokenizers(&self, index: &tantivy::Index) {
index
.tokenizers()
.register("references", self.references_tokenizer.clone());
}
}
pub(crate) type DynamicFieldsMapping = HashMap<String, HashMap<String, MappedDynamicField>>;
#[derive(Default)]
pub(crate) struct DynamicFields {
reference: Vec<Field>,
text: Vec<Field>,
string: Vec<Field>,
i64: Vec<Field>,
i64_fast: Vec<Field>,
u64: Vec<Field>,
u64_fast: Vec<Field>,
}
#[derive(Debug)]
pub(crate) struct MappedDynamicField {
pub field: Field,
pub field_type: FieldType,
pub is_fast_field: bool,
}
pub(crate) fn build_tantivy_schema(
config: MutationIndexConfig,
registry: &Registry,
) -> (Schema, Fields) {
let mut schema_builder = SchemaBuilder::default();
let trait_type = schema_builder.add_text_field("trait_type", STRING | STORED);
let entity_id = schema_builder.add_text_field("entity_id", STRING | STORED);
let trait_id = schema_builder.add_text_field("trait_id", STRING | STORED);
let entity_trait_id = schema_builder.add_text_field("entity_trait_id", STRING);
let creation_date = schema_builder.add_u64_field("creation_date", STORED);
let modification_date = schema_builder.add_u64_field("modification_date", STORED | FAST);
let block_offset = schema_builder.add_u64_field("block_offset", STORED | FAST);
let operation_id = schema_builder.add_u64_field(
"operation_id",
IntOptions::default()
.set_indexed()
.set_stored()
.set_fast(Cardinality::SingleValue),
);
let document_type = schema_builder.add_u64_field("document_type", STORED);
let references_tokenizer = TextAnalyzer::from(SimpleTokenizer);
let references_options = TextOptions::default()
.set_indexing_options(
TextFieldIndexing::default()
.set_tokenizer("references")
.set_index_option(IndexRecordOption::WithFreqsAndPositions),
)
.set_stored();
let all_text = schema_builder.add_text_field("all_text", TEXT);
let all_refs = schema_builder.add_text_field("all_refs", references_options.clone());
let (dynamic_fields, dynamic_mappings) = build_dynamic_fields_tantivy_schema(
&config,
registry,
&mut schema_builder,
references_options,
);
let schema = schema_builder.build();
let fields = Fields {
config,
trait_type,
entity_id,
trait_id,
entity_trait_id,
creation_date,
modification_date,
block_offset,
operation_id,
document_type,
all_text,
all_refs,
_dynamic_fields: dynamic_fields,
dynamic_mappings,
references_tokenizer,
};
(schema, fields)
}
fn build_dynamic_fields_tantivy_schema(
config: &MutationIndexConfig,
registry: &Registry,
builder: &mut SchemaBuilder,
references_options: TextOptions,
) -> (DynamicFields, DynamicFieldsMapping) {
let mut dyn_fields = DynamicFields::default();
for i in 0..config.dynamic_reference_fields {
dyn_fields
.reference
.push(builder.add_text_field(&format!("references_{}", i), references_options.clone()));
}
for i in 0..config.dynamic_text_fields {
dyn_fields
.text
.push(builder.add_text_field(&format!("text_{}", i), TEXT));
}
for i in 0..config.dynamic_string_fields {
dyn_fields
.string
.push(builder.add_text_field(&format!("string_{}", i), STRING));
}
for i in 0..config.dynamic_i64_fields {
dyn_fields
.i64
.push(builder.add_i64_field(&format!("i64_{}", i), STORED));
}
for i in 0..config.dynamic_i64_sortable_fields {
dyn_fields
.i64_fast
.push(builder.add_i64_field(&format!("i64_fast_{}", i), STORED | FAST));
}
for i in 0..config.dynamic_u64_fields {
dyn_fields
.u64
.push(builder.add_u64_field(&format!("u64_{}", i), STORED));
}
for i in 0..config.dynamic_u64_sortable_fields {
dyn_fields
.u64_fast
.push(builder.add_u64_field(&format!("u64_fast_{}", i), STORED | FAST));
}
let mut dyn_mappings = HashMap::new();
let message_descriptors = registry.message_descriptors();
for message_descriptor in message_descriptors {
let mut ref_fields_count = 0;
let mut text_fields_count = 0;
let mut string_fields_count = 0;
let mut i64_fields_count = 0;
let mut i64_fast_fields_count = 0;
let mut u64_fields_count = 0;
let mut u64_fast_fields_count = 0;
let mut field_mapping = HashMap::new();
for field in message_descriptor.fields.values() {
if !field.indexed_flag && !field.text_flag && !field.sorted_flag {
continue;
}
let ft = field.field_type.clone();
if ft == FieldType::Uint64 || ft == FieldType::Uint32 || ft == FieldType::DateTime {
if field.sorted_flag && u64_fast_fields_count < dyn_fields.u64_fast.len() {
let mapped_field = MappedDynamicField {
field: dyn_fields.u64_fast[u64_fast_fields_count],
field_type: ft,
is_fast_field: true,
};
field_mapping.insert(field.name.clone(), mapped_field);
u64_fast_fields_count += 1;
continue;
} else if field.indexed_flag && u64_fields_count < dyn_fields.u64.len() {
let mapped_field = MappedDynamicField {
field: dyn_fields.u64[u64_fields_count],
field_type: ft,
is_fast_field: false,
};
field_mapping.insert(field.name.clone(), mapped_field);
u64_fields_count += 1;
continue;
}
} else if ft == FieldType::Int64 || ft == FieldType::Int32 {
if field.sorted_flag && i64_fast_fields_count < dyn_fields.i64_fast.len() {
let mapped_field = MappedDynamicField {
field: dyn_fields.i64_fast[i64_fast_fields_count],
field_type: ft,
is_fast_field: true,
};
field_mapping.insert(field.name.clone(), mapped_field);
i64_fast_fields_count += 1;
continue;
} else if field.indexed_flag && i64_fields_count < dyn_fields.i64.len() {
let mapped_field = MappedDynamicField {
field: dyn_fields.i64[i64_fields_count],
field_type: ft,
is_fast_field: false,
};
field_mapping.insert(field.name.clone(), mapped_field);
i64_fields_count += 1;
continue;
}
} else if ft == FieldType::Reference {
if field.indexed_flag && ref_fields_count < dyn_fields.reference.len() {
let mapped_field = MappedDynamicField {
field: dyn_fields.reference[ref_fields_count],
field_type: ft,
is_fast_field: false,
};
field_mapping.insert(field.name.clone(), mapped_field);
ref_fields_count += 1;
continue;
}
} else if ft == FieldType::String {
if field.text_flag && text_fields_count < dyn_fields.text.len() {
let mapped_field = MappedDynamicField {
field: dyn_fields.text[text_fields_count],
field_type: ft,
is_fast_field: false,
};
field_mapping.insert(field.name.clone(), mapped_field);
text_fields_count += 1;
continue;
} else if field.indexed_flag && string_fields_count < dyn_fields.string.len() {
let mapped_field = MappedDynamicField {
field: dyn_fields.string[string_fields_count],
field_type: ft,
is_fast_field: false,
};
field_mapping.insert(field.name.clone(), mapped_field);
string_fields_count += 1;
continue;
}
}
error!(
"Invalid index option / type for field {:?} of message {} or ran out of fields",
field, message_descriptor.name
);
}
if !field_mapping.is_empty() {
dyn_mappings.insert(message_descriptor.name.clone(), field_mapping);
}
}
(dyn_fields, dyn_mappings)
}
pub(crate) fn get_doc_string_value(doc: &Document, field: Field) -> String {
match doc.get_first(field) {
Some(tantivy::schema::Value::Str(v)) => v.to_string(),
_ => panic!("Couldn't find field of type string"),
}
}
pub(crate) fn get_doc_opt_string_value(doc: &Document, field: Field) -> Option<String> {
match doc.get_first(field) {
Some(tantivy::schema::Value::Str(v)) => Some(v.to_string()),
_ => None,
}
}
pub(crate) fn get_doc_opt_u64_value(doc: &Document, field: Field) -> Option<u64> {
match doc.get_first(field) {
Some(tantivy::schema::Value::U64(v)) => Some(*v),
_ => None,
}
}
pub(crate) fn get_doc_u64_value(doc: &Document, field: Field) -> u64 {
match doc.get_first(field) {
Some(tantivy::schema::Value::U64(v)) => *v,
_ => panic!("Couldn't find field of type u64"),
}
}