use crate::error::StorageError;
use crate::value::{EdgeId, VertexId};
mod analyzer;
mod query;
mod tantivy_index;
#[cfg(test)]
mod tests;
pub use analyzer::Analyzer;
pub use query::TextQuery;
pub use tantivy_index::TantivyTextIndex;
pub const DEFAULT_BM25_K1: f32 = 1.2;
pub const DEFAULT_BM25_B: f32 = 0.75;
#[derive(Debug, thiserror::Error)]
pub enum TextIndexError {
#[error("property '{0}' is not registered as a text index")]
PropertyNotIndexed(String),
#[error("element id {0} property '{1}' is not a string-valued field")]
NonStringValue(u64, String),
#[error("query parse error: {0}")]
QueryParse(String),
#[error("analyzer '{0}' is not registered")]
UnknownAnalyzer(String),
#[error("unsupported config: {0}")]
UnsupportedConfig(String),
#[error("index corruption: {0}")]
Corruption(String),
#[error("backend error: {0}")]
Backend(String),
#[error("storage error: {0}")]
Storage(#[from] StorageError),
}
impl From<tantivy::TantivyError> for TextIndexError {
fn from(err: tantivy::TantivyError) -> Self {
TextIndexError::Backend(err.to_string())
}
}
impl From<tantivy::query::QueryParserError> for TextIndexError {
fn from(err: tantivy::query::QueryParserError) -> Self {
TextIndexError::QueryParse(err.to_string())
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct TextIndexConfig {
pub analyzer: Analyzer,
pub store_positions: bool,
pub bm25_k1: f32,
pub bm25_b: f32,
pub commit_every: usize,
pub writer_memory_bytes: usize,
}
impl Default for TextIndexConfig {
fn default() -> Self {
Self {
analyzer: Analyzer::StandardEnglish,
store_positions: true,
bm25_k1: DEFAULT_BM25_K1,
bm25_b: DEFAULT_BM25_B,
commit_every: 1024,
writer_memory_bytes: 50_000_000,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ElementRef {
Vertex(VertexId),
Edge(EdgeId),
}
impl ElementRef {
pub fn as_vertex(self) -> Option<VertexId> {
match self {
ElementRef::Vertex(id) => Some(id),
ElementRef::Edge(_) => None,
}
}
pub fn as_edge(self) -> Option<EdgeId> {
match self {
ElementRef::Edge(id) => Some(id),
ElementRef::Vertex(_) => None,
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct TextHit {
pub element: ElementRef,
pub score: f32,
}
pub trait TextIndex: Send + Sync {
fn config(&self) -> &TextIndexConfig;
fn upsert(&self, id: u64, text: &str) -> Result<(), TextIndexError>;
fn delete(&self, id: u64) -> Result<(), TextIndexError>;
fn search(&self, query: &TextQuery, k: usize) -> Result<Vec<TextHit>, TextIndexError>;
fn len(&self) -> usize;
fn is_empty(&self) -> bool {
self.len() == 0
}
fn commit(&self) -> Result<(), TextIndexError>;
fn merge(&self) -> Result<(), TextIndexError>;
}