use std::collections::HashMap;
use std::fmt;
use std::path::{Path, PathBuf};
use std::result::Result as StdResult;
use crate::traits::{DictError, Result};
use serde::{Deserialize, Serialize};
pub mod btree;
pub mod fts;
/// Summary statistics for an index.
///
/// `IndexManager` also keeps one of these as an aggregate over the indexes it manages.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexStats {
/// Number of entries; `IndexManager::build_all` sets this to the length of its input slice.
pub entries: u64,
/// Size of the index data. The unit is not established in this file (presumably bytes — TODO confirm).
pub size: u64,
/// Build duration; `IndexManager::build_all` stores elapsed milliseconds here.
pub build_time: u64,
/// Version string; `IndexManager` initialises it to `"1.0"`.
pub version: String,
/// Configuration associated with these statistics.
pub config: IndexConfig,
}
/// Settings handed to index implementations when they are built.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexConfig {
/// Optional B-tree order (`Default` uses `Some(256)`).
/// NOTE(review): how `None` is interpreted is up to the B-tree implementation, not visible here.
pub btree_order: Option<usize>,
/// Settings for the full-text-search index.
pub fts_config: FtsConfig,
/// Optional compression settings (`Default` uses Zstd at level 6).
pub compression: Option<CompressionConfig>,
/// `Default` sets this to `true`. Presumably selects building the index in memory — confirm against the index implementations.
pub build_in_memory: bool,
/// Optional memory limit (`Default` uses `Some(1_000_000_000)`). Unit presumably bytes — TODO confirm.
pub max_memory: Option<u64>,
}
/// Settings for the full-text-search index.
///
/// NOTE(review): how each field is applied is decided in the `fts` module, which is not visible here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FtsConfig {
/// Minimum token length (`Default` uses 3); presumably shorter tokens are not indexed — confirm in `fts`.
pub min_token_len: usize,
/// Maximum token length (`Default` uses 64); presumably longer tokens are not indexed — confirm in `fts`.
pub max_token_len: usize,
/// Whether stemming is enabled (`Default` uses `true`).
pub use_stemming: bool,
/// Stop-word list (`Default` uses "the", "and", "or").
pub stop_words: Vec<String>,
/// Optional language tag (`Default` uses `Some("en")`).
pub language: Option<String>,
}
/// Compression settings: which algorithm to use and at what level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompressionConfig {
/// Selected compression algorithm.
pub algorithm: CompressionAlgorithm,
/// Compression level (`IndexConfig::default()` uses 6). The valid range presumably depends on the algorithm — TODO confirm.
pub level: u32,
}
/// Compression algorithms that can be named in a `CompressionConfig`.
///
/// The `Default` implementation in this file selects `Zstd`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CompressionAlgorithm {
/// No compression.
None,
/// Gzip.
Gzip,
/// LZ4.
Lz4,
/// Zstandard.
Zstd,
}
impl Default for CompressionAlgorithm {
fn default() -> Self {
CompressionAlgorithm::Zstd
}
}
/// Common interface declared for index implementations.
///
/// Implementors must be `Send + Sync`.
/// NOTE(review): because of the associated constant, this trait cannot be used as `dyn Index`.
pub trait Index: Send + Sync {
/// Static name identifying the kind of index.
const INDEX_TYPE: &'static str;
/// Builds the index from `entries` using `config`.
/// Each entry is a `(String, Vec<u8>)` pair (presumably key and payload — confirm with implementors).
fn build(&mut self, entries: &[(String, Vec<u8>)], config: &IndexConfig) -> Result<()>;
/// Loads index state from `path`.
fn load(&mut self, path: &Path) -> Result<()>;
/// Saves index state to `path`.
fn save(&self, path: &Path) -> Result<()>;
/// Returns the statistics kept by this index.
fn stats(&self) -> &IndexStats;
/// Reports whether the index is currently built.
fn is_built(&self) -> bool;
/// Resets the index; the exact effect is implementation-defined.
fn clear(&mut self);
/// Checks the index; `Ok(true)` means valid, `Ok(false)` means invalid, `Err` means the check itself failed.
fn verify(&self) -> Result<bool>;
}
/// Errors specific to index handling.
///
/// Converted into `DictError::IndexError` (carrying the `Display` text) by the `From` impl in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum IndexError {
/// The index data is corrupted; the payload is a description.
CorruptedIndex(String),
/// The index version found differs from the one expected.
VersionMismatch { expected: String, found: String },
/// The named index has not been built; the payload is the index name.
NotBuilt(String),
/// An I/O failure, carried as a message string.
IoError(String),
/// The index configuration is invalid; the payload is a description.
ConfigError(String),
/// Not enough memory for the operation; the payload is a description.
InsufficientMemory(String),
}
impl fmt::Display for IndexError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IndexError::CorruptedIndex(msg) => write!(f, "Corrupted index: {}", msg),
IndexError::VersionMismatch { expected, found } => {
write!(
f,
"Version mismatch: expected {}, found {}",
expected, found
)
}
IndexError::NotBuilt(name) => write!(f, "Index '{}' not built", name),
IndexError::IoError(msg) => write!(f, "Index I/O error: {}", msg),
IndexError::ConfigError(msg) => write!(f, "Index configuration error: {}", msg),
IndexError::InsufficientMemory(msg) => write!(f, "Insufficient memory: {}", msg),
}
}
}
// Marker impl: relies on the default `Error` methods (no `source()` override);
// `Debug` comes from the derive and `Display` from the impl in this file.
impl std::error::Error for IndexError {}
impl From<IndexError> for DictError {
fn from(err: IndexError) -> Self {
DictError::IndexError(err.to_string())
}
}
/// Owns the B-tree and full-text-search indexes and tracks aggregate statistics for them.
pub struct IndexManager {
/// B-tree index; `None` until it is first built or loaded.
btree: Option<btree::BTreeIndex>,
/// Full-text-search index; `None` until it is first built or loaded.
fts: Option<fts::FtsIndex>,
/// Configuration passed to each index when building.
config: IndexConfig,
/// Paths keyed by a static name.
/// NOTE(review): apart from being created empty in `new`, no `IndexManager` method in this file reads or writes this map.
paths: HashMap<&'static str, PathBuf>,
/// Aggregate statistics returned by `stats()`.
stats: IndexStats,
}
impl IndexManager {
pub fn new(config: IndexConfig) -> Self {
let stats_config = config.clone();
Self {
btree: None,
fts: None,
config,
paths: HashMap::new(),
stats: IndexStats {
entries: 0,
size: 0,
build_time: 0,
version: "1.0".to_string(),
config: stats_config,
},
}
}
pub fn build_all(&mut self, entries: &[(String, Vec<u8>)]) -> Result<()> {
let start_time = std::time::Instant::now();
let entries_count = entries.len() as u64;
if self.btree.is_none() {
self.btree = Some(btree::BTreeIndex::new());
}
if let Some(ref mut btree) = self.btree {
btree.build(entries, &self.config)?;
self.stats.size += btree.stats().size;
}
if self.fts.is_none() {
self.fts = Some(fts::FtsIndex::new());
}
if let Some(ref mut fts) = self.fts {
fts.build(entries, &self.config)?;
self.stats.size += fts.stats().size;
}
self.stats.entries = entries_count;
self.stats.build_time = start_time.elapsed().as_millis() as u64;
Ok(())
}
pub fn load_all(&mut self, base_path: &Path, extensions: &[(&str, &str)]) -> Result<()> {
for (index_type, extension) in extensions {
let index_path = base_path.with_extension(extension);
match *index_type {
"btree" => {
if !index_path.exists() {
return Err(DictError::FileNotFound(index_path.display().to_string()));
}
if self.btree.is_none() {
self.btree = Some(btree::BTreeIndex::new());
}
if let Some(ref mut btree) = self.btree {
btree.load(&index_path)?;
self.stats.size += btree.stats().size;
}
}
"fts" => {
if !index_path.exists() {
return Err(DictError::FileNotFound(index_path.display().to_string()));
}
if self.fts.is_none() {
self.fts = Some(fts::FtsIndex::new());
}
if let Some(ref mut fts) = self.fts {
fts.load(&index_path)?;
self.stats.size += fts.stats().size;
}
}
_ => {
return Err(DictError::Internal(format!(
"Unknown index type: {}",
index_type
)))
}
}
}
Ok(())
}
pub fn save_all(&self, base_path: &Path, extensions: &[(&str, &str)]) -> Result<()> {
for (index_type, extension) in extensions {
let index_path = base_path.with_extension(extension);
match *index_type {
"btree" => {
if let Some(ref btree) = self.btree {
btree.save(&index_path)?;
}
}
"fts" => {
if let Some(ref fts) = self.fts {
fts.save(&index_path)?;
}
}
_ => {
return Err(DictError::Internal(format!(
"Unknown index type: {}",
index_type
)))
}
}
}
Ok(())
}
pub fn binary_search(&self, key: &str) -> Result<Option<(Vec<u8>, u64)>> {
if let Some(ref btree) = self.btree {
btree.search(key)
} else {
Err(DictError::IndexError(
"B-TREE index not available".to_string(),
))
}
}
pub fn fulltext_search(&self, query: &str) -> Result<Vec<(String, f32)>> {
if let Some(ref fts) = self.fts {
fts.search(query)
} else {
Err(DictError::IndexError("FTS index not available".to_string()))
}
}
pub fn stats(&self) -> &IndexStats {
&self.stats
}
pub fn is_built(&self) -> bool {
self.btree.as_ref().map(|b| b.is_built()).unwrap_or(false)
&& self.fts.as_ref().map(|f| f.is_built()).unwrap_or(false)
}
pub fn clear(&mut self) {
if let Some(ref mut btree) = self.btree {
btree.clear();
}
if let Some(ref mut fts) = self.fts {
fts.clear();
}
self.stats = IndexStats {
entries: 0,
size: 0,
build_time: 0,
version: "1.0".to_string(),
config: self.config.clone(),
};
}
pub fn verify(&self) -> Result<bool> {
let mut all_valid = true;
if let Some(ref btree) = self.btree {
if !btree.verify()? {
all_valid = false;
}
}
if let Some(ref fts) = self.fts {
if !fts.verify()? {
all_valid = false;
}
}
Ok(all_valid)
}
}
impl Default for IndexConfig {
fn default() -> Self {
Self {
btree_order: Some(256),
fts_config: FtsConfig {
min_token_len: 3,
max_token_len: 64,
use_stemming: true,
stop_words: vec!["the".to_string(), "and".to_string(), "or".to_string()],
language: Some("en".to_string()),
},
compression: Some(CompressionConfig {
algorithm: CompressionAlgorithm::default(),
level: 6,
}),
build_in_memory: true,
max_memory: Some(1_000_000_000), }
}
}
impl Default for FtsConfig {
fn default() -> Self {
Self {
min_token_len: 3,
max_token_len: 64,
use_stemming: true,
stop_words: vec!["the".to_string(), "and".to_string(), "or".to_string()],
language: Some("en".to_string()),
}
}
}