use std::collections::HashMap;
use std::fmt;
use std::hash::{BuildHasher, Hash};
use std::ops::Range;
use std::path::Path;
use std::result::Result as StdResult;
use serde::{Deserialize, Serialize};
/// Result alias used throughout this file; the error type is fixed to [`DictError`].
pub type Result<T> = StdResult<T, DictError>;
/// One hit returned by the search methods of [`Dict`]
/// (`search_prefix`, `search_fuzzy`, `search_fulltext`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
/// Word (key) the hit refers to.
pub word: String,
/// Entry payload as raw bytes.
pub entry: Vec<u8>,
/// Optional score for the hit.
/// NOTE(review): scale and ordering are not visible in this file — confirm with the producers.
pub score: Option<f32>,
/// Optional list of index pairs.
/// NOTE(review): presumably `(start, end)` spans to highlight — confirm units and the text they index into.
pub highlights: Option<Vec<(usize, usize)>>,
}
/// Per-key outcome of a batched lookup such as [`Dict::get_multiple`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchResult {
/// `Display` rendering of the key that was requested.
pub word: String,
/// Entry bytes when the lookup succeeded, `None` otherwise.
pub entry: Option<Vec<u8>>,
/// Error returned by the lookup when it failed, `None` otherwise.
pub error: Option<DictError>,
}
/// Descriptive record for a dictionary, exposed through [`Dict::metadata`].
///
/// NOTE(review): the field notes below are inferred from names and types only;
/// the code that fills them is not in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DictMetadata {
/// Dictionary name.
pub name: String,
/// Version string.
pub version: String,
/// Entry count (presumably the number of keys — confirm).
pub entries: u64,
/// Optional free-form description.
pub description: Option<String>,
/// Optional author.
pub author: Option<String>,
/// Optional language tag (format not specified here).
pub language: Option<String>,
/// File size (presumably in bytes — confirm).
pub file_size: u64,
/// Optional creation timestamp kept as a string (format not specified here).
pub created: Option<String>,
/// Whether a B-tree index is present (presumed from the name).
pub has_btree: bool,
/// Whether a full-text index is present (presumed from the name).
pub has_fts: bool,
}
/// Error type carried by [`Result`] in this file.
///
/// Every variant wraps a `String`; the `Display` impl prints it after a
/// variant-specific prefix (quoted on each variant below).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DictError {
/// Displayed as `File not found: <payload>`; the payload is formatted as a path.
FileNotFound(String),
/// Displayed as `Invalid format: <payload>`.
InvalidFormat(String),
/// Displayed as `Unsupported operation: <payload>`.
UnsupportedOperation(String),
/// Displayed as `I/O error: <payload>`; produced by `From<std::io::Error>`.
IoError(String),
/// Displayed as `Memory mapping error: <payload>`.
MmapError(String),
/// Displayed as `Index error: <payload>`.
IndexError(String),
/// Displayed as `Decompression error: <payload>`.
DecompressionError(String),
/// Displayed as `Serialization error: <payload>`.
SerializationError(String),
/// Displayed as `Internal error: <payload>`.
Internal(String),
}
impl fmt::Display for DictError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
DictError::FileNotFound(path) => write!(f, "File not found: {}", path),
DictError::InvalidFormat(msg) => write!(f, "Invalid format: {}", msg),
DictError::UnsupportedOperation(op) => write!(f, "Unsupported operation: {}", op),
DictError::IoError(msg) => write!(f, "I/O error: {}", msg),
DictError::MmapError(msg) => write!(f, "Memory mapping error: {}", msg),
DictError::IndexError(msg) => write!(f, "Index error: {}", msg),
DictError::DecompressionError(msg) => write!(f, "Decompression error: {}", msg),
DictError::SerializationError(msg) => write!(f, "Serialization error: {}", msg),
DictError::Internal(msg) => write!(f, "Internal error: {}", msg),
}
}
}
// Marks `DictError` as a standard error type; the body is empty, so the
// trait's default methods are used unchanged.
impl std::error::Error for DictError {}
impl From<std::io::Error> for DictError {
fn from(err: std::io::Error) -> Self {
DictError::IoError(err.to_string())
}
}
/// Options passed when loading or building a dictionary
/// (see `DictFormat::load` and `DictBuilder::build`).
///
/// Defaults quoted below come from the `Default` impl in this file.
/// NOTE(review): how each option is interpreted is up to the loaders and
/// builders, which are not in this file; the descriptions are inferred from the names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DictConfig {
/// Load the B-tree index (default `true`).
pub load_btree: bool,
/// Load the full-text index (default `true`).
pub load_fts: bool,
/// Use memory mapping (default `true`).
pub use_mmap: bool,
/// Cache size (default `1000`; unit not specified here).
pub cache_size: usize,
/// Batch size (default `100`).
pub batch_size: usize,
/// Optional text encoding name (default `None`).
pub encoding: Option<String>,
/// Build the B-tree index (default `true`).
pub build_btree: bool,
/// Build the full-text index (default `true`).
pub build_fts: bool,
}
/// Default configuration: every boolean switch on, a cache size of 1000,
/// a batch size of 100 and no explicit encoding.
impl Default for DictConfig {
    fn default() -> Self {
        Self {
            // Sizing knobs.
            cache_size: 1000,
            batch_size: 100,
            // No encoding override unless the caller sets one.
            encoding: None,
            // All loading/building switches start enabled.
            use_mmap: true,
            load_btree: true,
            load_fts: true,
            build_btree: true,
            build_fts: true,
        }
    }
}
/// Iterator state pairing an owned list of keys with the dictionary used to
/// resolve them; returned by [`Dict::iter`].
///
/// The `Iterator` impl in this file looks each key up through `dictionary`
/// as the iterator is advanced.
pub struct EntryIterator<'a, K> {
/// Keys still to be visited, consumed front to back.
pub keys: std::vec::IntoIter<K>,
/// Dictionary each key is looked up in.
pub dictionary: &'a dyn Dict<K>,
}
/// Walks the stored keys in order and resolves each one through the
/// dictionary's `get`.
///
/// Every key yields exactly one item: `Ok((key, entry))` on success or the
/// lookup error on failure. A failed lookup does not stop the iteration.
impl<'a, K: Hash + Eq + Clone + fmt::Display> Iterator for EntryIterator<'a, K> {
    type Item = Result<(K, Vec<u8>)>;

    fn next(&mut self) -> Option<Self::Item> {
        let key = self.keys.next()?;
        // The borrow of `key` ends when `get` returns, so it can be moved
        // into the yielded pair afterwards.
        Some(self.dictionary.get(&key).map(|entry| (key, entry)))
    }

    /// Exactly one item is produced per remaining key, so the key iterator's
    /// bounds are also this iterator's bounds (lets `collect` preallocate).
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.keys.size_hint()
    }
}
/// Read-side interface implemented by dictionary backends.
///
/// `K` is the key type. It must be hashable, comparable, clonable and
/// printable; the provided batch helpers render keys with `Display`.
/// Implementors must be `Send + Sync`.
///
/// Methods without a body are required; their exact semantics are defined by
/// the implementor. The provided methods are built on `get`, `iter` and `len`.
pub trait Dict<K>: Send + Sync
where
    K: Hash + Eq + Clone + fmt::Display,
{
    /// Returns the metadata record for this dictionary.
    fn metadata(&self) -> &DictMetadata;

    /// Reports whether `key` is present.
    fn contains(&self, key: &K) -> Result<bool>;

    /// Returns the raw entry bytes stored under `key`.
    fn get(&self, key: &K) -> Result<Vec<u8>>;

    /// Looks up every key in `keys` and reports one [`BatchResult`] per key,
    /// in input order.
    ///
    /// A failed lookup does not abort the batch: the error is stored in the
    /// corresponding result's `error` field and `entry` is `None`. The
    /// provided implementation always returns `Ok`.
    fn get_multiple(&self, keys: &[K]) -> Result<Vec<BatchResult>> {
        let results = keys
            .iter()
            .map(|key| {
                // Query the backend exactly once per key and split the single
                // outcome, so `entry` and `error` can never disagree and the
                // lookup cost is not doubled.
                let (entry, error) = match self.get(key) {
                    Ok(bytes) => (Some(bytes), None),
                    Err(err) => (None, Some(err)),
                };
                BatchResult {
                    word: key.to_string(),
                    entry,
                    error,
                }
            })
            .collect();
        Ok(results)
    }

    /// Same as [`Dict::get_multiple`], but feeds the keys through in chunks
    /// of `batch_size` (100 when `None`).
    ///
    /// A `batch_size` of zero is treated as one, because `slice::chunks`
    /// panics on a zero chunk size.
    ///
    /// # Errors
    /// Propagates the first error returned by `get_multiple` for a chunk.
    fn get_batch(&self, keys: &[K], batch_size: Option<usize>) -> Result<Vec<BatchResult>> {
        let batch_size = batch_size.unwrap_or(100).max(1);
        let mut results = Vec::with_capacity(keys.len());
        for chunk in keys.chunks(batch_size) {
            results.extend(self.get_multiple(chunk)?);
        }
        Ok(results)
    }

    /// Collects every key produced by [`Dict::iter`].
    ///
    /// # Errors
    /// Fails if `iter` fails, or with the first error yielded during iteration.
    fn keys(&self) -> Result<Vec<K>> {
        self.iter()?.map(|item| item.map(|(key, _)| key)).collect()
    }

    /// Collects every entry produced by [`Dict::iter`].
    ///
    /// # Errors
    /// Fails if `iter` fails, or with the first error yielded during iteration.
    fn values(&self) -> Result<Vec<Vec<u8>>> {
        self.iter()?
            .map(|item| item.map(|(_, entry)| entry))
            .collect()
    }

    /// Prefix search over keys, optionally capped at `limit` results.
    fn search_prefix(&self, prefix: &str, limit: Option<usize>) -> Result<Vec<SearchResult>>;

    /// Fuzzy search for `query` with an optional maximum distance.
    fn search_fuzzy(&self, query: &str, max_distance: Option<u32>) -> Result<Vec<SearchResult>>;

    /// Full-text search returning a sendable iterator of results.
    fn search_fulltext(
        &self,
        query: &str,
    ) -> Result<Box<dyn Iterator<Item = Result<SearchResult>> + Send>>;

    /// Returns the `(key, entry)` pairs addressed by `range`.
    fn get_range(&self, range: Range<usize>) -> Result<Vec<(K, Vec<u8>)>>;

    /// Returns an iterator over all `(key, entry)` pairs, borrowing `self`.
    fn iter(&self) -> Result<EntryIterator<'_, K>>;

    /// Returns a sendable iterator over the pairs matching `prefix`.
    fn prefix_iter(
        &self,
        prefix: &str,
    ) -> Result<Box<dyn Iterator<Item = Result<(K, Vec<u8>)>> + Send>>;

    /// Number of entries.
    fn len(&self) -> usize;

    /// `true` when [`Dict::len`] is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Paths of the files associated with this dictionary.
    fn file_paths(&self) -> &[std::path::PathBuf];

    /// Reloads the dictionary's indexes.
    fn reload_indexes(&mut self) -> Result<()>;

    /// Clears any cache held by the implementation.
    fn clear_cache(&mut self);

    /// Baseline statistics: only `total_entries` is filled (from
    /// [`Dict::len`]); the remaining fields are zero or empty. Implementors
    /// that track more can override this.
    fn stats(&self) -> DictStats {
        DictStats {
            total_entries: self.len() as u64,
            cache_hit_rate: 0.0,
            memory_usage: 0,
            index_sizes: HashMap::new(),
        }
    }

    /// Builds the dictionary's indexes.
    fn build_indexes(&mut self) -> Result<()>;
}
/// Statistics snapshot returned by [`Dict::stats`].
///
/// The provided `Dict::stats` fills only `total_entries`; the other fields
/// are left at zero or empty there.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DictStats {
/// Number of entries (taken from `Dict::len` by the provided `stats`).
pub total_entries: u64,
/// Cache hit rate. NOTE(review): presumably a 0.0–1.0 fraction — confirm with implementors.
pub cache_hit_rate: f32,
/// Memory usage. NOTE(review): presumably bytes — confirm with implementors.
pub memory_usage: u64,
/// Size per index, keyed by a string. NOTE(review): key naming and size unit are not visible here.
pub index_sizes: HashMap<String, u64>,
}
/// Write-side interface for assembling a dictionary keyed by `K`.
///
/// Only `is_empty` has a provided body; the other methods are required and
/// their exact semantics are defined by the implementor.
pub trait DictBuilder<K> {
    /// Registers the bytes in `entry` under `key`.
    fn add_entry(&mut self, key: K, entry: &[u8]) -> Result<()>;

    /// Builds the dictionary at `output_path`, optionally using `config`.
    fn build(&mut self, output_path: &Path, config: Option<DictConfig>) -> Result<()>;

    /// Replaces the metadata the builder will use.
    fn set_metadata(&mut self, metadata: DictMetadata);

    /// Number of entries currently held by the builder.
    fn len(&self) -> usize;

    /// `true` exactly when `len()` reports zero.
    fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }
}
/// Extension of [`Dict`] with two additional required lookup/search methods.
///
/// Both methods are signatures only; their behavior is defined by the implementor.
pub trait HighPerformanceDict<K>: Dict<K>
where
K: Hash + Eq + Clone + fmt::Display,
{
/// Returns the entry bytes for `key`.
/// NOTE(review): presumably resolved via binary search over a sorted index, per the name — confirm.
fn binary_search_get(&self, key: &K) -> Result<Vec<u8>>;
/// Returns a boxed iterator of search results for `query`.
/// Unlike `Dict::search_fulltext`, the returned iterator is not required to be `Send`.
fn stream_search(&self, query: &str) -> Result<Box<dyn Iterator<Item = Result<SearchResult>>>>;
}
/// Static description of an on-disk dictionary format keyed by `K`.
///
/// All items are associated constants or functions without a receiver, so
/// they are called on the implementing type itself.
pub trait DictFormat<K> {
/// Name of the format.
const FORMAT_NAME: &'static str;
/// Version string of the format.
const FORMAT_VERSION: &'static str;
/// Reports whether the file at `path` is in this format.
fn is_valid_format(path: &Path) -> Result<bool>;
/// Loads the dictionary at `path` using `config` and returns it as a boxed [`Dict`].
fn load(path: &Path, config: DictConfig) -> Result<Box<dyn Dict<K> + Send + Sync>>;
}
/// Format identifier string `"mdict"`.
pub const FORMAT_MDICT: &str = "mdict";
/// Format identifier string `"stardict"`.
pub const FORMAT_STARDICT: &str = "stardict";
/// Format identifier string `"zim"`.
pub const FORMAT_ZIM: &str = "zim";
// File-name suffixes; each value includes the leading dot.
// NOTE(review): which file each suffix denotes is presumed from the names
// (data, index, info, B-tree index, full-text index) — confirm against the loaders.
/// Suffix `".dict"`.
pub const EXT_DICT: &str = ".dict";
/// Suffix `".idx"`.
pub const EXT_IDX: &str = ".idx";
/// Suffix `".info"`.
pub const EXT_INFO: &str = ".info";
/// Suffix `".btree"`.
pub const EXT_BTREE: &str = ".btree";
/// Suffix `".fts"`.
pub const EXT_FTS: &str = ".fts";