use alloc::collections::BTreeMap;
use alloc::string::String;
use alloc::vec::Vec;
use thiserror_no_std::Error;
/// Error values used by the indexing/search code.
///
/// The display text of each variant is supplied by its `#[error(...)]`
/// attribute; `{0}` interpolates the variant's single payload field.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum IndexError {
/// A dataset lookup failed. The `String` payload is appended to the message
/// (presumably the dataset name or identifier; confirm against callers).
#[error("Dataset not found: {0}")]
DatasetNotFound(String),
/// A document lookup failed. The payload is rendered as `object_id=<value>`.
#[error("Document not found: object_id={0}")]
DocumentNotFound(u64),
/// Content could not be decoded. Carries no further detail.
#[error("Content decode error")]
ContentDecodeError,
/// The index was found to be corrupted. Carries no further detail.
#[error("Index corrupted")]
IndexCorrupted,
/// An I/O failure, carried as a textual description in the `String` payload.
#[error("IO error: {0}")]
IoError(String),
/// A query could not be parsed. The `String` payload is appended to the message.
#[error("Query parse error: {0}")]
QueryParseError(String),
}
/// Knobs that parameterize a search request.
///
/// Build one with [`Default::default`] and override individual fields using
/// struct-update syntax, e.g. `SearchOptions { limit: 10, ..Default::default() }`.
///
/// NOTE(review): the code that consumes these options is not part of this
/// section of the file, so the per-field descriptions below follow the field
/// names and should be confirmed against the search implementation.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Result cap (presumably the maximum number of hits returned).
    pub limit: usize,
    /// Pagination offset (presumably the number of leading hits to skip).
    pub offset: usize,
    /// Toggle for fuzzy matching.
    pub fuzzy: bool,
    /// Distance bound used with `fuzzy` (presumably an edit distance).
    pub fuzzy_distance: usize,
    /// Toggle for highlighting.
    pub highlight: bool,
    /// Snippet length (unit, bytes vs. characters, is not shown here).
    pub snippet_length: usize,
    /// Score threshold (presumably hits scoring below it are dropped).
    pub min_score: f32,
}

impl Default for SearchOptions {
    /// Returns the stock configuration: `limit` 100, `offset` 0, fuzzy
    /// matching off with a `fuzzy_distance` of 2, highlighting on,
    /// `snippet_length` 200 and a `min_score` of 0.0.
    fn default() -> Self {
        SearchOptions {
            // Paging.
            limit: 100,
            offset: 0,
            // Matching.
            fuzzy: false,
            fuzzy_distance: 2,
            min_score: 0.0,
            // Presentation.
            highlight: true,
            snippet_length: 200,
        }
    }
}
/// One entry of a search result.
///
/// NOTE(review): the code that fills this struct is not in this section of the
/// file; field descriptions follow the field names and should be confirmed.
#[derive(Debug, Clone)]
pub struct SearchHit {
/// Identifier of the matched object (same `u64` id type used by `Posting`
/// and `DocMeta`).
pub object_id: u64,
/// Path associated with the matched object.
pub path: String,
/// Score assigned to this hit (scoring formula not shown here).
pub score: f32,
/// Text excerpt for the hit (presumably bounded by
/// `SearchOptions::snippet_length`; confirm).
pub snippet: String,
/// Terms recorded as matched for this hit.
pub matched_terms: Vec<String>,
/// Positions recorded for this hit (presumably token positions of the
/// matches, as in `Posting::positions`; confirm).
pub positions: Vec<u32>,
}
/// Summary counters describing an index.
///
/// Derives `Default`, so a default value has every field set to zero.
///
/// NOTE(review): the code that populates these counters is not in this section
/// of the file; descriptions follow the field names and should be confirmed.
#[derive(Debug, Clone, Default)]
pub struct IndexStats {
/// Number of documents.
pub document_count: u64,
/// Number of terms (presumably distinct terms in the index).
pub term_count: u64,
/// Total number of term occurrences.
pub total_term_occurrences: u64,
/// Average document length (presumably mirrors `InvertedIndex::avg_doc_len`).
pub avg_doc_length: f32,
/// Size of the index in bytes, per the field name.
pub index_size_bytes: u64,
/// Marker for the last rebuild (presumably a timestamp; unit and epoch are
/// not shown here).
pub last_rebuild: u64,
}
/// A single entry of a `PostingList`, identified by `object_id`.
#[derive(Debug, Clone)]
pub struct Posting {
/// Identifier of the object this posting belongs to; this is the key
/// `PostingList::remove_posting` matches on.
pub object_id: u64,
/// Term frequency (presumably occurrences of the term in this object).
pub term_freq: u32,
/// Recorded positions (presumably token positions of each occurrence; the
/// relationship to `term_freq` is not enforced here).
pub positions: Vec<u32>,
}
/// A list of [`Posting`]s together with a running count of entries.
///
/// `doc_freq` is maintained by [`PostingList::add_posting`] and
/// [`PostingList::remove_posting`]. Both fields are public, so code that
/// mutates `postings` directly is responsible for keeping `doc_freq` in sync.
///
/// The derived `Default` yields the same empty list as [`PostingList::new`].
#[derive(Debug, Clone, Default)]
pub struct PostingList {
    /// Counter bumped on every added posting and lowered on every removal.
    pub doc_freq: u32,
    /// Postings in insertion order.
    pub postings: Vec<Posting>,
}

impl PostingList {
    /// Creates an empty list with `doc_freq == 0`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends `posting` and increments `doc_freq`.
    ///
    /// The increment saturates at `u32::MAX` instead of overflowing, matching
    /// the saturating decrement in [`PostingList::remove_posting`]. No
    /// de-duplication is performed: adding two postings with the same
    /// `object_id` stores both and counts both.
    pub fn add_posting(&mut self, posting: Posting) {
        // A plain `+= 1` panics under overflow checks and wraps to 0 without
        // them; clamp so the counter never becomes nonsensical.
        self.doc_freq = self.doc_freq.saturating_add(1);
        self.postings.push(posting);
    }

    /// Removes the first posting whose `object_id` equals `object_id`.
    ///
    /// Returns `true` if a posting was removed (and `doc_freq` decremented,
    /// saturating at zero), or `false` if no posting matched, in which case
    /// the list is left untouched.
    pub fn remove_posting(&mut self, object_id: u64) -> bool {
        match self.postings.iter().position(|p| p.object_id == object_id) {
            Some(idx) => {
                // `Vec::remove` keeps the relative order of the remaining postings.
                self.postings.remove(idx);
                self.doc_freq = self.doc_freq.saturating_sub(1);
                true
            }
            None => false,
        }
    }
}
/// Per-document metadata stored as the value type of `InvertedIndex::docs`.
///
/// NOTE(review): the code that creates these records is not in this section of
/// the file; descriptions follow the field names and should be confirmed.
#[derive(Debug, Clone)]
pub struct DocMeta {
/// Identifier of the document's object.
pub object_id: u64,
/// Path associated with the document.
pub path: String,
/// Document length (presumably in tokens, feeding the average document
/// length; unit not shown here).
pub length: u32,
/// Marker for when the document was indexed (presumably a timestamp; unit
/// and epoch are not shown here).
pub indexed_at: u64,
}
/// In-memory index state: a map from `String` keys to posting lists, a map
/// from `u64` keys to document metadata, and aggregate counters.
///
/// All fields are public; nothing in this type keeps the counters in step with
/// the maps automatically. Call [`InvertedIndex::recalculate_avg_doc_len`]
/// after changing `doc_count` or `total_terms`.
#[derive(Debug, Clone)]
pub struct InvertedIndex {
    /// Posting lists keyed by `String` (presumably the term text).
    pub index: BTreeMap<String, PostingList>,
    /// Document metadata keyed by `u64` (presumably the object id).
    pub docs: BTreeMap<u64, DocMeta>,
    /// Number of documents; the divisor used for `avg_doc_len`.
    pub doc_count: u64,
    /// Total term count; the dividend used for `avg_doc_len`.
    pub total_terms: u64,
    /// Cached `total_terms / doc_count`, or `0.0` when there are no documents.
    pub avg_doc_len: f32,
}

impl InvertedIndex {
    /// Creates an empty index: both maps empty and every counter zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Recomputes `avg_doc_len` from `total_terms` and `doc_count`.
    ///
    /// With zero documents the average is set to `0.0` rather than dividing
    /// by zero. Only `avg_doc_len` is written; the other fields are read as-is.
    pub fn recalculate_avg_doc_len(&mut self) {
        self.avg_doc_len = match self.doc_count {
            0 => 0.0,
            docs => self.total_terms as f32 / docs as f32,
        };
    }
}

impl Default for InvertedIndex {
    /// Same value as [`InvertedIndex::new`]: empty maps and zeroed counters.
    fn default() -> Self {
        InvertedIndex {
            index: BTreeMap::new(),
            docs: BTreeMap::new(),
            doc_count: 0,
            total_terms: 0,
            avg_doc_len: 0.0,
        }
    }
}
/// A term together with where it was found.
///
/// NOTE(review): the tokenizer that produces these values is not in this
/// section of the file; descriptions follow the field names and should be
/// confirmed.
#[derive(Debug, Clone)]
pub struct Token {
/// Text of the term (any normalization applied is not shown here).
pub term: String,
/// Position of the token (presumably its ordinal in the token stream, the
/// same kind of value stored in `Posting::positions`).
pub position: u32,
/// Byte offset of the token (presumably into the source text).
pub byte_offset: usize,
}