use ahash::AHashMap;
use crate::lexical::core::field::FieldValue;
/// The analyzed form of one document: four independent maps, each keyed by a
/// `String`.
///
/// `field_length` looks entries up by a `field` argument and the unit tests
/// use keys such as `"title"` and `"content"`, so the keys are treated as
/// field names throughout.
#[derive(Debug, Clone)]
pub struct AnalyzedDocument {
    /// Analyzed terms per field. `field_count` and `total_terms` are computed
    /// from this map alone.
    pub field_terms: AHashMap<String, Vec<AnalyzedTerm>>,

    /// One `FieldValue` per key.
    // NOTE(review): presumably the original value kept for retrieval, keyed by
    // field name — confirm against the code that populates it.
    pub stored_fields: AHashMap<String, FieldValue>,

    /// One `u32` length per field, returned by `field_length`.
    // NOTE(review): the unit of "length" (tokens, characters, ...) is not
    // visible in this file — confirm.
    pub field_lengths: AHashMap<String, u32>,

    /// A list of `f64` values per key.
    // NOTE(review): presumably numeric/point data keyed by field name — confirm.
    pub point_values: AHashMap<String, Vec<f64>>,
}
/// A single term together with the numeric metadata recorded for it.
#[derive(Debug, Clone)]
pub struct AnalyzedTerm {
    /// The term text.
    pub term: String,

    /// Position associated with the term.
    // NOTE(review): presumably the token position within its field — confirm.
    pub position: u32,

    /// Frequency associated with the term.
    // NOTE(review): presumably the occurrence count within the field — confirm.
    pub frequency: u32,

    /// A pair of offsets.
    // NOTE(review): the unit tests use (0, 5) for "hello" and (6, 11) for
    // "world", which suggests (start, end) with an exclusive end into the
    // source text — confirm whether these are byte or character offsets.
    pub offset: (usize, usize),
}
impl AnalyzedDocument {
pub fn new() -> Self {
Self {
field_terms: AHashMap::new(),
stored_fields: AHashMap::new(),
field_lengths: AHashMap::new(),
point_values: AHashMap::new(),
}
}
pub fn field_count(&self) -> usize {
self.field_terms.len()
}
pub fn total_terms(&self) -> usize {
self.field_terms.values().map(|terms| terms.len()).sum()
}
pub fn field_length(&self, field: &str) -> Option<u32> {
self.field_lengths.get(field).copied()
}
}
impl Default for AnalyzedDocument {
fn default() -> Self {
Self::new()
}
}
impl AnalyzedTerm {
pub fn new(term: String, position: u32, frequency: u32, offset: (usize, usize)) -> Self {
Self {
term,
position,
frequency,
offset,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed document reports zero fields and zero terms.
    #[test]
    fn test_analyzed_document_new() {
        let empty = AnalyzedDocument::new();

        assert_eq!(empty.field_count(), 0);
        assert_eq!(empty.total_terms(), 0);
    }

    /// Every key in `field_terms` counts as a field, even with no terms.
    #[test]
    fn test_analyzed_document_field_count() {
        let mut document = AnalyzedDocument::new();
        for name in ["title", "content"] {
            document.field_terms.insert(name.to_string(), Vec::new());
        }

        assert_eq!(document.field_count(), 2);
    }

    /// `total_terms` adds up the term lists of all fields.
    #[test]
    fn test_analyzed_document_total_terms() {
        let title_terms = vec![
            AnalyzedTerm::new("hello".to_string(), 0, 1, (0, 5)),
            AnalyzedTerm::new("world".to_string(), 1, 1, (6, 11)),
        ];
        let content_terms = vec![AnalyzedTerm::new("test".to_string(), 0, 1, (0, 4))];

        let mut document = AnalyzedDocument::new();
        document.field_terms.insert("title".to_string(), title_terms);
        document
            .field_terms
            .insert("content".to_string(), content_terms);

        assert_eq!(document.total_terms(), 3);
    }

    /// The constructor stores each argument in the matching field.
    #[test]
    fn test_analyzed_term_new() {
        let built = AnalyzedTerm::new("search".to_string(), 5, 2, (10, 16));

        assert_eq!(built.term, "search");
        assert_eq!(built.position, 5);
        assert_eq!(built.frequency, 2);
        assert_eq!(built.offset, (10, 16));
    }
}