use crate::document::Document;
use crate::types::context_bundle::{Query, ScoreDetails};
/// Relevance scoring of a [`Document`] against a [`Query`].
pub trait Scorer {
    /// Computes the raw match statistics for `doc` with respect to `query`.
    fn score(&self, doc: &Document, query: &Query) -> ScoreDetails;

    /// Reduces `details` to a single value: `term_matches / total_words`,
    /// or `0.0` when `total_words` is zero.
    ///
    /// # Panics
    ///
    /// When debug assertions are enabled, panics if the computed value lies
    /// outside `[0.0, 1.0]`.
    fn score_value(&self, details: &ScoreDetails) -> f32 {
        let score = match details.total_words {
            // Nothing to divide by; an empty document scores zero.
            0 => 0.0,
            total => details.term_matches as f32 / total as f32,
        };
        debug_assert!(
            (0.0..=1.0).contains(&score),
            "score {score} out of range [0.0, 1.0]"
        );
        score
    }
}
/// Scorer that counts how many whitespace-separated words of a document
/// equal one of the query terms, ignoring case.
#[derive(Default)]
pub struct TermFrequencyScorer;

impl Scorer for TermFrequencyScorer {
    /// Counts the words in `doc.content` that match any term in `query.terms`.
    ///
    /// Both the content and the terms are lowercased before comparison, and
    /// words are split on whitespace only (punctuation is not stripped).
    /// Each word contributes at most one match, even if the query lists the
    /// same term more than once, so `term_matches` never exceeds
    /// `total_words`.
    ///
    /// The returned `query_terms` is an unmodified clone of `query.terms`.
    fn score(&self, doc: &Document, query: &Query) -> ScoreDetails {
        let content_lower = doc.content.to_lowercase();

        // Lowercase the terms so they are comparable with the lowercased
        // content; collecting into a set also collapses repeated terms.
        let terms_lower: std::collections::HashSet<String> = query
            .terms
            .iter()
            .map(|term| term.to_lowercase())
            .collect();

        // Single pass: tally every word, and separately those that hit a term.
        let mut total_words = 0;
        let mut term_matches = 0;
        for word in content_lower.split_whitespace() {
            total_words += 1;
            if terms_lower.contains(word) {
                term_matches += 1;
            }
        }

        ScoreDetails {
            query_terms: query.terms.clone(),
            term_matches,
            total_words,
        }
    }
}
/// Estimates how many tokens a piece of text occupies.
pub trait TokenCounter {
    /// Returns the token count for `content`.
    fn count_tokens(&self, content: &str) -> usize;
}

/// Token counter that charges one token per four bytes of text, rounding up.
#[derive(Default)]
pub struct ApproxTokenCounter;

impl TokenCounter for ApproxTokenCounter {
    /// Returns the byte length of `content` divided by four, rounded up.
    ///
    /// An empty string yields `0`. The length is measured in UTF-8 bytes,
    /// not characters.
    fn count_tokens(&self, content: &str) -> usize {
        const BYTES_PER_TOKEN: usize = 4;

        let byte_len = content.len();
        let full_tokens = byte_len / BYTES_PER_TOKEN;
        // A trailing partial group of bytes still costs one whole token.
        let has_partial = byte_len % BYTES_PER_TOKEN != 0;
        full_tokens + usize::from(has_partial)
    }
}