context_core/selection/
ranking.rs1use crate::document::Document;
2use crate::types::context_bundle::{Query, ScoreDetails};
3
4pub trait Scorer {
5 fn score(&self, doc: &Document, query: &Query) -> ScoreDetails;
6
7 fn score_value(&self, details: &ScoreDetails) -> f32 {
8 let score = if details.total_words == 0 {
9 0.0
10 } else {
11 details.term_matches as f32 / details.total_words as f32
12 };
13 debug_assert!((0.0..=1.0).contains(&score), "score {score} out of range [0.0, 1.0]");
14 score
15 }
16}
17
18#[derive(Default)]
20pub struct TermFrequencyScorer;
21
22impl Scorer for TermFrequencyScorer {
23 fn score(&self, doc: &Document, query: &Query) -> ScoreDetails {
24 let content_lower = doc.content.to_lowercase();
26 let words: Vec<&str> = content_lower.split_whitespace().collect();
27 let total_words = words.len();
28
29 let term_matches = if total_words == 0 || query.terms.is_empty() {
30 0
31 } else {
32 let mut count = 0;
33 for word in &words {
35 for term in &query.terms {
36 if word == term {
37 count += 1;
38 }
39 }
40 }
41 count
42 };
43
44 ScoreDetails {
45 query_terms: query.terms.clone(),
46 term_matches,
47 total_words,
48 }
49 }
50}
51
/// Strategy for measuring the size of a piece of text in tokens.
pub trait TokenCounter {
    /// Returns the number of tokens this counter attributes to `content`.
    fn count_tokens(&self, content: &str) -> usize;
}
55
56#[derive(Default)]
59pub struct ApproxTokenCounter;
60
61impl TokenCounter for ApproxTokenCounter {
62 fn count_tokens(&self, content: &str) -> usize {
63 if content.is_empty() {
65 0
66 } else {
67 (content.len() + 3) / 4
68 }
69 }
70}