Skip to main content

context_core/selection/
ranking.rs

1use crate::document::Document;
2use crate::types::context_bundle::{Query, ScoreDetails};
3
4pub trait Scorer {
5    fn score(&self, doc: &Document, query: &Query) -> ScoreDetails;
6
7    fn score_value(&self, details: &ScoreDetails) -> f32 {
8        let score = if details.total_words == 0 {
9            0.0
10        } else {
11            details.term_matches as f32 / details.total_words as f32
12        };
13        debug_assert!((0.0..=1.0).contains(&score), "score {score} out of range [0.0, 1.0]");
14        score
15    }
16}
17
18/// v0: Simple Term Frequency Scorer
19#[derive(Default)]
20pub struct TermFrequencyScorer;
21
22impl Scorer for TermFrequencyScorer {
23    fn score(&self, doc: &Document, query: &Query) -> ScoreDetails {
24        // Spec: total_words is defined as split(content, whitespace).len() after lowercasing.
25        let content_lower = doc.content.to_lowercase();
26        let words: Vec<&str> = content_lower.split_whitespace().collect();
27        let total_words = words.len();
28
29        let term_matches = if total_words == 0 || query.terms.is_empty() {
30            0
31        } else {
32            let mut count = 0;
33            // Naive count: occurrences of ANY query term
34            for word in &words {
35                for term in &query.terms {
36                    if word == term {
37                        count += 1;
38                    }
39                }
40            }
41            count
42        };
43
44        ScoreDetails {
45            query_terms: query.terms.clone(),
46            term_matches,
47            total_words,
48        }
49    }
50}
51
/// Estimates how many tokens a piece of text occupies.
pub trait TokenCounter {
    /// Returns the token count for `content`.
    fn count_tokens(&self, content: &str) -> usize;
}

/// v0: Approximate GPT-style tokenization
/// tokens(content) := ceil(len(content) / 4)
///
/// `len` is `str::len`, i.e. the UTF-8 byte length of the text, not its
/// number of characters.
#[derive(Default)]
pub struct ApproxTokenCounter;

impl TokenCounter for ApproxTokenCounter {
    /// Returns `ceil(content.len() / 4)`; the empty string yields `0`.
    fn count_tokens(&self, content: &str) -> usize {
        let byte_len = content.len();
        // Full groups of four bytes, plus one more token for any leftover bytes.
        let full_groups = byte_len / 4;
        let leftover = usize::from(byte_len % 4 != 0);
        full_groups + leftover
    }
}
70}