context_core/selection/
mod.rs1pub mod filters;
2pub mod ranking;
3pub mod budgeting;
4
5use std::cmp::Ordering;
6
7use crate::cache::ContextCache;
8use crate::types::context_bundle::{
9 Query, ScoredDocument, SelectionError, SelectionMetadata, SelectionResult,
10};
11pub use ranking::{ApproxTokenCounter, Scorer, TermFrequencyScorer, TokenCounter};
12pub use budgeting::{apply_budget, BudgetResult};
13
/// Selects documents for a query by combining a scoring strategy with a
/// token-counting strategy.
///
/// `S` supplies the scorer and `T` the token counter; the trait bounds that
/// make a selector usable live on the `impl` blocks rather than on the struct.
///
/// `Debug` and `Clone` are derived, so they are available whenever both `S`
/// and `T` implement them.
#[derive(Debug, Clone)]
pub struct ContextSelector<S, T> {
    /// Strategy used to score each document against the query.
    scorer: S,
    /// Strategy used to count the tokens of each document's content.
    tokenizer: T,
}
18
19impl Default for ContextSelector<TermFrequencyScorer, ApproxTokenCounter> {
20 fn default() -> Self {
21 Self {
22 scorer: TermFrequencyScorer,
23 tokenizer: ApproxTokenCounter,
24 }
25 }
26}
27
28impl<S, T> ContextSelector<S, T>
29where
30 S: Scorer,
31 T: TokenCounter,
32{
33 pub fn new(scorer: S, tokenizer: T) -> Self {
34 Self { scorer, tokenizer }
35 }
36
37 pub fn select(
38 &self,
39 cache: &ContextCache,
40 query: Query,
41 budget: usize,
42 ) -> Result<SelectionResult, SelectionError> {
43 let loaded_docs = cache.load_documents().map_err(|_| SelectionError::CacheError)?;
45
46 let mut scored_docs: Vec<ScoredDocument> = loaded_docs
48 .iter()
49 .map(|doc| {
50 let details = self.scorer.score(doc, &query);
51 let score = self.scorer.score_value(&details);
52 let token_count = self.tokenizer.count_tokens(&doc.content);
53 ScoredDocument {
54 document: doc,
55 score,
56 score_details: details,
57 token_count,
58 }
59 })
60 .collect();
61
62 scored_docs.sort_by(|a, b| {
65 let score_cmp = b.score.partial_cmp(&a.score).unwrap_or(Ordering::Equal);
67 if score_cmp != Ordering::Equal {
68 score_cmp
69 } else {
70 a.document.id.cmp(&b.document.id)
72 }
73 });
74
75 debug_assert!(
76 scored_docs.windows(2).all(|w| {
77 let a = &w[0];
78 let b = &w[1];
79 a.score > b.score || (a.score == b.score && a.document.id <= b.document.id)
80 })
81 );
82
83 let BudgetResult {
85 selected,
86 tokens_used,
87 documents_selected,
88 documents_excluded_by_budget,
89 } = apply_budget(scored_docs, budget);
90
91 let metadata = SelectionMetadata {
92 query: query.raw,
93 budget,
94 tokens_used,
95 documents_considered: loaded_docs.len(),
96 documents_selected,
97 documents_excluded_by_budget,
98 };
99
100 Ok(SelectionResult {
101 documents: selected,
102 selection: metadata,
103 })
104 }
105}