Skip to main content

context_core/selection/
mod.rs

1pub mod filters;
2pub mod ranking;
3pub mod budgeting;
4
5use std::cmp::Ordering;
6
7use crate::cache::ContextCache;
8use crate::types::context_bundle::{
9	Query, ScoredDocument, SelectionError, SelectionMetadata, SelectionResult,
10};
11pub use ranking::{ApproxTokenCounter, Scorer, TermFrequencyScorer, TokenCounter};
12pub use budgeting::{apply_budget, BudgetResult};
13
/// Selects context documents by pairing a scoring strategy with a token
/// counting strategy.
///
/// Both strategies are type parameters; the `Scorer` / `TokenCounter` bounds
/// are placed on the `impl` block that uses them rather than on the struct.
pub struct ContextSelector<S, T> {
	/// Strategy used to score each document against the query.
	scorer: S,
	/// Strategy used to count the tokens of each document's content.
	tokenizer: T,
}
18
19impl Default for ContextSelector<TermFrequencyScorer, ApproxTokenCounter> {
20	fn default() -> Self {
21		Self {
22			scorer: TermFrequencyScorer,
23			tokenizer: ApproxTokenCounter,
24		}
25	}
26}
27
28impl<S, T> ContextSelector<S, T>
29where
30	S: Scorer,
31	T: TokenCounter,
32{
33	pub fn new(scorer: S, tokenizer: T) -> Self {
34		Self { scorer, tokenizer }
35	}
36
37	pub fn select(
38		&self,
39		cache: &ContextCache,
40		query: Query,
41		budget: usize,
42	) -> Result<SelectionResult, SelectionError> {
43		// 0. Load documents strictly from manifest to ensure authoritativeness
44		let loaded_docs = cache.load_documents().map_err(|_| SelectionError::CacheError)?;
45
46		// 1. Scoring Phase
47		let mut scored_docs: Vec<ScoredDocument> = loaded_docs
48			.iter()
49			.map(|doc| {
50				let details = self.scorer.score(doc, &query);
51				let score = self.scorer.score_value(&details);
52				let token_count = self.tokenizer.count_tokens(&doc.content);
53				ScoredDocument {
54					document: doc,
55					score,
56					score_details: details,
57					token_count,
58				}
59			})
60			.collect();
61
62		// 2. Ordering Phase
63		// Sort globally by (score desc, id asc)
64		scored_docs.sort_by(|a, b| {
65			// Descending score
66			let score_cmp = b.score.partial_cmp(&a.score).unwrap_or(Ordering::Equal);
67			if score_cmp != Ordering::Equal {
68				score_cmp
69			} else {
70				// Ascending ID
71				a.document.id.cmp(&b.document.id)
72			}
73		});
74
75		debug_assert!(
76			scored_docs.windows(2).all(|w| {
77				let a = &w[0];
78				let b = &w[1];
79				a.score > b.score || (a.score == b.score && a.document.id <= b.document.id)
80			})
81		);
82
83		// 3. Budgeting Phase
84		let BudgetResult {
85			selected,
86			tokens_used,
87			documents_selected,
88			documents_excluded_by_budget,
89		} = apply_budget(scored_docs, budget);
90
91		let metadata = SelectionMetadata {
92			query: query.raw,
93			budget,
94			tokens_used,
95			documents_considered: loaded_docs.len(),
96			documents_selected,
97			documents_excluded_by_budget,
98		};
99
100		Ok(SelectionResult {
101			documents: selected,
102			selection: metadata,
103		})
104	}
105}