Skip to main content

roboticus_agent/
retrieval_strategy.rs

1//! Adaptive source discovery — policy-driven retrieval strategy selection.
2//!
3//! This module decides **how** the pipeline should retrieve information for a
4//! given turn. The decision is based on classified intents, the synthesized
5//! operating state, prompt signals, and conversational position.
6//!
7//! ## Strategy tiers
8//!
9//! | Strategy              | Description                                        |
10//! |-----------------------|----------------------------------------------------|
11//! | `CacheOnly`           | Fast path — cached/indexed retrieval only           |
12//! | `IndexedRetrieval`    | Standard — full FTS5 + vector cosine memory search  |
13//! | `LiveDiscovery`       | Escalated — retrieval + external/live data sources   |
14//! | `DirectVerification`  | Highest — bypass cache, verify against live sources  |
15//!
16//! ## Design invariants
17//!
18//! - The decision function is pure: no I/O, no side effects.
19//! - All inputs come from the shared pipeline (intents, task state, prompt).
20//! - The strategy **informs** retrieval but does not gate it until live
21//!   discovery sources are implemented.
22
23use serde::Serialize;
24
25use crate::task_state::TaskOperatingState;
26
27// ── Strategy enum ────────────────────────────────────────────────────
28
29/// Retrieval strategy tier, ordered from lightest to heaviest.
30#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
31pub enum RetrievalStrategy {
32    /// Fast path: use cached/indexed retrieval only.
33    CacheOnly,
34    /// Standard: full memory retrieval with FTS5 + vector cosine.
35    IndexedRetrieval,
36    /// Escalated: retrieval + external/live data sources.
37    LiveDiscovery,
38    /// Highest: bypass cache, verify claims against live sources.
39    DirectVerification,
40}
41
42impl RetrievalStrategy {
43    /// Human-readable label for trace annotations.
44    pub fn as_str(&self) -> &'static str {
45        match self {
46            Self::CacheOnly => "cache_only",
47            Self::IndexedRetrieval => "indexed_retrieval",
48            Self::LiveDiscovery => "live_discovery",
49            Self::DirectVerification => "direct_verification",
50        }
51    }
52}
53
54// ── Decision output ──────────────────────────────────────────────────
55
56/// The result of retrieval strategy selection, capturing the chosen
57/// strategy, the signals that drove the decision, and a confidence score.
58#[derive(Debug, Clone, Serialize)]
59pub struct RetrievalDecision {
60    /// The selected retrieval strategy.
61    pub strategy: RetrievalStrategy,
62    /// Signals that contributed to the decision (e.g. "intent:CurrentEvents",
63    /// "marker:latest", "first_turn_greeting").
64    pub signals: Vec<String>,
65    /// Confidence in the decision (0.0–1.0). Higher means more certain the
66    /// selected strategy is appropriate.
67    pub confidence: f64,
68}
69
70// ── Freshness markers ────────────────────────────────────────────────
71
/// Lowercase phrases that, when found in the lowercased prompt, indicate the
/// user wants fresh / real-time data.
///
/// Entries are tried in slice order and the first one found is the one
/// reported as a `marker:` signal, so keep the order stable.
const FRESHNESS_MARKERS: &[&str] = &[
    // Recency cues.
    "latest",
    "current",
    "today",
    "right now",
    // News-style cues.
    "breaking",
    "recent news",
    // Direct questions about ongoing events.
    "what's happening",
    "what is happening",
];
83
/// Lowercase phrases that, when found in the lowercased prompt, indicate the
/// user wants a claim verified.
///
/// Entries are tried in slice order and the first one found is the one
/// reported as a `marker:` signal, so keep the order stable.
const VERIFICATION_MARKERS: &[&str] = &[
    // Explicit verification requests.
    "verify",
    "confirm",
    "is it true",
    // Fact-checking, with and without the hyphen.
    "fact check",
    "fact-check",
    // Expressions of doubt.
    "actually",
    "really true",
    // Re-checking, with and without the hyphen.
    "double check",
    "double-check",
    "check if",
];
97
98// ── Decision function ────────────────────────────────────────────────
99
100/// Select the retrieval strategy for the current turn.
101///
102/// # Arguments
103///
104/// - `intent_names` — serialized intent names from the semantic classifier
105///   (e.g. `["CurrentEvents", "Execution"]`).
106/// - `_task_state` — the synthesized operating state. Reserved for future
107///   refinement (e.g. budget pressure -> downgrade to CacheOnly).
108/// - `user_prompt` — the raw user prompt text.
109/// - `is_first_turn` — whether this is the first turn in the session.
110pub fn decide_retrieval_strategy(
111    intent_names: &[String],
112    _task_state: &TaskOperatingState,
113    user_prompt: &str,
114    is_first_turn: bool,
115) -> RetrievalDecision {
116    let lower = user_prompt.to_ascii_lowercase();
117    let mut signals: Vec<String> = Vec::new();
118
119    // ── Check for DirectVerification (highest priority) ──────────────
120    let has_verification_marker = VERIFICATION_MARKERS.iter().any(|m| {
121        if lower.contains(m) {
122            signals.push(format!("marker:{m}"));
123            true
124        } else {
125            false
126        }
127    });
128
129    if has_verification_marker {
130        return RetrievalDecision {
131            strategy: RetrievalStrategy::DirectVerification,
132            signals,
133            confidence: 0.85,
134        };
135    }
136
137    // ── Check for LiveDiscovery ──────────────────────────────────────
138    let has_current_events_intent = intent_names
139        .iter()
140        .any(|i| i.eq_ignore_ascii_case("currentevents"));
141    if has_current_events_intent {
142        signals.push("intent:CurrentEvents".into());
143        return RetrievalDecision {
144            strategy: RetrievalStrategy::LiveDiscovery,
145            signals,
146            confidence: 0.90,
147        };
148    }
149
150    let has_freshness_marker = FRESHNESS_MARKERS.iter().any(|m| {
151        if lower.contains(m) {
152            signals.push(format!("marker:{m}"));
153            true
154        } else {
155            false
156        }
157    });
158
159    if has_freshness_marker {
160        return RetrievalDecision {
161            strategy: RetrievalStrategy::LiveDiscovery,
162            signals,
163            confidence: 0.75,
164        };
165    }
166
167    // ── Check for CacheOnly ─────────────────────────────────────────
168    let has_acknowledgement_intent = intent_names
169        .iter()
170        .any(|i| i.eq_ignore_ascii_case("acknowledgement"));
171    if has_acknowledgement_intent {
172        signals.push("intent:Acknowledgement".into());
173        return RetrievalDecision {
174            strategy: RetrievalStrategy::CacheOnly,
175            signals,
176            confidence: 0.90,
177        };
178    }
179
180    // First turn with a short greeting -> CacheOnly
181    if is_first_turn && is_short_greeting(&lower) {
182        signals.push("first_turn_greeting".into());
183        return RetrievalDecision {
184            strategy: RetrievalStrategy::CacheOnly,
185            signals,
186            confidence: 0.80,
187        };
188    }
189
190    // ── Default: IndexedRetrieval ────────────────────────────────────
191    signals.push("default".into());
192    RetrievalDecision {
193        strategy: RetrievalStrategy::IndexedRetrieval,
194        signals,
195        confidence: 0.70,
196    }
197}
198
/// Detect short greeting prompts that don't need full retrieval.
///
/// `lower` is expected to be already lowercased; surrounding whitespace is
/// ignored. The prompt counts as a greeting when it is at most 30 bytes long
/// and is either exactly one of the known greetings or starts with one that
/// is immediately followed by a space or by `!`, `,`, `.` or `?`. This accepts
/// "hello.", "hi, there" and "what's up?" while rejecting words that merely
/// begin with a greeting, such as "highway" or "yoga".
fn is_short_greeting(lower: &str) -> bool {
    /// Longest prompt, in bytes, still considered a bare greeting.
    const MAX_GREETING_BYTES: usize = 30;
    const GREETINGS: &[&str] = &[
        "hi",
        "hey",
        "hello",
        "yo",
        "sup",
        "howdy",
        "good morning",
        "good afternoon",
        "good evening",
        "what's up",
        "whats up",
        "hola",
        "greetings",
    ];
    /// Characters allowed to follow a greeting directly.
    const FOLLOWERS: &[char] = &[' ', '!', ',', '.', '?'];

    let trimmed = lower.trim();
    if trimmed.len() > MAX_GREETING_BYTES {
        return false;
    }

    // `strip_prefix` avoids allocating a formatted string per candidate.
    GREETINGS.iter().any(|greeting| {
        trimmed
            .strip_prefix(*greeting)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with(FOLLOWERS))
    })
}
227
228// ── Tests ────────────────────────────────────────────────────────────
229
#[cfg(test)]
mod tests {
    use super::*;
    use crate::task_state::{TaskStateInput, synthesize};

    /// Task-state input shared by every test. The selector currently ignores
    /// the synthesized state, so the exact values only need to be valid.
    fn base_input() -> TaskStateInput {
        TaskStateInput {
            user_content: "test message".into(),
            intents: vec![],
            authority: "SelfGenerated".into(),
            retrieval_metrics: None,
            tool_search_stats: None,
            mcp_tools_available: false,
            taskable_agent_count: 0,
            fit_agent_count: 0,
            fit_agent_names: vec![],
            enabled_skill_count: 0,
            matching_skill_count: 0,
            missing_skills: vec![],
            remaining_budget_tokens: 8000,
            provider_breaker_open: false,
            inference_mode: "standard".into(),
            decomposition_proposal: None,
            explicit_specialist_workflow: false,
            named_tool_match: false,
            recent_response_skeletons: vec![],
            recent_user_message_lengths: vec![],
            self_echo_fragments: vec![],
            declared_action: None,
            previous_turn_had_protocol_issues: false,
            normalization_retry_streak: 0,
        }
    }

    /// Run the selector against the state synthesized from `base_input()`.
    fn decide(intents: &[&str], prompt: &str, is_first_turn: bool) -> RetrievalDecision {
        let input = base_input();
        let state = synthesize(&input);
        let intents: Vec<String> = intents.iter().map(|name| name.to_string()).collect();
        decide_retrieval_strategy(&intents, &state, prompt, is_first_turn)
    }

    /// True when any recorded signal contains `needle`.
    fn has_signal_containing(decision: &RetrievalDecision, needle: &str) -> bool {
        decision.signals.iter().any(|s| s.contains(needle))
    }

    #[test]
    fn current_events_intent_selects_live_discovery() {
        let decision = decide(&["CurrentEvents"], "what's going on in the world", false);

        assert_eq!(decision.strategy, RetrievalStrategy::LiveDiscovery);
        assert!(has_signal_containing(&decision, "CurrentEvents"));
        assert!(decision.confidence > 0.5);
    }

    #[test]
    fn verify_keyword_selects_direct_verification() {
        let decision = decide(&[], "can you verify that claim", false);

        assert_eq!(decision.strategy, RetrievalStrategy::DirectVerification);
        assert!(has_signal_containing(&decision, "verify"));
        assert!(decision.confidence > 0.5);
    }

    #[test]
    fn acknowledgement_selects_cache_only() {
        let decision = decide(&["Acknowledgement"], "ok got it", false);

        assert_eq!(decision.strategy, RetrievalStrategy::CacheOnly);
        assert!(has_signal_containing(&decision, "Acknowledgement"));
    }

    #[test]
    fn normal_prompt_selects_indexed_retrieval() {
        let decision = decide(&[], "tell me about the architecture of the system", false);

        assert_eq!(decision.strategy, RetrievalStrategy::IndexedRetrieval);
        assert!(decision.signals.iter().any(|s| s == "default"));
    }

    #[test]
    fn freshness_markers_select_live_discovery() {
        for marker in &["latest", "right now", "breaking", "what's happening"] {
            let prompt = format!("tell me the {marker} developments");
            let decision = decide(&[], &prompt, false);
            assert_eq!(
                decision.strategy,
                RetrievalStrategy::LiveDiscovery,
                "marker '{marker}' should select LiveDiscovery"
            );
        }
    }

    #[test]
    fn first_turn_greeting_selects_cache_only() {
        let decision = decide(&[], "hello", true);

        assert_eq!(decision.strategy, RetrievalStrategy::CacheOnly);
        assert!(decision.signals.iter().any(|s| s == "first_turn_greeting"));
    }

    #[test]
    fn first_turn_non_greeting_uses_indexed_retrieval() {
        let decision = decide(&[], "explain the theory of relativity", true);

        assert_eq!(decision.strategy, RetrievalStrategy::IndexedRetrieval);
    }

    #[test]
    fn verification_takes_priority_over_freshness() {
        // The prompt carries both a verification marker and a freshness marker.
        let decision = decide(&[], "can you verify the latest claims", false);

        assert_eq!(decision.strategy, RetrievalStrategy::DirectVerification);
    }

    #[test]
    fn fact_check_selects_direct_verification() {
        let decision = decide(&[], "fact check this statement for me", false);

        assert_eq!(decision.strategy, RetrievalStrategy::DirectVerification);
        assert!(has_signal_containing(&decision, "fact check"));
    }
}