car_multi/patterns/
advisor.rs

1//! Advisor — executor stays in control, stronger model is consulted on demand.
2//!
3//! Unlike delegation patterns, the advisor does not take over execution.
4//! It only returns a bounded verdict that the caller may apply or ignore.
5
6use crate::error::MultiError;
7use crate::mailbox::Mailbox;
8use crate::runner::AgentRunner;
9use crate::shared::SharedInfra;
10use crate::types::{AgentOutput, AgentSpec};
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::sync::atomic::{AtomicU32, Ordering};
14use std::sync::Arc;
15use tracing::instrument;
16
17/// Structured guidance returned by an advisor consultation.
18#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
19#[serde(rename_all = "snake_case")]
20pub enum AdvisorVerdict {
21    Continue { rationale: String },
22    Plan { guidance: String },
23    Correction { guidance: String },
24    Stop { reason: String },
25    Uncertain { reason: String },
26}
27
28/// Result of one advisor consultation.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct AdvisorResult {
31    pub verdict: AdvisorVerdict,
32    pub advisor_output: AgentOutput,
33    pub used: u32,
34    pub max_uses: u32,
35}
36
37/// Bounded consultation primitive for stronger-model judgment.
38pub struct Advisor {
39    pub advisor: AgentSpec,
40    pub max_uses: u32,
41    used: AtomicU32,
42}
43
44impl Advisor {
45    pub fn new(advisor: AgentSpec, max_uses: u32) -> Self {
46        Self {
47            advisor,
48            max_uses,
49            used: AtomicU32::new(0),
50        }
51    }
52
53    pub fn used(&self) -> u32 {
54        self.used.load(Ordering::Relaxed)
55    }
56
57    pub fn remaining(&self) -> u32 {
58        self.max_uses.saturating_sub(self.used())
59    }
60
61    #[instrument(name = "multi.advisor", skip_all)]
62    pub async fn consult(
63        &self,
64        task: &str,
65        runner: &Arc<dyn AgentRunner>,
66        infra: &SharedInfra,
67    ) -> Result<AdvisorResult, MultiError> {
68        let prior = self.used.fetch_add(1, Ordering::Relaxed);
69        if prior >= self.max_uses {
70            return Err(MultiError::AdvisorExhausted {
71                used: prior,
72                max_uses: self.max_uses,
73            });
74        }
75
76        let advisory_task = format!(
77            r#"You are an advisor. The executor remains in control of the loop.
78
79Return exactly one verdict as JSON:
80```json
81{{
82  "verdict": "continue|plan|correction|stop|uncertain",
83  "rationale": "text when verdict is continue",
84  "guidance": "text when verdict is plan or correction",
85  "reason": "text when verdict is stop or uncertain"
86}}
87```
88
89Task or draft to review:
90{task}"#
91        );
92
93        let mailbox = Mailbox::default();
94        let rt = infra.make_runtime();
95        let output = runner
96            .run(&self.advisor, &advisory_task, &rt, &mailbox)
97            .await
98            .map_err(|e| {
99                MultiError::AgentFailed(
100                    self.advisor.name.clone(),
101                    format!("advisor consultation failed: {}", e),
102                )
103            })?;
104
105        let verdict = parse_verdict(&output.answer);
106        Ok(AdvisorResult {
107            verdict,
108            advisor_output: output,
109            used: prior + 1,
110            max_uses: self.max_uses,
111        })
112    }
113}
114
115/// Risk level assigned by the caller or fixture harness.
116#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
117#[serde(rename_all = "snake_case")]
118pub enum TaskRisk {
119    Low,
120    Medium,
121    High,
122}
123
124/// Inputs for deciding whether the student should consult the advisor.
125#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
126pub struct AdvisorTriggerContext {
127    pub repeated_failures: u32,
128    pub explicit_uncertainty: bool,
129    pub missing_guidance: bool,
130    pub prior_advisor_calls: u32,
131    #[serde(default = "default_task_risk")]
132    pub task_risk: TaskRisk,
133    #[serde(default)]
134    pub labels: Vec<String>,
135}
136
137const fn default_task_risk() -> TaskRisk {
138    TaskRisk::Low
139}
140
141impl Default for AdvisorTriggerContext {
142    fn default() -> Self {
143        Self {
144            repeated_failures: 0,
145            explicit_uncertainty: false,
146            missing_guidance: false,
147            prior_advisor_calls: 0,
148            task_risk: TaskRisk::Low,
149            labels: Vec::new(),
150        }
151    }
152}
153
154/// Explicit outcome of trigger evaluation.
155#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
156#[serde(rename_all = "snake_case")]
157pub enum AdvisorTriggerDecision {
158    NoConsult { reason: String },
159    OptionalConsult { reason: String },
160    MustConsult { reason: String },
161}
162
163/// Bounded policy for when the student should consult the advisor.
164#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
165pub struct AdvisorTriggerPolicy {
166    pub max_calls_per_run: u32,
167    pub repeated_failure_threshold: u32,
168    pub consult_on_missing_guidance: bool,
169    pub require_on_high_risk: bool,
170}
171
172impl Default for AdvisorTriggerPolicy {
173    fn default() -> Self {
174        Self {
175            max_calls_per_run: 3,
176            repeated_failure_threshold: 2,
177            consult_on_missing_guidance: true,
178            require_on_high_risk: true,
179        }
180    }
181}
182
183impl AdvisorTriggerPolicy {
184    pub fn evaluate(&self, ctx: &AdvisorTriggerContext) -> AdvisorTriggerDecision {
185        if ctx.prior_advisor_calls >= self.max_calls_per_run {
186            return AdvisorTriggerDecision::NoConsult {
187                reason: "advisor budget exhausted".to_string(),
188            };
189        }
190
191        if self.require_on_high_risk && matches!(ctx.task_risk, TaskRisk::High) {
192            return AdvisorTriggerDecision::MustConsult {
193                reason: "high-risk task".to_string(),
194            };
195        }
196
197        if ctx.repeated_failures >= self.repeated_failure_threshold {
198            return AdvisorTriggerDecision::MustConsult {
199                reason: format!(
200                    "repeated failures reached threshold ({}/{})",
201                    ctx.repeated_failures, self.repeated_failure_threshold
202                ),
203            };
204        }
205
206        if ctx.explicit_uncertainty {
207            return AdvisorTriggerDecision::OptionalConsult {
208                reason: "executor signaled uncertainty".to_string(),
209            };
210        }
211
212        if self.consult_on_missing_guidance && ctx.missing_guidance {
213            return AdvisorTriggerDecision::OptionalConsult {
214                reason: "no approved guidance matched".to_string(),
215            };
216        }
217
218        AdvisorTriggerDecision::NoConsult {
219            reason: "no trigger matched".to_string(),
220        }
221    }
222}
223
224fn parse_verdict(response: &str) -> AdvisorVerdict {
225    if let Some(json_str) = car_ir::json_extract::extract_json_object(response) {
226        if let Ok(parsed) = serde_json::from_str::<HashMap<String, serde_json::Value>>(&json_str) {
227            let verdict = parsed.get("verdict").and_then(|v| v.as_str()).unwrap_or("");
228            let rationale = parsed
229                .get("rationale")
230                .and_then(|v| v.as_str())
231                .unwrap_or("")
232                .trim();
233            let guidance = parsed
234                .get("guidance")
235                .and_then(|v| v.as_str())
236                .unwrap_or("")
237                .trim();
238            let reason = parsed
239                .get("reason")
240                .and_then(|v| v.as_str())
241                .unwrap_or("")
242                .trim();
243            return match verdict {
244                "continue" if !rationale.is_empty() => AdvisorVerdict::Continue {
245                    rationale: rationale.to_string(),
246                },
247                "plan" if !guidance.is_empty() => AdvisorVerdict::Plan {
248                    guidance: guidance.to_string(),
249                },
250                "correction" if !guidance.is_empty() => AdvisorVerdict::Correction {
251                    guidance: guidance.to_string(),
252                },
253                "stop" if !reason.is_empty() => AdvisorVerdict::Stop {
254                    reason: reason.to_string(),
255                },
256                "uncertain" if !reason.is_empty() => AdvisorVerdict::Uncertain {
257                    reason: reason.to_string(),
258                },
259                _ => AdvisorVerdict::Uncertain {
260                    reason: "advisor response could not be parsed cleanly".to_string(),
261                },
262            };
263        }
264    }
265
266    let lower = response.to_lowercase();
267    if lower.contains("verdict: continue") {
268        AdvisorVerdict::Continue {
269            rationale: response.trim().to_string(),
270        }
271    } else if lower.contains("verdict: stop") {
272        AdvisorVerdict::Stop {
273            reason: response.trim().to_string(),
274        }
275    } else if lower.contains("verdict: correction") {
276        AdvisorVerdict::Correction {
277            guidance: response.trim().to_string(),
278        }
279    } else if lower.contains("verdict: plan") {
280        AdvisorVerdict::Plan {
281            guidance: response.trim().to_string(),
282        }
283    } else {
284        AdvisorVerdict::Uncertain {
285            reason: response.trim().to_string(),
286        }
287    }
288}
289
#[cfg(test)]
mod tests {
    use super::*;
    use car_engine::Runtime;

    /// Runner stub that answers every task with the same canned text.
    struct CannedRunner {
        reply: String,
    }

    #[async_trait::async_trait]
    impl crate::runner::AgentRunner for CannedRunner {
        async fn run(
            &self,
            spec: &AgentSpec,
            _task: &str,
            _runtime: &Runtime,
            _mailbox: &Mailbox,
        ) -> Result<AgentOutput, MultiError> {
            let output = AgentOutput {
                name: spec.name.clone(),
                answer: self.reply.clone(),
                turns: 1,
                tool_calls: 0,
                duration_ms: 3.0,
                error: None,
                outcome: None,
                tokens: None,
            };
            Ok(output)
        }
    }

    /// Wraps a canned reply in the trait object `Advisor::consult` expects.
    fn canned_runner(reply: &str) -> Arc<dyn crate::runner::AgentRunner> {
        Arc::new(CannedRunner {
            reply: reply.to_owned(),
        })
    }

    #[tokio::test]
    async fn consult_returns_parsed_verdict() {
        let infra = SharedInfra::new();
        let runner = canned_runner(r#"{"verdict":"plan","guidance":"check state before editing"}"#);
        let advisor = Advisor::new(AgentSpec::new("advisor", "review"), 2);

        let outcome = advisor.consult("help", &runner, &infra).await.unwrap();

        let expected = AdvisorVerdict::Plan {
            guidance: "check state before editing".to_string(),
        };
        assert_eq!(outcome.verdict, expected);
        assert_eq!(outcome.used, 1);
        assert_eq!(advisor.remaining(), 1);
    }

    #[tokio::test]
    async fn consult_enforces_budget() {
        let infra = SharedInfra::new();
        let runner = canned_runner(r#"{"verdict":"uncertain","reason":"need more evidence"}"#);
        let advisor = Advisor::new(AgentSpec::new("advisor", "review"), 1);

        // The single allowed consultation succeeds; the next one must be rejected.
        advisor.consult("first", &runner, &infra).await.unwrap();
        let err = advisor
            .consult("second", &runner, &infra)
            .await
            .unwrap_err();

        assert!(
            matches!(
                err,
                MultiError::AdvisorExhausted {
                    used: 1,
                    max_uses: 1
                }
            ),
            "unexpected error: {err:?}"
        );
    }

    #[test]
    fn trigger_policy_requires_high_risk() {
        let ctx = AdvisorTriggerContext {
            task_risk: TaskRisk::High,
            ..Default::default()
        };

        let decision = AdvisorTriggerPolicy::default().evaluate(&ctx);

        assert_eq!(
            decision,
            AdvisorTriggerDecision::MustConsult {
                reason: "high-risk task".to_string(),
            }
        );
    }

    #[test]
    fn trigger_policy_honors_budget() {
        // Uncertainty alone would suggest a consult, but the budget check wins.
        let ctx = AdvisorTriggerContext {
            prior_advisor_calls: 3,
            explicit_uncertainty: true,
            ..Default::default()
        };

        let decision = AdvisorTriggerPolicy::default().evaluate(&ctx);

        assert_eq!(
            decision,
            AdvisorTriggerDecision::NoConsult {
                reason: "advisor budget exhausted".to_string(),
            }
        );
    }

    #[test]
    fn parse_verdict_falls_back_to_uncertain() {
        let expected = AdvisorVerdict::Uncertain {
            reason: "I am not sure.".to_string(),
        };
        assert_eq!(parse_verdict("I am not sure."), expected);
    }

    #[test]
    fn parse_verdict_reads_continue_json() {
        let raw = r#"{"verdict":"continue","rationale":"current completion claim is supported"}"#;
        let expected = AdvisorVerdict::Continue {
            rationale: "current completion claim is supported".to_string(),
        };
        assert_eq!(parse_verdict(raw), expected);
    }
}
car_multi/patterns/advisor.rs

car_multi/patterns/
advisor.rs