Skip to main content

zeph_core/goal/
supervisor.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Supervisor verifier for autonomous goal sessions.
5//!
6//! [`GoalSupervisor`] makes a single LLM call with structured JSON output to decide whether
7//! the goal condition has been satisfied. It is NOT a full subagent — no tools, no memory
8//! access. Trust model: heuristic verifier only, not a security boundary.
9
10use std::time::Duration;
11
12use serde::Deserialize;
13use zeph_llm::any::AnyProvider;
14
15use super::autonomous::SupervisorVerdict;
16use crate::quality::parser::{ChatJsonError, chat_json};
17
18#[non_exhaustive]
19/// Errors returned by the supervisor.
20#[derive(Debug, thiserror::Error)]
21pub enum SupervisorError {
22    /// LLM provider returned an error.
23    #[error("supervisor LLM error: {0}")]
24    Llm(String),
25    /// Verification call timed out.
26    #[error("supervisor timed out after {0}ms")]
27    Timeout(u64),
28    /// LLM output could not be parsed as a verdict.
29    #[error("supervisor response was not valid JSON: {0}")]
30    Parse(String),
31    /// Rate-limited (HTTP 429).
32    #[error("supervisor rate-limited (429)")]
33    RateLimited,
34}
35
36impl From<ChatJsonError> for SupervisorError {
37    fn from(e: ChatJsonError) -> Self {
38        match e {
39            ChatJsonError::Llm(inner) => {
40                let msg = inner.to_string();
41                if msg.contains("429") {
42                    Self::RateLimited
43                } else {
44                    Self::Llm(msg)
45                }
46            }
47            ChatJsonError::Timeout(ms) => Self::Timeout(ms),
48            ChatJsonError::Parse(raw) => Self::Parse(raw),
49        }
50    }
51}
52
53/// Internal deserialization target for the supervisor's JSON response.
54#[derive(Deserialize)]
55struct RawVerdict {
56    achieved: bool,
57    reasoning: String,
58    #[serde(default)]
59    confidence: f32,
60    #[serde(default)]
61    suggestions: Vec<String>,
62}
63
/// System prompt sent with every supervisor verification request.
///
/// It tells the model to answer with a bare JSON object whose keys (`achieved`, `reasoning`,
/// `confidence`, `suggestions`) match the fields of `RawVerdict`, and to report
/// `achieved=true` only on clear and complete evidence.
const SUPERVISOR_SYSTEM: &str = concat!(
    "You are an autonomous goal verification assistant. ",
    "Your task is to determine whether a stated goal condition has been achieved based on the ",
    "agent's conversation summary and its recent actions.\n",
    "\n",
    "Respond with strict JSON only — no prose, no markdown fences:\n",
    "{\n",
    "\"achieved\": <bool>,\n",
    "\"reasoning\": \"<one or two sentence explanation>\",\n",
    "\"confidence\": <float 0.0..1.0>,\n",
    "\"suggestions\": [\"<optional improvement suggestion>\", ...]\n",
    "}\n",
    "Be conservative: only set achieved=true when the evidence clearly and completely satisfies ",
    "the goal condition.",
);
78
/// Renders the user-turn prompt handed to the supervisor model.
///
/// The prompt has three labelled sections separated by blank lines, in this order: the goal
/// condition, the conversation summary, and the recent actions. Each action sits on its own
/// line with a `- ` prefix; an empty `recent_actions` slice is rendered as `(none)`.
fn supervisor_user(
    goal_condition: &str,
    conversation_summary: &str,
    recent_actions: &[String],
) -> String {
    let mut bullet_list = String::new();
    for (position, action) in recent_actions.iter().enumerate() {
        // Newlines go between entries only, never after the last one.
        if position > 0 {
            bullet_list.push('\n');
        }
        bullet_list.push_str("- ");
        bullet_list.push_str(action);
    }
    if recent_actions.is_empty() {
        bullet_list.push_str("(none)");
    }

    [
        "Goal condition:\n",
        goal_condition,
        "\n\nConversation summary:\n",
        conversation_summary,
        "\n\nRecent actions:\n",
        bullet_list.as_str(),
    ]
    .concat()
}
99
100/// Single-call supervisor verifier for autonomous goal sessions.
101///
102/// Uses a configurable LLM provider (ideally different from the main agent provider to avoid
103/// self-confirmation bias). On HTTP 429 the caller should wait and retry once before counting
104/// the failure toward the consecutive failure limit.
105pub struct GoalSupervisor {
106    provider: AnyProvider,
107    timeout: Duration,
108}
109
110impl GoalSupervisor {
111    /// Create a new supervisor with the given provider and per-call timeout.
112    #[must_use]
113    pub fn new(provider: AnyProvider, timeout: Duration) -> Self {
114        Self { provider, timeout }
115    }
116
117    /// Verify whether `goal_condition` has been achieved.
118    ///
119    /// Makes at most **two** LLM calls (initial + one retry on JSON parse failure via
120    /// [`chat_json`]). Rate-limit (429) errors are surfaced as [`SupervisorError::RateLimited`]
121    /// so the caller can apply backoff before counting the failure.
122    ///
123    /// # Errors
124    ///
125    /// Returns [`SupervisorError`] on provider error, timeout, or unrecoverable parse failure.
126    #[tracing::instrument(name = "goal.supervisor.verify", skip_all, level = "debug", err)]
127    pub async fn verify(
128        &self,
129        goal_condition: &str,
130        conversation_summary: &str,
131        recent_actions: &[String],
132    ) -> Result<SupervisorVerdict, SupervisorError> {
133        let user = supervisor_user(goal_condition, conversation_summary, recent_actions);
134        tracing::debug!("goal.supervisor.verify: calling provider");
135        let (raw, _tokens, _attempt): (RawVerdict, _, _) =
136            chat_json(&self.provider, SUPERVISOR_SYSTEM, &user, self.timeout)
137                .await
138                .map_err(SupervisorError::from)?;
139        tracing::debug!(
140            achieved = raw.achieved,
141            confidence = raw.confidence,
142            "goal.supervisor.verify: done"
143        );
144        Ok(SupervisorVerdict {
145            achieved: raw.achieved,
146            reasoning: raw.reasoning,
147            confidence: raw.confidence.clamp(0.0, 1.0),
148            suggestions: raw.suggestions,
149        })
150    }
151}
152
#[cfg(test)]
mod tests {
    use super::*;
    use crate::quality::parser::ChatJsonError;
    use zeph_llm::LlmError;

    /// Every section header and every caller-supplied fragment must show up in the prompt.
    #[test]
    fn supervisor_user_contains_all_sections() {
        let actions = [
            "ran cargo build".to_owned(),
            "no errors reported".to_owned(),
        ];
        let msg = supervisor_user("the build must pass", "agent ran cargo build", &actions);

        // (expected fragment, failure message)
        let expectations = [
            ("Goal condition:", "goal section missing"),
            ("the build must pass", "goal text missing"),
            ("Conversation summary:", "summary section missing"),
            ("agent ran cargo build", "summary text missing"),
            ("Recent actions:", "actions section missing"),
            ("- ran cargo build", "first action missing"),
            ("- no errors reported", "second action missing"),
        ];
        for (fragment, failure) in expectations {
            assert!(msg.contains(fragment), "{failure}");
        }
    }

    /// An empty action list is rendered with the `(none)` placeholder.
    #[test]
    fn supervisor_user_empty_actions_shows_none() {
        let rendered = supervisor_user("goal", "summary", &[]);
        assert!(
            rendered.contains("(none)"),
            "empty actions should show (none)"
        );
    }

    /// A provider error without a 429 marker keeps its message in the `Llm` variant.
    #[test]
    fn supervisor_error_from_chat_json_error_llm_preserved() {
        let converted: SupervisorError =
            ChatJsonError::Llm(LlmError::Other("backend failure".into())).into();
        let preserved = match &converted {
            SupervisorError::Llm(msg) => msg.contains("backend failure"),
            _ => false,
        };
        assert!(preserved, "Llm variant must preserve the error message");
    }

    /// A provider error that mentions 429 is reported as rate limiting.
    #[test]
    fn supervisor_error_from_chat_json_error_llm_429_becomes_rate_limited() {
        let converted: SupervisorError =
            ChatJsonError::Llm(LlmError::Other("HTTP 429 rate limit".into())).into();
        assert!(
            matches!(converted, SupervisorError::RateLimited),
            "429 in message must become RateLimited"
        );
    }

    /// The timeout duration survives the conversion unchanged.
    #[test]
    fn supervisor_error_from_chat_json_error_timeout() {
        let converted: SupervisorError = ChatJsonError::Timeout(5000).into();
        assert!(
            matches!(converted, SupervisorError::Timeout(5000)),
            "Timeout variant must preserve milliseconds"
        );
    }

    /// The parse payload survives the conversion unchanged.
    #[test]
    fn supervisor_error_from_chat_json_error_parse() {
        let converted: SupervisorError = ChatJsonError::Parse("bad json".to_owned()).into();
        let preserved = match &converted {
            SupervisorError::Parse(raw) => raw == "bad json",
            _ => false,
        };
        assert!(preserved, "Parse variant must preserve the raw string");
    }
}