Skip to main content

zeph_core/goal/
supervisor.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Supervisor verifier for autonomous goal sessions.
5//!
6//! [`GoalSupervisor`] makes a single LLM call with structured JSON output to decide whether
7//! the goal condition has been satisfied. It is NOT a full subagent — no tools, no memory
8//! access. Trust model: heuristic verifier only, not a security boundary.
9
10use std::time::Duration;
11
12use serde::Deserialize;
13use zeph_llm::any::AnyProvider;
14
15use super::autonomous::SupervisorVerdict;
16use crate::quality::parser::{ChatJsonError, chat_json};
17
18/// Errors returned by the supervisor.
19#[derive(Debug, thiserror::Error)]
20pub enum SupervisorError {
21    /// LLM provider returned an error.
22    #[error("supervisor LLM error: {0}")]
23    Llm(String),
24    /// Verification call timed out.
25    #[error("supervisor timed out after {0}ms")]
26    Timeout(u64),
27    /// LLM output could not be parsed as a verdict.
28    #[error("supervisor response was not valid JSON: {0}")]
29    Parse(String),
30    /// Rate-limited (HTTP 429).
31    #[error("supervisor rate-limited (429)")]
32    RateLimited,
33}
34
35impl From<ChatJsonError> for SupervisorError {
36    fn from(e: ChatJsonError) -> Self {
37        match e {
38            ChatJsonError::Llm(inner) => {
39                let msg = inner.to_string();
40                if msg.contains("429") {
41                    Self::RateLimited
42                } else {
43                    Self::Llm(msg)
44                }
45            }
46            ChatJsonError::Timeout(ms) => Self::Timeout(ms),
47            ChatJsonError::Parse(raw) => Self::Parse(raw),
48        }
49    }
50}
51
52/// Internal deserialization target for the supervisor's JSON response.
53#[derive(Deserialize)]
54struct RawVerdict {
55    achieved: bool,
56    reasoning: String,
57    #[serde(default)]
58    confidence: f32,
59    #[serde(default)]
60    suggestions: Vec<String>,
61}
62
/// System prompt for the supervisor call.
///
/// Each literal below is one fragment of the prompt; fragments ending in `\n` close a
/// line of the final text. The fields of the JSON template start at column 0.
const SUPERVISOR_SYSTEM: &str = concat!(
    "You are an autonomous goal verification assistant. ",
    "Your task is to determine whether a stated goal condition has been achieved based on the ",
    "agent's conversation summary and its recent actions.\n",
    "\n",
    "Respond with strict JSON only — no prose, no markdown fences:\n",
    "{\n",
    "\"achieved\": <bool>,\n",
    "\"reasoning\": \"<one or two sentence explanation>\",\n",
    "\"confidence\": <float 0.0..1.0>,\n",
    "\"suggestions\": [\"<optional improvement suggestion>\", ...]\n",
    "}\n",
    "Be conservative: only set achieved=true when the evidence clearly and completely satisfies ",
    "the goal condition.",
);
77
/// Build the user message for the supervisor call.
///
/// The message has three labelled sections in fixed order: the goal condition, the
/// conversation summary, and the recent actions. Actions are rendered one per line,
/// each prefixed with `- `; an empty slice is rendered as `(none)`.
fn supervisor_user(
    goal_condition: &str,
    conversation_summary: &str,
    recent_actions: &[String],
) -> String {
    let mut prompt = String::new();
    prompt.push_str("Goal condition:\n");
    prompt.push_str(goal_condition);
    prompt.push_str("\n\nConversation summary:\n");
    prompt.push_str(conversation_summary);
    prompt.push_str("\n\nRecent actions:\n");

    match recent_actions.split_first() {
        None => prompt.push_str("(none)"),
        Some((first, rest)) => {
            // The first bullet has no leading newline; every later one is preceded by
            // exactly one, so the list never ends with a trailing newline.
            prompt.push_str("- ");
            prompt.push_str(first);
            for action in rest {
                prompt.push_str("\n- ");
                prompt.push_str(action);
            }
        }
    }
    prompt
}
98
99/// Single-call supervisor verifier for autonomous goal sessions.
100///
101/// Uses a configurable LLM provider (ideally different from the main agent provider to avoid
102/// self-confirmation bias). On HTTP 429 the caller should wait and retry once before counting
103/// the failure toward the consecutive failure limit.
104pub struct GoalSupervisor {
105    provider: AnyProvider,
106    timeout: Duration,
107}
108
109impl GoalSupervisor {
110    /// Create a new supervisor with the given provider and per-call timeout.
111    #[must_use]
112    pub fn new(provider: AnyProvider, timeout: Duration) -> Self {
113        Self { provider, timeout }
114    }
115
116    /// Verify whether `goal_condition` has been achieved.
117    ///
118    /// Makes at most **two** LLM calls (initial + one retry on JSON parse failure via
119    /// [`chat_json`]). Rate-limit (429) errors are surfaced as [`SupervisorError::RateLimited`]
120    /// so the caller can apply backoff before counting the failure.
121    ///
122    /// # Errors
123    ///
124    /// Returns [`SupervisorError`] on provider error, timeout, or unrecoverable parse failure.
125    #[tracing::instrument(name = "goal.supervisor.verify", skip_all, level = "debug", err)]
126    pub async fn verify(
127        &self,
128        goal_condition: &str,
129        conversation_summary: &str,
130        recent_actions: &[String],
131    ) -> Result<SupervisorVerdict, SupervisorError> {
132        let user = supervisor_user(goal_condition, conversation_summary, recent_actions);
133        tracing::debug!("goal.supervisor.verify: calling provider");
134        let (raw, _tokens, _attempt): (RawVerdict, _, _) =
135            chat_json(&self.provider, SUPERVISOR_SYSTEM, &user, self.timeout)
136                .await
137                .map_err(SupervisorError::from)?;
138        tracing::debug!(
139            achieved = raw.achieved,
140            confidence = raw.confidence,
141            "goal.supervisor.verify: done"
142        );
143        Ok(SupervisorVerdict {
144            achieved: raw.achieved,
145            reasoning: raw.reasoning,
146            confidence: raw.confidence.clamp(0.0, 1.0),
147            suggestions: raw.suggestions,
148        })
149    }
150}
151
#[cfg(test)]
mod tests {
    use super::*;
    use crate::quality::parser::ChatJsonError;
    use zeph_llm::LlmError;

    /// A populated prompt carries every section header and every payload string.
    #[test]
    fn supervisor_user_contains_all_sections() {
        let actions = [
            "ran cargo build".to_owned(),
            "no errors reported".to_owned(),
        ];
        let msg = supervisor_user("the build must pass", "agent ran cargo build", &actions);

        // (expected fragment, failure message), checked in prompt order.
        let expected = [
            ("Goal condition:", "goal section missing"),
            ("the build must pass", "goal text missing"),
            ("Conversation summary:", "summary section missing"),
            ("agent ran cargo build", "summary text missing"),
            ("Recent actions:", "actions section missing"),
            ("- ran cargo build", "first action missing"),
            ("- no errors reported", "second action missing"),
        ];
        for (fragment, failure) in expected {
            assert!(msg.contains(fragment), "{failure}");
        }
    }

    /// With no actions, the actions section holds the `(none)` placeholder.
    #[test]
    fn supervisor_user_empty_actions_shows_none() {
        let no_actions: &[String] = &[];
        let msg = supervisor_user("goal", "summary", no_actions);
        assert!(msg.contains("(none)"), "empty actions should show (none)");
    }

    /// A provider error without a 429 marker keeps its message in `Llm`.
    #[test]
    fn supervisor_error_from_chat_json_error_llm_preserved() {
        let source = ChatJsonError::Llm(LlmError::Other("backend failure".into()));
        let converted: SupervisorError = source.into();
        let preserved = match &converted {
            SupervisorError::Llm(msg) => msg.contains("backend failure"),
            _ => false,
        };
        assert!(preserved, "Llm variant must preserve the error message");
    }

    /// A provider error mentioning 429 is reported as `RateLimited`.
    #[test]
    fn supervisor_error_from_chat_json_error_llm_429_becomes_rate_limited() {
        let source = ChatJsonError::Llm(LlmError::Other("HTTP 429 rate limit".into()));
        let converted: SupervisorError = source.into();
        let rate_limited = match converted {
            SupervisorError::RateLimited => true,
            _ => false,
        };
        assert!(rate_limited, "429 in message must become RateLimited");
    }

    /// The timeout value survives the conversion unchanged.
    #[test]
    fn supervisor_error_from_chat_json_error_timeout() {
        let converted: SupervisorError = ChatJsonError::Timeout(5000).into();
        let kept_millis = match converted {
            SupervisorError::Timeout(ms) => ms == 5000,
            _ => false,
        };
        assert!(kept_millis, "Timeout variant must preserve milliseconds");
    }

    /// The parse payload survives the conversion unchanged.
    #[test]
    fn supervisor_error_from_chat_json_error_parse() {
        let converted: SupervisorError = ChatJsonError::Parse("bad json".to_owned()).into();
        let kept_raw = match &converted {
            SupervisorError::Parse(raw) => raw == "bad json",
            _ => false,
        };
        assert!(kept_raw, "Parse variant must preserve the raw string");
    }
}