// zeph_core/goal/supervisor.rs
use std::time::Duration;
11
12use serde::Deserialize;
13use zeph_llm::any::AnyProvider;
14
15use super::autonomous::SupervisorVerdict;
16use crate::quality::parser::{ChatJsonError, chat_json};
17
18#[derive(Debug, thiserror::Error)]
20pub enum SupervisorError {
21 #[error("supervisor LLM error: {0}")]
23 Llm(String),
24 #[error("supervisor timed out after {0}ms")]
26 Timeout(u64),
27 #[error("supervisor response was not valid JSON: {0}")]
29 Parse(String),
30 #[error("supervisor rate-limited (429)")]
32 RateLimited,
33}
34
35impl From<ChatJsonError> for SupervisorError {
36 fn from(e: ChatJsonError) -> Self {
37 match e {
38 ChatJsonError::Llm(inner) => {
39 let msg = inner.to_string();
40 if msg.contains("429") {
41 Self::RateLimited
42 } else {
43 Self::Llm(msg)
44 }
45 }
46 ChatJsonError::Timeout(ms) => Self::Timeout(ms),
47 ChatJsonError::Parse(raw) => Self::Parse(raw),
48 }
49 }
50}
51
52#[derive(Deserialize)]
54struct RawVerdict {
55 achieved: bool,
56 reasoning: String,
57 #[serde(default)]
58 confidence: f32,
59 #[serde(default)]
60 suggestions: Vec<String>,
61}
62
/// System prompt for the supervisor model.
///
/// It describes the verification task, pins the reply to a bare JSON object
/// with the fields read by `RawVerdict`, and asks the model to be conservative
/// about declaring the goal achieved.
const SUPERVISOR_SYSTEM: &str = concat!(
    "You are an autonomous goal verification assistant. ",
    "Your task is to determine whether a stated goal condition has been achieved based on the ",
    "agent's conversation summary and its recent actions.\n",
    "\n",
    "Respond with strict JSON only — no prose, no markdown fences:\n",
    "{\n",
    "\"achieved\": <bool>,\n",
    "\"reasoning\": \"<one or two sentence explanation>\",\n",
    "\"confidence\": <float 0.0..1.0>,\n",
    "\"suggestions\": [\"<optional improvement suggestion>\", ...]\n",
    "}\n",
    "Be conservative: only set achieved=true when the evidence clearly and completely satisfies ",
    "the goal condition.",
);
77
/// Renders the user-role message for a verification request.
///
/// The message consists of three labelled sections, separated by blank lines:
/// the goal condition, the conversation summary and the recent actions. Each
/// entry of `recent_actions` becomes a `- ` bullet on its own line; an empty
/// slice is rendered as the literal `(none)`.
fn supervisor_user(
    goal_condition: &str,
    conversation_summary: &str,
    recent_actions: &[String],
) -> String {
    let action_list = match recent_actions {
        [] => String::from("(none)"),
        [first, rest @ ..] => {
            let mut bullets = format!("- {first}");
            for action in rest {
                bullets.push_str("\n- ");
                bullets.push_str(action);
            }
            bullets
        }
    };

    [
        "Goal condition:\n",
        goal_condition,
        "\n\nConversation summary:\n",
        conversation_summary,
        "\n\nRecent actions:\n",
        action_list.as_str(),
    ]
    .concat()
}
98
99pub struct GoalSupervisor {
105 provider: AnyProvider,
106 timeout: Duration,
107}
108
109impl GoalSupervisor {
110 #[must_use]
112 pub fn new(provider: AnyProvider, timeout: Duration) -> Self {
113 Self { provider, timeout }
114 }
115
116 #[tracing::instrument(name = "goal.supervisor.verify", skip_all, level = "debug", err)]
126 pub async fn verify(
127 &self,
128 goal_condition: &str,
129 conversation_summary: &str,
130 recent_actions: &[String],
131 ) -> Result<SupervisorVerdict, SupervisorError> {
132 let user = supervisor_user(goal_condition, conversation_summary, recent_actions);
133 tracing::debug!("goal.supervisor.verify: calling provider");
134 let (raw, _tokens, _attempt): (RawVerdict, _, _) =
135 chat_json(&self.provider, SUPERVISOR_SYSTEM, &user, self.timeout)
136 .await
137 .map_err(SupervisorError::from)?;
138 tracing::debug!(
139 achieved = raw.achieved,
140 confidence = raw.confidence,
141 "goal.supervisor.verify: done"
142 );
143 Ok(SupervisorVerdict {
144 achieved: raw.achieved,
145 reasoning: raw.reasoning,
146 confidence: raw.confidence.clamp(0.0, 1.0),
147 suggestions: raw.suggestions,
148 })
149 }
150}
151
#[cfg(test)]
mod tests {
    use super::*;
    use crate::quality::parser::ChatJsonError;
    use zeph_llm::LlmError;

    /// The rendered prompt carries every section header plus the supplied
    /// goal, summary and actions.
    #[test]
    fn supervisor_user_contains_all_sections() {
        let actions = [
            "ran cargo build".to_owned(),
            "no errors reported".to_owned(),
        ];
        let rendered = supervisor_user("the build must pass", "agent ran cargo build", &actions);

        let expectations = [
            ("Goal condition:", "goal section missing"),
            ("the build must pass", "goal text missing"),
            ("Conversation summary:", "summary section missing"),
            ("agent ran cargo build", "summary text missing"),
            ("Recent actions:", "actions section missing"),
            ("- ran cargo build", "first action missing"),
            ("- no errors reported", "second action missing"),
        ];
        for (needle, complaint) in expectations {
            assert!(rendered.contains(needle), "{}", complaint);
        }
    }

    /// With no actions the prompt shows the `(none)` placeholder.
    #[test]
    fn supervisor_user_empty_actions_shows_none() {
        let rendered = supervisor_user("goal", "summary", &[]);
        let has_placeholder = rendered.contains("(none)");
        assert!(has_placeholder, "empty actions should show (none)");
    }

    /// An ordinary LLM failure keeps its message inside `SupervisorError::Llm`.
    #[test]
    fn supervisor_error_from_chat_json_error_llm_preserved() {
        let source = LlmError::Other("backend failure".into());
        let converted: SupervisorError = ChatJsonError::Llm(source).into();
        let preserved =
            matches!(&converted, SupervisorError::Llm(text) if text.contains("backend failure"));
        assert!(preserved, "Llm variant must preserve the error message");
    }

    /// An LLM failure whose text mentions 429 is reported as rate limiting.
    #[test]
    fn supervisor_error_from_chat_json_error_llm_429_becomes_rate_limited() {
        let source = LlmError::Other("HTTP 429 rate limit".into());
        let converted: SupervisorError = ChatJsonError::Llm(source).into();
        let rate_limited = matches!(converted, SupervisorError::RateLimited);
        assert!(rate_limited, "429 in message must become RateLimited");
    }

    /// The timeout payload is carried over unchanged.
    #[test]
    fn supervisor_error_from_chat_json_error_timeout() {
        let converted: SupervisorError = ChatJsonError::Timeout(5000).into();
        let kept_millis = matches!(converted, SupervisorError::Timeout(5000));
        assert!(kept_millis, "Timeout variant must preserve milliseconds");
    }

    /// The parse payload is carried over unchanged.
    #[test]
    fn supervisor_error_from_chat_json_error_parse() {
        let converted: SupervisorError = ChatJsonError::Parse("bad json".to_owned()).into();
        let kept_raw = matches!(&converted, SupervisorError::Parse(raw) if raw == "bad json");
        assert!(kept_raw, "Parse variant must preserve the raw string");
    }
}