zeph_core/goal/
supervisor.rs1use std::time::Duration;
11
12use serde::Deserialize;
13use zeph_llm::any::AnyProvider;
14
15use super::autonomous::SupervisorVerdict;
16use crate::quality::parser::{ChatJsonError, chat_json};
17
/// Failure modes reported by the goal supervisor.
///
/// The `#[error(...)]` string on each variant is its `Display` text. The enum is
/// `#[non_exhaustive]`, so matches outside this crate need a wildcard arm.
#[non_exhaustive]
#[derive(Debug, thiserror::Error)]
pub enum SupervisorError {
    /// The LLM call failed; the payload is the stringified underlying error.
    #[error("supervisor LLM error: {0}")]
    Llm(String),
    /// The call timed out; the payload is rendered as milliseconds in the message.
    #[error("supervisor timed out after {0}ms")]
    Timeout(u64),
    /// The reply could not be parsed as JSON; the payload is the parser-supplied text.
    #[error("supervisor response was not valid JSON: {0}")]
    Parse(String),
    /// The provider signalled rate limiting (HTTP 429).
    #[error("supervisor rate-limited (429)")]
    RateLimited,
}
35
36impl From<ChatJsonError> for SupervisorError {
37 fn from(e: ChatJsonError) -> Self {
38 match e {
39 ChatJsonError::Llm(inner) => {
40 let msg = inner.to_string();
41 if msg.contains("429") {
42 Self::RateLimited
43 } else {
44 Self::Llm(msg)
45 }
46 }
47 ChatJsonError::Timeout(ms) => Self::Timeout(ms),
48 ChatJsonError::Parse(raw) => Self::Parse(raw),
49 }
50 }
51}
52
/// Deserialization target for the JSON object the supervisor model is asked to return.
///
/// `achieved` and `reasoning` are required. `confidence` and `suggestions` are marked
/// `#[serde(default)]`, so a reply that omits them yields `0.0` and an empty list.
#[derive(Deserialize)]
struct RawVerdict {
    /// Whether the model judged the goal condition to be met.
    achieved: bool,
    /// The model's explanation for its judgement.
    reasoning: String,
    /// Confidence as reported by the model; not range-checked at this stage.
    #[serde(default)]
    confidence: f32,
    /// Optional follow-up suggestions from the model.
    #[serde(default)]
    suggestions: Vec<String>,
}
63
/// System prompt sent with every supervisor verification request.
///
/// It tells the model to answer with a bare JSON object carrying the `achieved`,
/// `reasoning`, `confidence` and `suggestions` keys, and to be conservative about
/// declaring success. Each literal below is one piece of the final text exactly as
/// sent; the JSON skeleton lines carry no leading indentation.
const SUPERVISOR_SYSTEM: &str = concat!(
    "You are an autonomous goal verification assistant. ",
    "Your task is to determine whether a stated goal condition has been achieved based on the ",
    "agent's conversation summary and its recent actions.\n",
    "\n",
    "Respond with strict JSON only — no prose, no markdown fences:\n",
    "{\n",
    "\"achieved\": <bool>,\n",
    "\"reasoning\": \"<one or two sentence explanation>\",\n",
    "\"confidence\": <float 0.0..1.0>,\n",
    "\"suggestions\": [\"<optional improvement suggestion>\", ...]\n",
    "}\n",
    "Be conservative: only set achieved=true when the evidence clearly and completely satisfies ",
    "the goal condition.",
);
78
/// Renders the user-side prompt for one verification request.
///
/// The result has three labelled sections separated by blank lines: the goal condition,
/// the conversation summary, and the recent actions. Actions are listed one per line as
/// `- <action>`; an empty slice is rendered as the literal `(none)`.
fn supervisor_user(
    goal_condition: &str,
    conversation_summary: &str,
    recent_actions: &[String],
) -> String {
    let mut prompt = format!(
        "Goal condition:\n{goal_condition}\n\nConversation summary:\n{conversation_summary}\n\nRecent actions:\n"
    );

    match recent_actions.split_first() {
        None => prompt.push_str("(none)"),
        Some((head, tail)) => {
            prompt.push_str("- ");
            prompt.push_str(head);
            // Later bullets carry their own leading newline, so there is no trailing one.
            for action in tail {
                prompt.push_str("\n- ");
                prompt.push_str(action);
            }
        }
    }

    prompt
}
99
/// Asks an LLM provider to judge whether a goal condition has been met.
pub struct GoalSupervisor {
    /// Provider passed to `chat_json` for each verification call.
    provider: AnyProvider,
    /// Timeout passed to `chat_json` for each verification call.
    timeout: Duration,
}
109
110impl GoalSupervisor {
111 #[must_use]
113 pub fn new(provider: AnyProvider, timeout: Duration) -> Self {
114 Self { provider, timeout }
115 }
116
117 #[tracing::instrument(name = "goal.supervisor.verify", skip_all, level = "debug", err)]
127 pub async fn verify(
128 &self,
129 goal_condition: &str,
130 conversation_summary: &str,
131 recent_actions: &[String],
132 ) -> Result<SupervisorVerdict, SupervisorError> {
133 let user = supervisor_user(goal_condition, conversation_summary, recent_actions);
134 tracing::debug!("goal.supervisor.verify: calling provider");
135 let (raw, _tokens, _attempt): (RawVerdict, _, _) =
136 chat_json(&self.provider, SUPERVISOR_SYSTEM, &user, self.timeout)
137 .await
138 .map_err(SupervisorError::from)?;
139 tracing::debug!(
140 achieved = raw.achieved,
141 confidence = raw.confidence,
142 "goal.supervisor.verify: done"
143 );
144 Ok(SupervisorVerdict {
145 achieved: raw.achieved,
146 reasoning: raw.reasoning,
147 confidence: raw.confidence.clamp(0.0, 1.0),
148 suggestions: raw.suggestions,
149 })
150 }
151}
152
#[cfg(test)]
mod tests {
    use super::*;
    use crate::quality::parser::ChatJsonError;
    use zeph_llm::LlmError;

    /// Builds the `ChatJsonError::Llm` value used by the conversion tests.
    fn llm_failure(message: &'static str) -> ChatJsonError {
        ChatJsonError::Llm(LlmError::Other(message.into()))
    }

    #[test]
    fn supervisor_user_contains_all_sections() {
        let actions = [
            "ran cargo build".to_owned(),
            "no errors reported".to_owned(),
        ];
        let msg = supervisor_user("the build must pass", "agent ran cargo build", &actions);

        // Each pair is (fragment that must appear, failure message if it does not).
        let expectations = [
            ("Goal condition:", "goal section missing"),
            ("the build must pass", "goal text missing"),
            ("Conversation summary:", "summary section missing"),
            ("agent ran cargo build", "summary text missing"),
            ("Recent actions:", "actions section missing"),
            ("- ran cargo build", "first action missing"),
            ("- no errors reported", "second action missing"),
        ];
        for (fragment, failure) in expectations {
            assert!(msg.contains(fragment), "{failure}");
        }
    }

    #[test]
    fn supervisor_user_empty_actions_shows_none() {
        let rendered = supervisor_user("goal", "summary", &[]);
        assert!(
            rendered.contains("(none)"),
            "empty actions should show (none)"
        );
    }

    #[test]
    fn supervisor_error_from_chat_json_error_llm_preserved() {
        let converted: SupervisorError = llm_failure("backend failure").into();
        assert!(
            matches!(converted, SupervisorError::Llm(ref msg) if msg.contains("backend failure")),
            "Llm variant must preserve the error message"
        );
    }

    #[test]
    fn supervisor_error_from_chat_json_error_llm_429_becomes_rate_limited() {
        let converted: SupervisorError = llm_failure("HTTP 429 rate limit").into();
        assert!(
            matches!(converted, SupervisorError::RateLimited),
            "429 in message must become RateLimited"
        );
    }

    #[test]
    fn supervisor_error_from_chat_json_error_timeout() {
        let converted: SupervisorError = ChatJsonError::Timeout(5000).into();
        assert!(
            matches!(converted, SupervisorError::Timeout(5000)),
            "Timeout variant must preserve milliseconds"
        );
    }

    #[test]
    fn supervisor_error_from_chat_json_error_parse() {
        let converted: SupervisorError = ChatJsonError::Parse("bad json".to_owned()).into();
        assert!(
            matches!(converted, SupervisorError::Parse(ref s) if s == "bad json"),
            "Parse variant must preserve the raw string"
        );
    }
}