symbi_runtime/reasoning/
human_critic.rs

1//! Human-in-the-loop critic
2//!
3//! Provides a `HumanCritic` that suspends the reasoning loop to wait for
4//! human approval/rejection. Unifies automated and manual review under
5//! a single auditable abstraction.
6
7use serde::{Deserialize, Serialize};
8use std::sync::Arc;
9use std::time::Duration;
10use tokio::sync::{mpsc, oneshot};
11
12/// Result of a critic evaluation (automated or human).
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct CriticResult {
15    /// Whether the content was approved.
16    pub approved: bool,
17    /// Overall score (0.0 - 1.0).
18    pub score: f64,
19    /// Per-dimension scores for rubric evaluations.
20    pub dimension_scores: std::collections::HashMap<String, f64>,
21    /// Free-text feedback.
22    pub feedback: String,
23    /// Identity of the reviewer.
24    pub reviewer: ReviewerIdentity,
25}
26
27/// Identity of who performed the review.
28#[derive(Debug, Clone, Serialize, Deserialize)]
29#[serde(tag = "type")]
30pub enum ReviewerIdentity {
31    /// An LLM model performed the review.
32    #[serde(rename = "llm")]
33    Llm { model_id: String },
34    /// A human performed the review.
35    #[serde(rename = "human")]
36    Human { user_id: String, name: String },
37}
38
39/// A review request sent to human reviewers.
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct ReviewRequest {
42    /// Unique identifier for this review.
43    pub review_id: String,
44    /// Content to be reviewed.
45    pub content: String,
46    /// Context about what produced this content.
47    pub context: String,
48    /// Rubric dimensions to evaluate (if any).
49    pub rubric_dimensions: Vec<String>,
50    /// When this review was requested.
51    pub requested_at: chrono::DateTime<chrono::Utc>,
52    /// Deadline for the review.
53    pub deadline: chrono::DateTime<chrono::Utc>,
54}
55
56/// A review response from a human reviewer.
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct ReviewResponse {
59    /// The review request ID this responds to.
60    pub review_id: String,
61    /// The critic result.
62    pub result: CriticResult,
63}
64
65/// The Critic trait, implementable by both automated and human reviewers.
66#[async_trait::async_trait]
67pub trait Critic: Send + Sync {
68    /// Evaluate content and return a critic result.
69    async fn evaluate(&self, content: &str, context: &str) -> Result<CriticResult, CriticError>;
70
71    /// Returns the type of critic for logging.
72    fn critic_type(&self) -> &str;
73}
74
75/// Human critic that sends reviews to a channel and waits for responses.
76pub struct HumanCritic {
77    /// Channel to send review requests out (to webhook, UI, etc.).
78    review_sender: mpsc::Sender<(ReviewRequest, oneshot::Sender<ReviewResponse>)>,
79    /// Default timeout for human reviews.
80    timeout: Duration,
81    /// Default reviewer identity.
82    default_reviewer: String,
83}
84
85impl HumanCritic {
86    /// Create a new human critic.
87    ///
88    /// Returns the critic and a receiver that dispatches review requests
89    /// to whatever transport the application uses (webhook, websocket, etc.).
90    pub fn new(
91        timeout: Duration,
92    ) -> (
93        Self,
94        mpsc::Receiver<(ReviewRequest, oneshot::Sender<ReviewResponse>)>,
95    ) {
96        let (tx, rx) = mpsc::channel(32);
97        let critic = Self {
98            review_sender: tx,
99            timeout,
100            default_reviewer: "human".into(),
101        };
102        (critic, rx)
103    }
104
105    /// Set the default reviewer name.
106    pub fn with_reviewer_name(mut self, name: impl Into<String>) -> Self {
107        self.default_reviewer = name.into();
108        self
109    }
110}
111
112#[async_trait::async_trait]
113impl Critic for HumanCritic {
114    async fn evaluate(&self, content: &str, context: &str) -> Result<CriticResult, CriticError> {
115        let review_id = uuid::Uuid::new_v4().to_string();
116        let now = chrono::Utc::now();
117
118        let request = ReviewRequest {
119            review_id: review_id.clone(),
120            content: content.to_string(),
121            context: context.to_string(),
122            rubric_dimensions: Vec::new(),
123            requested_at: now,
124            deadline: now + chrono::Duration::from_std(self.timeout).unwrap_or_default(),
125        };
126
127        let (response_tx, response_rx) = oneshot::channel();
128
129        // Send the review request
130        self.review_sender
131            .send((request, response_tx))
132            .await
133            .map_err(|_| CriticError::ChannelClosed)?;
134
135        // Wait for the response with timeout
136        match tokio::time::timeout(self.timeout, response_rx).await {
137            Ok(Ok(response)) => Ok(response.result),
138            Ok(Err(_)) => Err(CriticError::ChannelClosed),
139            Err(_) => Err(CriticError::Timeout {
140                review_id,
141                timeout: self.timeout,
142            }),
143        }
144    }
145
146    fn critic_type(&self) -> &str {
147        "human"
148    }
149}
150
151/// An automated LLM-based critic.
152pub struct LlmCritic {
153    /// The inference provider to use.
154    provider: Arc<dyn crate::reasoning::inference::InferenceProvider>,
155    /// System prompt for the critic.
156    system_prompt: String,
157    /// Model ID for identification.
158    model_id: String,
159}
160
161impl LlmCritic {
162    /// Create a new LLM critic.
163    pub fn new(
164        provider: Arc<dyn crate::reasoning::inference::InferenceProvider>,
165        system_prompt: impl Into<String>,
166    ) -> Self {
167        let model_id = provider.default_model().to_string();
168        Self {
169            provider,
170            system_prompt: system_prompt.into(),
171            model_id,
172        }
173    }
174}
175
176#[async_trait::async_trait]
177impl Critic for LlmCritic {
178    async fn evaluate(&self, content: &str, context: &str) -> Result<CriticResult, CriticError> {
179        use crate::reasoning::conversation::{Conversation, ConversationMessage};
180        use crate::reasoning::inference::{InferenceOptions, ResponseFormat};
181
182        let mut conv = Conversation::with_system(&self.system_prompt);
183        conv.push(ConversationMessage::user(format!(
184            "Context: {}\n\nContent to review:\n{}",
185            context, content
186        )));
187
188        let schema = serde_json::json!({
189            "type": "object",
190            "properties": {
191                "approved": {"type": "boolean"},
192                "score": {"type": "number", "minimum": 0.0, "maximum": 1.0},
193                "feedback": {"type": "string"}
194            },
195            "required": ["approved", "score", "feedback"]
196        });
197
198        let options = InferenceOptions {
199            response_format: ResponseFormat::JsonSchema {
200                schema,
201                name: Some("critic_evaluation".into()),
202            },
203            ..InferenceOptions::default()
204        };
205
206        let response = self.provider.complete(&conv, &options).await.map_err(|e| {
207            CriticError::InferenceError {
208                message: e.to_string(),
209            }
210        })?;
211
212        // Parse the structured response
213        let parsed: serde_json::Value =
214            serde_json::from_str(&response.content).map_err(|e| CriticError::ParseError {
215                message: e.to_string(),
216            })?;
217
218        Ok(CriticResult {
219            approved: parsed["approved"].as_bool().unwrap_or(false),
220            score: parsed["score"].as_f64().unwrap_or(0.0),
221            dimension_scores: std::collections::HashMap::new(),
222            feedback: parsed["feedback"].as_str().unwrap_or("").to_string(),
223            reviewer: ReviewerIdentity::Llm {
224                model_id: self.model_id.clone(),
225            },
226        })
227    }
228
229    fn critic_type(&self) -> &str {
230        "llm"
231    }
232}
233
234/// Errors from the critic system.
235#[derive(Debug, thiserror::Error)]
236pub enum CriticError {
237    #[error("Review timed out (review_id={review_id}, timeout={timeout:?})")]
238    Timeout {
239        review_id: String,
240        timeout: Duration,
241    },
242
243    #[error("Review channel closed")]
244    ChannelClosed,
245
246    #[error("Inference error: {message}")]
247    InferenceError { message: String },
248
249    #[error("Failed to parse critic response: {message}")]
250    ParseError { message: String },
251}
252
#[cfg(test)]
mod tests {
    use super::*;

    /// A `CriticResult` survives a JSON round trip with its verdict, score
    /// and per-dimension scores intact.
    #[test]
    fn test_critic_result_serde() {
        let dimension_scores: std::collections::HashMap<String, f64> =
            [("accuracy", 0.9), ("clarity", 0.8)]
                .iter()
                .map(|(dimension, score)| ((*dimension).to_string(), *score))
                .collect();

        let original = CriticResult {
            approved: true,
            score: 0.85,
            dimension_scores,
            feedback: "Good analysis.".into(),
            reviewer: ReviewerIdentity::Llm {
                model_id: "claude-sonnet".into(),
            },
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: CriticResult = serde_json::from_str(&encoded).unwrap();

        assert!(decoded.approved);
        assert!((decoded.score - 0.85).abs() < f64::EPSILON);
        assert_eq!(decoded.dimension_scores.len(), 2);
    }

    /// A `ReviewRequest` survives a JSON round trip.
    #[test]
    fn test_review_request_serde() {
        let original = ReviewRequest {
            review_id: "test-123".into(),
            content: "Content to review".into(),
            context: "Generated by agent X".into(),
            rubric_dimensions: vec!["accuracy".into(), "completeness".into()],
            requested_at: chrono::Utc::now(),
            deadline: chrono::Utc::now() + chrono::Duration::minutes(5),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: ReviewRequest = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.review_id, "test-123");
        assert_eq!(decoded.rubric_dimensions.len(), 2);
    }

    /// Each reviewer variant is tagged with its lowercase name under `type`.
    #[test]
    fn test_reviewer_identity_serde() {
        let llm_json = serde_json::to_string(&ReviewerIdentity::Llm {
            model_id: "gpt-4".into(),
        })
        .unwrap();
        assert!(llm_json.contains("\"type\":\"llm\""));

        let human_json = serde_json::to_string(&ReviewerIdentity::Human {
            user_id: "user-1".into(),
            name: "Alice".into(),
        })
        .unwrap();
        assert!(human_json.contains("\"type\":\"human\""));
    }

    /// With nobody answering, the evaluation ends in a `Timeout` error.
    #[tokio::test]
    async fn test_human_critic_timeout() {
        // The receiver is kept alive but never polled, so no response can
        // ever arrive.
        let (critic, _rx) = HumanCritic::new(Duration::from_millis(50));

        let outcome = critic.evaluate("test content", "test context").await;

        assert!(
            matches!(outcome, Err(CriticError::Timeout { .. })),
            "Expected Timeout, got {:?}",
            outcome
        );
    }

    /// A response sent by the reviewer side is returned to the caller.
    #[tokio::test]
    async fn test_human_critic_response() {
        let (critic, mut rx) = HumanCritic::new(Duration::from_secs(5));

        // Stand-in reviewer: approve the first request that comes in.
        tokio::spawn(async move {
            if let Some((request, response_tx)) = rx.recv().await {
                let verdict = CriticResult {
                    approved: true,
                    score: 0.9,
                    dimension_scores: std::collections::HashMap::new(),
                    feedback: "Looks good!".into(),
                    reviewer: ReviewerIdentity::Human {
                        user_id: "tester".into(),
                        name: "Test User".into(),
                    },
                };
                let _ = response_tx.send(ReviewResponse {
                    review_id: request.review_id,
                    result: verdict,
                });
            }
        });

        let verdict = critic
            .evaluate("test content", "test context")
            .await
            .unwrap();

        assert!(verdict.approved);
        assert!((verdict.score - 0.9).abs() < f64::EPSILON);
        assert_eq!(verdict.feedback, "Looks good!");
    }

    /// Dropping the review receiver makes evaluation fail with
    /// `ChannelClosed`.
    #[tokio::test]
    async fn test_human_critic_channel_closed() {
        let (critic, rx) = HumanCritic::new(Duration::from_secs(5));
        drop(rx);

        let outcome = critic.evaluate("test", "context").await;

        assert!(matches!(outcome.unwrap_err(), CriticError::ChannelClosed));
    }
}
symbi_runtime/reasoning/human_critic.rs

symbi_runtime/reasoning/
human_critic.rs