dspy_rs/evaluate/
feedback.rs

//! Feedback-based evaluation for the GEPA optimizer
//!
//! This module provides structures and traits for rich, textual feedback
//! that guides the GEPA optimization process.

use crate::{Example, Prediction};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Rich evaluation metric with both score and textual feedback
///
/// GEPA uses this to understand *why* a score was assigned, enabling
/// more targeted prompt improvements.
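///
/// # Example
///
/// A minimal usage sketch; the `use` path below is an assumption about how the
/// crate exposes this type, so the block is not compiled as a doc test.
///
/// ```ignore
/// use dspy_rs::evaluate::feedback::FeedbackMetric; // assumed path
///
/// let metric = FeedbackMetric::new(0.5, "Partially correct: missing units")
///     .add_metadata("latency_ms", serde_json::json!(120));
/// assert_eq!(metric.score, 0.5);
/// ```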
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FeedbackMetric {
    /// Numerical score (typically 0.0 to 1.0, but can be any range)
    pub score: f32,

    /// Rich textual feedback explaining the score
    ///
    /// Examples:
    /// - "✓ Retrieved 3/3 correct documents"
    /// - "✗ Code failed to compile: missing semicolon on line 5"
    /// - "Partially correct: got answer '42' but expected '42.0'"
    pub feedback: String,

    /// Optional structured metadata for additional context
    ///
    /// Can include:
    /// - Intermediate outputs from pipeline stages
    /// - Error messages and stack traces
    /// - Performance metrics (latency, tokens, cost)
    /// - Domain-specific diagnostics
    pub metadata: HashMap<String, serde_json::Value>,
}

impl FeedbackMetric {
    /// Create a new feedback metric
    pub fn new(score: f32, feedback: impl Into<String>) -> Self {
        Self {
            score,
            feedback: feedback.into(),
            metadata: HashMap::new(),
        }
    }

    /// Create a feedback metric with metadata
    pub fn with_metadata(
        score: f32,
        feedback: impl Into<String>,
        metadata: HashMap<String, serde_json::Value>,
    ) -> Self {
        Self {
            score,
            feedback: feedback.into(),
            metadata,
        }
    }

    /// Add metadata to an existing feedback metric
    pub fn add_metadata(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
        self.metadata.insert(key.into(), value);
        self
    }
}

impl Default for FeedbackMetric {
    fn default() -> Self {
        Self {
            score: 0.0,
            feedback: String::new(),
            metadata: HashMap::new(),
        }
    }
}

/// Trait for evaluators that provide rich feedback
///
/// This extends the basic Evaluator trait to return feedback alongside scores.
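///
/// # Example
///
/// A minimal implementation sketch with placeholder scoring logic; imports are
/// omitted because the exact re-export paths are not shown here, so the block
/// is not compiled as a doc test.
///
/// ```ignore
/// struct ConstantEvaluator;
///
/// impl FeedbackEvaluator for ConstantEvaluator {
///     async fn feedback_metric(&self, _example: &Example, _prediction: &Prediction) -> FeedbackMetric {
///         // A real evaluator would compare the prediction against the example here.
///         FeedbackMetric::new(1.0, "✓ accepted (placeholder)")
///     }
/// }
/// ```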
#[allow(async_fn_in_trait)]
pub trait FeedbackEvaluator {
    /// Evaluate an example and return both score and feedback
    async fn feedback_metric(&self, example: &Example, prediction: &Prediction) -> FeedbackMetric;

    /// Evaluate with multiple objectives (for multi-objective optimization)
    async fn multi_objective_metric(
        &self,
        example: &Example,
        prediction: &Prediction,
    ) -> Vec<FeedbackMetric> {
        // Default: single objective
        vec![self.feedback_metric(example, prediction).await]
    }
}

/// Execution trace capturing program behavior
///
/// Captures the full execution path of a module, including intermediate
/// steps, errors, and environmental feedback.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionTrace {
    /// Input example
    pub inputs: Example,

    /// Final prediction (if successful)
    pub outputs: Option<Prediction>,

    /// Evaluation feedback
    pub feedback: Option<FeedbackMetric>,

    /// Intermediate steps in the execution
    ///
    /// Each entry is (step_name, step_output)
    pub intermediate_steps: Vec<(String, serde_json::Value)>,

    /// Errors encountered during execution
    pub errors: Vec<String>,

    /// Execution metadata (timing, cost, etc.)
    pub metadata: HashMap<String, serde_json::Value>,
}

impl ExecutionTrace {
    /// Create a simple trace with just inputs and outputs
    pub fn simple(inputs: Example, outputs: Prediction) -> Self {
        Self {
            inputs,
            outputs: Some(outputs),
            feedback: None,
            intermediate_steps: Vec::new(),
            errors: Vec::new(),
            metadata: HashMap::new(),
        }
    }

    /// Create a new trace builder
    pub fn builder(inputs: Example) -> ExecutionTraceBuilder {
        ExecutionTraceBuilder::new(inputs)
    }

    /// Add feedback to the trace
    pub fn with_feedback(mut self, feedback: FeedbackMetric) -> Self {
        self.feedback = Some(feedback);
        self
    }

    /// Check if execution was successful
    pub fn is_successful(&self) -> bool {
        self.outputs.is_some() && self.errors.is_empty()
    }

    /// Get score if available
    pub fn score(&self) -> Option<f32> {
        self.feedback.as_ref().map(|f| f.score)
    }

    /// Format trace for LLM reflection
    pub fn format_for_reflection(&self) -> String {
        let mut result = String::new();

        // Input
        result.push_str("Input:\n");
        result.push_str(&format!("{:?}\n\n", self.inputs));

        // Intermediate steps
        if !self.intermediate_steps.is_empty() {
            result.push_str("Execution Steps:\n");
            for (i, (step_name, output)) in self.intermediate_steps.iter().enumerate() {
                result.push_str(&format!("{}. {}: {:?}\n", i + 1, step_name, output));
            }
            result.push('\n');
        }

        // Output
        if let Some(ref outputs) = self.outputs {
            result.push_str("Output:\n");
            result.push_str(&format!("{:?}\n\n", outputs));
        }

        // Errors
        if !self.errors.is_empty() {
            result.push_str("Errors:\n");
            for error in &self.errors {
                result.push_str(&format!("- {}\n", error));
            }
            result.push('\n');
        }

        // Feedback
        if let Some(ref feedback) = self.feedback {
            result.push_str("Evaluation:\n");
            result.push_str(&format!("Score: {:.3}\n", feedback.score));
            result.push_str(&format!("Feedback: {}\n", feedback.feedback));
        }

        result
    }
}

/// Builder for ExecutionTrace
pub struct ExecutionTraceBuilder {
    trace: ExecutionTrace,
}

impl ExecutionTraceBuilder {
    /// Start a new trace for the given inputs
    pub fn new(inputs: Example) -> Self {
        Self {
            trace: ExecutionTrace {
                inputs,
                outputs: None,
                feedback: None,
                intermediate_steps: Vec::new(),
                errors: Vec::new(),
                metadata: HashMap::new(),
            },
        }
    }

    /// Set the final prediction
    pub fn outputs(mut self, outputs: Prediction) -> Self {
        self.trace.outputs = Some(outputs);
        self
    }

    /// Attach evaluation feedback
    pub fn feedback(mut self, feedback: FeedbackMetric) -> Self {
        self.trace.feedback = Some(feedback);
        self
    }

    /// Record an intermediate step as (name, output)
    pub fn add_step(mut self, name: impl Into<String>, output: serde_json::Value) -> Self {
        self.trace.intermediate_steps.push((name.into(), output));
        self
    }

    /// Record an error encountered during execution
    pub fn add_error(mut self, error: impl Into<String>) -> Self {
        self.trace.errors.push(error.into());
        self
    }

    /// Attach execution metadata (timing, cost, etc.)
    pub fn add_metadata(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
        self.trace.metadata.insert(key.into(), value);
        self
    }

    /// Finish building and return the trace
    pub fn build(self) -> ExecutionTrace {
        self.trace
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_feedback_metric_creation() {
        let feedback = FeedbackMetric::new(0.8, "Good result");
        assert_eq!(feedback.score, 0.8);
        assert_eq!(feedback.feedback, "Good result");
        assert!(feedback.metadata.is_empty());
    }

    #[test]
    fn test_feedback_metric_with_metadata() {
        let mut meta = HashMap::new();
        meta.insert("latency_ms".to_string(), json!(150));

        let feedback = FeedbackMetric::with_metadata(0.9, "Excellent", meta);
        assert_eq!(feedback.score, 0.9);
        assert_eq!(feedback.metadata.get("latency_ms").unwrap(), &json!(150));
    }
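
    // Exercises the chained `add_metadata` helper and the `Default` impl.
    #[test]
    fn test_feedback_metric_add_metadata_and_default() {
        let feedback = FeedbackMetric::new(0.7, "Mostly correct")
            .add_metadata("tokens", json!(312))
            .add_metadata("stage", json!("retrieval"));
        assert_eq!(feedback.metadata.len(), 2);
        assert_eq!(feedback.metadata.get("tokens").unwrap(), &json!(312));

        let empty = FeedbackMetric::default();
        assert_eq!(empty.score, 0.0);
        assert!(empty.feedback.is_empty());
        assert!(empty.metadata.is_empty());
    }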

    #[test]
    fn test_execution_trace_builder() {
        use std::collections::HashMap;
        let mut input_data = HashMap::new();
        input_data.insert("question".to_string(), json!("What is 2+2?"));
        let inputs = crate::Example::new(input_data, vec!["question".to_string()], vec![]);

        let mut pred_data = HashMap::new();
        pred_data.insert("answer".to_string(), json!("4"));
        let prediction = crate::Prediction::new(pred_data, crate::LmUsage::default());

        let trace = ExecutionTrace::builder(inputs)
            .add_step("parse", json!("2+2"))
            .add_step("compute", json!(4))
            .outputs(prediction)
            .feedback(FeedbackMetric::new(1.0, "Correct"))
            .build();

        assert!(trace.is_successful());
        assert_eq!(trace.score(), Some(1.0));
        assert_eq!(trace.intermediate_steps.len(), 2);
    }
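
    // Covers `ExecutionTrace::simple`, `with_feedback`, and the reflection
    // formatting helper; input construction mirrors the builder test above.
    #[test]
    fn test_simple_trace_and_reflection_format() {
        use std::collections::HashMap;
        let mut input_data = HashMap::new();
        input_data.insert("question".to_string(), json!("What is 2+2?"));
        let inputs = crate::Example::new(input_data, vec!["question".to_string()], vec![]);

        let mut pred_data = HashMap::new();
        pred_data.insert("answer".to_string(), json!("4"));
        let prediction = crate::Prediction::new(pred_data, crate::LmUsage::default());

        let trace = ExecutionTrace::simple(inputs, prediction)
            .with_feedback(FeedbackMetric::new(0.5, "Right value, missing units"));

        let text = trace.format_for_reflection();
        assert!(text.contains("Input:"));
        assert!(text.contains("Output:"));
        assert!(text.contains("Evaluation:"));
        assert!(text.contains("Score: 0.500"));
    }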

    #[test]
    fn test_trace_with_errors() {
        use std::collections::HashMap;
        let mut input_data = HashMap::new();
        input_data.insert("question".to_string(), json!("Invalid"));
        let inputs = crate::Example::new(input_data, vec!["question".to_string()], vec![]);

        let trace = ExecutionTrace::builder(inputs)
            .add_error("Parse failed")
            .build();

        assert!(!trace.is_successful());
        assert_eq!(trace.errors.len(), 1);
    }
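
    // Covers `ExecutionTraceBuilder::add_metadata` and the accessors when no
    // outputs or feedback are attached.
    #[test]
    fn test_builder_metadata_and_missing_feedback() {
        use std::collections::HashMap;
        let mut input_data = HashMap::new();
        input_data.insert("question".to_string(), json!("What is 2+2?"));
        let inputs = crate::Example::new(input_data, vec!["question".to_string()], vec![]);

        let trace = ExecutionTrace::builder(inputs)
            .add_metadata("latency_ms", json!(42))
            .build();

        assert!(!trace.is_successful());
        assert_eq!(trace.score(), None);
        assert_eq!(trace.metadata.get("latency_ms").unwrap(), &json!(42));
    }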
}