pub struct FeedbackMetric {
    pub score: f32,
    pub feedback: String,
    pub metadata: HashMap<String, Value>,
}

Rich evaluation metric with both a numeric score and textual feedback.

GEPA uses this to understand why a score was assigned, enabling more targeted prompt improvements.
Fields

score: f32
Numerical score (typically 0.0 to 1.0, but any range is allowed).

feedback: String
Rich textual feedback explaining the score.
Examples:
- “✓ Retrieved 3/3 correct documents”
- “✗ Code failed to compile: missing semicolon on line 5”
- “Partially correct: got answer ‘42’ but expected ‘42.0’”
metadata: HashMap<String, Value>
Optional structured metadata for additional context.
Can include:
- Intermediate outputs from pipeline stages
- Error messages and stack traces
- Performance metrics (latency, tokens, cost)
- Domain-specific diagnostics
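A sketch of a fully populated metric, assuming the Value in HashMap<String, Value> is serde_json::Value (check the crate's re-exports) and that FeedbackMetric is already in scope:

use std::collections::HashMap;
use serde_json::{json, Value};

// All three fields are public, so a struct literal works.
let mut metadata: HashMap<String, Value> = HashMap::new();
metadata.insert("latency_ms".to_string(), json!(412)); // performance metrics
metadata.insert("retrieved_docs".to_string(), json!(["a.txt", "b.txt", "c.txt"])); // intermediate outputs

let metric = FeedbackMetric {
    score: 1.0,
    feedback: "✓ Retrieved 3/3 correct documents".to_string(),
    metadata,
};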
Implementations

impl FeedbackMetric

pub fn new(score: f32, feedback: impl Into<String>) -> Self

Create a new feedback metric.
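A minimal usage sketch (not taken from the crate's own tests); any impl Into<String> works for the feedback argument:

let metric = FeedbackMetric::new(0.5, "Partially correct: got answer '42' but expected '42.0'");
assert_eq!(metric.score, 0.5);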
Examples found in repository:

examples/09-gepa-sentiment.rs (line 111)
51 async fn feedback_metric(&self, example: &Example, prediction: &Prediction) -> FeedbackMetric {
52 let predicted = prediction
53 .get("sentiment", None)
54 .as_str()
55 .unwrap_or("")
56 .to_string()
57 .to_lowercase();
58
59 let expected = example
60 .get("expected_sentiment", None)
61 .as_str()
62 .unwrap_or("")
63 .to_string()
64 .to_lowercase();
65
66 let text = example.get("text", None).as_str().unwrap_or("").to_string();
67
68 let reasoning = prediction
69 .get("reasoning", None)
70 .as_str()
71 .unwrap_or("")
72 .to_string();
73
74 // Calculate score
75 let correct = predicted == expected;
76 let score = if correct { 1.0 } else { 0.0 };
77
78 // Create rich feedback
79 let mut feedback = if correct {
80 format!("Correct classification: \"{}\"\n", expected)
81 } else {
82 format!(
83 "Incorrect classification\n Expected: \"{}\"\n Predicted: \"{}\"\n",
84 expected, predicted
85 )
86 };
87
88 // Add context about the input
89 feedback.push_str(&format!(" Input text: \"{}\"\n", text));
90
91 // Add reasoning analysis
92 if !reasoning.is_empty() {
93 feedback.push_str(&format!(" Reasoning: {}\n", reasoning));
94
95 // Check if reasoning mentions key sentiment words
96 let has_reasoning_quality = if correct {
97 // For correct answers, check if reasoning is substantive
98 reasoning.len() > 20
99 } else {
100 // For incorrect answers, note what went wrong
101 false
102 };
103
104 if has_reasoning_quality {
105 feedback.push_str(" Reasoning appears detailed\n");
106 } else if !correct {
107 feedback.push_str(" May have misunderstood the text sentiment\n");
108 }
109 }
110
111 FeedbackMetric::new(score, feedback)
112 }

More examples
examples/10-gepa-llm-judge.rs (line 160)
102 async fn feedback_metric(&self, example: &Example, prediction: &Prediction) -> FeedbackMetric {
103 // Extract the problem and answers
104 let problem = example
105 .get("problem", None)
106 .as_str()
107 .unwrap_or("")
108 .to_string();
109
110 let expected = example
111 .get("expected_answer", None)
112 .as_str()
113 .unwrap_or("")
114 .to_string();
115
116 let student_answer = prediction
117 .get("answer", None)
118 .as_str()
119 .unwrap_or("")
120 .to_string();
121
122 let student_reasoning = prediction
123 .get("reasoning", None)
124 .as_str()
125 .unwrap_or("No reasoning provided")
126 .to_string();
127
128 // Quick check: is the answer exactly correct?
129 let answer_matches = student_answer.trim() == expected.trim();
130
131 // Use LLM judge to analyze the reasoning quality
132 // This is where the magic happens - the judge provides rich feedback
133 let judge_input = example! {
134 "problem": "input" => &problem,
135 "expected_answer": "input" => &expected,
136 "student_answer": "input" => &student_answer,
137 "student_reasoning": "input" => &student_reasoning
138 };
139
140 let judge_output = match self
141 .judge
142 .forward_with_config(judge_input, Arc::clone(&self.judge_lm))
143 .await
144 {
145 Ok(output) => output,
146 Err(_) => {
147 // If judge fails, fall back to simple feedback
148 let score = if answer_matches { 1.0 } else { 0.0 };
149 let simple_feedback = format!(
150 "Problem: {}\nExpected: {}\nPredicted: {}\nAnswer: {}",
151 problem,
152 expected,
153 student_answer,
154 if answer_matches {
155 "CORRECT"
156 } else {
157 "INCORRECT"
158 }
159 );
160 return FeedbackMetric::new(score, simple_feedback);
161 }
162 };
163
164 let judge_evaluation = judge_output
165 .get("evaluation", None)
166 .as_str()
167 .unwrap_or("Unable to evaluate")
168 .to_string();
169
170 // Calculate score based on answer correctness and reasoning quality
171 // The judge's evaluation helps us assign partial credit
172 let score = if answer_matches {
173 // Correct answer - check if reasoning is also sound
174 if judge_evaluation.to_lowercase().contains("sound reasoning")
175 || judge_evaluation.to_lowercase().contains("correct approach")
176 {
177 1.0 // Perfect: right answer, good reasoning
178 } else {
179 0.7 // Right answer but flawed reasoning (lucky guess?)
180 }
181 } else {
182 // Wrong answer - check if there's any partial credit
183 if judge_evaluation.to_lowercase().contains("correct approach")
184 || judge_evaluation.to_lowercase().contains("good start")
185 {
186 0.3 // Wrong answer but some valid steps
187 } else {
188 0.0 // Completely wrong
189 }
190 };
191
192 // Construct rich textual feedback
193 // This combines factual info with the judge's analysis
194 let mut feedback = String::new();
195
196 feedback.push_str(&format!("Problem: {}\n", problem));
197 feedback.push_str(&format!("Expected: {}\n", expected));
198 feedback.push_str(&format!("Predicted: {}\n", student_answer));
199
200 if answer_matches {
201 feedback.push_str("Answer: CORRECT\n\n");
202 } else {
203 feedback.push_str("Answer: INCORRECT\n\n");
204 }
205
206 feedback.push_str("Reasoning Quality Analysis:\n");
207 feedback.push_str(&judge_evaluation);
208
209 // Return the feedback metric with score and rich text
210 FeedbackMetric::new(score, feedback)
211 }

pub fn with_metadata(
    score: f32,
    feedback: impl Into<String>,
    metadata: HashMap<String, Value>,
) -> Self
Create a feedback metric with metadata
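A short sketch, again assuming Value is serde_json::Value:

use std::collections::HashMap;
use serde_json::json;

let mut metadata = HashMap::new();
metadata.insert("compile_error".to_string(), json!("missing semicolon on line 5"));

let metric = FeedbackMetric::with_metadata(
    0.0,
    "✗ Code failed to compile: missing semicolon on line 5",
    metadata,
);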
pub fn add_metadata(self, key: impl Into<String>, value: Value) -> Self
Add metadata to an existing feedback metric
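Because add_metadata takes self and returns Self, calls chain builder-style; a sketch under the same serde_json::Value assumption:

use serde_json::json;

let metric = FeedbackMetric::new(0.3, "Wrong answer but some valid steps")
    .add_metadata("expected", json!("42.0"))
    .add_metadata("predicted", json!("42"));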
Trait Implementations

impl Clone for FeedbackMetric

fn clone(&self) -> FeedbackMetric
Returns a duplicate of the value.

fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.

impl Debug for FeedbackMetric
impl Default for FeedbackMetric
impl<'de> Deserialize<'de> for FeedbackMetric

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer.
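The Code blanket implementation below requires Serialize + DeserializeOwned and applies to this type, so FeedbackMetric also serializes; a JSON round-trip sketch using serde_json:

let metric = FeedbackMetric::new(0.7, "Right answer but flawed reasoning");
let encoded = serde_json::to_string(&metric).expect("serialize");
let decoded: FeedbackMetric = serde_json::from_str(&encoded).expect("deserialize");
assert_eq!(decoded.score, metric.score);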
Auto Trait Implementations
impl Freeze for FeedbackMetric
impl RefUnwindSafe for FeedbackMetric
impl Send for FeedbackMetric
impl Sync for FeedbackMetric
impl Unpin for FeedbackMetric
impl UnwindSafe for FeedbackMetric
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,

fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.
impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> Code for T
where
    T: Serialize + DeserializeOwned,
impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true; otherwise converts self into a Right variant.

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true; otherwise converts self into a Right variant.