pub struct Prediction {
pub data: HashMap<String, Value>,
pub lm_usage: LmUsage,
}

Fields

data: HashMap<String, Value>
lm_usage: LmUsage

Implementations
Source§impl Prediction
impl Prediction
pub fn new(data: HashMap<String, Value>, lm_usage: LmUsage) -> Self
pub fn get(&self, key: &str, default: Option<&str>) -> Value
pub fn get(&self, key: &str, default: Option<&str>) -> Value
Examples found in repository?
examples/09-gepa-sentiment.rs (line 53)
51 async fn feedback_metric(&self, example: &Example, prediction: &Prediction) -> FeedbackMetric {
52 let predicted = prediction
53 .get("sentiment", None)
54 .as_str()
55 .unwrap_or("")
56 .to_string()
57 .to_lowercase();
58
59 let expected = example
60 .get("expected_sentiment", None)
61 .as_str()
62 .unwrap_or("")
63 .to_string()
64 .to_lowercase();
65
66 let text = example.get("text", None).as_str().unwrap_or("").to_string();
67
68 let reasoning = prediction
69 .get("reasoning", None)
70 .as_str()
71 .unwrap_or("")
72 .to_string();
73
74 // Calculate score
75 let correct = predicted == expected;
76 let score = if correct { 1.0 } else { 0.0 };
77
78 // Create rich feedback
79 let mut feedback = if correct {
80 format!("Correct classification: \"{}\"\n", expected)
81 } else {
82 format!(
83 "Incorrect classification\n Expected: \"{}\"\n Predicted: \"{}\"\n",
84 expected, predicted
85 )
86 };
87
88 // Add context about the input
89 feedback.push_str(&format!(" Input text: \"{}\"\n", text));
90
91 // Add reasoning analysis
92 if !reasoning.is_empty() {
93 feedback.push_str(&format!(" Reasoning: {}\n", reasoning));
94
95 // Check if reasoning mentions key sentiment words
96 let has_reasoning_quality = if correct {
97 // For correct answers, check if reasoning is substantive
98 reasoning.len() > 20
99 } else {
100 // For incorrect answers, note what went wrong
101 false
102 };
103
104 if has_reasoning_quality {
105 feedback.push_str(" Reasoning appears detailed\n");
106 } else if !correct {
107 feedback.push_str(" May have misunderstood the text sentiment\n");
108 }
109 }
110
111 FeedbackMetric::new(score, feedback)
112 }
113}
114
115#[tokio::main]
116async fn main() -> Result<()> {
117 println!("GEPA Sentiment Analysis Optimization Example\n");
118
119 // Setup LM
120 let lm = LM::builder().temperature(0.7).build().await.unwrap();
121
122 configure(lm.clone(), ChatAdapter);
123
124 // Create training examples with diverse sentiments
125 let trainset = vec![
126 example! {
127 "text": "input" => "This movie was absolutely fantastic! I loved every minute of it.",
128 "expected_sentiment": "input" => "positive"
129 },
130 example! {
131 "text": "input" => "Terrible service, will never come back again.",
132 "expected_sentiment": "input" => "negative"
133 },
134 example! {
135 "text": "input" => "The weather is okay, nothing special.",
136 "expected_sentiment": "input" => "neutral"
137 },
138 example! {
139 "text": "input" => "Despite some minor issues, I'm quite happy with the purchase.",
140 "expected_sentiment": "input" => "positive"
141 },
142 example! {
143 "text": "input" => "I have mixed feelings about this product.",
144 "expected_sentiment": "input" => "neutral"
145 },
146 example! {
147 "text": "input" => "This is the worst experience I've ever had!",
148 "expected_sentiment": "input" => "negative"
149 },
150 example! {
151 "text": "input" => "It's fine. Does what it's supposed to do.",
152 "expected_sentiment": "input" => "neutral"
153 },
154 example! {
155 "text": "input" => "Exceeded all my expectations! Highly recommend!",
156 "expected_sentiment": "input" => "positive"
157 },
158 example! {
159 "text": "input" => "Disappointed and frustrated with the outcome.",
160 "expected_sentiment": "input" => "negative"
161 },
162 example! {
163 "text": "input" => "Standard quality, nothing remarkable.",
164 "expected_sentiment": "input" => "neutral"
165 },
166 ];
167
168 // Create module
169 let mut module = SentimentAnalyzer::builder()
170 .predictor(Predict::new(SentimentSignature::new()))
171 .build();
172
173 // Evaluate baseline performance
174 println!("Baseline Performance:");
175 let baseline_score = module.evaluate(trainset.clone()).await;
176 println!(" Average score: {:.3}\n", baseline_score);
177
178 // Configure GEPA optimizer
179 let gepa = GEPA::builder()
180 .num_iterations(5)
181 .minibatch_size(5)
182 .num_trials(3)
183 .temperature(0.9)
184 .track_stats(true)
185 .build();
186
187 // Run optimization
188 println!("Starting GEPA optimization...\n");
189 let result = gepa
190 .compile_with_feedback(&mut module, trainset.clone())
191 .await?;
192
193 // Display results
194 println!("\nOptimization Results:");
195 println!(
196 " Best average score: {:.3}",
197 result.best_candidate.average_score()
198 );
199 println!(" Total rollouts: {}", result.total_rollouts);
200 println!(" Total LM calls: {}", result.total_lm_calls);
201 println!(" Generations: {}", result.evolution_history.len());
202
203 println!("\nBest Instruction:");
204 println!(" {}", result.best_candidate.instruction);
205
206 if !result.evolution_history.is_empty() {
207 println!("\nEvolution History:");
208 for entry in &result.evolution_history {
209 println!(" Generation {}: {:.3}", entry.0, entry.1);
210 }
211 }
212
213 // Test optimized module on a new example
214 println!("\nTesting Optimized Module:");
215 let test_example = example! {
216 "text": "input" => "This product changed my life! Absolutely amazing!",
217 "expected_sentiment": "input" => "positive"
218 };
219
220 let test_prediction = module.forward(test_example.clone()).await?;
221 let test_feedback = module
222 .feedback_metric(&test_example, &test_prediction)
223 .await;
224
225 println!(
226 " Test prediction: {}",
227 test_prediction.get("sentiment", None)
228 );
229 println!(" Test score: {:.3}", test_feedback.score);
230 println!(" Feedback:\n{}", test_feedback.feedback);
231
232 Ok(())
233}

More examples
examples/08-optimize-mipro.rs (line 169)
84async fn main() -> Result<()> {
85 println!("=== MIPROv2 Optimizer Example ===\n");
86
87 // Configure the LM
88 configure(LM::default(), ChatAdapter);
89
90 // Load training data from HuggingFace
91 println!("Loading training data from HuggingFace...");
92 let train_examples = DataLoader::load_hf(
93 "hotpotqa/hotpot_qa",
94 vec!["question".to_string()],
95 vec!["answer".to_string()],
96 "fullwiki",
97 "validation",
98 true,
99 )?;
100
101 // Use a small subset for faster optimization
102 let train_subset = train_examples[..15].to_vec();
103 println!("Using {} training examples\n", train_subset.len());
104
105 // Create the module
106 let mut qa_module = SimpleQA::builder().build();
107
108 // Show initial instruction
109 println!("Initial instruction:");
110 println!(
111 " \"{}\"\n",
112 qa_module.answerer.get_signature().instruction()
113 );
114
115 // Test baseline performance
116 println!("Evaluating baseline performance...");
117 let baseline_score = qa_module.evaluate(train_subset[..5].to_vec()).await;
118 println!("Baseline score: {:.3}\n", baseline_score);
119
120 // Create MIPROv2 optimizer
121 let optimizer = MIPROv2::builder()
122 .num_candidates(8) // Generate 8 candidate prompts
123 .num_trials(15) // Run 15 evaluation trials
124 .minibatch_size(10) // Evaluate on 10 examples per candidate
125 .temperature(1.0) // Temperature for prompt generation
126 .track_stats(true) // Display detailed statistics
127 .build();
128
129 // Optimize the module
130 println!("Starting MIPROv2 optimization...");
131 println!("This will:");
132 println!(" 1. Generate execution traces");
133 println!(" 2. Create a program description using LLM");
134 println!(" 3. Generate {} candidate prompts with best practices", 8);
135 println!(" 4. Evaluate each candidate");
136 println!(" 5. Select and apply the best prompt\n");
137
138 optimizer
139 .compile(&mut qa_module, train_subset.clone())
140 .await?;
141
142 // Show optimized instruction
143 println!("\nOptimized instruction:");
144 println!(
145 " \"{}\"\n",
146 qa_module.answerer.get_signature().instruction()
147 );
148
149 // Test optimized performance
150 println!("Evaluating optimized performance...");
151 let optimized_score = qa_module.evaluate(train_subset[..5].to_vec()).await;
152 println!("Optimized score: {:.3}", optimized_score);
153
154 // Show improvement
155 let improvement = ((optimized_score - baseline_score) / baseline_score) * 100.0;
156 println!(
157 "\n✓ Improvement: {:.1}% ({:.3} -> {:.3})",
158 improvement, baseline_score, optimized_score
159 );
160
161 // Test on a new example
162 println!("\n--- Testing on a new example ---");
163 let test_example = example! {
164 "question": "input" => "What is the capital of France?",
165 };
166
167 let result = qa_module.forward(test_example).await?;
168 println!("Question: What is the capital of France?");
169 println!("Answer: {}", result.get("answer", None));
170
171 println!("\n=== Example Complete ===");
172 Ok(())
173}

examples/10-gepa-llm-judge.rs (line 117)
102 async fn feedback_metric(&self, example: &Example, prediction: &Prediction) -> FeedbackMetric {
103 // Extract the problem and answers
104 let problem = example
105 .get("problem", None)
106 .as_str()
107 .unwrap_or("")
108 .to_string();
109
110 let expected = example
111 .get("expected_answer", None)
112 .as_str()
113 .unwrap_or("")
114 .to_string();
115
116 let student_answer = prediction
117 .get("answer", None)
118 .as_str()
119 .unwrap_or("")
120 .to_string();
121
122 let student_reasoning = prediction
123 .get("reasoning", None)
124 .as_str()
125 .unwrap_or("No reasoning provided")
126 .to_string();
127
128 // Quick check: is the answer exactly correct?
129 let answer_matches = student_answer.trim() == expected.trim();
130
131 // Use LLM judge to analyze the reasoning quality
132 // This is where the magic happens - the judge provides rich feedback
133 let judge_input = example! {
134 "problem": "input" => &problem,
135 "expected_answer": "input" => &expected,
136 "student_answer": "input" => &student_answer,
137 "student_reasoning": "input" => &student_reasoning
138 };
139
140 let judge_output = match self
141 .judge
142 .forward_with_config(judge_input, Arc::clone(&self.judge_lm))
143 .await
144 {
145 Ok(output) => output,
146 Err(_) => {
147 // If judge fails, fall back to simple feedback
148 let score = if answer_matches { 1.0 } else { 0.0 };
149 let simple_feedback = format!(
150 "Problem: {}\nExpected: {}\nPredicted: {}\nAnswer: {}",
151 problem,
152 expected,
153 student_answer,
154 if answer_matches {
155 "CORRECT"
156 } else {
157 "INCORRECT"
158 }
159 );
160 return FeedbackMetric::new(score, simple_feedback);
161 }
162 };
163
164 let judge_evaluation = judge_output
165 .get("evaluation", None)
166 .as_str()
167 .unwrap_or("Unable to evaluate")
168 .to_string();
169
170 // Calculate score based on answer correctness and reasoning quality
171 // The judge's evaluation helps us assign partial credit
172 let score = if answer_matches {
173 // Correct answer - check if reasoning is also sound
174 if judge_evaluation.to_lowercase().contains("sound reasoning")
175 || judge_evaluation.to_lowercase().contains("correct approach")
176 {
177 1.0 // Perfect: right answer, good reasoning
178 } else {
179 0.7 // Right answer but flawed reasoning (lucky guess?)
180 }
181 } else {
182 // Wrong answer - check if there's any partial credit
183 if judge_evaluation.to_lowercase().contains("correct approach")
184 || judge_evaluation.to_lowercase().contains("good start")
185 {
186 0.3 // Wrong answer but some valid steps
187 } else {
188 0.0 // Completely wrong
189 }
190 };
191
192 // Construct rich textual feedback
193 // This combines factual info with the judge's analysis
194 let mut feedback = String::new();
195
196 feedback.push_str(&format!("Problem: {}\n", problem));
197 feedback.push_str(&format!("Expected: {}\n", expected));
198 feedback.push_str(&format!("Predicted: {}\n", student_answer));
199
200 if answer_matches {
201 feedback.push_str("Answer: CORRECT\n\n");
202 } else {
203 feedback.push_str("Answer: INCORRECT\n\n");
204 }
205
206 feedback.push_str("Reasoning Quality Analysis:\n");
207 feedback.push_str(&judge_evaluation);
208
209 // Return the feedback metric with score and rich text
210 FeedbackMetric::new(score, feedback)
211 }
212}
213
214// ============================================================================
215// Step 6: Main function - Set up and run GEPA optimization
216// ============================================================================
217
218#[tokio::main]
219async fn main() -> Result<()> {
220 println!("GEPA with LLM-as-a-Judge Example\n");
221 println!("This example shows how to use an LLM judge to automatically");
222 println!("generate rich feedback for optimizing a math solver.\n");
223
224 // Setup: Configure the LLM
225 // Main LM for the task
226 let task_lm = LM::builder().temperature(0.7).build().await.unwrap();
227
228 // Judge LM (could use a different/cheaper model)
229 let judge_lm = LM::builder().temperature(0.3).build().await.unwrap();
230
231 configure(task_lm, ChatAdapter);
232
233 // Create training examples
234 let trainset = vec![
235 example! {
236 "problem": "input" => "Sarah has 12 apples. She gives 3 to her friend and buys 5 more. How many apples does she have now?",
237 "expected_answer": "input" => "14"
238 },
239 example! {
240 "problem": "input" => "A train travels 60 miles in 1 hour. How far will it travel in 3.5 hours at the same speed?",
241 "expected_answer": "input" => "210"
242 },
243 example! {
244 "problem": "input" => "There are 24 students in a class. If 1/3 of them are absent, how many students are present?",
245 "expected_answer": "input" => "16"
246 },
247 example! {
248 "problem": "input" => "A rectangle has length 8 cm and width 5 cm. What is its area?",
249 "expected_answer": "input" => "40"
250 },
251 example! {
252 "problem": "input" => "John has $50. He spends $12 on lunch and $8 on a book. How much money does he have left?",
253 "expected_answer": "input" => "30"
254 },
255 ];
256
257 // Create the module
258 let mut module = MathSolver::builder()
259 .solver(Predict::new(MathWordProblem::new()))
260 .judge(Predict::new(MathJudge::new()))
261 .judge_lm(Arc::new(judge_lm))
262 .build();
263
264 // Evaluate baseline performance
265 println!("Step 1: Baseline Performance");
266 println!("Testing the solver before optimization...\n");
267 let baseline_score = module.evaluate(trainset.clone()).await;
268 println!(" Baseline average score: {:.3}\n", baseline_score);
269
270 // Configure GEPA optimizer
271 println!("Step 2: Configure GEPA");
272 println!("Setting up the optimizer with budget controls...\n");
273
274 let gepa = GEPA::builder()
275 .num_iterations(3) // Fewer iterations for demo
276 .minibatch_size(3) // Smaller batches
277 .temperature(0.9)
278 .track_stats(true)
279 .maybe_max_lm_calls(Some(100)) // Important: we're using 2x LM calls (task + judge)
280 .build();
281
282 // Run GEPA optimization
283 println!("Step 3: Run GEPA Optimization");
284 println!("The judge will analyze reasoning quality and provide feedback...\n");
285
286 let result = gepa
287 .compile_with_feedback(&mut module, trainset.clone())
288 .await?;
289
290 // Display results
291 println!("\nStep 4: Results");
292 println!("===============\n");
293 println!("Optimization complete!");
294 println!(
295 " Best average score: {:.3}",
296 result.best_candidate.average_score()
297 );
298 println!(
299 " Improvement: {:.3}",
300 result.best_candidate.average_score() - baseline_score
301 );
302 println!(" Total rollouts: {}", result.total_rollouts);
303 println!(
304 " Total LM calls: {} (includes judge evaluations)",
305 result.total_lm_calls
306 );
307
308 println!("\nEvolution over time:");
309 for (generation, score) in &result.evolution_history {
310 println!(" Generation {}: {:.3}", generation, score);
311 }
312
313 println!("\nOptimized instruction:");
314 println!(" {}", result.best_candidate.instruction);
315
316 // Test the optimized solver
317 println!("\nStep 5: Test Optimized Solver");
318 println!("==============================\n");
319
320 let test_problem = example! {
321 "problem": "input" => "A store sells pencils for $0.25 each. If you buy 8 pencils, how much will you pay?",
322 "expected_answer": "input" => "2"
323 };
324
325 let test_prediction = module.forward(test_problem.clone()).await?;
326 let test_feedback = module
327 .feedback_metric(&test_problem, &test_prediction)
328 .await;
329
330 println!(
331 "Test problem: A store sells pencils for $0.25 each. If you buy 8 pencils, how much will you pay?"
332 );
333 println!("\nAnswer: {}", test_prediction.get("answer", None));
334 println!("Score: {:.3}\n", test_feedback.score);
335 println!("Detailed Feedback from Judge:");
336 println!("{}", test_feedback.feedback);
337
338 Ok(())
339}

pub fn keys(&self) -> Vec<String>
pub fn values(&self) -> Vec<Value>
pub fn set_lm_usage(&mut self, lm_usage: LmUsage) -> Self
pub fn set_lm_usage(&mut self, lm_usage: LmUsage) -> Self
Examples found in repository?
examples/01-simple.rs (line 66)
49 async fn forward(&self, inputs: Example) -> Result<Prediction> {
50 let answerer_prediction = self.answerer.forward(inputs.clone()).await?;
51
52 let question = inputs.data.get("question").unwrap().clone();
53 let answer = answerer_prediction.data.get("answer").unwrap().clone();
54
55 let inputs = example! {
56 "question": "input" => question.clone(),
57 "answer": "output" => answer.clone()
58 };
59
60 let rating_prediction = self.rater.forward(inputs).await?;
61 Ok(prediction! {
62 "answer"=> answer,
63 "question"=> question,
64 "rating"=> rating_prediction.data.get("rating").unwrap().clone(),
65 }
66 .set_lm_usage(rating_prediction.lm_usage))
67 }

More examples
examples/02-module-iteration-and-updation.rs (line 86)
66 async fn forward(&self, inputs: Example) -> Result<Prediction> {
67 let answerer_prediction = self.answerer.forward(inputs.clone()).await?;
68
69 let question = inputs.data.get("question").unwrap().clone();
70 let answer = answerer_prediction.data.get("answer").unwrap().clone();
71
72 let inputs = Example::new(
73 hashmap! {
74 "answer".to_string() => answer.clone(),
75 "question".to_string() => question.clone()
76 },
77 vec!["answer".to_string(), "question".to_string()],
78 vec![],
79 );
80 let rating_prediction = self.rater.forward(inputs).await?;
81 Ok(prediction! {
82 "answer"=> answer,
83 "question"=> question,
84 "rating"=> rating_prediction.data.get("rating").unwrap().clone(),
85 }
86 .set_lm_usage(rating_prediction.lm_usage))
87 }

examples/06-other-providers-batch.rs (line 72)
49 async fn forward(&self, inputs: Example) -> Result<Prediction> {
50 let answerer_prediction = self.answerer.forward(inputs.clone()).await?;
51
52 let question = inputs.data.get("question").unwrap().clone();
53 let answer = answerer_prediction.data.get("answer").unwrap().clone();
54 let answer_lm_usage = answerer_prediction.lm_usage;
55
56 let inputs = Example::new(
57 hashmap! {
58 "answer".to_string() => answer.clone(),
59 "question".to_string() => question.clone()
60 },
61 vec!["answer".to_string(), "question".to_string()],
62 vec![],
63 );
64 let rating_prediction = self.rater.forward(inputs).await?;
65 let rating_lm_usage = rating_prediction.lm_usage;
66
67 Ok(prediction! {
68 "answer"=> answer,
69 "question"=> question,
70 "rating"=> rating_prediction.data.get("rating").unwrap().clone(),
71 }
72 .set_lm_usage(answer_lm_usage + rating_lm_usage))
73 }

Trait Implementations
Source§impl Clone for Prediction
impl Clone for Prediction
Source§fn clone(&self) -> Prediction
fn clone(&self) -> Prediction
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from
source. Read more

impl Debug for Prediction
impl Debug for Prediction
Source§impl Default for Prediction
impl Default for Prediction
Source§fn default() -> Prediction
fn default() -> Prediction
Returns the “default value” for a type. Read more
Source§impl<'de> Deserialize<'de> for Prediction
impl<'de> Deserialize<'de> for Prediction
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Source§impl Index<String> for Prediction
impl Index<String> for Prediction
Source§impl IntoIterator for Prediction
impl IntoIterator for Prediction
Auto Trait Implementations§
impl Freeze for Prediction
impl RefUnwindSafe for Prediction
impl Send for Prediction
impl Sync for Prediction
impl Unpin for Prediction
impl UnwindSafe for Prediction
Blanket Implementations§
impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
impl<T> Code for T where
T: Serialize + DeserializeOwned,
impl<T> Code for T where
T: Serialize + DeserializeOwned,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more