pub trait Evaluator: Module {
const MAX_CONCURRENCY: usize = 32usize;
const DISPLAY_PROGRESS: bool = true;
// Required method
async fn metric(&self, example: &Example, prediction: &Prediction) -> f32;
// Provided method
async fn evaluate(&self, examples: Vec<Example>) -> f32 { ... }
}

Provided Associated Constants§
const MAX_CONCURRENCY: usize = 32usize
const DISPLAY_PROGRESS: bool = true
Required Methods§
async fn metric(&self, example: &Example, prediction: &Prediction) -> f32
Provided Methods§
Source
async fn evaluate(&self, examples: Vec<Example>) -> f32
Examples found in repository?
examples/03-evaluate-hotpotqa.rs (line 84)
64async fn main() -> anyhow::Result<()> {
65 configure(
66 LM::builder()
67 .model("openai:gpt-4o-mini".to_string())
68 .build()
69 .await?,
70 ChatAdapter {},
71 );
72
73 let examples = DataLoader::load_hf(
74 "hotpotqa/hotpot_qa",
75 vec!["question".to_string()],
76 vec!["answer".to_string()],
77 "fullwiki",
78 "validation",
79 true,
80 )?[..128]
81 .to_vec();
82
83 let evaluator = QARater::builder().build();
84 let metric = evaluator.evaluate(examples).await;
85
86 println!("Metric: {metric}");
87 Ok(())
88}

More examples
examples/08-optimize-mipro.rs (line 117)
84async fn main() -> Result<()> {
85 println!("=== MIPROv2 Optimizer Example ===\n");
86
87 // Configure the LM
88 configure(LM::default(), ChatAdapter);
89
90 // Load training data from HuggingFace
91 println!("Loading training data from HuggingFace...");
92 let train_examples = DataLoader::load_hf(
93 "hotpotqa/hotpot_qa",
94 vec!["question".to_string()],
95 vec!["answer".to_string()],
96 "fullwiki",
97 "validation",
98 true,
99 )?;
100
101 // Use a small subset for faster optimization
102 let train_subset = train_examples[..15].to_vec();
103 println!("Using {} training examples\n", train_subset.len());
104
105 // Create the module
106 let mut qa_module = SimpleQA::builder().build();
107
108 // Show initial instruction
109 println!("Initial instruction:");
110 println!(
111 " \"{}\"\n",
112 qa_module.answerer.get_signature().instruction()
113 );
114
115 // Test baseline performance
116 println!("Evaluating baseline performance...");
117 let baseline_score = qa_module.evaluate(train_subset[..5].to_vec()).await;
118 println!("Baseline score: {:.3}\n", baseline_score);
119
120 // Create MIPROv2 optimizer
121 let optimizer = MIPROv2::builder()
122 .num_candidates(8) // Generate 8 candidate prompts
123 .num_trials(15) // Run 15 evaluation trials
124 .minibatch_size(10) // Evaluate on 10 examples per candidate
125 .temperature(1.0) // Temperature for prompt generation
126 .track_stats(true) // Display detailed statistics
127 .build();
128
129 // Optimize the module
130 println!("Starting MIPROv2 optimization...");
131 println!("This will:");
132 println!(" 1. Generate execution traces");
133 println!(" 2. Create a program description using LLM");
134 println!(" 3. Generate {} candidate prompts with best practices", 8);
135 println!(" 4. Evaluate each candidate");
136 println!(" 5. Select and apply the best prompt\n");
137
138 optimizer
139 .compile(&mut qa_module, train_subset.clone())
140 .await?;
141
142 // Show optimized instruction
143 println!("\nOptimized instruction:");
144 println!(
145 " \"{}\"\n",
146 qa_module.answerer.get_signature().instruction()
147 );
148
149 // Test optimized performance
150 println!("Evaluating optimized performance...");
151 let optimized_score = qa_module.evaluate(train_subset[..5].to_vec()).await;
152 println!("Optimized score: {:.3}", optimized_score);
153
154 // Show improvement
155 let improvement = ((optimized_score - baseline_score) / baseline_score) * 100.0;
156 println!(
157 "\n✓ Improvement: {:.1}% ({:.3} -> {:.3})",
158 improvement, baseline_score, optimized_score
159 );
160
161 // Test on a new example
162 println!("\n--- Testing on a new example ---");
163 let test_example = example! {
164 "question": "input" => "What is the capital of France?",
165 };
166
167 let result = qa_module.forward(test_example).await?;
168 println!("Question: What is the capital of France?");
169 println!("Answer: {}", result.get("answer", None));
170
171 println!("\n=== Example Complete ===");
172 Ok(())
173}

examples/09-gepa-sentiment.rs (line 175)
116async fn main() -> Result<()> {
117 println!("GEPA Sentiment Analysis Optimization Example\n");
118
119 // Setup LM
120 let lm = LM::builder().temperature(0.7).build().await.unwrap();
121
122 configure(lm.clone(), ChatAdapter);
123
124 // Create training examples with diverse sentiments
125 let trainset = vec![
126 example! {
127 "text": "input" => "This movie was absolutely fantastic! I loved every minute of it.",
128 "expected_sentiment": "input" => "positive"
129 },
130 example! {
131 "text": "input" => "Terrible service, will never come back again.",
132 "expected_sentiment": "input" => "negative"
133 },
134 example! {
135 "text": "input" => "The weather is okay, nothing special.",
136 "expected_sentiment": "input" => "neutral"
137 },
138 example! {
139 "text": "input" => "Despite some minor issues, I'm quite happy with the purchase.",
140 "expected_sentiment": "input" => "positive"
141 },
142 example! {
143 "text": "input" => "I have mixed feelings about this product.",
144 "expected_sentiment": "input" => "neutral"
145 },
146 example! {
147 "text": "input" => "This is the worst experience I've ever had!",
148 "expected_sentiment": "input" => "negative"
149 },
150 example! {
151 "text": "input" => "It's fine. Does what it's supposed to do.",
152 "expected_sentiment": "input" => "neutral"
153 },
154 example! {
155 "text": "input" => "Exceeded all my expectations! Highly recommend!",
156 "expected_sentiment": "input" => "positive"
157 },
158 example! {
159 "text": "input" => "Disappointed and frustrated with the outcome.",
160 "expected_sentiment": "input" => "negative"
161 },
162 example! {
163 "text": "input" => "Standard quality, nothing remarkable.",
164 "expected_sentiment": "input" => "neutral"
165 },
166 ];
167
168 // Create module
169 let mut module = SentimentAnalyzer::builder()
170 .predictor(Predict::new(SentimentSignature::new()))
171 .build();
172
173 // Evaluate baseline performance
174 println!("Baseline Performance:");
175 let baseline_score = module.evaluate(trainset.clone()).await;
176 println!(" Average score: {:.3}\n", baseline_score);
177
178 // Configure GEPA optimizer
179 let gepa = GEPA::builder()
180 .num_iterations(5)
181 .minibatch_size(5)
182 .num_trials(3)
183 .temperature(0.9)
184 .track_stats(true)
185 .build();
186
187 // Run optimization
188 println!("Starting GEPA optimization...\n");
189 let result = gepa
190 .compile_with_feedback(&mut module, trainset.clone())
191 .await?;
192
193 // Display results
194 println!("\nOptimization Results:");
195 println!(
196 " Best average score: {:.3}",
197 result.best_candidate.average_score()
198 );
199 println!(" Total rollouts: {}", result.total_rollouts);
200 println!(" Total LM calls: {}", result.total_lm_calls);
201 println!(" Generations: {}", result.evolution_history.len());
202
203 println!("\nBest Instruction:");
204 println!(" {}", result.best_candidate.instruction);
205
206 if !result.evolution_history.is_empty() {
207 println!("\nEvolution History:");
208 for entry in &result.evolution_history {
209 println!(" Generation {}: {:.3}", entry.0, entry.1);
210 }
211 }
212
213 // Test optimized module on a new example
214 println!("\nTesting Optimized Module:");
215 let test_example = example! {
216 "text": "input" => "This product changed my life! Absolutely amazing!",
217 "expected_sentiment": "input" => "positive"
218 };
219
220 let test_prediction = module.forward(test_example.clone()).await?;
221 let test_feedback = module
222 .feedback_metric(&test_example, &test_prediction)
223 .await;
224
225 println!(
226 " Test prediction: {}",
227 test_prediction.get("sentiment", None)
228 );
229 println!(" Test score: {:.3}", test_feedback.score);
230 println!(" Feedback:\n{}", test_feedback.feedback);
231
232 Ok(())
233}

examples/10-gepa-llm-judge.rs (line 267)
219async fn main() -> Result<()> {
220 println!("GEPA with LLM-as-a-Judge Example\n");
221 println!("This example shows how to use an LLM judge to automatically");
222 println!("generate rich feedback for optimizing a math solver.\n");
223
224 // Setup: Configure the LLM
225 // Main LM for the task
226 let task_lm = LM::builder().temperature(0.7).build().await.unwrap();
227
228 // Judge LM (could use a different/cheaper model)
229 let judge_lm = LM::builder().temperature(0.3).build().await.unwrap();
230
231 configure(task_lm, ChatAdapter);
232
233 // Create training examples
234 let trainset = vec![
235 example! {
236 "problem": "input" => "Sarah has 12 apples. She gives 3 to her friend and buys 5 more. How many apples does she have now?",
237 "expected_answer": "input" => "14"
238 },
239 example! {
240 "problem": "input" => "A train travels 60 miles in 1 hour. How far will it travel in 3.5 hours at the same speed?",
241 "expected_answer": "input" => "210"
242 },
243 example! {
244 "problem": "input" => "There are 24 students in a class. If 1/3 of them are absent, how many students are present?",
245 "expected_answer": "input" => "16"
246 },
247 example! {
248 "problem": "input" => "A rectangle has length 8 cm and width 5 cm. What is its area?",
249 "expected_answer": "input" => "40"
250 },
251 example! {
252 "problem": "input" => "John has $50. He spends $12 on lunch and $8 on a book. How much money does he have left?",
253 "expected_answer": "input" => "30"
254 },
255 ];
256
257 // Create the module
258 let mut module = MathSolver::builder()
259 .solver(Predict::new(MathWordProblem::new()))
260 .judge(Predict::new(MathJudge::new()))
261 .judge_lm(Arc::new(judge_lm))
262 .build();
263
264 // Evaluate baseline performance
265 println!("Step 1: Baseline Performance");
266 println!("Testing the solver before optimization...\n");
267 let baseline_score = module.evaluate(trainset.clone()).await;
268 println!(" Baseline average score: {:.3}\n", baseline_score);
269
270 // Configure GEPA optimizer
271 println!("Step 2: Configure GEPA");
272 println!("Setting up the optimizer with budget controls...\n");
273
274 let gepa = GEPA::builder()
275 .num_iterations(3) // Fewer iterations for demo
276 .minibatch_size(3) // Smaller batches
277 .temperature(0.9)
278 .track_stats(true)
279 .maybe_max_lm_calls(Some(100)) // Important: we're using 2x LM calls (task + judge)
280 .build();
281
282 // Run GEPA optimization
283 println!("Step 3: Run GEPA Optimization");
284 println!("The judge will analyze reasoning quality and provide feedback...\n");
285
286 let result = gepa
287 .compile_with_feedback(&mut module, trainset.clone())
288 .await?;
289
290 // Display results
291 println!("\nStep 4: Results");
292 println!("===============\n");
293 println!("Optimization complete!");
294 println!(
295 " Best average score: {:.3}",
296 result.best_candidate.average_score()
297 );
298 println!(
299 " Improvement: {:.3}",
300 result.best_candidate.average_score() - baseline_score
301 );
302 println!(" Total rollouts: {}", result.total_rollouts);
303 println!(
304 " Total LM calls: {} (includes judge evaluations)",
305 result.total_lm_calls
306 );
307
308 println!("\nEvolution over time:");
309 for (generation, score) in &result.evolution_history {
310 println!(" Generation {}: {:.3}", generation, score);
311 }
312
313 println!("\nOptimized instruction:");
314 println!(" {}", result.best_candidate.instruction);
315
316 // Test the optimized solver
317 println!("\nStep 5: Test Optimized Solver");
318 println!("==============================\n");
319
320 let test_problem = example! {
321 "problem": "input" => "A store sells pencils for $0.25 each. If you buy 8 pencils, how much will you pay?",
322 "expected_answer": "input" => "2"
323 };
324
325 let test_prediction = module.forward(test_problem.clone()).await?;
326 let test_feedback = module
327 .feedback_metric(&test_problem, &test_prediction)
328 .await;
329
330 println!(
331 "Test problem: A store sells pencils for $0.25 each. If you buy 8 pencils, how much will you pay?"
332 );
333 println!("\nAnswer: {}", test_prediction.get("answer", None));
334 println!("Score: {:.3}\n", test_feedback.score);
335 println!("Detailed Feedback from Judge:");
336 println!("{}", test_feedback.feedback);
337
338 Ok(())
339}

Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.