pub struct Prediction {
pub data: HashMap<String, Value>,
pub lm_usage: LmUsage,
}

Fields

data: HashMap<String, Value>
lm_usage: LmUsage

Implementations
Source§impl Prediction
impl Prediction
pub fn new(data: HashMap<String, Value>, lm_usage: LmUsage) -> Self
pub fn get(&self, key: &str, default: Option<&str>) -> Value
pub fn get(&self, key: &str, default: Option<&str>) -> Value
Examples found in repository?
examples/09-gepa-sentiment.rs (line 53)
51 async fn feedback_metric(&self, example: &Example, prediction: &Prediction) -> FeedbackMetric {
52 let predicted = prediction
53 .get("sentiment", None)
54 .as_str()
55 .unwrap_or("")
56 .to_string()
57 .to_lowercase();
58
59 let expected = example
60 .get("expected_sentiment", None)
61 .as_str()
62 .unwrap_or("")
63 .to_string()
64 .to_lowercase();
65
66 let text = example.get("text", None).as_str().unwrap_or("").to_string();
67
68 let reasoning = prediction
69 .get("reasoning", None)
70 .as_str()
71 .unwrap_or("")
72 .to_string();
73
74 // Calculate score
75 let correct = predicted == expected;
76 let score = if correct { 1.0 } else { 0.0 };
77
78 // Create rich feedback
79 let mut feedback = if correct {
80 format!("Correct classification: \"{}\"\n", expected)
81 } else {
82 format!(
83 "Incorrect classification\n Expected: \"{}\"\n Predicted: \"{}\"\n",
84 expected, predicted
85 )
86 };
87
88 // Add context about the input
89 feedback.push_str(&format!(" Input text: \"{}\"\n", text));
90
91 // Add reasoning analysis
92 if !reasoning.is_empty() {
93 feedback.push_str(&format!(" Reasoning: {}\n", reasoning));
94
95 // Check if reasoning mentions key sentiment words
96 let has_reasoning_quality = if correct {
97 // For correct answers, check if reasoning is substantive
98 reasoning.len() > 20
99 } else {
100 // For incorrect answers, note what went wrong
101 false
102 };
103
104 if has_reasoning_quality {
105 feedback.push_str(" Reasoning appears detailed\n");
106 } else if !correct {
107 feedback.push_str(" May have misunderstood the text sentiment\n");
108 }
109 }
110
111 FeedbackMetric::new(score, feedback)
112 }
113}
114
115#[tokio::main]
116async fn main() -> Result<()> {
117 println!("GEPA Sentiment Analysis Optimization Example\n");
118
119 // Setup LM
120 let lm = LM::builder().temperature(0.7).build().await.unwrap();
121
122 configure(lm.clone(), ChatAdapter);
123
124 // Create training examples with diverse sentiments
125 let trainset = vec![
126 example! {
127 "text": "input" => "This movie was absolutely fantastic! I loved every minute of it.",
128 "expected_sentiment": "input" => "positive"
129 },
130 example! {
131 "text": "input" => "Terrible service, will never come back again.",
132 "expected_sentiment": "input" => "negative"
133 },
134 example! {
135 "text": "input" => "The weather is okay, nothing special.",
136 "expected_sentiment": "input" => "neutral"
137 },
138 example! {
139 "text": "input" => "Despite some minor issues, I'm quite happy with the purchase.",
140 "expected_sentiment": "input" => "positive"
141 },
142 example! {
143 "text": "input" => "I have mixed feelings about this product.",
144 "expected_sentiment": "input" => "neutral"
145 },
146 example! {
147 "text": "input" => "This is the worst experience I've ever had!",
148 "expected_sentiment": "input" => "negative"
149 },
150 example! {
151 "text": "input" => "It's fine. Does what it's supposed to do.",
152 "expected_sentiment": "input" => "neutral"
153 },
154 example! {
155 "text": "input" => "Exceeded all my expectations! Highly recommend!",
156 "expected_sentiment": "input" => "positive"
157 },
158 example! {
159 "text": "input" => "Disappointed and frustrated with the outcome.",
160 "expected_sentiment": "input" => "negative"
161 },
162 example! {
163 "text": "input" => "Standard quality, nothing remarkable.",
164 "expected_sentiment": "input" => "neutral"
165 },
166 ];
167
168 // Create module
169 let mut module = SentimentAnalyzer::builder()
170 .predictor(Predict::new(SentimentSignature::new()))
171 .build();
172
173 // Evaluate baseline performance
174 println!("Baseline Performance:");
175 let baseline_score = module.evaluate(trainset.clone()).await;
176 println!(" Average score: {:.3}\n", baseline_score);
177
178 // Configure GEPA optimizer
179 let gepa = GEPA::builder()
180 .num_iterations(5)
181 .minibatch_size(5)
182 .num_trials(3)
183 .temperature(0.9)
184 .track_stats(true)
185 .build();
186
187 // Run optimization
188 println!("Starting GEPA optimization...\n");
189 let result = gepa
190 .compile_with_feedback(&mut module, trainset.clone())
191 .await?;
192
193 // Display results
194 println!("\nOptimization Results:");
195 println!(
196 " Best average score: {:.3}",
197 result.best_candidate.average_score()
198 );
199 println!(" Total rollouts: {}", result.total_rollouts);
200 println!(" Total LM calls: {}", result.total_lm_calls);
201 println!(" Generations: {}", result.evolution_history.len());
202
203 println!("\nBest Instruction:");
204 println!(" {}", result.best_candidate.instruction);
205
206 if !result.evolution_history.is_empty() {
207 println!("\nEvolution History:");
208 for entry in &result.evolution_history {
209 println!(" Generation {}: {:.3}", entry.0, entry.1);
210 }
211 }
212
213 // Test optimized module on a new example
214 println!("\nTesting Optimized Module:");
215 let test_example = example! {
216 "text": "input" => "This product changed my life! Absolutely amazing!",
217 "expected_sentiment": "input" => "positive"
218 };
219
220 let test_prediction = module.forward(test_example.clone()).await?;
221 let test_feedback = module
222 .feedback_metric(&test_example, &test_prediction)
223 .await;
224
225 println!(
226 " Test prediction: {}",
227 test_prediction.get("sentiment", None)
228 );
229 println!(" Test score: {:.3}", test_feedback.score);
230 println!(" Feedback:\n{}", test_feedback.feedback);
231
232 Ok(())
233}

More examples
examples/08-optimize-mipro.rs (line 169)
84async fn main() -> Result<()> {
85 println!("=== MIPROv2 Optimizer Example ===\n");
86
87 // Configure the LM
88 configure(LM::default(), ChatAdapter);
89
90 // Load training data from HuggingFace
91 println!("Loading training data from HuggingFace...");
92 let train_examples = DataLoader::load_hf(
93 "hotpotqa/hotpot_qa",
94 vec!["question".to_string()],
95 vec!["answer".to_string()],
96 "fullwiki",
97 "validation",
98 true,
99 )?;
100
101 // Use a small subset for faster optimization
102 let train_subset = train_examples[..15].to_vec();
103 println!("Using {} training examples\n", train_subset.len());
104
105 // Create the module
106 let mut qa_module = SimpleQA::builder().build();
107
108 // Show initial instruction
109 println!("Initial instruction:");
110 println!(
111 " \"{}\"\n",
112 qa_module.answerer.get_signature().instruction()
113 );
114
115 // Test baseline performance
116 println!("Evaluating baseline performance...");
117 let baseline_score = qa_module.evaluate(train_subset[..5].to_vec()).await;
118 println!("Baseline score: {:.3}\n", baseline_score);
119
120 // Create MIPROv2 optimizer
121 let optimizer = MIPROv2::builder()
122 .num_candidates(8) // Generate 8 candidate prompts
123 .num_trials(15) // Run 15 evaluation trials
124 .minibatch_size(10) // Evaluate on 10 examples per candidate
125 .temperature(1.0) // Temperature for prompt generation
126 .track_stats(true) // Display detailed statistics
127 .build();
128
129 // Optimize the module
130 println!("Starting MIPROv2 optimization...");
131 println!("This will:");
132 println!(" 1. Generate execution traces");
133 println!(" 2. Create a program description using LLM");
134 println!(" 3. Generate {} candidate prompts with best practices", 8);
135 println!(" 4. Evaluate each candidate");
136 println!(" 5. Select and apply the best prompt\n");
137
138 optimizer
139 .compile(&mut qa_module, train_subset.clone())
140 .await?;
141
142 // Show optimized instruction
143 println!("\nOptimized instruction:");
144 println!(
145 " \"{}\"\n",
146 qa_module.answerer.get_signature().instruction()
147 );
148
149 // Test optimized performance
150 println!("Evaluating optimized performance...");
151 let optimized_score = qa_module.evaluate(train_subset[..5].to_vec()).await;
152 println!("Optimized score: {:.3}", optimized_score);
153
154 // Show improvement
155 let improvement = ((optimized_score - baseline_score) / baseline_score) * 100.0;
156 println!(
157 "\n✓ Improvement: {:.1}% ({:.3} -> {:.3})",
158 improvement, baseline_score, optimized_score
159 );
160
161 // Test on a new example
162 println!("\n--- Testing on a new example ---");
163 let test_example = example! {
164 "question": "input" => "What is the capital of France?",
165 };
166
167 let result = qa_module.forward(test_example).await?;
168 println!("Question: What is the capital of France?");
169 println!("Answer: {}", result.get("answer", None));
170
171 println!("\n=== Example Complete ===");
172 Ok(())
173}

examples/10-gepa-llm-judge.rs (line 117)
102 async fn feedback_metric(&self, example: &Example, prediction: &Prediction) -> FeedbackMetric {
103 // Extract the problem and answers
104 let problem = example
105 .get("problem", None)
106 .as_str()
107 .unwrap_or("")
108 .to_string();
109
110 let expected = example
111 .get("expected_answer", None)
112 .as_str()
113 .unwrap_or("")
114 .to_string();
115
116 let student_answer = prediction
117 .get("answer", None)
118 .as_str()
119 .unwrap_or("")
120 .to_string();
121
122 let student_reasoning = prediction
123 .get("reasoning", None)
124 .as_str()
125 .unwrap_or("No reasoning provided")
126 .to_string();
127
128 // Quick check: is the answer exactly correct?
129 let answer_matches = student_answer.trim() == expected.trim();
130
131 // Use LLM judge to analyze the reasoning quality
132 // This is where the magic happens - the judge provides rich feedback
133 let judge_input = example! {
134 "problem": "input" => &problem,
135 "expected_answer": "input" => &expected,
136 "student_answer": "input" => &student_answer,
137 "student_reasoning": "input" => &student_reasoning
138 };
139
140 let judge_output = match self
141 .judge
142 .forward_with_config(judge_input, Arc::clone(&self.judge_lm))
143 .await
144 {
145 Ok(output) => output,
146 Err(_) => {
147 // If judge fails, fall back to simple feedback
148 let score = if answer_matches { 1.0 } else { 0.0 };
149 let simple_feedback = format!(
150 "Problem: {}\nExpected: {}\nPredicted: {}\nAnswer: {}",
151 problem,
152 expected,
153 student_answer,
154 if answer_matches {
155 "CORRECT"
156 } else {
157 "INCORRECT"
158 }
159 );
160 return FeedbackMetric::new(score, simple_feedback);
161 }
162 };
163
164 let judge_evaluation = judge_output
165 .get("evaluation", None)
166 .as_str()
167 .unwrap_or("Unable to evaluate")
168 .to_string();
169
170 // Calculate score based on answer correctness and reasoning quality
171 // The judge's evaluation helps us assign partial credit
172 let score = if answer_matches {
173 // Correct answer - check if reasoning is also sound
174 if judge_evaluation.to_lowercase().contains("sound reasoning")
175 || judge_evaluation.to_lowercase().contains("correct approach")
176 {
177 1.0 // Perfect: right answer, good reasoning
178 } else {
179 0.7 // Right answer but flawed reasoning (lucky guess?)
180 }
181 } else {
182 // Wrong answer - check if there's any partial credit
183 if judge_evaluation.to_lowercase().contains("correct approach")
184 || judge_evaluation.to_lowercase().contains("good start")
185 {
186 0.3 // Wrong answer but some valid steps
187 } else {
188 0.0 // Completely wrong
189 }
190 };
191
192 // Construct rich textual feedback
193 // This combines factual info with the judge's analysis
194 let mut feedback = String::new();
195
196 feedback.push_str(&format!("Problem: {}\n", problem));
197 feedback.push_str(&format!("Expected: {}\n", expected));
198 feedback.push_str(&format!("Predicted: {}\n", student_answer));
199
200 if answer_matches {
201 feedback.push_str("Answer: CORRECT\n\n");
202 } else {
203 feedback.push_str("Answer: INCORRECT\n\n");
204 }
205
206 feedback.push_str("Reasoning Quality Analysis:\n");
207 feedback.push_str(&judge_evaluation);
208
209 // Return the feedback metric with score and rich text
210 FeedbackMetric::new(score, feedback)
211 }
212}
213
214// ============================================================================
215// Step 6: Main function - Set up and run GEPA optimization
216// ============================================================================
217
218#[tokio::main]
219async fn main() -> Result<()> {
220 println!("GEPA with LLM-as-a-Judge Example\n");
221 println!("This example shows how to use an LLM judge to automatically");
222 println!("generate rich feedback for optimizing a math solver.\n");
223
224 // Setup: Configure the LLM
225 // Main LM for the task
226 let task_lm = LM::builder().temperature(0.7).build().await.unwrap();
227
228 // Judge LM (could use a different/cheaper model)
229 let judge_lm = LM::builder().temperature(0.3).build().await.unwrap();
230
231 configure(task_lm, ChatAdapter);
232
233 // Create training examples
234 let trainset = vec![
235 example! {
236 "problem": "input" => "Sarah has 12 apples. She gives 3 to her friend and buys 5 more. How many apples does she have now?",
237 "expected_answer": "input" => "14"
238 },
239 example! {
240 "problem": "input" => "A train travels 60 miles in 1 hour. How far will it travel in 3.5 hours at the same speed?",
241 "expected_answer": "input" => "210"
242 },
243 example! {
244 "problem": "input" => "There are 24 students in a class. If 1/3 of them are absent, how many students are present?",
245 "expected_answer": "input" => "16"
246 },
247 example! {
248 "problem": "input" => "A rectangle has length 8 cm and width 5 cm. What is its area?",
249 "expected_answer": "input" => "40"
250 },
251 example! {
252 "problem": "input" => "John has $50. He spends $12 on lunch and $8 on a book. How much money does he have left?",
253 "expected_answer": "input" => "30"
254 },
255 ];
256
257 // Create the module
258 let mut module = MathSolver::builder()
259 .solver(Predict::new(MathWordProblem::new()))
260 .judge(Predict::new(MathJudge::new()))
261 .judge_lm(Arc::new(judge_lm))
262 .build();
263
264 // Evaluate baseline performance
265 println!("Step 1: Baseline Performance");
266 println!("Testing the solver before optimization...\n");
267 let baseline_score = module.evaluate(trainset.clone()).await;
268 println!(" Baseline average score: {:.3}\n", baseline_score);
269
270 // Configure GEPA optimizer
271 println!("Step 2: Configure GEPA");
272 println!("Setting up the optimizer with budget controls...\n");
273
274 let gepa = GEPA::builder()
275 .num_iterations(3) // Fewer iterations for demo
276 .minibatch_size(3) // Smaller batches
277 .temperature(0.9)
278 .track_stats(true)
279 .maybe_max_lm_calls(Some(100)) // Important: we're using 2x LM calls (task + judge)
280 .build();
281
282 // Run GEPA optimization
283 println!("Step 3: Run GEPA Optimization");
284 println!("The judge will analyze reasoning quality and provide feedback...\n");
285
286 let result = gepa
287 .compile_with_feedback(&mut module, trainset.clone())
288 .await?;
289
290 // Display results
291 println!("\nStep 4: Results");
292 println!("===============\n");
293 println!("Optimization complete!");
294 println!(
295 " Best average score: {:.3}",
296 result.best_candidate.average_score()
297 );
298 println!(
299 " Improvement: {:.3}",
300 result.best_candidate.average_score() - baseline_score
301 );
302 println!(" Total rollouts: {}", result.total_rollouts);
303 println!(
304 " Total LM calls: {} (includes judge evaluations)",
305 result.total_lm_calls
306 );
307
308 println!("\nEvolution over time:");
309 for (generation, score) in &result.evolution_history {
310 println!(" Generation {}: {:.3}", generation, score);
311 }
312
313 println!("\nOptimized instruction:");
314 println!(" {}", result.best_candidate.instruction);
315
316 // Test the optimized solver
317 println!("\nStep 5: Test Optimized Solver");
318 println!("==============================\n");
319
320 let test_problem = example! {
321 "problem": "input" => "A store sells pencils for $0.25 each. If you buy 8 pencils, how much will you pay?",
322 "expected_answer": "input" => "2"
323 };
324
325 let test_prediction = module.forward(test_problem.clone()).await?;
326 let test_feedback = module
327 .feedback_metric(&test_problem, &test_prediction)
328 .await;
329
330 println!(
331 "Test problem: A store sells pencils for $0.25 each. If you buy 8 pencils, how much will you pay?"
332 );
333 println!("\nAnswer: {}", test_prediction.get("answer", None));
334 println!("Score: {:.3}\n", test_feedback.score);
335 println!("Detailed Feedback from Judge:");
336 println!("{}", test_feedback.feedback);
337
338 Ok(())
339}

pub fn keys(&self) -> Vec<String>
pub fn values(&self) -> Vec<Value>
pub fn set_lm_usage(&mut self, lm_usage: LmUsage) -> Self
pub fn set_lm_usage(&mut self, lm_usage: LmUsage) -> Self
Examples found in repository?
examples/01-simple.rs (line 66)
49 async fn forward(&self, inputs: Example) -> Result<Prediction> {
50 let answerer_prediction = self.answerer.forward(inputs.clone()).await?;
51
52 let question = inputs.data.get("question").unwrap().clone();
53 let answer = answerer_prediction.data.get("answer").unwrap().clone();
54
55 let inputs = example! {
56 "question": "input" => question.clone(),
57 "answer": "output" => answer.clone()
58 };
59
60 let rating_prediction = self.rater.forward(inputs).await?;
61 Ok(prediction! {
62 "answer"=> answer,
63 "question"=> question,
64 "rating"=> rating_prediction.data.get("rating").unwrap().clone(),
65 }
66 .set_lm_usage(rating_prediction.lm_usage))
67 }

More examples
examples/02-module-iteration-and-updation.rs (line 86)
66 async fn forward(&self, inputs: Example) -> Result<Prediction> {
67 let answerer_prediction = self.answerer.forward(inputs.clone()).await?;
68
69 let question = inputs.data.get("question").unwrap().clone();
70 let answer = answerer_prediction.data.get("answer").unwrap().clone();
71
72 let inputs = Example::new(
73 hashmap! {
74 "answer".to_string() => answer.clone(),
75 "question".to_string() => question.clone()
76 },
77 vec!["answer".to_string(), "question".to_string()],
78 vec![],
79 );
80 let rating_prediction = self.rater.forward(inputs).await?;
81 Ok(prediction! {
82 "answer"=> answer,
83 "question"=> question,
84 "rating"=> rating_prediction.data.get("rating").unwrap().clone(),
85 }
86 .set_lm_usage(rating_prediction.lm_usage))
87 }

examples/06-other-providers-batch.rs (line 72)
49 async fn forward(&self, inputs: Example) -> Result<Prediction> {
50 let answerer_prediction = self.answerer.forward(inputs.clone()).await?;
51
52 let question = inputs.data.get("question").unwrap().clone();
53 let answer = answerer_prediction.data.get("answer").unwrap().clone();
54 let answer_lm_usage = answerer_prediction.lm_usage;
55
56 let inputs = Example::new(
57 hashmap! {
58 "answer".to_string() => answer.clone(),
59 "question".to_string() => question.clone()
60 },
61 vec!["answer".to_string(), "question".to_string()],
62 vec![],
63 );
64 let rating_prediction = self.rater.forward(inputs).await?;
65 let rating_lm_usage = rating_prediction.lm_usage;
66
67 Ok(prediction! {
68 "answer"=> answer,
69 "question"=> question,
70 "rating"=> rating_prediction.data.get("rating").unwrap().clone(),
71 }
72 .set_lm_usage(answer_lm_usage + rating_lm_usage))
73 }

Trait Implementations
Source§impl Clone for Prediction
impl Clone for Prediction
Source§fn clone(&self) -> Prediction
fn clone(&self) -> Prediction
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from
source. Read more

impl Debug for Prediction
impl Debug for Prediction
Source§impl Default for Prediction
impl Default for Prediction
Source§fn default() -> Prediction
fn default() -> Prediction
Returns the “default value” for a type. Read more
Source§impl<'de> Deserialize<'de> for Prediction
impl<'de> Deserialize<'de> for Prediction
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Source§impl Index<String> for Prediction
impl Index<String> for Prediction
Source§impl IntoIterator for Prediction
impl IntoIterator for Prediction
Auto Trait Implementations§
impl Freeze for Prediction
impl RefUnwindSafe for Prediction
impl Send for Prediction
impl Sync for Prediction
impl Unpin for Prediction
impl UnwindSafe for Prediction
Blanket Implementations§
impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
impl<T> Code for T where
T: Serialize + DeserializeOwned,
impl<T> Code for T where
T: Serialize + DeserializeOwned,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more