Evaluator

Trait Evaluator 

Source
pub trait Evaluator: Module {
    const MAX_CONCURRENCY: usize = 32usize;
    const DISPLAY_PROGRESS: bool = true;

    // Required method
    async fn metric(&self, example: &Example, prediction: &Prediction) -> f32;

    // Provided method
    async fn evaluate(&self, examples: Vec<Example>) -> f32 { ... }
}

Provided Associated Constants§

const MAX_CONCURRENCY: usize = 32usize

const DISPLAY_PROGRESS: bool = true

Required Methods§

Source

async fn metric(&self, example: &Example, prediction: &Prediction) -> f32

Provided Methods§

Source

async fn evaluate(&self, examples: Vec<Example>) -> f32

Examples found in repository:
examples/03-evaluate-hotpotqa.rs (line 80)
61async fn main() -> anyhow::Result<()> {
62    configure(
63        LM::builder()
64            .api_key(SecretString::from(std::env::var("OPENAI_API_KEY")?))
65            .build(),
66        ChatAdapter {},
67    );
68
69    let examples = DataLoader::load_hf(
70        "hotpotqa/hotpot_qa",
71        vec!["question".to_string()],
72        vec!["answer".to_string()],
73        "fullwiki",
74        "validation",
75        true,
76    )?[..128]
77        .to_vec();
78
79    let evaluator = QARater::builder().build();
80    let metric = evaluator.evaluate(examples).await;
81
82    println!("Metric: {metric}");
83    Ok(())
84}
More examples:
examples/08-optimize-mipro.rs (line 123)
85async fn main() -> Result<()> {
86    println!("=== MIPROv2 Optimizer Example ===\n");
87
88    // Configure the LM
89    configure(
90        LM::builder()
91            .api_key(SecretString::from(std::env::var("OPENAI_API_KEY")?))
92            .build(),
93        ChatAdapter {},
94    );
95
96    // Load training data from HuggingFace
97    println!("Loading training data from HuggingFace...");
98    let train_examples = DataLoader::load_hf(
99        "hotpotqa/hotpot_qa",
100        vec!["question".to_string()],
101        vec!["answer".to_string()],
102        "fullwiki",
103        "validation",
104        true,
105    )?;
106
107    // Use a small subset for faster optimization
108    let train_subset = train_examples[..15].to_vec();
109    println!("Using {} training examples\n", train_subset.len());
110
111    // Create the module
112    let mut qa_module = SimpleQA::builder().build();
113
114    // Show initial instruction
115    println!("Initial instruction:");
116    println!(
117        "  \"{}\"\n",
118        qa_module.answerer.get_signature().instruction()
119    );
120
121    // Test baseline performance
122    println!("Evaluating baseline performance...");
123    let baseline_score = qa_module.evaluate(train_subset[..5].to_vec()).await;
124    println!("Baseline score: {:.3}\n", baseline_score);
125
126    // Create MIPROv2 optimizer
127    let optimizer = MIPROv2::builder()
128        .num_candidates(8) // Generate 8 candidate prompts
129        .num_trials(15) // Run 15 evaluation trials
130        .minibatch_size(10) // Evaluate on 10 examples per candidate
131        .temperature(1.0) // Temperature for prompt generation
132        .track_stats(true) // Display detailed statistics
133        .build();
134
135    // Optimize the module
136    println!("Starting MIPROv2 optimization...");
137    println!("This will:");
138    println!("  1. Generate execution traces");
139    println!("  2. Create a program description using LLM");
140    println!("  3. Generate {} candidate prompts with best practices", 8);
141    println!("  4. Evaluate each candidate");
142    println!("  5. Select and apply the best prompt\n");
143
144    optimizer.compile(&mut qa_module, train_subset.clone()).await?;
145
146    // Show optimized instruction
147    println!("\nOptimized instruction:");
148    println!(
149        "  \"{}\"\n",
150        qa_module.answerer.get_signature().instruction()
151    );
152
153    // Test optimized performance
154    println!("Evaluating optimized performance...");
155    let optimized_score = qa_module.evaluate(train_subset[..5].to_vec()).await;
156    println!("Optimized score: {:.3}", optimized_score);
157
158    // Show improvement
159    let improvement = ((optimized_score - baseline_score) / baseline_score) * 100.0;
160    println!(
161        "\n✓ Improvement: {:.1}% ({:.3} -> {:.3})",
162        improvement, baseline_score, optimized_score
163    );
164
165    // Test on a new example
166    println!("\n--- Testing on a new example ---");
167    let test_example = example! {
168        "question": "input" => "What is the capital of France?",
169    };
170
171    let result = qa_module.forward(test_example).await?;
172    println!("Question: What is the capital of France?");
173    println!("Answer: {}", result.get("answer", None));
174
175    println!("\n=== Example Complete ===");
176    Ok(())
177}

Dyn Compatibility§

This trait is not dyn compatible.

In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.

Implementors§