Trait Evaluator

Source

/// Scores a [`Module`] against a collection of examples.
///
/// Implementors supply [`Evaluator::metric`]; [`Evaluator::evaluate`] comes with a
/// default implementation. Both associated constants have defaults and may be
/// overridden by an implementor.
///
/// This trait is not dyn compatible, so it cannot be used as `dyn Evaluator`.
pub trait Evaluator: Module {
    /// Defaults to 32.
    ///
    /// NOTE(review): presumably an upper bound on how many examples `evaluate`
    /// processes at once — the provided body is elided in this listing; confirm.
    const MAX_CONCURRENCY: usize = 32usize;
    /// Defaults to `true`.
    ///
    /// NOTE(review): presumably toggles progress output during `evaluate` — confirm
    /// against the provided implementation.
    const DISPLAY_PROGRESS: bool = true;

    // Required method
    /// Returns a numeric score for one `prediction` with respect to its `example`.
    ///
    /// NOTE(review): the scale and direction of the score (e.g. higher is better)
    /// are defined by the implementor and are not visible here.
    async fn metric(&self, example: &Example, prediction: &Prediction) -> f32;

    // Provided method
    /// Consumes `examples` and returns a single `f32` score for the whole set.
    ///
    /// NOTE(review): presumably runs the module on each example and aggregates the
    /// per-example `metric` values; the aggregation is not shown in this listing.
    async fn evaluate(&self, examples: Vec<Example>) -> f32 { ... }
}

Provided Associated Constants§

Source

const MAX_CONCURRENCY: usize = 32usize

Source

const DISPLAY_PROGRESS: bool = true

Required Methods§

Source

async fn metric(&self, example: &Example, prediction: &Prediction) -> f32

Provided Methods§

Source

async fn evaluate(&self, examples: Vec<Example>) -> f32

Examples found in repository

examples/03-evaluate-hotpotqa.rs (line 80)

61async fn main() -> anyhow::Result<()> {
62    configure(
63        LM::builder()
64            .api_key(SecretString::from(std::env::var("OPENAI_API_KEY")?))
65            .build(),
66        ChatAdapter {},
67    );
68
69    let examples = DataLoader::load_hf(
70        "hotpotqa/hotpot_qa",
71        vec!["question".to_string()],
72        vec!["answer".to_string()],
73        "fullwiki",
74        "validation",
75        true,
76    )?[..128]
77        .to_vec();
78
79    let evaluator = QARater::builder().build();
80    let metric = evaluator.evaluate(examples).await;
81
82    println!("Metric: {metric}");
83    Ok(())
84}

More examples

Hide additional examples

examples/08-optimize-mipro.rs (line 123)

85async fn main() -> Result<()> {
86    println!("=== MIPROv2 Optimizer Example ===\n");
87
88    // Configure the LM
89    configure(
90        LM::builder()
91            .api_key(SecretString::from(std::env::var("OPENAI_API_KEY")?))
92            .build(),
93        ChatAdapter {},
94    );
95
96    // Load training data from HuggingFace
97    println!("Loading training data from HuggingFace...");
98    let train_examples = DataLoader::load_hf(
99        "hotpotqa/hotpot_qa",
100        vec!["question".to_string()],
101        vec!["answer".to_string()],
102        "fullwiki",
103        "validation",
104        true,
105    )?;
106
107    // Use a small subset for faster optimization
108    let train_subset = train_examples[..15].to_vec();
109    println!("Using {} training examples\n", train_subset.len());
110
111    // Create the module
112    let mut qa_module = SimpleQA::builder().build();
113
114    // Show initial instruction
115    println!("Initial instruction:");
116    println!(
117        "  \"{}\"\n",
118        qa_module.answerer.get_signature().instruction()
119    );
120
121    // Test baseline performance
122    println!("Evaluating baseline performance...");
123    let baseline_score = qa_module.evaluate(train_subset[..5].to_vec()).await;
124    println!("Baseline score: {:.3}\n", baseline_score);
125
126    // Create MIPROv2 optimizer
127    let optimizer = MIPROv2::builder()
128        .num_candidates(8) // Generate 8 candidate prompts
129        .num_trials(15) // Run 15 evaluation trials
130        .minibatch_size(10) // Evaluate on 10 examples per candidate
131        .temperature(1.0) // Temperature for prompt generation
132        .track_stats(true) // Display detailed statistics
133        .build();
134
135    // Optimize the module
136    println!("Starting MIPROv2 optimization...");
137    println!("This will:");
138    println!("  1. Generate execution traces");
139    println!("  2. Create a program description using LLM");
140    println!("  3. Generate {} candidate prompts with best practices", 8);
141    println!("  4. Evaluate each candidate");
142    println!("  5. Select and apply the best prompt\n");
143
144    optimizer.compile(&mut qa_module, train_subset.clone()).await?;
145
146    // Show optimized instruction
147    println!("\nOptimized instruction:");
148    println!(
149        "  \"{}\"\n",
150        qa_module.answerer.get_signature().instruction()
151    );
152
153    // Test optimized performance
154    println!("Evaluating optimized performance...");
155    let optimized_score = qa_module.evaluate(train_subset[..5].to_vec()).await;
156    println!("Optimized score: {:.3}", optimized_score);
157
158    // Show improvement
159    let improvement = ((optimized_score - baseline_score) / baseline_score) * 100.0;
160    println!(
161        "\n✓ Improvement: {:.1}% ({:.3} -> {:.3})",
162        improvement, baseline_score, optimized_score
163    );
164
165    // Test on a new example
166    println!("\n--- Testing on a new example ---");
167    let test_example = example! {
168        "question": "input" => "What is the capital of France?",
169    };
170
171    let result = qa_module.forward(test_example).await?;
172    println!("Question: What is the capital of France?");
173    println!("Answer: {}", result.get("answer", None));
174
175    println!("\n=== Example Complete ===");
176    Ok(())
177}

Dyn Compatibility§

This trait is not dyn compatible.

In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.

Trait Evaluator

Provided Associated Constants§

const MAX_CONCURRENCY: usize = 32usize

const DISPLAY_PROGRESS: bool = true

Required Methods§

async fn metric(&self, example: &Example, prediction: &Prediction) -> f32

Provided Methods§

async fn evaluate(&self, examples: Vec<Example>) -> f32

Dyn Compatibility§

Implementors§

Trait Evaluator