llm/evaluator/mod.rs
1//! Module for evaluating and comparing responses from multiple LLM providers.
2//!
3//! This module provides functionality to run the same prompt through multiple LLMs
4//! and score their responses using custom evaluation functions.
5
6mod parallel;
7
8use crate::{chat::ChatMessage, error::LLMError, LLMProvider};
9
10pub use parallel::{ParallelEvalResult, ParallelEvaluator};
11
/// Signature shared by every scoring callback used to rate an LLM response.
///
/// A scorer receives the response text as `&str` and returns an `f32` score.
/// The `Send + Sync + 'static` bounds are part of the alias, so a boxed scorer
/// can be stored in a long-lived evaluator and moved or shared between threads.
pub type ScoringFn = dyn Fn(&str) -> f32 + Send + Sync + 'static;
14
15/// Evaluator for comparing responses from multiple LLM providers
16pub struct LLMEvaluator {
17 /// Collection of LLM providers to evaluate
18 llms: Vec<Box<dyn LLMProvider>>,
19 /// Optional scoring function to evaluate responses
20 scorings_fns: Vec<Box<ScoringFn>>,
21}
22
23impl LLMEvaluator {
24 /// Creates a new evaluator with the given LLM providers
25 ///
26 /// # Arguments
27 /// * `llms` - Vector of LLM providers to evaluate
28 pub fn new(llms: Vec<Box<dyn LLMProvider>>) -> Self {
29 Self {
30 llms,
31 scorings_fns: Vec::new(),
32 }
33 }
34
35 /// Adds a scoring function to evaluate LLM responses
36 ///
37 /// # Arguments
38 /// * `f` - Function that takes a response string and returns a score
39 pub fn scoring<F>(mut self, f: F) -> Self
40 where
41 F: Fn(&str) -> f32 + Send + Sync + 'static,
42 {
43 self.scorings_fns.push(Box::new(f));
44 self
45 }
46
47 /// Evaluates chat responses from all providers for the given messages
48 ///
49 /// # Arguments
50 /// * `messages` - Chat messages to send to each provider
51 ///
52 /// # Returns
53 /// Vector of evaluation results containing responses and scores
54 pub async fn evaluate_chat(
55 &self,
56 messages: &[ChatMessage],
57 ) -> Result<Vec<EvalResult>, LLMError> {
58 let mut results = Vec::new();
59 for llm in &self.llms {
60 let response = llm.chat(messages).await?;
61 let score = self.compute_score(&response.text().unwrap_or_default());
62 results.push(EvalResult {
63 text: response.text().unwrap_or_default(),
64 score,
65 });
66 }
67 Ok(results)
68 }
69
70 /// Computes the score for a given response
71 ///
72 /// # Arguments
73 /// * `response` - The response to score
74 ///
75 /// # Returns
76 /// The computed score
77 fn compute_score(&self, response: &str) -> f32 {
78 let mut total = 0.0;
79 for sc in &self.scorings_fns {
80 total += sc(response);
81 }
82 total
83 }
84}
85
/// Result of evaluating a single LLM response.
///
/// Derives `Debug`, `Clone` and `PartialEq` so results can be logged,
/// duplicated and compared by callers and in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct EvalResult {
    /// The text response from the LLM (empty when the provider returned no text)
    pub text: String,
    /// Combined score for `text`. As produced by `LLMEvaluator::evaluate_chat`
    /// this is the sum of all registered scoring functions, and `0.0` when
    /// none are registered.
    pub score: f32,
}