entrenar/monitor/llm/
memory_evaluator.rs

1//! In-memory LLM evaluator implementation.
2
3use crate::monitor::llm::{
4    heuristics::{compute_coherence, compute_groundedness, compute_harmfulness, compute_relevance},
5    EvalResult, LLMError, LLMEvaluator, LLMMetrics, LLMStats, PromptVersion, Result,
6};
7use std::collections::HashMap;
8use std::sync::{Arc, RwLock};
9
10/// In-memory LLM evaluator for testing
11#[derive(Debug, Default)]
12pub struct InMemoryLLMEvaluator {
13    /// Metrics by run ID
14    metrics: Arc<RwLock<HashMap<String, Vec<LLMMetrics>>>>,
15    /// Prompts by run ID
16    prompts: Arc<RwLock<HashMap<String, Vec<PromptVersion>>>>,
17}
18
19impl InMemoryLLMEvaluator {
20    /// Create a new in-memory evaluator
21    pub fn new() -> Self {
22        Self::default()
23    }
24}
25
26impl LLMEvaluator for InMemoryLLMEvaluator {
27    fn evaluate_response(
28        &self,
29        prompt: &str,
30        response: &str,
31        reference: Option<&str>,
32    ) -> Result<EvalResult> {
33        // Simple heuristic evaluation (production would use a model)
34        let relevance = compute_relevance(prompt, response);
35        let coherence = compute_coherence(response);
36        let groundedness = if let Some(ref_text) = reference {
37            compute_groundedness(response, ref_text)
38        } else {
39            0.5 // Unknown without reference
40        };
41        let harmfulness = compute_harmfulness(response);
42
43        Ok(EvalResult::new(relevance, coherence, groundedness, harmfulness))
44    }
45
46    fn log_llm_call(&mut self, run_id: &str, metrics: LLMMetrics) -> Result<()> {
47        let mut store =
48            self.metrics.write().map_err(|e| LLMError::Internal(format!("Lock error: {e}")))?;
49
50        store.entry(run_id.to_string()).or_default().push(metrics);
51
52        Ok(())
53    }
54
55    fn track_prompt(&mut self, run_id: &str, prompt: &PromptVersion) -> Result<()> {
56        let mut store =
57            self.prompts.write().map_err(|e| LLMError::Internal(format!("Lock error: {e}")))?;
58
59        store.entry(run_id.to_string()).or_default().push(prompt.clone());
60
61        Ok(())
62    }
63
64    fn get_metrics(&self, run_id: &str) -> Result<Vec<LLMMetrics>> {
65        let store =
66            self.metrics.read().map_err(|e| LLMError::Internal(format!("Lock error: {e}")))?;
67
68        store.get(run_id).cloned().ok_or_else(|| LLMError::RunNotFound(run_id.to_string()))
69    }
70
71    fn get_prompts(&self, run_id: &str) -> Result<Vec<PromptVersion>> {
72        let store =
73            self.prompts.read().map_err(|e| LLMError::Internal(format!("Lock error: {e}")))?;
74
75        store.get(run_id).cloned().ok_or_else(|| LLMError::RunNotFound(run_id.to_string()))
76    }
77
78    fn get_stats(&self, run_id: &str) -> Result<LLMStats> {
79        let metrics = self.get_metrics(run_id)?;
80        Ok(LLMStats::from_metrics(&metrics))
81    }
82}
entrenar/monitor/llm/memory_evaluator.rs

entrenar/monitor/llm/
memory_evaluator.rs