//! LLM Evaluation Metrics Module (#71)
//!
//! Direct observation of LLM behavior through comprehensive metrics tracking.
//!
//! # Toyota Way: 現地現物 (Genchi Genbutsu)
//!
//! "Go and see" - Direct observation of LLM behavior through metrics enables
//! data-driven decisions about prompt engineering and model selection.
//!
//! # Example
//!
//! ```ignore
//! use entrenar::monitor::llm::{LLMMetrics, PromptVersion, EvalResult, InMemoryLLMEvaluator};
//!
//! let mut evaluator = InMemoryLLMEvaluator::new();
//!
//! // Track prompt version
//! let prompt = PromptVersion::new("Summarize: {text}", vec!["text".to_string()]);
//! evaluator.track_prompt("run-1", &prompt)?;
//!
//! // Log LLM call metrics
//! let metrics = LLMMetrics::new("gpt-4")
//!     .with_tokens(100, 50)
//!     .with_latency(1500.0);
//! evaluator.log_llm_call("run-1", metrics)?;
//!
//! // Evaluate response quality
//! let result = evaluator.evaluate_response("What is 2+2?", "4", Some("4"))?;
//! ```
// Re-export all public types for API compatibility
pub use ;
pub use EvalResult;
pub use InMemoryLLMEvaluator;
pub use LLMMetrics;
pub use ;
pub use LLMStats;
pub use LLMEvaluator;