pub mod pipeline_validation;

pub use pipeline_validation::{
    PhaseValidation, ValidationCheck, PipelineValidationReport,
    DocumentProcessingValidator, EntityExtractionValidator,
    RelationshipExtractionValidator, GraphConstructionValidator,
};

use crate::{Entity, Relationship, Result, GraphRAGError};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

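/// A GraphRAG query result packaged with the retrieved context that produced
/// it, so the whole exchange can be scored by an external LLM judge.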
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvaluableQueryResult {
    /// The original user query.
    pub query: String,
    /// The answer generated by the system.
    pub answer: String,
    /// Entities retrieved as context for the answer.
    pub retrieved_entities: Vec<Entity>,
    /// Relationships retrieved as context for the answer.
    pub retrieved_relationships: Vec<Relationship>,
    /// Raw text chunks retrieved as context.
    pub context_chunks: Vec<String>,
    /// Statistics about the retrieval.
    pub metadata: ResultMetadata,
}

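/// Summary statistics recorded alongside an [`EvaluableQueryResult`].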
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResultMetadata {
    /// Number of entities retrieved.
    pub entities_count: usize,
    /// Number of relationships retrieved.
    pub relationships_count: usize,
    /// Number of text chunks retrieved.
    pub chunks_count: usize,
    /// Name of the retrieval strategy used.
    pub retrieval_strategy: String,
    /// End-to-end processing time in milliseconds.
    pub processing_time_ms: u64,
    /// Arbitrary additional key/value metadata.
    pub custom: HashMap<String, String>,
}

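/// Prompt template for LLM-as-a-judge evaluation of a query result.
///
/// The template contains `{query}`, `{answer}`, `{entities}`, and related
/// placeholders that [`generate`](Self::generate) fills in from an
/// [`EvaluableQueryResult`]. A minimal usage sketch (the `graphrag::evaluation`
/// path is an assumption; adjust to wherever this module lives):
///
/// ```ignore
/// use graphrag::evaluation::{EvaluableQueryResultBuilder, LLMEvaluationPrompt};
///
/// let result = EvaluableQueryResultBuilder::new()
///     .query("Who is Alice?")
///     .answer("Alice is a person mentioned in the context.")
///     .build()?;
/// // Send the generated string to an LLM of your choice for judging.
/// let prompt = LLMEvaluationPrompt::default().generate(&result);
/// ```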
#[derive(Debug, Clone)]
pub struct LLMEvaluationPrompt {
    /// The raw prompt text containing `{query}`-style placeholders.
    pub template: String,
}

impl Default for LLMEvaluationPrompt {
    fn default() -> Self {
        Self {
            template: Self::default_template(),
        }
    }
}

impl LLMEvaluationPrompt {
    /// The built-in evaluation rubric. Placeholders use single braces because
    /// substitution is done with `str::replace` in `generate`, not `format!`.
    fn default_template() -> String {
        r#"You are an expert evaluator for question-answering systems. Evaluate the following GraphRAG query result.

## Query
{query}

## Generated Answer
{answer}

## Retrieved Context
### Entities ({entities_count} total)
{entities}

### Relationships ({relationships_count} total)
{relationships}

### Text Chunks ({chunks_count} total)
{chunks}

## Evaluation Criteria
Please evaluate the answer on the following dimensions (score 1-5, where 5 is best):

1. **Relevance**: How well does the answer address the query?
   - 5: Perfectly addresses the query
   - 3: Partially addresses the query
   - 1: Not relevant to the query

2. **Faithfulness**: Is the answer grounded in the provided context?
   - 5: Fully supported by context, no hallucination
   - 3: Mostly supported, minor extrapolation
   - 1: Contains unsupported claims

3. **Completeness**: Does the answer cover all aspects of the query?
   - 5: Comprehensive, addresses all aspects
   - 3: Covers main points, misses some details
   - 1: Incomplete, misses key information

4. **Coherence**: Is the answer well-structured and clear?
   - 5: Excellent structure, very clear
   - 3: Adequate structure, somewhat clear
   - 1: Poor structure, confusing

5. **Groundedness**: Are entity names and relationships correctly mentioned?
   - 5: All entities/relationships accurate
   - 3: Minor inaccuracies
   - 1: Significant errors in entity/relationship mentions

## Output Format
Provide your evaluation in the following JSON format:

```json
{
  "relevance": {
    "score": <1-5>,
    "reasoning": "<brief explanation>"
  },
  "faithfulness": {
    "score": <1-5>,
    "reasoning": "<brief explanation>"
  },
  "completeness": {
    "score": <1-5>,
    "reasoning": "<brief explanation>"
  },
  "coherence": {
    "score": <1-5>,
    "reasoning": "<brief explanation>"
  },
  "groundedness": {
    "score": <1-5>,
    "reasoning": "<brief explanation>"
  },
  "overall_score": <average of all scores>,
  "summary": "<overall assessment in 2-3 sentences>"
}
```

Evaluate now:"#.to_string()
    }

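    /// Fills the template placeholders from `result`, truncating the entity,
    /// relationship, and chunk listings to keep the prompt compact.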
    pub fn generate(&self, result: &EvaluableQueryResult) -> String {
        let entities_str = self.format_entities(&result.retrieved_entities);
        let relationships_str = self.format_relationships(&result.retrieved_relationships);
        let chunks_str = self.format_chunks(&result.context_chunks);

        self.template
            .replace("{query}", &result.query)
            .replace("{answer}", &result.answer)
            .replace("{entities_count}", &result.metadata.entities_count.to_string())
            .replace("{relationships_count}", &result.metadata.relationships_count.to_string())
            .replace("{chunks_count}", &result.metadata.chunks_count.to_string())
            .replace("{entities}", &entities_str)
            .replace("{relationships}", &relationships_str)
            .replace("{chunks}", &chunks_str)
    }

    fn format_entities(&self, entities: &[Entity]) -> String {
        if entities.is_empty() {
            return "No entities retrieved.".to_string();
        }

        entities
            .iter()
            .take(10) // Cap the listing at 10 entities to keep the prompt compact.
            .map(|e| format!("- {} (type: {}, confidence: {:.2})", e.name, e.entity_type, e.confidence))
            .collect::<Vec<_>>()
            .join("\n")
    }

    fn format_relationships(&self, relationships: &[Relationship]) -> String {
        if relationships.is_empty() {
            return "No relationships retrieved.".to_string();
        }

        relationships
            .iter()
            .take(10) // Cap the listing at 10 relationships to keep the prompt compact.
            .map(|r| format!("- {} --[{}]--> {} (confidence: {:.2})",
                r.source, r.relation_type, r.target, r.confidence))
            .collect::<Vec<_>>()
            .join("\n")
    }

    fn format_chunks(&self, chunks: &[String]) -> String {
        if chunks.is_empty() {
            return "No context chunks retrieved.".to_string();
        }

        chunks
            .iter()
            .take(5) // Cap the listing at 5 chunks to keep the prompt compact.
            .enumerate()
            .map(|(i, chunk)| {
                // Truncate long chunks to a 200-character preview. Counting
                // chars rather than slicing bytes avoids panicking when byte
                // 200 falls inside a multi-byte UTF-8 character.
                let preview = if chunk.chars().count() > 200 {
                    format!("{}...", chunk.chars().take(200).collect::<String>())
                } else {
                    chunk.clone()
                };
                format!("Chunk {}:\n{}\n", i + 1, preview)
            })
            .collect::<Vec<_>>()
            .join("\n")
    }
}

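/// A parsed LLM judgment: five 1-5 dimension scores plus an overall average
/// and a free-text summary, matching the JSON schema in the default prompt.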
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMEvaluation {
    /// How well the answer addresses the query.
    pub relevance: DimensionScore,
    /// How well the answer is grounded in the retrieved context.
    pub faithfulness: DimensionScore,
    /// How fully the answer covers all aspects of the query.
    pub completeness: DimensionScore,
    /// How well-structured and clear the answer is.
    pub coherence: DimensionScore,
    /// How accurately entities and relationships are mentioned.
    pub groundedness: DimensionScore,
    /// Average of the five dimension scores.
    pub overall_score: f32,
    /// Overall assessment in a few sentences.
    pub summary: String,
}

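/// A single 1-5 score with the judge's brief justification.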
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DimensionScore {
    /// Score from 1 (worst) to 5 (best).
    pub score: u8,
    /// Brief explanation for the score.
    pub reasoning: String,
}

impl LLMEvaluation {
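    /// Parses the JSON object the judge was instructed to emit; any schema
    /// mismatch surfaces as a `Serialization` error.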
    pub fn from_json(json_str: &str) -> Result<Self> {
        serde_json::from_str(json_str).map_err(|e| GraphRAGError::Serialization {
            message: format!("Failed to parse LLM evaluation JSON: {}", e),
        })
    }

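    /// Returns true if the overall score meets `min_score` (on the 1-5 scale).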
    pub fn passes_threshold(&self, min_score: f32) -> bool {
        self.overall_score >= min_score
    }

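    /// Returns the lowest-scoring dimension by name; ties resolve to the
    /// dimension listed first (relevance before faithfulness, and so on).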
    pub fn weakest_dimension(&self) -> (&str, &DimensionScore) {
        let dimensions = [
            ("relevance", &self.relevance),
            ("faithfulness", &self.faithfulness),
            ("completeness", &self.completeness),
            ("coherence", &self.coherence),
            ("groundedness", &self.groundedness),
        ];

        dimensions
            .iter()
            .min_by_key(|(_, score)| score.score)
            .map(|(name, score)| (*name, *score))
            // The array above is never empty, so this fallback is unreachable.
            .unwrap_or(("unknown", &self.relevance))
    }

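    /// Renders a human-readable Markdown report of all dimension scores, the
    /// summary, and the weakest dimension.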
    pub fn report(&self) -> String {
        let (weakest_name, weakest) = self.weakest_dimension();
        format!(
            r#"## LLM Evaluation Report

**Overall Score**: {:.2}/5.0

### Dimension Scores
- Relevance: {}/5 - {}
- Faithfulness: {}/5 - {}
- Completeness: {}/5 - {}
- Coherence: {}/5 - {}
- Groundedness: {}/5 - {}

### Summary
{}

### Weakest Dimension
{}: {} (score {}/5)
"#,
            self.overall_score,
            self.relevance.score, self.relevance.reasoning,
            self.faithfulness.score, self.faithfulness.reasoning,
            self.completeness.score, self.completeness.reasoning,
            self.coherence.score, self.coherence.reasoning,
            self.groundedness.score, self.groundedness.reasoning,
            self.summary,
            weakest_name,
            weakest.reasoning,
            weakest.score
        )
    }
}

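/// Builder for [`EvaluableQueryResult`]; `query` and `answer` are required,
/// everything else defaults to empty. A minimal sketch (same assumed paths as
/// the [`LLMEvaluationPrompt`] example):
///
/// ```ignore
/// let result = EvaluableQueryResultBuilder::new()
///     .query("Who is Alice?")
///     .answer("Alice is a person mentioned in the context.")
///     .retrieval_strategy("semantic")
///     .build()?;
/// assert_eq!(result.metadata.entities_count, 0);
/// ```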
pub struct EvaluableQueryResultBuilder {
    query: Option<String>,
    answer: Option<String>,
    entities: Vec<Entity>,
    relationships: Vec<Relationship>,
    chunks: Vec<String>,
    retrieval_strategy: String,
    processing_time_ms: u64,
    custom: HashMap<String, String>,
}

impl EvaluableQueryResultBuilder {
    /// Creates an empty builder.
    pub fn new() -> Self {
        Self {
            query: None,
            answer: None,
            entities: Vec::new(),
            relationships: Vec::new(),
            chunks: Vec::new(),
            retrieval_strategy: "unknown".to_string(),
            processing_time_ms: 0,
            custom: HashMap::new(),
        }
    }

    /// Sets the user query (required).
    pub fn query(mut self, query: impl Into<String>) -> Self {
        self.query = Some(query.into());
        self
    }

    /// Sets the generated answer (required).
    pub fn answer(mut self, answer: impl Into<String>) -> Self {
        self.answer = Some(answer.into());
        self
    }

    /// Sets the retrieved entities.
    pub fn entities(mut self, entities: Vec<Entity>) -> Self {
        self.entities = entities;
        self
    }

    /// Sets the retrieved relationships.
    pub fn relationships(mut self, relationships: Vec<Relationship>) -> Self {
        self.relationships = relationships;
        self
    }

    /// Sets the retrieved text chunks.
    pub fn chunks(mut self, chunks: Vec<String>) -> Self {
        self.chunks = chunks;
        self
    }

    /// Names the retrieval strategy recorded in the metadata.
    pub fn retrieval_strategy(mut self, strategy: impl Into<String>) -> Self {
        self.retrieval_strategy = strategy.into();
        self
    }

    /// Records the end-to-end processing time in milliseconds.
    pub fn processing_time_ms(mut self, time_ms: u64) -> Self {
        self.processing_time_ms = time_ms;
        self
    }

    /// Adds an arbitrary key/value pair to the custom metadata.
    pub fn custom_metadata(mut self, key: String, value: String) -> Self {
        self.custom.insert(key, value);
        self
    }

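    /// Consumes the builder, deriving the count fields of [`ResultMetadata`]
    /// from the collected context; fails if `query` or `answer` is missing.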
    pub fn build(self) -> Result<EvaluableQueryResult> {
        let query = self.query.ok_or_else(|| GraphRAGError::Config {
            message: "Query is required".to_string(),
        })?;
        let answer = self.answer.ok_or_else(|| GraphRAGError::Config {
            message: "Answer is required".to_string(),
        })?;

        Ok(EvaluableQueryResult {
            query,
            answer,
            metadata: ResultMetadata {
                entities_count: self.entities.len(),
                relationships_count: self.relationships.len(),
                chunks_count: self.chunks.len(),
                retrieval_strategy: self.retrieval_strategy,
                processing_time_ms: self.processing_time_ms,
                custom: self.custom,
            },
            retrieved_entities: self.entities,
            retrieved_relationships: self.relationships,
            context_chunks: self.chunks,
        })
    }
}

impl Default for EvaluableQueryResultBuilder {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::EntityId;

    #[test]
    fn test_prompt_generation() {
        let entity = Entity {
            id: EntityId::new("e1".to_string()),
            name: "Alice".to_string(),
            entity_type: "person".to_string(),
            confidence: 0.9,
            mentions: vec![],
            embedding: None,
        };

        let result = EvaluableQueryResultBuilder::new()
            .query("Who is Alice?")
            .answer("Alice is a person mentioned in the context.")
            .entities(vec![entity])
            .chunks(vec!["Alice works at Stanford.".to_string()])
            .retrieval_strategy("semantic")
            .build()
            .unwrap();

        let prompt = LLMEvaluationPrompt::default();
        let generated = prompt.generate(&result);

        assert!(generated.contains("Who is Alice?"));
        assert!(generated.contains("Alice is a person"));
        assert!(generated.contains("Alice (type: person"));
        assert!(generated.contains("Evaluate now:"));
    }

    #[test]
    fn test_evaluation_parsing() {
        let json = r#"{
            "relevance": {
                "score": 5,
                "reasoning": "Perfectly answers the question"
            },
            "faithfulness": {
                "score": 4,
                "reasoning": "Mostly grounded in context"
            },
            "completeness": {
                "score": 4,
                "reasoning": "Covers main points"
            },
            "coherence": {
                "score": 5,
                "reasoning": "Well structured"
            },
            "groundedness": {
                "score": 5,
                "reasoning": "All entities accurate"
            },
            "overall_score": 4.6,
            "summary": "High quality answer"
        }"#;

        let eval = LLMEvaluation::from_json(json).unwrap();
        assert_eq!(eval.relevance.score, 5);
        assert_eq!(eval.faithfulness.score, 4);
        assert!(eval.passes_threshold(4.0));
        assert!(!eval.passes_threshold(5.0));
    }

    #[test]
    fn test_weakest_dimension() {
        let json = r#"{
            "relevance": {"score": 5, "reasoning": "Perfect"},
            "faithfulness": {"score": 3, "reasoning": "Some issues"},
            "completeness": {"score": 4, "reasoning": "Good"},
            "coherence": {"score": 5, "reasoning": "Excellent"},
            "groundedness": {"score": 4, "reasoning": "Accurate"},
            "overall_score": 4.2,
            "summary": "Good overall"
        }"#;

        let eval = LLMEvaluation::from_json(json).unwrap();
        let (name, score) = eval.weakest_dimension();
        assert_eq!(name, "faithfulness");
        assert_eq!(score.score, 3);
    }

    #[test]
    fn test_report_generation() {
        let json = r#"{
            "relevance": {"score": 5, "reasoning": "Perfect"},
            "faithfulness": {"score": 4, "reasoning": "Good"},
            "completeness": {"score": 4, "reasoning": "Complete"},
            "coherence": {"score": 5, "reasoning": "Clear"},
            "groundedness": {"score": 4, "reasoning": "Accurate"},
            "overall_score": 4.4,
            "summary": "Excellent answer"
        }"#;

        let eval = LLMEvaluation::from_json(json).unwrap();
        let report = eval.report();

        assert!(report.contains("4.4") || report.contains("4.40"),
            "Expected score 4.4 not found in report: {}", report);
        assert!(report.contains("5/5") && report.contains("Relevance"),
            "Expected 'Relevance: 5/5' not found");
        assert!(report.contains("Excellent answer"));

        assert!((eval.overall_score - 4.4).abs() < 0.01);
    }
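
    // A negative-path sketch that was not in the original tests: build() is
    // expected to fail when the required fields are missing, per the Config
    // errors constructed in build() above.
    #[test]
    fn test_builder_requires_query_and_answer() {
        // No query and no answer: the builder must refuse to build.
        assert!(EvaluableQueryResultBuilder::new().build().is_err());

        // A query alone is still incomplete; the answer is also required.
        let result = EvaluableQueryResultBuilder::new()
            .query("Who is Alice?")
            .build();
        assert!(result.is_err());
    }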
}