1pub mod pipeline_validation;
21
22pub use pipeline_validation::{
23 DocumentProcessingValidator, EntityExtractionValidator, GraphConstructionValidator,
24 PhaseValidation, PipelineValidationReport, RelationshipExtractionValidator, ValidationCheck,
25};
26
27use crate::{Entity, GraphRAGError, Relationship, Result};
28use serde::{Deserialize, Serialize};
29use std::collections::HashMap;
30
/// A GraphRAG query result bundled with the retrieved context so an external
/// LLM judge can grade the answer (rendered into a prompt by
/// [`LLMEvaluationPrompt::generate`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvaluableQueryResult {
    /// The user query that was answered.
    pub query: String,
    /// The answer generated by the system.
    pub answer: String,
    /// Entities retrieved as supporting context.
    pub retrieved_entities: Vec<Entity>,
    /// Relationships retrieved as supporting context.
    pub retrieved_relationships: Vec<Relationship>,
    /// Raw text chunks retrieved as supporting context.
    pub context_chunks: Vec<String>,
    /// Counts and bookkeeping describing the retrieval run.
    pub metadata: ResultMetadata,
}
47
/// Summary statistics about how a query result was produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResultMetadata {
    /// Number of entities retrieved.
    pub entities_count: usize,
    /// Number of relationships retrieved.
    pub relationships_count: usize,
    /// Number of context chunks retrieved.
    pub chunks_count: usize,
    /// Name of the retrieval strategy used (e.g. "semantic").
    pub retrieval_strategy: String,
    /// End-to-end processing time in milliseconds.
    pub processing_time_ms: u64,
    /// Free-form extra metadata supplied by the caller.
    pub custom: HashMap<String, String>,
}
64
/// Prompt template used to ask an LLM to evaluate an [`EvaluableQueryResult`].
///
/// The template contains `{placeholder}` markers (`{query}`, `{answer}`,
/// `{entities}`, ...) that are substituted by `generate`.
#[derive(Debug, Clone)]
pub struct LLMEvaluationPrompt {
    /// Template text with `{placeholder}` markers.
    pub template: String,
}
71
72impl Default for LLMEvaluationPrompt {
73 fn default() -> Self {
74 Self {
75 template: Self::default_template(),
76 }
77 }
78}
79
80impl LLMEvaluationPrompt {
81 fn default_template() -> String {
83 r#"You are an expert evaluator for question-answering systems. Evaluate the following GraphRAG query result.
84
85## Query
86{query}
87
88## Generated Answer
89{answer}
90
91## Retrieved Context
92### Entities ({entities_count} total)
93{entities}
94
95### Relationships ({relationships_count} total)
96{relationships}
97
98### Text Chunks ({chunks_count} total)
99{chunks}
100
101## Evaluation Criteria
102Please evaluate the answer on the following dimensions (score 1-5, where 5 is best):
103
1041. **Relevance**: How well does the answer address the query?
105 - 5: Perfectly addresses the query
106 - 3: Partially addresses the query
107 - 1: Not relevant to the query
108
1092. **Faithfulness**: Is the answer grounded in the provided context?
110 - 5: Fully supported by context, no hallucination
111 - 3: Mostly supported, minor extrapolation
112 - 1: Contains unsupported claims
113
1143. **Completeness**: Does the answer cover all aspects of the query?
115 - 5: Comprehensive, addresses all aspects
116 - 3: Covers main points, misses some details
117 - 1: Incomplete, misses key information
118
1194. **Coherence**: Is the answer well-structured and clear?
120 - 5: Excellent structure, very clear
121 - 3: Adequate structure, somewhat clear
122 - 1: Poor structure, confusing
123
1245. **Groundedness**: Are entity names and relationships correctly mentioned?
125 - 5: All entities/relationships accurate
126 - 3: Minor inaccuracies
127 - 1: Significant errors in entity/relationship mentions
128
129## Output Format
130Provide your evaluation in the following JSON format:
131
132```json
133{{
134 "relevance": {{
135 "score": <1-5>,
136 "reasoning": "<brief explanation>"
137 }},
138 "faithfulness": {{
139 "score": <1-5>,
140 "reasoning": "<brief explanation>"
141 }},
142 "completeness": {{
143 "score": <1-5>,
144 "reasoning": "<brief explanation>"
145 }},
146 "coherence": {{
147 "score": <1-5>,
148 "reasoning": "<brief explanation>"
149 }},
150 "groundedness": {{
151 "score": <1-5>,
152 "reasoning": "<brief explanation>"
153 }},
154 "overall_score": <average of all scores>,
155 "summary": "<overall assessment in 2-3 sentences>"
156}}
157```
158
159Evaluate now:"#.to_string()
160 }
161
162 pub fn generate(&self, result: &EvaluableQueryResult) -> String {
164 let entities_str = self.format_entities(&result.retrieved_entities);
165 let relationships_str = self.format_relationships(&result.retrieved_relationships);
166 let chunks_str = self.format_chunks(&result.context_chunks);
167
168 self.template
169 .replace("{query}", &result.query)
170 .replace("{answer}", &result.answer)
171 .replace(
172 "{entities_count}",
173 &result.metadata.entities_count.to_string(),
174 )
175 .replace(
176 "{relationships_count}",
177 &result.metadata.relationships_count.to_string(),
178 )
179 .replace("{chunks_count}", &result.metadata.chunks_count.to_string())
180 .replace("{entities}", &entities_str)
181 .replace("{relationships}", &relationships_str)
182 .replace("{chunks}", &chunks_str)
183 }
184
185 fn format_entities(&self, entities: &[Entity]) -> String {
186 if entities.is_empty() {
187 return "No entities retrieved.".to_string();
188 }
189
190 entities
191 .iter()
192 .take(10) .map(|e| format!("- {} (type: {}, confidence: {:.2})", e.name, e.entity_type, e.confidence))
194 .collect::<Vec<_>>()
195 .join("\n")
196 }
197
198 fn format_relationships(&self, relationships: &[Relationship]) -> String {
199 if relationships.is_empty() {
200 return "No relationships retrieved.".to_string();
201 }
202
203 relationships
204 .iter()
205 .take(10) .map(|r| format!("- {} --[{}]--> {} (confidence: {:.2})",
207 r.source, r.relation_type, r.target, r.confidence))
208 .collect::<Vec<_>>()
209 .join("\n")
210 }
211
212 fn format_chunks(&self, chunks: &[String]) -> String {
213 if chunks.is_empty() {
214 return "No context chunks retrieved.".to_string();
215 }
216
217 chunks
218 .iter()
219 .take(5) .enumerate()
221 .map(|(i, chunk)| {
222 let preview = if chunk.len() > 200 {
223 format!("{}...", &chunk[..200])
224 } else {
225 chunk.clone()
226 };
227 format!("Chunk {}:\n{}\n", i + 1, preview)
228 })
229 .collect::<Vec<_>>()
230 .join("\n")
231 }
232}
233
/// Parsed LLM judgment of a query result across five quality dimensions.
///
/// Produced by deserializing the JSON the evaluation prompt asks the LLM to
/// emit (see [`LLMEvaluation::from_json`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMEvaluation {
    /// How well the answer addresses the query.
    pub relevance: DimensionScore,
    /// Whether the answer is grounded in the provided context.
    pub faithfulness: DimensionScore,
    /// Whether the answer covers all aspects of the query.
    pub completeness: DimensionScore,
    /// Whether the answer is well-structured and clear.
    pub coherence: DimensionScore,
    /// Whether entity/relationship mentions are accurate.
    pub groundedness: DimensionScore,
    /// Average of the five dimension scores, as reported by the LLM.
    pub overall_score: f32,
    /// Two-to-three-sentence overall assessment from the LLM.
    pub summary: String,
}
252
/// A single evaluation dimension: a 1-5 score plus the judge's reasoning.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DimensionScore {
    /// Score from 1 (worst) to 5 (best).
    pub score: u8,
    /// Brief explanation for the score.
    pub reasoning: String,
}
261
262impl LLMEvaluation {
263 pub fn from_json(json_str: &str) -> Result<Self> {
265 serde_json::from_str(json_str).map_err(|e| GraphRAGError::Serialization {
266 message: format!("Failed to parse LLM evaluation JSON: {}", e),
267 })
268 }
269
270 pub fn passes_threshold(&self, min_score: f32) -> bool {
272 self.overall_score >= min_score
273 }
274
275 pub fn weakest_dimension(&self) -> (&str, &DimensionScore) {
277 let dimensions = [
278 ("relevance", &self.relevance),
279 ("faithfulness", &self.faithfulness),
280 ("completeness", &self.completeness),
281 ("coherence", &self.coherence),
282 ("groundedness", &self.groundedness),
283 ];
284
285 dimensions
286 .iter()
287 .min_by_key(|(_, score)| score.score)
288 .map(|(name, score)| (*name, *score))
289 .unwrap_or(("unknown", &self.relevance))
290 }
291
292 pub fn report(&self) -> String {
294 format!(
295 r#"## LLM Evaluation Report
296
297**Overall Score**: {:.2}/5.0
298
299### Dimension Scores
300- Relevance: {}/5 - {}
301- Faithfulness: {}/5 - {}
302- Completeness: {}/5 - {}
303- Coherence: {}/5 - {}
304- Groundedness: {}/5 - {}
305
306### Summary
307{}
308
309### Weakest Dimension
310{}: {} (score {}/5)
311"#,
312 self.overall_score,
313 self.relevance.score,
314 self.relevance.reasoning,
315 self.faithfulness.score,
316 self.faithfulness.reasoning,
317 self.completeness.score,
318 self.completeness.reasoning,
319 self.coherence.score,
320 self.coherence.reasoning,
321 self.groundedness.score,
322 self.groundedness.reasoning,
323 self.summary,
324 self.weakest_dimension().0,
325 self.weakest_dimension().1.reasoning,
326 self.weakest_dimension().1.score
327 )
328 }
329}
330
/// Fluent builder for [`EvaluableQueryResult`].
///
/// `query` and `answer` are mandatory; everything else defaults to empty.
/// The metadata counts are derived from the supplied collections at build
/// time, so they cannot drift out of sync.
pub struct EvaluableQueryResultBuilder {
    // Required; build() fails with GraphRAGError::Config when None.
    query: Option<String>,
    // Required; build() fails with GraphRAGError::Config when None.
    answer: Option<String>,
    entities: Vec<Entity>,
    relationships: Vec<Relationship>,
    chunks: Vec<String>,
    // Defaults to "unknown" when not set.
    retrieval_strategy: String,
    processing_time_ms: u64,
    custom: HashMap<String, String>,
}
342
343impl EvaluableQueryResultBuilder {
344 pub fn new() -> Self {
346 Self {
347 query: None,
348 answer: None,
349 entities: Vec::new(),
350 relationships: Vec::new(),
351 chunks: Vec::new(),
352 retrieval_strategy: "unknown".to_string(),
353 processing_time_ms: 0,
354 custom: HashMap::new(),
355 }
356 }
357
358 pub fn query(mut self, query: impl Into<String>) -> Self {
360 self.query = Some(query.into());
361 self
362 }
363
364 pub fn answer(mut self, answer: impl Into<String>) -> Self {
366 self.answer = Some(answer.into());
367 self
368 }
369
370 pub fn entities(mut self, entities: Vec<Entity>) -> Self {
372 self.entities = entities;
373 self
374 }
375
376 pub fn relationships(mut self, relationships: Vec<Relationship>) -> Self {
378 self.relationships = relationships;
379 self
380 }
381
382 pub fn chunks(mut self, chunks: Vec<String>) -> Self {
384 self.chunks = chunks;
385 self
386 }
387
388 pub fn retrieval_strategy(mut self, strategy: impl Into<String>) -> Self {
390 self.retrieval_strategy = strategy.into();
391 self
392 }
393
394 pub fn processing_time_ms(mut self, time_ms: u64) -> Self {
396 self.processing_time_ms = time_ms;
397 self
398 }
399
400 pub fn custom_metadata(mut self, key: String, value: String) -> Self {
402 self.custom.insert(key, value);
403 self
404 }
405
406 pub fn build(self) -> Result<EvaluableQueryResult> {
408 let query = self.query.ok_or_else(|| GraphRAGError::Config {
409 message: "Query is required".to_string(),
410 })?;
411 let answer = self.answer.ok_or_else(|| GraphRAGError::Config {
412 message: "Answer is required".to_string(),
413 })?;
414
415 Ok(EvaluableQueryResult {
416 query,
417 answer,
418 metadata: ResultMetadata {
419 entities_count: self.entities.len(),
420 relationships_count: self.relationships.len(),
421 chunks_count: self.chunks.len(),
422 retrieval_strategy: self.retrieval_strategy,
423 processing_time_ms: self.processing_time_ms,
424 custom: self.custom,
425 },
426 retrieved_entities: self.entities,
427 retrieved_relationships: self.relationships,
428 context_chunks: self.chunks,
429 })
430 }
431}
432
433impl Default for EvaluableQueryResultBuilder {
434 fn default() -> Self {
435 Self::new()
436 }
437}
438
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{ChunkId, EntityId};

    // End-to-end check that prompt generation substitutes the query, answer,
    // entity list, and template tail into the rendered prompt.
    #[test]
    fn test_prompt_generation() {
        let entity = Entity {
            id: EntityId::new("e1".to_string()),
            name: "Alice".to_string(),
            entity_type: "person".to_string(),
            confidence: 0.9,
            mentions: vec![],
            embedding: None,
            first_mentioned: None,
            last_mentioned: None,
            temporal_validity: None,
        };

        let result = EvaluableQueryResultBuilder::new()
            .query("Who is Alice?")
            .answer("Alice is a person mentioned in the context.")
            .entities(vec![entity])
            .chunks(vec!["Alice works at Stanford.".to_string()])
            .retrieval_strategy("semantic")
            .build()
            .unwrap();

        let prompt = LLMEvaluationPrompt::default();
        let generated = prompt.generate(&result);

        // Substituted placeholders must appear in the rendered prompt.
        assert!(generated.contains("Who is Alice?"));
        assert!(generated.contains("Alice is a person"));
        assert!(generated.contains("Alice (type: person"));
        // Static template tail survives substitution.
        assert!(generated.contains("Evaluate now:"));
    }

    // Round-trip: the JSON shape the prompt requests parses into
    // LLMEvaluation, and threshold checks behave as expected.
    #[test]
    fn test_evaluation_parsing() {
        let json = r#"{
            "relevance": {
                "score": 5,
                "reasoning": "Perfectly answers the question"
            },
            "faithfulness": {
                "score": 4,
                "reasoning": "Mostly grounded in context"
            },
            "completeness": {
                "score": 4,
                "reasoning": "Covers main points"
            },
            "coherence": {
                "score": 5,
                "reasoning": "Well structured"
            },
            "groundedness": {
                "score": 5,
                "reasoning": "All entities accurate"
            },
            "overall_score": 4.6,
            "summary": "High quality answer"
        }"#;

        let eval = LLMEvaluation::from_json(json).unwrap();
        assert_eq!(eval.relevance.score, 5);
        assert_eq!(eval.faithfulness.score, 4);
        // 4.6 clears a 4.0 threshold but not 5.0.
        assert!(eval.passes_threshold(4.0));
        assert!(!eval.passes_threshold(5.0));
    }

    // The unique minimum (faithfulness = 3) must be reported as weakest.
    #[test]
    fn test_weakest_dimension() {
        let json = r#"{
            "relevance": {"score": 5, "reasoning": "Perfect"},
            "faithfulness": {"score": 3, "reasoning": "Some issues"},
            "completeness": {"score": 4, "reasoning": "Good"},
            "coherence": {"score": 5, "reasoning": "Excellent"},
            "groundedness": {"score": 4, "reasoning": "Accurate"},
            "overall_score": 4.2,
            "summary": "Good overall"
        }"#;

        let eval = LLMEvaluation::from_json(json).unwrap();
        let (name, score) = eval.weakest_dimension();
        assert_eq!(name, "faithfulness");
        assert_eq!(score.score, 3);
    }

    // The Markdown report must surface the overall score, per-dimension
    // lines, and the summary text.
    #[test]
    fn test_report_generation() {
        let json = r#"{
            "relevance": {"score": 5, "reasoning": "Perfect"},
            "faithfulness": {"score": 4, "reasoning": "Good"},
            "completeness": {"score": 4, "reasoning": "Complete"},
            "coherence": {"score": 5, "reasoning": "Clear"},
            "groundedness": {"score": 4, "reasoning": "Accurate"},
            "overall_score": 4.4,
            "summary": "Excellent answer"
        }"#;

        let eval = LLMEvaluation::from_json(json).unwrap();
        let report = eval.report();

        // Accept either "4.4" or the "{:.2}"-formatted "4.40".
        assert!(
            report.contains("4.4") || report.contains("4.40"),
            "Expected score 4.4 not found in report: {}",
            report
        );
        assert!(
            report.contains("5/5") && report.contains("Relevance"),
            "Expected 'Relevance: 5/5' not found"
        );
        assert!(report.contains("Excellent answer"));

        // Float compare with tolerance rather than exact equality.
        assert!((eval.overall_score - 4.4).abs() < 0.01);
    }
}