graphrag_core/critic/
mod.rs

1//! LLM-based critique of generated answers.
2//!
3//! Scores an answer for quality/faithfulness via an `OllamaClient`, returning an
4//! `EvaluationResult` that callers can use to accept, retry, or refine a response.
5
6use crate::core::{GraphRAGError, Result};
7use crate::ollama::OllamaClient;
8use serde::{Deserialize, Serialize};
9use std::sync::Arc;
10
/// Result of a critic evaluation.
///
/// [`Critic::evaluate`] deserializes this from the JSON object returned by the
/// model, so the field names are also the JSON keys requested in the critic
/// prompt (`score`, `grounded`, `feedback`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvaluationResult {
    /// Quality score; the critic prompt asks for a float between 0.0 and 1.0
    /// (1.0 = perfect).
    pub score: f32,
    /// Whether the answer is grounded in the context, as judged by the model.
    pub grounded: bool,
    /// Detailed feedback on why the score was given.
    pub feedback: String,
}
21
/// Critic for evaluating RAG answers.
///
/// Wraps a shared [`OllamaClient`] handle; both evaluation and refinement
/// prompts are sent through it.
pub struct Critic {
    /// Shared client used to send critique and refinement prompts.
    client: Arc<OllamaClient>,
}
26
27impl Critic {
28    /// Create a new critic with an Ollama client
29    pub fn new(client: Arc<OllamaClient>) -> Self {
30        Self { client }
31    }
32
33    /// Evaluate the quality of a RAG answer against the query and context
34    ///
35    /// Returns an evaluation result with score, grounding information, and feedback
36    pub async fn evaluate(
37        &self,
38        query: &str,
39        context: &[String],
40        answer: &str,
41    ) -> Result<EvaluationResult> {
42        let context_text = context.join("\n\n");
43
44        let prompt = format!(
45            "You are a strict critic for a RAG system. Your job is to evaluate the quality of a generated answer based on the provided query and retrieved context.\n\
46            \n\
47            Query: '{}'\n\
48            \n\
49            Retrieved Context:\n\
50            {}\n\
51            \n\
52            Generated Answer:\n\
53            {}\n\
54            \n\
55            Evaluate the answer on: \n\
56            1. Grounding: Is every claim in the answer supported by the context? \n\
57            2. Relevance: Does it answer the user's query? \n\
58            3. Completeness: Is it missing critical info present in the context? \n\
59            \n\
60            Return ONLY a raw JSON object with these keys: \n\
61            - 'score': float between 0.0 and 1.0 \n\
62            - 'grounded': boolean \n\
63            - 'feedback': string explanation \n\
64            \n\
65            JSON Response:",
66            query, context_text, answer
67        );
68
69        let response_text = self.client.generate(&prompt).await?;
70
71        // Clean up markdown code blocks if present
72        let cleaned_json = response_text
73            .trim()
74            .trim_start_matches("```json")
75            .trim_start_matches("```")
76            .trim_end_matches("```")
77            .trim();
78
79        let evaluation: EvaluationResult =
80            serde_json::from_str(cleaned_json).map_err(|e| GraphRAGError::Generation {
81                message: format!(
82                    "Failed to parse critic response: {}. Text: {}",
83                    e, cleaned_json
84                ),
85            })?;
86
87        Ok(evaluation)
88    }
89
90    /// Refine an answer based on specific feedback
91    pub async fn refine(
92        &self,
93        query: &str,
94        current_answer: &str,
95        feedback: &str,
96    ) -> Result<String> {
97        let prompt = format!(
98            "You are an expert editor refining an answer for a RAG system.\n\
99            \n\
100            Original Query: '{}'\n\
101            \n\
102            Current Answer:\n\
103            {}\n\
104            \n\
105            Critique/Feedback:\n\
106            {}\n\
107            \n\
108            Please rewrite the answer to address the critique while maintaining accuracy and relevance. \n\
109            Return ONLY the refined answer text.",
110            query, current_answer, feedback
111        );
112
113        self.client.generate(&prompt).await
114    }
115}
graphrag_core/critic/mod.rs

graphrag_core/critic/
mod.rs