//! graphrag_core/critic — LLM-as-critic evaluation and refinement of RAG answers.

1use crate::core::{GraphRAGError, Result};
2use crate::ollama::OllamaClient;
3use serde::{Deserialize, Serialize};
4use std::sync::Arc;
5
6/// Result of a critic evaluation
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct EvaluationResult {
9    /// Score from 0.0 to 1.0 (1.0 = perfect)
10    pub score: f32,
11    /// Whether the answer is grounded in the context
12    pub grounded: bool,
13    /// Detailed feedback on why the score was given
14    pub feedback: String,
15}
16
17/// Critic for evaluating RAG answers
18pub struct Critic {
19    client: Arc<OllamaClient>,
20}
21
22impl Critic {
23    /// Create a new critic with an Ollama client
24    pub fn new(client: Arc<OllamaClient>) -> Self {
25        Self { client }
26    }
27
28    /// Evaluate the quality of a RAG answer against the query and context
29    ///
30    /// Returns an evaluation result with score, grounding information, and feedback
31    pub async fn evaluate(
32        &self,
33        query: &str,
34        context: &[String],
35        answer: &str,
36    ) -> Result<EvaluationResult> {
37        let context_text = context.join("\n\n");
38
39        let prompt = format!(
40            "You are a strict critic for a RAG system. Your job is to evaluate the quality of a generated answer based on the provided query and retrieved context.\n\
41            \n\
42            Query: '{}'\n\
43            \n\
44            Retrieved Context:\n\
45            {}\n\
46            \n\
47            Generated Answer:\n\
48            {}\n\
49            \n\
50            Evaluate the answer on: \n\
51            1. Grounding: Is every claim in the answer supported by the context? \n\
52            2. Relevance: Does it answer the user's query? \n\
53            3. Completeness: Is it missing critical info present in the context? \n\
54            \n\
55            Return ONLY a raw JSON object with these keys: \n\
56            - 'score': float between 0.0 and 1.0 \n\
57            - 'grounded': boolean \n\
58            - 'feedback': string explanation \n\
59            \n\
60            JSON Response:",
61            query, context_text, answer
62        );
63
64        let response_text = self.client.generate(&prompt).await?;
65
66        // Clean up markdown code blocks if present
67        let cleaned_json = response_text
68            .trim()
69            .trim_start_matches("```json")
70            .trim_start_matches("```")
71            .trim_end_matches("```")
72            .trim();
73
74        let evaluation: EvaluationResult =
75            serde_json::from_str(cleaned_json).map_err(|e| GraphRAGError::Generation {
76                message: format!(
77                    "Failed to parse critic response: {}. Text: {}",
78                    e, cleaned_json
79                ),
80            })?;
81
82        Ok(evaluation)
83    }
84
85    /// Refine an answer based on specific feedback
86    pub async fn refine(
87        &self,
88        query: &str,
89        current_answer: &str,
90        feedback: &str,
91    ) -> Result<String> {
92        let prompt = format!(
93            "You are an expert editor refining an answer for a RAG system.\n\
94            \n\
95            Original Query: '{}'\n\
96            \n\
97            Current Answer:\n\
98            {}\n\
99            \n\
100            Critique/Feedback:\n\
101            {}\n\
102            \n\
103            Please rewrite the answer to address the critique while maintaining accuracy and relevance. \n\
104            Return ONLY the refined answer text.",
105            query, current_answer, feedback
106        );
107
108        self.client.generate(&prompt).await
109    }
110}