ragcli 0.2.0

CLI for local RAG
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
//! RAG query planning, evidence assessment, and answer verification via LLM.
//!
//! The agent module drives the iterative retrieval loop. It classifies a user question,
//! builds a retrieval plan, gathers evidence, and verifies that generated answers are
//! actually supported by the retrieved chunks.
//!
//! # Query lifecycle
//!
//! 1. [`build_query_plan`] — classifies the question type and chooses a retrieval strategy
//! 2. Retrieval against the store (handled by the caller / `app.rs`)
//! 3. [`assess_evidence`] — LLM judges whether retrieved candidates are sufficient
//! 4. Optionally iterate with rewritten subqueries
//! 5. [`verify_answer_support`] — LLM checks that the final answer is grounded in evidence

use crate::jsonutil::parse_json;
use crate::models::Generator;
use crate::retrieval::RetrievalCandidate;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};

const SUMMARY_TEXT_MAX_CHARS: usize = 600;

/// Configuration for the agentic retrieval loop.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq)]
pub struct AgentQueryConfig {
    /// Maximum candidates to return from the store per iteration.
    pub top_k: usize,
    /// Total candidates to fetch before reranking / pruning.
    pub fetch_k: usize,
    /// Maximum retrieval iterations before giving up.
    pub max_iterations: usize,
    /// Whether to rewrite the query each iteration.
    pub enable_rewrite: bool,
    /// Whether to run the LLM reranker after retrieval.
    pub enable_rerank: bool,
}

/// Class of a user question, influencing which retrieval strategy is selected.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
#[serde(rename_all = "PascalCase")]
pub enum QuestionType {
    /// A single factual lookup — "What is X?"
    Lookup,
    /// Comparison between two or more things — "How does X differ from Y?"
    Compare,
    /// Multi-step reasoning requiring multiple facts — "Why does X happen?"
    MultiHop,
    /// Summarization or synthesis — "Summarize the debate around X."
    Summary,
    /// Open-ended exploration — "What related topics exist?"
    Exploratory,
}

/// Retrieval strategy selected by the query planner.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
#[serde(rename_all = "PascalCase")]
pub enum RetrievalStrategy {
    /// Use the original question directly against the store.
    Direct,
    /// Rewrite the question into semantic / keyword variants before retrieving.
    Rewrite,
    /// Decompose the question into subqueries, retrieve each, then combine.
    Decompose,
    /// Retrieve broadly then rerank with an LLM.
    BroadThenRerank,
}

/// Retrieval plan produced by the LLM query planner.
#[derive(Clone, Debug, PartialEq)]
pub struct QueryPlan {
    /// Classified question type.
    pub question_type: QuestionType,
    /// Selected retrieval strategy.
    pub strategy: RetrievalStrategy,
    /// Human-readable reasoning for why this strategy was chosen.
    pub reasoning: String,
    /// Decomposed subqueries when strategy is `Decompose`.
    pub subqueries: Vec<String>,
}

/// Result of an LLM evidence assessment.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
#[serde(rename_all = "PascalCase")]
pub enum EvidenceVerdict {
    /// Retrieved candidates fully cover what is needed to answer.
    Sufficient,
    /// Candidates are partially relevant; more iterations or reformulation may help.
    Partial,
    /// Candidates do not contain enough relevant information.
    Insufficient,
}

/// One iteration of the agentic retrieval loop.
#[derive(Clone, Debug, PartialEq)]
pub struct AgentIteration {
    /// Zero-based iteration index.
    pub iteration: usize,
    /// Query variants used in this iteration (original + rewritten).
    pub query_variants: Vec<String>,
    /// Total candidates retrieved before pruning.
    pub retrieved_count: usize,
    /// Candidates retained after deduplication and pruning.
    pub kept_count: usize,
    /// LLM judgment of evidence sufficiency.
    pub sufficiency: EvidenceVerdict,
    /// Human-readable notes from this iteration.
    pub notes: Vec<String>,
}

/// Final result of a full agentic retrieval session.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq)]
pub struct AgentResult {
    /// Generated answer string.
    pub answer: String,
    /// Retrieval candidates used as context for generation.
    pub contexts: Vec<RetrievalCandidate>,
    /// Query plan selected by the planner.
    pub plan: QueryPlan,
    /// All iterations run before reaching a terminal verdict.
    pub iterations: Vec<AgentIteration>,
    /// Support check confirming answer is grounded in evidence.
    pub support_check: SupportCheck,
}

/// Result of verifying that a generated answer is supported by retrieved evidence.
#[derive(Clone, Debug, PartialEq)]
pub struct SupportCheck {
    /// Whether all substantive claims in the answer have supporting evidence.
    pub supported: bool,
    /// List of specific claims in the answer that lack evidence.
    pub unsupported_claims: Vec<String>,
    /// Additional notes from the verifier.
    pub notes: Vec<String>,
}

/// Evidence assessment returned by the LLM.
#[derive(Clone, Debug, PartialEq)]
pub struct EvidenceAssessment {
    /// Overall verdict on evidence sufficiency.
    pub verdict: EvidenceVerdict,
    /// Specific aspects of the question that are not covered by retrieved candidates.
    pub missing_aspects: Vec<String>,
}

#[derive(Debug, Deserialize)]
struct QueryPlanPayload {
    question_type: String,
    strategy: String,
    reasoning: Option<String>,
    #[serde(default)]
    subqueries: Vec<String>,
}

#[derive(Debug, Deserialize)]
struct EvidencePayload {
    verdict: String,
    #[serde(default)]
    missing_aspects: Vec<String>,
}

#[derive(Debug, Deserialize)]
struct SupportPayload {
    supported: bool,
    #[serde(default)]
    unsupported_claims: Vec<String>,
    #[serde(default)]
    notes: Vec<String>,
}

/// Builds a structured query plan from a user question using an LLM.
///
/// The LLM is instructed to classify the question type and select an appropriate
/// retrieval strategy, returning strict JSON. Returns an error if the LLM misformats
/// its response.
pub async fn build_query_plan(generator: &Generator, question: &str) -> Result<QueryPlan> {
    let response = generator
        .generate_json(
            "You build retrieval plans for a local RAG CLI. Respond with strict JSON only and no markdown fences.",
            &format!(
                concat!(
                    "Return a JSON object with keys question_type, strategy, reasoning, and subqueries. ",
                    "question_type must be one of Lookup, Compare, MultiHop, Summary, Exploratory. ",
                    "strategy must be one of Direct, Rewrite, Decompose, BroadThenRerank. ",
                    "subqueries must be an array and may be empty. ",
                    "Question: {}"
                ),
                question,
            ),
            256,
        )
        .await
        .context("generate query plan JSON")?;

    parse_query_plan(&response)
}

/// Asks an LLM to judge whether the retrieved candidates are sufficient to answer
/// the given question.
///
/// Returns an [`EvidenceAssessment`] with a verdict and, if insufficient,
/// a list of missing aspects. This is used to decide whether to continue iterating
/// or to produce an answer from what was gathered.
pub async fn assess_evidence(
    generator: &Generator,
    question: &str,
    candidates: &[RetrievalCandidate],
) -> Result<EvidenceAssessment> {
    let response = generator
        .generate_json(
            "You assess whether retrieved evidence is sufficient to answer a local RAG query. Respond with strict JSON only and no markdown fences.",
            &format!(
                concat!(
                    "Return a JSON object with keys verdict and missing_aspects. ",
                    "verdict must be one of sufficient, partial, or insufficient. ",
                    "missing_aspects must be an array and may be empty.\n\n",
                    "Question: {}\n\nEvidence:\n{}"
                ),
                question,
                summarize_candidates(candidates),
            ),
            256,
        )
        .await
        .context("generate evidence assessment JSON")?;

    parse_evidence_assessment(&response)
}

/// Asks an LLM to verify that every substantive claim in an answer is grounded
/// in the provided evidence candidates.
///
/// Returns a [`SupportCheck`] indicating whether the answer is fully supported,
/// and listing any claims that lack backing. Used as a final check before returning
/// an answer to the user.
pub async fn verify_answer_support(
    generator: &Generator,
    question: &str,
    answer: &str,
    evidence: &[RetrievalCandidate],
) -> Result<SupportCheck> {
    let response = generator
        .generate_json(
            "You verify whether an answer is supported by retrieved evidence for a local RAG CLI. Respond with strict JSON only and no markdown fences.",
            &format!(
                concat!(
                    "Return a JSON object with keys supported, unsupported_claims, and notes. ",
                    "unsupported_claims and notes must be arrays and may be empty.\n\n",
                    "Question: {}\n\nAnswer: {}\n\nEvidence:\n{}"
                ),
                question,
                answer,
                summarize_candidates(evidence),
            ),
            256,
        )
        .await
        .context("generate support check JSON")?;

    parse_support_check(&response)
}

/// Heuristic fallback plan used when the LLM fails to produce a parseable plan.
///
/// Uses simple keyword detection to decide between `Direct` and `Decompose`:
/// presence of "and" / "interact" / "connect" triggers decomposition.
pub fn fallback_query_plan(question: &str) -> QueryPlan {
    let lower = question.to_ascii_lowercase();
    let strategy =
        if lower.contains(" and ") || lower.contains("interact") || lower.contains("connect") {
            //FIXME: not language agnostic. should be handeled by agent
            RetrievalStrategy::Decompose
        } else {
            RetrievalStrategy::Direct
        };
    let question_type = if matches!(strategy, RetrievalStrategy::Decompose) {
        QuestionType::MultiHop
    } else {
        QuestionType::Lookup
    };
    QueryPlan {
        question_type,
        strategy,
        reasoning: "Fallback heuristic plan based on the query wording.".to_string(),
        subqueries: Vec::new(),
    }
}

/// Heuristic fallback for evidence assessment when the LLM is unavailable.
///
/// Returns `Sufficient` for 3+ candidates, `Partial` for 1–2, and `Insufficient` for none.
pub fn fallback_evidence_assessment(candidates: &[RetrievalCandidate]) -> EvidenceAssessment {
    let verdict = if candidates.len() >= 3 {
        EvidenceVerdict::Sufficient
    } else if candidates.is_empty() {
        EvidenceVerdict::Insufficient
    } else {
        EvidenceVerdict::Partial
    };

    EvidenceAssessment {
        verdict,
        missing_aspects: Vec::new(),
    }
}

/// Heuristic fallback support check when the LLM is unavailable.
///
/// Returns `supported = true` if the answer is non-empty and there is at least one
/// evidence candidate; otherwise `supported = false`.
pub fn fallback_support_check(answer: &str, evidence: &[RetrievalCandidate]) -> SupportCheck {
    let supported = !answer.trim().is_empty() && !evidence.is_empty();
    SupportCheck {
        supported,
        unsupported_claims: if supported {
            Vec::new()
        } else {
            vec!["Unable to confirm answer support from retrieved evidence.".to_string()]
        },
        notes: vec!["Fallback support heuristic used.".to_string()],
    }
}

/// Parses a JSON query plan from the LLM. Used by [`build_query_plan`].
pub fn parse_query_plan(raw: &str) -> Result<QueryPlan> {
    let payload: QueryPlanPayload = parse_json(raw, "parse query plan JSON")?;
    Ok(QueryPlan {
        question_type: parse_question_type(&payload.question_type)?,
        strategy: parse_retrieval_strategy(&payload.strategy)?,
        reasoning: payload.reasoning.unwrap_or_default().trim().to_string(),
        subqueries: payload
            .subqueries
            .into_iter()
            .map(|item| item.trim().to_string())
            .filter(|item| !item.is_empty())
            .collect(),
    })
}

/// Parses an evidence assessment JSON from the LLM. Used by [`assess_evidence`].
pub fn parse_evidence_assessment(raw: &str) -> Result<EvidenceAssessment> {
    let payload: EvidencePayload = parse_json(raw, "parse evidence assessment JSON")?;
    Ok(EvidenceAssessment {
        verdict: parse_evidence_verdict(&payload.verdict)?,
        missing_aspects: payload
            .missing_aspects
            .into_iter()
            .map(|item| item.trim().to_string())
            .filter(|item| !item.is_empty())
            .collect(),
    })
}

/// Parses a support check JSON from the LLM. Used by [`verify_answer_support`].
pub fn parse_support_check(raw: &str) -> Result<SupportCheck> {
    let payload: SupportPayload = parse_json(raw, "parse support check JSON")?;
    Ok(SupportCheck {
        supported: payload.supported,
        unsupported_claims: payload
            .unsupported_claims
            .into_iter()
            .map(|item| item.trim().to_string())
            .filter(|item| !item.is_empty())
            .collect(),
        notes: payload
            .notes
            .into_iter()
            .map(|item| item.trim().to_string())
            .filter(|item| !item.is_empty())
            .collect(),
    })
}

/// Builds a compact multi-line summary of retrieval candidates for inclusion in LLM prompts.
///
/// Each line is `source=<path> page=<N> text=<truncated>` so the LLM can reference
/// specific chunks without receiving the full text of every candidate.
fn summarize_candidates(candidates: &[RetrievalCandidate]) -> String {
    candidates
        .iter()
        .map(|candidate| {
            format!(
                "source={} page={} text={}",
                candidate.source_path,
                candidate.page,
                summarize_candidate_text(&candidate.chunk_text)
            )
        })
        .collect::<Vec<_>>()
        .join("\n")
}

/// Truncates chunk text to [`SUMMARY_TEXT_MAX_CHARS`] characters for prompt size control.
fn summarize_candidate_text(text: &str) -> String {
    let normalized = text.split_whitespace().collect::<Vec<_>>().join(" ");
    let mut truncated = normalized
        .chars()
        .take(SUMMARY_TEXT_MAX_CHARS)
        .collect::<String>();
    if normalized.chars().count() > SUMMARY_TEXT_MAX_CHARS {
        truncated.push_str("...");
    }
    truncated
}

/// Parses a question type string (case-insensitive) into a [`QuestionType`].
fn parse_question_type(value: &str) -> Result<QuestionType> {
    match value.trim().to_ascii_lowercase().as_str() {
        "lookup" => Ok(QuestionType::Lookup),
        "compare" => Ok(QuestionType::Compare),
        "multihop" => Ok(QuestionType::MultiHop),
        "summary" => Ok(QuestionType::Summary),
        "exploratory" => Ok(QuestionType::Exploratory),
        other => anyhow::bail!("unknown question type: {other}"),
    }
}

/// Parses a retrieval strategy string (case-insensitive) into a [`RetrievalStrategy`].
fn parse_retrieval_strategy(value: &str) -> Result<RetrievalStrategy> {
    match value.trim().to_ascii_lowercase().as_str() {
        "direct" => Ok(RetrievalStrategy::Direct),
        "rewrite" => Ok(RetrievalStrategy::Rewrite),
        "decompose" => Ok(RetrievalStrategy::Decompose),
        "broadthenrerank" => Ok(RetrievalStrategy::BroadThenRerank),
        other => anyhow::bail!("unknown retrieval strategy: {other}"),
    }
}

/// Parses an evidence verdict string (case-insensitive) into an [`EvidenceVerdict`].
fn parse_evidence_verdict(value: &str) -> Result<EvidenceVerdict> {
    match value.trim().to_ascii_lowercase().as_str() {
        "sufficient" => Ok(EvidenceVerdict::Sufficient),
        "partial" => Ok(EvidenceVerdict::Partial),
        "insufficient" => Ok(EvidenceVerdict::Insufficient),
        other => anyhow::bail!("unknown evidence verdict: {other}"),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn candidate(source: &str, chunk_text: &str) -> RetrievalCandidate {
        RetrievalCandidate {
            id: String::new(),
            source_path: source.to_string(),
            chunk_text: chunk_text.to_string(),
            metadata: String::new(),
            page: 0,
            chunk_index: 0,
            vector_score: None,
            keyword_score: None,
            fused_score: None,
            rerank_score: None,
        }
    }

    #[test]
    fn test_parse_query_plan_accepts_json_fences() {
        let plan = parse_query_plan(
            "```json\n{\"question_type\":\"MultiHop\",\"strategy\":\"Decompose\",\"reasoning\":\"needs two facts\",\"subqueries\":[\"config resolution\",\"metadata validation\"]}\n```",
        )
        .unwrap();

        assert_eq!(plan.question_type, QuestionType::MultiHop);
        assert_eq!(plan.strategy, RetrievalStrategy::Decompose);
        assert_eq!(plan.subqueries.len(), 2);
    }

    #[test]
    fn test_fallback_evidence_assessment_marks_partial_for_small_result_sets() {
        let assessment = fallback_evidence_assessment(&[candidate("src/config.rs", "config")]);
        assert_eq!(assessment.verdict, EvidenceVerdict::Partial);
    }

    #[test]
    fn test_parse_support_check_errors_on_invalid_json() {
        let err = parse_support_check("not json").unwrap_err().to_string();
        assert!(err.contains("parse support check JSON"));
    }

    #[test]
    fn test_parse_query_plan_accepts_case_insensitive_values() {
        let plan = parse_query_plan(
            "{\"question_type\":\"lookup\",\"strategy\":\"decompose\",\"reasoning\":\"ok\",\"subqueries\":[]}",
        )
        .unwrap();
        assert_eq!(plan.question_type, QuestionType::Lookup);
        assert_eq!(plan.strategy, RetrievalStrategy::Decompose);
    }

    #[test]
    fn test_summarize_candidates_includes_all_candidates() {
        let summary = summarize_candidates(&[
            candidate("a", "one"),
            candidate("b", "two"),
            candidate("c", "three"),
            candidate("d", "four"),
            candidate("e", "five"),
            candidate("f", "six"),
            candidate("g", "seven"),
        ]);

        assert!(summary.contains("source=g page=0 text=seven"));
    }

    #[test]
    fn test_summarize_candidates_normalizes_newlines_and_spaces() {
        let summary = summarize_candidates(&[candidate("src/a.rs", "one\n\n two\tthree")]);
        assert!(summary.contains("source=src/a.rs page=0 text=one two three"));
    }

    #[test]
    fn test_summarize_candidate_text_truncates_long_chunks() {
        let text = format!("{} tail", "a".repeat(SUMMARY_TEXT_MAX_CHARS + 20));
        let summarized = summarize_candidate_text(&text);
        assert_eq!(summarized.len(), SUMMARY_TEXT_MAX_CHARS + 3);
        assert!(summarized.ends_with("..."));
    }

    #[test]
    fn test_parse_evidence_assessment_includes_raw_snippet_in_error() {
        let err = parse_evidence_assessment("not json at all")
            .unwrap_err()
            .to_string();
        assert!(err.contains("parse evidence assessment JSON"));
        assert!(err.contains("not json"));
    }
}