1use std::collections::HashMap;
2use std::path::Path;
3
4use anyhow::{Context, Result};
5use serde::{Deserialize, Serialize};
6
7use crate::graph::store::GraphStore;
8use crate::graph::{GraphQuery, SymbolDetail};
9
10use super::ReviewReport;
11
12#[derive(Debug, Clone, Serialize)]
17pub struct EnrichedReport {
18 pub base_report: ReviewReport,
19 pub enriched_symbols: Vec<EnrichedSymbol>,
20 pub file_diffs: HashMap<String, String>,
21}
22
23#[derive(Debug, Clone, Serialize)]
24pub struct EnrichedSymbol {
25 pub name: String,
26 pub kind: String,
27 pub file: String,
28 pub change_kind: String,
29 pub source: Option<String>,
30 pub callers: Vec<String>,
31 pub callees: Vec<String>,
32 pub similar_symbols: Vec<SimilarSymbol>,
33 pub complexity: Option<u32>,
34}
35
36#[derive(Debug, Clone, Serialize)]
37pub struct SimilarSymbol {
38 pub name: String,
39 pub file: String,
40 pub score: f32,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct LlmReviewResult {
49 pub summary: String,
50 pub findings: Vec<LlmFinding>,
51 pub test_plan: Vec<TestCase>,
52 pub risk_assessment: Vec<RiskItem>,
53 pub deployment_notes: Option<String>,
54 pub token_usage: Option<TokenUsage>,
55}
56
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct LlmFinding {
59 pub file: String,
60 pub line: Option<u32>,
61 pub severity: String,
62 pub category: String,
63 pub message: String,
64 pub suggestion: Option<String>,
65}
66
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct TestCase {
69 pub category: String,
70 pub priority: String,
71 pub description: String,
72 pub related_finding: Option<usize>,
73}
74
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct RiskItem {
77 pub severity: String,
78 pub area: String,
79 pub description: String,
80 pub affected_symbols: Vec<String>,
81}
82
83#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct TokenUsage {
85 pub input_tokens: u64,
86 pub output_tokens: u64,
87}
88
89pub fn enrich_review(
94 root: &Path,
95 report: &ReviewReport,
96 store: &GraphStore,
97) -> Result<EnrichedReport> {
98 let conn = store.connection()?;
99 let gq = GraphQuery::new(&conn);
100 let canonical = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
101
102 let mut enriched_symbols = Vec::new();
103
104 for sym in &report.changed_symbols {
105 let escaped_name = sym.name.replace('\'', "\\'");
106 let escaped_file = sym.file.replace('\'', "\\'");
107
108 let id_query = format!(
110 "MATCH (s:Symbol) \
111 WHERE s.name = '{escaped_name}' AND s.file ENDS WITH '{escaped_file}' \
112 RETURN s.id, s.start_line, s.end_line, s.complexity"
113 );
114 let rows = gq.raw_query(&id_query).unwrap_or_default();
115 let (symbol_id, complexity) = if let Some(row) = rows.first() {
116 let id = row.first().cloned().unwrap_or_default();
117 let cx: Option<u32> = row.get(3).and_then(|v| v.parse().ok());
118 (id, cx)
119 } else {
120 (String::new(), None)
121 };
122
123 let callers = if !symbol_id.is_empty() {
125 gq.callers_of(&symbol_id).unwrap_or_default()
126 } else {
127 vec![]
128 };
129
130 let callees = if !symbol_id.is_empty() {
132 gq.callees_of(&symbol_id).unwrap_or_default()
133 } else {
134 vec![]
135 };
136
137 let source = if !symbol_id.is_empty() {
139 gq.find_symbol_by_id(&symbol_id)
140 .ok()
141 .flatten()
142 .and_then(|detail| read_symbol_source(&canonical, &detail))
143 } else {
144 None
145 };
146
147 let similar_symbols = find_similar_symbols(&gq, &sym.name, &sym.file);
148
149 enriched_symbols.push(EnrichedSymbol {
150 name: sym.name.clone(),
151 kind: sym.kind.clone(),
152 file: sym.file.clone(),
153 change_kind: sym.change_kind.clone(),
154 source,
155 callers,
156 callees,
157 similar_symbols,
158 complexity,
159 });
160 }
161
162 let file_diffs = collect_file_diffs(root, &report.base_ref)?;
164
165 Ok(EnrichedReport {
166 base_report: report.clone(),
167 enriched_symbols,
168 file_diffs,
169 })
170}
171
172fn read_symbol_source(root: &Path, detail: &SymbolDetail) -> Option<String> {
173 let file_path = root.join(&detail.file);
174 let content = std::fs::read_to_string(&file_path).ok()?;
175 let lines: Vec<&str> = content.lines().collect();
176 let start = detail.start_line.saturating_sub(1) as usize;
177 let end = (detail.end_line as usize).min(lines.len());
178 if start >= end {
179 return None;
180 }
181 Some(lines[start..end].join("\n"))
182}
183
184fn find_similar_symbols(gq: &GraphQuery, name: &str, exclude_file: &str) -> Vec<SimilarSymbol> {
185 let escaped = name.replace('\'', "\\'");
186 let query = format!(
187 "MATCH (s:Symbol) \
188 WHERE s.name CONTAINS '{escaped}' AND NOT s.file ENDS WITH '{exclude_file}' \
189 RETURN s.name, s.file \
190 LIMIT 5"
191 );
192 match gq.raw_query(&query) {
193 Ok(rows) => rows
194 .into_iter()
195 .filter_map(|row| {
196 let n = row.first()?.clone();
197 let f = row.get(1)?.clone();
198 if n == name {
199 return None;
200 }
201 Some(SimilarSymbol {
202 name: n,
203 file: f,
204 score: 1.0,
205 })
206 })
207 .collect(),
208 Err(_) => vec![],
209 }
210}
211
212fn collect_file_diffs(root: &Path, base_ref: &str) -> Result<HashMap<String, String>> {
213 let output = std::process::Command::new("git")
214 .args(["diff", "-U5", base_ref])
215 .current_dir(root)
216 .output()
217 .context("git diff")?;
218
219 let full_diff = String::from_utf8_lossy(&output.stdout);
220 let mut diffs: HashMap<String, String> = HashMap::new();
221 let mut current_file = String::new();
222 let mut current_diff = String::new();
223
224 for line in full_diff.lines() {
225 if line.starts_with("diff --git") {
226 if !current_file.is_empty() {
227 diffs.insert(current_file.clone(), current_diff.clone());
228 }
229 current_file.clear();
230 current_diff.clear();
231 } else if line.starts_with("+++ b/") {
232 current_file = line.strip_prefix("+++ b/").unwrap().to_string();
233 }
234 if !current_file.is_empty() {
235 current_diff.push_str(line);
236 current_diff.push('\n');
237 }
238 }
239 if !current_file.is_empty() {
240 diffs.insert(current_file, current_diff);
241 }
242
243 Ok(diffs)
244}
245
/// Connection settings for the LLM API used by `call_claude`.
pub struct LlmConfig {
    /// API key sent in the `x-api-key` request header.
    pub api_key: String,
    /// Model identifier sent in the request body.
    pub model: String,
    /// Value of the `max_tokens` request field.
    pub max_tokens: u32,
    /// Base URL of the API; `/v1/messages` is appended to it.
    pub base_url: String,
}
256
257impl Default for LlmConfig {
258 fn default() -> Self {
259 Self {
260 api_key: String::new(),
261 model: "claude-sonnet-4-20250514".to_string(),
262 max_tokens: 16384,
263 base_url: "https://api.anthropic.com".to_string(),
264 }
265 }
266}
267
268impl LlmConfig {
269 pub fn from_env() -> Result<Self> {
270 let api_key = std::env::var("ANTHROPIC_API_KEY").context("ANTHROPIC_API_KEY not set")?;
271 let model = std::env::var("INFIGRAPH_LLM_MODEL")
272 .unwrap_or_else(|_| "claude-sonnet-4-20250514".to_string());
273 let base_url = std::env::var("INFIGRAPH_LLM_BASE_URL")
274 .unwrap_or_else(|_| "https://api.anthropic.com".to_string());
275 let max_tokens: u32 = std::env::var("INFIGRAPH_LLM_MAX_TOKENS")
276 .unwrap_or_else(|_| "16384".to_string())
277 .parse()
278 .unwrap_or(16384);
279 Ok(Self {
280 api_key,
281 model,
282 max_tokens,
283 base_url,
284 })
285 }
286}
287
288pub fn build_review_prompt(enriched: &EnrichedReport, context: Option<&str>) -> String {
289 let mut prompt = String::with_capacity(32_000);
290
291 prompt.push_str(
292 "You are an expert code reviewer with access to the code knowledge graph. \
293 You have callers, callees, similar code, complexity, and blast radius data \
294 for each changed symbol. Use this to find issues that a diff-only reviewer would miss.\n\n");
295
296 prompt.push_str(&format!(
298 "**Auto-detected context:** {}\n\
299 **PR type:** {} | **Scope:** {} | **Files:** {} | **Symbols:** {}\n\n",
300 enriched.base_report.context.inferred_intent,
301 enriched.base_report.context.pr_type,
302 enriched.base_report.context.scope,
303 enriched.base_report.context.changed_file_count,
304 enriched.base_report.context.changed_symbol_count,
305 ));
306
307 if let Some(ctx) = context {
309 prompt.push_str(&format!(
310 "**User-provided context:** {}\n\
311 Prioritize the user's stated intent over auto-detection. \
312 Flag anything that contradicts or undermines this goal.\n\n",
313 ctx
314 ));
315 }
316
317 match enriched.base_report.context.scope {
319 super::PrScope::CrossModule => {
320 prompt.push_str(
321 "This PR spans multiple modules. Pay special attention to:\n\
322 - Cross-module API contract violations\n\
323 - Shared state mutations that affect other modules\n\
324 - Import/dependency changes that could break build order\n\n",
325 );
326 }
327 super::PrScope::CrossRepo => {
328 prompt.push_str(
329 "This PR is part of a cross-repo change. Pay special attention to:\n\
330 - Interface/COM/API compatibility across repos\n\
331 - Deployment ordering constraints\n\
332 - Cross-repo blast radius\n\
333 - Data format/schema compatibility\n\n",
334 );
335 }
336 _ => {}
337 }
338
339 match enriched.base_report.context.pr_type {
341 super::PrType::Migration => {
342 prompt.push_str(
343 "This is a MIGRATION PR. Critical review areas:\n\
344 - Data loss risk during migration\n\
345 - Rollback path — can the old system resume if migration fails?\n\
346 - Schema compatibility between old and new\n\
347 - NULL/default value handling differences\n\
348 - Performance under production data volume\n\n",
349 );
350 }
351 super::PrType::BugFix => {
352 prompt.push_str(
353 "This is a BUG FIX PR. Focus on:\n\
354 - Does the fix actually address the root cause?\n\
355 - Could the fix introduce regressions in callers?\n\
356 - Is there a test that reproduces the bug?\n\n",
357 );
358 }
359 super::PrType::Refactor => {
360 prompt.push_str(
361 "This is a REFACTOR PR. Focus on:\n\
362 - Behavioral equivalence — does the refactor preserve existing behavior?\n\
363 - Are all callers updated to use the new API?\n\
364 - Are there any callers in other repos not visible here?\n\n",
365 );
366 }
367 _ => {}
368 }
369
370 prompt.push_str(
371 "Respond ONLY with JSON in this exact format:\n\
372 ```json\n\
373 {\n\
374 \"summary\": \"2-3 sentence PR summary\",\n\
375 \"findings\": [\n\
376 {\n\
377 \"file\": \"path/to/file\",\n\
378 \"line\": 42,\n\
379 \"severity\": \"critical|high|medium|low|info\",\n\
380 \"category\": \"bug|security|performance|logic|breaking_change|consistency|dead_code|duplication\",\n\
381 \"message\": \"what is wrong and why\",\n\
382 \"suggestion\": \"how to fix it\"\n\
383 }\n\
384 ],\n\
385 \"test_plan\": [\n\
386 {\n\
387 \"category\": \"data_integrity|concurrency|regression|edge_case|integration|security\",\n\
388 \"priority\": \"must_pass|should_pass|nice_to_have\",\n\
389 \"description\": \"specific test scenario with inputs and expected output\",\n\
390 \"related_finding\": 0\n\
391 }\n\
392 ],\n\
393 \"risk_assessment\": [\n\
394 {\n\
395 \"severity\": \"high|medium|low\",\n\
396 \"area\": \"short label (e.g. 'COM boundary', 'DI container', 'schema migration')\",\n\
397 \"description\": \"what could go wrong and under what conditions\",\n\
398 \"affected_symbols\": [\"ClassName.method\", \"OtherClass\"]\n\
399 }\n\
400 ],\n\
401 \"deployment_notes\": \"ordering constraints, feature flags, rollback plan, migration steps. null if none.\"\n\
402 }\n\
403 ```\n\n\
404 ## Review priorities\n\
405 1. **Bugs and logic errors** — use callers/callees to check contract violations\n\
406 2. **Breaking changes** — check every caller. Will they break?\n\
407 3. **Consistency** — similar symbols that need the same change but weren't changed\n\
408 4. **Concurrency** — thread safety of changed code, especially shared state\n\
409 5. **Security** — injection, auth bypass, data exposure\n\
410 6. **Data integrity** — type coercions, NULL handling, precision loss\n\
411 7. **Dead code** — new functions/methods added with zero callers (unused code)\n\
412 8. **Duplication** — near-identical functions that should be refactored into shared code\n\n\
413 ## Test plan rules\n\
414 Generate tests from THREE sources:\n\
415 1. **Blast radius tests** — from callers/callees graph. If symbol X changed, every caller of X needs a test proving it still works.\n\
416 2. **Logic permutation tests** — from the actual code. For each branch/condition, test all paths. For type coercions (e.g. StrToIntDef), test: valid input, empty string, null, negative, overflow, float-to-int. For boolean fields, test: true, false, null, 0, 1, -1.\n\
417 3. **Consistency tests** — from similar symbols. If 19 entities share a pattern (e.g. COMtoBE), test that ALL 19 follow it. Flag any that diverge.\n\n\
418 Additional rules:\n\
419 - Generate specific, actionable test cases (not generic \"add tests\")\n\
420 - Every critical/high finding MUST have at least one must_pass test case\n\
421 - Include inputs and expected outputs where possible\n\
422 - `related_finding` is the 0-based index into findings array. null if no related finding\n\
423 - Cover: happy path, error path, boundary conditions, concurrency if applicable\n\
424 - For data operations: test CRUD roundtrip, test with max-length strings, test with unicode\n\
425 - For migrations: test upgrade path (old->new), test rollback, test partial failure recovery\n\n\
426 ## What NOT to flag\n\
427 Style nits, missing comments, naming preferences, formatting. Only actionable findings.\n\n\
428 ---\n\n"
429 );
430
431 let (mut interesting, bulk): (Vec<&EnrichedSymbol>, Vec<&EnrichedSymbol>) =
433 enriched.enriched_symbols.iter().partition(|s| {
434 !s.callers.is_empty() || !s.callees.is_empty() || s.complexity.is_some_and(|c| c >= 10)
435 });
436 let detail_cap = 100;
438 if interesting.len() > detail_cap {
439 interesting.sort_by(|a, b| {
440 let score_a = a.callers.len() + a.callees.len() + a.complexity.unwrap_or(0) as usize;
441 let score_b = b.callers.len() + b.callees.len() + b.complexity.unwrap_or(0) as usize;
442 score_b.cmp(&score_a)
443 });
444 let overflow: Vec<&EnrichedSymbol> = interesting.split_off(detail_cap);
445 if !overflow.is_empty() {
447 prompt.push_str(&format!(
448 "### Additional Symbols ({} with minor graph connections, summarized)\n\n",
449 overflow.len()
450 ));
451 let mut by_file: HashMap<&str, Vec<&str>> = HashMap::new();
452 for s in &overflow {
453 by_file.entry(s.file.as_str()).or_default().push(&s.name);
454 }
455 let mut sorted: Vec<_> = by_file.into_iter().collect();
456 sorted.sort_by_key(|a| std::cmp::Reverse(a.1.len()));
457 for (file, names) in sorted.iter().take(20) {
458 prompt.push_str(&format!("- `{}`: {} symbols\n", file, names.len()));
459 }
460 prompt.push('\n');
461 }
462 }
463
464 if !bulk.is_empty() {
466 let mut groups: HashMap<(String, String), Vec<String>> = HashMap::new();
467 for s in &bulk {
468 groups
469 .entry((s.file.clone(), s.change_kind.clone()))
470 .or_default()
471 .push(format!("{} `{}`", s.kind, s.name));
472 }
473
474 prompt.push_str(&format!(
475 "### Bulk Symbol Changes ({} symbols with no graph connections)\n\n",
476 bulk.len()
477 ));
478 let mut sorted_groups: Vec<_> = groups.into_iter().collect();
479 sorted_groups.sort_by(|a, b| a.0.cmp(&b.0));
480 for ((file, change_kind), symbols) in &sorted_groups {
481 prompt.push_str(&format!(
482 "- `{}` ({}): {} symbols — {}\n",
483 file,
484 change_kind,
485 symbols.len(),
486 if symbols.len() <= 5 {
487 symbols.join(", ")
488 } else {
489 format!(
490 "{}, ... and {} more",
491 symbols[..3].join(", "),
492 symbols.len() - 3
493 )
494 }
495 ));
496 }
497 prompt.push('\n');
498 }
499
500 if !interesting.is_empty() {
502 prompt.push_str(&format!(
503 "### Detailed Symbol Analysis ({} symbols with graph connections)\n\n",
504 interesting.len()
505 ));
506 }
507 for sym in &interesting {
508 prompt.push_str(&format!(
509 "#### {} `{}` in `{}` ({})\n\n",
510 sym.kind, sym.name, sym.file, sym.change_kind
511 ));
512
513 if let Some(ref source) = sym.source {
514 let truncated = if source.len() > 2000 {
515 &source[..2000]
516 } else {
517 source.as_str()
518 };
519 prompt.push_str(&format!("**Current source:**\n```\n{}\n```\n\n", truncated));
520 }
521
522 if !sym.callers.is_empty() {
523 let callers: Vec<&str> = sym.callers.iter().take(10).map(|s| s.as_str()).collect();
524 prompt.push_str(&format!(
525 "**Callers ({} total):** {}\n\n",
526 sym.callers.len(),
527 callers.join(", ")
528 ));
529 }
530
531 if !sym.callees.is_empty() {
532 let callees: Vec<&str> = sym.callees.iter().take(10).map(|s| s.as_str()).collect();
533 prompt.push_str(&format!(
534 "**Callees ({} total):** {}\n\n",
535 sym.callees.len(),
536 callees.join(", ")
537 ));
538 }
539
540 if !sym.similar_symbols.is_empty() {
541 prompt.push_str("**Similar code (may need same change):**\n");
542 for s in &sym.similar_symbols {
543 prompt.push_str(&format!(
544 " - `{}` in `{}` (similarity: {:.2})\n",
545 s.name, s.file, s.score
546 ));
547 }
548 prompt.push('\n');
549 }
550
551 if let Some(cx) = sym.complexity {
552 prompt.push_str(&format!("**Complexity:** {}\n\n", cx));
553 }
554 }
555
556 let diff_budget: usize = 80_000;
558 let per_file_cap: usize = 2000;
559 let mut sorted_diffs: Vec<(&String, &String)> = enriched.file_diffs.iter().collect();
560 sorted_diffs.sort_by_key(|(f, _)| {
561 if f.ends_with("_TLB.pas") || f.ends_with(".generated.cs") || f.ends_with(".g.cs") {
562 1
563 } else {
564 0
565 }
566 });
567 prompt.push_str(&format!(
568 "---\n\n### File Diffs ({} files)\n\n",
569 sorted_diffs.len()
570 ));
571 let mut diff_used: usize = 0;
572 let mut skipped = 0usize;
573 for (file, diff) in &sorted_diffs {
574 if diff_used >= diff_budget {
575 skipped += 1;
576 continue;
577 }
578 let truncated = if diff.len() > per_file_cap {
579 &diff[..per_file_cap]
580 } else {
581 diff.as_str()
582 };
583 diff_used += truncated.len();
584 prompt.push_str(&format!("#### `{}`\n```diff\n{}\n```\n\n", file, truncated));
585 }
586 if skipped > 0 {
587 prompt.push_str(&format!(
588 "_{} files omitted (diff budget exceeded)_\n\n",
589 skipped
590 ));
591 }
592
593 if !enriched.base_report.security_findings.is_empty() {
595 prompt.push_str("### Existing Security Findings (from static analysis)\n");
596 for f in &enriched.base_report.security_findings {
597 prompt.push_str(&format!(
598 " - [{}] {}:{} -- {}\n",
599 f.severity, f.file, f.line, f.message
600 ));
601 }
602 prompt.push('\n');
603 }
604
605 if !enriched.base_report.complexity_hotspots.is_empty() {
606 prompt.push_str("### Complexity Hotspots\n");
607 for h in &enriched.base_report.complexity_hotspots {
608 prompt.push_str(&format!(
609 " - `{}` in `{}` (complexity: {})\n",
610 h.name, h.file, h.complexity
611 ));
612 }
613 prompt.push('\n');
614 }
615
616 if !enriched.base_report.dead_code.is_empty() {
617 let dead = &enriched.base_report.dead_code;
618 let cap = 50;
619 prompt.push_str(&format!(
620 "### Dead Code ({} symbols with zero callers)\n",
621 dead.len()
622 ));
623 if dead.len() > cap {
624 let mut by_file: HashMap<&str, Vec<&str>> = HashMap::new();
626 for d in dead {
627 by_file.entry(d.file.as_str()).or_default().push(&d.name);
628 }
629 let mut sorted: Vec<_> = by_file.into_iter().collect();
630 sorted.sort_by_key(|a| std::cmp::Reverse(a.1.len()));
631 for (file, names) in sorted.iter().take(20) {
632 prompt.push_str(&format!(" - `{}`: {} symbols", file, names.len()));
633 if names.len() <= 3 {
634 prompt.push_str(&format!(" -- {}\n", names.join(", ")));
635 } else {
636 prompt.push_str(&format!(
637 " -- {}, ... +{}\n",
638 names[..3].join(", "),
639 names.len() - 3
640 ));
641 }
642 }
643 if sorted.len() > 20 {
644 prompt.push_str(&format!(" ... and {} more files\n", sorted.len() - 20));
645 }
646 } else {
647 for d in dead {
648 prompt.push_str(&format!(" - {} `{}` in `{}`\n", d.kind, d.name, d.file));
649 }
650 }
651 prompt.push_str("\nNote: auto-generated files (*_TLB.pas, COM type libraries) often show as dead code because COM dispatch calls are invisible to the call graph. Focus on dead code in non-generated files.\n\n");
652 }
653
654 if !enriched.base_report.code_clones.is_empty() {
655 prompt.push_str("### Code Clones (near-duplicate functions)\n");
656 for c in &enriched.base_report.code_clones {
657 prompt.push_str(&format!(
658 " - [{:.2}] `{}` ({}) <-> `{}` ({})\n",
659 c.similarity, c.symbol_a, c.file_a, c.symbol_b, c.file_b,
660 ));
661 }
662 prompt
663 .push_str("\nSuggest refactoring clones into shared functions where appropriate.\n\n");
664 }
665
666 if !enriched.base_report.consistency_issues.is_empty() {
667 prompt.push_str("### Consistency Issues (divergent patterns)\n");
668 for ci in &enriched.base_report.consistency_issues {
669 prompt.push_str(&format!(
670 " - Pattern: {} -- {}/{} consistent\n",
671 ci.pattern, ci.actual_count, ci.expected_count,
672 ));
673 for o in &ci.outliers {
674 prompt.push_str(&format!(" ! {}\n", o));
675 }
676 }
677 prompt.push_str("\nFlag consistency violations — all instances of a pattern should follow the same structure.\n\n");
678 }
679
680 prompt
681}
682
683pub fn call_claude(config: &LlmConfig, prompt: &str) -> Result<LlmReviewResult> {
684 let mut messages: Vec<serde_json::Value> =
685 vec![serde_json::json!({"role": "user", "content": prompt})];
686 let mut full_text = String::new();
687 let mut total_input: u64 = 0;
688 let mut total_output: u64 = 0;
689 let max_continuations = 5;
690
691 for attempt in 0..=max_continuations {
692 let body = serde_json::json!({
693 "model": config.model,
694 "max_tokens": config.max_tokens,
695 "messages": messages,
696 });
697
698 let resp = ureq::post(&format!("{}/v1/messages", config.base_url))
699 .set("x-api-key", &config.api_key)
700 .set("anthropic-version", "2023-06-01")
701 .set("content-type", "application/json")
702 .send_string(&body.to_string())
703 .context("Claude API request failed")?;
704
705 let resp_body: serde_json::Value = resp.into_json().context("parse Claude response")?;
706
707 let chunk = resp_body["content"]
708 .as_array()
709 .and_then(|arr| arr.first())
710 .and_then(|block| block["text"].as_str())
711 .unwrap_or("");
712
713 full_text.push_str(chunk);
714 total_input += resp_body["usage"]["input_tokens"].as_u64().unwrap_or(0);
715 total_output += resp_body["usage"]["output_tokens"].as_u64().unwrap_or(0);
716
717 let stop_reason = resp_body["stop_reason"].as_str().unwrap_or("end_turn");
718
719 if stop_reason != "max_tokens" || attempt == max_continuations {
720 break;
721 }
722
723 messages.push(serde_json::json!({"role": "assistant", "content": chunk}));
725 messages.push(serde_json::json!({"role": "user", "content": "Continue from where you left off. Complete the JSON."}));
726 }
727
728 let usage = TokenUsage {
729 input_tokens: total_input,
730 output_tokens: total_output,
731 };
732
733 let json_str = extract_json(&full_text);
734 let parsed: serde_json::Value = serde_json::from_str(json_str).unwrap_or_else(|_| {
735 serde_json::json!({
736 "summary": full_text,
737 "findings": [],
738 "test_plan": [],
739 "risk_assessment": [],
740 "deployment_notes": null
741 })
742 });
743
744 let summary = parsed["summary"].as_str().unwrap_or("").to_string();
745 let findings: Vec<LlmFinding> = parse_json_array(&parsed["findings"]);
746 let test_plan: Vec<TestCase> = parse_json_array(&parsed["test_plan"]);
747 let risk_assessment: Vec<RiskItem> = parse_json_array(&parsed["risk_assessment"]);
748 let deployment_notes = parsed["deployment_notes"].as_str().map(|s| s.to_string());
749
750 Ok(LlmReviewResult {
751 summary,
752 findings,
753 test_plan,
754 risk_assessment,
755 deployment_notes,
756 token_usage: Some(usage),
757 })
758}
759
760fn parse_json_array<T: serde::de::DeserializeOwned>(val: &serde_json::Value) -> Vec<T> {
761 val.as_array()
762 .map(|arr| {
763 arr.iter()
764 .filter_map(|v| serde_json::from_value(v.clone()).ok())
765 .collect()
766 })
767 .unwrap_or_default()
768}
769
/// Extracts the outermost `{ ... }` span from `text`.
///
/// Returns the slice from the first `{` to the last `}` of the trimmed
/// input, which strips surrounding prose or code fences. When the input has
/// no such span (a brace is missing, or the last `}` comes before the first
/// `{`), the trimmed input is returned unchanged.
fn extract_json(text: &str) -> &str {
    let trimmed = text.trim();
    match (trimmed.find('{'), trimmed.rfind('}')) {
        // The ordering guard prevents slicing with start > end, which panics.
        (Some(start), Some(end)) if start < end => &trimmed[start..=end],
        _ => trimmed,
    }
}
780
781pub fn review_with_llm(
784 root: &Path,
785 report: &ReviewReport,
786 store: &GraphStore,
787 dry_run: bool,
788 context: Option<&str>,
789) -> Result<(String, Option<LlmReviewResult>)> {
790 let enriched = enrich_review(root, report, store)?;
791 let prompt = build_review_prompt(&enriched, context);
792
793 if dry_run {
794 return Ok((prompt, None));
795 }
796
797 let config = LlmConfig::from_env()?;
798 let result = call_claude(&config, &prompt)?;
799 Ok((prompt, Some(result)))
800}
801
802pub fn format_llm_review(result: &LlmReviewResult) -> String {
807 let mut out = String::new();
808
809 out.push_str("## AI Review Summary\n\n");
810 out.push_str(&result.summary);
811 out.push_str("\n\n");
812
813 if result.findings.is_empty() {
815 out.push_str("### Findings\nNo issues found.\n\n");
816 } else {
817 out.push_str(&format!("### Findings ({})\n\n", result.findings.len()));
818 for (i, f) in result.findings.iter().enumerate() {
819 let location = match f.line {
820 Some(line) => format!("{}:{}", f.file, line),
821 None => f.file.clone(),
822 };
823 out.push_str(&format!(
824 " {}. [{}] **{}** {} -- {}\n",
825 i + 1,
826 f.severity.to_uppercase(),
827 f.category,
828 location,
829 f.message,
830 ));
831 if let Some(ref suggestion) = f.suggestion {
832 out.push_str(&format!(" -> {}\n", suggestion));
833 }
834 }
835 out.push('\n');
836 }
837
838 if !result.risk_assessment.is_empty() {
840 out.push_str(&format!(
841 "### Risk Assessment ({})\n\n",
842 result.risk_assessment.len()
843 ));
844 for r in &result.risk_assessment {
845 out.push_str(&format!(
846 " [{}] **{}** -- {}\n",
847 r.severity.to_uppercase(),
848 r.area,
849 r.description,
850 ));
851 if !r.affected_symbols.is_empty() {
852 out.push_str(&format!(
853 " Affects: {}\n",
854 r.affected_symbols.join(", "),
855 ));
856 }
857 }
858 out.push('\n');
859 }
860
861 if !result.test_plan.is_empty() {
863 let must_pass = result
864 .test_plan
865 .iter()
866 .filter(|t| t.priority == "must_pass")
867 .count();
868 let should_pass = result
869 .test_plan
870 .iter()
871 .filter(|t| t.priority == "should_pass")
872 .count();
873 let nice = result.test_plan.len() - must_pass - should_pass;
874
875 out.push_str(&format!(
876 "### Test Plan ({} tests: {} must-pass, {} should-pass, {} nice-to-have)\n\n",
877 result.test_plan.len(),
878 must_pass,
879 should_pass,
880 nice,
881 ));
882
883 for bucket in &["must_pass", "should_pass", "nice_to_have"] {
884 let tests: Vec<&TestCase> = result
885 .test_plan
886 .iter()
887 .filter(|t| t.priority == *bucket)
888 .collect();
889 if tests.is_empty() {
890 continue;
891 }
892 let label = bucket.replace('_', "-");
893 out.push_str(&format!(" **{}:**\n", label));
894 for t in tests {
895 let finding_ref = match t.related_finding {
896 Some(idx) => format!(" (-> finding #{})", idx + 1),
897 None => String::new(),
898 };
899 out.push_str(&format!(
900 " - [{}] {}{}\n",
901 t.category, t.description, finding_ref,
902 ));
903 }
904 }
905 out.push('\n');
906 }
907
908 if let Some(ref notes) = result.deployment_notes {
910 if !notes.is_empty() {
911 out.push_str("### Deployment Notes\n\n");
912 out.push_str(notes);
913 out.push_str("\n\n");
914 }
915 }
916
917 if let Some(ref usage) = result.token_usage {
918 out.push_str(&format!(
919 "_Tokens: {} in / {} out_\n",
920 usage.input_tokens, usage.output_tokens,
921 ));
922 }
923
924 out
925}
926
927pub fn format_llm_review_json(result: &LlmReviewResult) -> String {
928 serde_json::to_string_pretty(result).unwrap_or_else(|_| "{}".to_string())
929}