Skip to main content

infigraph_core/review/
mod.rs

1//! PR review mode — combines changed symbols, blast radius, affected tests,
2//! API surface changes, security scan, and complexity hotspots into one
3//! structured report.
4//!
5//! Usage:
6//! ```text
7//! infigraph review              # diff HEAD~1..HEAD
8//! infigraph review --base main  # diff main..HEAD
9//! infigraph review --json       # JSON output
10//! ```
11
12pub mod llm;
13
14use std::collections::HashSet;
15use std::path::Path;
16use std::process::Command;
17
18use anyhow::{Context, Result};
19use serde::Serialize;
20
21use crate::diff;
22use crate::graph::store::GraphStore;
23use crate::graph::GraphQuery;
24use crate::lang::LanguageRegistry;
25use crate::security;
26
27// ---------------------------------------------------------------------------
28// Report model
29// ---------------------------------------------------------------------------
30
31/// The complete PR review report.
32#[derive(Debug, Clone, Serialize)]
33pub struct ReviewReport {
34    pub base_ref: String,
35    pub context: ReviewContext,
36    pub changed_symbols: Vec<ChangedSymbol>,
37    pub blast_radius: Vec<AffectedSymbol>,
38    pub affected_tests: Vec<AffectedSymbol>,
39    pub api_surface_changes: Vec<ChangedSymbol>,
40    pub security_findings: Vec<SecurityFinding>,
41    pub complexity_hotspots: Vec<ComplexityHotspot>,
42    pub dead_code: Vec<DeadCodeSymbol>,
43    pub code_clones: Vec<ClonePair>,
44    pub consistency_issues: Vec<ConsistencyIssue>,
45}
46
47/// A symbol that changed between the base and HEAD.
48#[derive(Debug, Clone, Serialize)]
49pub struct ChangedSymbol {
50    pub name: String,
51    pub kind: String,
52    pub file: String,
53    pub change_kind: String,
54}
55
56/// A symbol affected by the changes (in the blast radius or test list).
57#[derive(Debug, Clone, Serialize)]
58pub struct AffectedSymbol {
59    pub name: String,
60    pub kind: String,
61    pub file: String,
62}
63
64/// A security finding scoped to the changed files.
65#[derive(Debug, Clone, Serialize)]
66pub struct SecurityFinding {
67    pub file: String,
68    pub line: u32,
69    pub severity: String,
70    pub message: String,
71}
72
73/// A high-complexity symbol in the changed files.
74#[derive(Debug, Clone, Serialize)]
75pub struct ComplexityHotspot {
76    pub name: String,
77    pub file: String,
78    pub complexity: u32,
79}
80
81/// A symbol in changed files with zero callers.
82#[derive(Debug, Clone, Serialize)]
83pub struct DeadCodeSymbol {
84    pub name: String,
85    pub kind: String,
86    pub file: String,
87}
88
89/// A pair of near-duplicate functions in changed files.
90#[derive(Debug, Clone, Serialize)]
91pub struct ClonePair {
92    pub symbol_a: String,
93    pub file_a: String,
94    pub symbol_b: String,
95    pub file_b: String,
96    pub similarity: f32,
97}
98
99/// A pattern consistency issue — symbols that should follow a common pattern but diverge.
100#[derive(Debug, Clone, Serialize)]
101pub struct ConsistencyIssue {
102    pub pattern: String,
103    pub expected_count: usize,
104    pub actual_count: usize,
105    pub outliers: Vec<String>,
106}
107
108/// Auto-detected PR context that drives review depth.
109#[derive(Debug, Clone, Serialize)]
110pub struct ReviewContext {
111    pub pr_type: PrType,
112    pub scope: PrScope,
113    pub inferred_intent: String,
114    pub changed_file_count: usize,
115    pub changed_symbol_count: usize,
116}
117
118#[derive(Debug, Clone, Serialize, PartialEq)]
119pub enum PrType {
120    BugFix,
121    Refactor,
122    Feature,
123    Migration,
124    Config,
125    Test,
126    Docs,
127    Mixed,
128}
129
130#[derive(Debug, Clone, Serialize, PartialEq)]
131pub enum PrScope {
132    Standalone,
133    CrossModule,
134    CrossRepo,
135}
136
137impl std::fmt::Display for PrType {
138    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139        match self {
140            PrType::BugFix => write!(f, "bug-fix"),
141            PrType::Refactor => write!(f, "refactor"),
142            PrType::Feature => write!(f, "feature"),
143            PrType::Migration => write!(f, "migration"),
144            PrType::Config => write!(f, "config"),
145            PrType::Test => write!(f, "test"),
146            PrType::Docs => write!(f, "docs"),
147            PrType::Mixed => write!(f, "mixed"),
148        }
149    }
150}
151
152impl std::fmt::Display for PrScope {
153    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
154        match self {
155            PrScope::Standalone => write!(f, "standalone"),
156            PrScope::CrossModule => write!(f, "cross-module"),
157            PrScope::CrossRepo => write!(f, "cross-repo"),
158        }
159    }
160}
161
162// ---------------------------------------------------------------------------
163// Core review logic
164// ---------------------------------------------------------------------------
165
166/// Run the full PR review pipeline and produce a structured report.
167pub fn review(
168    root: &Path,
169    base_ref: &str,
170    limit: usize,
171    registry: &LanguageRegistry,
172    store: &GraphStore,
173) -> Result<ReviewReport> {
174    let canonical = root.canonicalize().context("invalid project root")?;
175
176    // 1. Get changed files via git
177    let changed_files = git_changed_files(&canonical, base_ref)?;
178    if changed_files.is_empty() {
179        return Ok(ReviewReport {
180            base_ref: base_ref.to_string(),
181            context: ReviewContext {
182                pr_type: PrType::Mixed,
183                scope: PrScope::Standalone,
184                inferred_intent: "No changes detected".to_string(),
185                changed_file_count: 0,
186                changed_symbol_count: 0,
187            },
188            changed_symbols: vec![],
189            blast_radius: vec![],
190            affected_tests: vec![],
191            api_surface_changes: vec![],
192            security_findings: vec![],
193            complexity_hotspots: vec![],
194            dead_code: vec![],
195            code_clones: vec![],
196            consistency_issues: vec![],
197        });
198    }
199
200    // 2. Semantic diff: get changed symbols
201    let symbol_diff =
202        diff::semantic_diff(&canonical, base_ref, "HEAD", registry).unwrap_or_default();
203
204    let changed_symbols: Vec<ChangedSymbol> = symbol_diff
205        .changes
206        .iter()
207        .map(|c| ChangedSymbol {
208            name: c.name.clone(),
209            kind: c.kind.clone(),
210            file: c.file.clone(),
211            change_kind: c.change.to_string(),
212        })
213        .collect();
214
215    // 2b. Auto-detect PR context from changes
216    let context = detect_pr_context(&canonical, base_ref, &changed_files, &changed_symbols);
217
218    // 3. Query the graph for blast radius, API surface, and complexity
219    let conn = store.connection()?;
220    let gq = GraphQuery::new(&conn);
221
222    // 3a. Resolve changed symbol IDs in the graph
223    let symbol_ids = resolve_symbol_ids(&gq, &changed_symbols);
224
225    // 3b. Blast radius: unbounded CALLS* traversal for each changed symbol
226    let mut blast_set: HashSet<String> = HashSet::new();
227    let mut blast_radius: Vec<AffectedSymbol> = Vec::new();
228
229    for id in &symbol_ids {
230        let escaped = id.replace('\'', "\\'");
231        let query = format!(
232            "MATCH (s:Symbol)<-[:CALLS*]-(a:Symbol) \
233             WHERE s.id = '{escaped}' \
234             RETURN DISTINCT a.name, a.kind, a.file \
235             LIMIT {limit}",
236        );
237        if let Ok(rows) = gq.raw_query(&query) {
238            for row in rows {
239                if row.len() >= 3 {
240                    let key = format!("{}::{}", row[2], row[0]);
241                    if blast_set.insert(key) {
242                        blast_radius.push(AffectedSymbol {
243                            name: row[0].clone(),
244                            kind: row[1].clone(),
245                            file: row[2].clone(),
246                        });
247                    }
248                }
249            }
250        }
251    }
252
253    // Cap total blast radius
254    blast_radius.truncate(limit);
255
256    // 3c. Affected tests: filter blast radius for test symbols
257    let affected_tests: Vec<AffectedSymbol> = blast_radius
258        .iter()
259        .filter(|s| is_test_symbol(s))
260        .cloned()
261        .collect();
262
263    // 3d. API surface changes: filter changed symbols that are public
264    let api_surface_changes = find_api_surface_changes(&gq, &changed_symbols);
265
266    // 4. Security scan scoped to changed files
267    let security_findings = scan_changed_files(&canonical, &changed_files);
268
269    // 5. Complexity hotspots in changed files
270    let complexity_hotspots = find_complexity_hotspots(&gq, &changed_files);
271
272    // 6. Dead code in changed files (symbols with zero callers)
273    let dead_code = find_dead_code_in_changed_files(&gq, &changed_files);
274
275    // 7. Code clones: near-duplicate symbols in changed files (via SIMILAR_TO edges)
276    let code_clones = find_clones_in_changed_files(&gq, &changed_files);
277
278    // 8. Consistency: symbols sharing a name pattern that diverge in structure
279    let consistency_issues = find_consistency_issues(&gq, &changed_symbols);
280
281    Ok(ReviewReport {
282        base_ref: base_ref.to_string(),
283        context,
284        changed_symbols,
285        blast_radius,
286        affected_tests,
287        api_surface_changes,
288        security_findings,
289        complexity_hotspots,
290        dead_code,
291        code_clones,
292        consistency_issues,
293    })
294}
295
296/// Cross-repo review: runs single-repo review, then enriches with cross-repo
297/// blast radius and callers from the group's other repos.
298#[allow(clippy::too_many_arguments)]
299pub fn review_with_group(
300    root: &Path,
301    base_ref: &str,
302    limit: usize,
303    registry: &LanguageRegistry,
304    store: &GraphStore,
305    group_name: &str,
306    group_registry: &crate::multi::Registry,
307    build_registry: impl Fn() -> Result<LanguageRegistry>,
308) -> Result<ReviewReport> {
309    // 1. Run standard single-repo review
310    let mut report = review(root, base_ref, limit, registry, store)?;
311
312    // 2. Force scope to CrossRepo
313    report.context.scope = PrScope::CrossRepo;
314    report.context.inferred_intent = format!(
315        "cross-repo {} PR (group: {}): {}",
316        report.context.pr_type, group_name, report.context.inferred_intent,
317    );
318
319    // 3. Query cross-repo callers for each changed symbol
320    let mut cross_repo_blast: Vec<AffectedSymbol> = Vec::new();
321    let mut seen: HashSet<String> = HashSet::new();
322
323    for sym in &report.changed_symbols {
324        let escaped_name = sym.name.replace('\'', "\\'");
325        let query = format!(
326            "MATCH (s:Symbol)-[:CALLS]->(t:Symbol) \
327             WHERE t.name = '{escaped_name}' \
328             RETURN s.name, s.kind, s.file \
329             LIMIT 50"
330        );
331
332        if let Ok(results) = group_registry.group_query(group_name, &query, &build_registry) {
333            for (repo_name, rows) in results {
334                for row in rows {
335                    if row.len() >= 3 {
336                        let key = format!("{}::{}::{}", repo_name, row[2], row[0]);
337                        if seen.insert(key) {
338                            cross_repo_blast.push(AffectedSymbol {
339                                name: row[0].clone(),
340                                kind: row[1].clone(),
341                                file: format!("[{}] {}", repo_name, row[2]),
342                            });
343                        }
344                    }
345                }
346            }
347        }
348    }
349
350    // 4. Append cross-repo blast radius
351    report.blast_radius.extend(cross_repo_blast);
352
353    // 5. Re-filter affected tests from expanded blast radius
354    let cross_repo_tests: Vec<AffectedSymbol> = report
355        .blast_radius
356        .iter()
357        .filter(|s| is_test_symbol(s) && s.file.starts_with('['))
358        .cloned()
359        .collect();
360    report.affected_tests.extend(cross_repo_tests);
361
362    // 6. Filter dead code against cross-repo callers and implementors
363    if !report.dead_code.is_empty() {
364        let mut alive_names: HashSet<String> = HashSet::new();
365        for dc in &report.dead_code {
366            let escaped_name = dc.name.replace('\'', "\\'");
367            // Check for cross-repo CALLS
368            let calls_query = format!(
369                "MATCH (s:Symbol)-[:CALLS]->(t:Symbol) \
370                 WHERE t.name = '{escaped_name}' \
371                 RETURN t.name LIMIT 1"
372            );
373            if let Ok(results) =
374                group_registry.group_query(group_name, &calls_query, &build_registry)
375            {
376                if results.iter().any(|(_, rows)| !rows.is_empty()) {
377                    alive_names.insert(dc.name.clone());
378                    continue;
379                }
380            }
381            // Check for cross-repo INHERITS (interface implemented in another repo)
382            let inh_query = format!(
383                "MATCH (s:Symbol)-[:INHERITS]->(p:Symbol) \
384                 WHERE p.name = '{escaped_name}' OR \
385                 EXISTS {{ MATCH (m:Symbol) WHERE m.name = '{escaped_name}' AND m.parent = p.id }} \
386                 RETURN s.name LIMIT 1"
387            );
388            if let Ok(results) = group_registry.group_query(group_name, &inh_query, &build_registry)
389            {
390                if results.iter().any(|(_, rows)| !rows.is_empty()) {
391                    alive_names.insert(dc.name.clone());
392                }
393            }
394        }
395        if !alive_names.is_empty() {
396            report
397                .dead_code
398                .retain(|dc| !alive_names.contains(&dc.name));
399        }
400    }
401
402    // 7. Cross-repo consistency: same symbol name across repos should match
403    let mut cross_repo_names: std::collections::HashMap<String, Vec<String>> =
404        std::collections::HashMap::new();
405    for sym in &report.changed_symbols {
406        let query = format!(
407            "MATCH (s:Symbol) WHERE s.name = '{}' RETURN s.name, s.file",
408            sym.name.replace('\'', "\\'")
409        );
410        if let Ok(results) = group_registry.group_query(group_name, &query, &build_registry) {
411            for (repo_name, rows) in results {
412                for row in &rows {
413                    if let Some(file) = row.get(1) {
414                        cross_repo_names
415                            .entry(sym.name.clone())
416                            .or_default()
417                            .push(format!("[{}] {}", repo_name, file));
418                    }
419                }
420            }
421        }
422    }
423
424    for (name, locations) in &cross_repo_names {
425        if locations.len() >= 2 {
426            report.consistency_issues.push(ConsistencyIssue {
427                pattern: format!(
428                    "{} exists in {} repos — verify all updated",
429                    name,
430                    locations.len()
431                ),
432                expected_count: locations.len(),
433                actual_count: 0,
434                outliers: locations.clone(),
435            });
436        }
437    }
438
439    Ok(report)
440}
441
442// ---------------------------------------------------------------------------
443// Helpers
444// ---------------------------------------------------------------------------
445
446/// Get the list of files changed between `base_ref` and HEAD.
447fn git_changed_files(root: &Path, base_ref: &str) -> Result<Vec<String>> {
448    let check = Command::new("git")
449        .args(["rev-parse", "--git-dir"])
450        .current_dir(root)
451        .output();
452    if check.is_err() || !check.unwrap().status.success() {
453        anyhow::bail!("not a git repository — infigraph review requires git history");
454    }
455
456    let output = Command::new("git")
457        .args(["diff", "--name-only", base_ref])
458        .current_dir(root)
459        .output()
460        .context("failed to run git diff --name-only")?;
461
462    if !output.status.success() {
463        let stderr = String::from_utf8_lossy(&output.stderr);
464        anyhow::bail!("git diff failed: {stderr}");
465    }
466
467    let files: Vec<String> = String::from_utf8_lossy(&output.stdout)
468        .lines()
469        .filter(|l| !l.is_empty())
470        .map(|l| l.to_string())
471        .collect();
472
473    Ok(files)
474}
475
476/// Resolve graph symbol IDs for changed symbols by querying the graph.
477fn resolve_symbol_ids(gq: &GraphQuery, symbols: &[ChangedSymbol]) -> Vec<String> {
478    let mut ids = Vec::new();
479    for sym in symbols {
480        let escaped_name = sym.name.replace('\'', "\\'");
481        let escaped_file = sym.file.replace('\'', "\\'");
482        let query = format!(
483            "MATCH (s:Symbol) \
484             WHERE s.name = '{escaped_name}' AND s.file ENDS WITH '{escaped_file}' \
485             RETURN s.id",
486        );
487        if let Ok(rows) = gq.raw_query(&query) {
488            for row in rows {
489                if let Some(id) = row.first() {
490                    ids.push(id.clone());
491                }
492            }
493        }
494    }
495    ids
496}
497
498/// Check if a symbol looks like a test.
499fn is_test_symbol(sym: &AffectedSymbol) -> bool {
500    let name_lower = sym.name.to_lowercase();
501    let kind_lower = sym.kind.to_lowercase();
502    name_lower.starts_with("test_")
503        || name_lower.starts_with("test")
504        || kind_lower.contains("test")
505        || sym.file.contains("test")
506        || sym.file.contains("spec")
507}
508
509/// Find changed symbols that are public (API surface).
510fn find_api_surface_changes(gq: &GraphQuery, symbols: &[ChangedSymbol]) -> Vec<ChangedSymbol> {
511    let mut api_changes = Vec::new();
512    for sym in symbols {
513        let escaped_name = sym.name.replace('\'', "\\'");
514        let escaped_file = sym.file.replace('\'', "\\'");
515        let query = format!(
516            "MATCH (s:Symbol) \
517             WHERE s.name = '{escaped_name}' AND s.file ENDS WITH '{escaped_file}' \
518             AND s.visibility = 'public' \
519             RETURN s.name",
520        );
521        if let Ok(rows) = gq.raw_query(&query) {
522            if !rows.is_empty() {
523                api_changes.push(sym.clone());
524            }
525        }
526    }
527    api_changes
528}
529
530/// Run security scan and filter to only findings in changed files.
531fn scan_changed_files(root: &Path, changed_files: &[String]) -> Vec<SecurityFinding> {
532    let changed_set: HashSet<&str> = changed_files.iter().map(|f| f.as_str()).collect();
533
534    match security::scan_project(root) {
535        Ok(scan) => scan
536            .findings
537            .iter()
538            .filter(|f| changed_set.contains(f.file.as_str()))
539            .map(|f| SecurityFinding {
540                file: f.file.clone(),
541                line: f.line,
542                severity: f.severity.to_string(),
543                message: f.message.clone(),
544            })
545            .collect(),
546        Err(_) => vec![],
547    }
548}
549
550/// Find high-complexity symbols in changed files.
551fn find_complexity_hotspots(gq: &GraphQuery, changed_files: &[String]) -> Vec<ComplexityHotspot> {
552    if changed_files.is_empty() {
553        return vec![];
554    }
555
556    let file_list: Vec<String> = changed_files
557        .iter()
558        .map(|f| format!("'{}'", f.replace('\'', "\\'")))
559        .collect();
560    let files_in = file_list.join(", ");
561
562    let query = format!(
563        "MATCH (s:Symbol) \
564         WHERE s.file IN [{files_in}] AND s.complexity >= 10 \
565         RETURN s.name, s.file, s.complexity \
566         ORDER BY s.complexity DESC",
567    );
568
569    match gq.raw_query(&query) {
570        Ok(rows) => rows
571            .iter()
572            .filter_map(|row| {
573                let name = row.first()?;
574                let file = row.get(1)?;
575                let complexity: u32 = row.get(2)?.parse().ok()?;
576                Some(ComplexityHotspot {
577                    name: name.clone(),
578                    file: file.clone(),
579                    complexity,
580                })
581            })
582            .collect(),
583        Err(_) => vec![],
584    }
585}
586
587/// Auto-detect PR type, scope, and intent from changes.
588fn detect_pr_context(
589    root: &Path,
590    base_ref: &str,
591    changed_files: &[String],
592    changed_symbols: &[ChangedSymbol],
593) -> ReviewContext {
594    let file_count = changed_files.len();
595    let symbol_count = changed_symbols.len();
596
597    // Detect PR type from commit messages and file patterns
598    let pr_type = detect_pr_type(root, base_ref, changed_files, changed_symbols);
599
600    // Detect scope: how many directories/modules are touched?
601    let scope = detect_pr_scope(changed_files);
602
603    // Build inferred intent string
604    let intent = build_intent_string(&pr_type, &scope, changed_files, changed_symbols);
605
606    ReviewContext {
607        pr_type,
608        scope,
609        inferred_intent: intent,
610        changed_file_count: file_count,
611        changed_symbol_count: symbol_count,
612    }
613}
614
615fn detect_pr_type(
616    root: &Path,
617    base_ref: &str,
618    changed_files: &[String],
619    changed_symbols: &[ChangedSymbol],
620) -> PrType {
621    // Get commit messages for signal
622    let commit_msgs = Command::new("git")
623        .args(["log", "--format=%s", &format!("{}..HEAD", base_ref)])
624        .current_dir(root)
625        .output()
626        .map(|o| String::from_utf8_lossy(&o.stdout).to_lowercase())
627        .unwrap_or_default();
628
629    // Score each type
630    let mut scores: Vec<(PrType, i32)> = vec![
631        (PrType::BugFix, 0),
632        (PrType::Refactor, 0),
633        (PrType::Feature, 0),
634        (PrType::Migration, 0),
635        (PrType::Config, 0),
636        (PrType::Test, 0),
637        (PrType::Docs, 0),
638    ];
639
640    // Commit message signals
641    for (pr_type, score) in &mut scores {
642        match pr_type {
643            PrType::BugFix
644                if commit_msgs.contains("fix")
645                    || commit_msgs.contains("bug")
646                    || commit_msgs.contains("patch") =>
647            {
648                *score += 3;
649            }
650            PrType::Refactor
651                if commit_msgs.contains("refactor")
652                    || commit_msgs.contains("rename")
653                    || commit_msgs.contains("move")
654                    || commit_msgs.contains("clean") =>
655            {
656                *score += 3;
657            }
658            PrType::Feature
659                if commit_msgs.contains("add")
660                    || commit_msgs.contains("new")
661                    || commit_msgs.contains("feature")
662                    || commit_msgs.contains("implement") =>
663            {
664                *score += 3;
665            }
666            PrType::Migration
667                if commit_msgs.contains("migrat")
668                    || commit_msgs.contains("upgrade")
669                    || commit_msgs.contains("convert")
670                    || commit_msgs.contains("sqlite") =>
671            {
672                *score += 5;
673            }
674            PrType::Config
675                if commit_msgs.contains("config")
676                    || commit_msgs.contains("setting")
677                    || commit_msgs.contains("version bump") =>
678            {
679                *score += 3;
680            }
681            PrType::Test if commit_msgs.contains("test") => {
682                *score += 3;
683            }
684            PrType::Docs if commit_msgs.contains("doc") || commit_msgs.contains("readme") => {
685                *score += 3;
686            }
687            _ => {}
688        }
689    }
690
691    // File pattern signals
692    let test_files = changed_files
693        .iter()
694        .filter(|f| f.contains("test") || f.contains("spec"))
695        .count();
696    let config_files = changed_files
697        .iter()
698        .filter(|f| {
699            f.ends_with(".json")
700                || f.ends_with(".xml")
701                || f.ends_with(".yaml")
702                || f.ends_with(".yml")
703                || f.ends_with(".csproj")
704                || f.ends_with(".sln")
705                || f.ends_with(".cfg")
706                || f.ends_with(".pkg")
707        })
708        .count();
709    let doc_files = changed_files
710        .iter()
711        .filter(|f| f.ends_with(".md") || f.ends_with(".txt") || f.ends_with(".rst"))
712        .count();
713    let schema_files = changed_files
714        .iter()
715        .filter(|f| f.contains("schema") || f.contains("migration") || f.contains("sql"))
716        .count();
717
718    if test_files as f32 / changed_files.len().max(1) as f32 > 0.7 {
719        scores
720            .iter_mut()
721            .find(|(t, _)| *t == PrType::Test)
722            .unwrap()
723            .1 += 5;
724    }
725    if config_files as f32 / changed_files.len().max(1) as f32 > 0.7 {
726        scores
727            .iter_mut()
728            .find(|(t, _)| *t == PrType::Config)
729            .unwrap()
730            .1 += 5;
731    }
732    if doc_files as f32 / changed_files.len().max(1) as f32 > 0.7 {
733        scores
734            .iter_mut()
735            .find(|(t, _)| *t == PrType::Docs)
736            .unwrap()
737            .1 += 5;
738    }
739    if schema_files > 0 {
740        scores
741            .iter_mut()
742            .find(|(t, _)| *t == PrType::Migration)
743            .unwrap()
744            .1 += 3;
745    }
746
747    // Symbol change signals
748    let moved = changed_symbols
749        .iter()
750        .filter(|s| s.change_kind.starts_with("MOVED"))
751        .count();
752    let removed = changed_symbols
753        .iter()
754        .filter(|s| s.change_kind == "REMOVED")
755        .count();
756    let added_count = changed_symbols
757        .iter()
758        .filter(|s| s.change_kind == "ADDED")
759        .count();
760
761    if moved as f32 / symbol_count_safe(changed_symbols) > 0.3 {
762        scores
763            .iter_mut()
764            .find(|(t, _)| *t == PrType::Refactor)
765            .unwrap()
766            .1 += 3;
767    }
768    if added_count as f32 / symbol_count_safe(changed_symbols) > 0.5 {
769        scores
770            .iter_mut()
771            .find(|(t, _)| *t == PrType::Feature)
772            .unwrap()
773            .1 += 3;
774    }
775    if removed as f32 / symbol_count_safe(changed_symbols) > 0.3 {
776        scores
777            .iter_mut()
778            .find(|(t, _)| *t == PrType::Refactor)
779            .unwrap()
780            .1 += 2;
781    }
782
783    scores.sort_by_key(|a| std::cmp::Reverse(a.1));
784    if scores[0].1 == 0 {
785        PrType::Mixed
786    } else {
787        scores[0].0.clone()
788    }
789}
790
791fn symbol_count_safe(symbols: &[ChangedSymbol]) -> f32 {
792    (symbols.len().max(1)) as f32
793}
794
795fn detect_pr_scope(changed_files: &[String]) -> PrScope {
796    let dirs: HashSet<&str> = changed_files
797        .iter()
798        .filter_map(|f| f.split('/').next())
799        .collect();
800
801    if dirs.len() <= 2 {
802        PrScope::Standalone
803    } else {
804        PrScope::CrossModule
805    }
806}
807
808fn build_intent_string(
809    pr_type: &PrType,
810    scope: &PrScope,
811    changed_files: &[String],
812    changed_symbols: &[ChangedSymbol],
813) -> String {
814    let added = changed_symbols
815        .iter()
816        .filter(|s| s.change_kind == "ADDED")
817        .count();
818    let removed = changed_symbols
819        .iter()
820        .filter(|s| s.change_kind == "REMOVED")
821        .count();
822    let modified = changed_symbols
823        .iter()
824        .filter(|s| s.change_kind == "SIGNATURE_CHANGED")
825        .count();
826    let moved = changed_symbols
827        .iter()
828        .filter(|s| s.change_kind.starts_with("MOVED"))
829        .count();
830
831    let file_types: HashSet<&str> = changed_files
832        .iter()
833        .filter_map(|f| f.rsplit('.').next())
834        .collect();
835    let langs: Vec<&&str> = file_types.iter().take(5).collect();
836
837    format!(
838        "{} {} PR: {} files ({}) changed, {} symbols (+{} -{} ~{} →{})",
839        scope,
840        pr_type,
841        changed_files.len(),
842        langs
843            .iter()
844            .map(|l| format!(".{}", l))
845            .collect::<Vec<_>>()
846            .join(", "),
847        changed_symbols.len(),
848        added,
849        removed,
850        modified,
851        moved,
852    )
853}
854
855/// Find functions/methods in changed files that have zero callers.
856fn find_dead_code_in_changed_files(
857    gq: &GraphQuery,
858    changed_files: &[String],
859) -> Vec<DeadCodeSymbol> {
860    if changed_files.is_empty() {
861        return vec![];
862    }
863
864    let file_list: Vec<String> = changed_files
865        .iter()
866        .map(|f| format!("'{}'", f.replace('\'', "\\'")))
867        .collect();
868    let files_in = file_list.join(", ");
869
870    let query = format!(
871        "MATCH (s:Symbol) \
872         WHERE s.file IN [{files_in}] \
873         AND s.kind IN ['Function', 'Method'] \
874         AND NOT EXISTS {{ MATCH ()-[:CALLS]->(s) }} \
875         AND NOT EXISTS {{ MATCH (p:Symbol)<-[:INHERITS]-() WHERE p.file = s.file AND p.kind IN ['Class', 'Interface', 'Trait'] }} \
876         AND NOT s.name STARTS WITH 'test' \
877         AND NOT s.name STARTS WITH 'Test' \
878         AND NOT s.name = 'main' \
879         RETURN s.name, s.kind, s.file \
880         ORDER BY s.file, s.name"
881    );
882
883    match gq.raw_query(&query) {
884        Ok(rows) => rows
885            .iter()
886            .filter_map(|row| {
887                Some(DeadCodeSymbol {
888                    name: row.first()?.clone(),
889                    kind: row.get(1)?.clone(),
890                    file: row.get(2)?.clone(),
891                })
892            })
893            .collect(),
894        Err(_) => vec![],
895    }
896}
897
898/// Find near-duplicate symbols in changed files using SIMILAR_TO edges.
899fn find_clones_in_changed_files(gq: &GraphQuery, changed_files: &[String]) -> Vec<ClonePair> {
900    if changed_files.is_empty() {
901        return vec![];
902    }
903
904    let file_list: Vec<String> = changed_files
905        .iter()
906        .map(|f| format!("'{}'", f.replace('\'', "\\'")))
907        .collect();
908    let files_in = file_list.join(", ");
909
910    let query = format!(
911        "MATCH (a:Symbol)-[r:SIMILAR_TO]->(b:Symbol) \
912         WHERE a.file IN [{files_in}] \
913         AND r.score >= 0.90 \
914         RETURN a.name, a.file, b.name, b.file, r.score \
915         ORDER BY r.score DESC \
916         LIMIT 30"
917    );
918
919    match gq.raw_query(&query) {
920        Ok(rows) => rows
921            .iter()
922            .filter_map(|row| {
923                Some(ClonePair {
924                    symbol_a: row.first()?.clone(),
925                    file_a: row.get(1)?.clone(),
926                    symbol_b: row.get(2)?.clone(),
927                    file_b: row.get(3)?.clone(),
928                    similarity: row.get(4)?.parse().ok()?,
929                })
930            })
931            .collect(),
932        Err(_) => vec![],
933    }
934}
935
936/// Find consistency issues: groups of changed symbols with similar names
937/// that should follow the same pattern but have different structures.
938fn find_consistency_issues(
939    gq: &GraphQuery,
940    changed_symbols: &[ChangedSymbol],
941) -> Vec<ConsistencyIssue> {
942    let mut issues = Vec::new();
943
944    // Group symbols by name — find cases where the same method exists in multiple files
945    let mut name_groups: std::collections::HashMap<&str, Vec<&ChangedSymbol>> =
946        std::collections::HashMap::new();
947    for sym in changed_symbols {
948        name_groups.entry(sym.name.as_str()).or_default().push(sym);
949    }
950
951    for (name, group) in &name_groups {
952        if group.len() < 3 {
953            continue;
954        }
955
956        // Check if all instances have the same change_kind — divergence = inconsistency
957        let first_kind = &group[0].change_kind;
958        let outliers: Vec<String> = group
959            .iter()
960            .filter(|s| &s.change_kind != first_kind)
961            .map(|s| format!("{} in {} ({})", s.name, s.file, s.change_kind))
962            .collect();
963
964        if !outliers.is_empty() {
965            issues.push(ConsistencyIssue {
966                pattern: format!("{} across {} files", name, group.len()),
967                expected_count: group.len(),
968                actual_count: group.len() - outliers.len(),
969                outliers,
970            });
971        }
972    }
973
974    // Check for structural consistency: same-named symbols should have same caller count
975    for (name, group) in &name_groups {
976        if group.len() < 5 {
977            continue;
978        }
979
980        let mut caller_counts: Vec<(String, usize)> = Vec::new();
981        for sym in group {
982            let escaped_name = sym.name.replace('\'', "\\'");
983            let escaped_file = sym.file.replace('\'', "\\'");
984            let query = format!(
985                "MATCH (s:Symbol)<-[:CALLS]-(c:Symbol) \
986                 WHERE s.name = '{escaped_name}' AND s.file ENDS WITH '{escaped_file}' \
987                 RETURN count(c)"
988            );
989            let count: usize = gq
990                .raw_query(&query)
991                .ok()
992                .and_then(|rows| rows.first()?.first()?.parse().ok())
993                .unwrap_or(0);
994            caller_counts.push((sym.file.clone(), count));
995        }
996
997        if caller_counts.is_empty() {
998            continue;
999        }
1000
1001        let median_count = {
1002            let mut counts: Vec<usize> = caller_counts.iter().map(|(_, c)| *c).collect();
1003            counts.sort();
1004            counts[counts.len() / 2]
1005        };
1006
1007        let structural_outliers: Vec<String> = caller_counts
1008            .iter()
1009            .filter(|(_, c)| {
1010                let diff = (*c).abs_diff(median_count);
1011                diff > 2 && median_count > 0
1012            })
1013            .map(|(file, count)| {
1014                format!(
1015                    "{} in {} ({} callers vs median {})",
1016                    name, file, count, median_count
1017                )
1018            })
1019            .collect();
1020
1021        if !structural_outliers.is_empty() {
1022            issues.push(ConsistencyIssue {
1023                pattern: format!("{} caller count divergence", name),
1024                expected_count: group.len(),
1025                actual_count: group.len() - structural_outliers.len(),
1026                outliers: structural_outliers,
1027            });
1028        }
1029    }
1030
1031    issues
1032}
1033
1034// ---------------------------------------------------------------------------
1035// Output formatting
1036// ---------------------------------------------------------------------------
1037
1038/// Format the review report as Markdown.
1039pub fn format_review(report: &ReviewReport) -> String {
1040    let mut out = String::new();
1041
1042    out.push_str(&format!("## PR Review: {}..HEAD\n\n", report.base_ref,));
1043
1044    // Context
1045    out.push_str(&format!(
1046        "**Context:** {}\n\n",
1047        report.context.inferred_intent,
1048    ));
1049
1050    // Changed symbols
1051    out.push_str(&format!(
1052        "### Changed Symbols ({})\n",
1053        report.changed_symbols.len(),
1054    ));
1055    if report.changed_symbols.is_empty() {
1056        out.push_str("  (none)\n");
1057    } else {
1058        for sym in &report.changed_symbols {
1059            out.push_str(&format!(
1060                "  {} {} ({}) -- {}\n",
1061                sym.kind, sym.name, sym.file, sym.change_kind,
1062            ));
1063        }
1064    }
1065    out.push('\n');
1066
1067    // Blast radius
1068    out.push_str(&format!(
1069        "### Blast Radius ({} affected)\n",
1070        report.blast_radius.len(),
1071    ));
1072    if report.blast_radius.is_empty() {
1073        out.push_str("  (none)\n");
1074    } else {
1075        for sym in &report.blast_radius {
1076            out.push_str(&format!("  {} {} ({})\n", sym.kind, sym.name, sym.file,));
1077        }
1078    }
1079    out.push('\n');
1080
1081    // Affected tests
1082    out.push_str(&format!(
1083        "### Affected Tests ({})\n",
1084        report.affected_tests.len(),
1085    ));
1086    if report.affected_tests.is_empty() {
1087        out.push_str("  (none)\n");
1088    } else {
1089        for sym in &report.affected_tests {
1090            out.push_str(&format!("  {} ({})\n", sym.name, sym.file,));
1091        }
1092    }
1093    out.push('\n');
1094
1095    // API surface changes
1096    out.push_str(&format!(
1097        "### API Surface Changes ({})\n",
1098        report.api_surface_changes.len(),
1099    ));
1100    if report.api_surface_changes.is_empty() {
1101        out.push_str("  (none)\n");
1102    } else {
1103        for sym in &report.api_surface_changes {
1104            out.push_str(&format!(
1105                "  {} {} ({}) -- {}\n",
1106                sym.kind, sym.name, sym.file, sym.change_kind,
1107            ));
1108        }
1109    }
1110    out.push('\n');
1111
1112    // Security findings
1113    out.push_str(&format!(
1114        "### Security Findings ({})\n",
1115        report.security_findings.len(),
1116    ));
1117    if report.security_findings.is_empty() {
1118        out.push_str("  (none)\n");
1119    } else {
1120        for f in &report.security_findings {
1121            out.push_str(&format!(
1122                "  [{}] {}:{} -- {}\n",
1123                f.severity, f.file, f.line, f.message,
1124            ));
1125        }
1126    }
1127    out.push('\n');
1128
1129    // Complexity hotspots
1130    out.push_str(&format!(
1131        "### Complexity Hotspots ({})\n",
1132        report.complexity_hotspots.len(),
1133    ));
1134    if report.complexity_hotspots.is_empty() {
1135        out.push_str("  (none)\n");
1136    } else {
1137        for h in &report.complexity_hotspots {
1138            out.push_str(&format!(
1139                "  [{:>3}] {} ({})\n",
1140                h.complexity, h.name, h.file,
1141            ));
1142        }
1143    }
1144    out.push('\n');
1145
1146    // Dead code
1147    out.push_str(&format!(
1148        "### Dead Code in Changed Files ({})\n",
1149        report.dead_code.len(),
1150    ));
1151    if report.dead_code.is_empty() {
1152        out.push_str("  (none)\n");
1153    } else {
1154        for d in &report.dead_code {
1155            out.push_str(&format!("  {} {} ({})\n", d.kind, d.name, d.file,));
1156        }
1157    }
1158    out.push('\n');
1159
1160    // Code clones
1161    out.push_str(&format!("### Code Clones ({})\n", report.code_clones.len(),));
1162    if report.code_clones.is_empty() {
1163        out.push_str("  (none)\n");
1164    } else {
1165        for c in &report.code_clones {
1166            out.push_str(&format!(
1167                "  [{:.2}] {} ({}) <-> {} ({})\n",
1168                c.similarity, c.symbol_a, c.file_a, c.symbol_b, c.file_b,
1169            ));
1170        }
1171    }
1172    out.push('\n');
1173
1174    // Consistency issues
1175    out.push_str(&format!(
1176        "### Consistency Issues ({})\n",
1177        report.consistency_issues.len(),
1178    ));
1179    if report.consistency_issues.is_empty() {
1180        out.push_str("  (none)\n");
1181    } else {
1182        for ci in &report.consistency_issues {
1183            out.push_str(&format!(
1184                "  Pattern: {} -- {}/{} consistent\n",
1185                ci.pattern, ci.actual_count, ci.expected_count,
1186            ));
1187            for o in &ci.outliers {
1188                out.push_str(&format!("    ! {}\n", o));
1189            }
1190        }
1191    }
1192    out.push('\n');
1193
1194    out
1195}
1196
1197/// Format the review report as JSON.
1198pub fn format_review_json(report: &ReviewReport) -> String {
1199    serde_json::to_string_pretty(report).unwrap_or_else(|_| "{}".to_string())
1200}