use serde::{Deserialize, Serialize};
use crate::core::client::TrustySearchClient;
use crate::core::complexity::{compute_complexity_for, detect_smells};
use crate::types::complexity::{CodeSmell, ComplexityGrade};
use crate::types::CodeChunk;
/// Errors surfaced while parsing a diff or fetching indexed chunks for a review.
#[derive(Debug, thiserror::Error)]
pub enum ReviewError {
/// A `@@` hunk header had no parseable new-file start line.
/// Carries the header text that followed the leading `@@`.
#[error("malformed hunk header: {0}")]
MalformedHunkHeader(String),
/// The chunk lookup through the trusty-search client failed.
/// Carries a formatted description of the underlying error.
#[error("trusty-search unreachable or returned an error: {0}")]
Search(String),
}
/// Complexity numbers reported for one reviewed file.
///
/// Both values are copied from the metrics returned by `compute_complexity_for`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReviewComplexity {
/// Cyclomatic complexity of the analyzed text.
pub cyclomatic: u32,
/// Cognitive complexity of the analyzed text.
pub cognitive: u32,
}
/// A single code smell in its serializable, string-typed report form.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SmellHit {
/// Smell category label, e.g. `"long_method"` or `"deep_nesting"`.
pub category: String,
/// Line the smell is anchored to. The review code in this file sets it to the
/// first added line of the file's diff (or 0 when the diff adds no lines), not
/// to the exact location of the smell.
pub line: u32,
/// Severity label; the projection in this file emits `"low"`, `"medium"` or `"high"`.
pub severity: String,
}
/// Where the text analyzed for a file came from.
///
/// Serialized with a `kind` tag field and snake_case variant names, per the
/// serde attribute below.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum ReviewSource {
/// The file had chunks in the index. `modified_chunks` counts the chunks whose
/// line range contains at least one line added by the diff.
Indexed { modified_chunks: usize },
/// No indexed chunks matched the file path; only the added lines were analyzed.
NewFile,
}
/// Review result for one file touched by the diff.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FileReview {
/// File path as extracted from the diff's `+++` header.
pub path: String,
/// Complexity grade of the analyzed text.
pub grade: ComplexityGrade,
/// Cyclomatic and cognitive complexity of the analyzed text.
pub complexity: ReviewComplexity,
/// Smells detected in the analyzed text.
pub smells: Vec<SmellHit>,
/// Human-readable suggestions derived from the grade and the smells.
pub recommendations: Vec<String>,
/// Whether indexed chunks or only the added lines were analyzed.
pub source: ReviewSource,
}
/// Aggregated review of every file in a diff.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ReviewReport {
/// Per-file reviews, in the order the files appear in the diff.
pub files: Vec<FileReview>,
/// Maximum grade across `files`; `ComplexityGrade::A` when there are no files.
pub overall_grade: ComplexityGrade,
/// Total number of added (`+`) lines across all files; removed lines are not counted.
pub changed_lines: usize,
/// Total number of smells across all files.
pub smell_count: usize,
/// One-line human-readable summary of the counts and the overall grade.
pub summary: String,
}
/// The additions a unified diff makes to a single file.
///
/// `added_line_numbers` and `added_lines` are parallel vectors when produced by
/// the parser in this file: entry `i` of each describes the same added line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileDiff {
    /// Target path of the file, taken from the `+++` header.
    pub path: String,
    /// New-file line number of each added line.
    pub added_line_numbers: Vec<u32>,
    /// Text of each added line, without the leading `+` marker.
    pub added_lines: Vec<String>,
}

impl FileDiff {
    /// Returns all added lines joined with `\n` (no trailing newline).
    pub fn added_content(&self) -> String {
        let lines: &[String] = &self.added_lines;
        lines.join("\n")
    }

    /// Reports whether any added line number falls within `start..=end` (inclusive).
    fn touches_range(&self, start: usize, end: usize) -> bool {
        self.added_line_numbers
            .iter()
            .map(|&number| number as usize)
            .any(|number| start <= number && number <= end)
    }
}
/// Stateless parser that extracts per-file additions from unified-diff text.
pub struct DiffParser;

impl DiffParser {
    /// Parses unified-diff text into one [`FileDiff`] per target file.
    ///
    /// Only added (`+`) lines are recorded, each with its line number in the new
    /// file. Removed lines and `\ No newline at end of file` markers do not
    /// advance the new-file counter; every other hunk line (context) does.
    ///
    /// A `+++ /dev/null` header marks a deleted file. Such a file has no new
    /// content to review, so no `FileDiff` is produced for it.
    ///
    /// # Errors
    ///
    /// Returns [`ReviewError::MalformedHunkHeader`] when a line starting with
    /// `@@` does not contain a parseable new-file start line.
    ///
    /// NOTE(review): headers are recognized purely by line prefix, so an added
    /// line whose own text begins with `++ ` is indistinguishable from a `+++ `
    /// file header. Telling them apart would require tracking hunk lengths.
    pub fn parse(diff: &str) -> Result<Vec<FileDiff>, ReviewError> {
        let mut files: Vec<FileDiff> = Vec::new();
        let mut current: Option<FileDiff> = None;
        let mut new_line: u32 = 0;

        for raw in diff.lines() {
            if let Some(rest) = raw.strip_prefix("+++ ") {
                // A new target header closes the file collected so far.
                if let Some(finished) = current.take() {
                    files.push(finished);
                }
                let path = normalize_diff_path(rest);
                // Deleted files point at /dev/null. Leaving `current` empty makes
                // the hunk body below fall through the `let ... else` guard.
                if path != "/dev/null" {
                    current = Some(FileDiff {
                        path,
                        added_line_numbers: Vec::new(),
                        added_lines: Vec::new(),
                    });
                }
                new_line = 0;
                continue;
            }
            if raw.starts_with("--- ") || raw.starts_with("diff ") || raw.starts_with("index ") {
                continue;
            }
            if let Some(header) = raw.strip_prefix("@@") {
                // Hunk headers are validated even when no file is being collected.
                new_line = parse_hunk_new_start(header)?;
                continue;
            }
            let Some(file) = current.as_mut() else {
                continue;
            };
            if let Some(added) = raw.strip_prefix('+') {
                file.added_line_numbers.push(new_line);
                file.added_lines.push(added.to_string());
                // The start line comes from untrusted input and may sit at
                // u32::MAX; saturate instead of overflowing.
                new_line = new_line.saturating_add(1);
            } else if raw.starts_with('-') || raw.starts_with('\\') {
                // Removed lines and "\ No newline" markers occupy no line in the new file.
            } else {
                // Context line: present in the new file, so the counter advances.
                new_line = new_line.saturating_add(1);
            }
        }

        if let Some(finished) = current.take() {
            files.push(finished);
        }
        Ok(files)
    }
}
/// Extracts a clean file path from the text following `+++ ` in a diff header.
///
/// Anything from the first tab onward is discarded, surrounding whitespace is
/// trimmed, and a single leading `a/` or `b/` prefix is removed when present.
fn normalize_diff_path(token: &str) -> String {
    let before_tab = match token.split('\t').next() {
        Some(segment) => segment,
        None => token,
    };
    let trimmed = before_tab.trim();

    if let Some(without_a) = trimmed.strip_prefix("a/") {
        return without_a.to_string();
    }
    if let Some(without_b) = trimmed.strip_prefix("b/") {
        return without_b.to_string();
    }
    trimmed.to_string()
}
/// Extracts the new-file start line from the text that follows `@@` in a hunk header.
///
/// The number is the run of ASCII digits immediately after the first `+` in
/// `header`, e.g. ` -1,2 +10,4 @@` yields `10`.
///
/// # Errors
///
/// Returns [`ReviewError::MalformedHunkHeader`] carrying `header` when there is
/// no `+`, or when the digits after it do not parse as a `u32`.
fn parse_hunk_new_start(header: &str) -> Result<u32, ReviewError> {
    let malformed = || ReviewError::MalformedHunkHeader(header.to_string());

    let mut pieces = header.split('+');
    // The first piece is everything before the first '+'; the range follows it.
    pieces.next();
    let Some(after_plus) = pieces.next() else {
        return Err(malformed());
    };

    let digits: String = after_plus
        .chars()
        .take_while(char::is_ascii_digit)
        .collect();
    match digits.parse::<u32>() {
        Ok(start) => Ok(start),
        Err(_) => Err(malformed()),
    }
}
/// Maps a file path to a language label by its (case-insensitive) suffix.
///
/// Returns `"unknown"` when none of the known suffixes match.
fn language_for_path(path: &str) -> &'static str {
    // Checked in order; no suffix here is a suffix of another entry.
    const SUFFIX_TO_LANGUAGE: [(&str, &str); 8] = [
        (".rs", "rust"),
        (".tsx", "tsx"),
        (".ts", "typescript"),
        (".jsx", "jsx"),
        (".js", "javascript"),
        (".py", "python"),
        (".go", "go"),
        (".java", "java"),
    ];

    let lowered = path.to_ascii_lowercase();
    for (suffix, language) in SUFFIX_TO_LANGUAGE.iter().copied() {
        if lowered.ends_with(suffix) {
            return language;
        }
    }
    "unknown"
}
/// Maps a detected smell to its `(category, severity)` report labels.
fn smell_projection(s: &CodeSmell) -> (&'static str, &'static str) {
    let category = match s {
        CodeSmell::LongFunction { .. } => "long_method",
        CodeSmell::DeepNesting { .. } => "deep_nesting",
        CodeSmell::TooManyParams { .. } => "too_many_params",
        CodeSmell::MissingDocstring => "missing_docstring",
    };
    let severity = match s {
        CodeSmell::DeepNesting { .. } => "high",
        CodeSmell::LongFunction { .. } | CodeSmell::TooManyParams { .. } => "medium",
        CodeSmell::MissingDocstring => "low",
    };
    (category, severity)
}
/// Builds the list of human-readable recommendations for one file.
///
/// A complexity recommendation comes first when `grade` compares at or above
/// `ComplexityGrade::C`. One recommendation per smell follows, in smell order,
/// skipping any text that is already in the list.
///
/// `line_count` is only used in the wording of the long-method recommendation.
fn recommendations_for(
    grade: ComplexityGrade,
    cyclomatic: u32,
    smells: &[SmellHit],
    line_count: usize,
) -> Vec<String> {
    let mut advice: Vec<String> = Vec::new();

    if grade >= ComplexityGrade::C {
        advice.push(format!(
            "Cyclomatic complexity is {cyclomatic} (grade {grade}); extract logic into smaller helper functions"
        ));
    }

    let suggestions = smells.iter().map(|hit| match hit.category.as_str() {
        "long_method" => format!(
            "Long method detected near line {}; split the {line_count}-line change into focused functions",
            hit.line
        ),
        "deep_nesting" => format!(
            "Deep nesting near line {}; use early returns or guard clauses",
            hit.line
        ),
        "too_many_params" => format!(
            "Too many parameters near line {}; group related arguments into a struct",
            hit.line
        ),
        "missing_docstring" => {
            "Add a doc comment explaining the intent of the new code".to_string()
        }
        other => format!("Review the '{other}' smell near line {}", hit.line),
    });

    for suggestion in suggestions {
        // Identical wording (e.g. repeated smells on the same anchor line) is listed once.
        let already_listed = advice.iter().any(|seen| *seen == suggestion);
        if !already_listed {
            advice.push(suggestion);
        }
    }
    advice
}
/// Returns the maximum grade in `grades`, or `ComplexityGrade::A` when it is empty.
fn worst_grade(grades: impl IntoIterator<Item = ComplexityGrade>) -> ComplexityGrade {
    match grades.into_iter().max() {
        Some(worst) => worst,
        None => ComplexityGrade::A,
    }
}
/// Converts detected smells into report entries, all anchored to the same line.
///
/// Every resulting [`SmellHit`] gets `line == anchor`; the detector output used
/// here carries no per-smell position.
fn project_smells(raw: &[CodeSmell], anchor: u32) -> Vec<SmellHit> {
    let mut hits = Vec::with_capacity(raw.len());
    for smell in raw {
        let (category, severity) = smell_projection(smell);
        hits.push(SmellHit {
            category: category.to_string(),
            line: anchor,
            severity: severity.to_string(),
        });
    }
    hits
}
/// Reviews a single file of the diff.
///
/// When `index_chunks` is empty the file is treated as new and only the added
/// lines are analyzed. Otherwise the contents of all supplied chunks, joined
/// with `\n`, are analyzed, and the result records how many chunks contain at
/// least one added line.
///
/// Smells are anchored to the first added line number (0 if nothing was added).
fn review_one_file(fd: &FileDiff, index_chunks: &[&CodeChunk]) -> FileReview {
    // Decide which text to analyze and how to label its origin.
    let (analyzed_text, source) = if index_chunks.is_empty() {
        (fd.added_content(), ReviewSource::NewFile)
    } else {
        let chunk_texts: Vec<&str> = index_chunks
            .iter()
            .map(|chunk| chunk.content.as_str())
            .collect();
        let modified_chunks = index_chunks
            .iter()
            .filter(|chunk| fd.touches_range(chunk.start_line, chunk.end_line))
            .count();
        (
            chunk_texts.join("\n"),
            ReviewSource::Indexed { modified_chunks },
        )
    };

    let language = language_for_path(&fd.path);
    let first_added = fd.added_line_numbers.first().copied().unwrap_or(0);

    let metrics = compute_complexity_for(&analyzed_text, language);
    let smells = project_smells(&detect_smells(&analyzed_text), first_added);
    let mut recommendations = recommendations_for(
        metrics.grade,
        metrics.cyclomatic,
        &smells,
        fd.added_lines.len(),
    );

    // Only indexed files that actually overlap a chunk get the extra reminder.
    if let ReviewSource::Indexed { modified_chunks } = &source {
        if *modified_chunks > 0 {
            recommendations.push(format!(
                "This change modifies {modified_chunks} already-indexed chunk(s); review their existing complexity before merging"
            ));
        }
    }

    FileReview {
        path: fd.path.clone(),
        grade: metrics.grade,
        complexity: ReviewComplexity {
            cyclomatic: metrics.cyclomatic,
            cognitive: metrics.cognitive,
        },
        smells,
        recommendations,
        source,
    }
}
/// Reviews every file in `diff`, using `chunks` as the already-indexed code.
///
/// Chunks are matched to diff files by exact equality between `chunk.file` and
/// the path parsed from the diff. Files without a matching chunk are analyzed
/// from their added lines only.
///
/// # Errors
///
/// Returns [`ReviewError::MalformedHunkHeader`] when the diff cannot be parsed.
pub fn analyze_diff_with_chunks(
    diff: &str,
    chunks: &[CodeChunk],
) -> Result<ReviewReport, ReviewError> {
    use std::collections::HashMap;

    let parsed = DiffParser::parse(diff)?;

    // Group the indexed chunks by file path for lookup per diff file.
    let mut chunks_by_path: HashMap<&str, Vec<&CodeChunk>> = HashMap::new();
    for chunk in chunks {
        chunks_by_path
            .entry(chunk.file.as_str())
            .or_default()
            .push(chunk);
    }

    let files: Vec<FileReview> = parsed
        .iter()
        .map(|fd| {
            let known: &[&CodeChunk] = match chunks_by_path.get(fd.path.as_str()) {
                Some(list) => list.as_slice(),
                None => &[],
            };
            review_one_file(fd, known)
        })
        .collect();

    let changed_lines: usize = parsed.iter().map(|fd| fd.added_lines.len()).sum();
    let smell_count: usize = files.iter().map(|review| review.smells.len()).sum();
    let overall_grade = worst_grade(files.iter().map(|review| review.grade));

    let indexed = files
        .iter()
        .filter(|review| matches!(review.source, ReviewSource::Indexed { .. }))
        .count();
    let file_plural = if files.len() == 1 { "" } else { "s" };
    let smell_plural = if smell_count == 1 { "" } else { "s" };
    let summary = format!(
        "{} file{} analyzed ({} indexed, {} new); {} smell{} found; overall grade {}",
        files.len(),
        file_plural,
        indexed,
        files.len() - indexed,
        smell_count,
        smell_plural,
        overall_grade,
    );

    Ok(ReviewReport {
        files,
        overall_grade,
        changed_lines,
        smell_count,
        summary,
    })
}
/// Reviews `diff` against the chunks fetched from `client` for `index_id`.
///
/// The diff is parsed once up front, so a malformed diff is rejected before any
/// request is made; the parsed result is discarded and the diff is parsed again
/// by [`analyze_diff_with_chunks`].
///
/// # Errors
///
/// Returns [`ReviewError::MalformedHunkHeader`] for an unparseable diff and
/// [`ReviewError::Search`] when fetching the chunks fails.
pub async fn analyze_diff_with_client(
    diff: &str,
    client: &TrustySearchClient,
    index_id: &str,
) -> Result<ReviewReport, ReviewError> {
    // Validation only: propagate a parse error, drop the parsed files.
    DiffParser::parse(diff)?;

    let fetched = client.get_chunks(index_id).await;
    let chunks = match fetched {
        Ok(chunks) => chunks,
        Err(e) => {
            return Err(ReviewError::Search(format!(
                "get_chunks({index_id}): {e:#}"
            )))
        }
    };
    analyze_diff_with_chunks(diff, &chunks)
}
/// Renders a review report as plain text.
///
/// The output is a banner, the summary, a totals line, and then one section per
/// file listing its grade, complexity numbers, origin, smells and
/// recommendations.
pub fn render_text(report: &ReviewReport) -> String {
    let mut text = String::from("=== PR Review ===\n");
    text.push_str(&format!("{}\n", report.summary));
    text.push_str(&format!(
        "changed lines: {} | overall grade: {}\n",
        report.changed_lines, report.overall_grade
    ));

    for file in &report.files {
        let origin = match &file.source {
            ReviewSource::NewFile => "new file (local analysis)".to_string(),
            ReviewSource::Indexed { modified_chunks } => {
                format!("indexed, {modified_chunks} modified chunk(s)")
            }
        };
        text.push_str(&format!(
            "\n{} — grade {} (cyclomatic {}, cognitive {}) [{}]\n",
            file.path, file.grade, file.complexity.cyclomatic, file.complexity.cognitive, origin
        ));

        match file.smells.as_slice() {
            [] => text.push_str(" smells: none\n"),
            hits => {
                for hit in hits {
                    text.push_str(&format!(
                        " smell: {} (severity {}, line {})\n",
                        hit.category, hit.severity, hit.line
                    ));
                }
            }
        }

        for r in &file.recommendations {
            text.push_str(&format!(" → {r}\n"));
        }
    }
    text
}
// Unit tests for diff parsing, per-file review, report aggregation and rendering.
#[cfg(test)]
mod tests {
use super::*;
// Builds a `CodeChunk` for `file` spanning `start..=end` with the given content;
// all remaining fields take their `Default` values.
fn chunk(file: &str, start: usize, end: usize, content: &str) -> CodeChunk {
CodeChunk {
id: format!("{file}:{start}:{end}"),
file: file.to_string(),
start_line: start,
end_line: end,
content: content.to_string(),
..Default::default()
}
}
// One file, one hunk: the context line takes line 1, so the additions land on 2..=4.
#[test]
fn parses_single_file_addition() {
let diff = "\
diff --git a/src/foo.rs b/src/foo.rs
--- a/src/foo.rs
+++ b/src/foo.rs
@@ -1,2 +1,4 @@
fn existing() {}
+fn added() {
+ let x = 1;
+}
";
let files = DiffParser::parse(diff).unwrap();
assert_eq!(files.len(), 1);
assert_eq!(files[0].path, "src/foo.rs");
assert_eq!(files[0].added_lines.len(), 3);
assert_eq!(files[0].added_line_numbers, vec![2, 3, 4]);
}
// Each `+++` header starts a new file entry, with the `b/` prefix stripped.
#[test]
fn parses_multi_file_diff() {
let diff = "\
+++ b/a.rs
@@ -0,0 +1,1 @@
+fn a() {}
+++ b/b.py
@@ -0,0 +1,1 @@
+def b(): pass
";
let files = DiffParser::parse(diff).unwrap();
assert_eq!(files.len(), 2);
assert_eq!(files[0].path, "a.rs");
assert_eq!(files[1].path, "b.py");
}
// A removed line must not consume a new-file line number.
#[test]
fn deletion_lines_do_not_advance_new_counter() {
let diff = "\
+++ b/x.rs
@@ -1,3 +1,2 @@
fn keep() {}
-fn removed() {}
+fn replacement() {}
";
let files = DiffParser::parse(diff).unwrap();
assert_eq!(files[0].added_line_numbers, vec![2]);
}
// A hunk header without a `+<number>` part surfaces as `MalformedHunkHeader`.
#[test]
fn malformed_hunk_header_is_rejected() {
let diff = "+++ b/x.rs\n@@ totally bogus @@\n+fn x() {}\n";
let err = analyze_diff_with_chunks(diff, &[]).unwrap_err();
assert!(matches!(err, ReviewError::MalformedHunkHeader(_)));
}
// `added_content` joins lines with `\n` and adds no trailing newline.
#[test]
fn file_diff_added_content_joins_lines() {
let fd = FileDiff {
path: "f.rs".into(),
added_line_numbers: vec![1, 2],
added_lines: vec!["fn f() {".into(), "}".into()],
};
assert_eq!(fd.added_content(), "fn f() {\n}");
}
// `touches_range` is inclusive at both ends.
#[test]
fn file_diff_touches_chunk_range() {
let fd = FileDiff {
path: "f.rs".into(),
added_line_numbers: vec![5, 6, 7],
added_lines: vec!["a".into(), "b".into(), "c".into()],
};
assert!(fd.touches_range(1, 6));
assert!(fd.touches_range(7, 20));
assert!(!fd.touches_range(8, 12));
}
// Every `CodeSmell` variant maps to its expected category label.
#[test]
fn smell_hit_projection_maps_categories() {
assert_eq!(
smell_projection(&CodeSmell::LongFunction { lines: 99 }).0,
"long_method"
);
assert_eq!(
smell_projection(&CodeSmell::DeepNesting { max_depth: 7 }).0,
"deep_nesting"
);
assert_eq!(
smell_projection(&CodeSmell::TooManyParams { count: 9 }).0,
"too_many_params"
);
assert_eq!(
smell_projection(&CodeSmell::MissingDocstring).0,
"missing_docstring"
);
}
// With no indexed chunks the file is reviewed as `NewFile` from its added lines.
#[test]
fn analyze_falls_back_for_new_file() {
let diff = "\
+++ b/src/foo.rs
@@ -0,0 +1,3 @@
+/// doc
+fn added() {}
";
let report = analyze_diff_with_chunks(diff, &[]).unwrap();
assert_eq!(report.files.len(), 1);
assert_eq!(report.files[0].path, "src/foo.rs");
assert_eq!(report.files[0].source, ReviewSource::NewFile);
assert_eq!(report.files[0].grade, ComplexityGrade::A);
assert_eq!(report.overall_grade, ComplexityGrade::A);
assert_eq!(report.changed_lines, 2);
assert!(report.summary.contains("1 new"));
}
// The added line is line 3, which falls in the first chunk (1..=5) but not the
// second (10..=20), so exactly one chunk counts as modified.
#[test]
fn analyze_merges_indexed_file() {
let chunks = vec![
chunk("src/foo.rs", 1, 5, "fn existing() { let x = 1; }"),
chunk("src/foo.rs", 10, 20, "fn other() {}"),
];
let diff = "\
+++ b/src/foo.rs
@@ -1,4 +1,5 @@
fn existing() {
let x = 1;
+let y = 2;
}
";
let report = analyze_diff_with_chunks(diff, &chunks).unwrap();
assert_eq!(report.files.len(), 1);
match report.files[0].source {
ReviewSource::Indexed { modified_chunks } => assert_eq!(modified_chunks, 1),
ReviewSource::NewFile => panic!("expected indexed source"),
}
assert!(report.files[0]
.recommendations
.iter()
.any(|r| r.contains("already-indexed chunk")));
assert!(report.summary.contains("1 indexed"));
}
// A diff touching one indexed and one unindexed file reports both sources.
#[test]
fn analyze_mixed_indexed_and_new_files() {
let chunks = vec![chunk("indexed.rs", 1, 3, "fn a() {}")];
let diff = "\
+++ b/indexed.rs
@@ -1,1 +1,2 @@
fn a() {}
+fn a2() {}
+++ b/brand_new.rs
@@ -0,0 +1,1 @@
+fn b() {}
";
let report = analyze_diff_with_chunks(diff, &chunks).unwrap();
assert_eq!(report.files.len(), 2);
assert!(matches!(
report.files[0].source,
ReviewSource::Indexed { .. }
));
assert_eq!(report.files[1].source, ReviewSource::NewFile);
assert!(report.summary.contains("1 indexed, 1 new"));
}
// Sixty added lines in a new file are expected to trigger the long-method smell.
#[test]
fn analyze_detects_long_method_smell_in_new_file() {
let mut diff = String::from("+++ b/big.rs\n@@ -0,0 +1,60 @@\n");
for _ in 0..60 {
diff.push_str("+ let _ = 1;\n");
}
let report = analyze_diff_with_chunks(&diff, &[]).unwrap();
assert!(report.smell_count >= 1);
assert!(report.files[0]
.smells
.iter()
.any(|s| s.category == "long_method"));
}
// An empty diff yields an empty report with the default grade A.
#[test]
fn analyze_empty_diff_is_grade_a() {
let report = analyze_diff_with_chunks("", &[]).unwrap();
assert!(report.files.is_empty());
assert_eq!(report.overall_grade, ComplexityGrade::A);
assert_eq!(report.changed_lines, 0);
assert_eq!(report.smell_count, 0);
}
// The text rendering includes the banner, the file path, the totals line and the origin label.
#[test]
fn text_report_contains_summary_and_files() {
let diff = "+++ b/foo.rs\n@@ -0,0 +1,2 @@\n+/// doc\n+fn f() {}\n";
let report = analyze_diff_with_chunks(diff, &[]).unwrap();
let text = render_text(&report);
assert!(text.contains("=== PR Review ==="));
assert!(text.contains("foo.rs"));
assert!(text.contains("overall grade"));
assert!(text.contains("new file"));
}
// Serializing a report to JSON and back yields an equal value.
#[test]
fn report_round_trips_json() {
let diff = "+++ b/foo.rs\n@@ -0,0 +1,2 @@\n+/// doc\n+fn f() {}\n";
let report = analyze_diff_with_chunks(diff, &[]).unwrap();
let json = serde_json::to_string(&report).unwrap();
let back: ReviewReport = serde_json::from_str(&json).unwrap();
assert_eq!(report, back);
}
// A client pointed at an address where presumably nothing is listening must
// surface `ReviewError::Search` for an otherwise valid diff.
#[tokio::test]
async fn analyze_diff_with_client_errors_when_search_down() {
let client = TrustySearchClient::new("http://127.0.0.1:1");
let diff = "+++ b/foo.rs\n@@ -0,0 +1,1 @@\n+fn f() {}\n";
let err = analyze_diff_with_client(diff, &client, "idx")
.await
.expect_err("search down should error");
assert!(matches!(err, ReviewError::Search(_)));
}
}