use anyhow::Result;
/// Maximum size in bytes of a single diff chunk handed off for analysis.
pub const MAX_CHUNK_SIZE: usize = 8000;
/// Number of trailing bytes carried over from one chunk into the next for context.
pub const CHUNK_OVERLAP: usize = 200;
/// Parallelism figure reported in chunking logs; presumably the caller's
/// concurrent-request cap — confirm against the call site (only logged here).
pub const MAX_PARALLEL_CHUNKS: usize = 10;
/// Hard cap on chunks produced; diff content beyond this limit is dropped with a warning.
pub const MAX_TOTAL_CHUNKS: usize = 50;
/// A slice of a larger diff, small enough to process in a single request.
#[derive(Debug, Clone)]
pub struct DiffChunk {
    /// The diff text for this chunk; may begin with overlap carried over
    /// from the previous chunk.
    pub content: String,
    /// Human-readable label for the chunk (comma-separated file paths or a count).
    pub file_summary: String,
}
/// Splits a unified diff into chunks of roughly `MAX_CHUNK_SIZE` bytes,
/// keeping whole per-file diffs together and carrying a small tail overlap
/// between consecutive chunks for context.
///
/// A diff that already fits in one chunk is returned as-is. At most
/// `MAX_TOTAL_CHUNKS` chunks are produced; any remaining input is dropped
/// with a warning on stderr.
pub fn chunk_diff(diff_content: &str) -> Vec<DiffChunk> {
    // Fast path: the whole diff fits in a single chunk.
    if diff_content.len() <= MAX_CHUNK_SIZE {
        return vec![DiffChunk {
            content: diff_content.to_string(),
            file_summary: extract_file_summary(diff_content),
        }];
    }
    let files = split_diff_by_files(diff_content);
    let estimated_chunks = ((diff_content.len() / MAX_CHUNK_SIZE) + 1).min(MAX_TOTAL_CHUNKS);
    let mut chunks = Vec::with_capacity(estimated_chunks);
    let mut current_chunk = String::with_capacity(MAX_CHUNK_SIZE + CHUNK_OVERLAP);
    let mut current_files = Vec::new();
    for file_diff in files {
        if chunks.len() >= MAX_TOTAL_CHUNKS {
            eprintln!(
                "Warning: Reached maximum chunk limit ({}), truncating diff processing",
                MAX_TOTAL_CHUNKS
            );
            break;
        }
        // Flush the current chunk when appending this file would overflow it.
        if !current_chunk.is_empty() && current_chunk.len() + file_diff.len() > MAX_CHUNK_SIZE {
            // Seed the next chunk with the tail of this one. This fixes the
            // corrupted token `¤t_chunk` (a mangled `&current_chunk`, which
            // did not compile), and `mem::replace` also avoids the clone the
            // original needed before reassigning the buffer.
            let overlap = get_chunk_overlap(&current_chunk);
            chunks.push(DiffChunk {
                content: std::mem::replace(&mut current_chunk, overlap),
                file_summary: current_files.join(", "),
            });
            current_files.clear();
        }
        current_chunk.push_str(&file_diff);
        current_chunk.push('\n');
        if let Some(filename) = extract_filename(&file_diff) {
            current_files.push(filename);
        }
    }
    // Flush the trailing partial chunk, still honoring the global cap.
    if !current_chunk.is_empty() && chunks.len() < MAX_TOTAL_CHUNKS {
        chunks.push(DiffChunk {
            content: current_chunk,
            file_summary: current_files.join(", "),
        });
    }
    if chunks.len() > 1 {
        println!(
            "Info: Split diff into {} chunks (limit: {}, parallel: {})",
            chunks.len(),
            MAX_TOTAL_CHUNKS,
            MAX_PARALLEL_CHUNKS
        );
    }
    chunks
}
/// Splits a multi-file unified diff into one `String` per file, delimited by
/// `diff --git` header lines. Any content before the first header is grouped
/// with the first file; empty input yields an empty vec. Lines are rejoined
/// with `\n`, so a missing trailing newline is normalized.
fn split_diff_by_files(diff_content: &str) -> Vec<String> {
    let estimated_files = diff_content.matches("diff --git").count().max(1);
    let mut files = Vec::with_capacity(estimated_files);
    let mut current_file = String::with_capacity(diff_content.len() / estimated_files);
    for line in diff_content.lines() {
        // A new header closes the previous file's section.
        if line.starts_with("diff --git") && !current_file.is_empty() {
            // `mem::replace` pushes the finished section and installs a
            // pre-sized buffer in one step (the original `mem::take` left a
            // default String that was immediately overwritten and discarded).
            files.push(std::mem::replace(
                &mut current_file,
                String::with_capacity(diff_content.len() / estimated_files),
            ));
        }
        current_file.push_str(line);
        current_file.push('\n');
    }
    if !current_file.is_empty() {
        files.push(current_file);
    }
    files
}
/// Pulls the target file path out of a `diff --git a/old b/new` header line,
/// stripping the `b/` prefix when present.
///
/// Returns `None` when no header line with at least four whitespace-separated
/// fields exists in `file_diff`.
fn extract_filename(file_diff: &str) -> Option<String> {
    file_diff
        .lines()
        .filter(|line| line.starts_with("diff --git"))
        .find_map(|header| {
            // Fourth field is the "b/<path>" side; absent on malformed headers.
            let target = header.split_whitespace().nth(3)?;
            Some(target.strip_prefix("b/").unwrap_or(target).to_string())
        })
}
/// Returns the tail of `chunk` (at most `CHUNK_OVERLAP` bytes) used to seed
/// the next chunk with context.
///
/// The cut point is aligned forward to a UTF-8 character boundary so slicing
/// never panics inside a multi-byte character, then advanced past the first
/// newline so the overlap starts on a whole line. Chunks no longer than
/// `CHUNK_OVERLAP` are returned unchanged.
fn get_chunk_overlap(chunk: &str) -> String {
    if chunk.len() <= CHUNK_OVERLAP {
        return chunk.to_string();
    }
    let target_start = chunk.len().saturating_sub(CHUNK_OVERLAP);
    // Smallest char boundary at or after the byte target (chunk.len() is
    // always a valid boundary, so the fallback is unreachable in practice).
    let start_pos = (target_start..=chunk.len())
        .find(|&idx| chunk.is_char_boundary(idx))
        .unwrap_or(chunk.len());
    let tail = &chunk[start_pos..];
    // Drop the (likely partial) first line of the tail.
    match tail.find('\n') {
        Some(nl) => tail[nl + 1..].to_string(),
        None => tail.to_string(),
    }
}
fn extract_file_summary(diff_content: &str) -> String {
let mut files = Vec::new();
for line in diff_content.lines() {
if line.starts_with("diff --git") {
if let Some(filename) = extract_filename(line) {
files.push(filename);
}
}
}
match files.len() {
0 => "changes".to_string(),
1 => files[0].clone(),
2..=3 => files.join(", "),
n => format!("{} files", n),
}
}
/// Merges per-chunk commit messages into a single text, labelling each chunk
/// section. Empty input yields a generic fallback message; a single response
/// is passed through untouched.
pub fn combine_commit_messages(responses: Vec<String>) -> String {
    match responses.as_slice() {
        [] => "chore: update files".to_string(),
        [only] => only.clone(),
        _ => responses
            .iter()
            .enumerate()
            .map(|(idx, msg)| format!("=== Chunk {} ===\n{}", idx + 1, msg.trim()))
            .collect::<Vec<_>>()
            .join("\n\n"),
    }
}
/// Merges per-chunk review JSON objects into a single review: issues are
/// concatenated, recommendations are deduplicated (first-seen order), file
/// counts are summed, and scores are averaged with integer division
/// (defaulting to 75 when no valid score is present). Falls back to a
/// placeholder review when nothing usable could be extracted.
///
/// # Errors
/// Currently always returns `Ok`; the `Result` return is kept for API
/// stability with fallible callers.
pub fn combine_review_results(responses: Vec<serde_json::Value>) -> Result<serde_json::Value> {
    if responses.is_empty() {
        eprintln!("Warning: No review responses to combine, using fallback");
        return Ok(create_fallback_review_json());
    }
    if responses.len() == 1 {
        return Ok(responses[0].clone());
    }
    let mut all_issues = Vec::new();
    let mut all_recommendations: Vec<String> = Vec::new();
    let mut total_files = 0usize;
    let mut scores: Vec<u8> = Vec::new();
    // (The original bound `let review = response;` was a redundant rebinding.)
    for review in &responses {
        if let Some(issues) = review.get("issues").and_then(|i| i.as_array()) {
            all_issues.extend(issues.iter().cloned());
        }
        if let Some(recs) = review.get("recommendations").and_then(|r| r.as_array()) {
            all_recommendations.extend(recs.iter().filter_map(|r| r.as_str().map(|s| s.to_string())));
        }
        if let Some(summary) = review.get("summary") {
            if let Some(files) = summary.get("total_files").and_then(|f| f.as_u64()) {
                total_files += files as usize;
            }
            // Ignore out-of-range scores rather than letting them skew the average.
            if let Some(score) = summary.get("overall_score").and_then(|s| s.as_u64()) {
                if score <= 100 {
                    scores.push(score as u8);
                }
            }
        }
    }
    let avg_score = if scores.is_empty() {
        75
    } else {
        let sum: u32 = scores.iter().map(|&s| u32::from(s)).sum();
        (sum / scores.len() as u32) as u8
    };
    // Deduplicate while preserving first-seen order so the combined output is
    // deterministic (the previous HashSet round-trip yielded a random order).
    let mut seen = std::collections::HashSet::new();
    all_recommendations.retain(|rec| seen.insert(rec.clone()));
    if all_issues.is_empty() && all_recommendations.is_empty() && total_files == 0 {
        // Nothing usable was extracted from any chunk.
        return Ok(create_fallback_review_json());
    }
    Ok(serde_json::json!({
        "summary": {
            "total_files": total_files,
            "total_issues": all_issues.len(),
            "overall_score": avg_score
        },
        "issues": all_issues,
        "recommendations": all_recommendations
    }))
}
/// Builds the placeholder review returned when the automated analysis
/// produced nothing usable, directing the user toward a re-run or a manual
/// review.
fn create_fallback_review_json() -> serde_json::Value {
    let incomplete_issue = serde_json::json!({
        "severity": "MEDIUM",
        "category": "System",
        "title": "Review Analysis Incomplete",
        "description": "The automated review could not complete fully. Manual review recommended."
    });
    serde_json::json!({
        "summary": {
            "total_files": 1,
            "total_issues": 1,
            "overall_score": 75
        },
        "issues": [incomplete_issue],
        "recommendations": [
            "Consider running the review again",
            "Perform manual code review for complex changes"
        ]
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: this module is reformatted to rustfmt conventions (several
    // statements were fused onto single lines); test behavior is unchanged.

    #[test]
    fn test_small_diff_no_chunking() {
        let small_diff = "diff --git a/test.rs b/test.rs\n+added line\n";
        let chunks = chunk_diff(small_diff);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].content, small_diff);
        assert_eq!(chunks[0].file_summary, "test.rs");
    }

    #[test]
    fn test_extract_filename() {
        let file_diff = "diff --git a/src/main.rs b/src/main.rs\nindex 123..456\n";
        let filename = extract_filename(file_diff);
        assert_eq!(filename, Some("src/main.rs".to_string()));
        // Fourth field without a `b/` prefix is returned verbatim.
        let file_diff2 = "diff --git a/test.rs test.rs\nindex 123..456\n";
        let filename2 = extract_filename(file_diff2);
        assert_eq!(filename2, Some("test.rs".to_string()));
        let invalid_diff = "not a diff line\n";
        let filename3 = extract_filename(invalid_diff);
        assert_eq!(filename3, None);
    }

    #[test]
    fn test_extract_file_summary() {
        let single_file = "diff --git a/test.rs b/test.rs\n+line\n";
        assert_eq!(extract_file_summary(single_file), "test.rs");
        let multi_files = "diff --git a/file1.rs b/file1.rs\n+line1\ndiff --git a/file2.rs b/file2.rs\n+line2\ndiff --git a/file3.rs b/file3.rs\n+line3\n";
        assert_eq!(
            extract_file_summary(multi_files),
            "file1.rs, file2.rs, file3.rs"
        );
        let many_files = (0..10)
            .map(|i| format!("diff --git a/file{}.rs b/file{}.rs\n+line{}\n", i, i, i))
            .collect::<String>();
        assert_eq!(extract_file_summary(&many_files), "10 files");
        let no_files = "some random content\nwithout diff headers\n";
        assert_eq!(extract_file_summary(no_files), "changes");
    }

    #[test]
    fn test_split_diff_by_files() {
        let multi_file_diff = concat!(
            "diff --git a/file1.rs b/file1.rs\n",
            "index 123..456\n",
            "+added line 1\n",
            "diff --git a/file2.rs b/file2.rs\n",
            "index 789..abc\n",
            "+added line 2\n"
        );
        let files = split_diff_by_files(multi_file_diff);
        assert_eq!(files.len(), 2);
        assert!(files[0].contains("file1.rs"));
        assert!(files[0].contains("added line 1"));
        assert!(files[1].contains("file2.rs"));
        assert!(files[1].contains("added line 2"));
    }

    #[test]
    fn test_get_chunk_overlap() {
        let short_chunk = "short content";
        assert_eq!(get_chunk_overlap(short_chunk), short_chunk);
        let long_chunk = "a".repeat(300) + "\nline1\nline2\nline3";
        let overlap = get_chunk_overlap(&long_chunk);
        assert!(overlap.len() <= CHUNK_OVERLAP);
        assert!(overlap.starts_with("line1\nline2\nline3"));
        let no_newlines = "a".repeat(300);
        let overlap2 = get_chunk_overlap(&no_newlines);
        assert!(overlap2.len() <= CHUNK_OVERLAP);
    }

    #[test]
    fn test_large_diff_chunking() {
        let mut large_diff = String::new();
        for i in 0..5 {
            let file_content = "a".repeat(MAX_CHUNK_SIZE / 3);
            large_diff.push_str(&format!(
                "diff --git a/file{}.rs b/file{}.rs\nindex 123..456\n+{}\n",
                i, i, file_content
            ));
        }
        let chunks = chunk_diff(&large_diff);
        assert!(!chunks.is_empty(), "Should have at least one chunk");
        for chunk in &chunks {
            assert!(
                chunk.content.len() <= MAX_CHUNK_SIZE * 2,
                "Chunk size {} should be reasonable",
                chunk.content.len()
            );
        }
    }

    #[test]
    fn test_combine_commit_messages() {
        let empty: Vec<String> = vec![];
        assert_eq!(combine_commit_messages(empty), "chore: update files");
        let single = vec!["feat: add new feature".to_string()];
        assert_eq!(combine_commit_messages(single), "feat: add new feature");
        let multiple = vec![
            "feat: add feature A\n\n- Added component A\n- Updated tests".to_string(),
            "fix: resolve bug B\n\n- Fixed validation\n- Added error handling".to_string(),
        ];
        let combined = combine_commit_messages(multiple);
        assert!(combined.contains("feat: add feature A"));
        assert!(combined.contains("Added component A"));
        assert!(combined.contains("Fixed validation"));
    }

    #[test]
    fn test_combine_commit_messages_single_line_edge_case() {
        let single_line_responses = vec!["fix: one line fix".to_string()];
        let result = combine_commit_messages(single_line_responses);
        assert_eq!(result, "fix: one line fix");
        let multi_single_line = vec!["fix: bug one".to_string(), "feat: add thing".to_string()];
        let result2 = combine_commit_messages(multi_single_line);
        assert!(result2.contains("fix: bug one") || result2.contains("feat: add thing"));
    }

    #[test]
    fn test_combine_review_results() {
        let empty: Vec<serde_json::Value> = vec![];
        let result =
            combine_review_results(empty).expect("Empty responses should produce valid result");
        assert!(result.to_string().contains("Review Analysis Incomplete"));
        let single = vec![serde_json::json!({
            "summary": {"total_files": 1, "total_issues": 2, "overall_score": 85},
            "issues": [{"severity": "HIGH", "category": "Security", "title": "Test Issue", "description": "Test description"}],
            "recommendations": ["Test recommendation"]
        })];
        let result = combine_review_results(single)
            .expect("Single valid response should produce valid result");
        assert!(result.to_string().contains("Test Issue"));
        assert!(result.to_string().contains("Test recommendation"));
        let multiple = vec![
            serde_json::json!({
                "summary": {"total_files": 1, "total_issues": 1, "overall_score": 80},
                "issues": [{"severity": "MEDIUM", "category": "Code Quality", "title": "Issue 1", "description": "Desc 1"}],
                "recommendations": ["Rec 1"]
            }),
            serde_json::json!({
                "summary": {"total_files": 2, "total_issues": 1, "overall_score": 90},
                "issues": [{"severity": "LOW", "category": "Style", "title": "Issue 2", "description": "Desc 2"}],
                "recommendations": ["Rec 2"]
            }),
        ];
        let result = combine_review_results(multiple)
            .expect("Multiple valid responses should produce valid result");
        assert_eq!(result["summary"]["total_files"], 3);
        assert_eq!(result["summary"]["total_issues"], 2);
        assert_eq!(result["summary"]["overall_score"], 85);
        assert_eq!(
            result["issues"]
                .as_array()
                .expect("issues should be an array")
                .len(),
            2
        );
        assert_eq!(
            result["recommendations"]
                .as_array()
                .expect("recommendations should be an array")
                .len(),
            2
        );
    }

    #[test]
    fn test_chunk_limit_enforcement() {
        let mut large_diff = String::new();
        for i in 0..MAX_TOTAL_CHUNKS + 10 {
            large_diff.push_str(&format!(
                "diff --git a/file{}.rs b/file{}.rs\nindex 123..456\n+{}\n",
                i,
                i,
                "a".repeat(MAX_CHUNK_SIZE / 2)
            ));
        }
        let chunks = chunk_diff(&large_diff);
        assert!(
            chunks.len() <= MAX_TOTAL_CHUNKS,
            "Should not exceed MAX_TOTAL_CHUNKS limit"
        );
    }

    #[test]
    fn test_invalid_json_handling() {
        let invalid_responses = vec![serde_json::json!({"invalid": "json structure"})];
        let result = combine_review_results(invalid_responses)
            .expect("Invalid JSON should still produce a result");
        assert_eq!(result["invalid"], "json structure");
    }

    #[test]
    fn test_edge_cases() {
        let empty_diff = "";
        let chunks = chunk_diff(empty_diff);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].content, "");
        assert_eq!(chunks[0].file_summary, "changes");
        let whitespace_diff = "   \n\n  \n";
        let chunks = chunk_diff(whitespace_diff);
        assert_eq!(chunks.len(), 1);
        let malformed = "diff --git incomplete header\n+some content\n";
        let chunks = chunk_diff(malformed);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].file_summary, "header");
    }

    #[test]
    fn test_utf8_character_boundaries() {
        let utf8_content = "├── some content with UTF-8 chars: 🚀 ✨ 🎯\n".repeat(300);
        let overlap = get_chunk_overlap(&utf8_content);
        assert!(!overlap.is_empty());
        assert!(overlap.is_ascii() || overlap.chars().count() > 0);
        let diff_with_utf8 = format!(
            "diff --git a/README.md b/README.md\nindex 5f16baa..a49d7dc 100644\n--- a/README.md\n+++ b/README.md\n@@ -1,3 +1,3 @@\n{}",
            utf8_content
        );
        let chunks = chunk_diff(&diff_with_utf8);
        assert!(!chunks.is_empty());
        for chunk in &chunks {
            assert!(chunk.content.chars().count() > 0);
        }
    }
}