use std::collections::HashMap;
/// How a file is treated when a [`FilteredDiff`] is rendered for a prompt.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileDisposition {
/// The file's `---`/`+++` header and its surviving hunks are rendered.
Kept,
/// Nothing is rendered for the file.
Dropped,
/// Only a single `# <filename>: <summary>` line is rendered, and only when
/// the file's `summary_line` is set.
SummaryOnly,
}
/// Category explaining why a hunk was left out of the filtered diff.
///
/// Used as the key of `FilteredDiff::drop_hunk_counts`, hence the `Eq` + `Hash` derives.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HunkDropReason {
    /// Reported as `whitespace-only`.
    WhitespaceOnly,
    /// Reported as `import-only`.
    ImportOnly,
    /// Reported as `comment-only`.
    CommentOnly,
    /// Reported as `mechanical (Haiku)`.
    // NOTE(review): presumably set by a model-based classification pass — confirm.
    MechanicalHaiku,
}

impl HunkDropReason {
    /// Returns the short, human-readable label used for this reason in summaries.
    ///
    /// The returned string is a compile-time constant, one per variant.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::WhitespaceOnly => "whitespace-only",
            Self::ImportOnly => "import-only",
            Self::CommentOnly => "comment-only",
            Self::MechanicalHaiku => "mechanical (Haiku)",
        }
    }
}
/// A hunk that remains in the filtered diff.
#[derive(Debug, Clone)]
pub struct FilteredHunk {
    /// First line of the rendered hunk (the tests in this file use `@@ ... @@` headers).
    pub header: String,
    /// Body lines, rendered one per line after the header, without trailing newlines added.
    pub lines: Vec<String>,
    /// Confidence score attached to the hunk; not read by the rendering code.
    // NOTE(review): presumably in 0.0..=1.0 — confirm against the producer.
    pub substantive_confidence: f32,
    /// Free-form explanation of why the hunk was kept; not read by the rendering code.
    pub reason_kept: String,
}

impl FilteredHunk {
    /// Renders the hunk as the header followed by every body line, separated by `\n`.
    ///
    /// No trailing newline is appended; a hunk with no lines renders as the header alone.
    pub fn render(&self) -> String {
        self.lines
            .iter()
            .fold(self.header.clone(), |mut text, body_line| {
                text.push('\n');
                text.push_str(body_line);
                text
            })
    }
}
/// Record of a hunk that was removed from a file (stored in `FilteredFile::dropped_hunks`).
///
/// None of these fields are read by the rendering code visible in this file.
#[derive(Debug, Clone)]
pub struct DroppedHunk {
/// Category explaining why the hunk was removed.
pub reason: HunkDropReason,
/// NOTE(review): presumably the number of lines in the removed hunk — confirm.
pub lines_count: usize,
/// NOTE(review): presumably the removed hunk's `@@` header — confirm.
pub header: String,
}
/// One file of a [`FilteredDiff`], together with how it should be rendered.
#[derive(Debug, Clone)]
pub struct FilteredFile {
/// Path used in the rendered `--- a/<filename>` / `+++ b/<filename>` header and
/// in the `# <filename>: <summary>` line.
pub filename: String,
/// Change status of the file; the tests in this file use `"modified"`.
/// Not read by the rendering code visible here.
pub status: String,
/// Selects which of the fields below is used when rendering.
pub disposition: FileDisposition,
/// Surviving hunks; rendered in order when `disposition` is `Kept`.
pub hunks: Vec<FilteredHunk>,
/// Hunks removed from this file; not read by the rendering code visible here.
pub dropped_hunks: Vec<DroppedHunk>,
/// Summary text rendered when `disposition` is `SummaryOnly`; a `SummaryOnly`
/// file with `None` here renders nothing.
pub summary_line: Option<String>,
}
/// A file that was removed from the diff entirely.
#[derive(Debug, Clone)]
pub struct DroppedFile {
/// Path of the removed file; not read by the rendering code visible in this file.
pub path: String,
/// Why the file was removed (the tests use `"lockfile"`); distinct reasons are
/// listed in the noise summary built by `FilteredDiff::build_noise_summary`.
pub reason: String,
}
/// Result of filtering a diff: the files that remain plus bookkeeping about what was removed.
#[derive(Debug, Clone)]
pub struct FilteredDiff {
/// Files considered for rendering, in output order.
pub files: Vec<FilteredFile>,
/// Files removed entirely; their count and reasons feed the noise summary.
pub dropped_files: Vec<DroppedFile>,
/// Number of removed hunks per reason; the values are summed for the noise summary.
pub drop_hunk_counts: HashMap<HunkDropReason, u32>,
/// NOTE(review): presumably the byte size of the diff before filtering — confirm.
/// Not read by the code visible in this file.
pub original_byte_size: usize,
/// NOTE(review): presumably the byte size of the diff after filtering — confirm.
/// Not read by the code visible in this file.
pub filtered_byte_size: usize,
}
impl FilteredDiff {
    /// Renders the surviving diff content as prompt text, followed by the noise summary.
    ///
    /// Files are visited in order:
    /// - `Dropped` files emit nothing.
    /// - `SummaryOnly` files emit one `# <filename>: <summary>` line, and only when
    ///   `summary_line` is set.
    /// - `Kept` files emit a `--- a/…` / `+++ b/…` header followed by each rendered hunk
    ///   and a trailing newline.
    ///
    /// `max_chars` is a budget measured with `String::len`, i.e. in UTF-8 bytes, not in
    /// Unicode scalar values. Room for the noise summary is reserved before any content
    /// is written and the summary is always appended, so the result exceeds `max_chars`
    /// only when the summary by itself is longer than the budget.
    pub fn render_for_prompt(&self, max_chars: usize) -> String {
        let suffix = self.build_noise_summary();
        // Cap the up-front allocation so a huge budget does not reserve a huge buffer.
        let mut out = String::with_capacity(max_chars.min(64 * 1024));

        for file in &self.files {
            match file.disposition {
                FileDisposition::Dropped => {}
                FileDisposition::SummaryOnly => {
                    if let Some(summary) = &file.summary_line {
                        let line = format!("# {}: {}\n", file.filename, summary);
                        if out.len() + line.len() + suffix.len() > max_chars {
                            break;
                        }
                        out.push_str(&line);
                    }
                }
                FileDisposition::Kept => {
                    let file_header = format!("--- a/{0}\n+++ b/{0}\n", file.filename);
                    if out.len() + file_header.len() + suffix.len() > max_chars {
                        break;
                    }
                    out.push_str(&file_header);

                    for hunk in &file.hunks {
                        let rendered = hunk.render();
                        // The extra 1 accounts for the newline pushed after the hunk.
                        if out.len() + rendered.len() + suffix.len() + 1 > max_chars {
                            // NOTE(review): this ends only the current file's hunks;
                            // later files are still attempted, so a header can appear
                            // with some or all of its hunks missing. The other two
                            // budget checks stop the whole render — confirm which
                            // behaviour is intended.
                            break;
                        }
                        out.push_str(&rendered);
                        out.push('\n');
                    }
                }
            }
        }

        // Appending an empty summary is a no-op, so no emptiness check is needed.
        out.push_str(&suffix);
        out
    }

    /// Builds the one-line notice describing what was filtered out.
    ///
    /// Returns an empty string when no files were dropped and the hunk counts sum to
    /// zero. Otherwise returns
    /// `"\n[DiffAnalyzer filtered <parts> — noise removed before review]\n"`, where
    /// `<parts>` joins with `"; "`:
    /// - `"<n> file(s) omitted (<reasons>)"`: at most three distinct file reasons, and
    /// - `"<n> hunk(s) omitted (<labels>)"`: the labels of every reason with a
    ///   non-zero count.
    ///
    /// Reasons and labels are de-duplicated and listed in ascending order. The three
    /// reasons shown are the first three in that order, so the output is identical
    /// from run to run for the same data.
    pub fn build_noise_summary(&self) -> String {
        let dropped_files = self.dropped_files.len();
        // Sum in u64 so that many large per-reason counts cannot overflow.
        let dropped_hunks: u64 = self
            .drop_hunk_counts
            .values()
            .map(|&count| u64::from(count))
            .sum();

        if dropped_files == 0 && dropped_hunks == 0 {
            return String::new();
        }

        let mut parts: Vec<String> = Vec::with_capacity(2);

        if dropped_files > 0 {
            // De-duplicate through an ordered set *before* truncating: truncating a
            // HashSet first would pick three reasons in arbitrary hash order.
            let reasons: Vec<&str> = self
                .dropped_files
                .iter()
                .map(|file| file.reason.as_str())
                .collect::<std::collections::BTreeSet<_>>()
                .into_iter()
                .take(3)
                .collect();
            // At least one file was dropped, so `reasons` is never empty here.
            parts.push(format!(
                "{dropped_files} file(s) omitted ({})",
                reasons.join(", ")
            ));
        }

        if dropped_hunks > 0 {
            // Skip reasons whose recorded count is zero: nothing was dropped for them.
            let labels: Vec<&str> = self
                .drop_hunk_counts
                .iter()
                .filter(|&(_, &count)| count > 0)
                .map(|(reason, _)| reason.label())
                .collect::<std::collections::BTreeSet<_>>()
                .into_iter()
                .collect();
            // A positive total implies at least one positive count, so `labels` is
            // never empty here.
            parts.push(format!(
                "{dropped_hunks} hunk(s) omitted ({})",
                labels.join(", ")
            ));
        }

        format!(
            "\n[DiffAnalyzer filtered {} — noise removed before review]\n",
            parts.join("; ")
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Kept` file named `name` holding one hunk whose only line is `hunk_content`.
    fn make_kept_file(name: &str, hunk_content: &str) -> FilteredFile {
        let only_hunk = FilteredHunk {
            header: String::from("@@ -1,3 +1,3 @@"),
            lines: vec![String::from(hunk_content)],
            substantive_confidence: 1.0,
            reason_kept: String::from("deterministic-pass"),
        };
        FilteredFile {
            filename: String::from(name),
            status: String::from("modified"),
            disposition: FileDisposition::Kept,
            hunks: vec![only_hunk],
            dropped_hunks: Vec::new(),
            summary_line: None,
        }
    }

    /// A rendered hunk contains its header and every body line.
    #[test]
    fn filtered_hunk_render_roundtrip() {
        let hunk = FilteredHunk {
            header: String::from("@@ -1,2 +1,2 @@"),
            lines: vec![String::from("-old line"), String::from("+new line")],
            substantive_confidence: 1.0,
            reason_kept: String::from("test"),
        };
        let text = hunk.render();
        for needle in ["@@ -1,2 +1,2 @@", "-old line", "+new line"] {
            assert!(text.contains(needle));
        }
    }

    /// Every drop reason maps to its fixed label.
    #[test]
    fn hunk_drop_reason_label() {
        let expected = [
            (HunkDropReason::WhitespaceOnly, "whitespace-only"),
            (HunkDropReason::ImportOnly, "import-only"),
            (HunkDropReason::CommentOnly, "comment-only"),
            (HunkDropReason::MechanicalHaiku, "mechanical (Haiku)"),
        ];
        for (reason, label) in expected {
            assert_eq!(reason.label(), label);
        }
    }

    /// A kept file's path and hunk text both reach the rendered prompt.
    #[test]
    fn filtered_diff_render_for_prompt_contains_surviving_content() {
        let kept = make_kept_file("src/auth.rs", "+pub fn authenticate() {}");
        let diff = FilteredDiff {
            files: vec![kept],
            dropped_files: Vec::new(),
            drop_hunk_counts: HashMap::new(),
            original_byte_size: 500,
            filtered_byte_size: 100,
        };
        let rendered = diff.render_for_prompt(10_000);
        assert!(rendered.contains("src/auth.rs"), "file path must appear");
        assert!(
            rendered.contains("authenticate"),
            "hunk content must appear"
        );
    }

    /// Rendering far more content than the budget allows stays near the budget.
    #[test]
    fn filtered_diff_render_respects_max_chars() {
        let big_line = "+fn foo() {}".repeat(50);
        let mut files: Vec<FilteredFile> = Vec::with_capacity(100);
        for i in 0..100 {
            files.push(make_kept_file(&format!("src/file{i}.rs"), &big_line));
        }
        let diff = FilteredDiff {
            files,
            dropped_files: Vec::new(),
            drop_hunk_counts: HashMap::new(),
            original_byte_size: 100_000,
            filtered_byte_size: 50_000,
        };
        let rendered = diff.render_for_prompt(2_000);
        assert!(
            rendered.len() <= 2_000 + 200,
            "rendered output must not greatly exceed max_chars: len={}",
            rendered.len()
        );
    }

    /// Dropped files and hunks are reported in the trailing noise summary.
    #[test]
    fn filtered_diff_drop_summary_emitted() {
        let drop_counts = HashMap::from([
            (HunkDropReason::ImportOnly, 3u32),
            (HunkDropReason::WhitespaceOnly, 1u32),
        ]);
        let lockfile = DroppedFile {
            path: String::from("Cargo.lock"),
            reason: String::from("lockfile"),
        };
        let diff = FilteredDiff {
            files: vec![make_kept_file("src/main.rs", "+fn main() {}")],
            dropped_files: vec![lockfile],
            drop_hunk_counts: drop_counts,
            original_byte_size: 5_000,
            filtered_byte_size: 200,
        };
        let rendered = diff.render_for_prompt(100_000);
        assert!(
            rendered.contains("DiffAnalyzer filtered"),
            "noise summary must appear: {rendered}"
        );
        assert!(
            rendered.contains("file(s) omitted"),
            "file drop count must appear: {rendered}"
        );
        assert!(
            rendered.contains("hunk(s) omitted"),
            "hunk drop count must appear: {rendered}"
        );
    }

    /// With nothing dropped, no summary text is produced at all.
    #[test]
    fn no_summary_when_nothing_dropped() {
        let diff = FilteredDiff {
            files: vec![make_kept_file("src/lib.rs", "+pub fn new() {}")],
            dropped_files: Vec::new(),
            drop_hunk_counts: HashMap::new(),
            original_byte_size: 100,
            filtered_byte_size: 100,
        };
        assert!(
            diff.build_noise_summary().is_empty(),
            "empty summary when nothing was dropped"
        );
    }
}