use std::collections::HashMap;
/// How a file is treated when the filtered diff is rendered for a prompt.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileDisposition {
/// Rendered with a `--- a/` / `+++ b/` header followed by each of its kept hunks.
Kept,
/// Produces no output when the diff is rendered.
Dropped,
/// Rendered as a single `# <filename>: <summary>` line, provided a summary line is set.
SummaryOnly,
}
/// Category explaining why a hunk was removed from the diff.
///
/// Used as the key of the per-reason drop counters, hence the `Eq + Hash` derives.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HunkDropReason {
    /// Shown as `whitespace-only`.
    WhitespaceOnly,
    /// Shown as `import-only`.
    ImportOnly,
    /// Shown as `comment-only`.
    CommentOnly,
    /// Shown as `mechanical (Haiku)`.
    MechanicalHaiku,
}

impl HunkDropReason {
    /// Returns the short, human-readable name of this reason as it appears in
    /// the noise summary.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::WhitespaceOnly => "whitespace-only",
            Self::ImportOnly => "import-only",
            Self::CommentOnly => "comment-only",
            Self::MechanicalHaiku => "mechanical (Haiku)",
        }
    }
}
/// A hunk that survived filtering and will be shown to the reviewer.
#[derive(Debug, Clone)]
pub struct FilteredHunk {
    /// First line of the rendered hunk (emitted verbatim by [`FilteredHunk::render`]).
    pub header: String,
    /// Body lines, stored without trailing newlines; `render` joins them with `\n`.
    pub lines: Vec<String>,
    /// Not read in this file. NOTE(review): presumably a score for how likely the
    /// hunk is a substantive change — confirm range and meaning with the producer.
    pub substantive_confidence: f32,
    /// Not read in this file. NOTE(review): presumably a free-text explanation of
    /// why the hunk was kept — confirm.
    pub reason_kept: String,
}

impl FilteredHunk {
    /// Returns the header followed by every body line, each preceded by `\n`.
    ///
    /// The result has no trailing newline; a hunk without body lines renders as
    /// the header alone.
    pub fn render(&self) -> String {
        self.lines
            .iter()
            .fold(self.header.clone(), |mut text, body_line| {
                text.push('\n');
                text.push_str(body_line);
                text
            })
    }
}
/// Record of a hunk that was removed from a file during filtering.
///
/// None of these fields are read by the rendering code in this file; they are
/// carried as bookkeeping on `FilteredFile::dropped_hunks`.
#[derive(Debug, Clone)]
pub struct DroppedHunk {
/// Why the hunk was removed.
pub reason: HunkDropReason,
/// NOTE(review): presumably the number of lines the dropped hunk contained — confirm with the producer.
pub lines_count: usize,
/// NOTE(review): presumably the hunk's original header line — confirm with the producer.
pub header: String,
}
/// Per-file result of filtering: what to show for the file and what was removed from it.
#[derive(Debug, Clone)]
pub struct FilteredFile {
/// File name used in the rendered `--- a/` / `+++ b/` header and in summary lines.
pub filename: String,
/// Not read in this file. NOTE(review): presumably the change status of the file
/// (added / modified / ...) — confirm the allowed values with the producer.
pub status: String,
/// Selects how (and whether) the file is rendered.
pub disposition: FileDisposition,
/// Hunks emitted, in order, when the disposition is `Kept`.
pub hunks: Vec<FilteredHunk>,
/// Hunks removed from this file; not rendered.
pub dropped_hunks: Vec<DroppedHunk>,
/// Text emitted after `# <filename>: ` when the disposition is `SummaryOnly`;
/// a `SummaryOnly` file without a summary line renders nothing.
pub summary_line: Option<String>,
}
/// A file that was left out of the filtered diff.
#[derive(Debug, Clone)]
pub struct DroppedFile {
/// Path of the omitted file; not read by the rendering code in this file.
pub path: String,
/// Free-text reason for the omission; the distinct reasons (at most three of
/// them) are listed in the noise summary.
pub reason: String,
}
/// A diff after noise filtering: the per-file results plus a record of what
/// was removed.
#[derive(Debug, Clone)]
pub struct FilteredDiff {
    /// Per-file results, rendered in this order by [`FilteredDiff::render_for_prompt`].
    pub files: Vec<FilteredFile>,
    /// Files left out entirely; counted and summarized by
    /// [`FilteredDiff::build_noise_summary`].
    pub dropped_files: Vec<DroppedFile>,
    /// Number of hunks removed, keyed by reason.
    pub drop_hunk_counts: HashMap<HunkDropReason, u32>,
    /// Not read by the methods below. NOTE(review): presumably the byte size of
    /// the diff before filtering — confirm with the producer.
    pub original_byte_size: usize,
    /// Not read by the methods below. NOTE(review): presumably the byte size of
    /// the diff after filtering — confirm with the producer.
    pub filtered_byte_size: usize,
}

impl FilteredDiff {
    /// Renders the filtered diff as prompt text, stopping early once the
    /// budget is used up.
    ///
    /// Files are emitted in `self.files` order:
    /// * `SummaryOnly` — one `# <filename>: <summary>` line (nothing when the
    ///   file has no summary line);
    /// * `Kept` — a `--- a/…` / `+++ b/…` header, then every hunk followed by a
    ///   newline;
    /// * `Dropped` — nothing.
    ///
    /// `max_chars` is compared against `String::len`, i.e. it is a budget in
    /// UTF-8 bytes. Room for the noise summary plus a fixed reserve for the
    /// truncation marker is held back while rendering. As soon as one piece
    /// does not fit, rendering stops for all remaining files, a
    /// `[RENDER TRUNCATED …]` marker is appended, and the noise summary follows.
    ///
    /// The marker and the noise summary are always appended, and the marker can
    /// be longer than the fixed reserve, so `max_chars` is a soft cap: the
    /// result may exceed it slightly (or by more when `max_chars` is smaller
    /// than the reserved space).
    pub fn render_for_prompt(&self, max_chars: usize) -> String {
        const TRUNC_MARKER_RESERVE: usize = 120;

        let suffix = self.build_noise_summary();
        let suffix_reserve = suffix.len() + TRUNC_MARKER_RESERVE;
        let mut out = String::with_capacity(max_chars.min(64 * 1024));

        // Index of the first file of which nothing was emitted because the
        // budget ran out; `None` while everything still fits.
        let mut first_unrendered: Option<usize> = None;

        'files: for (idx, file) in self.files.iter().enumerate() {
            match file.disposition {
                FileDisposition::SummaryOnly => {
                    if let Some(summary) = &file.summary_line {
                        let line = format!("# {}: {}\n", file.filename, summary);
                        if out.len() + line.len() + suffix_reserve > max_chars {
                            first_unrendered = Some(idx);
                            break 'files;
                        }
                        out.push_str(&line);
                    }
                }
                FileDisposition::Kept => {
                    let file_header = format!("--- a/{0}\n+++ b/{0}\n", file.filename);
                    if out.len() + file_header.len() + suffix_reserve > max_chars {
                        first_unrendered = Some(idx);
                        break 'files;
                    }
                    out.push_str(&file_header);
                    for hunk in &file.hunks {
                        let rendered = hunk.render();
                        // `+ 1` accounts for the newline written after the hunk.
                        if out.len() + rendered.len() + suffix_reserve + 1 > max_chars {
                            // This file's header is already visible, so the
                            // file itself is only partially cut; wholly omitted
                            // files start at the next index.
                            first_unrendered = Some(idx + 1);
                            break 'files;
                        }
                        out.push_str(&rendered);
                        out.push('\n');
                    }
                }
                FileDisposition::Dropped => {}
            }
        }

        if let Some(start) = first_unrendered {
            // Count by position rather than by searching the output for file
            // names: a name can occur inside another file's name or inside hunk
            // text, and files that never render were not cut by the budget.
            let remaining_files = self
                .files
                .iter()
                .skip(start)
                .filter(|f| Self::would_render(f))
                .count();
            let marker = if remaining_files > 0 {
                format!(
                    "\n[RENDER TRUNCATED — char budget ({max_chars}) reached; \
                     ~{remaining_files} file(s) omitted; review covers only the \
                     visible portion above]\n"
                )
            } else {
                format!(
                    "\n[RENDER TRUNCATED — char budget ({max_chars}) reached; \
                     some hunks omitted from the last file above; review covers \
                     only the visible portion]\n"
                )
            };
            out.push_str(&marker);
        }

        out.push_str(&suffix);
        out
    }

    /// Builds the one-line note describing what filtering removed.
    ///
    /// Returns an empty string when no files and no hunks were dropped.
    /// Otherwise the result has the form
    /// `\n[DiffAnalyzer filtered <parts> — noise removed before review]\n`,
    /// where `<parts>` joins with `"; "`:
    /// * `<n> file(s) omitted (<reasons>)` — up to three distinct reasons, the
    ///   first three in sorted order, so the text is identical on every call;
    /// * `<n> hunk(s) omitted (<labels>)` — the sorted labels of every reason
    ///   with a non-zero count.
    pub fn build_noise_summary(&self) -> String {
        let dropped_files = self.dropped_files.len();
        let dropped_hunks: u32 = self.drop_hunk_counts.values().sum();
        if dropped_files == 0 && dropped_hunks == 0 {
            return String::new();
        }

        let mut parts: Vec<String> = Vec::with_capacity(2);

        if dropped_files > 0 {
            // A BTreeSet de-duplicates and orders the reasons *before* the
            // cut-off, so which three are shown does not depend on hash
            // iteration order.
            let reasons: Vec<&str> = self
                .dropped_files
                .iter()
                .map(|f| f.reason.as_str())
                .collect::<std::collections::BTreeSet<_>>()
                .into_iter()
                .take(3)
                .collect();
            // At least one file was dropped, so there is at least one reason.
            parts.push(format!(
                "{dropped_files} file(s) omitted ({})",
                reasons.join(", ")
            ));
        }

        if dropped_hunks > 0 {
            // Only name reasons that actually removed something. The total is
            // non-zero here, so at least one label survives the filter.
            let mut labels: Vec<&str> = self
                .drop_hunk_counts
                .iter()
                .filter(|(_, count)| **count > 0)
                .map(|(reason, _)| reason.label())
                .collect();
            labels.sort_unstable();
            parts.push(format!(
                "{dropped_hunks} hunk(s) omitted ({})",
                labels.join(", ")
            ));
        }

        format!(
            "\n[DiffAnalyzer filtered {} — noise removed before review]\n",
            parts.join("; ")
        )
    }

    /// Whether rendering `file` without any budget would emit at least a
    /// header or a summary line.
    fn would_render(file: &FilteredFile) -> bool {
        match file.disposition {
            FileDisposition::Kept => true,
            FileDisposition::SummaryOnly => file.summary_line.is_some(),
            FileDisposition::Dropped => false,
        }
    }
}
// Unit tests for this module live in `models_tests.rs` (located via `#[path]`)
// and are compiled only in test builds.
#[cfg(test)]
#[path = "models_tests.rs"]
mod tests;