Skip to main content

llm_git/
diff.rs

//! Diff parsing and smart truncation logic.
2use crate::{config::CommitConfig, tokens::TokenCounter};
3
/// One file's section of a git diff, split into header and body.
///
/// Derives `PartialEq`/`Eq` so parsed diffs can be compared directly (for
/// example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileDiff {
   /// Path of the file, taken from the `diff --git` line.
   pub filename:  String,
   /// The diff header (`diff --git`, `index`, `---`/`+++`, `@@`, etc).
   pub header:    String,
   /// The actual diff content (the lines following the hunk header).
   pub content:   String,
   /// Number of added (`+`) content lines.
   pub additions: usize,
   /// Number of removed (`-`) content lines.
   pub deletions: usize,
   /// Whether the section was reported as a binary file.
   pub is_binary: bool,
}
13
14impl FileDiff {
15   pub const fn size(&self) -> usize {
16      self.header.len() + self.content.len()
17   }
18
19   /// Estimate token count for this file diff.
20   pub fn token_estimate(&self, counter: &TokenCounter) -> usize {
21      // Use combined header + content for token estimate
22      counter.count_sync(&self.header) + counter.count_sync(&self.content)
23   }
24
25   pub fn priority(&self, config: &CommitConfig) -> i32 {
26      // Higher number = higher priority
27      if self.is_binary {
28         return -100; // Lowest priority
29      }
30
31      // Critical dependency manifests get medium-high priority despite extension
32      let filename_lower = self.filename.to_lowercase();
33      if filename_lower.ends_with("cargo.toml")
34         || filename_lower.ends_with("package.json")
35         || filename_lower.ends_with("go.mod")
36         || filename_lower.ends_with("requirements.txt")
37         || filename_lower.ends_with("pyproject.toml")
38      {
39         return 70; // Medium-high priority for dependency manifests (below source/SQL, above default)
40      }
41
42      // Prompt/system templates are functional source, not docs.
43      if filename_lower.contains("prompt") || filename_lower.contains("system") {
44         return 100;
45      }
46
47      // Check if it's a test file (lower priority)
48      if self.filename.contains("/test")
49         || self.filename.contains("test_")
50         || self.filename.contains("_test.")
51         || self.filename.contains(".test.")
52      {
53         return 10;
54      }
55
56      // Check file extension
57      let ext = self.filename.rsplit('.').next().unwrap_or("");
58      if config
59         .low_priority_extensions
60         .iter()
61         .any(|e| e.trim_start_matches('.') == ext)
62      {
63         return 20;
64      }
65
66      // Source code files get highest priority
67      match ext {
68         "rs" | "go" | "py" | "js" | "ts" | "java" | "c" | "cpp" | "h" | "hpp" => 100,
69         "sql" | "sh" | "bash" => 80,
70         _ => 50,
71      }
72   }
73
74   pub fn truncate(&mut self, max_size: usize) {
75      if self.size() <= max_size {
76         return;
77      }
78
79      // Keep the header, truncate content
80      let available = max_size.saturating_sub(self.header.len() + 50); // Reserve space for truncation message
81
82      if available < 50 {
83         // Too small, just keep header
84         self.content = "... (truncated)".to_string();
85      } else {
86         // Try to keep beginning and end of the diff
87         let lines: Vec<&str> = self.content.lines().collect();
88         if lines.len() > 30 {
89            // Keep first 15 and last 10 lines to show both what was added/removed
90            let keep_start = 15;
91            let keep_end = 10;
92            let omitted = lines.len() - keep_start - keep_end;
93            // Pre-allocate capacity
94            let est_size = keep_start * 60 + keep_end * 60 + 50;
95            let mut truncated = String::with_capacity(est_size);
96            for (i, line) in lines[..keep_start].iter().enumerate() {
97               if i > 0 {
98                  truncated.push('\n');
99               }
100               truncated.push_str(line);
101            }
102            use std::fmt::Write;
103            write!(&mut truncated, "\n... (truncated {omitted} lines) ...\n").unwrap();
104            for (i, line) in lines[lines.len() - keep_end..].iter().enumerate() {
105               if i > 0 {
106                  truncated.push('\n');
107               }
108               truncated.push_str(line);
109            }
110            self.content = truncated;
111         } else {
112            // Just truncate the content
113            let mut truncate_at = available;
114            while !self.content.is_char_boundary(truncate_at) {
115               truncate_at -= 1;
116            }
117            self.content.truncate(truncate_at);
118            self.content.push_str("\n... (truncated)");
119         }
120      }
121   }
122}
123
124/// Parse a git diff into individual file diffs
125#[tracing::instrument(target = "lgit", name = "diff.parse", skip_all, fields(diff_bytes = diff.len()))]
126pub fn parse_diff(diff: &str) -> Vec<FileDiff> {
127   let mut file_diffs = Vec::new();
128   let mut current_file: Option<FileDiff> = None;
129   let mut in_diff_header = false;
130
131   for line in diff.lines() {
132      if line.starts_with("diff --git") {
133         // Save previous file if exists
134         if let Some(file) = current_file.take() {
135            file_diffs.push(file);
136         }
137
138         // Extract filename from diff line - avoid allocation until we know we need it
139         let filename = line
140            .split_whitespace()
141            .nth(3)
142            .map_or("unknown", |s| s.trim_start_matches("b/"))
143            .to_string();
144
145         current_file = Some(FileDiff {
146            filename,
147            header: String::from(line),
148            content: String::new(),
149            additions: 0,
150            deletions: 0,
151            is_binary: false,
152         });
153         in_diff_header = true;
154      } else if let Some(file) = &mut current_file {
155         if line.starts_with("Binary files") {
156            file.is_binary = true;
157            file.header.reserve(line.len() + 1);
158            file.header.push('\n');
159            file.header.push_str(line);
160         } else if line.starts_with("index ")
161            || line.starts_with("new file")
162            || line.starts_with("deleted file")
163            || line.starts_with("rename ")
164            || line.starts_with("similarity index")
165            || line.starts_with("+++")
166            || line.starts_with("---")
167         {
168            // Part of the header
169            file.header.reserve(line.len() + 1);
170            file.header.push('\n');
171            file.header.push_str(line);
172         } else if line.starts_with("@@") {
173            // Hunk header - marks end of file header, start of content
174            in_diff_header = false;
175            file.header.reserve(line.len() + 1);
176            file.header.push('\n');
177            file.header.push_str(line);
178         } else if !in_diff_header {
179            // Actual diff content
180            if !file.content.is_empty() {
181               file.content.push('\n');
182            }
183            file.content.push_str(line);
184
185            if line.starts_with('+') && !line.starts_with("+++") {
186               file.additions += 1;
187            } else if line.starts_with('-') && !line.starts_with("---") {
188               file.deletions += 1;
189            }
190         } else {
191            // Still in header
192            file.header.reserve(line.len() + 1);
193            file.header.push('\n');
194            file.header.push_str(line);
195         }
196      }
197   }
198
199   // Don't forget the last file
200   if let Some(file) = current_file {
201      file_diffs.push(file);
202   }
203
204   file_diffs
205}
206
207/// Smart truncation of git diff with token-aware budgeting
208#[tracing::instrument(target = "lgit", name = "diff.smart_truncate", skip_all, fields(diff_bytes = diff.len(), max_length))]
209pub fn smart_truncate_diff(
210   diff: &str,
211   max_length: usize,
212   config: &CommitConfig,
213   counter: &TokenCounter,
214) -> String {
215   let mut file_diffs = parse_diff(diff);
216
217   // Filter out excluded files
218   file_diffs.retain(|f| {
219      !config
220         .excluded_files
221         .iter()
222         .any(|excluded| f.filename.ends_with(excluded))
223   });
224
225   if file_diffs.is_empty() {
226      return "No relevant files to analyze (only lock files or excluded files were changed)"
227         .to_string();
228   }
229
230   // Sort by priority (highest first)
231   file_diffs.sort_by_key(|f| -f.priority(config));
232
233   // Calculate total size and token estimate
234   let total_size: usize = file_diffs.iter().map(|f| f.size()).sum();
235   let total_tokens: usize = file_diffs.iter().map(|f| f.token_estimate(counter)).sum();
236
237   // Use token budget if it's more restrictive than character budget
238   // Estimate 4 chars per token for the size conversion
239   let effective_max = if total_tokens > config.max_diff_tokens {
240      // Convert token budget to approximate character budget
241      config.max_diff_tokens * 4
242   } else {
243      max_length
244   };
245
246   if total_size <= effective_max {
247      // Everything fits, reconstruct the diff
248      return reconstruct_diff(&file_diffs);
249   }
250
251   // Strategy: Prioritize showing ALL file headers, even if we must truncate
252   // content aggressively This ensures the LLM sees the full scope of changes
253   let mut included_files = Vec::new();
254   let mut current_size = 0;
255
256   // First pass: include all files with minimal content to show the scope
257   let header_only_size: usize = file_diffs.iter().map(|f| f.header.len() + 20).sum();
258   let total_files = file_diffs.len();
259
260   if header_only_size <= effective_max {
261      // We can fit all headers, now distribute remaining space for content
262      let remaining_space = effective_max - header_only_size;
263      let space_per_file = if file_diffs.is_empty() {
264         0
265      } else {
266         remaining_space / file_diffs.len()
267      };
268
269      included_files.reserve(file_diffs.len());
270      for file in file_diffs {
271         if file.is_binary {
272            // Include binary files with just header
273            included_files.push(FileDiff {
274               filename:  file.filename,
275               header:    file.header,
276               content:   String::new(),
277               additions: file.additions,
278               deletions: file.deletions,
279               is_binary: true,
280            });
281         } else {
282            let mut truncated = file;
283            let target_size = truncated.header.len() + space_per_file;
284            if truncated.size() > target_size {
285               truncated.truncate(target_size);
286            }
287            included_files.push(truncated);
288         }
289      }
290   } else {
291      // Even headers don't fit, fall back to including top priority files
292      for mut file in file_diffs {
293         if file.is_binary {
294            continue; // Skip binary files when severely constrained
295         }
296
297         let file_size = file.size();
298         if current_size + file_size <= effective_max {
299            current_size += file_size;
300            included_files.push(file);
301         } else if current_size < effective_max / 2 && file.priority(config) >= 50 {
302            // If we haven't used half the space and this is important, truncate and include
303            // it
304            let remaining = effective_max - current_size;
305            file.truncate(remaining.saturating_sub(100)); // Leave some space
306            included_files.push(file);
307            break;
308         }
309      }
310   }
311
312   if included_files.is_empty() {
313      return "Error: Could not include any files in the diff".to_string();
314   }
315
316   let mut result = reconstruct_diff(&included_files);
317
318   // Add a note about excluded files if any
319   let excluded_count = total_files - included_files.len();
320   if excluded_count > 0 {
321      use std::fmt::Write;
322      write!(result, "\n\n... ({excluded_count} files omitted) ...").unwrap();
323   }
324
325   result
326}
327
328/// Reconstruct a diff from `FileDiff` objects
329#[tracing::instrument(target = "lgit", name = "diff.reconstruct", skip_all, fields(file_count = files.len()))]
330pub fn reconstruct_diff(files: &[FileDiff]) -> String {
331   // Pre-allocate capacity based on file sizes
332   let capacity: usize = files.iter().map(|f| f.size() + 1).sum();
333   let mut result = String::with_capacity(capacity);
334
335   for (i, file) in files.iter().enumerate() {
336      if i > 0 {
337         result.push('\n');
338      }
339      result.push_str(&file.header);
340      if !file.content.is_empty() {
341         result.push('\n');
342         result.push_str(&file.content);
343      }
344   }
345
346   result
347}
348
/// Classification of a changeset by whitespace footprint.
///
/// A file is "whitespace-only" when its added and removed lines are identical
/// once *all* whitespace (including newlines) is stripped — i.e. only
/// indentation, spacing, or line wrapping changed, never a token.
///
/// Derives `PartialEq`/`Eq` so reports can be compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WhitespaceReport {
   /// Paths of files whose only change is whitespace.
   pub whitespace_only_files: Vec<String>,
   /// Whether at least one file has a substantive (non-whitespace) change.
   pub has_substantive:       bool,
}
361
362impl WhitespaceReport {
363   /// True when every changed file differs only in whitespace (and there is at
364   /// least one such file). This is the signal to record a `style: reformatted`
365   /// commit without calling the model.
366   pub const fn all_whitespace(&self) -> bool {
367      !self.has_substantive && !self.whitespace_only_files.is_empty()
368   }
369}
370
/// Byte offsets where each `diff --git` file section begins.
///
/// Walks the diff line by line and records the offset of every line that
/// begins with `diff --git`. Occurrences later in a line (e.g. an edited line
/// containing the literal text) are never at a line start and so are ignored.
fn file_section_starts(diff: &str) -> Vec<usize> {
   let mut starts = Vec::new();
   let mut offset = 0;
   // `split_inclusive` keeps the newline, so line lengths add up to offsets.
   for line in diff.split_inclusive('\n') {
      if line.starts_with("diff --git") {
         starts.push(offset);
      }
      offset += line.len();
   }
   starts
}
383
384/// Split a unified diff into a leading preamble (commit metadata from
385/// `git show`, usually empty) and one `(path, section)` pair per file. Section
386/// slices borrow `diff` verbatim, including their trailing newline, so they can
387/// be concatenated back losslessly.
388fn file_sections(diff: &str) -> (&str, Vec<(&str, &str)>) {
389   let starts = file_section_starts(diff);
390   if starts.is_empty() {
391      return (diff, Vec::new());
392   }
393
394   let preamble = &diff[..starts[0]];
395   let mut sections = Vec::with_capacity(starts.len());
396   for (idx, &start) in starts.iter().enumerate() {
397      let end = starts.get(idx + 1).copied().unwrap_or(diff.len());
398      let section = &diff[start..end];
399      let path = section
400         .lines()
401         .next()
402         .and_then(|line| line.split_whitespace().nth(3))
403         .map_or("unknown", |s| s.trim_start_matches("b/"));
404      sections.push((path, section));
405   }
406   (preamble, sections)
407}
408
/// Whether a single file section changes only whitespace.
///
/// Rebuilds the old side (context + removed lines) and the new side
/// (context + added lines) of the section's hunks in order, strips all
/// whitespace from each, and compares. The section is whitespace-only when
/// the two are equal and at least one line was added or removed.
///
/// Context lines are part of the comparison so that moving a line past
/// unchanged lines is not mistaken for a reformat. The `+++`/`---` file
/// headers are skipped only before the first `@@` hunk header; inside a hunk
/// such lines are real content (e.g. a removed `-- comment`).
///
/// Binary files, renames and copies are always treated as substantive so they
/// never masquerade as a reformat.
fn section_is_whitespace_only(section: &str) -> bool {
   /// Append `text` to `buf` with every whitespace character removed.
   fn push_stripped(buf: &mut String, text: &str) {
      buf.extend(text.chars().filter(|c| !c.is_whitespace()));
   }

   let mut old_side = String::new();
   let mut new_side = String::new();
   let mut has_change = false;
   let mut in_hunk = false;

   for line in section.lines() {
      if line.starts_with("@@") {
         in_hunk = true;
         continue;
      }

      if !in_hunk {
         if line.starts_with("Binary files")
            || line.starts_with("rename from")
            || line.starts_with("rename to")
            || line.starts_with("copy from")
            || line.starts_with("copy to")
         {
            return false;
         }
         // Skip the `+++`/`---` file headers; they are not content lines.
         if line.starts_with("+++") || line.starts_with("---") {
            continue;
         }
      }

      if let Some(rest) = line.strip_prefix('+') {
         has_change = true;
         push_stripped(&mut new_side, rest);
      } else if let Some(rest) = line.strip_prefix('-') {
         has_change = true;
         push_stripped(&mut old_side, rest);
      } else if let Some(rest) = line.strip_prefix(' ') {
         // Unchanged context belongs to both sides.
         push_stripped(&mut old_side, rest);
         push_stripped(&mut new_side, rest);
      }
   }

   has_change && old_side == new_side
}
444
445/// Classify a unified diff by whitespace footprint.
446#[tracing::instrument(target = "lgit", name = "diff.classify_whitespace", skip_all, fields(diff_bytes = diff.len()))]
447pub fn classify_diff_whitespace(diff: &str) -> WhitespaceReport {
448   let (_preamble, sections) = file_sections(diff);
449   let mut report = WhitespaceReport::default();
450   for (path, section) in sections {
451      if section_is_whitespace_only(section) {
452         report.whitespace_only_files.push(path.to_string());
453      } else {
454         report.has_substantive = true;
455      }
456   }
457   report
458}
459
460/// Drop whitespace-only file sections from a diff, returning the trimmed diff.
461///
462/// Returns `None` when nothing would change (no whitespace-only files) or when
463/// every section is whitespace-only (stripping would empty the diff), so the
464/// caller keeps the original on the common path without reallocating.
465#[tracing::instrument(target = "lgit", name = "diff.strip_whitespace_only", skip_all, fields(diff_bytes = diff.len()))]
466pub fn strip_whitespace_only_files(diff: &str) -> Option<String> {
467   let (preamble, sections) = file_sections(diff);
468   if sections.is_empty() {
469      return None;
470   }
471
472   let mut kept = Vec::with_capacity(sections.len());
473   let mut stripped_any = false;
474   for (_path, section) in &sections {
475      if section_is_whitespace_only(section) {
476         stripped_any = true;
477      } else {
478         kept.push(*section);
479      }
480   }
481
482   if !stripped_any || kept.is_empty() {
483      return None;
484   }
485
486   let mut out = String::with_capacity(diff.len());
487   out.push_str(preamble);
488   for section in kept {
489      out.push_str(section);
490   }
491   Some(out)
492}
493
494/// Truncate a diff to fit within a line budget, distributing lines across files
495/// by priority.
496///
497/// Unlike `smart_truncate_diff` which works on byte budgets, this operates on
498/// line counts for simpler/faster context window management in fast mode.
499#[tracing::instrument(target = "lgit", name = "diff.truncate_by_lines", skip_all, fields(diff_bytes = diff.len(), max_lines))]
500pub fn truncate_diff_by_lines(diff: &str, max_lines: usize, config: &CommitConfig) -> String {
501   let files = parse_diff(diff);
502
503   // Count total content lines across all files
504   let total_lines: usize = files
505      .iter()
506      .map(|f| f.header.lines().count() + f.content.lines().count())
507      .sum();
508
509   if total_lines <= max_lines {
510      return diff.to_string();
511   }
512
513   // Calculate priority-weighted allocation
514   let total_priority: i32 = files.iter().map(|f| f.priority(config).max(1)).sum();
515
516   let mut result = String::with_capacity(diff.len());
517
518   for file in &files {
519      // Always include the header
520      result.push_str(&file.header);
521      if !file.header.ends_with('\n') {
522         result.push('\n');
523      }
524
525      let content_lines: Vec<&str> = file.content.lines().collect();
526      let priority = file.priority(config).max(1);
527
528      // Allocate lines proportionally by priority
529      #[allow(clippy::cast_sign_loss, reason = "priority and total are positive")]
530      #[allow(clippy::cast_possible_truncation, reason = "line count fits in usize")]
531      let allocated = ((max_lines as f64) * (priority as f64) / (total_priority as f64)) as usize;
532      let allocated = allocated.max(5); // minimum 5 lines per file
533
534      if content_lines.len() <= allocated {
535         result.push_str(&file.content);
536         if !file.content.ends_with('\n') {
537            result.push('\n');
538         }
539      } else {
540         // Keep first half and last half of allocation
541         let keep_start = allocated / 2;
542         let keep_end = allocated - keep_start;
543         let omitted = content_lines.len() - keep_start - keep_end;
544
545         for line in &content_lines[..keep_start] {
546            result.push_str(line);
547            result.push('\n');
548         }
549
550         use std::fmt::Write;
551         writeln!(&mut result, "[... {omitted} lines omitted ...]")
552            .expect("writing to String is infallible");
553
554         for line in &content_lines[content_lines.len() - keep_end..] {
555            result.push_str(line);
556            result.push('\n');
557         }
558      }
559   }
560
561   result
562}
563
564#[cfg(test)]
565mod tests {
566   use super::*;
567
568   fn test_config() -> CommitConfig {
569      CommitConfig::default()
570   }
571
572   fn test_counter() -> TokenCounter {
573      TokenCounter::new("http://localhost:4000", None, "claude-sonnet-4.5")
574   }
575
576   #[test]
577   fn test_parse_diff_simple() {
578      let diff = r#"diff --git a/src/main.rs b/src/main.rs
579index 123..456 100644
580--- a/src/main.rs
581+++ b/src/main.rs
582@@ -1,3 +1,4 @@
583+use std::collections::HashMap;
584 fn main() {
585     println!("hello");
586 }"#;
587      let files = parse_diff(diff);
588      assert_eq!(files.len(), 1);
589      assert_eq!(files[0].filename, "src/main.rs");
590      assert_eq!(files[0].additions, 1);
591      assert_eq!(files[0].deletions, 0);
592      assert!(!files[0].is_binary);
593      assert!(files[0].header.contains("diff --git"));
594      assert!(files[0].content.contains("use std::collections::HashMap"));
595   }
596
597   #[test]
598   fn test_parse_diff_multi_file() {
599      let diff = r"diff --git a/src/lib.rs b/src/lib.rs
600index 111..222 100644
601--- a/src/lib.rs
602+++ b/src/lib.rs
603@@ -1,2 +1,3 @@
604+pub mod utils;
605 pub fn test() {}
606diff --git a/src/main.rs b/src/main.rs
607index 333..444 100644
608--- a/src/main.rs
609+++ b/src/main.rs
610@@ -1,1 +1,2 @@
611 fn main() {}
612+fn helper() {}";
613      let files = parse_diff(diff);
614      assert_eq!(files.len(), 2);
615      assert_eq!(files[0].filename, "src/lib.rs");
616      assert_eq!(files[1].filename, "src/main.rs");
617      assert_eq!(files[0].additions, 1);
618      assert_eq!(files[1].additions, 1);
619   }
620
621   #[test]
622   fn test_parse_diff_rename() {
623      let diff = r"diff --git a/old.rs b/new.rs
624similarity index 95%
625rename from old.rs
626rename to new.rs
627index 123..456 100644
628--- a/old.rs
629+++ b/new.rs
630@@ -1,2 +1,3 @@
631 fn test() {}
632+fn helper() {}";
633      let files = parse_diff(diff);
634      assert_eq!(files.len(), 1);
635      assert_eq!(files[0].filename, "new.rs");
636      assert!(files[0].header.contains("rename from"));
637      assert!(files[0].header.contains("rename to"));
638      assert_eq!(files[0].additions, 1);
639   }
640
641   #[test]
642   fn test_parse_diff_binary() {
643      let diff = r"diff --git a/image.png b/image.png
644index 123..456 100644
645Binary files a/image.png and b/image.png differ";
646      let files = parse_diff(diff);
647      assert_eq!(files.len(), 1);
648      assert_eq!(files[0].filename, "image.png");
649      assert!(files[0].is_binary);
650      assert!(files[0].header.contains("Binary files"));
651   }
652
653   #[test]
654   fn test_parse_diff_empty() {
655      let diff = "";
656      let files = parse_diff(diff);
657      assert_eq!(files.len(), 0);
658   }
659
660   #[test]
661   fn test_parse_diff_malformed_missing_hunks() {
662      let diff = r"diff --git a/src/main.rs b/src/main.rs
663index 123..456 100644
664--- a/src/main.rs
665+++ b/src/main.rs";
666      let files = parse_diff(diff);
667      assert_eq!(files.len(), 1);
668      assert_eq!(files[0].filename, "src/main.rs");
669      assert!(files[0].content.is_empty());
670   }
671
672   #[test]
673   fn test_parse_diff_new_file() {
674      let diff = r"diff --git a/new.rs b/new.rs
675new file mode 100644
676index 000..123 100644
677--- /dev/null
678+++ b/new.rs
679@@ -0,0 +1,2 @@
680+fn test() {}
681+fn main() {}";
682      let files = parse_diff(diff);
683      assert_eq!(files.len(), 1);
684      assert_eq!(files[0].filename, "new.rs");
685      assert!(files[0].header.contains("new file mode"));
686      assert_eq!(files[0].additions, 2);
687   }
688
689   #[test]
690   fn test_parse_diff_deleted_file() {
691      let diff = r"diff --git a/old.rs b/old.rs
692deleted file mode 100644
693index 123..000 100644
694--- a/old.rs
695+++ /dev/null
696@@ -1,2 +0,0 @@
697-fn test() {}
698-fn main() {}";
699      let files = parse_diff(diff);
700      assert_eq!(files.len(), 1);
701      assert_eq!(files[0].filename, "old.rs");
702      assert!(files[0].header.contains("deleted file mode"));
703      assert_eq!(files[0].deletions, 2);
704   }
705
706   #[test]
707   fn test_file_diff_size() {
708      let file = FileDiff {
709         filename:  "test.rs".to_string(),
710         header:    "header".to_string(),
711         content:   "content".to_string(),
712         additions: 0,
713         deletions: 0,
714         is_binary: false,
715      };
716      assert_eq!(file.size(), 6 + 7); // "header" + "content"
717   }
718
719   #[test]
720   fn test_file_diff_priority_source_files() {
721      let config = test_config();
722      let rs_file = FileDiff {
723         filename:  "src/main.rs".to_string(),
724         header:    String::new(),
725         content:   String::new(),
726         additions: 0,
727         deletions: 0,
728         is_binary: false,
729      };
730      assert_eq!(rs_file.priority(&config), 100);
731
732      let py_file = FileDiff {
733         filename:  "script.py".to_string(),
734         header:    String::new(),
735         content:   String::new(),
736         additions: 0,
737         deletions: 0,
738         is_binary: false,
739      };
740      assert_eq!(py_file.priority(&config), 100);
741
742      let js_file = FileDiff {
743         filename:  "app.js".to_string(),
744         header:    String::new(),
745         content:   String::new(),
746         additions: 0,
747         deletions: 0,
748         is_binary: false,
749      };
750      assert_eq!(js_file.priority(&config), 100);
751   }
752
753   #[test]
754   fn test_file_diff_priority_binary() {
755      let config = test_config();
756      let binary = FileDiff {
757         filename:  "image.png".to_string(),
758         header:    String::new(),
759         content:   String::new(),
760         additions: 0,
761         deletions: 0,
762         is_binary: true,
763      };
764      assert_eq!(binary.priority(&config), -100);
765   }
766
767   #[test]
768   fn test_file_diff_priority_test_files() {
769      let config = test_config();
770      let test_file = FileDiff {
771         filename:  "src/test_utils.rs".to_string(),
772         header:    String::new(),
773         content:   String::new(),
774         additions: 0,
775         deletions: 0,
776         is_binary: false,
777      };
778      assert_eq!(test_file.priority(&config), 10);
779
780      let test_dir = FileDiff {
781         filename:  "tests/integration_test.rs".to_string(),
782         header:    String::new(),
783         content:   String::new(),
784         additions: 0,
785         deletions: 0,
786         is_binary: false,
787      };
788      assert_eq!(test_dir.priority(&config), 10);
789   }
790
791   #[test]
792   fn test_file_diff_priority_low_priority_extensions() {
793      let config = test_config();
794      let md_file = FileDiff {
795         filename:  "README.md".to_string(),
796         header:    String::new(),
797         content:   String::new(),
798         additions: 0,
799         deletions: 0,
800         is_binary: false,
801      };
802      assert_eq!(md_file.priority(&config), 20);
803
804      let prompt_file = FileDiff {
805         filename:  "prompts/analysis/default.md".to_string(),
806         header:    String::new(),
807         content:   String::new(),
808         additions: 0,
809         deletions: 0,
810         is_binary: false,
811      };
812      assert_eq!(prompt_file.priority(&config), 100);
813
814      let system_file = FileDiff {
815         filename:  "system/analysis/default.md".to_string(),
816         header:    String::new(),
817         content:   String::new(),
818         additions: 0,
819         deletions: 0,
820         is_binary: false,
821      };
822      assert_eq!(system_file.priority(&config), 100);
823
824      let toml_file = FileDiff {
825         filename:  "config.toml".to_string(),
826         header:    String::new(),
827         content:   String::new(),
828         additions: 0,
829         deletions: 0,
830         is_binary: false,
831      };
832      assert_eq!(toml_file.priority(&config), 20);
833   }
834
835   #[test]
836   fn test_file_diff_priority_dependency_manifests() {
837      let config = test_config();
838
839      let cargo_toml = FileDiff {
840         filename:  "Cargo.toml".to_string(),
841         header:    String::new(),
842         content:   String::new(),
843         additions: 0,
844         deletions: 0,
845         is_binary: false,
846      };
847      assert_eq!(cargo_toml.priority(&config), 70);
848
849      let package_json = FileDiff {
850         filename:  "package.json".to_string(),
851         header:    String::new(),
852         content:   String::new(),
853         additions: 0,
854         deletions: 0,
855         is_binary: false,
856      };
857      assert_eq!(package_json.priority(&config), 70);
858
859      let go_mod = FileDiff {
860         filename:  "go.mod".to_string(),
861         header:    String::new(),
862         content:   String::new(),
863         additions: 0,
864         deletions: 0,
865         is_binary: false,
866      };
867      assert_eq!(go_mod.priority(&config), 70);
868   }
869
870   #[test]
871   fn test_file_diff_priority_default() {
872      let config = test_config();
873      let other = FileDiff {
874         filename:  "data.csv".to_string(),
875         header:    String::new(),
876         content:   String::new(),
877         additions: 0,
878         deletions: 0,
879         is_binary: false,
880      };
881      assert_eq!(other.priority(&config), 50);
882   }
883
884   #[test]
885   fn test_file_diff_truncate_small() {
886      let mut file = FileDiff {
887         filename:  "test.rs".to_string(),
888         header:    "header".to_string(),
889         content:   "short content".to_string(),
890         additions: 0,
891         deletions: 0,
892         is_binary: false,
893      };
894      let original_size = file.size();
895      file.truncate(1000);
896      assert_eq!(file.size(), original_size);
897      assert_eq!(file.content, "short content");
898   }
899
900   #[test]
901   fn test_file_diff_truncate_large() {
902      let lines: Vec<String> = (0..100).map(|i| format!("line {i}")).collect();
903      let content = lines.join("\n");
904      let mut file = FileDiff {
905         filename: "test.rs".to_string(),
906         header: "header".to_string(),
907         content,
908         additions: 0,
909         deletions: 0,
910         is_binary: false,
911      };
912      file.truncate(500);
913      assert!(file.content.contains("... (truncated"));
914      assert!(file.content.contains("line 0")); // First line preserved
915      assert!(file.content.contains("line 99")); // Last line preserved
916   }
917   #[test]
918   fn test_file_diff_truncate_utf8_boundary() {
919      let mut file = FileDiff {
920         filename:  "test.rs".to_string(),
921         header:    "header".to_string(),
922         content:   "😀".repeat(80),
923         additions: 0,
924         deletions: 0,
925         is_binary: false,
926      };
927      file.truncate(121);
928
929      assert!(file.content.ends_with("\n... (truncated)"));
930      let truncated_payload = file.content.trim_end_matches("\n... (truncated)");
931      assert!(!truncated_payload.is_empty());
932      assert_eq!(truncated_payload.len() % 4, 0);
933   }
934
935   #[test]
936   fn test_file_diff_truncate_preserves_context() {
937      let lines: Vec<String> = (0..50).map(|i| format!("line {i}")).collect();
938      let content = lines.join("\n");
939      let original_lines = content.lines().count();
940      let mut file = FileDiff {
941         filename: "test.rs".to_string(),
942         header: "header".to_string(),
943         content,
944         additions: 0,
945         deletions: 0,
946         is_binary: false,
947      };
948      // Use a size that will definitely trigger truncation
949      file.truncate(300);
950      // Should keep first 15 and last 10 lines
951      assert!(file.content.contains("line 0"));
952      assert!(file.content.contains("line 14"));
953      assert!(file.content.contains("line 40"));
954      assert!(file.content.contains("line 49"));
955      // Check that truncation occurred and message is present
956      let truncated_lines = file.content.lines().count();
957      assert!(truncated_lines < original_lines, "Content should be truncated");
958      assert!(file.content.contains("truncated"), "Should have truncation message");
959   }
960
961   #[test]
962   fn test_file_diff_truncate_very_small_space() {
963      let mut file = FileDiff {
964         filename:  "test.rs".to_string(),
965         header:    "long header content here".to_string(),
966         content:   "lots of content that needs to be truncated".to_string(),
967         additions: 0,
968         deletions: 0,
969         is_binary: false,
970      };
971      file.truncate(30);
972      assert_eq!(file.content, "... (truncated)");
973   }
974
975   #[test]
976   fn test_smart_truncate_diff_under_limit() {
977      let config = test_config();
978      let counter = test_counter();
979      let diff = r"diff --git a/src/main.rs b/src/main.rs
980index 123..456 100644
981--- a/src/main.rs
982+++ b/src/main.rs
983@@ -1,2 +1,3 @@
984+use std::io;
985 fn main() {}";
986      let result = smart_truncate_diff(diff, 10000, &config, &counter);
987      assert!(result.contains("use std::io"));
988      assert!(result.contains("src/main.rs"));
989   }
990
991   #[test]
992   fn test_smart_truncate_diff_over_limit() {
993      let config = test_config();
994      let counter = test_counter();
995      let lines: Vec<String> = (0..200).map(|i| format!("+line {i}")).collect();
996      let content = lines.join("\n");
997      let diff = format!(
998         "diff --git a/src/main.rs b/src/main.rs\nindex 123..456 100644\n--- a/src/main.rs\n+++ \
999          b/src/main.rs\n@@ -1,1 +1,200 @@\n{content}"
1000      );
1001      let result = smart_truncate_diff(&diff, 500, &config, &counter);
1002      assert!(result.len() <= 600); // Allow some overhead
1003      assert!(result.contains("src/main.rs"));
1004   }
1005
1006   #[test]
1007   fn test_smart_truncate_diff_priority_allocation() {
1008      let config = test_config();
1009      let counter = test_counter();
1010      // High priority source file and low priority markdown
1011      let diff = r"diff --git a/src/lib.rs b/src/lib.rs
1012index 111..222 100644
1013--- a/src/lib.rs
1014+++ b/src/lib.rs
1015@@ -1,1 +1,50 @@
1016+pub fn important_function() {}
1017+pub fn another_function() {}
1018+pub fn yet_another() {}
1019diff --git a/README.md b/README.md
1020index 333..444 100644
1021--- a/README.md
1022+++ b/README.md
1023@@ -1,1 +1,50 @@
1024+# Documentation
1025+More docs here";
1026      let result = smart_truncate_diff(diff, 300, &config, &counter);
1027      // Should prioritize lib.rs over README.md
1028      assert!(result.contains("src/lib.rs"));
1029      assert!(result.contains("important_function") || result.contains("truncated"));
1030   }
1031
1032   #[test]
1033   fn test_smart_truncate_diff_binary_excluded() {
1034      let config = test_config();
1035      let counter = test_counter();
1036      let diff = r"diff --git a/image.png b/image.png
1037index 123..456 100644
1038Binary files a/image.png and b/image.png differ
1039diff --git a/src/main.rs b/src/main.rs
1040index 789..abc 100644
1041--- a/src/main.rs
1042+++ b/src/main.rs
1043@@ -1,1 +1,2 @@
1044 fn main() {}
1045+fn helper() {}";
1046      let result = smart_truncate_diff(diff, 10000, &config, &counter);
1047      assert!(result.contains("src/main.rs"));
1048      assert!(result.contains("image.png"));
1049      assert!(result.contains("Binary files"));
1050   }
1051
1052   #[test]
1053   fn test_smart_truncate_diff_excluded_files() {
1054      let config = test_config();
1055      let counter = test_counter();
1056      let diff = r"diff --git a/Cargo.lock b/Cargo.lock
1057index 123..456 100644
1058--- a/Cargo.lock
1059+++ b/Cargo.lock
1060@@ -1,1 +1,100 @@
1061+lots of lock file content
1062diff --git a/src/main.rs b/src/main.rs
1063index 789..abc 100644
1064--- a/src/main.rs
1065+++ b/src/main.rs
1066@@ -1,1 +1,2 @@
1067 fn main() {}
1068+fn helper() {}";
1069      let result = smart_truncate_diff(diff, 10000, &config, &counter);
1070      assert!(!result.contains("Cargo.lock"));
1071      assert!(result.contains("src/main.rs"));
1072   }
1073
1074   #[test]
1075   fn test_smart_truncate_diff_all_files_excluded() {
1076      let config = test_config();
1077      let counter = test_counter();
1078      let diff = r"diff --git a/Cargo.lock b/Cargo.lock
1079index 123..456 100644
1080--- a/Cargo.lock
1081+++ b/Cargo.lock
1082@@ -1,1 +1,2 @@
1083+dependency update";
1084      let result = smart_truncate_diff(diff, 10000, &config, &counter);
1085      assert!(result.contains("No relevant files"));
1086   }
1087
1088   #[test]
1089   fn test_smart_truncate_diff_header_preservation() {
1090      let config = test_config();
1091      let counter = test_counter();
1092      let lines: Vec<String> = (0..100).map(|i| format!("+line {i}")).collect();
1093      let content = lines.join("\n");
1094      let diff = format!(
1095         "diff --git a/src/a.rs b/src/a.rs\nindex 111..222 100644\n--- a/src/a.rs\n+++ \
1096          b/src/a.rs\n@@ -1,1 +1,100 @@\n{content}\ndiff --git a/src/b.rs b/src/b.rs\nindex \
1097          333..444 100644\n--- a/src/b.rs\n+++ b/src/b.rs\n@@ -1,1 +1,100 @@\n{content}"
1098      );
1099      let result = smart_truncate_diff(&diff, 600, &config, &counter);
1100      // Both file headers should be present
1101      assert!(result.contains("src/a.rs"));
1102      assert!(result.contains("src/b.rs"));
1103   }
1104
1105   #[test]
1106   fn test_reconstruct_diff_single_file() {
1107      let files = vec![FileDiff {
1108         filename:  "test.rs".to_string(),
1109         header:    "diff --git a/test.rs b/test.rs".to_string(),
1110         content:   "+new line".to_string(),
1111         additions: 1,
1112         deletions: 0,
1113         is_binary: false,
1114      }];
1115      let result = reconstruct_diff(&files);
1116      assert_eq!(result, "diff --git a/test.rs b/test.rs\n+new line");
1117   }
1118
1119   #[test]
1120   fn test_reconstruct_diff_multiple_files() {
1121      let files = vec![
1122         FileDiff {
1123            filename:  "a.rs".to_string(),
1124            header:    "diff --git a/a.rs b/a.rs".to_string(),
1125            content:   "+line a".to_string(),
1126            additions: 1,
1127            deletions: 0,
1128            is_binary: false,
1129         },
1130         FileDiff {
1131            filename:  "b.rs".to_string(),
1132            header:    "diff --git a/b.rs b/b.rs".to_string(),
1133            content:   "+line b".to_string(),
1134            additions: 1,
1135            deletions: 0,
1136            is_binary: false,
1137         },
1138      ];
1139      let result = reconstruct_diff(&files);
1140      assert!(result.contains("a.rs"));
1141      assert!(result.contains("b.rs"));
1142      assert!(result.contains("+line a"));
1143      assert!(result.contains("+line b"));
1144   }
1145
1146   #[test]
1147   fn test_reconstruct_diff_empty_content() {
1148      let files = vec![FileDiff {
1149         filename:  "test.rs".to_string(),
1150         header:    "diff --git a/test.rs b/test.rs".to_string(),
1151         content:   String::new(),
1152         additions: 0,
1153         deletions: 0,
1154         is_binary: false,
1155      }];
1156      let result = reconstruct_diff(&files);
1157      assert_eq!(result, "diff --git a/test.rs b/test.rs");
1158   }
1159
1160   #[test]
1161   fn test_reconstruct_diff_empty_vec() {
1162      let files: Vec<FileDiff> = vec![];
1163      let result = reconstruct_diff(&files);
1164      assert_eq!(result, "");
1165   }
1166
1167   const WS_ONLY_INDENT: &str = "diff --git a/src/foo.rs b/src/foo.rs
1168index 1234567..89abcde 100644
1169--- a/src/foo.rs
1170+++ b/src/foo.rs
1171@@ -1,3 +1,3 @@
1172 fn main() {
1173-let x = 1;
1174+    let x = 1;
1175 }
1176";
1177
1178   const SUBSTANTIVE: &str = "diff --git a/src/bar.rs b/src/bar.rs
1179index 1111111..2222222 100644
1180--- a/src/bar.rs
1181+++ b/src/bar.rs
1182@@ -1,3 +1,3 @@
1183 fn main() {
1184-let x = 1;
1185+let x = 2;
1186 }
1187";
1188
1189   #[test]
1190   fn test_whitespace_only_indentation() {
1191      let report = classify_diff_whitespace(WS_ONLY_INDENT);
1192      assert!(report.all_whitespace());
1193      assert!(!report.has_substantive);
1194      assert_eq!(report.whitespace_only_files, vec!["src/foo.rs".to_string()]);
1195   }
1196
1197   #[test]
1198   fn test_whitespace_only_rewrap() {
1199      // Reflowing one line into two is still whitespace-only: the tokens match
1200      // once newlines are stripped.
1201      let diff = "diff --git a/a.md b/a.md
1202index 111..222 100644
1203--- a/a.md
1204+++ b/a.md
1205@@ -1 +1,2 @@
1206-one two three
1207+one two
1208+three
1209";
1210      let report = classify_diff_whitespace(diff);
1211      assert!(report.all_whitespace());
1212   }
1213
1214   #[test]
1215   fn test_substantive_change_not_whitespace() {
1216      let report = classify_diff_whitespace(SUBSTANTIVE);
1217      assert!(!report.all_whitespace());
1218      assert!(report.has_substantive);
1219      assert!(report.whitespace_only_files.is_empty());
1220   }
1221
1222   #[test]
1223   fn test_new_file_is_substantive() {
1224      let diff = "diff --git a/new.txt b/new.txt
1225new file mode 100644
1226index 0000000..e69de29
1227--- /dev/null
1228+++ b/new.txt
1229@@ -0,0 +1,2 @@
1230+hello
1231+world
1232";
1233      let report = classify_diff_whitespace(diff);
1234      assert!(!report.all_whitespace());
1235      assert!(report.has_substantive);
1236   }
1237
1238   #[test]
1239   fn test_rename_is_substantive() {
1240      let diff = "diff --git a/old.rs b/new.rs
1241similarity index 100%
1242rename from old.rs
1243rename to new.rs
1244";
1245      let report = classify_diff_whitespace(diff);
1246      assert!(!report.all_whitespace());
1247      assert!(report.has_substantive);
1248   }
1249
1250   #[test]
1251   fn test_mixed_changeset() {
1252      let diff = format!("{WS_ONLY_INDENT}{SUBSTANTIVE}");
1253      let report = classify_diff_whitespace(&diff);
1254      assert!(!report.all_whitespace());
1255      assert!(report.has_substantive);
1256      assert_eq!(report.whitespace_only_files, vec!["src/foo.rs".to_string()]);
1257   }
1258
1259   #[test]
1260   fn test_strip_drops_whitespace_only_file() {
1261      let diff = format!("{WS_ONLY_INDENT}{SUBSTANTIVE}");
1262      let stripped = strip_whitespace_only_files(&diff).expect("a file should be dropped");
1263      assert!(!stripped.contains("src/foo.rs"));
1264      assert!(stripped.contains("src/bar.rs"));
1265      // Substantive section is preserved byte-for-byte.
1266      assert_eq!(stripped, SUBSTANTIVE);
1267   }
1268
1269   #[test]
1270   fn test_strip_noop_when_no_whitespace_only() {
1271      assert!(strip_whitespace_only_files(SUBSTANTIVE).is_none());
1272   }
1273
1274   #[test]
1275   fn test_strip_noop_when_all_whitespace() {
1276      // Stripping everything would empty the diff, so the helper declines.
1277      assert!(strip_whitespace_only_files(WS_ONLY_INDENT).is_none());
1278   }
1279
1280   #[test]
1281   fn test_diff_git_text_in_body_not_a_boundary() {
1282      // A removed line that happens to contain the literal "diff --git" text
1283      // must not be mistaken for a new file section.
1284      let diff = "diff --git a/doc.md b/doc.md
1285index 111..222 100644
1286--- a/doc.md
1287+++ b/doc.md
1288@@ -1,2 +1,2 @@
1289-Run diff --git to inspect changes
1290+Run git diff to inspect changes
1291 done
1292";
1293      let report = classify_diff_whitespace(diff);
1294      assert_eq!(report.whitespace_only_files.len() + usize::from(report.has_substantive), 1);
1295      assert!(report.has_substantive);
1296   }
1297}