1use crate::{config::CommitConfig, tokens::TokenCounter};
3
4#[derive(Debug, Clone)]
5pub struct FileDiff {
6 pub filename: String,
7 pub header: String, pub content: String, pub additions: usize,
10 pub deletions: usize,
11 pub is_binary: bool,
12}
13
14impl FileDiff {
15 pub const fn size(&self) -> usize {
16 self.header.len() + self.content.len()
17 }
18
19 pub fn token_estimate(&self, counter: &TokenCounter) -> usize {
21 counter.count_sync(&self.header) + counter.count_sync(&self.content)
23 }
24
25 pub fn priority(&self, config: &CommitConfig) -> i32 {
26 if self.is_binary {
28 return -100; }
30
31 let filename_lower = self.filename.to_lowercase();
33 if filename_lower.ends_with("cargo.toml")
34 || filename_lower.ends_with("package.json")
35 || filename_lower.ends_with("go.mod")
36 || filename_lower.ends_with("requirements.txt")
37 || filename_lower.ends_with("pyproject.toml")
38 {
39 return 70; }
41
42 if filename_lower.contains("prompt") || filename_lower.contains("system") {
44 return 100;
45 }
46
47 if self.filename.contains("/test")
49 || self.filename.contains("test_")
50 || self.filename.contains("_test.")
51 || self.filename.contains(".test.")
52 {
53 return 10;
54 }
55
56 let ext = self.filename.rsplit('.').next().unwrap_or("");
58 if config
59 .low_priority_extensions
60 .iter()
61 .any(|e| e.trim_start_matches('.') == ext)
62 {
63 return 20;
64 }
65
66 match ext {
68 "rs" | "go" | "py" | "js" | "ts" | "java" | "c" | "cpp" | "h" | "hpp" => 100,
69 "sql" | "sh" | "bash" => 80,
70 _ => 50,
71 }
72 }
73
74 pub fn truncate(&mut self, max_size: usize) {
75 if self.size() <= max_size {
76 return;
77 }
78
79 let available = max_size.saturating_sub(self.header.len() + 50); if available < 50 {
83 self.content = "... (truncated)".to_string();
85 } else {
86 let lines: Vec<&str> = self.content.lines().collect();
88 if lines.len() > 30 {
89 let keep_start = 15;
91 let keep_end = 10;
92 let omitted = lines.len() - keep_start - keep_end;
93 let est_size = keep_start * 60 + keep_end * 60 + 50;
95 let mut truncated = String::with_capacity(est_size);
96 for (i, line) in lines[..keep_start].iter().enumerate() {
97 if i > 0 {
98 truncated.push('\n');
99 }
100 truncated.push_str(line);
101 }
102 use std::fmt::Write;
103 write!(&mut truncated, "\n... (truncated {omitted} lines) ...\n").unwrap();
104 for (i, line) in lines[lines.len() - keep_end..].iter().enumerate() {
105 if i > 0 {
106 truncated.push('\n');
107 }
108 truncated.push_str(line);
109 }
110 self.content = truncated;
111 } else {
112 let mut truncate_at = available;
114 while !self.content.is_char_boundary(truncate_at) {
115 truncate_at -= 1;
116 }
117 self.content.truncate(truncate_at);
118 self.content.push_str("\n... (truncated)");
119 }
120 }
121 }
122}
123
124#[tracing::instrument(target = "lgit", name = "diff.parse", skip_all, fields(diff_bytes = diff.len()))]
126pub fn parse_diff(diff: &str) -> Vec<FileDiff> {
127 let mut file_diffs = Vec::new();
128 let mut current_file: Option<FileDiff> = None;
129 let mut in_diff_header = false;
130
131 for line in diff.lines() {
132 if line.starts_with("diff --git") {
133 if let Some(file) = current_file.take() {
135 file_diffs.push(file);
136 }
137
138 let filename = line
140 .split_whitespace()
141 .nth(3)
142 .map_or("unknown", |s| s.trim_start_matches("b/"))
143 .to_string();
144
145 current_file = Some(FileDiff {
146 filename,
147 header: String::from(line),
148 content: String::new(),
149 additions: 0,
150 deletions: 0,
151 is_binary: false,
152 });
153 in_diff_header = true;
154 } else if let Some(file) = &mut current_file {
155 if line.starts_with("Binary files") {
156 file.is_binary = true;
157 file.header.reserve(line.len() + 1);
158 file.header.push('\n');
159 file.header.push_str(line);
160 } else if line.starts_with("index ")
161 || line.starts_with("new file")
162 || line.starts_with("deleted file")
163 || line.starts_with("rename ")
164 || line.starts_with("similarity index")
165 || line.starts_with("+++")
166 || line.starts_with("---")
167 {
168 file.header.reserve(line.len() + 1);
170 file.header.push('\n');
171 file.header.push_str(line);
172 } else if line.starts_with("@@") {
173 in_diff_header = false;
175 file.header.reserve(line.len() + 1);
176 file.header.push('\n');
177 file.header.push_str(line);
178 } else if !in_diff_header {
179 if !file.content.is_empty() {
181 file.content.push('\n');
182 }
183 file.content.push_str(line);
184
185 if line.starts_with('+') && !line.starts_with("+++") {
186 file.additions += 1;
187 } else if line.starts_with('-') && !line.starts_with("---") {
188 file.deletions += 1;
189 }
190 } else {
191 file.header.reserve(line.len() + 1);
193 file.header.push('\n');
194 file.header.push_str(line);
195 }
196 }
197 }
198
199 if let Some(file) = current_file {
201 file_diffs.push(file);
202 }
203
204 file_diffs
205}
206
207#[tracing::instrument(target = "lgit", name = "diff.smart_truncate", skip_all, fields(diff_bytes = diff.len(), max_length))]
209pub fn smart_truncate_diff(
210 diff: &str,
211 max_length: usize,
212 config: &CommitConfig,
213 counter: &TokenCounter,
214) -> String {
215 let mut file_diffs = parse_diff(diff);
216
217 file_diffs.retain(|f| {
219 !config
220 .excluded_files
221 .iter()
222 .any(|excluded| f.filename.ends_with(excluded))
223 });
224
225 if file_diffs.is_empty() {
226 return "No relevant files to analyze (only lock files or excluded files were changed)"
227 .to_string();
228 }
229
230 file_diffs.sort_by_key(|f| -f.priority(config));
232
233 let total_size: usize = file_diffs.iter().map(|f| f.size()).sum();
235 let total_tokens: usize = file_diffs.iter().map(|f| f.token_estimate(counter)).sum();
236
237 let effective_max = if total_tokens > config.max_diff_tokens {
240 config.max_diff_tokens * 4
242 } else {
243 max_length
244 };
245
246 if total_size <= effective_max {
247 return reconstruct_diff(&file_diffs);
249 }
250
251 let mut included_files = Vec::new();
254 let mut current_size = 0;
255
256 let header_only_size: usize = file_diffs.iter().map(|f| f.header.len() + 20).sum();
258 let total_files = file_diffs.len();
259
260 if header_only_size <= effective_max {
261 let remaining_space = effective_max - header_only_size;
263 let space_per_file = if file_diffs.is_empty() {
264 0
265 } else {
266 remaining_space / file_diffs.len()
267 };
268
269 included_files.reserve(file_diffs.len());
270 for file in file_diffs {
271 if file.is_binary {
272 included_files.push(FileDiff {
274 filename: file.filename,
275 header: file.header,
276 content: String::new(),
277 additions: file.additions,
278 deletions: file.deletions,
279 is_binary: true,
280 });
281 } else {
282 let mut truncated = file;
283 let target_size = truncated.header.len() + space_per_file;
284 if truncated.size() > target_size {
285 truncated.truncate(target_size);
286 }
287 included_files.push(truncated);
288 }
289 }
290 } else {
291 for mut file in file_diffs {
293 if file.is_binary {
294 continue; }
296
297 let file_size = file.size();
298 if current_size + file_size <= effective_max {
299 current_size += file_size;
300 included_files.push(file);
301 } else if current_size < effective_max / 2 && file.priority(config) >= 50 {
302 let remaining = effective_max - current_size;
305 file.truncate(remaining.saturating_sub(100)); included_files.push(file);
307 break;
308 }
309 }
310 }
311
312 if included_files.is_empty() {
313 return "Error: Could not include any files in the diff".to_string();
314 }
315
316 let mut result = reconstruct_diff(&included_files);
317
318 let excluded_count = total_files - included_files.len();
320 if excluded_count > 0 {
321 use std::fmt::Write;
322 write!(result, "\n\n... ({excluded_count} files omitted) ...").unwrap();
323 }
324
325 result
326}
327
328#[tracing::instrument(target = "lgit", name = "diff.reconstruct", skip_all, fields(file_count = files.len()))]
330pub fn reconstruct_diff(files: &[FileDiff]) -> String {
331 let capacity: usize = files.iter().map(|f| f.size() + 1).sum();
333 let mut result = String::with_capacity(capacity);
334
335 for (i, file) in files.iter().enumerate() {
336 if i > 0 {
337 result.push('\n');
338 }
339 result.push_str(&file.header);
340 if !file.content.is_empty() {
341 result.push('\n');
342 result.push_str(&file.content);
343 }
344 }
345
346 result
347}
348
/// Outcome of classifying each file of a diff as whitespace-only or substantive.
#[derive(Debug, Clone, Default)]
pub struct WhitespaceReport {
    /// Paths of the files whose changes are whitespace-only.
    pub whitespace_only_files: Vec<String>,
    /// Set when at least one file has a change that is not whitespace-only.
    pub has_substantive: bool,
}

impl WhitespaceReport {
    /// Returns `true` when there is at least one whitespace-only file and no substantive one.
    pub const fn all_whitespace(&self) -> bool {
        if self.has_substantive {
            return false;
        }
        !self.whitespace_only_files.is_empty()
    }
}
370
/// Returns the byte offsets at which a line of `diff` begins with `diff --git`.
///
/// Only occurrences at the very start of the input or directly after a `\n` count, so the
/// phrase appearing in the middle of a line is not treated as a section boundary.
fn file_section_starts(diff: &str) -> Vec<usize> {
    const MARKER: &str = "diff --git";

    // Offset 0 plus the offset following every newline: the start of each line.
    let line_starts =
        std::iter::once(0).chain(diff.match_indices('\n').map(|(newline, _)| newline + 1));

    line_starts
        .filter(|&offset| diff[offset..].starts_with(MARKER))
        .collect()
}
383
384fn file_sections(diff: &str) -> (&str, Vec<(&str, &str)>) {
389 let starts = file_section_starts(diff);
390 if starts.is_empty() {
391 return (diff, Vec::new());
392 }
393
394 let preamble = &diff[..starts[0]];
395 let mut sections = Vec::with_capacity(starts.len());
396 for (idx, &start) in starts.iter().enumerate() {
397 let end = starts.get(idx + 1).copied().unwrap_or(diff.len());
398 let section = &diff[start..end];
399 let path = section
400 .lines()
401 .next()
402 .and_then(|line| line.split_whitespace().nth(3))
403 .map_or("unknown", |s| s.trim_start_matches("b/"));
404 sections.push((path, section));
405 }
406 (preamble, sections)
407}
408
/// Decides whether one file section of a diff changes nothing but whitespace.
///
/// All non-whitespace characters of the added lines are concatenated, likewise for the
/// removed lines; the section is whitespace-only when it has at least one added or removed
/// line and the two concatenations are equal. This treats re-indentation and re-wrapping as
/// whitespace-only.
///
/// A section is never whitespace-only when its header (the lines before the first `@@`)
/// marks it as a binary change, a rename, or a copy.
///
/// Only lines inside hunks are compared. The `---`/`+++` file markers live in the header and
/// are skipped there; inside a hunk a line such as `--- note` is the removal of `-- note`
/// and is compared like any other removed line.
fn section_is_whitespace_only(section: &str) -> bool {
    const STRUCTURAL_MARKERS: [&str; 5] = [
        "Binary files",
        "rename from",
        "rename to",
        "copy from",
        "copy to",
    ];

    let mut added = String::new();
    let mut removed = String::new();
    let mut has_change = false;
    let mut in_hunk = false;

    for line in section.lines() {
        if line.starts_with("@@") {
            in_hunk = true;
            continue;
        }

        if !in_hunk {
            if STRUCTURAL_MARKERS.iter().any(|marker| line.starts_with(*marker)) {
                return false;
            }
            // Remaining header lines (index, mode, ---/+++ markers) carry no content change.
            continue;
        }

        let (side, text) = if let Some(rest) = line.strip_prefix('+') {
            (&mut added, rest)
        } else if let Some(rest) = line.strip_prefix('-') {
            (&mut removed, rest)
        } else {
            continue;
        };
        has_change = true;
        side.extend(text.chars().filter(|c| !c.is_whitespace()));
    }

    has_change && added == removed
}
444
445#[tracing::instrument(target = "lgit", name = "diff.classify_whitespace", skip_all, fields(diff_bytes = diff.len()))]
447pub fn classify_diff_whitespace(diff: &str) -> WhitespaceReport {
448 let (_preamble, sections) = file_sections(diff);
449 let mut report = WhitespaceReport::default();
450 for (path, section) in sections {
451 if section_is_whitespace_only(section) {
452 report.whitespace_only_files.push(path.to_string());
453 } else {
454 report.has_substantive = true;
455 }
456 }
457 report
458}
459
460#[tracing::instrument(target = "lgit", name = "diff.strip_whitespace_only", skip_all, fields(diff_bytes = diff.len()))]
466pub fn strip_whitespace_only_files(diff: &str) -> Option<String> {
467 let (preamble, sections) = file_sections(diff);
468 if sections.is_empty() {
469 return None;
470 }
471
472 let mut kept = Vec::with_capacity(sections.len());
473 let mut stripped_any = false;
474 for (_path, section) in §ions {
475 if section_is_whitespace_only(section) {
476 stripped_any = true;
477 } else {
478 kept.push(*section);
479 }
480 }
481
482 if !stripped_any || kept.is_empty() {
483 return None;
484 }
485
486 let mut out = String::with_capacity(diff.len());
487 out.push_str(preamble);
488 for section in kept {
489 out.push_str(section);
490 }
491 Some(out)
492}
493
494#[tracing::instrument(target = "lgit", name = "diff.truncate_by_lines", skip_all, fields(diff_bytes = diff.len(), max_lines))]
500pub fn truncate_diff_by_lines(diff: &str, max_lines: usize, config: &CommitConfig) -> String {
501 let files = parse_diff(diff);
502
503 let total_lines: usize = files
505 .iter()
506 .map(|f| f.header.lines().count() + f.content.lines().count())
507 .sum();
508
509 if total_lines <= max_lines {
510 return diff.to_string();
511 }
512
513 let total_priority: i32 = files.iter().map(|f| f.priority(config).max(1)).sum();
515
516 let mut result = String::with_capacity(diff.len());
517
518 for file in &files {
519 result.push_str(&file.header);
521 if !file.header.ends_with('\n') {
522 result.push('\n');
523 }
524
525 let content_lines: Vec<&str> = file.content.lines().collect();
526 let priority = file.priority(config).max(1);
527
528 #[allow(clippy::cast_sign_loss, reason = "priority and total are positive")]
530 #[allow(clippy::cast_possible_truncation, reason = "line count fits in usize")]
531 let allocated = ((max_lines as f64) * (priority as f64) / (total_priority as f64)) as usize;
532 let allocated = allocated.max(5); if content_lines.len() <= allocated {
535 result.push_str(&file.content);
536 if !file.content.ends_with('\n') {
537 result.push('\n');
538 }
539 } else {
540 let keep_start = allocated / 2;
542 let keep_end = allocated - keep_start;
543 let omitted = content_lines.len() - keep_start - keep_end;
544
545 for line in &content_lines[..keep_start] {
546 result.push_str(line);
547 result.push('\n');
548 }
549
550 use std::fmt::Write;
551 writeln!(&mut result, "[... {omitted} lines omitted ...]")
552 .expect("writing to String is infallible");
553
554 for line in &content_lines[content_lines.len() - keep_end..] {
555 result.push_str(line);
556 result.push('\n');
557 }
558 }
559 }
560
561 result
562}
563
// Unit tests for diff parsing, prioritisation, truncation, reconstruction and
// whitespace classification.
#[cfg(test)]
mod tests {
    use super::*;

    // Default configuration shared by the tests.
    fn test_config() -> CommitConfig {
        CommitConfig::default()
    }

    // Token counter built from fixed arguments.
    fn test_counter() -> TokenCounter {
        TokenCounter::new("http://localhost:4000", None, "claude-sonnet-4.5")
    }

    // A single modified file yields one FileDiff with header, content and counts.
    #[test]
    fn test_parse_diff_simple() {
        let diff = r#"diff --git a/src/main.rs b/src/main.rs
index 123..456 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
 fn main() {
 println!("hello");
 }"#;
        let files = parse_diff(diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].filename, "src/main.rs");
        assert_eq!(files[0].additions, 1);
        assert_eq!(files[0].deletions, 0);
        assert!(!files[0].is_binary);
        assert!(files[0].header.contains("diff --git"));
        assert!(files[0].content.contains("use std::collections::HashMap"));
    }

    // Two `diff --git` sections produce two entries in input order.
    #[test]
    fn test_parse_diff_multi_file() {
        let diff = r"diff --git a/src/lib.rs b/src/lib.rs
index 111..222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,2 +1,3 @@
+pub mod utils;
 pub fn test() {}
diff --git a/src/main.rs b/src/main.rs
index 333..444 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,1 +1,2 @@
 fn main() {}
+fn helper() {}";
        let files = parse_diff(diff);
        assert_eq!(files.len(), 2);
        assert_eq!(files[0].filename, "src/lib.rs");
        assert_eq!(files[1].filename, "src/main.rs");
        assert_eq!(files[0].additions, 1);
        assert_eq!(files[1].additions, 1);
    }

    // A rename reports the new path and keeps the rename lines in the header.
    #[test]
    fn test_parse_diff_rename() {
        let diff = r"diff --git a/old.rs b/new.rs
similarity index 95%
rename from old.rs
rename to new.rs
index 123..456 100644
--- a/old.rs
+++ b/new.rs
@@ -1,2 +1,3 @@
 fn test() {}
+fn helper() {}";
        let files = parse_diff(diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].filename, "new.rs");
        assert!(files[0].header.contains("rename from"));
        assert!(files[0].header.contains("rename to"));
        assert_eq!(files[0].additions, 1);
    }

    // A `Binary files` line flags the entry as binary and stays in the header.
    #[test]
    fn test_parse_diff_binary() {
        let diff = r"diff --git a/image.png b/image.png
index 123..456 100644
Binary files a/image.png and b/image.png differ";
        let files = parse_diff(diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].filename, "image.png");
        assert!(files[0].is_binary);
        assert!(files[0].header.contains("Binary files"));
    }

    // Empty input parses to no files.
    #[test]
    fn test_parse_diff_empty() {
        let diff = "";
        let files = parse_diff(diff);
        assert_eq!(files.len(), 0);
    }

    // A section without any hunk still yields an entry, with empty content.
    #[test]
    fn test_parse_diff_malformed_missing_hunks() {
        let diff = r"diff --git a/src/main.rs b/src/main.rs
index 123..456 100644
--- a/src/main.rs
+++ b/src/main.rs";
        let files = parse_diff(diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].filename, "src/main.rs");
        assert!(files[0].content.is_empty());
    }

    // New files keep the `new file mode` line in the header and count additions.
    #[test]
    fn test_parse_diff_new_file() {
        let diff = r"diff --git a/new.rs b/new.rs
new file mode 100644
index 000..123 100644
--- /dev/null
+++ b/new.rs
@@ -0,0 +1,2 @@
+fn test() {}
+fn main() {}";
        let files = parse_diff(diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].filename, "new.rs");
        assert!(files[0].header.contains("new file mode"));
        assert_eq!(files[0].additions, 2);
    }

    // Deleted files keep the `deleted file mode` line in the header and count deletions.
    #[test]
    fn test_parse_diff_deleted_file() {
        let diff = r"diff --git a/old.rs b/old.rs
deleted file mode 100644
index 123..000 100644
--- a/old.rs
+++ /dev/null
@@ -1,2 +0,0 @@
-fn test() {}
-fn main() {}";
        let files = parse_diff(diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].filename, "old.rs");
        assert!(files[0].header.contains("deleted file mode"));
        assert_eq!(files[0].deletions, 2);
    }

    // `size` is the header length plus the content length.
    #[test]
    fn test_file_diff_size() {
        let file = FileDiff {
            filename: "test.rs".to_string(),
            header: "header".to_string(),
            content: "content".to_string(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(file.size(), 6 + 7);
    }

    // Common source extensions get priority 100.
    #[test]
    fn test_file_diff_priority_source_files() {
        let config = test_config();
        let rs_file = FileDiff {
            filename: "src/main.rs".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(rs_file.priority(&config), 100);

        let py_file = FileDiff {
            filename: "script.py".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(py_file.priority(&config), 100);

        let js_file = FileDiff {
            filename: "app.js".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(js_file.priority(&config), 100);
    }

    // Binary files get priority -100.
    #[test]
    fn test_file_diff_priority_binary() {
        let config = test_config();
        let binary = FileDiff {
            filename: "image.png".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: true,
        };
        assert_eq!(binary.priority(&config), -100);
    }

    // Paths that look like tests get priority 10.
    #[test]
    fn test_file_diff_priority_test_files() {
        let config = test_config();
        let test_file = FileDiff {
            filename: "src/test_utils.rs".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(test_file.priority(&config), 10);

        let test_dir = FileDiff {
            filename: "tests/integration_test.rs".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(test_dir.priority(&config), 10);
    }

    // Low-priority extensions score 20, except under `prompt`/`system` paths, which score 100.
    #[test]
    fn test_file_diff_priority_low_priority_extensions() {
        let config = test_config();
        let md_file = FileDiff {
            filename: "README.md".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(md_file.priority(&config), 20);

        let prompt_file = FileDiff {
            filename: "prompts/analysis/default.md".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(prompt_file.priority(&config), 100);

        let system_file = FileDiff {
            filename: "system/analysis/default.md".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(system_file.priority(&config), 100);

        let toml_file = FileDiff {
            filename: "config.toml".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(toml_file.priority(&config), 20);
    }

    // Dependency manifests score 70.
    #[test]
    fn test_file_diff_priority_dependency_manifests() {
        let config = test_config();

        let cargo_toml = FileDiff {
            filename: "Cargo.toml".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(cargo_toml.priority(&config), 70);

        let package_json = FileDiff {
            filename: "package.json".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(package_json.priority(&config), 70);

        let go_mod = FileDiff {
            filename: "go.mod".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(go_mod.priority(&config), 70);
    }

    // Anything not matched by another rule scores 50.
    #[test]
    fn test_file_diff_priority_default() {
        let config = test_config();
        let other = FileDiff {
            filename: "data.csv".to_string(),
            header: String::new(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        assert_eq!(other.priority(&config), 50);
    }

    // Content that already fits is left untouched.
    #[test]
    fn test_file_diff_truncate_small() {
        let mut file = FileDiff {
            filename: "test.rs".to_string(),
            header: "header".to_string(),
            content: "short content".to_string(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        let original_size = file.size();
        file.truncate(1000);
        assert_eq!(file.size(), original_size);
        assert_eq!(file.content, "short content");
    }

    // Long content keeps its first and last lines around a truncation marker.
    #[test]
    fn test_file_diff_truncate_large() {
        let lines: Vec<String> = (0..100).map(|i| format!("line {i}")).collect();
        let content = lines.join("\n");
        let mut file = FileDiff {
            filename: "test.rs".to_string(),
            header: "header".to_string(),
            content,
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        file.truncate(500);
        assert!(file.content.contains("... (truncated"));
        assert!(file.content.contains("line 0"));
        assert!(file.content.contains("line 99"));
    }

    // Cutting inside multi-byte characters must land on a character boundary.
    #[test]
    fn test_file_diff_truncate_utf8_boundary() {
        let mut file = FileDiff {
            filename: "test.rs".to_string(),
            header: "header".to_string(),
            content: "😀".repeat(80),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        file.truncate(121);

        assert!(file.content.ends_with("\n... (truncated)"));
        let truncated_payload = file.content.trim_end_matches("\n... (truncated)");
        assert!(!truncated_payload.is_empty());
        assert_eq!(truncated_payload.len() % 4, 0);
    }

    // The excerpt keeps lines 0..=14 and the last ten lines.
    #[test]
    fn test_file_diff_truncate_preserves_context() {
        let lines: Vec<String> = (0..50).map(|i| format!("line {i}")).collect();
        let content = lines.join("\n");
        let original_lines = content.lines().count();
        let mut file = FileDiff {
            filename: "test.rs".to_string(),
            header: "header".to_string(),
            content,
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        file.truncate(300);
        assert!(file.content.contains("line 0"));
        assert!(file.content.contains("line 14"));
        assert!(file.content.contains("line 40"));
        assert!(file.content.contains("line 49"));
        let truncated_lines = file.content.lines().count();
        assert!(truncated_lines < original_lines, "Content should be truncated");
        assert!(file.content.contains("truncated"), "Should have truncation message");
    }

    // With almost no room left the content collapses to the bare marker.
    #[test]
    fn test_file_diff_truncate_very_small_space() {
        let mut file = FileDiff {
            filename: "test.rs".to_string(),
            header: "long header content here".to_string(),
            content: "lots of content that needs to be truncated".to_string(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        };
        file.truncate(30);
        assert_eq!(file.content, "... (truncated)");
    }

    // A diff under the limit keeps its content and path.
    #[test]
    fn test_smart_truncate_diff_under_limit() {
        let config = test_config();
        let counter = test_counter();
        let diff = r"diff --git a/src/main.rs b/src/main.rs
index 123..456 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,2 +1,3 @@
+use std::io;
 fn main() {}";
        let result = smart_truncate_diff(diff, 10000, &config, &counter);
        assert!(result.contains("use std::io"));
        assert!(result.contains("src/main.rs"));
    }

    // A diff over the limit is shortened but still names the file.
    #[test]
    fn test_smart_truncate_diff_over_limit() {
        let config = test_config();
        let counter = test_counter();
        let lines: Vec<String> = (0..200).map(|i| format!("+line {i}")).collect();
        let content = lines.join("\n");
        let diff = format!(
            "diff --git a/src/main.rs b/src/main.rs\nindex 123..456 100644\n--- a/src/main.rs\n+++ \
             b/src/main.rs\n@@ -1,1 +1,200 @@\n{content}"
        );
        let result = smart_truncate_diff(&diff, 500, &config, &counter);
        assert!(result.len() <= 600);
        assert!(result.contains("src/main.rs"));
    }

    // The source file must survive truncation alongside a README change.
    #[test]
    fn test_smart_truncate_diff_priority_allocation() {
        let config = test_config();
        let counter = test_counter();
        let diff = r"diff --git a/src/lib.rs b/src/lib.rs
index 111..222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,1 +1,50 @@
+pub fn important_function() {}
+pub fn another_function() {}
+pub fn yet_another() {}
diff --git a/README.md b/README.md
index 333..444 100644
--- a/README.md
+++ b/README.md
@@ -1,1 +1,50 @@
+# Documentation
+More docs here";
        let result = smart_truncate_diff(diff, 300, &config, &counter);
        assert!(result.contains("src/lib.rs"));
        assert!(result.contains("important_function") || result.contains("truncated"));
    }

    // Despite the name, this asserts the binary file's header is still present when the diff fits.
    #[test]
    fn test_smart_truncate_diff_binary_excluded() {
        let config = test_config();
        let counter = test_counter();
        let diff = r"diff --git a/image.png b/image.png
index 123..456 100644
Binary files a/image.png and b/image.png differ
diff --git a/src/main.rs b/src/main.rs
index 789..abc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,1 +1,2 @@
 fn main() {}
+fn helper() {}";
        let result = smart_truncate_diff(diff, 10000, &config, &counter);
        assert!(result.contains("src/main.rs"));
        assert!(result.contains("image.png"));
        assert!(result.contains("Binary files"));
    }

    // `Cargo.lock` is expected to be dropped by the default configuration.
    #[test]
    fn test_smart_truncate_diff_excluded_files() {
        let config = test_config();
        let counter = test_counter();
        let diff = r"diff --git a/Cargo.lock b/Cargo.lock
index 123..456 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,1 +1,100 @@
+lots of lock file content
diff --git a/src/main.rs b/src/main.rs
index 789..abc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,1 +1,2 @@
 fn main() {}
+fn helper() {}";
        let result = smart_truncate_diff(diff, 10000, &config, &counter);
        assert!(!result.contains("Cargo.lock"));
        assert!(result.contains("src/main.rs"));
    }

    // When every file is excluded the explanatory message is returned.
    #[test]
    fn test_smart_truncate_diff_all_files_excluded() {
        let config = test_config();
        let counter = test_counter();
        let diff = r"diff --git a/Cargo.lock b/Cargo.lock
index 123..456 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,1 +1,2 @@
+dependency update";
        let result = smart_truncate_diff(diff, 10000, &config, &counter);
        assert!(result.contains("No relevant files"));
    }

    // Both files remain named in the output when the diff has to be truncated.
    #[test]
    fn test_smart_truncate_diff_header_preservation() {
        let config = test_config();
        let counter = test_counter();
        let lines: Vec<String> = (0..100).map(|i| format!("+line {i}")).collect();
        let content = lines.join("\n");
        let diff = format!(
            "diff --git a/src/a.rs b/src/a.rs\nindex 111..222 100644\n--- a/src/a.rs\n+++ \
             b/src/a.rs\n@@ -1,1 +1,100 @@\n{content}\ndiff --git a/src/b.rs b/src/b.rs\nindex \
             333..444 100644\n--- a/src/b.rs\n+++ b/src/b.rs\n@@ -1,1 +1,100 @@\n{content}"
        );
        let result = smart_truncate_diff(&diff, 600, &config, &counter);
        assert!(result.contains("src/a.rs"));
        assert!(result.contains("src/b.rs"));
    }

    // One file: header, newline, content.
    #[test]
    fn test_reconstruct_diff_single_file() {
        let files = vec![FileDiff {
            filename: "test.rs".to_string(),
            header: "diff --git a/test.rs b/test.rs".to_string(),
            content: "+new line".to_string(),
            additions: 1,
            deletions: 0,
            is_binary: false,
        }];
        let result = reconstruct_diff(&files);
        assert_eq!(result, "diff --git a/test.rs b/test.rs\n+new line");
    }

    // Several files: every header and content appears in the output.
    #[test]
    fn test_reconstruct_diff_multiple_files() {
        let files = vec![
            FileDiff {
                filename: "a.rs".to_string(),
                header: "diff --git a/a.rs b/a.rs".to_string(),
                content: "+line a".to_string(),
                additions: 1,
                deletions: 0,
                is_binary: false,
            },
            FileDiff {
                filename: "b.rs".to_string(),
                header: "diff --git a/b.rs b/b.rs".to_string(),
                content: "+line b".to_string(),
                additions: 1,
                deletions: 0,
                is_binary: false,
            },
        ];
        let result = reconstruct_diff(&files);
        assert!(result.contains("a.rs"));
        assert!(result.contains("b.rs"));
        assert!(result.contains("+line a"));
        assert!(result.contains("+line b"));
    }

    // Empty content adds no trailing newline after the header.
    #[test]
    fn test_reconstruct_diff_empty_content() {
        let files = vec![FileDiff {
            filename: "test.rs".to_string(),
            header: "diff --git a/test.rs b/test.rs".to_string(),
            content: String::new(),
            additions: 0,
            deletions: 0,
            is_binary: false,
        }];
        let result = reconstruct_diff(&files);
        assert_eq!(result, "diff --git a/test.rs b/test.rs");
    }

    // No files reconstructs to an empty string.
    #[test]
    fn test_reconstruct_diff_empty_vec() {
        let files: Vec<FileDiff> = vec![];
        let result = reconstruct_diff(&files);
        assert_eq!(result, "");
    }

    // Fixture: the only change is the indentation of one line.
    const WS_ONLY_INDENT: &str = "diff --git a/src/foo.rs b/src/foo.rs
index 1234567..89abcde 100644
--- a/src/foo.rs
+++ b/src/foo.rs
@@ -1,3 +1,3 @@
 fn main() {
-let x = 1;
+ let x = 1;
 }
";

    // Fixture: a value changes, so the diff is substantive.
    const SUBSTANTIVE: &str = "diff --git a/src/bar.rs b/src/bar.rs
index 1111111..2222222 100644
--- a/src/bar.rs
+++ b/src/bar.rs
@@ -1,3 +1,3 @@
 fn main() {
-let x = 1;
+let x = 2;
 }
";

    // Re-indentation alone is classified as whitespace-only.
    #[test]
    fn test_whitespace_only_indentation() {
        let report = classify_diff_whitespace(WS_ONLY_INDENT);
        assert!(report.all_whitespace());
        assert!(!report.has_substantive);
        assert_eq!(report.whitespace_only_files, vec!["src/foo.rs".to_string()]);
    }

    // Re-wrapping text across lines is classified as whitespace-only.
    #[test]
    fn test_whitespace_only_rewrap() {
        let diff = "diff --git a/a.md b/a.md
index 111..222 100644
--- a/a.md
+++ b/a.md
@@ -1 +1,2 @@
-one two three
+one two
+three
";
        let report = classify_diff_whitespace(diff);
        assert!(report.all_whitespace());
    }

    // A changed value is substantive.
    #[test]
    fn test_substantive_change_not_whitespace() {
        let report = classify_diff_whitespace(SUBSTANTIVE);
        assert!(!report.all_whitespace());
        assert!(report.has_substantive);
        assert!(report.whitespace_only_files.is_empty());
    }

    // A new file with content is substantive.
    #[test]
    fn test_new_file_is_substantive() {
        let diff = "diff --git a/new.txt b/new.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/new.txt
@@ -0,0 +1,2 @@
+hello
+world
";
        let report = classify_diff_whitespace(diff);
        assert!(!report.all_whitespace());
        assert!(report.has_substantive);
    }

    // A pure rename is substantive.
    #[test]
    fn test_rename_is_substantive() {
        let diff = "diff --git a/old.rs b/new.rs
similarity index 100%
rename from old.rs
rename to new.rs
";
        let report = classify_diff_whitespace(diff);
        assert!(!report.all_whitespace());
        assert!(report.has_substantive);
    }

    // A mixed changeset lists the whitespace-only file and flags the substantive one.
    #[test]
    fn test_mixed_changeset() {
        let diff = format!("{WS_ONLY_INDENT}{SUBSTANTIVE}");
        let report = classify_diff_whitespace(&diff);
        assert!(!report.all_whitespace());
        assert!(report.has_substantive);
        assert_eq!(report.whitespace_only_files, vec!["src/foo.rs".to_string()]);
    }

    // Stripping removes the whitespace-only section and leaves the rest byte-for-byte.
    #[test]
    fn test_strip_drops_whitespace_only_file() {
        let diff = format!("{WS_ONLY_INDENT}{SUBSTANTIVE}");
        let stripped = strip_whitespace_only_files(&diff).expect("a file should be dropped");
        assert!(!stripped.contains("src/foo.rs"));
        assert!(stripped.contains("src/bar.rs"));
        assert_eq!(stripped, SUBSTANTIVE);
    }

    // Nothing to strip yields `None`.
    #[test]
    fn test_strip_noop_when_no_whitespace_only() {
        assert!(strip_whitespace_only_files(SUBSTANTIVE).is_none());
    }

    // Stripping everything is refused: `None` when all files are whitespace-only.
    #[test]
    fn test_strip_noop_when_all_whitespace() {
        assert!(strip_whitespace_only_files(WS_ONLY_INDENT).is_none());
    }

    // `diff --git` in the middle of a body line must not start a new section.
    #[test]
    fn test_diff_git_text_in_body_not_a_boundary() {
        let diff = "diff --git a/doc.md b/doc.md
index 111..222 100644
--- a/doc.md
+++ b/doc.md
@@ -1,2 +1,2 @@
-Run diff --git to inspect changes
+Run git diff to inspect changes
 done
";
        let report = classify_diff_whitespace(diff);
        assert_eq!(report.whitespace_only_files.len() + usize::from(report.has_substantive), 1);
        assert!(report.has_substantive);
    }
}