llm_git/
normalization.rs

//! Normalization utilities for commit messages.
2use unicode_normalization::UnicodeNormalization;
3
4use crate::{config::CommitConfig, types::ConventionalCommit, validation::is_past_tense_verb};
5
6/// Normalize Unicode characters to ASCII (remove AI-style formatting)
7/// Normalize Unicode characters to ASCII (remove AI-style formatting)
8pub fn normalize_unicode(text: &str) -> String {
9   // Pre-NFKD replacements for chars that decompose badly
10   // (≠ → = + combining, ½ → 1⁄2, ² → 2)
11   let pre_normalized = text
12      // Math symbols that decompose badly
13      .replace('≠', "!=") // not equal to (decomposes to = + \u{338})
14      // Fractions (NFKD decomposes ½ to 1⁄2 with fraction slash, not regular /)
15      .replace('½', "1/2")
16      .replace('¼', "1/4")
17      .replace('¾', "3/4")
18      .replace('⅓', "1/3")
19      .replace('⅔', "2/3")
20      .replace('⅕', "1/5")
21      .replace('⅖', "2/5")
22      .replace('⅗', "3/5")
23      .replace('⅘', "4/5")
24      .replace('⅙', "1/6")
25      .replace('⅚', "5/6")
26      .replace('⅛', "1/8")
27      .replace('⅜', "3/8")
28      .replace('⅝', "5/8")
29      .replace('⅞', "7/8")
30      // Superscripts (NFKD decomposes ² to just "2", losing the superscript meaning)
31      .replace('⁰', "^0")
32      .replace('¹', "^1")
33      .replace('²', "^2")
34      .replace('³', "^3")
35      .replace('⁴', "^4")
36      .replace('⁵', "^5")
37      .replace('⁶', "^6")
38      .replace('⁷', "^7")
39      .replace('⁸', "^8")
40      .replace('⁹', "^9")
41      // Subscripts
42      .replace('₀', "_0")
43      .replace('₁', "_1")
44      .replace('₂', "_2")
45      .replace('₃', "_3")
46      .replace('₄', "_4")
47      .replace('₅', "_5")
48      .replace('₆', "_6")
49      .replace('₇', "_7")
50      .replace('₈', "_8")
51      .replace('₉', "_9");
52
53   // Apply NFKD normalization for canonical decomposition
54   let normalized: String = pre_normalized.nfkd().collect();
55
56   normalized
57      // Smart quotes to straight quotes
58      .replace(['\u{2018}', '\u{2019}'], "'") // ' right single quote / apostrophe
59      .replace(['\u{201C}', '\u{201D}'], "\"") // " right double quote
60      .replace('\u{201A}', "'") // ‚ single low-9 quote
61      .replace(['\u{201E}', '\u{00AB}', '\u{00BB}'], "\"") // » right-pointing double angle quote
62      .replace(['\u{2039}', '\u{203A}'], "'") // › single right-pointing angle quote
63      // Dashes and hyphens
64      .replace(['\u{2010}', '\u{2011}', '\u{2012}'], "-") // ‒ figure dash
65      .replace(['\u{2013}', '\u{2014}', '\u{2015}'], "--") // ― horizontal bar
66      .replace('\u{2212}', "-") // − minus sign
67      // Arrows
68      .replace('\u{2192}', "->") // rightwards arrow
69      .replace('←', "<-") // leftwards arrow
70      .replace('↔', "<->") // left right arrow
71      .replace('⇒', "=>") // rightwards double arrow
72      .replace('⇐', "<=") // leftwards double arrow
73      .replace('⇔', "<=>") // left right double arrow
74      .replace('↑', "^") // upwards arrow
75      .replace('↓', "v") // downwards arrow
76      // Math symbols
77      .replace('\u{2264}', "<=") // less than or equal to
78      .replace('≥', ">=") // greater than or equal to
79      .replace('≈', "~=") // approximately equal to
80      .replace('≡', "==") // identical to
81      .replace('\u{00D7}', "x") // multiplication sign
82      .replace('÷', "/") // division sign
83      // Ellipsis
84      .replace(['\u{2026}', '⋯', '⋮'], "...") // vertical ellipsis
85      // Bullet points (convert to hyphens for consistency)
86      .replace(['•', '◦', '▪', '▫', '◆', '◇'], "-") // white diamond
87      // Check marks
88      .replace(['✓', '✔'], "v") // heavy check mark
89      .replace(['✗', '✘'], "x") // heavy ballot x
90      // Greek letters (common in programming)
91      .replace('λ', "lambda")
92      .replace('α', "alpha")
93      .replace('β', "beta")
94      .replace('γ', "gamma")
95      .replace('δ', "delta")
96      .replace('ε', "epsilon")
97      .replace('θ', "theta")
98      .replace('μ', "mu")
99      .replace('π', "pi")
100      .replace('σ', "sigma")
101      .replace('Σ', "Sigma")
102      .replace('Δ', "Delta")
103      .replace('Π', "Pi")
104      // Special spaces to regular space
105      .replace(
106         [
107            '\u{00A0}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}', '\u{2005}',
108            '\u{2006}', '\u{2007}', '\u{2008}', '\u{2009}', '\u{200A}', '\u{202F}', '\u{205F}',
109            '\u{3000}',
110         ],
111         " ",
112      ) // ideographic space
113      // Zero-width characters (remove)
114      .replace(['\u{200B}', '\u{200C}', '\u{200D}', '\u{FEFF}'], "") // zero-width no-break space (BOM)
115}
116
/// Roughly estimate how many tokens `text` occupies.
///
/// Uses the rule of thumb of one token per four units of length, rounded up.
/// The length is `str::len`, i.e. UTF-8 bytes rather than characters, so
/// non-ASCII text is counted by its encoded size.
const fn estimate_tokens(text: &str) -> usize {
   let byte_len = text.len();
   let full_groups = byte_len / 4;
   // A partial trailing group still costs a whole token.
   if byte_len % 4 == 0 {
      full_groups
   } else {
      full_groups + 1
   }
}
121
122/// Cap detail points by token budget instead of hard count
123/// Keeps high-priority details until budget exhausted
124pub fn cap_details(details: &mut Vec<String>, max_tokens: usize) {
125   if details.is_empty() {
126      return;
127   }
128
129   // Calculate total tokens
130   let total_tokens: usize = details.iter().map(|d| estimate_tokens(d)).sum();
131
132   if total_tokens <= max_tokens {
133      return; // Under budget, keep all
134   }
135
136   // Score by priority keywords and length
137   let mut scored: Vec<(usize, i32, usize, &String)> = details
138      .iter()
139      .enumerate()
140      .map(|(idx, detail)| {
141         let lower = detail.to_lowercase();
142         let mut score = 0;
143
144         // High priority keywords (security, crashes, critical bugs)
145         if lower.contains("security")
146            || lower.contains("vulnerability")
147            || lower.contains("exploit")
148            || lower.contains("critical")
149            || (lower.contains("fix") && lower.contains("crash"))
150         {
151            score += 100;
152         }
153         if lower.contains("breaking") || lower.contains("incompatible") {
154            score += 90;
155         }
156         if lower.contains("performance")
157            || lower.contains("faster")
158            || lower.contains("optimization")
159         {
160            score += 80;
161         }
162         if lower.contains("fix") || lower.contains("bug") {
163            score += 70;
164         }
165
166         // Medium priority keywords
167         if lower.contains("api") || lower.contains("interface") || lower.contains("public") {
168            score += 50;
169         }
170         if lower.contains("user") || lower.contains("client") {
171            score += 40;
172         }
173         if lower.contains("deprecated") || lower.contains("removed") {
174            score += 35;
175         }
176
177         // Add length component (capped contribution to avoid favoring verbosity)
178         score += (detail.len() / 20).min(10) as i32;
179
180         let tokens = estimate_tokens(detail);
181         (idx, score, tokens, detail)
182      })
183      .collect();
184
185   // Sort by score descending
186   scored.sort_by(|a, b| b.1.cmp(&a.1));
187
188   // Keep details until budget exhausted
189   let mut budget_remaining = max_tokens;
190   let mut keep_indices: Vec<usize> = Vec::new();
191
192   for (idx, _score, tokens, _detail) in scored {
193      if tokens <= budget_remaining {
194         keep_indices.push(idx);
195         budget_remaining -= tokens;
196      }
197   }
198
199   keep_indices.sort_unstable(); // Preserve original order
200
201   // Filter details
202   let kept: Vec<String> = keep_indices
203      .iter()
204      .filter_map(|&idx| details.get(idx).cloned())
205      .collect();
206   *details = kept;
207}
208
209/// Convert present-tense verbs to past-tense and handle type-specific
210/// replacements
211pub fn normalize_summary_verb(summary: &mut String, commit_type: &str) {
212   if summary.trim().is_empty() {
213      return;
214   }
215
216   let mut parts_iter = summary.split_whitespace();
217   let first_word = match parts_iter.next() {
218      Some(word) => word.to_string(),
219      None => return,
220   };
221   let rest = parts_iter.collect::<Vec<_>>().join(" ");
222   let first_word_lower = first_word.to_lowercase();
223
224   // Check if already past tense
225   if is_past_tense_verb(&first_word_lower) {
226      // Special case: refactor type shouldn't use "refactored"
227      if commit_type == "refactor" && first_word_lower == "refactored" {
228         *summary = if rest.is_empty() {
229            "restructured".to_string()
230         } else {
231            format!("restructured {rest}")
232         };
233      }
234      return;
235   }
236
237   // Convert present tense to past tense
238   let converted = match first_word_lower.as_str() {
239      "add" | "adds" => Some("added"),
240      "fix" | "fixes" => Some("fixed"),
241      "update" | "updates" => Some("updated"),
242      "refactor" | "refactors" => Some(if commit_type == "refactor" {
243         "restructured"
244      } else {
245         "refactored"
246      }),
247      "remove" | "removes" => Some("removed"),
248      "replace" | "replaces" => Some("replaced"),
249      "improve" | "improves" => Some("improved"),
250      "implement" | "implements" => Some("implemented"),
251      "migrate" | "migrates" => Some("migrated"),
252      "rename" | "renames" => Some("renamed"),
253      "move" | "moves" => Some("moved"),
254      "merge" | "merges" => Some("merged"),
255      "split" | "splits" => Some("split"),
256      "extract" | "extracts" => Some("extracted"),
257      "restructure" | "restructures" => Some("restructured"),
258      "reorganize" | "reorganizes" => Some("reorganized"),
259      "consolidate" | "consolidates" => Some("consolidated"),
260      "simplify" | "simplifies" => Some("simplified"),
261      "optimize" | "optimizes" => Some("optimized"),
262      "document" | "documents" => Some("documented"),
263      "test" | "tests" => Some("tested"),
264      "change" | "changes" => Some("changed"),
265      "introduce" | "introduces" => Some("introduced"),
266      "deprecate" | "deprecates" => Some("deprecated"),
267      "delete" | "deletes" => Some("deleted"),
268      "correct" | "corrects" => Some("corrected"),
269      "enhance" | "enhances" => Some("enhanced"),
270      "revert" | "reverts" => Some("reverted"),
271      _ => None,
272   };
273
274   if let Some(past) = converted {
275      *summary = if rest.is_empty() {
276         past.to_string()
277      } else {
278         format!("{past} {rest}")
279      };
280   }
281}
282
283/// Post-process conventional commit message to fix common issues
284pub fn post_process_commit_message(msg: &mut ConventionalCommit, config: &CommitConfig) {
285   // CommitType and Scope are already normalized to lowercase in their
286   // constructors No need to re-normalize them here
287
288   // Extract summary string for mutations, will reconstruct at end
289   let mut summary_str = normalize_unicode(msg.summary.as_str());
290
291   // Normalize body and footers
292   msg.body = msg.body.iter().map(|s| normalize_unicode(s)).collect();
293   msg.footers = msg.footers.iter().map(|s| normalize_unicode(s)).collect();
294
295   // Normalize summary formatting: single line, trimmed, enforce trailing period
296   summary_str = summary_str
297      .replace(['\r', '\n'], " ")
298      .split_whitespace()
299      .collect::<Vec<_>>()
300      .join(" ")
301      .trim()
302      .trim_end_matches('.')
303      .trim_end_matches(';')
304      .trim_end_matches(':')
305      .to_string();
306
307   // Helper: check if first token is all caps (acronym/initialism)
308   let is_first_token_all_caps = |s: &str| -> bool {
309      s.split_whitespace().next().is_some_and(|token| {
310         token
311            .chars()
312            .all(|c| !c.is_alphabetic() || c.is_uppercase())
313      })
314   };
315
316   // Ensure summary starts with lowercase (unless first token is all caps)
317   if !is_first_token_all_caps(&summary_str)
318      && let Some(first_char) = summary_str.chars().next()
319      && first_char.is_uppercase()
320   {
321      let rest = &summary_str[first_char.len_utf8()..];
322      summary_str = format!("{}{}", first_char.to_lowercase(), rest);
323   }
324
325   // Normalize verb tense (present \u{2192} past, handle type-specific
326   // replacements)
327   normalize_summary_verb(&mut summary_str, msg.commit_type.as_str());
328   summary_str = summary_str.trim().to_string();
329
330   // Ensure lowercase after normalization (unless first token is all caps)
331   if !is_first_token_all_caps(&summary_str)
332      && let Some(first_char) = summary_str.chars().next()
333      && first_char.is_uppercase()
334   {
335      let rest = &summary_str[first_char.len_utf8()..];
336      summary_str = format!("{}{}", first_char.to_lowercase(), rest);
337   }
338
339   // No truncation - validation handles length checks
340   // Remove any trailing period (conventional commits don't use periods)
341   summary_str = summary_str.trim_end_matches('.').to_string();
342
343   // Reconstruct CommitSummary (bypassing warnings since post-processing
344   // normalizes)
345   msg.summary = crate::types::CommitSummary::new_unchecked(summary_str, 128)
346      .expect("post-processed summary should be valid");
347
348   // Clean and enforce punctuation for body items
349   for item in &mut msg.body {
350      let mut cleaned = item
351         .replace(['\r', '\n'], " ")
352         .trim()
353         .trim_start_matches('\u{2022}')
354         .trim_start_matches('-')
355         .trim_start_matches('*')
356         .trim_start_matches('+')
357         .trim()
358         .to_string();
359
360      cleaned = cleaned
361         .split_whitespace()
362         .collect::<Vec<_>>()
363         .join(" ")
364         .trim()
365         .trim_end_matches('.')
366         .trim_end_matches(';')
367         .trim_end_matches(',')
368         .to_string();
369
370      if cleaned.is_empty() {
371         *item = cleaned;
372         continue;
373      }
374
375      // Capitalize first letter
376      if let Some(first_char) = cleaned.chars().next()
377         && first_char.is_lowercase()
378      {
379         let rest = &cleaned[first_char.len_utf8()..];
380         cleaned = format!("{}{}", first_char.to_uppercase(), rest);
381      }
382
383      if !cleaned.ends_with('.') {
384         cleaned.push('.');
385      }
386
387      *item = cleaned;
388   }
389
390   // Remove empty body items
391   msg.body.retain(|item| !item.trim().is_empty());
392
393   // Cap details by token budget
394   cap_details(&mut msg.body, config.max_detail_tokens);
395}
396
397/// Format `ConventionalCommit` as a single string for display and commit
398pub fn format_commit_message(msg: &ConventionalCommit) -> String {
399   // Build first line: type(scope): summary
400   let scope_part = msg
401      .scope
402      .as_ref()
403      .map(|s| format!("({s})"))
404      .unwrap_or_default();
405   let first_line = format!("{}{}: {}", msg.commit_type, scope_part, msg.summary);
406
407   // Build body with - bullets
408   let body_formatted = if msg.body.is_empty() {
409      String::new()
410   } else {
411      msg.body
412         .iter()
413         .map(|item| format!("- {item}"))
414         .collect::<Vec<_>>()
415         .join("\n")
416   };
417
418   // Build footers
419   let footers_formatted = if msg.footers.is_empty() {
420      String::new()
421   } else {
422      msg.footers.join("\n")
423   };
424
425   // Combine parts
426   let mut result = first_line;
427   if !body_formatted.is_empty() {
428      result.push_str("\n\n");
429      result.push_str(&body_formatted);
430   }
431   if !footers_formatted.is_empty() {
432      result.push_str("\n\n");
433      result.push_str(&footers_formatted);
434   }
435   result
436}
437
/// Unit tests for the normalization helpers.
///
/// The `cap_details` tests use budgets below the total token estimate of
/// their inputs, so the capping and prioritization logic actually runs.
#[cfg(test)]
mod tests {
   use super::*;
   use crate::types::{CommitSummary, CommitType, ConventionalCommit, Scope};

   /// Sum of the token estimates of `details`.
   fn total_tokens(details: &[String]) -> usize {
      details.iter().map(|d| estimate_tokens(d)).sum()
   }

   // normalize_unicode tests
   #[test]
   fn test_normalize_unicode_smart_quotes() {
      assert_eq!(normalize_unicode("\u{2018}smart quotes\u{2019}"), "'smart quotes'");
      assert_eq!(normalize_unicode("\u{201C}double quotes\u{201D}"), "\"double quotes\"");
      assert_eq!(normalize_unicode("\u{201A}low quote\u{2019}"), "'low quote'");
      assert_eq!(normalize_unicode("\u{201E}low double\u{201D}"), "\"low double\"");
   }

   #[test]
   fn test_normalize_unicode_dashes() {
      assert_eq!(normalize_unicode("en\u{2013}dash"), "en--dash");
      assert_eq!(normalize_unicode("em\u{2014}dash"), "em--dash");
      assert_eq!(normalize_unicode("fig\u{2012}dash"), "fig-dash");
      assert_eq!(normalize_unicode("minus\u{2212}sign"), "minus-sign");
   }

   #[test]
   fn test_normalize_unicode_arrows() {
      assert_eq!(normalize_unicode("arrow\u{2192}right"), "arrow->right");
      assert_eq!(normalize_unicode("arrow\u{2190}left"), "arrow<-left");
      assert_eq!(normalize_unicode("arrow\u{2194}both"), "arrow<->both");
      assert_eq!(normalize_unicode("double\u{21D2}arrow"), "double=>arrow");
      assert_eq!(normalize_unicode("up\u{2191}arrow"), "up^arrow");
   }

   #[test]
   fn test_normalize_unicode_math() {
      assert_eq!(normalize_unicode("a\u{00D7}b"), "axb");
      assert_eq!(normalize_unicode("a\u{00F7}b"), "a/b");
      assert_eq!(normalize_unicode("x\u{2264}y"), "x<=y");
      assert_eq!(normalize_unicode("x\u{2265}y"), "x>=y");
      assert_eq!(normalize_unicode("x\u{2260}y"), "x!=y");
      assert_eq!(normalize_unicode("x\u{2248}y"), "x~=y");
   }

   #[test]
   fn test_normalize_unicode_greek() {
      assert_eq!(normalize_unicode("\u{03BB} function"), "lambda function");
      assert_eq!(normalize_unicode("\u{03B1} beta \u{03B3}"), "alpha beta gamma");
      assert_eq!(normalize_unicode("\u{03BC} service"), "mu service");
      assert_eq!(normalize_unicode("\u{03A3} total"), "Sigma total");
   }

   #[test]
   fn test_normalize_unicode_fractions() {
      assert_eq!(normalize_unicode("\u{00BD} cup"), "1/2 cup");
      assert_eq!(normalize_unicode("\u{00BE} done"), "3/4 done");
      assert_eq!(normalize_unicode("\u{2153} left"), "1/3 left");
   }

   #[test]
   fn test_normalize_unicode_superscripts() {
      assert_eq!(normalize_unicode("x\u{00B2}"), "x^2");
      assert_eq!(normalize_unicode("10\u{00B3}"), "10^3");
   }

   #[test]
   fn test_normalize_unicode_multiple_replacements() {
      let input =
         "\u{2018}smart\u{2019}\u{2192}straight \u{201C}quotes\u{201D}\u{00D7}math\u{2264}ops";
      let expected = "'smart'->straight \"quotes\"xmath<=ops";
      assert_eq!(normalize_unicode(input), expected);
   }

   #[test]
   fn test_normalize_unicode_ellipsis() {
      assert_eq!(normalize_unicode("wait\u{2026}"), "wait...");
      assert_eq!(normalize_unicode("more\u{22EF}dots"), "more...dots");
   }

   #[test]
   fn test_normalize_unicode_bullets() {
      assert_eq!(normalize_unicode("\u{2022}item"), "-item");
      assert_eq!(normalize_unicode("\u{25E6}item"), "-item");
   }

   #[test]
   fn test_normalize_unicode_check_marks() {
      assert_eq!(normalize_unicode("\u{2713}done"), "vdone");
      assert_eq!(normalize_unicode("\u{2717}failed"), "xfailed");
   }

   // normalize_summary_verb tests
   #[test]
   fn test_normalize_summary_verb_present_to_past() {
      let mut s = "add new feature".to_string();
      normalize_summary_verb(&mut s, "feat");
      assert_eq!(s, "added new feature");

      let mut s = "fix bug".to_string();
      normalize_summary_verb(&mut s, "fix");
      assert_eq!(s, "fixed bug");

      let mut s = "update docs".to_string();
      normalize_summary_verb(&mut s, "docs");
      assert_eq!(s, "updated docs");
   }

   #[test]
   fn test_normalize_summary_verb_already_past() {
      let mut s = "added feature".to_string();
      normalize_summary_verb(&mut s, "feat");
      assert_eq!(s, "added feature");

      let mut s = "fixed bug".to_string();
      normalize_summary_verb(&mut s, "fix");
      assert_eq!(s, "fixed bug");
   }

   #[test]
   fn test_normalize_summary_verb_third_person() {
      let mut s = "adds feature".to_string();
      normalize_summary_verb(&mut s, "feat");
      assert_eq!(s, "added feature");

      let mut s = "fixes bug".to_string();
      normalize_summary_verb(&mut s, "fix");
      assert_eq!(s, "fixed bug");
   }

   #[test]
   fn test_normalize_summary_verb_non_verb_start() {
      let mut s = "123 files changed".to_string();
      normalize_summary_verb(&mut s, "chore");
      assert_eq!(s, "123 files changed");
   }

   #[test]
   fn test_normalize_summary_verb_refactor_special_case() {
      let mut s = "refactored code".to_string();
      normalize_summary_verb(&mut s, "refactor");
      assert_eq!(s, "restructured code");
   }

   #[test]
   fn test_normalize_summary_verb_refactor_present() {
      let mut s = "refactor code".to_string();
      normalize_summary_verb(&mut s, "refactor");
      assert_eq!(s, "restructured code");

      let mut s = "refactor logic".to_string();
      normalize_summary_verb(&mut s, "feat");
      assert_eq!(s, "refactored logic");
   }

   #[test]
   fn test_normalize_summary_verb_empty() {
      let mut s = String::new();
      normalize_summary_verb(&mut s, "feat");
      assert_eq!(s, "");
   }

   #[test]
   fn test_normalize_summary_verb_single_word() {
      let mut s = "add".to_string();
      normalize_summary_verb(&mut s, "feat");
      assert_eq!(s, "added");
   }

   // cap_details tests (budget-based)
   #[test]
   fn test_cap_details_under_budget() {
      let mut details = vec!["first".to_string(), "second".to_string(), "third".to_string()];
      let tokens = total_tokens(&details);
      cap_details(&mut details, tokens + 100);
      assert_eq!(details.len(), 3);
   }

   #[test]
   fn test_cap_details_at_budget() {
      let mut details = vec![
         "one".to_string(),
         "two".to_string(),
         "three".to_string(),
         "four".to_string(),
         "five".to_string(),
         "six".to_string(),
      ];
      let tokens = total_tokens(&details);
      cap_details(&mut details, tokens);
      assert_eq!(details.len(), 6);
   }

   #[test]
   fn test_cap_details_security_priority() {
      let mut details = vec![
         "normal change".to_string(),
         "security vulnerability fixed".to_string(),
         "another change".to_string(),
         "third change".to_string(),
         "fourth change".to_string(),
         "fifth change".to_string(),
         "sixth change".to_string(),
      ];
      let original_len = details.len();
      // The list totals ~28 tokens; a budget of 10 forces capping. Taking
      // items in list order would spend the budget on "normal change" and no
      // longer fit the 7-token security item.
      let budget = 10;
      assert!(total_tokens(&details) > budget);
      cap_details(&mut details, budget);
      assert!(details.iter().any(|d| d.contains("security")));
      assert!(details.len() < original_len);
      assert!(total_tokens(&details) <= budget);
   }

   #[test]
   fn test_cap_details_performance_priority() {
      let mut details = vec![
         "normal change".to_string(),
         "performance optimization added".to_string(),
         "another change".to_string(),
         "third change".to_string(),
         "fourth change".to_string(),
         "fifth change".to_string(),
      ];
      let original_len = details.len();
      // ~26 tokens in total; the performance item alone needs ~8.
      let budget = 10;
      assert!(total_tokens(&details) > budget);
      cap_details(&mut details, budget);
      assert!(details.iter().any(|d| d.contains("performance")));
      assert!(details.len() < original_len);
      assert!(total_tokens(&details) <= budget);
   }

   #[test]
   fn test_cap_details_api_priority() {
      let mut details = vec![
         "normal change".to_string(),
         "API interface updated".to_string(),
         "internal change".to_string(),
         "another internal change".to_string(),
         "yet another change".to_string(),
      ];
      let original_len = details.len();
      // ~25 tokens in total; the API item alone needs ~6.
      let budget = 8;
      assert!(total_tokens(&details) > budget);
      cap_details(&mut details, budget);
      assert!(details.iter().any(|d| d.contains("API")));
      assert!(details.len() < original_len);
      assert!(total_tokens(&details) <= budget);
   }

   #[test]
   fn test_cap_details_preserves_order() {
      // The higher-scoring security item comes AFTER the performance item,
      // so output in score order would put it first.
      let mut details = vec![
         "first".to_string(),
         "performance improvement".to_string(),
         "third".to_string(),
         "critical security fix".to_string(),
         "fifth".to_string(),
      ];
      // ~18 tokens in total; 14 keeps both priority items plus one filler.
      let budget = 14;
      assert!(total_tokens(&details) > budget);
      cap_details(&mut details, budget);
      // Should preserve relative order of kept items
      let perf_idx = details.iter().position(|d| d.contains("performance"));
      let security_idx = details.iter().position(|d| d.contains("security"));
      assert!(perf_idx.is_some());
      assert!(security_idx.is_some());
      assert!(perf_idx.unwrap() < security_idx.unwrap());
      assert!(details.len() < 5);
   }

   #[test]
   fn test_cap_details_empty_list() {
      let mut details: Vec<String> = vec![];
      cap_details(&mut details, 100);
      assert_eq!(details.len(), 0);
   }

   #[test]
   fn test_cap_details_breaking_priority() {
      let mut details = vec![
         "normal change".to_string(),
         "breaking change introduced".to_string(),
         "another change".to_string(),
         "third change".to_string(),
         "fourth change".to_string(),
      ];
      let original_len = details.len();
      // ~22 tokens in total; the breaking-change item alone needs ~7.
      let budget = 10;
      assert!(total_tokens(&details) > budget);
      cap_details(&mut details, budget);
      assert!(details.iter().any(|d| d.contains("breaking")));
      assert!(details.len() < original_len);
      assert!(total_tokens(&details) <= budget);
   }

   #[test]
   fn test_cap_details_budget_prefers_short_high_priority() {
      // 6 short high-priority items should fit, but 2 long low-priority shouldn't
      let mut details = vec![
         // 12 chars, 3 tokens, score 170 ("security" + "fix")
         "security fix".to_string(),
         // 7 chars, 2 tokens, score 70
         "bug fix".to_string(),
         // 10 chars, 3 tokens, score 50
         "API change".to_string(),
         // 16 chars, 4 tokens, score 80
         "performance gain".to_string(),
         // 15 chars, 4 tokens, score 90
         "breaking change".to_string(),
         // 12 chars, 3 tokens, score 40
         "user feature".to_string(),
         // ~71 chars, ~18 tokens, score ~43 (contains "user" + length bonus)
         "This is a very long internal refactoring detail that adds no user value".to_string(),
         // ~59 chars, ~15 tokens, score ~2 (length bonus only)
         "Another extremely long low priority change description here".to_string(),
      ];
      // Budget: 30 tokens (enough for all 6 short items, not enough to add
      // either long one)
      cap_details(&mut details, 30);
      // Should keep short high-priority items
      assert!(details.iter().any(|d| d.contains("security")));
      assert!(details.iter().any(|d| d.contains("breaking")));
      // Should drop long low-priority items
      assert!(!details.iter().any(|d| d.contains("very long internal")));
      assert!(!details.iter().any(|d| d.contains("extremely long")));
      assert_eq!(details.len(), 6);
   }

   #[test]
   fn test_cap_details_budget_allows_variable_count() {
      // With same budget, should fit more short items or fewer long items
      let mut short = vec![
         "fix A".to_string(),
         "fix B".to_string(),
         "fix C".to_string(),
         "fix D".to_string(),
         "fix E".to_string(),
         "fix F".to_string(),
      ];
      let mut long = vec![
         "Fixed a critical security vulnerability in authentication".to_string(),
         "Implemented comprehensive performance optimization".to_string(),
         "Added extensive API documentation and examples".to_string(),
      ];

      // Short items cost ~2 tokens each (~12 total); long items cost
      // ~12-15 tokens each (~40 total), so only two of them fit in 30.
      cap_details(&mut short, 30);
      cap_details(&mut long, 30);

      assert_eq!(short.len(), 6); // All short items fit
      assert_eq!(long.len(), 2); // Lowest-priority long item is dropped
      assert!(long.iter().any(|d| d.contains("security")));
      assert!(long.iter().any(|d| d.contains("performance")));
   }

   // format_commit_message tests
   #[test]
   fn test_format_commit_message_type_summary_only() {
      let commit = ConventionalCommit {
         commit_type: CommitType::new("feat").unwrap(),
         scope:       None,
         summary:     CommitSummary::new_unchecked("added new feature", 128).unwrap(),
         body:        vec![],
         footers:     vec![],
      };
      assert_eq!(format_commit_message(&commit), "feat: added new feature");
   }

   #[test]
   fn test_format_commit_message_with_scope() {
      let commit = ConventionalCommit {
         commit_type: CommitType::new("fix").unwrap(),
         scope:       Some(Scope::new("api").unwrap()),
         summary:     CommitSummary::new_unchecked("fixed bug", 128).unwrap(),
         body:        vec![],
         footers:     vec![],
      };
      assert_eq!(format_commit_message(&commit), "fix(api): fixed bug");
   }

   #[test]
   fn test_format_commit_message_with_body() {
      let commit = ConventionalCommit {
         commit_type: CommitType::new("feat").unwrap(),
         scope:       None,
         summary:     CommitSummary::new_unchecked("added feature", 128).unwrap(),
         body:        vec!["First detail.".to_string(), "Second detail.".to_string()],
         footers:     vec![],
      };
      let expected = "feat: added feature\n\n- First detail.\n- Second detail.";
      assert_eq!(format_commit_message(&commit), expected);
   }

   #[test]
   fn test_format_commit_message_with_footers() {
      let commit = ConventionalCommit {
         commit_type: CommitType::new("fix").unwrap(),
         scope:       None,
         summary:     CommitSummary::new_unchecked("fixed bug", 128).unwrap(),
         body:        vec![],
         footers:     vec!["Closes: #123".to_string(), "Fixes: #456".to_string()],
      };
      let expected = "fix: fixed bug\n\nCloses: #123\nFixes: #456";
      assert_eq!(format_commit_message(&commit), expected);
   }

   #[test]
   fn test_format_commit_message_full() {
      let commit = ConventionalCommit {
         commit_type: CommitType::new("feat").unwrap(),
         scope:       Some(Scope::new("auth").unwrap()),
         summary:     CommitSummary::new_unchecked("added oauth support", 128).unwrap(),
         body:        vec![
            "Implemented OAuth2 flow.".to_string(),
            "Added token refresh.".to_string(),
         ],
         footers:     vec!["Closes: #789".to_string()],
      };
      let expected = "feat(auth): added oauth support\n\n- Implemented OAuth2 flow.\n- Added \
                      token refresh.\n\nCloses: #789";
      assert_eq!(format_commit_message(&commit), expected);
   }

   #[test]
   fn test_format_commit_message_nested_scope() {
      let commit = ConventionalCommit {
         commit_type: CommitType::new("refactor").unwrap(),
         scope:       Some(Scope::new("api/client").unwrap()),
         summary:     CommitSummary::new_unchecked("restructured code", 128).unwrap(),
         body:        vec![],
         footers:     vec![],
      };
      assert_eq!(format_commit_message(&commit), "refactor(api/client): restructured code");
   }
}
llm_git/normalization.rs

llm_git/
normalization.rs