Skip to main content

llm_git/
validation.rs

1use crate::{
2   config::CommitConfig,
3   error::{CommitGenError, Result},
4   git::git_command,
5   style::{self, icons},
6   types::ConventionalCommit,
7};
8
9// Static lookup tables (verbs, morphology blocklists, file extensions, filler/
10// meta phrases) are codegen'd from `src/validation_data.json` by `build.rs`,
11// defining `PAST_TENSE_MAP`, `IRREGULAR_PAST`, `ED_BLOCKLIST`, `D_BLOCKLIST`,
12// `CODE_EXTENSIONS`, `DOC_EXTENSIONS`, `FILLER_WORDS`, `META_PHRASES`, and
13// `BODY_PRESENT_TENSE`. Edit the JSON, not generated code.
14include!(concat!(env!("OUT_DIR"), "/validation_data.rs"));
15
16/// Check if an extension is a code file extension
17fn is_code_extension(ext: &str) -> bool {
18   CODE_EXTENSIONS.iter().any(|&e| e.eq_ignore_ascii_case(ext))
19}
20
21/// Get repository name from git working directory
22fn get_repository_name() -> Result<String> {
23   let output = git_command()
24      .args(["rev-parse", "--show-toplevel"])
25      .output()
26      .map_err(|e| CommitGenError::git(e.to_string()))?;
27
28   if !output.status.success() {
29      return Err(CommitGenError::git("Failed to get repository root".to_string()));
30   }
31
32   let path = String::from_utf8_lossy(&output.stdout);
33   let repo_name = std::path::Path::new(path.trim())
34      .file_name()
35      .and_then(|n| n.to_str())
36      .ok_or_else(|| CommitGenError::git("Could not extract repository name".to_string()))?;
37
38   Ok(repo_name.to_string())
39}
40
/// Produce a comparison key for a name: lowercase it, then drop every `-`
/// and `_`.
fn normalize_name(name: &str) -> String {
   let lowered = name.to_lowercase();
   lowered
      .chars()
      .filter(|c| !matches!(c, '-' | '_'))
      .collect()
}
45
46/// Look up the past-tense form of a lowercase present-tense verb.
47pub fn present_to_past(present: &str) -> Option<&'static str> {
48   PAST_TENSE_MAP
49      .iter()
50      .find(|(k, _)| *k == present)
51      .map(|(_, v)| *v)
52}
53
/// Return the lowercase leading run of ASCII letters in `token`, dropping
/// whatever follows it (dash, slash, digits, non-ASCII text, ...).
///
/// Yields `None` when `token` does not start with an ASCII letter, or when
/// the leading run is entirely uppercase (an acronym such as `API` or `NFC`),
/// since neither is treated as a verb.
pub fn verb_stem(token: &str) -> Option<String> {
   // Byte offset of the first character that is not an ASCII letter.
   let end = token
      .find(|c: char| !c.is_ascii_alphabetic())
      .unwrap_or(token.len());
   let stem = &token[..end];

   // An empty run has no verb; an all-caps run is an acronym.
   if stem.is_empty() || stem.bytes().all(|b| b.is_ascii_uppercase()) {
      return None;
   }
   Some(stem.to_ascii_lowercase())
}
75
/// Divide `token` at the end of its leading run of ASCII letters.
///
/// Returns `Some((stem, suffix))`, where `stem` is the non-empty leading run
/// and `suffix` is everything after it (possibly empty). Returns `None` when
/// `token` does not begin with an ASCII letter.
pub fn split_verb_token(token: &str) -> Option<(&str, &str)> {
   let cut = token
      .find(|c: char| !c.is_ascii_alphabetic())
      .unwrap_or(token.len());
   (cut > 0).then(|| token.split_at(cut))
}
91
92/// Check if word is past-tense verb using morphology + common irregulars.
93/// `word` should be a bare verb (no trailing suffix); use
94/// [`is_past_tense_first_word`] for raw summary tokens.
95pub fn is_past_tense_verb(word: &str) -> bool {
96   // Values in PAST_TENSE_MAP that are genuinely past-tense (key != value).
97   // Same-form entries like `("reset", "reset")` are NOT accepted here;
98   // they're handled by UNCHANGED_IRREGULAR below.
99   if PAST_TENSE_MAP
100      .iter()
101      .any(|(k, v)| *v == word && *k != *v)
102   {
103      return true;
104   }
105
106   // Regular past tense: ends with -ed
107   if word.ends_with("ed") {
108      // Exclude common false positives (words that end in -ed but aren't verbs)
109      return !ED_BLOCKLIST.contains(&word);
110   }
111
112   // Words ending in single 'd' preceded by vowel (configured, exposed, etc.)
113   // Must be at least 4 chars and not end in common non-verb patterns
114   if word.len() >= 4 && word.ends_with('d') {
115      let before_d = &word[word.len() - 2..word.len() - 1];
116      // Check if letter before 'd' is vowel (covers: configured, exposed, etc.)
117      if "aeiou".contains(before_d) {
118         return !D_BLOCKLIST.contains(&word);
119      }
120   }
121
122   IRREGULAR_PAST.contains(&word)
123}
124
125/// Check whether a summary's first raw token is a past-tense verb, tolerating
126/// trailing non-alpha suffixes (e.g. `bound-check`, `isolated-subagent`).
127///
128/// The full token is tried first (so `re-enabled` passes via `-ed`), then the
129/// stripped stem. All-caps acronyms and numeric-led tokens are rejected.
130pub fn is_past_tense_first_word(token: &str) -> bool {
131   if token.is_empty() {
132      return false;
133   }
134   // Try the full token first (covers `re-enabled`, `auto-detected`, ...).
135   if is_past_tense_verb(&token.to_ascii_lowercase()) {
136      return true;
137   }
138   // Then try the stripped stem (`bound-check` -> `bound`).
139   if let Some(stem) = verb_stem(token)
140      && is_past_tense_verb(&stem)
141   {
142      return true;
143   }
144   // Handle `re-` prefixed verbs: `re-ran`, `re-built`, `re-wrote`.
145   // `split_verb_token` gives stem="re", suffix="-ran". Parse the inner
146   // segment and check it as past tense.
147   if let Some((stem, suffix)) = split_verb_token(token)
148      && stem.eq_ignore_ascii_case("re")
149      && let Some(rest) = suffix.strip_prefix('-')
150   {
151      let inner_n = rest
152         .bytes()
153         .take_while(|&b| b.is_ascii_alphabetic())
154         .count();
155      if inner_n > 0 {
156         let inner = &rest[..inner_n];
157         // Try the inner segment as a past-tense verb (covers `re-ran`,
158         // `re-built`, `re-wrote`, `re-read`, `re-set`).
159         if is_past_tense_verb(&inner.to_ascii_lowercase()) {
160            return true;
161         }
162         // Also try converting from present tense (e.g. `re-enable` ->
163         // `enabled` is past, so the present `enable` should be accepted
164         // because normalization will convert it).
165         if present_to_past(&inner.to_ascii_lowercase()).is_some() {
166            return true;
167         }
168      }
169   }
170   false
171}
172
173/// Validate conventional commit message
174pub fn validate_commit_message(msg: &ConventionalCommit, config: &CommitConfig) -> Result<()> {
175   // Validate commit type
176   let valid_types = [
177      "feat", "fix", "refactor", "docs", "test", "chore", "style", "perf", "build", "ci", "revert",
178      "deps", "security", "config", "ux", "release", "hotfix", "infra", "init", "merge", "hack",
179      "wip",
180   ];
181   if !valid_types.contains(&msg.commit_type.as_str()) {
182      return Err(CommitGenError::InvalidCommitType(format!(
183         "Invalid commit type: '{}'. Must be one of: {}",
184         msg.commit_type,
185         valid_types.join(", ")
186      )));
187   }
188
189   // Validate scope (if present) - Scope type already validates format
190   // This is just a double-check, Scope::new() already enforces rules
191   if let Some(scope) = &msg.scope
192      && scope.is_empty()
193   {
194      return Err(CommitGenError::InvalidScope(
195         "Scope cannot be empty string (omit if not applicable)".to_string(),
196      ));
197   }
198
199   // Reject scope if it's just the project/repo name
200   if let Some(scope) = &msg.scope
201      && let Ok(repo_name) = get_repository_name()
202   {
203      let normalized_scope = normalize_name(scope.as_str());
204      let normalized_repo = normalize_name(&repo_name);
205
206      if normalized_scope == normalized_repo {
207         return Err(CommitGenError::InvalidScope(format!(
208            "Scope '{scope}' is the project name - omit scope for project-wide changes"
209         )));
210      }
211   }
212
213   // Check summary not empty
214   if msg.summary.as_str().trim().is_empty() {
215      return Err(CommitGenError::ValidationError("Summary cannot be empty".to_string()));
216   }
217
218   // Check summary does NOT end with period (conventional commits don't use
219   // periods)
220   if msg.summary.as_str().trim_end().ends_with('.') {
221      return Err(CommitGenError::ValidationError(
222         "Summary must NOT end with a period (conventional commits style)".to_string(),
223      ));
224   }
225
226   // Check first line length: type(scope): summary
227   let scope_part = msg
228      .scope
229      .as_ref()
230      .map(|s| format!("({s})"))
231      .unwrap_or_default();
232   let first_line_len = msg.commit_type.len() + scope_part.len() + 2 + msg.summary.len();
233
234   // Hard limit check (absolute maximum) - REJECT
235   if first_line_len > config.summary_hard_limit {
236      return Err(CommitGenError::SummaryTooLong {
237         len: first_line_len,
238         max: config.summary_hard_limit,
239      });
240   }
241
242   // Soft limit warning (triggers retry in main.rs) - WARN but pass
243   if first_line_len > config.summary_soft_limit {
244      style::warn(&format!(
245         "Summary exceeds soft limit: {} > {} chars (retry recommended)",
246         first_line_len, config.summary_soft_limit
247      ));
248   }
249
250   // Guideline warning (72-96 range) - INFO
251   if first_line_len > config.summary_guideline && first_line_len <= config.summary_soft_limit {
252      eprintln!(
253         "{} {}",
254         style::info(icons::INFO),
255         style::info(&format!(
256            "Summary exceeds guideline: {} > {} chars (still acceptable)",
257            first_line_len, config.summary_guideline
258         ))
259      );
260   }
261
262   // Note: lowercase check is done in CommitSummary::new() to avoid duplication
263
264   // Check first word is past-tense verb (morphology-based)
265   let first_word = msg.summary.as_str().split_whitespace().next().unwrap_or("");
266
267   if first_word.is_empty() {
268      return Err(CommitGenError::ValidationError(
269         "Summary must contain at least one word".to_string(),
270      ));
271   }
272
273   if !is_past_tense_first_word(first_word) {
274      return Err(CommitGenError::ValidationError(format!(
275         "Summary must start with a past-tense verb (ending in -ed/-d or irregular). Got \
276          '{first_word}'"
277      )));
278   }
279
280   // Check for type-word repetition
281   let type_word = msg.commit_type.as_str();
282   let first_word_lower = first_word.to_lowercase();
283   if first_word_lower == type_word {
284      return Err(CommitGenError::ValidationError(format!(
285         "Summary repeats commit type '{type_word}': first word is '{first_word}'"
286      )));
287   }
288
289   // Check for filler words (removed "improved"/"enhanced" as they're valid
290   // past-tense verbs)
291   for filler in FILLER_WORDS {
292      if msg.summary.as_str().to_lowercase().contains(filler) {
293         style::warn(&format!("Summary contains filler word '{}': {}", filler, msg.summary));
294      }
295   }
296
297   // Check for meta-phrases that add no information
298   for phrase in META_PHRASES {
299      if msg.summary.as_str().to_lowercase().contains(phrase) {
300         style::warn(&format!(
301            "Summary contains meta-phrase '{phrase}' - be more specific about what changed"
302         ));
303      }
304   }
305
306   // Final length check after all potential mutations
307   let final_scope_part = msg
308      .scope
309      .as_ref()
310      .map(|s| format!("({s})"))
311      .unwrap_or_default();
312   let final_first_line_len =
313      msg.commit_type.len() + final_scope_part.len() + 2 + msg.summary.len();
314
315   if final_first_line_len > config.summary_hard_limit {
316      return Err(CommitGenError::SummaryTooLong {
317         len: final_first_line_len,
318         max: config.summary_hard_limit,
319      });
320   }
321
322   // Validate body items
323   for item in &msg.body {
324      let first_word = item.split_whitespace().next().unwrap_or("");
325      if BODY_PRESENT_TENSE
326         .iter()
327         .any(|&word| first_word.to_lowercase() == word)
328      {
329         style::warn(&format!("Body item uses present tense: '{item}'"));
330      }
331      if !item.trim_end().ends_with('.') {
332         style::warn(&format!("Body item missing period: '{item}'"));
333      }
334   }
335
336   Ok(())
337}
338
339/// Check type-scope consistency (warn if mismatched)
340pub fn check_type_scope_consistency(msg: &ConventionalCommit, stat: &str) {
341   let commit_type = msg.commit_type.as_str();
342
343   // Check for docs type
344   if commit_type == "docs" {
345      let has_docs = stat.lines().any(|line| {
346         let path = line.split('|').next().unwrap_or("").trim();
347         let is_doc_file = std::path::Path::new(&path)
348            .extension()
349            .and_then(|ext| ext.to_str())
350            .is_some_and(|ext| DOC_EXTENSIONS.contains(&ext.to_ascii_lowercase().as_str()));
351         is_doc_file
352            || path.to_lowercase().contains("/docs/")
353            || path.to_lowercase().contains("readme")
354      });
355      if !has_docs {
356         style::warn("Commit type 'docs' but no documentation files changed");
357      }
358   }
359
360   // Check for test type
361   if commit_type == "test" {
362      let has_test = stat.lines().any(|line| {
363         let path = line.split('|').next().unwrap_or("").trim().to_lowercase();
364         path.contains("/test") || path.contains("_test.") || path.contains(".test.")
365      });
366      if !has_test {
367         style::warn("Commit type 'test' but no test files changed");
368      }
369   }
370
371   // Check for style type (should be mostly whitespace/formatting)
372   if commit_type == "style" {
373      let has_code = stat.lines().any(|line| {
374         let path = line.split('|').next().unwrap_or("").trim();
375         let path_obj = std::path::Path::new(&path);
376         path_obj
377            .extension()
378            .is_some_and(|ext| is_code_extension(ext.to_str().unwrap_or("")))
379      });
380      if has_code {
381         style::warn("Commit type 'style' but code files changed (verify no logic changes)");
382      }
383   }
384
385   // Check for ci type
386   if commit_type == "ci" {
387      let has_ci = stat.lines().any(|line| {
388         let path = line.split('|').next().unwrap_or("").trim().to_lowercase();
389         path.contains(".github/workflows")
390            || path.contains(".gitlab-ci")
391            || path.contains("jenkinsfile")
392      });
393      if !has_ci {
394         style::warn("Commit type 'ci' but no CI configuration files changed");
395      }
396   }
397
398   // Check for build type
399   if commit_type == "build" {
400      let has_build = stat.lines().any(|line| {
401         let path = line.split('|').next().unwrap_or("").trim().to_lowercase();
402         path.contains("cargo.toml")
403            || path.contains("package.json")
404            || path.contains("makefile")
405            || path.contains("build.")
406      });
407      if !has_build {
408         style::warn("Commit type 'build' but no build files (Cargo.toml, package.json) changed");
409      }
410   }
411
412   // Check for refactor with new files (might actually be feat)
413   if commit_type == "refactor" {
414      let has_new_files = stat
415         .lines()
416         .any(|line| line.trim().starts_with("create mode") || line.contains("new file"));
417      if has_new_files {
418         style::warn(
419            "Commit type 'refactor' but new files were created - verify no new capabilities added \
420             (might be 'feat')",
421         );
422      }
423   }
424
425   // Check for perf type without performance evidence
426   if commit_type == "perf" {
427      let has_perf_files = stat.lines().any(|line| {
428         let path = line.split('|').next().unwrap_or("").trim().to_lowercase();
429         path.contains("bench") || path.contains("perf") || path.contains("profile")
430      });
431
432      // Check if details mention performance
433      let details_text = msg.body.join(" ").to_lowercase();
434      let has_perf_details = details_text.contains("faster")
435         || details_text.contains("optimization")
436         || details_text.contains("performance")
437         || details_text.contains("optimized");
438
439      if !has_perf_files && !has_perf_details {
440         style::warn(
441            "Commit type 'perf' but no performance-related files or optimization keywords found",
442         );
443      }
444   }
445}
446
447#[cfg(test)]
448mod tests {
449   use super::*;
450   use crate::types::{CommitSummary, CommitType, ConventionalCommit, Scope};
451
452   fn create_commit(
453      type_str: &str,
454      scope: Option<&str>,
455      summary: &str,
456      body: Vec<&str>,
457   ) -> ConventionalCommit {
458      ConventionalCommit {
459         commit_type: CommitType::new(type_str).unwrap(),
460         scope:       scope.map(|s| Scope::new(s).unwrap()),
461         summary:     CommitSummary::new_unchecked(summary, 128).unwrap(),
462         body:        body.into_iter().map(|s| s.to_string()).collect(),
463         footers:     vec![],
464      }
465   }
466
467   #[test]
468   fn test_validate_valid_commit() {
469      let config = CommitConfig::default();
470      let msg = create_commit("feat", Some("api"), "added new endpoint", vec![]);
471      assert!(validate_commit_message(&msg, &config).is_ok());
472   }
473
474   #[test]
475   fn test_validate_valid_commit_no_scope() {
476      let config = CommitConfig::default();
477      let msg = create_commit("fix", None, "corrected race condition", vec![]);
478      assert!(validate_commit_message(&msg, &config).is_ok());
479   }
480
481   #[test]
482   fn test_validate_invalid_type() {
483      let _config = CommitConfig::default();
484      let result = CommitType::new("invalid");
485      assert!(result.is_err());
486      assert!(matches!(result.unwrap_err(), CommitGenError::InvalidCommitType(_)));
487   }
488
489   #[test]
490   fn test_validate_summary_ends_with_period() {
491      let config = CommitConfig::default();
492      let msg = create_commit("feat", Some("api"), "added endpoint.", vec![]);
493      let result = validate_commit_message(&msg, &config);
494      assert!(result.is_err());
495      assert!(
496         result
497            .unwrap_err()
498            .to_string()
499            .contains("must NOT end with a period")
500      );
501   }
502
503   #[test]
504   fn test_validate_summary_too_long() {
505      // CommitSummary::new() enforces 128 char hard limit on summary alone
506      let long_summary = "a".repeat(129);
507      let result = CommitSummary::new(&long_summary, 128);
508      assert!(result.is_err());
509      assert!(matches!(result.unwrap_err(), CommitGenError::SummaryTooLong { .. }));
510   }
511
512   #[test]
513   fn test_validate_summary_empty() {
514      let result = CommitSummary::new("", 128);
515      assert!(result.is_err());
516      assert!(matches!(result.unwrap_err(), CommitGenError::ValidationError(_)));
517   }
518
519   #[test]
520   fn test_validate_summary_empty_whitespace() {
521      let result = CommitSummary::new("   ", 128);
522      assert!(result.is_err());
523      assert!(matches!(result.unwrap_err(), CommitGenError::ValidationError(_)));
524   }
525
526   #[test]
527   fn test_validate_wrong_verb() {
528      let config = CommitConfig::default();
529      let result = CommitSummary::new_unchecked("adding new feature", 128);
530      assert!(result.is_ok());
531      let msg = ConventionalCommit {
532         commit_type: CommitType::new("feat").unwrap(),
533         scope:       None,
534         summary:     result.unwrap(),
535         body:        vec![],
536         footers:     vec![],
537      };
538      let result = validate_commit_message(&msg, &config);
539      assert!(result.is_err());
540      assert!(
541         result
542            .unwrap_err()
543            .to_string()
544            .contains("must start with a past-tense verb")
545      );
546   }
547
548   #[test]
549   fn test_validate_present_tense_verb() {
550      let config = CommitConfig::default();
551      let result = CommitSummary::new_unchecked("adds new feature", 128);
552      assert!(result.is_ok());
553      let msg = ConventionalCommit {
554         commit_type: CommitType::new("feat").unwrap(),
555         scope:       None,
556         summary:     result.unwrap(),
557         body:        vec![],
558         footers:     vec![],
559      };
560      let result = validate_commit_message(&msg, &config);
561      assert!(result.is_err());
562      assert!(
563         result
564            .unwrap_err()
565            .to_string()
566            .contains("must start with a past-tense verb")
567      );
568   }
569
570   #[test]
571   fn test_validate_no_type_verb_overlap() {
572      // This test verifies that using a related verb doesn't trigger false positives
573      // "documented" is valid for "docs" type since they're not exact matches
574      let config = CommitConfig::default();
575      let msg = create_commit("docs", Some("api"), "documented new api", vec![]);
576      assert!(validate_commit_message(&msg, &config).is_ok());
577
578      // "tested" is valid for "test" type
579      let msg = create_commit("test", Some("api"), "added unit tests", vec![]);
580      assert!(validate_commit_message(&msg, &config).is_ok());
581   }
582
583   #[test]
584   fn test_validate_morphology_based_past_tense() {
585      let config = CommitConfig::default();
586      // Test regular -ed endings
587      let regular_verbs = ["added", "configured", "exposed", "formatted", "clarified"];
588      for verb in regular_verbs {
589         let summary = format!("{verb} something");
590         let msg = create_commit("feat", None, &summary, vec![]);
591         assert!(
592            validate_commit_message(&msg, &config).is_ok(),
593            "Regular verb '{verb}' should be accepted"
594         );
595      }
596
597      // Test irregular verbs
598      let irregular_verbs = ["made", "built", "ran", "wrote", "split"];
599      for verb in irregular_verbs {
600         let summary = format!("{verb} something");
601         let msg = create_commit("feat", None, &summary, vec![]);
602         assert!(
603            validate_commit_message(&msg, &config).is_ok(),
604            "Irregular verb '{verb}' should be accepted"
605         );
606      }
607
608      // Test false positives (should be rejected)
609      let non_verbs = ["hundred", "red", "bed"];
610      for word in non_verbs {
611         let summary = format!("{word} something");
612         let msg = ConventionalCommit {
613            commit_type: CommitType::new("feat").unwrap(),
614            scope:       None,
615            summary:     CommitSummary::new_unchecked(&summary, 128).unwrap(),
616            body:        vec![],
617            footers:     vec![],
618         };
619         assert!(
620            validate_commit_message(&msg, &config).is_err(),
621            "Non-verb '{word}' should be rejected"
622         );
623      }
624   }
625
626   #[test]
627   fn test_validate_scope_empty_string() {
628      let result = Scope::new("");
629      assert!(result.is_err());
630      assert!(matches!(result.unwrap_err(), CommitGenError::InvalidScope(_)));
631   }
632
633   #[test]
634   fn test_validate_scope_invalid_chars() {
635      let result = Scope::new("API/New");
636      assert!(result.is_err());
637      assert!(matches!(result.unwrap_err(), CommitGenError::InvalidScope(_)));
638   }
639
640   #[test]
641   fn test_validate_scope_too_many_segments() {
642      let result = Scope::new("core/api/http");
643      assert!(result.is_err());
644      assert!(result.unwrap_err().to_string().contains("max 2 allowed"));
645   }
646
647   #[test]
648   fn test_validate_scope_valid_single() {
649      let result = Scope::new("api");
650      assert!(result.is_ok());
651   }
652
653   #[test]
654   fn test_validate_scope_valid_two_segments() {
655      let result = Scope::new("core/api");
656      assert!(result.is_ok());
657   }
658
659   #[test]
660   fn test_validate_scope_with_dash_underscore() {
661      let result = Scope::new("core_api/http-client");
662      assert!(result.is_ok());
663   }
664
665   #[test]
666   fn test_validate_total_length_at_guideline() {
667      let config = CommitConfig::default();
668      // type(scope): summary = exactly 72 chars (guideline)
669      // "feat(scope): " = 13 chars, summary = 59 chars, starts with valid verb
670      let summary = format!("added {}", "x".repeat(53));
671      let msg = create_commit("feat", Some("scope"), &summary, vec![]);
672      // Should pass (with info message about being at guideline)
673      assert!(validate_commit_message(&msg, &config).is_ok());
674   }
675
676   #[test]
677   fn test_validate_total_length_at_soft_limit() {
678      let config = CommitConfig::default();
679      // type(scope): summary = exactly 96 chars (soft limit)
680      // "feat(scope): " = 13 chars, summary = 83 chars
681      let summary = format!("added {}", "x".repeat(77));
682      let msg = create_commit("feat", Some("scope"), &summary, vec![]);
683      // Should pass (with warning about soft limit)
684      assert!(validate_commit_message(&msg, &config).is_ok());
685   }
686
687   #[test]
688   fn test_validate_total_length_at_hard_limit() {
689      let config = CommitConfig::default();
690      // type(scope): summary = exactly 128 chars (hard limit)
691      // "feat(scope): " = 13 chars, summary = 115 chars
692      let summary = format!("added {}", "x".repeat(109));
693      let msg = create_commit("feat", Some("scope"), &summary, vec![]);
694      // Should pass (at hard limit)
695      assert!(validate_commit_message(&msg, &config).is_ok());
696   }
697
698   #[test]
699   fn test_validate_total_length_over_hard_limit() {
700      let config = CommitConfig::default();
701      // type(scope): summary > 128 chars (exceeds hard limit)
702      // "feat(scope): " = 13 chars, summary = 116 chars (total 129)
703      let summary = "a".repeat(116);
704      let msg = ConventionalCommit {
705         commit_type: CommitType::new("feat").unwrap(),
706         scope:       Some(Scope::new("scope").unwrap()),
707         summary:     CommitSummary::new_unchecked(&summary, 128).unwrap(),
708         body:        vec![],
709         footers:     vec![],
710      };
711      let result = validate_commit_message(&msg, &config);
712      assert!(result.is_err());
713      assert!(matches!(result.unwrap_err(), CommitGenError::SummaryTooLong { .. }));
714   }
715
716   #[test]
717   fn test_check_type_scope_docs_with_md() {
718      let msg = create_commit("docs", Some("readme"), "updated installation guide", vec![]);
719      let stat = " README.md | 10 +++++++---\n 1 file changed, 7 insertions(+), 3 deletions(-)";
720      // Should not print warning
721      check_type_scope_consistency(&msg, stat);
722   }
723
724   #[test]
725   fn test_check_type_scope_docs_without_md() {
726      let msg = create_commit("docs", None, "updated documentation", vec![]);
727      let stat = " src/main.rs | 10 +++++++---\n 1 file changed, 7 insertions(+), 3 deletions(-)";
728      // Should print warning (but we can't test stderr easily)
729      check_type_scope_consistency(&msg, stat);
730   }
731
732   #[test]
733   fn test_check_type_scope_test_with_test_files() {
734      let msg = create_commit("test", Some("api"), "added integration tests", vec![]);
735      let stat = " tests/integration_test.rs | 50 ++++++++++++++++++++++++++++++++\n";
736      check_type_scope_consistency(&msg, stat);
737   }
738
739   #[test]
740   fn test_check_type_scope_test_without_test_files() {
741      let msg = create_commit("test", None, "added tests", vec![]);
742      let stat = " src/lib.rs | 10 +++++++---\n";
743      check_type_scope_consistency(&msg, stat);
744   }
745
746   #[test]
747   fn test_check_type_scope_refactor_new_files() {
748      let msg = create_commit("refactor", Some("core"), "restructured modules", vec![]);
749      let stat = " create mode 100644 src/new_module.rs\n src/lib.rs | 10 +++++++---\n";
750      check_type_scope_consistency(&msg, stat);
751   }
752
753   #[test]
754   fn test_check_type_scope_ci_with_workflow() {
755      let msg = create_commit("ci", None, "updated github actions", vec![]);
756      let stat = " .github/workflows/ci.yml | 20 ++++++++++++++++++++\n";
757      check_type_scope_consistency(&msg, stat);
758   }
759
760   #[test]
761   fn test_check_type_scope_build_with_cargo() {
762      let msg = create_commit("build", Some("deps"), "updated dependencies", vec![]);
763      let stat = " Cargo.toml | 5 +++--\n Cargo.lock | 150 +++++++++++++++++++\n";
764      check_type_scope_consistency(&msg, stat);
765   }
766
767   #[test]
768   fn test_check_type_scope_perf_with_details() {
769      let msg = create_commit("perf", Some("core"), "optimized batch processing", vec![
770         "reduced allocations by 50% for faster throughput.",
771      ]);
772      let stat = " src/core.rs | 30 +++++++++++++-----------------\n";
773      check_type_scope_consistency(&msg, stat);
774   }
775
776   #[test]
777   fn test_check_type_scope_perf_without_evidence() {
778      let msg = create_commit("perf", None, "changed algorithm", vec![]);
779      let stat = " src/lib.rs | 10 +++++++---\n";
780      check_type_scope_consistency(&msg, stat);
781   }
782
783   #[test]
784   fn test_validate_body_present_tense_warning() {
785      let config = CommitConfig::default();
786      let msg = create_commit("feat", None, "added new feature", vec![
787         "adds support for TLS.",
788         "updates configuration.",
789      ]);
790      // Should succeed but print warnings (we can't easily test stderr)
791      assert!(validate_commit_message(&msg, &config).is_ok());
792   }
793
794   #[test]
795   fn test_validate_body_missing_period_warning() {
796      let config = CommitConfig::default();
797      let msg = create_commit("feat", None, "added new feature", vec![
798         "added support for TLS",
799         "updated configuration",
800      ]);
801      // Should succeed but print warnings
802      assert!(validate_commit_message(&msg, &config).is_ok());
803   }
804
805   #[test]
806   fn test_commit_type_case_normalization() {
807      assert!(CommitType::new("FEAT").is_ok());
808      assert!(CommitType::new("Feat").is_ok());
809      assert!(CommitType::new("feat").is_ok());
810      assert_eq!(CommitType::new("FEAT").unwrap().as_str(), "feat");
811   }
812
813   #[test]
814   fn test_commit_type_all_valid() {
815      let valid_types = [
816         "feat", "fix", "refactor", "docs", "test", "chore", "style", "perf", "build", "ci",
817         "revert",
818      ];
819      for t in &valid_types {
820         assert!(CommitType::new(*t).is_ok(), "Type '{t}' should be valid");
821      }
822   }
823
824   #[test]
825   fn test_summary_length_boundaries() {
826      // Guideline (72) - should pass
827      let summary_72 = "a".repeat(72);
828      assert!(CommitSummary::new(&summary_72, 128).is_ok());
829
830      // Soft limit (96) - should pass
831      let summary_96 = "a".repeat(96);
832      assert!(CommitSummary::new(&summary_96, 128).is_ok());
833
834      // Hard limit (128) - should pass
835      let summary_128 = "a".repeat(128);
836      assert!(CommitSummary::new(&summary_128, 128).is_ok());
837
838      // Over hard limit (129) - should fail
839      let summary_129 = "a".repeat(129);
840      let result = CommitSummary::new(&summary_129, 128);
841      assert!(result.is_err());
842      match result.unwrap_err() {
843         CommitGenError::SummaryTooLong { len, max } => {
844            assert_eq!(len, 129);
845            assert_eq!(max, 128);
846         },
847         _ => panic!("Expected SummaryTooLong error"),
848      }
849   }
850
851   #[test]
852   fn test_is_past_tense_verb_map_values() {
853      // Values from PAST_TENSE_MAP should be accepted as past tense
854      assert!(is_past_tense_verb("hardened"));
855      assert!(is_past_tense_verb("bound"));
856      assert!(is_past_tense_verb("isolated"));
857      assert!(is_past_tense_verb("guarded"));
858      assert!(is_past_tense_verb("rebuilt"));
859      assert!(is_past_tense_verb("rewrote"));
860      assert!(is_past_tense_verb("reran"));
861   }
862
863   #[test]
864   fn test_is_past_tense_verb_same_form_not_accepted_via_map() {
865      // Same-form entries (key == value) should NOT be accepted via the map
866      // check. "reset" is accepted via IRREGULAR_PAST, not the map.
867      // But "setup" was removed from IRREGULAR_PAST, so it should NOT pass.
868      assert!(!is_past_tense_verb("setup"));
869      // "reset" is in IRREGULAR_PAST so it passes
870      assert!(is_past_tense_verb("reset"));
871   }
872
873   #[test]
874   fn test_is_past_tense_first_word_suffix_tolerance() {
875      // Trailing non-alpha suffix should be stripped for stem check
876      assert!(is_past_tense_first_word("bound-check"));
877      assert!(is_past_tense_first_word("isolated-subagent"));
878      assert!(is_past_tense_first_word("re-enabled"));
879      assert!(is_past_tense_first_word("auto-detected"));
880      // Full token that is past tense via -ed
881      assert!(is_past_tense_first_word("hardened"));
882   }
883
884   #[test]
885   fn test_is_past_tense_first_word_acronyms_rejected() {
886      // All-caps acronyms should be rejected
887      assert!(!is_past_tense_first_word("API"));
888      assert!(!is_past_tense_first_word("NFC"));
889      assert!(!is_past_tense_first_word("LSP"));
890   }
891
892   #[test]
893   fn test_is_past_tense_first_word_numeric_rejected() {
894      // Numeric-led tokens should be rejected
895      assert!(!is_past_tense_first_word("403"));
896      assert!(!is_past_tense_first_word("v1.0"));
897      assert!(!is_past_tense_first_word("2.0.0"));
898   }
899
900   #[test]
901   fn test_is_past_tense_first_word_re_prefix() {
902      // re-ran: inner segment "ran" is past tense
903      assert!(is_past_tense_first_word("re-ran"));
904      // re-built: inner segment "built" is past tense
905      assert!(is_past_tense_first_word("re-built"));
906      // re-wrote: inner segment "wrote" is past tense
907      assert!(is_past_tense_first_word("re-wrote"));
908      // re-enabled: full token ends in -ed, passes via full token check
909      assert!(is_past_tense_first_word("re-enabled"));
910      // re-enable: inner "enable" is present tense but in map, so accepted
911      // (normalization will convert it to re-enabled)
912      assert!(is_past_tense_first_word("re-enable"));
913      // re-read: inner "read" is unchanged irregular
914      assert!(is_past_tense_first_word("re-read"));
915      // re-reset: inner "reset" is unchanged irregular
916      assert!(is_past_tense_first_word("re-reset"));
917   }
918
919   #[test]
920   fn test_is_past_tense_first_word_re_prefix_rejected() {
921      // re- with non-verb inner segment should be rejected
922      assert!(!is_past_tense_first_word("re-foo"));
923      assert!(!is_past_tense_first_word("re-123"));
924   }
925
926   #[test]
927   fn test_verb_stem_extraction() {
928      assert_eq!(verb_stem("bound-check"), Some("bound".to_string()));
929      assert_eq!(verb_stem("isolated-subagent"), Some("isolated".to_string()));
930      assert_eq!(verb_stem("harden"), Some("harden".to_string()));
931      // All-caps -> None (acronym)
932      assert_eq!(verb_stem("API"), None);
933      assert_eq!(verb_stem("NFC"), None);
934      // No leading letters -> None
935      assert_eq!(verb_stem("403"), None);
936      assert_eq!(verb_stem(""), None);
937   }
938
939   #[test]
940   fn test_split_verb_token() {
941      assert_eq!(split_verb_token("bound-check"), Some(("bound", "-check")));
942      assert_eq!(split_verb_token("harden"), Some(("harden", "")));
943      assert_eq!(split_verb_token("fix(tui):"), Some(("fix", "(tui):")));
944      assert_eq!(split_verb_token("403"), None);
945   }
946
947   #[test]
948   fn test_present_to_past_lookup() {
949      assert_eq!(present_to_past("harden"), Some("hardened"));
950      assert_eq!(present_to_past("bind"), Some("bound"));
951      assert_eq!(present_to_past("isolate"), Some("isolated"));
952      assert_eq!(present_to_past("rebuild"), Some("rebuilt"));
953      assert_eq!(present_to_past("nonexistent"), None);
954   }
955
956   #[test]
957   fn test_validate_bound_and_hardened() {
958      let config = CommitConfig::default();
959      // "bound" should pass validation (the original failing case)
960      let msg = create_commit("fix", Some("stealth"), "bound native Reflect methods to variables", vec![]);
961      assert!(
962         validate_commit_message(&msg, &config).is_ok(),
963         "'bound' should be accepted as past-tense verb"
964      );
965      // "hardened" should pass validation
966      let msg = create_commit("fix", Some("stealth"), "hardened stealth scripts against detection", vec![]);
967      assert!(
968         validate_commit_message(&msg, &config).is_ok(),
969         "'hardened' should be accepted as past-tense verb"
970      );
971   }
972
973   #[test]
974   fn test_validate_bound_check_suffix() {
975      let config = CommitConfig::default();
976      // "bound-check" should pass via stem extraction
977      let msg = create_commit("fix", None, "bound-checked the inputs", vec![]);
978      assert!(
979         validate_commit_message(&msg, &config).is_ok(),
980         "'bound-checked' should be accepted as past-tense verb"
981      );
982   }
983}