Skip to main content

llm_git/
validation.rs

1use crate::{
2   config::CommitConfig,
3   error::{CommitGenError, Result},
4   git::git_command,
5   style::{self, icons},
6   types::ConventionalCommit,
7};
8
9// Static lookup tables (verbs, morphology blocklists, file extensions, filler/
10// meta phrases) are codegen'd from `src/validation_data.json` by `build.rs`,
11// defining `PAST_TENSE_MAP`, `IRREGULAR_PAST`, `ED_BLOCKLIST`, `D_BLOCKLIST`,
12// `CODE_EXTENSIONS`, `DOC_EXTENSIONS`, `FILLER_WORDS`, `META_PHRASES`, and
13// `BODY_PRESENT_TENSE`. Edit the JSON, not generated code.
14include!(concat!(env!("OUT_DIR"), "/validation_data.rs"));
15
16/// Check if an extension is a code file extension
17fn is_code_extension(ext: &str) -> bool {
18   CODE_EXTENSIONS.iter().any(|&e| e.eq_ignore_ascii_case(ext))
19}
20
21/// Get repository name from git working directory
22fn get_repository_name() -> Result<String> {
23   let output = git_command()
24      .args(["rev-parse", "--show-toplevel"])
25      .output()
26      .map_err(|e| CommitGenError::git(e.to_string()))?;
27
28   if !output.status.success() {
29      return Err(CommitGenError::git("Failed to get repository root".to_string()));
30   }
31
32   let path = String::from_utf8_lossy(&output.stdout);
33   let repo_name = std::path::Path::new(path.trim())
34      .file_name()
35      .and_then(|n| n.to_str())
36      .ok_or_else(|| CommitGenError::git("Could not extract repository name".to_string()))?;
37
38   Ok(repo_name.to_string())
39}
40
/// Normalize a name for comparison: lowercase it and drop every `-` and `_`.
fn normalize_name(name: &str) -> String {
   // Lowercase the whole string first so context-sensitive case mapping sees
   // the original text, then filter out the separators.
   let lowered = name.to_lowercase();
   lowered
      .chars()
      .filter(|c| *c != '-' && *c != '_')
      .collect()
}
45
46/// Look up the past-tense form of a lowercase present-tense verb.
47pub fn present_to_past(present: &str) -> Option<&'static str> {
48   PAST_TENSE_MAP
49      .iter()
50      .find(|(k, _)| *k == present)
51      .map(|(_, v)| *v)
52}
53
/// Extract the lowercase verb stem from a first-word token.
///
/// The stem is the leading run of ASCII letters; anything after it (dash,
/// slash, digits, ...) is discarded.
///
/// Returns `None` when the token does not start with an ASCII letter, or when
/// the leading run is entirely uppercase (an acronym such as `API` or `NFC`),
/// since such tokens are not verbs to convert.
pub fn verb_stem(token: &str) -> Option<String> {
   // Byte offset of the first character that is not an ASCII letter.
   let end = token
      .find(|c: char| !c.is_ascii_alphabetic())
      .unwrap_or(token.len());
   let head = &token[..end];

   // `head` is pure ASCII letters, so a byte-wise uppercase test is exact.
   if head.is_empty() || head.bytes().all(|b| b.is_ascii_uppercase()) {
      return None;
   }
   Some(head.to_ascii_lowercase())
}
75
/// Split a first-word token into `(stem, suffix)`.
///
/// `stem` is the leading run of ASCII letters and `suffix` is everything
/// after it (possibly empty). Returns `None` when the token does not begin
/// with an ASCII letter.
pub fn split_verb_token(token: &str) -> Option<(&str, &str)> {
   // Byte offset of the first character that is not an ASCII letter.
   let boundary = token
      .find(|c: char| !c.is_ascii_alphabetic())
      .unwrap_or(token.len());

   match boundary {
      0 => None,
      _ => Some(token.split_at(boundary)),
   }
}
91
92/// Check if word is past-tense verb using morphology + common irregulars.
93/// `word` should be a bare verb (no trailing suffix); use
94/// [`is_past_tense_first_word`] for raw summary tokens.
95pub fn is_past_tense_verb(word: &str) -> bool {
96   // Values in PAST_TENSE_MAP that are genuinely past-tense (key != value).
97   // Same-form entries like `("reset", "reset")` are NOT accepted here;
98   // they're handled by UNCHANGED_IRREGULAR below.
99   if PAST_TENSE_MAP.iter().any(|(k, v)| *v == word && *k != *v) {
100      return true;
101   }
102
103   // Regular past tense: ends with -ed
104   if word.ends_with("ed") {
105      // Exclude common false positives (words that end in -ed but aren't verbs)
106      return !ED_BLOCKLIST.contains(&word);
107   }
108
109   // Words ending in single 'd' preceded by vowel (configured, exposed, etc.)
110   // Must be at least 4 chars and not end in common non-verb patterns
111   if word.len() >= 4 && word.ends_with('d') {
112      let before_d = &word[word.len() - 2..word.len() - 1];
113      // Check if letter before 'd' is vowel (covers: configured, exposed, etc.)
114      if "aeiou".contains(before_d) {
115         return !D_BLOCKLIST.contains(&word);
116      }
117   }
118
119   IRREGULAR_PAST.contains(&word)
120}
121
122/// Check whether a summary's first raw token is a past-tense verb, tolerating
123/// trailing non-alpha suffixes (e.g. `bound-check`, `isolated-subagent`).
124///
125/// The full token is tried first (so `re-enabled` passes via `-ed`), then the
126/// stripped stem. All-caps acronyms and numeric-led tokens are rejected.
127pub fn is_past_tense_first_word(token: &str) -> bool {
128   if token.is_empty() {
129      return false;
130   }
131   // Try the full token first (covers `re-enabled`, `auto-detected`, ...).
132   if is_past_tense_verb(&token.to_ascii_lowercase()) {
133      return true;
134   }
135   // Then try the stripped stem (`bound-check` -> `bound`).
136   if let Some(stem) = verb_stem(token)
137      && is_past_tense_verb(&stem)
138   {
139      return true;
140   }
141   // Handle `re-` prefixed verbs: `re-ran`, `re-built`, `re-wrote`.
142   // `split_verb_token` gives stem="re", suffix="-ran". Parse the inner
143   // segment and check it as past tense.
144   if let Some((stem, suffix)) = split_verb_token(token)
145      && stem.eq_ignore_ascii_case("re")
146      && let Some(rest) = suffix.strip_prefix('-')
147   {
148      let inner_n = rest
149         .bytes()
150         .take_while(|&b| b.is_ascii_alphabetic())
151         .count();
152      if inner_n > 0 {
153         let inner = &rest[..inner_n];
154         // Try the inner segment as a past-tense verb (covers `re-ran`,
155         // `re-built`, `re-wrote`, `re-read`, `re-set`).
156         if is_past_tense_verb(&inner.to_ascii_lowercase()) {
157            return true;
158         }
159         // Also try converting from present tense (e.g. `re-enable` ->
160         // `enabled` is past, so the present `enable` should be accepted
161         // because normalization will convert it).
162         if present_to_past(&inner.to_ascii_lowercase()).is_some() {
163            return true;
164         }
165      }
166   }
167   false
168}
169
170/// Validate conventional commit message
171pub fn validate_commit_message(msg: &ConventionalCommit, config: &CommitConfig) -> Result<()> {
172   // Validate commit type
173   let valid_types = [
174      "feat", "fix", "refactor", "docs", "test", "chore", "style", "perf", "build", "ci", "revert",
175      "deps", "security", "config", "ux", "release", "hotfix", "infra", "init", "merge", "hack",
176      "wip",
177   ];
178   if !valid_types.contains(&msg.commit_type.as_str()) {
179      return Err(CommitGenError::InvalidCommitType(format!(
180         "Invalid commit type: '{}'. Must be one of: {}",
181         msg.commit_type,
182         valid_types.join(", ")
183      )));
184   }
185
186   // Validate scope (if present) - Scope type already validates format
187   // This is just a double-check, Scope::new() already enforces rules
188   if let Some(scope) = &msg.scope
189      && scope.is_empty()
190   {
191      return Err(CommitGenError::InvalidScope(
192         "Scope cannot be empty string (omit if not applicable)".to_string(),
193      ));
194   }
195
196   // Reject scope if it's just the project/repo name
197   if let Some(scope) = &msg.scope
198      && let Ok(repo_name) = get_repository_name()
199   {
200      let normalized_scope = normalize_name(scope.as_str());
201      let normalized_repo = normalize_name(&repo_name);
202
203      if normalized_scope == normalized_repo {
204         return Err(CommitGenError::InvalidScope(format!(
205            "Scope '{scope}' is the project name - omit scope for project-wide changes"
206         )));
207      }
208   }
209
210   // Check summary not empty
211   if msg.summary.as_str().trim().is_empty() {
212      return Err(CommitGenError::ValidationError("Summary cannot be empty".to_string()));
213   }
214
215   // Check summary does NOT end with period (conventional commits don't use
216   // periods)
217   if msg.summary.as_str().trim_end().ends_with('.') {
218      return Err(CommitGenError::ValidationError(
219         "Summary must NOT end with a period (conventional commits style)".to_string(),
220      ));
221   }
222
223   // Check first line length: type(scope): summary
224   let scope_part = msg
225      .scope
226      .as_ref()
227      .map(|s| format!("({s})"))
228      .unwrap_or_default();
229   let first_line_len = msg.commit_type.len() + scope_part.len() + 2 + msg.summary.len();
230
231   // Hard limit check (absolute maximum) - REJECT
232   if first_line_len > config.summary_hard_limit {
233      return Err(CommitGenError::SummaryTooLong {
234         len: first_line_len,
235         max: config.summary_hard_limit,
236      });
237   }
238
239   // Soft limit warning (triggers retry in main.rs) - WARN but pass
240   if first_line_len > config.summary_soft_limit {
241      style::warn(&format!(
242         "Summary exceeds soft limit: {} > {} chars (retry recommended)",
243         first_line_len, config.summary_soft_limit
244      ));
245   }
246
247   // Guideline warning (72-96 range) - INFO
248   if first_line_len > config.summary_guideline && first_line_len <= config.summary_soft_limit {
249      eprintln!(
250         "{} {}",
251         style::info(icons::INFO),
252         style::info(&format!(
253            "Summary exceeds guideline: {} > {} chars (still acceptable)",
254            first_line_len, config.summary_guideline
255         ))
256      );
257   }
258
259   // Note: lowercase check is done in CommitSummary::new() to avoid duplication
260
261   // Check first word is past-tense verb (morphology-based)
262   let first_word = msg.summary.as_str().split_whitespace().next().unwrap_or("");
263
264   if first_word.is_empty() {
265      return Err(CommitGenError::ValidationError(
266         "Summary must contain at least one word".to_string(),
267      ));
268   }
269
270   if !is_past_tense_first_word(first_word) {
271      return Err(CommitGenError::ValidationError(format!(
272         "Summary must start with a past-tense verb (ending in -ed/-d or irregular). Got \
273          '{first_word}'"
274      )));
275   }
276
277   // Check for type-word repetition
278   let type_word = msg.commit_type.as_str();
279   let first_word_lower = first_word.to_lowercase();
280   if first_word_lower == type_word {
281      return Err(CommitGenError::ValidationError(format!(
282         "Summary repeats commit type '{type_word}': first word is '{first_word}'"
283      )));
284   }
285
286   // Check for filler words (removed "improved"/"enhanced" as they're valid
287   // past-tense verbs)
288   for filler in FILLER_WORDS {
289      if msg.summary.as_str().to_lowercase().contains(filler) {
290         style::warn(&format!("Summary contains filler word '{}': {}", filler, msg.summary));
291      }
292   }
293
294   // Check for meta-phrases that add no information
295   for phrase in META_PHRASES {
296      if msg.summary.as_str().to_lowercase().contains(phrase) {
297         style::warn(&format!(
298            "Summary contains meta-phrase '{phrase}' - be more specific about what changed"
299         ));
300      }
301   }
302
303   // Final length check after all potential mutations
304   let final_scope_part = msg
305      .scope
306      .as_ref()
307      .map(|s| format!("({s})"))
308      .unwrap_or_default();
309   let final_first_line_len =
310      msg.commit_type.len() + final_scope_part.len() + 2 + msg.summary.len();
311
312   if final_first_line_len > config.summary_hard_limit {
313      return Err(CommitGenError::SummaryTooLong {
314         len: final_first_line_len,
315         max: config.summary_hard_limit,
316      });
317   }
318
319   // Validate body items
320   for item in &msg.body {
321      let first_word = item.split_whitespace().next().unwrap_or("");
322      if BODY_PRESENT_TENSE
323         .iter()
324         .any(|&word| first_word.to_lowercase() == word)
325      {
326         style::warn(&format!("Body item uses present tense: '{item}'"));
327      }
328      if !item.trim_end().ends_with('.') {
329         style::warn(&format!("Body item missing period: '{item}'"));
330      }
331   }
332
333   Ok(())
334}
335
336/// Check type-scope consistency (warn if mismatched)
337pub fn check_type_scope_consistency(msg: &ConventionalCommit, stat: &str) {
338   let commit_type = msg.commit_type.as_str();
339
340   // Check for docs type
341   if commit_type == "docs" {
342      let has_docs = stat.lines().any(|line| {
343         let path = line.split('|').next().unwrap_or("").trim();
344         let is_doc_file = std::path::Path::new(&path)
345            .extension()
346            .and_then(|ext| ext.to_str())
347            .is_some_and(|ext| DOC_EXTENSIONS.contains(&ext.to_ascii_lowercase().as_str()));
348         is_doc_file
349            || path.to_lowercase().contains("/docs/")
350            || path.to_lowercase().contains("readme")
351      });
352      if !has_docs {
353         style::warn("Commit type 'docs' but no documentation files changed");
354      }
355   }
356
357   // Check for test type
358   if commit_type == "test" {
359      let has_test = stat.lines().any(|line| {
360         let path = line.split('|').next().unwrap_or("").trim().to_lowercase();
361         path.contains("/test") || path.contains("_test.") || path.contains(".test.")
362      });
363      if !has_test {
364         style::warn("Commit type 'test' but no test files changed");
365      }
366   }
367
368   // Check for style type (should be mostly whitespace/formatting)
369   if commit_type == "style" {
370      let has_code = stat.lines().any(|line| {
371         let path = line.split('|').next().unwrap_or("").trim();
372         let path_obj = std::path::Path::new(&path);
373         path_obj
374            .extension()
375            .is_some_and(|ext| is_code_extension(ext.to_str().unwrap_or("")))
376      });
377      if has_code {
378         style::warn("Commit type 'style' but code files changed (verify no logic changes)");
379      }
380   }
381
382   // Check for ci type
383   if commit_type == "ci" {
384      let has_ci = stat.lines().any(|line| {
385         let path = line.split('|').next().unwrap_or("").trim().to_lowercase();
386         path.contains(".github/workflows")
387            || path.contains(".gitlab-ci")
388            || path.contains("jenkinsfile")
389      });
390      if !has_ci {
391         style::warn("Commit type 'ci' but no CI configuration files changed");
392      }
393   }
394
395   // Check for build type
396   if commit_type == "build" {
397      let has_build = stat.lines().any(|line| {
398         let path = line.split('|').next().unwrap_or("").trim().to_lowercase();
399         path.contains("cargo.toml")
400            || path.contains("package.json")
401            || path.contains("makefile")
402            || path.contains("build.")
403      });
404      if !has_build {
405         style::warn("Commit type 'build' but no build files (Cargo.toml, package.json) changed");
406      }
407   }
408
409   // Check for refactor with new files (might actually be feat)
410   if commit_type == "refactor" {
411      let has_new_files = stat
412         .lines()
413         .any(|line| line.trim().starts_with("create mode") || line.contains("new file"));
414      if has_new_files {
415         style::warn(
416            "Commit type 'refactor' but new files were created - verify no new capabilities added \
417             (might be 'feat')",
418         );
419      }
420   }
421
422   // Check for perf type without performance evidence
423   if commit_type == "perf" {
424      let has_perf_files = stat.lines().any(|line| {
425         let path = line.split('|').next().unwrap_or("").trim().to_lowercase();
426         path.contains("bench") || path.contains("perf") || path.contains("profile")
427      });
428
429      // Check if details mention performance
430      let details_text = msg.body.join(" ").to_lowercase();
431      let has_perf_details = details_text.contains("faster")
432         || details_text.contains("optimization")
433         || details_text.contains("performance")
434         || details_text.contains("optimized");
435
436      if !has_perf_files && !has_perf_details {
437         style::warn(
438            "Commit type 'perf' but no performance-related files or optimization keywords found",
439         );
440      }
441   }
442}
443
444#[cfg(test)]
445mod tests {
446   use super::*;
447   use crate::types::{CommitSummary, CommitType, ConventionalCommit, Scope};
448
449   fn create_commit(
450      type_str: &str,
451      scope: Option<&str>,
452      summary: &str,
453      body: Vec<&str>,
454   ) -> ConventionalCommit {
455      ConventionalCommit {
456         commit_type: CommitType::new(type_str).unwrap(),
457         scope:       scope.map(|s| Scope::new(s).unwrap()),
458         summary:     CommitSummary::new_unchecked(summary, 128).unwrap(),
459         body:        body.into_iter().map(|s| s.to_string()).collect(),
460         footers:     vec![],
461      }
462   }
463
464   #[test]
465   fn test_validate_valid_commit() {
466      let config = CommitConfig::default();
467      let msg = create_commit("feat", Some("api"), "added new endpoint", vec![]);
468      assert!(validate_commit_message(&msg, &config).is_ok());
469   }
470
471   #[test]
472   fn test_validate_valid_commit_no_scope() {
473      let config = CommitConfig::default();
474      let msg = create_commit("fix", None, "corrected race condition", vec![]);
475      assert!(validate_commit_message(&msg, &config).is_ok());
476   }
477
478   #[test]
479   fn test_validate_invalid_type() {
480      let _config = CommitConfig::default();
481      let result = CommitType::new("invalid");
482      assert!(result.is_err());
483      assert!(matches!(result.unwrap_err(), CommitGenError::InvalidCommitType(_)));
484   }
485
486   #[test]
487   fn test_validate_summary_ends_with_period() {
488      let config = CommitConfig::default();
489      let msg = create_commit("feat", Some("api"), "added endpoint.", vec![]);
490      let result = validate_commit_message(&msg, &config);
491      assert!(result.is_err());
492      assert!(
493         result
494            .unwrap_err()
495            .to_string()
496            .contains("must NOT end with a period")
497      );
498   }
499
500   #[test]
501   fn test_validate_summary_too_long() {
502      // CommitSummary::new() enforces 128 char hard limit on summary alone
503      let long_summary = "a".repeat(129);
504      let result = CommitSummary::new(&long_summary, 128);
505      assert!(result.is_err());
506      assert!(matches!(result.unwrap_err(), CommitGenError::SummaryTooLong { .. }));
507   }
508
509   #[test]
510   fn test_validate_summary_empty() {
511      let result = CommitSummary::new("", 128);
512      assert!(result.is_err());
513      assert!(matches!(result.unwrap_err(), CommitGenError::ValidationError(_)));
514   }
515
516   #[test]
517   fn test_validate_summary_empty_whitespace() {
518      let result = CommitSummary::new("   ", 128);
519      assert!(result.is_err());
520      assert!(matches!(result.unwrap_err(), CommitGenError::ValidationError(_)));
521   }
522
523   #[test]
524   fn test_validate_wrong_verb() {
525      let config = CommitConfig::default();
526      let result = CommitSummary::new_unchecked("adding new feature", 128);
527      assert!(result.is_ok());
528      let msg = ConventionalCommit {
529         commit_type: CommitType::new("feat").unwrap(),
530         scope:       None,
531         summary:     result.unwrap(),
532         body:        vec![],
533         footers:     vec![],
534      };
535      let result = validate_commit_message(&msg, &config);
536      assert!(result.is_err());
537      assert!(
538         result
539            .unwrap_err()
540            .to_string()
541            .contains("must start with a past-tense verb")
542      );
543   }
544
545   #[test]
546   fn test_validate_present_tense_verb() {
547      let config = CommitConfig::default();
548      let result = CommitSummary::new_unchecked("adds new feature", 128);
549      assert!(result.is_ok());
550      let msg = ConventionalCommit {
551         commit_type: CommitType::new("feat").unwrap(),
552         scope:       None,
553         summary:     result.unwrap(),
554         body:        vec![],
555         footers:     vec![],
556      };
557      let result = validate_commit_message(&msg, &config);
558      assert!(result.is_err());
559      assert!(
560         result
561            .unwrap_err()
562            .to_string()
563            .contains("must start with a past-tense verb")
564      );
565   }
566
567   #[test]
568   fn test_validate_no_type_verb_overlap() {
569      // This test verifies that using a related verb doesn't trigger false positives
570      // "documented" is valid for "docs" type since they're not exact matches
571      let config = CommitConfig::default();
572      let msg = create_commit("docs", Some("api"), "documented new api", vec![]);
573      assert!(validate_commit_message(&msg, &config).is_ok());
574
575      // "tested" is valid for "test" type
576      let msg = create_commit("test", Some("api"), "added unit tests", vec![]);
577      assert!(validate_commit_message(&msg, &config).is_ok());
578   }
579
580   #[test]
581   fn test_validate_morphology_based_past_tense() {
582      let config = CommitConfig::default();
583      // Test regular -ed endings
584      let regular_verbs = ["added", "configured", "exposed", "formatted", "clarified"];
585      for verb in regular_verbs {
586         let summary = format!("{verb} something");
587         let msg = create_commit("feat", None, &summary, vec![]);
588         assert!(
589            validate_commit_message(&msg, &config).is_ok(),
590            "Regular verb '{verb}' should be accepted"
591         );
592      }
593
594      // Test irregular verbs
595      let irregular_verbs = ["made", "built", "ran", "wrote", "split"];
596      for verb in irregular_verbs {
597         let summary = format!("{verb} something");
598         let msg = create_commit("feat", None, &summary, vec![]);
599         assert!(
600            validate_commit_message(&msg, &config).is_ok(),
601            "Irregular verb '{verb}' should be accepted"
602         );
603      }
604
605      // Test false positives (should be rejected)
606      let non_verbs = ["hundred", "red", "bed"];
607      for word in non_verbs {
608         let summary = format!("{word} something");
609         let msg = ConventionalCommit {
610            commit_type: CommitType::new("feat").unwrap(),
611            scope:       None,
612            summary:     CommitSummary::new_unchecked(&summary, 128).unwrap(),
613            body:        vec![],
614            footers:     vec![],
615         };
616         assert!(
617            validate_commit_message(&msg, &config).is_err(),
618            "Non-verb '{word}' should be rejected"
619         );
620      }
621   }
622
623   #[test]
624   fn test_validate_scope_empty_string() {
625      let result = Scope::new("");
626      assert!(result.is_err());
627      assert!(matches!(result.unwrap_err(), CommitGenError::InvalidScope(_)));
628   }
629
630   #[test]
631   fn test_validate_scope_invalid_chars() {
632      let result = Scope::new("API/New");
633      assert!(result.is_err());
634      assert!(matches!(result.unwrap_err(), CommitGenError::InvalidScope(_)));
635   }
636
637   #[test]
638   fn test_validate_scope_too_many_segments() {
639      let result = Scope::new("core/api/http");
640      assert!(result.is_err());
641      assert!(result.unwrap_err().to_string().contains("max 2 allowed"));
642   }
643
644   #[test]
645   fn test_validate_scope_valid_single() {
646      let result = Scope::new("api");
647      assert!(result.is_ok());
648   }
649
650   #[test]
651   fn test_validate_scope_valid_two_segments() {
652      let result = Scope::new("core/api");
653      assert!(result.is_ok());
654   }
655
656   #[test]
657   fn test_validate_scope_with_dash_underscore() {
658      let result = Scope::new("core_api/http-client");
659      assert!(result.is_ok());
660   }
661
662   #[test]
663   fn test_validate_total_length_at_guideline() {
664      let config = CommitConfig::default();
665      // type(scope): summary = exactly 72 chars (guideline)
666      // "feat(scope): " = 13 chars, summary = 59 chars, starts with valid verb
667      let summary = format!("added {}", "x".repeat(53));
668      let msg = create_commit("feat", Some("scope"), &summary, vec![]);
669      // Should pass (with info message about being at guideline)
670      assert!(validate_commit_message(&msg, &config).is_ok());
671   }
672
673   #[test]
674   fn test_validate_total_length_at_soft_limit() {
675      let config = CommitConfig::default();
676      // type(scope): summary = exactly 96 chars (soft limit)
677      // "feat(scope): " = 13 chars, summary = 83 chars
678      let summary = format!("added {}", "x".repeat(77));
679      let msg = create_commit("feat", Some("scope"), &summary, vec![]);
680      // Should pass (with warning about soft limit)
681      assert!(validate_commit_message(&msg, &config).is_ok());
682   }
683
684   #[test]
685   fn test_validate_total_length_at_hard_limit() {
686      let config = CommitConfig::default();
687      // type(scope): summary = exactly 128 chars (hard limit)
688      // "feat(scope): " = 13 chars, summary = 115 chars
689      let summary = format!("added {}", "x".repeat(109));
690      let msg = create_commit("feat", Some("scope"), &summary, vec![]);
691      // Should pass (at hard limit)
692      assert!(validate_commit_message(&msg, &config).is_ok());
693   }
694
695   #[test]
696   fn test_validate_total_length_over_hard_limit() {
697      let config = CommitConfig::default();
698      // type(scope): summary > 128 chars (exceeds hard limit)
699      // "feat(scope): " = 13 chars, summary = 116 chars (total 129)
700      let summary = "a".repeat(116);
701      let msg = ConventionalCommit {
702         commit_type: CommitType::new("feat").unwrap(),
703         scope:       Some(Scope::new("scope").unwrap()),
704         summary:     CommitSummary::new_unchecked(&summary, 128).unwrap(),
705         body:        vec![],
706         footers:     vec![],
707      };
708      let result = validate_commit_message(&msg, &config);
709      assert!(result.is_err());
710      assert!(matches!(result.unwrap_err(), CommitGenError::SummaryTooLong { .. }));
711   }
712
713   #[test]
714   fn test_check_type_scope_docs_with_md() {
715      let msg = create_commit("docs", Some("readme"), "updated installation guide", vec![]);
716      let stat = " README.md | 10 +++++++---\n 1 file changed, 7 insertions(+), 3 deletions(-)";
717      // Should not print warning
718      check_type_scope_consistency(&msg, stat);
719   }
720
721   #[test]
722   fn test_check_type_scope_docs_without_md() {
723      let msg = create_commit("docs", None, "updated documentation", vec![]);
724      let stat = " src/main.rs | 10 +++++++---\n 1 file changed, 7 insertions(+), 3 deletions(-)";
725      // Should print warning (but we can't test stderr easily)
726      check_type_scope_consistency(&msg, stat);
727   }
728
729   #[test]
730   fn test_check_type_scope_test_with_test_files() {
731      let msg = create_commit("test", Some("api"), "added integration tests", vec![]);
732      let stat = " tests/integration_test.rs | 50 ++++++++++++++++++++++++++++++++\n";
733      check_type_scope_consistency(&msg, stat);
734   }
735
736   #[test]
737   fn test_check_type_scope_test_without_test_files() {
738      let msg = create_commit("test", None, "added tests", vec![]);
739      let stat = " src/lib.rs | 10 +++++++---\n";
740      check_type_scope_consistency(&msg, stat);
741   }
742
743   #[test]
744   fn test_check_type_scope_refactor_new_files() {
745      let msg = create_commit("refactor", Some("core"), "restructured modules", vec![]);
746      let stat = " create mode 100644 src/new_module.rs\n src/lib.rs | 10 +++++++---\n";
747      check_type_scope_consistency(&msg, stat);
748   }
749
750   #[test]
751   fn test_check_type_scope_ci_with_workflow() {
752      let msg = create_commit("ci", None, "updated github actions", vec![]);
753      let stat = " .github/workflows/ci.yml | 20 ++++++++++++++++++++\n";
754      check_type_scope_consistency(&msg, stat);
755   }
756
757   #[test]
758   fn test_check_type_scope_build_with_cargo() {
759      let msg = create_commit("build", Some("deps"), "updated dependencies", vec![]);
760      let stat = " Cargo.toml | 5 +++--\n Cargo.lock | 150 +++++++++++++++++++\n";
761      check_type_scope_consistency(&msg, stat);
762   }
763
764   #[test]
765   fn test_check_type_scope_perf_with_details() {
766      let msg = create_commit("perf", Some("core"), "optimized batch processing", vec![
767         "reduced allocations by 50% for faster throughput.",
768      ]);
769      let stat = " src/core.rs | 30 +++++++++++++-----------------\n";
770      check_type_scope_consistency(&msg, stat);
771   }
772
773   #[test]
774   fn test_check_type_scope_perf_without_evidence() {
775      let msg = create_commit("perf", None, "changed algorithm", vec![]);
776      let stat = " src/lib.rs | 10 +++++++---\n";
777      check_type_scope_consistency(&msg, stat);
778   }
779
780   #[test]
781   fn test_validate_body_present_tense_warning() {
782      let config = CommitConfig::default();
783      let msg = create_commit("feat", None, "added new feature", vec![
784         "adds support for TLS.",
785         "updates configuration.",
786      ]);
787      // Should succeed but print warnings (we can't easily test stderr)
788      assert!(validate_commit_message(&msg, &config).is_ok());
789   }
790
791   #[test]
792   fn test_validate_body_missing_period_warning() {
793      let config = CommitConfig::default();
794      let msg = create_commit("feat", None, "added new feature", vec![
795         "added support for TLS",
796         "updated configuration",
797      ]);
798      // Should succeed but print warnings
799      assert!(validate_commit_message(&msg, &config).is_ok());
800   }
801
802   #[test]
803   fn test_commit_type_case_normalization() {
804      assert!(CommitType::new("FEAT").is_ok());
805      assert!(CommitType::new("Feat").is_ok());
806      assert!(CommitType::new("feat").is_ok());
807      assert_eq!(CommitType::new("FEAT").unwrap().as_str(), "feat");
808   }
809
810   #[test]
811   fn test_commit_type_all_valid() {
812      let valid_types = [
813         "feat", "fix", "refactor", "docs", "test", "chore", "style", "perf", "build", "ci",
814         "revert",
815      ];
816      for t in &valid_types {
817         assert!(CommitType::new(*t).is_ok(), "Type '{t}' should be valid");
818      }
819   }
820
821   #[test]
822   fn test_summary_length_boundaries() {
823      // Guideline (72) - should pass
824      let summary_72 = "a".repeat(72);
825      assert!(CommitSummary::new(&summary_72, 128).is_ok());
826
827      // Soft limit (96) - should pass
828      let summary_96 = "a".repeat(96);
829      assert!(CommitSummary::new(&summary_96, 128).is_ok());
830
831      // Hard limit (128) - should pass
832      let summary_128 = "a".repeat(128);
833      assert!(CommitSummary::new(&summary_128, 128).is_ok());
834
835      // Over hard limit (129) - should fail
836      let summary_129 = "a".repeat(129);
837      let result = CommitSummary::new(&summary_129, 128);
838      assert!(result.is_err());
839      match result.unwrap_err() {
840         CommitGenError::SummaryTooLong { len, max } => {
841            assert_eq!(len, 129);
842            assert_eq!(max, 128);
843         },
844         _ => panic!("Expected SummaryTooLong error"),
845      }
846   }
847
848   #[test]
849   fn test_is_past_tense_verb_map_values() {
850      // Values from PAST_TENSE_MAP should be accepted as past tense
851      assert!(is_past_tense_verb("hardened"));
852      assert!(is_past_tense_verb("bound"));
853      assert!(is_past_tense_verb("isolated"));
854      assert!(is_past_tense_verb("guarded"));
855      assert!(is_past_tense_verb("rebuilt"));
856      assert!(is_past_tense_verb("rewrote"));
857      assert!(is_past_tense_verb("reran"));
858   }
859
860   #[test]
861   fn test_is_past_tense_verb_same_form_not_accepted_via_map() {
862      // Same-form entries (key == value) should NOT be accepted via the map
863      // check. "reset" is accepted via IRREGULAR_PAST, not the map.
864      // But "setup" was removed from IRREGULAR_PAST, so it should NOT pass.
865      assert!(!is_past_tense_verb("setup"));
866      // "reset" is in IRREGULAR_PAST so it passes
867      assert!(is_past_tense_verb("reset"));
868   }
869
870   #[test]
871   fn test_is_past_tense_first_word_suffix_tolerance() {
872      // Trailing non-alpha suffix should be stripped for stem check
873      assert!(is_past_tense_first_word("bound-check"));
874      assert!(is_past_tense_first_word("isolated-subagent"));
875      assert!(is_past_tense_first_word("re-enabled"));
876      assert!(is_past_tense_first_word("auto-detected"));
877      // Full token that is past tense via -ed
878      assert!(is_past_tense_first_word("hardened"));
879   }
880
881   #[test]
882   fn test_is_past_tense_first_word_acronyms_rejected() {
883      // All-caps acronyms should be rejected
884      assert!(!is_past_tense_first_word("API"));
885      assert!(!is_past_tense_first_word("NFC"));
886      assert!(!is_past_tense_first_word("LSP"));
887   }
888
889   #[test]
890   fn test_is_past_tense_first_word_numeric_rejected() {
891      // Numeric-led tokens should be rejected
892      assert!(!is_past_tense_first_word("403"));
893      assert!(!is_past_tense_first_word("v1.0"));
894      assert!(!is_past_tense_first_word("2.0.0"));
895   }
896
897   #[test]
898   fn test_is_past_tense_first_word_re_prefix() {
899      // re-ran: inner segment "ran" is past tense
900      assert!(is_past_tense_first_word("re-ran"));
901      // re-built: inner segment "built" is past tense
902      assert!(is_past_tense_first_word("re-built"));
903      // re-wrote: inner segment "wrote" is past tense
904      assert!(is_past_tense_first_word("re-wrote"));
905      // re-enabled: full token ends in -ed, passes via full token check
906      assert!(is_past_tense_first_word("re-enabled"));
907      // re-enable: inner "enable" is present tense but in map, so accepted
908      // (normalization will convert it to re-enabled)
909      assert!(is_past_tense_first_word("re-enable"));
910      // re-read: inner "read" is unchanged irregular
911      assert!(is_past_tense_first_word("re-read"));
912      // re-reset: inner "reset" is unchanged irregular
913      assert!(is_past_tense_first_word("re-reset"));
914   }
915
916   #[test]
917   fn test_is_past_tense_first_word_re_prefix_rejected() {
918      // re- with non-verb inner segment should be rejected
919      assert!(!is_past_tense_first_word("re-foo"));
920      assert!(!is_past_tense_first_word("re-123"));
921   }
922
923   #[test]
924   fn test_verb_stem_extraction() {
925      assert_eq!(verb_stem("bound-check"), Some("bound".to_string()));
926      assert_eq!(verb_stem("isolated-subagent"), Some("isolated".to_string()));
927      assert_eq!(verb_stem("harden"), Some("harden".to_string()));
928      // All-caps -> None (acronym)
929      assert_eq!(verb_stem("API"), None);
930      assert_eq!(verb_stem("NFC"), None);
931      // No leading letters -> None
932      assert_eq!(verb_stem("403"), None);
933      assert_eq!(verb_stem(""), None);
934   }
935
936   #[test]
937   fn test_split_verb_token() {
938      assert_eq!(split_verb_token("bound-check"), Some(("bound", "-check")));
939      assert_eq!(split_verb_token("harden"), Some(("harden", "")));
940      assert_eq!(split_verb_token("fix(tui):"), Some(("fix", "(tui):")));
941      assert_eq!(split_verb_token("403"), None);
942   }
943
944   #[test]
945   fn test_present_to_past_lookup() {
946      assert_eq!(present_to_past("harden"), Some("hardened"));
947      assert_eq!(present_to_past("bind"), Some("bound"));
948      assert_eq!(present_to_past("isolate"), Some("isolated"));
949      assert_eq!(present_to_past("rebuild"), Some("rebuilt"));
950      assert_eq!(present_to_past("nonexistent"), None);
951   }
952
953   #[test]
954   fn test_validate_bound_and_hardened() {
955      let config = CommitConfig::default();
956      // "bound" should pass validation (the original failing case)
957      let msg =
958         create_commit("fix", Some("stealth"), "bound native Reflect methods to variables", vec![]);
959      assert!(
960         validate_commit_message(&msg, &config).is_ok(),
961         "'bound' should be accepted as past-tense verb"
962      );
963      // "hardened" should pass validation
964      let msg = create_commit(
965         "fix",
966         Some("stealth"),
967         "hardened stealth scripts against detection",
968         vec![],
969      );
970      assert!(
971         validate_commit_message(&msg, &config).is_ok(),
972         "'hardened' should be accepted as past-tense verb"
973      );
974   }
975
976   #[test]
977   fn test_validate_bound_check_suffix() {
978      let config = CommitConfig::default();
979      // "bound-check" should pass via stem extraction
980      let msg = create_commit("fix", None, "bound-checked the inputs", vec![]);
981      assert!(
982         validate_commit_message(&msg, &config).is_ok(),
983         "'bound-checked' should be accepted as past-tense verb"
984      );
985   }
986}