Skip to main content

tokmd_git/
intent.rs

1//! Commit intent classification for git history subjects.
2
3use tokmd_types::CommitIntentKind;
4
5/// Classify a commit subject line into an intent kind.
6///
7/// Uses a two-stage pipeline:
8/// 1. **Conventional Commits**: Parse `type(scope)!: description` prefix
9/// 2. **Keyword heuristic**: Match known keywords in the subject
10pub fn classify_intent(subject: &str) -> CommitIntentKind {
11    let trimmed = subject.trim();
12    if trimmed.is_empty() {
13        return CommitIntentKind::Other;
14    }
15
16    // Check for revert pattern first
17    if trimmed.starts_with("Revert \"") || trimmed.starts_with("revert:") {
18        return CommitIntentKind::Revert;
19    }
20
21    // Try conventional commit parsing
22    if let Some(kind) = parse_conventional_prefix(trimmed) {
23        return kind;
24    }
25
26    // Fall back to keyword heuristic
27    keyword_heuristic(trimmed)
28}
29
30/// Parse a conventional commit prefix like `feat(scope)!: description`.
31fn parse_conventional_prefix(subject: &str) -> Option<CommitIntentKind> {
32    let colon_pos = subject.find(':')?;
33    let prefix = &subject[..colon_pos];
34
35    // Strip optional (scope) and trailing !
36    let prefix = if let Some(paren_pos) = prefix.find('(') {
37        &prefix[..paren_pos]
38    } else {
39        prefix
40    };
41    let prefix = prefix.trim_end_matches('!');
42
43    match prefix.to_ascii_lowercase().as_str() {
44        "feat" | "feature" => Some(CommitIntentKind::Feat),
45        "fix" | "bugfix" | "hotfix" => Some(CommitIntentKind::Fix),
46        "refactor" => Some(CommitIntentKind::Refactor),
47        "docs" | "doc" => Some(CommitIntentKind::Docs),
48        "test" | "tests" => Some(CommitIntentKind::Test),
49        "chore" => Some(CommitIntentKind::Chore),
50        "ci" => Some(CommitIntentKind::Ci),
51        "build" => Some(CommitIntentKind::Build),
52        "perf" => Some(CommitIntentKind::Perf),
53        "style" => Some(CommitIntentKind::Style),
54        "revert" => Some(CommitIntentKind::Revert),
55        _ => None,
56    }
57}
58
59/// Keyword-based heuristic for commit intent classification.
60fn keyword_heuristic(subject: &str) -> CommitIntentKind {
61    let lower = subject.to_ascii_lowercase();
62
63    // Ordered by priority: more specific matches first
64    if contains_word(&lower, "revert") {
65        CommitIntentKind::Revert
66    } else if contains_word(&lower, "fix")
67        || contains_word(&lower, "bug")
68        || contains_word(&lower, "patch")
69        || contains_word(&lower, "hotfix")
70    {
71        CommitIntentKind::Fix
72    } else if contains_word(&lower, "feat")
73        || contains_word(&lower, "feature")
74        || lower.starts_with("add ")
75        || lower.starts_with("implement ")
76        || lower.starts_with("introduce ")
77    {
78        CommitIntentKind::Feat
79    } else if contains_word(&lower, "refactor") || contains_word(&lower, "restructure") {
80        CommitIntentKind::Refactor
81    } else if contains_word(&lower, "doc") || contains_word(&lower, "readme") {
82        CommitIntentKind::Docs
83    } else if contains_word(&lower, "test") {
84        CommitIntentKind::Test
85    } else if contains_word(&lower, "perf")
86        || contains_word(&lower, "performance")
87        || contains_word(&lower, "optimize")
88    {
89        CommitIntentKind::Perf
90    } else if contains_word(&lower, "style")
91        || contains_word(&lower, "format")
92        || contains_word(&lower, "lint")
93    {
94        CommitIntentKind::Style
95    } else if contains_word(&lower, "ci") || contains_word(&lower, "pipeline") {
96        CommitIntentKind::Ci
97    } else if contains_word(&lower, "build") || contains_word(&lower, "deps") {
98        CommitIntentKind::Build
99    } else if contains_word(&lower, "chore") || contains_word(&lower, "cleanup") {
100        CommitIntentKind::Chore
101    } else {
102        CommitIntentKind::Other
103    }
104}
105
/// Check whether `word` occurs in `haystack` as a whole word.
///
/// An occurrence counts only if the character immediately before it and the
/// character immediately after it are each either absent (start/end of the
/// string) or not alphanumeric. Separators such as spaces, `-`, `_`, `:` and
/// other punctuation therefore delimit words.
///
/// Neighbours are inspected as full `char`s rather than single bytes, so a
/// non-ASCII letter adjacent to the match (e.g. the `é` in "préfix") is
/// treated as part of the surrounding word instead of as a boundary.
///
/// Matching is case-sensitive; callers are expected to normalise case first.
fn contains_word(haystack: &str, word: &str) -> bool {
    haystack.match_indices(word).any(|(start, matched)| {
        // `start` and `end` come from a `str` match, so both lie on char
        // boundaries and the slices below cannot panic.
        let end = start + matched.len();
        let before_ok = haystack[..start]
            .chars()
            .next_back()
            .map_or(true, |c| !c.is_alphanumeric());
        let after_ok = haystack[end..]
            .chars()
            .next()
            .map_or(true, |c| !c.is_alphanumeric());
        before_ok && after_ok
    })
}
119
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that each `(subject, expected)` pair is classified as expected,
    /// naming the offending subject on failure.
    fn assert_classified(cases: Vec<(&str, CommitIntentKind)>) {
        for (subject, expected) in cases {
            assert_eq!(classify_intent(subject), expected, "subject: {:?}", subject);
        }
    }

    #[test]
    fn classify_intent_prefers_conventional_commit_prefix() {
        assert_classified(vec![
            ("feat(parser): add support", CommitIntentKind::Feat),
            ("fix!: breaking hotfix", CommitIntentKind::Fix),
            ("docs(readme): update usage", CommitIntentKind::Docs),
            ("test: add regression", CommitIntentKind::Test),
        ]);
    }

    #[test]
    fn classify_intent_uses_keyword_heuristics() {
        assert_classified(vec![
            ("Add caching layer", CommitIntentKind::Feat),
            ("optimize parser allocations", CommitIntentKind::Perf),
            ("lint workspace", CommitIntentKind::Style),
            // `pipeline` is not a known conventional type, so this exercises
            // the fall-through from prefix parsing to the keyword stage.
            ("pipeline: update checks", CommitIntentKind::Ci),
        ]);
    }

    #[test]
    fn classify_intent_handles_revert_and_empty_subjects() {
        assert_classified(vec![
            ("Revert \"bad commit\"", CommitIntentKind::Revert),
            ("revert: undo change", CommitIntentKind::Revert),
            ("   \t", CommitIntentKind::Other),
        ]);
    }

    #[test]
    fn contains_word_respects_word_boundaries() {
        let cases = [
            ("fix parser", true),
            ("fix-parser", true),
            ("prefix parser", false),
            ("fixture", false),
        ];
        for (haystack, expected) in cases.iter() {
            assert_eq!(
                contains_word(haystack, "fix"),
                *expected,
                "haystack: {:?}",
                haystack
            );
        }
    }
}