Skip to main content

yarli_cli/yarli-git/src/
commit_message.rs

1use std::collections::BTreeMap;
2use std::path::{Path, PathBuf};
3use std::process::Command;
4
/// A commit message split into its subject line and optional body.
///
/// Produced by `generate_commit_message` and turned into final text by
/// `render_commit_message`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GeneratedCommitMessage {
    /// First line of the commit message.
    pub subject: String,
    /// Text rendered below the subject, separated from it by a blank line;
    /// `None` when there is nothing to add.
    pub body: Option<String>,
}
10
/// Selects which diff the changed paths are read from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffSpec<'a> {
    /// The staged changes (passed to git as `--cached`).
    Staged,
    /// The revision range `base..head`.
    Range { base: &'a str, head: &'a str },
}
16
/// Kind of change reported for a path, derived by `parse_status` from the
/// first letter of the status column of `git diff --name-status`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ChangeStatus {
    /// Status letter `A`.
    Added,
    /// Status letter `M` (also used when the status column is empty).
    Modified,
    /// Status letter `D`.
    Deleted,
    /// Status letter `R`.
    Renamed,
    /// Status letter `T`.
    TypeChanged,
    /// Status letter `U`.
    Unmerged,
    /// Any other status letter.
    Unknown,
}
27
/// One entry of the parsed `git diff --name-status` output.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ChangedPath {
    /// Path as printed by git; for renames this is the second path column
    /// when one is present.
    path: PathBuf,
    /// Kind of change git reported for the path.
    status: ChangeStatus,
}
33
/// Coarse classification of a changed path, assigned by `classify_path` and
/// used by `infer_subject` to pick the commit type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PathCategory {
    /// Paths under `.yarli`.
    State,
    /// Documentation, e.g. paths under `docs` or files with an `md` extension.
    Docs,
    /// Test files, e.g. paths containing a `tests` directory.
    Tests,
    /// CI automation, e.g. paths under `.github`.
    Ci,
    /// Build tooling, e.g. `Cargo.toml`, `Cargo.lock`, `Dockerfile`.
    Build,
    /// Configuration files, e.g. `.toml`, `.yml`, `.yaml`.
    Config,
    /// Anything that matches none of the other categories.
    Code,
    /// Paths with no components.
    Unknown,
}
45
46pub fn generate_commit_message(
47    repo: &Path,
48    diff: DiffSpec<'_>,
49    metadata: &[(String, String)],
50    fallback_subject: &str,
51    fallback_scope: Option<&str>,
52) -> GeneratedCommitMessage {
53    let subject = collect_changed_paths(repo, diff)
54        .ok()
55        .and_then(|paths| infer_subject(&paths, fallback_scope))
56        .unwrap_or_else(|| fallback_subject.to_string());
57
58    let body = if metadata.is_empty() {
59        None
60    } else {
61        Some(
62            metadata
63                .iter()
64                .map(|(key, value)| format!("{key}: {value}"))
65                .collect::<Vec<_>>()
66                .join("\n"),
67        )
68    };
69
70    GeneratedCommitMessage { subject, body }
71}
72
73pub fn render_commit_message(message: &GeneratedCommitMessage) -> String {
74    match message.body.as_deref() {
75        Some(body) if !body.trim().is_empty() => format!("{}\n\n{}", message.subject, body),
76        _ => message.subject.clone(),
77    }
78}
79
80fn collect_changed_paths(repo: &Path, diff: DiffSpec<'_>) -> std::io::Result<Vec<ChangedPath>> {
81    let mut args = vec![
82        "diff".to_string(),
83        "--name-status".to_string(),
84        "--find-renames".to_string(),
85        "--diff-filter=ACDMRTUXB".to_string(),
86    ];
87    match diff {
88        DiffSpec::Staged => args.push("--cached".to_string()),
89        DiffSpec::Range { base, head } => args.push(format!("{base}..{head}")),
90    }
91
92    let output = Command::new("git").args(&args).current_dir(repo).output()?;
93    if !output.status.success() {
94        return Ok(Vec::new());
95    }
96
97    Ok(parse_name_status(&String::from_utf8_lossy(&output.stdout)))
98}
99
100fn parse_name_status(text: &str) -> Vec<ChangedPath> {
101    text.lines()
102        .filter_map(|line| {
103            let mut parts = line.split('\t');
104            let status = parts.next()?;
105            let status = parse_status(status);
106            let first_path = parts.next()?;
107            let path = if status == ChangeStatus::Renamed {
108                parts.next().unwrap_or(first_path)
109            } else {
110                first_path
111            };
112            Some(ChangedPath {
113                path: PathBuf::from(path),
114                status,
115            })
116        })
117        .collect()
118}
119
120fn parse_status(raw: &str) -> ChangeStatus {
121    match raw.chars().next().unwrap_or('M') {
122        'A' => ChangeStatus::Added,
123        'M' => ChangeStatus::Modified,
124        'D' => ChangeStatus::Deleted,
125        'R' => ChangeStatus::Renamed,
126        'T' => ChangeStatus::TypeChanged,
127        'U' => ChangeStatus::Unmerged,
128        _ => ChangeStatus::Unknown,
129    }
130}
131
132fn infer_subject(paths: &[ChangedPath], fallback_scope: Option<&str>) -> Option<String> {
133    if paths.is_empty() {
134        return None;
135    }
136
137    if paths
138        .iter()
139        .all(|path| classify_path(&path.path) == PathCategory::State)
140    {
141        return Some("chore(state): checkpoint runtime state".to_string());
142    }
143
144    let scope = infer_scope(paths, fallback_scope);
145    let categories: Vec<_> = paths.iter().map(|path| classify_path(&path.path)).collect();
146    let all_docs = categories
147        .iter()
148        .all(|category| *category == PathCategory::Docs);
149    let all_tests = categories
150        .iter()
151        .all(|category| *category == PathCategory::Tests);
152    let all_ci = categories
153        .iter()
154        .all(|category| *category == PathCategory::Ci);
155    let all_build = categories
156        .iter()
157        .all(|category| *category == PathCategory::Build);
158    let all_config = categories
159        .iter()
160        .all(|category| *category == PathCategory::Config);
161    let any_added = paths.iter().any(|path| path.status == ChangeStatus::Added);
162    let only_deleted = paths
163        .iter()
164        .all(|path| path.status == ChangeStatus::Deleted);
165    let only_renamed = paths
166        .iter()
167        .all(|path| path.status == ChangeStatus::Renamed);
168
169    if all_docs {
170        return Some(format!("docs({scope}): update documentation"));
171    }
172    if all_tests {
173        return Some(format!("test({scope}): update coverage"));
174    }
175    if all_ci {
176        return Some(format!("ci({scope}): update automation"));
177    }
178    if all_build {
179        return Some(format!("build({scope}): update build tooling"));
180    }
181    if all_config {
182        return Some(format!("chore({scope}): update configuration"));
183    }
184
185    let target = if paths.len() == 1 {
186        describe_target(&paths[0].path)
187    } else {
188        humanize_token(&scope)
189    };
190
191    if only_deleted {
192        return Some(format!("refactor({scope}): remove {target}"));
193    }
194    if only_renamed {
195        return Some(format!("refactor({scope}): reorganize {target}"));
196    }
197    if any_added {
198        let action = if paths.len() == 1 {
199            format!("add {target}")
200        } else {
201            format!("update {target}")
202        };
203        return Some(format!("feat({scope}): {action}"));
204    }
205
206    Some(format!("fix({scope}): update {target}"))
207}
208
209fn infer_scope(paths: &[ChangedPath], fallback_scope: Option<&str>) -> String {
210    let mut counts = BTreeMap::<String, usize>::new();
211    for path in paths {
212        let candidate = path_scope_candidate(&path.path);
213        *counts.entry(candidate).or_default() += 1;
214    }
215
216    counts
217        .into_iter()
218        .max_by_key(|(_, count)| *count)
219        .map(|(scope, _)| scope)
220        .or_else(|| fallback_scope.map(ToString::to_string))
221        .unwrap_or_else(|| "repo".to_string())
222}
223
224fn path_scope_candidate(path: &Path) -> String {
225    let parts: Vec<_> = path
226        .iter()
227        .map(|segment| segment.to_string_lossy().to_string())
228        .collect();
229
230    if parts.is_empty() {
231        return "repo".to_string();
232    }
233
234    if parts.first().is_some_and(|part| part == ".yarli") {
235        return "state".to_string();
236    }
237
238    if parts.first().is_some_and(|part| part == "crates") {
239        if parts.len() >= 4 {
240            let file_stem = Path::new(&parts[3])
241                .file_stem()
242                .and_then(|stem| stem.to_str())
243                .unwrap_or("repo");
244            if !is_generic_token(file_stem) {
245                return sanitize_scope(file_stem);
246            }
247        }
248        if parts.len() >= 2 {
249            return sanitize_scope(parts[1].trim_start_matches("yarli-"));
250        }
251    }
252
253    if parts.first().is_some_and(|part| part == "docs") {
254        if parts.len() >= 2 {
255            let stem = Path::new(&parts[1])
256                .file_stem()
257                .and_then(|value| value.to_str())
258                .unwrap_or("docs");
259            if !is_generic_token(stem) {
260                return sanitize_scope(stem);
261            }
262        }
263        return "docs".to_string();
264    }
265
266    if parts.first().is_some_and(|part| part == "tests") {
267        if parts.len() >= 3 {
268            return sanitize_scope(&parts[2]);
269        }
270        return "tests".to_string();
271    }
272
273    if parts.first().is_some_and(|part| part == "scripts") {
274        if parts.len() >= 2 {
275            let stem = Path::new(&parts[1])
276                .file_stem()
277                .and_then(|value| value.to_str())
278                .unwrap_or("scripts");
279            return sanitize_scope(stem);
280        }
281        return "scripts".to_string();
282    }
283
284    if parts.first().is_some_and(|part| part == ".github") {
285        return "ci".to_string();
286    }
287
288    if let Some(file_name) = path.file_stem().and_then(|name| name.to_str()) {
289        if !is_generic_token(file_name) {
290            return sanitize_scope(file_name);
291        }
292    }
293
294    sanitize_scope(&parts[0])
295}
296
297fn classify_path(path: &Path) -> PathCategory {
298    let parts: Vec<_> = path
299        .iter()
300        .map(|segment| segment.to_string_lossy().to_string())
301        .collect();
302
303    if parts.is_empty() {
304        return PathCategory::Unknown;
305    }
306
307    if parts.first().is_some_and(|part| part == ".yarli") {
308        return PathCategory::State;
309    }
310
311    if parts.first().is_some_and(|part| part == "docs")
312        || path.extension().and_then(|ext| ext.to_str()) == Some("md")
313    {
314        return PathCategory::Docs;
315    }
316
317    if parts
318        .iter()
319        .any(|part| part == "tests" || part == "__tests__")
320        || parts
321            .last()
322            .is_some_and(|name| name.ends_with("_test.rs") || name.ends_with(".snap"))
323    {
324        return PathCategory::Tests;
325    }
326
327    if parts.first().is_some_and(|part| part == ".github")
328        || parts.starts_with(&["docs".to_string(), "ci".to_string()])
329    {
330        return PathCategory::Ci;
331    }
332
333    if parts
334        .last()
335        .is_some_and(|name| matches!(name.as_str(), "Cargo.toml" | "Cargo.lock" | "Dockerfile"))
336    {
337        return PathCategory::Build;
338    }
339
340    if parts.last().is_some_and(|name| {
341        name.ends_with(".toml") || name.ends_with(".yml") || name.ends_with(".yaml")
342    }) {
343        return PathCategory::Config;
344    }
345
346    if parts.first().is_some_and(|part| part == "scripts") {
347        return PathCategory::Build;
348    }
349
350    PathCategory::Code
351}
352
353fn describe_target(path: &Path) -> String {
354    let stem = path
355        .file_stem()
356        .and_then(|value| value.to_str())
357        .filter(|value| !is_generic_token(value))
358        .map(humanize_token);
359    if let Some(stem) = stem {
360        return stem;
361    }
362
363    path.parent()
364        .and_then(|parent| parent.file_name())
365        .and_then(|value| value.to_str())
366        .map(humanize_token)
367        .unwrap_or_else(|| "changes".to_string())
368}
369
/// Normalizes `raw` into a lowercase, dash-separated scope name.
///
/// Every run of characters that are not ASCII letters or digits acts as a
/// single separator. Leading and trailing separators are dropped, so the
/// result never starts or ends with `-` and never contains `--`. The result is
/// empty when `raw` has no ASCII alphanumeric characters.
fn sanitize_scope(raw: &str) -> String {
    let mut scope = String::with_capacity(raw.len());

    for word in raw.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if word.is_empty() {
            continue;
        }
        if !scope.is_empty() {
            scope.push('-');
        }
        scope.push_str(&word.to_ascii_lowercase());
    }

    scope
}
389
/// Turns an identifier-like token into lowercase words separated by single
/// spaces.
///
/// Underscores, dashes, dots and whitespace all act as word separators, and
/// repeated separators collapse into one space.
fn humanize_token(raw: &str) -> String {
    let words: Vec<&str> = raw
        .split(|ch: char| matches!(ch, '_' | '-' | '.') || ch.is_whitespace())
        .filter(|word| !word.is_empty())
        .collect();

    words.join(" ").to_lowercase()
}
397
/// Reports whether `value` is a file stem too generic to describe a change.
///
/// The comparison is exact and case-sensitive.
fn is_generic_token(value: &str) -> bool {
    const GENERIC_STEMS: [&str; 5] = ["mod", "lib", "main", "index", "readme"];
    GENERIC_STEMS.contains(&value)
}
401
#[cfg(test)]
mod tests {
    use super::*;

    /// Infers the subject for one changed path, with no fallback scope.
    fn subject_for(path: &str, status: ChangeStatus) -> Option<String> {
        let changes = [ChangedPath {
            path: PathBuf::from(path),
            status,
        }];
        infer_subject(&changes, None)
    }

    #[test]
    fn state_only_changes_become_checkpoint_subject() {
        let subject = subject_for(".yarli/tranches.toml", ChangeStatus::Modified);
        assert_eq!(
            subject.as_deref(),
            Some("chore(state): checkpoint runtime state")
        );
    }

    #[test]
    fn single_added_code_file_becomes_feature_subject() {
        let subject = subject_for("crates/yarli-cli/src/workspace.rs", ChangeStatus::Added);
        assert_eq!(subject.as_deref(), Some("feat(workspace): add workspace"));
    }

    #[test]
    fn docs_changes_use_docs_type() {
        let subject = subject_for("docs/API_CONTRACT.md", ChangeStatus::Modified);
        assert_eq!(
            subject.as_deref(),
            Some("docs(api-contract): update documentation")
        );
    }

    #[test]
    fn render_commit_message_preserves_body() {
        let message = GeneratedCommitMessage {
            subject: "feat(workspace): add workspace".to_string(),
            body: Some("yarli-run: run-123".to_string()),
        };
        assert_eq!(
            render_commit_message(&message),
            "feat(workspace): add workspace\n\nyarli-run: run-123"
        );
    }
}
453}