Skip to main content

spool/
git_importer.rs

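//! Git activity importer — extracts decision, incident, and pattern
//! candidates from recent commit messages in a given repository.
//!
//! Scans `git log` output for structured commit subjects (Conventional
//! Commits) and maps them to memory types:
//! - `feat:` / `refactor:` → decision candidates
//! - `fix:` → incident candidates
//! - `perf:` → pattern candidates
//! - `docs:` whose description mentions "adr" → decision candidates
//! - non-conventional subjects containing `BREAKING CHANGE` or `breaking:`
//!   → decision candidates
//! - Repeated patterns (same prefix 3+ times) → workflow patterns
//!   (not implemented by the code in this file)
//!
//! This is a Tier 1 heuristic importer — no LLM involved.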
10
11use std::path::Path;
12use std::process::Command;
13
14use crate::domain::MemoryScope;
15use crate::lifecycle_service::LifecycleService;
16use crate::lifecycle_store::{ProposeMemoryRequest, TransitionMetadata};
17
/// A memory candidate derived from a single commit subject line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitCandidate {
    /// Commit description with the conventional prefix removed, truncated for display.
    pub title: String,
    /// Untruncated description followed by ` (commit <short hash>)`.
    pub summary: String,
    /// Memory type label assigned by the classifier: `"decision"`, `"incident"` or `"pattern"`.
    pub memory_type: String,
    /// Commit hash exactly as reported by `git log`.
    pub commit_hash: String,
}
25
/// Outcome of one git import run.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GitImportReport {
    /// Number of commits the run was asked to scan. `import_git_activity` stores the requested
    /// limit here, not the number of commits `git log` actually returned.
    pub commits_scanned: usize,
    /// Number of commits that produced a candidate.
    pub candidates_found: usize,
    /// Record ids returned for the candidates that were persisted.
    pub candidates_persisted: Vec<String>,
    /// Candidates skipped because an existing record's summary matched.
    pub candidates_duplicate_dropped: usize,
}
33
34pub fn scan_recent_commits(repo_path: &Path, limit: usize) -> Vec<GitCandidate> {
35    let output = Command::new("git")
36        .args([
37            "-C",
38            &repo_path.display().to_string(),
39            "log",
40            &format!("-{limit}"),
41            "--format=%H|%s",
42        ])
43        .output();
44
45    let output = match output {
46        Ok(o) if o.status.success() => o,
47        _ => return Vec::new(),
48    };
49
50    let stdout = String::from_utf8_lossy(&output.stdout);
51    let mut candidates = Vec::new();
52
53    for line in stdout.lines() {
54        let Some((hash, subject)) = line.split_once('|') else {
55            continue;
56        };
57        if let Some(candidate) = parse_commit_subject(hash, subject) {
58            candidates.push(candidate);
59        }
60    }
61
62    candidates
63}
64
65fn parse_commit_subject(hash: &str, subject: &str) -> Option<GitCandidate> {
66    let subject_trimmed = subject.trim();
67    if subject_trimmed.len() < 10 {
68        return None;
69    }
70
71    // Skip merge commits and version bumps
72    if subject_trimmed.starts_with("Merge ")
73        || subject_trimmed.starts_with("chore(release)")
74        || subject_trimmed == "1"
75    {
76        return None;
77    }
78
79    let (memory_type, title) = if let Some(rest) = strip_conventional_prefix(subject_trimmed) {
80        let prefix = &subject_trimmed[..subject_trimmed.len() - rest.len()];
81        let mt = match prefix {
82            p if p.starts_with("feat") => "decision",
83            p if p.starts_with("refactor") => "decision",
84            p if p.starts_with("fix") => "incident",
85            p if p.starts_with("perf") => "pattern",
86            p if p.starts_with("docs") && rest.to_lowercase().contains("adr") => "decision",
87            _ => return None,
88        };
89        (mt, rest.to_string())
90    } else if subject_trimmed.contains("BREAKING CHANGE") || subject_trimmed.contains("breaking:") {
91        ("decision", subject_trimmed.to_string())
92    } else {
93        return None;
94    };
95
96    Some(GitCandidate {
97        title: truncate(&title, 80),
98        summary: format!("{} (commit {})", title, &hash[..7.min(hash.len())]),
99        memory_type: memory_type.to_string(),
100        commit_hash: hash.to_string(),
101    })
102}
103
/// Strips a Conventional Commits prefix from `s` and returns the description.
///
/// Recognised shapes, for the types `feat`, `fix`, `refactor`, `perf`,
/// `docs`, `test`, `chore` and `ci`:
/// - `type: description` (leading whitespace of the description is trimmed)
/// - `type(scope): description` (exactly `": "` must follow the scope)
/// - either of the above with the `!` breaking-change marker before the colon,
///   e.g. `feat!: …` or `feat(api)!: …`
///
/// The scope ends at the first `)`, and the colon must follow it directly
/// (after the optional `!`). A `): ` that merely appears later in the subject
/// does not make it conventional.
///
/// Returns `None` when `s` does not start with one of these shapes. The
/// returned slice is always a suffix of `s`.
fn strip_conventional_prefix(s: &str) -> Option<&str> {
    const TYPES: [&str; 8] = [
        "feat", "fix", "refactor", "perf", "docs", "test", "chore", "ci",
    ];

    // The type must be followed immediately by `(`, `!` or `:` so that words
    // such as "feature" or "fixes" are not mistaken for a type.
    let after_type = TYPES.iter().find_map(|ty| {
        s.strip_prefix(ty)
            .filter(|rest| rest.starts_with(|c: char| matches!(c, '(' | '!' | ':')))
    })?;

    // Optional `(scope)`: everything up to the first closing parenthesis.
    let (after_scope, scoped) = match after_type.strip_prefix('(') {
        Some(inner) => (&inner[inner.find(')')? + 1..], true),
        None => (after_type, false),
    };

    // Optional `!` breaking-change marker.
    let after_bang = after_scope.strip_prefix('!').unwrap_or(after_scope);
    let description = after_bang.strip_prefix(':')?;

    if scoped {
        // Scoped subjects require the separating space and keep the rest verbatim.
        description.strip_prefix(' ')
    } else {
        Some(description.trim_start())
    }
}
136
/// Returns `s` unchanged when it has at most `max` characters; otherwise
/// returns its first `max` characters followed by an ellipsis (`…`).
///
/// Lengths are counted in `char`s, so multi-byte characters are never split.
fn truncate(s: &str, max: usize) -> String {
    // A character at index `max` exists exactly when `s` is longer than `max` chars;
    // its byte offset is where the kept part ends.
    match s.char_indices().nth(max) {
        None => s.to_owned(),
        Some((cut, _)) => {
            let mut shortened = String::with_capacity(cut + '…'.len_utf8());
            shortened.push_str(&s[..cut]);
            shortened.push('…');
            shortened
        }
    }
}
146
147pub fn import_git_activity(
148    config_path: &Path,
149    repo_path: &Path,
150    limit: usize,
151    dry_run: bool,
152) -> anyhow::Result<GitImportReport> {
153    let candidates = scan_recent_commits(repo_path, limit);
154    let mut report = GitImportReport {
155        commits_scanned: limit,
156        candidates_found: candidates.len(),
157        ..Default::default()
158    };
159
160    if dry_run || candidates.is_empty() {
161        return Ok(report);
162    }
163
164    let service = LifecycleService::new();
165    let existing = service
166        .load_workbench(config_path)
167        .map(|snap| {
168            snap.wakeup_ready
169                .iter()
170                .chain(snap.pending_review.iter())
171                .map(|e| e.record.summary.to_lowercase())
172                .collect::<Vec<_>>()
173        })
174        .unwrap_or_default();
175
176    for candidate in &candidates {
177        let summary_lc = candidate.summary.to_lowercase();
178        if existing
179            .iter()
180            .any(|s| s.contains(&summary_lc) || summary_lc.contains(s.as_str()))
181        {
182            report.candidates_duplicate_dropped += 1;
183            continue;
184        }
185
186        let request = ProposeMemoryRequest {
187            title: candidate.title.clone(),
188            summary: candidate.summary.clone(),
189            memory_type: candidate.memory_type.clone(),
190            scope: MemoryScope::Project,
191            source_ref: format!(
192                "git:{}",
193                &candidate.commit_hash[..7.min(candidate.commit_hash.len())]
194            ),
195            project_id: None,
196            user_id: None,
197            sensitivity: None,
198            metadata: TransitionMetadata {
199                actor: Some("spool-git-importer".to_string()),
200                reason: Some("extracted from git commit history".to_string()),
201                evidence_refs: vec![format!("commit:{}", candidate.commit_hash)],
202            },
203            entities: Vec::new(),
204            tags: Vec::new(),
205            triggers: Vec::new(),
206            related_files: Vec::new(),
207            related_records: Vec::new(),
208            supersedes: None,
209            applies_to: Vec::new(),
210            valid_until: None,
211        };
212
213        match service.propose_ai(config_path, request) {
214            Ok(result) => report.candidates_persisted.push(result.entry.record_id),
215            Err(err) => {
216                eprintln!("[spool git-import] persist failed: {:#}", err);
217            }
218        }
219    }
220
221    Ok(report)
222}
223
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_feat_commit() {
        let c = parse_commit_subject("abc1234", "feat(retrieval): add confidence scoring").unwrap();
        assert_eq!(c.memory_type, "decision");
        assert!(c.title.contains("confidence scoring"));
    }

    #[test]
    fn parse_fix_commit() {
        let c = parse_commit_subject("def5678", "fix: avoid nested .spool directory").unwrap();
        assert_eq!(c.memory_type, "incident");
    }

    #[test]
    fn parse_refactor_commit() {
        let c =
            parse_commit_subject("ghi9012", "refactor(hook): remove Trellis degradation").unwrap();
        assert_eq!(c.memory_type, "decision");
    }

    #[test]
    fn parse_perf_commit() {
        let c = parse_commit_subject("abc1234", "perf(index): cache tokenized queries").unwrap();
        assert_eq!(c.memory_type, "pattern");
    }

    #[test]
    fn parse_docs_commit_only_with_adr() {
        let c = parse_commit_subject("abc1234", "docs: add ADR for storage layout").unwrap();
        assert_eq!(c.memory_type, "decision");
        assert!(parse_commit_subject("abc1234", "docs: fix typos in the readme").is_none());
    }

    #[test]
    fn parse_breaking_change_without_prefix() {
        let subject = "BREAKING CHANGE: drop legacy config format";
        let c = parse_commit_subject("abc1234", subject).unwrap();
        assert_eq!(c.memory_type, "decision");
        assert_eq!(c.title, subject);
    }

    #[test]
    fn summary_uses_short_hash() {
        let c = parse_commit_subject("0123456789abcdef", "feat: add confidence scoring").unwrap();
        assert_eq!(c.summary, "add confidence scoring (commit 0123456)");
        assert_eq!(c.commit_hash, "0123456789abcdef");
    }

    #[test]
    fn skip_short_subject() {
        assert!(parse_commit_subject("abc", "1").is_none());
        assert!(parse_commit_subject("abc", "wip").is_none());
    }

    #[test]
    fn skip_merge_commit() {
        assert!(parse_commit_subject("abc", "Merge branch 'main' into feature").is_none());
    }

    #[test]
    fn skip_non_conventional() {
        assert!(parse_commit_subject("abc", "random commit message here").is_none());
    }

    #[test]
    fn skip_unclassified_types() {
        assert!(parse_commit_subject("abc", "chore: bump dependencies again").is_none());
        assert!(parse_commit_subject("abc", "test(parser): cover empty input").is_none());
    }

    #[test]
    fn strip_prefix_with_and_without_scope() {
        assert_eq!(strip_conventional_prefix("feat(x): y z"), Some("y z"));
        assert_eq!(strip_conventional_prefix("fix: y z"), Some("y z"));
        assert_eq!(strip_conventional_prefix("nothing to strip"), None);
    }

    #[test]
    fn truncate_counts_chars() {
        assert_eq!(truncate("abc", 3), "abc");
        assert_eq!(truncate("abcd", 3), "abc…");
        assert_eq!(truncate("héllo", 2), "hé…");
    }

    #[test]
    fn scan_real_repo() {
        let candidates = scan_recent_commits(Path::new("."), 20);
        // Just verify it doesn't panic; actual count depends on repo
        assert!(candidates.len() <= 20);
    }
}