spool-memory 0.2.3

Local-first developer memory system — persistent, structured knowledge for AI coding tools
Documentation
//! Git activity importer — extracts decision and pattern signals
//! from recent commit messages in the current repository.
//!
//! Scans `git log` output for structured commit messages (conventional
//! commits) and extracts:
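//! - `feat:` / `refactor:` → `decision` candidates
//! - `fix:` → `incident` candidates
//! - `perf:` → `pattern` candidates
//! - `docs:` whose subject mentions an ADR → `decision` candidates
//! - Repeated patterns (same prefix 3+ times) → workflow patterns
//!   (not implemented in this module yet)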
//!
//! This is a Tier 1 heuristic importer — no LLM involved.

use std::path::Path;
use std::process::Command;

use crate::domain::MemoryScope;
use crate::lifecycle_service::LifecycleService;
use crate::lifecycle_store::{ProposeMemoryRequest, TransitionMetadata};

/// A memory candidate derived from a single commit subject line.
///
/// Values are produced by `parse_commit_subject`, one per commit whose
/// subject matched a recognised pattern.
#[derive(Debug, Clone)]
pub struct GitCandidate {
    /// Commit subject (minus any conventional-commit prefix), shortened by
    /// `truncate(.., 80)`.
    pub title: String,
    /// Untruncated title followed by ` (commit <short hash>)`, where the short
    /// hash is at most the first 7 bytes of `commit_hash`.
    pub summary: String,
    /// Memory category assigned by the parser: `"decision"`, `"incident"` or
    /// `"pattern"`.
    pub memory_type: String,
    /// Commit hash exactly as handed to the parser (the `%H` field of `git log`).
    pub commit_hash: String,
}

/// Summary of one `import_git_activity` run.
#[derive(Debug, Clone, Default)]
pub struct GitImportReport {
    /// The `limit` that was requested for the scan. NOTE(review): this is the
    /// requested upper bound, not a measured count of commits read.
    pub commits_scanned: usize,
    /// Number of candidates returned by `scan_recent_commits`.
    pub candidates_found: usize,
    /// Record ids returned by the lifecycle service for each candidate that
    /// was successfully proposed.
    pub candidates_persisted: Vec<String>,
    /// Number of candidates skipped because their summary overlapped with an
    /// existing record's summary.
    pub candidates_duplicate_dropped: usize,
}

/// Runs `git log` against `repo_path` and turns the most recent `limit`
/// commits into memory candidates.
///
/// Each commit is printed as `<full hash>|<subject>`. Lines without a `|`
/// and subjects rejected by `parse_commit_subject` are skipped, so the result
/// may contain fewer than `limit` entries.
///
/// This is deliberately best-effort: if `git` cannot be spawned or exits
/// unsuccessfully (for example because `repo_path` is not a repository), an
/// empty vector is returned instead of an error.
pub fn scan_recent_commits(repo_path: &Path, limit: usize) -> Vec<GitCandidate> {
    let output = Command::new("git")
        .arg("-C")
        // Hand the path over as an `OsStr`: going through `display()` would
        // lossily re-encode non-UTF-8 paths and point git at the wrong place.
        .arg(repo_path)
        .arg("log")
        .arg(format!("-{limit}"))
        .arg("--format=%H|%s")
        .output();

    let output = match output {
        Ok(out) if out.status.success() => out,
        _ => return Vec::new(),
    };

    // Commit subjects are not guaranteed to be valid UTF-8; decode lossily
    // rather than discarding the whole log.
    let stdout = String::from_utf8_lossy(&output.stdout);

    stdout
        .lines()
        // The hash never contains `|`, so the first `|` separates it from the subject.
        .filter_map(|line| line.split_once('|'))
        .filter_map(|(hash, subject)| parse_commit_subject(hash, subject))
        .collect()
}

/// Classifies one commit subject and builds a [`GitCandidate`] from it.
///
/// Returns `None` when the subject carries no usable signal:
/// - shorter than 10 bytes after trimming,
/// - a merge commit (`Merge ...`) or release bump (`chore(release)...`),
/// - a conventional-commit type that is not mapped to a memory type,
/// - not a conventional commit and no `BREAKING CHANGE` / `breaking:` marker.
///
/// Mapping of conventional-commit types:
/// `feat` / `refactor` → `"decision"`, `fix` → `"incident"`,
/// `perf` → `"pattern"`, `docs` mentioning "adr" (case-insensitive) → `"decision"`.
fn parse_commit_subject(hash: &str, subject: &str) -> Option<GitCandidate> {
    let subject = subject.trim();

    // Very short subjects ("wip", "1", "typo") are noise. This guard also
    // covers every single-character subject, so no separate check is needed.
    if subject.len() < 10 {
        return None;
    }

    // Skip merge commits and version bumps.
    if subject.starts_with("Merge ") || subject.starts_with("chore(release)") {
        return None;
    }

    let (memory_type, title) = if let Some(rest) = strip_conventional_prefix(subject) {
        // `rest` is a suffix of `subject`, so everything before it is the
        // `type(scope): ` prefix that was stripped.
        let prefix = &subject[..subject.len() - rest.len()];
        let memory_type = match prefix {
            p if p.starts_with("feat") || p.starts_with("refactor") => "decision",
            p if p.starts_with("fix") => "incident",
            p if p.starts_with("perf") => "pattern",
            // Documentation commits only matter when they record an ADR.
            p if p.starts_with("docs") && rest.to_lowercase().contains("adr") => "decision",
            _ => return None,
        };
        (memory_type, rest)
    } else if subject.contains("BREAKING CHANGE") || subject.contains("breaking:") {
        // Non-conventional subject that still announces a breaking change.
        ("decision", subject)
    } else {
        return None;
    };

    // `get` instead of `[..7]`: falls back to the whole hash when it is
    // shorter than 7 bytes or byte 7 is not a char boundary, so this never panics.
    let short_hash = hash.get(..7).unwrap_or(hash);

    Some(GitCandidate {
        title: truncate(title, 80),
        summary: format!("{} (commit {})", title, short_hash),
        memory_type: memory_type.to_string(),
        commit_hash: hash.to_string(),
    })
}

/// Strips a conventional-commit prefix from `s` and returns the description.
///
/// Recognised shapes, for the types `feat`, `fix`, `refactor`, `perf`,
/// `docs`, `test`, `chore` and `ci`:
/// - `type: description` and `type!: description`
/// - `type(scope): description` and `type(scope)!: description`
///
/// The `!` form is the Conventional Commits breaking-change marker. The
/// returned slice is always a suffix of `s`, so callers can recover the
/// stripped prefix from the length difference. Returns `None` when `s` does
/// not start with one of the shapes above.
fn strip_conventional_prefix(s: &str) -> Option<&str> {
    const TYPES: [&str; 8] = [
        "feat", "fix", "refactor", "perf", "docs", "test", "chore", "ci",
    ];
    // Markers that close a scope; the scoped form requires the trailing space.
    const SCOPE_END: [&str; 2] = ["): ", ")!: "];

    // No type name is a prefix of another, so at most one can match.
    let after_type = TYPES.iter().find_map(|ty| s.strip_prefix(ty))?;

    if let Some(scoped) = after_type.strip_prefix('(') {
        // Use whichever closing marker appears first in the subject.
        let (idx, marker) = SCOPE_END
            .iter()
            .filter_map(|&marker| scoped.find(marker).map(|idx| (idx, marker)))
            .min_by_key(|&(idx, _)| idx)?;
        return Some(&scoped[idx + marker.len()..]);
    }

    // Unscoped form: the colon may be directly followed by the description.
    after_type
        .strip_prefix("!:")
        .or_else(|| after_type.strip_prefix(':'))
        .map(str::trim_start)
}

/// Shortens `s` to at most `max` characters, appending `…` when anything was
/// cut off.
///
/// Lengths are counted in `char`s rather than bytes, so multi-byte text is
/// never split in the middle of a code point. Strings that already fit are
/// returned unchanged.
fn truncate(s: &str, max: usize) -> String {
    // `nth(max)` yields the byte offset of the first character that does not
    // fit; `None` means the whole string is within the limit.
    match s.char_indices().nth(max) {
        None => s.to_string(),
        Some((cut, _)) => {
            let mut out = String::with_capacity(cut + '…'.len_utf8());
            out.push_str(&s[..cut]);
            out.push('…');
            out
        }
    }
}

/// Scans recent commits in `repo_path` and proposes each new candidate to the
/// lifecycle service configured at `config_path`.
///
/// * `limit` – maximum number of commits handed to `git log`.
/// * `dry_run` – when `true`, only scan and count; nothing is persisted.
///
/// Candidates whose summary overlaps (case-insensitive substring match in
/// either direction) with an existing wakeup-ready or pending-review record
/// are counted in `candidates_duplicate_dropped` and skipped.
///
/// # Errors
///
/// There is currently no error path: a failed workbench load is treated as
/// "no existing records" and per-candidate persistence failures are logged to
/// stderr and skipped. The `Result` is part of the public signature.
pub fn import_git_activity(
    config_path: &Path,
    repo_path: &Path,
    limit: usize,
    dry_run: bool,
) -> anyhow::Result<GitImportReport> {
    let candidates = scan_recent_commits(repo_path, limit);
    let mut report = GitImportReport {
        // NOTE(review): this records the requested limit; the scanner does not
        // expose how many commits it actually read.
        commits_scanned: limit,
        candidates_found: candidates.len(),
        ..Default::default()
    };

    if dry_run || candidates.is_empty() {
        return Ok(report);
    }

    let service = LifecycleService::new();
    // Lower-cased summaries of records that already exist, used for duplicate
    // detection. A failed load degrades to an empty list (best-effort).
    let existing = service
        .load_workbench(config_path)
        .map(|snap| {
            snap.wakeup_ready
                .iter()
                .chain(snap.pending_review.iter())
                .map(|e| e.record.summary.to_lowercase())
                // Every string contains "", so a blank summary would otherwise
                // mark every candidate as a duplicate.
                .filter(|summary| !summary.trim().is_empty())
                .collect::<Vec<_>>()
        })
        .unwrap_or_default();

    for candidate in &candidates {
        let summary_lc = candidate.summary.to_lowercase();
        let is_duplicate = existing
            .iter()
            .any(|known| known.contains(&summary_lc) || summary_lc.contains(known.as_str()));
        if is_duplicate {
            report.candidates_duplicate_dropped += 1;
            continue;
        }

        // `get` keeps this panic-free for hashes shorter than 7 bytes or with
        // a non-boundary byte at index 7.
        let short_hash = candidate
            .commit_hash
            .get(..7)
            .unwrap_or(candidate.commit_hash.as_str());

        let request = ProposeMemoryRequest {
            title: candidate.title.clone(),
            summary: candidate.summary.clone(),
            memory_type: candidate.memory_type.clone(),
            scope: MemoryScope::Project,
            source_ref: format!("git:{}", short_hash),
            project_id: None,
            user_id: None,
            sensitivity: None,
            metadata: TransitionMetadata {
                actor: Some("spool-git-importer".to_string()),
                reason: Some("extracted from git commit history".to_string()),
                evidence_refs: vec![format!("commit:{}", candidate.commit_hash)],
            },
            entities: Vec::new(),
            tags: Vec::new(),
            triggers: Vec::new(),
            related_files: Vec::new(),
            related_records: Vec::new(),
            supersedes: None,
            applies_to: Vec::new(),
            valid_until: None,
        };

        // One failing candidate must not abort the rest of the import.
        match service.propose_ai(config_path, request) {
            Ok(result) => report.candidates_persisted.push(result.entry.record_id),
            Err(err) => {
                eprintln!("[spool git-import] persist failed: {:#}", err);
            }
        }
    }

    Ok(report)
}

// Unit tests for the commit-subject parser plus a smoke test for the scanner.
#[cfg(test)]
mod tests {
    use super::*;

    // A scoped `feat` commit becomes a decision and keeps its description.
    #[test]
    fn parse_feat_commit() {
        let c = parse_commit_subject("abc1234", "feat(retrieval): add confidence scoring").unwrap();
        assert_eq!(c.memory_type, "decision");
        assert!(c.title.contains("confidence scoring"));
    }

    // An unscoped `fix` commit becomes an incident.
    #[test]
    fn parse_fix_commit() {
        let c = parse_commit_subject("def5678", "fix: avoid nested .spool directory").unwrap();
        assert_eq!(c.memory_type, "incident");
    }

    // A scoped `refactor` commit becomes a decision.
    #[test]
    fn parse_refactor_commit() {
        let c =
            parse_commit_subject("ghi9012", "refactor(hook): remove Trellis degradation").unwrap();
        assert_eq!(c.memory_type, "decision");
    }

    // Subjects shorter than 10 bytes are rejected; the 3-byte hash also
    // exercises the short-hash path without panicking.
    #[test]
    fn skip_short_subject() {
        assert!(parse_commit_subject("abc", "1").is_none());
        assert!(parse_commit_subject("abc", "wip").is_none());
    }

    // Merge commits are ignored.
    #[test]
    fn skip_merge_commit() {
        assert!(parse_commit_subject("abc", "Merge branch 'main' into feature").is_none());
    }

    // Subjects without a conventional prefix or breaking-change marker are ignored.
    #[test]
    fn skip_non_conventional() {
        assert!(parse_commit_subject("abc", "random commit message here").is_none());
    }

    // Smoke test against the current working directory. The scanner returns an
    // empty list when `git` fails, so the bound below holds in that case too.
    #[test]
    fn scan_real_repo() {
        let candidates = scan_recent_commits(Path::new("."), 20);
        // Just verify it doesn't panic; actual count depends on repo
        assert!(candidates.len() <= 20);
    }
}